gitweb.michael.orlitzky.com - dead/census-tools.git/commitdiff
Added the framework for the PostGIS database integration.
author     Michael Orlitzky <michael@orlitzky.com>
           Sat, 12 Sep 2009 17:41:20 +0000 (13:41 -0400)
committer  Michael Orlitzky <michael@orlitzky.com>
           Sat, 12 Sep 2009 17:41:20 +0000 (13:41 -0400)
Created makefile targets for the Tiger data and census database.
Added comments to the makefile describing the targets.
Created a SQL script to create the sf1_blocks table, and to add indexes on both blkidfp00 columns.
Created a new script, sf1blocks2sql (used by the makefile), which exports the Summary File 1 blocks as SQL "INSERT" statements.

bin/sf1blocks2sql [new file with mode: 0755]
makefile
sql/create-sf1_blocks-table.sql [new file with mode: 0644]

diff --git a/bin/sf1blocks2sql b/bin/sf1blocks2sql
new file mode 100755 (executable)
index 0000000..1918a29
--- /dev/null
+++ b/bin/sf1blocks2sql
@@ -0,0 +1,65 @@
+#!/usr/bin/python
+
+"""
+Exports geographic header records to SQL.
+We take a geographic header file and a table name as arguments, and then
+parse that header file to create the necessary SQL statements. The generated
+SQL statements refer to the table name passed as an argument.
+
+The output is written to stdout; it can either be redirected to a file,
+or piped directly into the database.
+"""
+
+import sys
+import os
+import site
+
+# Basically, add '../src' to our path.
+# Needed for the imports that follow.
+site.addsitedir(os.path.dirname(os.path.abspath(sys.argv[0])) + '/../src')
+
+import ExitCodes
+import GPS
+import SummaryFile1
+
+
+if (len(sys.argv) < 3):
+    print "Usage: %s <geo_file> <table_name>" % sys.argv[0]
+    raise SystemExit(ExitCodes.NotEnoughArgs)
+
+geo_file_path = sys.argv[1]
+table_name = sys.argv[2]
+
+grp = SummaryFile1.GeoRecordParser()
+blocks = grp.parse_blocks(geo_file_path)
+
+
+sql_query = """
+INSERT INTO %s (state,
+                county,
+                tract,
+                block,
+                arealand,
+                areawatr,
+                total_area,
+                tiger_blkidfp00,
+                pop100,
+                population_density) 
+
+VALUES ('%s', '%s', '%s', '%s', %.12f, %.12f, %.12f, '%s', %d, %.12f);
+"""
+
+for b in blocks:
+    # Print out the INSERT statement contained in sql_query,
+    # substituting in all of the block attributes.
+    print sql_query % (table_name,
+                       b.state,
+                       b.county,
+                       b.tract,
+                       b.block,
+                       b.arealand,
+                       b.areawatr,
+                       b.total_area(),
+                       b.tiger_blkidfp00(),
+                       b.pop100,
+                       b.population_density())
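
For reference, each record that sf1blocks2sql emits is a standalone INSERT
built from the template above. The statement below is only an illustration
of that shape; the values are made-up placeholders, not data from the real
Maryland geographic header file.

    INSERT INTO sf1_blocks (state, county, tract, block,
                            arealand, areawatr, total_area,
                            tiger_blkidfp00, pop100, population_density)
    VALUES ('24', '005', '402000', '1012',
            250221.000000000000, 0.000000000000, 0.096611000000,
            '240054020001012', 55, 569.294000000000);

Because the output goes to stdout, it can be inspected first or piped
straight into psql, which is exactly what the makefile's db target does
below.
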
diff --git a/makefile b/makefile
index f3136a6cda47c33afcb4d13b3d9a8cde272ae5ff..2c20ffc4f90989ffa09bfa4b0172e54e900ff17e 100644 (file)
--- a/makefile
+++ b/makefile
@@ -1,9 +1,80 @@
-.PHONY : test
+DB_NAME='census2000'
+DB_USER='postgres'
+TIGER_DATA_URL='http://www2.census.gov/cgi-bin/shapefiles/multi-file-download?files=24_MARYLAND%2Ftl_2008_24_tabblock00.zip'
 
+
+# Mark test and data as phony so make always runs them.
+#
+.PHONY : test data
+
+
+# The default task, since it comes first in the list.
+#
 all: clean test
 
+
 test:
        ./bin/run_tests
 
+
+# Remove byte-compiled python code.
+#
 clean:
        find ./ -name '*.pyc' -print0 | xargs -0 rm -f
+
+
+# Download the shapefiles from Tiger if they don't already exist.
+# For now, we're only dealing with the Census 2000 Maryland Block
+# data, so the filenames are hard-coded. Easy enough to change.
+#
+data:
+       mkdir -p data/census-2000-block/maryland/
+       if [ ! -f data/census-2000-block/maryland/tl_2008_24_tabblock00.shp ]; then \
+               wget -O tiger.zip $(TIGER_DATA_URL); \
+               unzip tiger.zip; \
+               rm tiger.zip; \
+               unzip srv/ftp/geo/tiger/TIGER2008/24_MARYLAND/tl_2008_24_tabblock00.zip \
+                      -d ./data/census-2000-block/maryland/; \
+               rm -rf srv; \
+       fi;
+
+
+# This task does a couple of things. First, it drops and re-creates
+# the DB_NAME database (or schema, whatever). Then, it adds PL/pgSQL
+# support to the database.
+#
+# At that point, we import the two PostGIS files, lwpostgis.sql and
+# spatial_ref_sys.sql. These are magic as far as I'm concerned, but
+# PostGIS requires them.
+#
+# Then, we import the Tiger data using shp2pgsql. The shapefiles
+# should exist, since this task depends on the "data" task, which
+# downloads said shapefiles.
+#
+# Finally, we create the table for the demographic data (obtained from
+# the geographic header records), and populate that table with the output
+# of the sf1blocks2sql script.
+#
+db: data
+       dropdb -U $(DB_USER) $(DB_NAME)
+       createdb -U $(DB_USER) $(DB_NAME)
+       createlang -U $(DB_USER) plpgsql $(DB_NAME)
+
+       psql -d $(DB_NAME) \
+             -U $(DB_USER) \
+             -f /usr/share/postgresql/contrib/lwpostgis.sql
+
+       psql -d $(DB_NAME) \
+             -U $(DB_USER) \
+             -f /usr/share/postgresql/contrib/spatial_ref_sys.sql
+
+       shp2pgsql -I data/census-2000-block/maryland/tl_2008_24_tabblock00.shp tiger \
+                   | psql -U $(DB_USER) -d $(DB_NAME)
+
+       psql -d $(DB_NAME) \
+             -U $(DB_USER) \
+             -f sql/create-sf1_blocks-table.sql
+
+       bin/sf1blocks2sql src/Tests/Fixtures/SummaryFile1/mdgeo.uf1 sf1_blocks \
+                          | psql -U $(DB_USER) -d $(DB_NAME)
+
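
Once "make db" has run, a quick sanity check on the import is to count how
many TIGER blocks picked up a matching demographic row. This is only a
sketch; it assumes the shp2pgsql and sf1blocks2sql steps above completed,
and it uses only the tables and columns created by this commit.

    SELECT COUNT(*)    AS tiger_blocks,
           COUNT(s.id) AS blocks_with_demographics
    FROM tiger t
    LEFT JOIN sf1_blocks s ON s.tiger_blkidfp00 = t.blkidfp00;

The foreign key on tiger_blkidfp00 already guarantees that every sf1_blocks
row points at a real TIGER block, so the interesting direction is the one
above: TIGER blocks that ended up with no demographic record.
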
diff --git a/sql/create-sf1_blocks-table.sql b/sql/create-sf1_blocks-table.sql
new file mode 100644 (file)
index 0000000..71d87b5
--- /dev/null
+++ b/sql/create-sf1_blocks-table.sql
@@ -0,0 +1,24 @@
+/*
+  We need to create a unique index on the referenced
+  foreign key column before we can create the foreign key
+  constraint.
+*/
+CREATE UNIQUE INDEX idx_tiger_blkidfp00_unique
+       ON tiger (blkidfp00);
+
+CREATE TABLE sf1_blocks (
+       id                 SERIAL           PRIMARY KEY,
+       state              varchar(2)       NOT NULL,
+       county             varchar(3)       NOT NULL,
+       tract              varchar(6)       NOT NULL,
+       block              varchar(4)       NOT NULL,
+       arealand           double precision NOT NULL,
+       areawatr           double precision NOT NULL,
+       tiger_blkidfp00    varchar(15)      NOT NULL REFERENCES tiger (blkidfp00),
+       pop100             integer          NOT NULL,
+       total_area         double precision NOT NULL,
+       population_density double precision NOT NULL
+);
+
+CREATE UNIQUE INDEX idx_sf1_blocks_tiger_blkidfp00_unique
+       ON sf1_blocks (tiger_blkidfp00);
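
With both tables loaded, the demographic columns can be joined back to the
TIGER geometry for spatial queries. The sketch below assumes the geometry
column name that shp2pgsql created by default in the PostGIS releases
contemporary with this commit (the_geom); adjust that name if your import
differs.

    -- Ten most densely populated blocks, with their geometry.
    -- the_geom is an assumption about the shp2pgsql default column name.
    SELECT s.tiger_blkidfp00,
           s.pop100,
           s.population_density,
           t.the_geom
    FROM sf1_blocks s
    JOIN tiger t ON t.blkidfp00 = s.tiger_blkidfp00
    ORDER BY s.population_density DESC
    LIMIT 10;
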