Created makefile targets for the Tiger data and census database.
Added comments to the makefile describing the targets.
Created a SQL script to create the sf1_blocks table, and to add indexes on both blkidfp00 columns.
Created a new script, sf1blocks2sql (used by the makefile), which exports the Summary File 1 blocks as SQL "INSERT" statements.
--- /dev/null
+#!/usr/bin/python
+# NOTE(review): Python 2 syntax (print statements); run with a python2
+# interpreter.
+
+"""
+Exports geographic header records to SQL.
+
+We take a geographic header file, and a table name as arguments, and then
+parse that header file to create the necessary SQL statements. The generated
+SQL statements refer to the table name passed as an argument.
+
+The output is written to stdout; it can either be redirected to a file,
+or piped directly into the database.
+"""
+
+import sys
+import os
+import site
+
+# Basically, add '../src' to our path.
+# Needed for the imports that follow.
+site.addsitedir(os.path.dirname(os.path.abspath(sys.argv[0])) + '/../src')
+
+# Project-local modules, made importable by the addsitedir() call above.
+import ExitCodes
+import GPS
+import SummaryFile1
+
+
+# Both positional arguments are required: <geo_file> and <table_name>.
+if (len(sys.argv) < 3):
+    print "Usage: %s <geo_file> <table_name>" % sys.argv[0]
+    raise SystemExit(ExitCodes.NotEnoughArgs)
+
+geo_file_path = sys.argv[1]
+table_name = sys.argv[2]
+
+# Parse every block record out of the geographic header file.
+grp = SummaryFile1.GeoRecordParser()
+blocks = grp.parse_blocks(geo_file_path)
+
+
+# INSERT template, one statement per block record.
+# NOTE(review): values (and the table name) are spliced in with Python
+# %-formatting rather than bound parameters, so a stray single quote in
+# any field would produce broken SQL. The SF1 header fields handled here
+# are presumably fixed-format codes/numbers -- verify before reusing this
+# script on other input.
+sql_query = """
+INSERT INTO %s (state,
+                county,
+                tract,
+                block,
+                arealand,
+                areawatr,
+                total_area,
+                tiger_blkidfp00,
+                pop100,
+                population_density)
+
+VALUES ('%s', '%s', '%s', '%s', %.12f, %.12f, %.12f, '%s', %d, %.12f);
+"""
+
+for b in blocks:
+    # Print out the INSERT statement contained in sql_query,
+    # substituting in all of the block attributes.
+    print sql_query % (table_name,
+                       b.state,
+                       b.county,
+                       b.tract,
+                       b.block,
+                       b.arealand,
+                       b.areawatr,
+                       b.total_area(),
+                       b.tiger_blkidfp00(),
+                       b.pop100,
+                       b.population_density())
-.PHONY : test
+# Connection settings and the Tiger download URL. Note that the single
+# quotes are literal characters to make; they are only stripped by the
+# shell when these variables are expanded inside recipes.
+DB_NAME='census2000'
+DB_USER='postgres'
+TIGER_DATA_URL='http://www2.census.gov/cgi-bin/shapefiles/multi-file-download?files=24_MARYLAND%2Ftl_2008_24_tabblock00.zip'
+
+# These targets are commands, not files: declare them phony so a stray
+# file (or directory) with the same name can never shadow them, and so
+# they always run when requested.
+#
+.PHONY : test data all clean db
+
+
+# The default task, since it comes first in the list.
+#
all: clean test
+
+# Run the project's test suite via the helper script.
+#
test:
	./bin/run_tests
+
+# Remove byte-compiled python code.
+#
clean:
	find ./ -name '*.pyc' -print0 | xargs -0 rm -f
+
+
+# Download the shapefiles from Tiger if they don't already exist.
+# For now, we're only dealing with the Census 2000 Maryland Block
+# data, so the filenames are hard-coded. Easy enough to change.
+#
+data:
+	mkdir -p data/census-2000-block/maryland/
+# The steps are chained with "&&" so that a failed download or a bad
+# archive aborts the recipe with a nonzero status, instead of pressing
+# on (and deleting tiger.zip) after an error as a ";"-list would.
+	if [ ! -f data/census-2000-block/maryland/tl_2008_24_tabblock00.shp ]; then \
+	  wget -O tiger.zip $(TIGER_DATA_URL) && \
+	  unzip tiger.zip && \
+	  rm tiger.zip && \
+	  unzip srv/ftp/geo/tiger/TIGER2008/24_MARYLAND/tl_2008_24_tabblock00.zip \
+	    -d ./data/census-2000-block/maryland/ && \
+	  rm -rf srv; \
+	fi
+
+
+# This task does a couple of things. First, it drops and re-creates
+# the DB_NAME database (or schema, whatever). Then, it adds PL/pgSQL
+# support to the database.
+#
+# At that point, we import the two PostGIS files, lwpostgis.sql and
+# spatial_ref_sys.sql. These are magic as far as I'm concerned, but
+# PostGIS requires them.
+#
+# Then, we import the Tiger data using shp2pgsql. The shapefiles
+# should exist, since this task depends on the "data" task, which
+# downloads said shapefiles.
+#
+# Finally, we create the table for the demographic data (obtained from
+# the geographic header records), and populate that table with the output
+# of the sf1blocks2sql script.
+#
+db: data
+# The leading "-" tolerates a missing database: dropdb fails (and would
+# otherwise abort the whole build) on the first run against a fresh
+# cluster, where DB_NAME does not exist yet.
+	-dropdb -U $(DB_USER) $(DB_NAME)
+	createdb -U $(DB_USER) $(DB_NAME)
+	createlang -U $(DB_USER) plpgsql $(DB_NAME)
+
+# PostGIS bootstrap: geometry types/functions, then the spatial
+# reference systems table.
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/lwpostgis.sql
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/spatial_ref_sys.sql
+
+# Load the Tiger/Line shapefile into the "tiger" table (-I also builds
+# a spatial index on the geometry column).
+	shp2pgsql -I data/census-2000-block/maryland/tl_2008_24_tabblock00.shp tiger \
+	  | psql -U $(DB_USER) -d $(DB_NAME)
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f sql/create-sf1_blocks-table.sql
+
+# Use $(DB_USER) here as well; this invocation previously hard-coded
+# "postgres", which broke overriding DB_USER on the command line.
+	bin/sf1blocks2sql src/Tests/Fixtures/SummaryFile1/mdgeo.uf1 sf1_blocks \
+	  | psql -U $(DB_USER) -d $(DB_NAME)
+
--- /dev/null
+/*
+  We need to create a unique index on the referenced
+  foreign key column before we can create the foreign key
+  constraint.
+
+  (PostgreSQL requires the target of a REFERENCES clause to be
+  covered by a unique index or constraint; without this index the
+  foreign key on sf1_blocks below would be rejected.)
+*/
+CREATE UNIQUE INDEX idx_tiger_blkidfp00_unique
+  ON tiger (blkidfp00);
+
+/*
+  Demographic attributes for each census block, taken from the
+  Summary File 1 geographic header records and populated by the
+  sf1blocks2sql script (see the makefile "db" target). One row
+  per block.
+*/
+CREATE TABLE sf1_blocks (
+  id SERIAL PRIMARY KEY,
+  state varchar(2) NOT NULL,       -- presumably the FIPS state code; verify against the SF1 spec
+  county varchar(3) NOT NULL,      -- county code within the state
+  tract varchar(6) NOT NULL,
+  block varchar(4) NOT NULL,
+  arealand double precision NOT NULL,   -- land area (units per SF1 header; not stated here)
+  areawatr double precision NOT NULL,   -- water area (same caveat)
+  tiger_blkidfp00 varchar(15) NOT NULL REFERENCES tiger (blkidfp00),
+  pop100 integer NOT NULL,              -- population count from the pop100 field
+  total_area double precision NOT NULL,       -- computed by the exporter (b.total_area())
+  population_density double precision NOT NULL  -- computed by the exporter (b.population_density())
+);
+
+/*
+  Enforce a one-to-one mapping: each sf1_blocks row references a
+  distinct Tiger/Line feature.
+*/
+CREATE UNIQUE INDEX idx_sf1_blocks_tiger_blkidfp00_unique
+  ON sf1_blocks (tiger_blkidfp00);