From b6cea827835222aa38004dd599e24c3ecc5c596b Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Sat, 12 Sep 2009 13:41:20 -0400
Subject: [PATCH] Added the framework for the PostGIS database integration.

Created makefile targets for the Tiger data and census database. Added
comments to the makefile describing the targets.

Created a SQL script to create the sf1_blocks table, and to add indexes
on both blkidfp00 columns.

Created a new script, sf1blocks2sql (used by the makefile), which exports
the Summary File 1 blocks as SQL "INSERT" statements.
---
 bin/sf1blocks2sql               | 65 +++++++++++++++++++++++++++++
 makefile                        | 73 ++++++++++++++++++++++++++++++++-
 sql/create-sf1_blocks-table.sql | 24 +++++++++++
 3 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100755 bin/sf1blocks2sql
 create mode 100644 sql/create-sf1_blocks-table.sql

diff --git a/bin/sf1blocks2sql b/bin/sf1blocks2sql
new file mode 100755
index 0000000..1918a29
--- /dev/null
+++ b/bin/sf1blocks2sql
@@ -0,0 +1,65 @@
+#!/usr/bin/python
+
+"""
+Exports geographic header records to SQL.
+We take a geographic header file, and a table name as arguments, and then
+parse that header file to create the necessary SQL statements. The generated
+SQL statements refer to the table name passed as an argument.
+
+The output is written to stdout; it can either be redirected to a file,
+or piped directly into the database.
+"""
+
+import sys
+import os
+import site
+
+# Basically, add '../src' to our path.
+# Needed for the imports that follow.
+site.addsitedir(os.path.dirname(os.path.abspath(sys.argv[0])) + '/../src')
+
+import ExitCodes
+import GPS
+import SummaryFile1
+
+
+if (len(sys.argv) < 3):
+    print "Usage: %s <geo_file> <table_name>" % sys.argv[0]
+    raise SystemExit(ExitCodes.NotEnoughArgs)
+
+geo_file_path = sys.argv[1]
+table_name = sys.argv[2]
+
+grp = SummaryFile1.GeoRecordParser()
+blocks = grp.parse_blocks(geo_file_path)
+
+
+sql_query = """
+INSERT INTO %s (state,
+                county,
+                tract,
+                block,
+                arealand,
+                areawatr,
+                total_area,
+                tiger_blkidfp00,
+                pop100,
+                population_density)
+
+VALUES ('%s', '%s', '%s', '%s', %.12f, %.12f, %.12f, '%s', %d, %.12f);
+"""
+
+for b in blocks:
+    # Print out the INSERT statement contained in sql_query,
+    # substituting in all of the block attributes.
+    print sql_query % (table_name,
+                       b.state,
+                       b.county,
+                       b.tract,
+                       b.block,
+                       b.arealand,
+                       b.areawatr,
+                       b.total_area(),
+                       b.tiger_blkidfp00(),
+                       b.pop100,
+                       b.population_density())
diff --git a/makefile b/makefile
index f3136a6..2c20ffc 100644
--- a/makefile
+++ b/makefile
@@ -1,9 +1,80 @@
-.PHONY : test
+DB_NAME='census2000'
+DB_USER='postgres'
+TIGER_DATA_URL='http://www2.census.gov/cgi-bin/shapefiles/multi-file-download?files=24_MARYLAND%2Ftl_2008_24_tabblock00.zip'
+
+# Necessary to run test/data without prerequisites.
+#
+.PHONY : test data
+
+
+# The default task, since it comes first in the list.
+#
 all: clean test
 
 test:
 	./bin/run_tests
 
+# Remove byte-compiled python code.
+#
 clean:
 	find ./ -name '*.pyc' -print0 | xargs -0 rm -f
+
+
+# Download the shapefiles from Tiger if they don't already exist.
+# For now, we're only dealing with the Census 2000 Maryland Block
+# data, so the filenames are hard-coded. Easy enough to change.
+#
+data:
+	mkdir -p data/census-2000-block/maryland/
+	if [ ! -f data/census-2000-block/maryland/tl_2008_24_tabblock00.shp ]; then \
+		wget -O tiger.zip $(TIGER_DATA_URL); \
+		unzip tiger.zip; \
+		rm tiger.zip; \
+		unzip srv/ftp/geo/tiger/TIGER2008/24_MARYLAND/tl_2008_24_tabblock00.zip \
+			-d ./data/census-2000-block/maryland/; \
+		rm -rf srv; \
+	fi;
+
+
+# This task does a couple of things. First, it drops and re-creates
+# the DB_NAME database (or schema, whatever). Then, it adds PL/pgSQL
+# support to the database.
+#
+# At that point, we import the two PostGIS files, lwpostgis.sql and
+# spatial_ref_sys.sql. These are magic as far as I'm concerned, but
+# PostGIS requires them.
+#
+# Then, we import the Tiger data using shp2pgsql. The shapefiles
+# should exist, since this task depends on the "data" task, which
+# downloads said shapefiles.
+#
+# Finally, we create the table for the demographic data (obtained from
+# the geographic header records), and populate that table with the output
+# of the sf1blocks2sql script.
+#
+db: data
+	dropdb -U $(DB_USER) $(DB_NAME)
+	createdb -U $(DB_USER) $(DB_NAME)
+	createlang -U $(DB_USER) plpgsql $(DB_NAME)
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/lwpostgis.sql
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/spatial_ref_sys.sql
+
+	shp2pgsql -I data/census-2000-block/maryland/tl_2008_24_tabblock00.shp tiger \
+		| psql -U $(DB_USER) -d $(DB_NAME)
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f sql/create-sf1_blocks-table.sql
+
+	bin/sf1blocks2sql src/Tests/Fixtures/SummaryFile1/mdgeo.uf1 sf1_blocks \
+		| psql -U $(DB_USER) -d $(DB_NAME)
+
diff --git a/sql/create-sf1_blocks-table.sql b/sql/create-sf1_blocks-table.sql
new file mode 100644
index 0000000..71d87b5
--- /dev/null
+++ b/sql/create-sf1_blocks-table.sql
@@ -0,0 +1,24 @@
+/*
+  We need to create a unique index on the referenced
+  foreign key column before we can create the foreign key
+  constraint.
+*/
+CREATE UNIQUE INDEX idx_tiger_blkidfp00_unique
+  ON tiger (blkidfp00);
+
+CREATE TABLE sf1_blocks (
+  id                  SERIAL PRIMARY KEY,
+  state               varchar(2)       NOT NULL,
+  county              varchar(3)       NOT NULL,
+  tract               varchar(6)       NOT NULL,
+  block               varchar(4)       NOT NULL,
+  arealand            double precision NOT NULL,
+  areawatr            double precision NOT NULL,
+  tiger_blkidfp00     varchar(15)      NOT NULL REFERENCES tiger (blkidfp00),
+  pop100              integer          NOT NULL,
+  total_area          double precision NOT NULL,
+  population_density  double precision NOT NULL
+);
+
+CREATE UNIQUE INDEX idx_sf1_blocks_tiger_blkidfp00_unique
+  ON sf1_blocks (tiger_blkidfp00);
-- 
2.43.2
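
Note on the two blkidfp00 indexes: they exist so that each sf1_blocks row can be
joined one-to-one against the Tiger block geometry imported by shp2pgsql. A query
along the following lines is only a sketch; the geometry column name (the_geom) is
an assumption about what the shp2pgsql import produces, not something this patch
specifies.

    -- Sketch: join demographic rows to their Tiger block geometries via
    -- the shared block identifier. The geometry column name "the_geom"
    -- is an assumption about the shp2pgsql import, not part of this patch.
    SELECT sf1.pop100,
           sf1.population_density,
           ST_Area(t.the_geom) AS tiger_geom_area
      FROM sf1_blocks AS sf1
      JOIN tiger AS t ON t.blkidfp00 = sf1.tiger_blkidfp00
     LIMIT 10;

The unique index on sf1_blocks (tiger_blkidfp00) keeps such a join one-to-one,
which is why the table-creation script builds it alongside the foreign key.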