From b6cea827835222aa38004dd599e24c3ecc5c596b Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Sat, 12 Sep 2009 13:41:20 -0400
Subject: [PATCH] Added the framework for the PostGIS database integration.

Created makefile targets for the Tiger data and census database. Added
comments to the makefile describing the targets.

Created a SQL script to create the sf1_blocks table, and to add indexes
on both blkidfp00 columns.

Created a new script, sf1blocks2sql (used by the makefile), which exports
the Summary File 1 blocks as SQL "INSERT" statements.
---
 bin/sf1blocks2sql               | 65 +++++++++++++++++++++++++++++
 makefile                        | 73 ++++++++++++++++++++++++++++++++-
 sql/create-sf1_blocks-table.sql | 24 +++++++++++
 3 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100755 bin/sf1blocks2sql
 create mode 100644 sql/create-sf1_blocks-table.sql

diff --git a/bin/sf1blocks2sql b/bin/sf1blocks2sql
new file mode 100755
index 0000000..1918a29
--- /dev/null
+++ b/bin/sf1blocks2sql
@@ -0,0 +1,65 @@
+#!/usr/bin/python
+
+"""
+Exports geographic header records to SQL.
+We take a geographic header file, and a table name as arguments, and then
+parse that header file to create the necessary SQL statements. The generated
+SQL statements refer to the table name passed as an argument.
+
+The output is written to stdout; it can either be redirected to a file,
+or piped directly into the database.
+"""
+
+import sys
+import os
+import site
+
+# Basically, add '../src' to our path.
+# Needed for the imports that follow.
+site.addsitedir(os.path.dirname(os.path.abspath(sys.argv[0])) + '/../src')
+
+import ExitCodes
+import GPS
+import SummaryFile1
+
+
+if (len(sys.argv) < 3):
+    print "Usage: %s <geo_file> <table_name>" % sys.argv[0]
+    raise SystemExit(ExitCodes.NotEnoughArgs)
+
+geo_file_path = sys.argv[1]
+table_name = sys.argv[2]
+
+grp = SummaryFile1.GeoRecordParser()
+blocks = grp.parse_blocks(geo_file_path)
+
+
+sql_query = """
+INSERT INTO %s (state,
+                county,
+                tract,
+                block,
+                arealand,
+                areawatr,
+                total_area,
+                tiger_blkidfp00,
+                pop100,
+                population_density)
+
+VALUES ('%s', '%s', '%s', '%s', %.12f, %.12f, %.12f, '%s', %d, %.12f);
+"""
+
+for b in blocks:
+    # Print out the INSERT statement contained in sql_query,
+    # substituting in all of the block attributes.
+    print sql_query % (table_name,
+                       b.state,
+                       b.county,
+                       b.tract,
+                       b.block,
+                       b.arealand,
+                       b.areawatr,
+                       b.total_area(),
+                       b.tiger_blkidfp00(),
+                       b.pop100,
+                       b.population_density())
diff --git a/makefile b/makefile
index f3136a6..2c20ffc 100644
--- a/makefile
+++ b/makefile
@@ -1,9 +1,80 @@
-.PHONY : test
+DB_NAME='census2000'
+DB_USER='postgres'
+TIGER_DATA_URL='http://www2.census.gov/cgi-bin/shapefiles/multi-file-download?files=24_MARYLAND%2Ftl_2008_24_tabblock00.zip'
+
+# Necessary to run test/data without prerequisites.
+#
+.PHONY : test data
+
+
+# The default task, since it comes first in the list.
+#
 all: clean test
 
 test:
 	./bin/run_tests
 
+# Remove byte-compiled python code.
+#
 clean:
 	find ./ -name '*.pyc' -print0 | xargs -0 rm -f
+
+
+# Download the shapefiles from Tiger if they don't already exist.
+# For now, we're only dealing with the Census 2000 Maryland Block
+# data, so the filenames are hard-coded. Easy enough to change.
+#
+data:
+	mkdir -p data/census-2000-block/maryland/
+	if [ ! -f data/census-2000-block/maryland/tl_2008_24_tabblock00.shp ]; then \
+		wget -O tiger.zip $(TIGER_DATA_URL); \
+		unzip tiger.zip; \
+		rm tiger.zip; \
+		unzip srv/ftp/geo/tiger/TIGER2008/24_MARYLAND/tl_2008_24_tabblock00.zip \
+			-d ./data/census-2000-block/maryland/; \
+		rm -rf srv; \
+	fi;
+
+
+# This task does a couple of things. First, it drops and re-creates
+# the DB_NAME database (or schema, whatever). Then, it adds PL/pgSQL
+# support to the database.
+#
+# At that point, we import the two PostGIS files, lwpostgis.sql and
+# spatial_ref_sys.sql. These are magic as far as I'm concerned, but
+# PostGIS requires them.
+#
+# Then, we import the Tiger data using shp2pgsql. The shapefiles
+# should exist, since this task depends on the "data" task, which
+# downloads said shapefiles.
+#
+# Finally, we create the table for the demographic data (obtained from
+# the geographic header records), and populate that table with the output
+# of the sf1blocks2sql script.
+#
+db: data
+	dropdb -U $(DB_USER) $(DB_NAME)
+	createdb -U $(DB_USER) $(DB_NAME)
+	createlang -U $(DB_USER) plpgsql $(DB_NAME)
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/lwpostgis.sql
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f /usr/share/postgresql/contrib/spatial_ref_sys.sql
+
+	shp2pgsql -I data/census-2000-block/maryland/tl_2008_24_tabblock00.shp tiger \
+		| psql -U $(DB_USER) -d $(DB_NAME)
+
+	psql -d $(DB_NAME) \
+	     -U $(DB_USER) \
+	     -f sql/create-sf1_blocks-table.sql
+
+	bin/sf1blocks2sql src/Tests/Fixtures/SummaryFile1/mdgeo.uf1 sf1_blocks \
+		| psql -U $(DB_USER) -d $(DB_NAME)
+
diff --git a/sql/create-sf1_blocks-table.sql b/sql/create-sf1_blocks-table.sql
new file mode 100644
index 0000000..71d87b5
--- /dev/null
+++ b/sql/create-sf1_blocks-table.sql
@@ -0,0 +1,24 @@
+/*
+  We need to create a unique index on the referenced
+  foreign key column before we can create the foreign key
+  constraint.
+*/
+CREATE UNIQUE INDEX idx_tiger_blkidfp00_unique
+  ON tiger (blkidfp00);
+
+CREATE TABLE sf1_blocks (
+  id                  SERIAL PRIMARY KEY,
+  state               varchar(2)       NOT NULL,
+  county              varchar(3)       NOT NULL,
+  tract               varchar(6)       NOT NULL,
+  block               varchar(4)       NOT NULL,
+  arealand            double precision NOT NULL,
+  areawatr            double precision NOT NULL,
+  tiger_blkidfp00     varchar(15)      NOT NULL REFERENCES tiger (blkidfp00),
+  pop100              integer          NOT NULL,
+  total_area          double precision NOT NULL,
+  population_density  double precision NOT NULL
+);
+
+CREATE UNIQUE INDEX idx_sf1_blocks_tiger_blkidfp00_unique
+  ON sf1_blocks (tiger_blkidfp00);
-- 
2.43.2
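
Note on the two blkidfp00 indexes: they exist so that each sf1_blocks row can be
joined one-to-one against the Tiger block geometry imported by shp2pgsql. A query
along the following lines is only a sketch; the geometry column name (the_geom) is
an assumption about what the shp2pgsql import produces, not something this patch
specifies.

    -- Sketch: join demographic rows to their Tiger block geometries via
    -- the shared block identifier. The geometry column name "the_geom"
    -- is an assumption about the shp2pgsql import, not part of this patch.
    SELECT sf1.pop100,
           sf1.population_density,
           ST_Area(t.the_geom) AS tiger_geom_area
      FROM sf1_blocks AS sf1
      JOIN tiger AS t ON t.blkidfp00 = sf1.tiger_blkidfp00
     LIMIT 10;

The unique index on sf1_blocks (tiger_blkidfp00) keeps such a join one-to-one,
which is why the table-creation script builds it alongside the foreign key.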