]> gitweb.michael.orlitzky.com - dead/census-tools.git/commitdiff
Added a unique index on the tiger_lines "tlid" column.
authorMichael Orlitzky <michael@orlitzky.com>
Mon, 19 Oct 2009 11:51:40 +0000 (07:51 -0400)
committerMichael Orlitzky <michael@orlitzky.com>
Mon, 19 Oct 2009 11:51:40 +0000 (07:51 -0400)
Created a quick sed script to filter "BEGIN;" and "END;" from the shp2pgsql output.
Updated the makefile to import the tiger_lines data one row at a time, outside of a transaction.

bin/filter-transactions [new file with mode: 0755]
makefile
sql/create_tlid_unique_index.sql [new file with mode: 0644]

diff --git a/bin/filter-transactions b/bin/filter-transactions
new file mode 100755 (executable)
index 0000000..b0c14c3
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Remove all occurrences of the strings "BEGIN;" and "END;" from the
+# input stream. As of Postgis v1.4.0, shp2pgsql inserts them once for
+# every 250 "INSERT" statements.
+
+sed s/"BEGIN;"//g |
+    sed s/"END;"//g
index b7ef3c24eea0b7dae47d776351dd2e0fd3527e25..bfccc236762944d8830f6b51eb797b3de351b124 100644 (file)
--- a/makefile
+++ b/makefile
@@ -129,13 +129,15 @@ db: data newdb tiger_blocks_table tiger_lines_table sf1_blocks_table
 # and leave -I out.
        for state in data/census2000/*; do                          \
                for shapefile in $$state/lines/*.shp; do            \
+                       echo "Importing $$shapefile.";              \
                        $(PG_BINDIR)/shp2pgsql                      \
                                -a                                  \
                                -s $(TIGER_SRID)                    \
-                               -D                                  \
                                $$shapefile                         \
                                tiger_lines                         \
-                               | psql -U $(DB_USER) -d $(DB_NAME); \
+                               | bin/filter-transactions           \
+                               | psql -U $(DB_USER) -d $(DB_NAME)  \
+                               > /dev/null;                        \
                done;                                               \
        done;
 
@@ -203,3 +205,8 @@ tiger_lines_table:
                tiger_lines                                            \
                | psql -U postgres -d $(DB_NAME)                       \
                 > /dev/null
+
+# Add a unique index on the "tlid" column.
+       psql -U postgres \
+             -d census   \
+             -f sql/create_tlid_unique_index.sql
diff --git a/sql/create_tlid_unique_index.sql b/sql/create_tlid_unique_index.sql
new file mode 100644 (file)
index 0000000..3a0d385
--- /dev/null
@@ -0,0 +1,8 @@
+/*
+  The TIGER/Line ID (tlid) for each line is supposed to be
+  unique. If it isn't, problems, so we enforce it in the
+  database.
+*/
+
+CREATE UNIQUE INDEX idx_tiger_lines_tlid_unique
+       ON tiger_lines (tlid);