]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
a8d962d1c52518fccfb881f2c1ff46dc8d099ba2
[dead/census-tools.git] / src / Data.py
1 """
2 Classes for working with (downloading, importing) the online census
3 data.
4 """
5
6 import os
7 import urllib
8 import zipfile
9
10 import FileUtils
11
12
class State:
    """
    A state contains zero or more counties and cities. Each state has
    its own ID, as well as its own directory on the server.

    Example:

      http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND

    """

    # Base URL that every state directory URL is built from.
    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'

    def __init__(self, initial_id=None, initial_name=None):
        """
        Remember the state's ID and name, and start with an empty list
        of counties (populated through add_county()).
        """
        self.id = initial_id
        self.name = initial_name
        self.counties = []


    def tiger_data_url(self):
        """
        Return the URL of this state's directory beneath TIGER_ROOT.

        The directory name is the state ID, an underscore, and the
        upper-cased state name with spaces replaced by underscores
        (e.g. '24_MARYLAND').

        The ID is left-padded with zeros to two characters, so a
        single-digit ID given as an integer (6) produces '06' rather
        than '6'. IDs that are already two characters long, whether
        integers or strings, are unaffected.
        """
        state_code = str(self.id).zfill(2)
        directory = state_code + '_' + self.name.upper().replace(' ', '_')
        return self.TIGER_ROOT + '/' + directory


    def lines_data_path(self):
        """
        Return the relative directory in which this state's lines data
        is kept: 'data/census2000/<name>/lines', where <name> is the
        lower-cased state name with spaces replaced by underscores.
        """
        state_dir = self.name.lower().replace(' ', '_')
        return 'data/census2000/' + state_dir + '/lines'


    def add_county(self, county_id, county_name, override_name=False):
        """
        Create a County and append it to this state's list of counties.

        We would like each county to have a pointer to its containing
        state. This is so we can compute the file URL, directory, and
        so forth from within the county. The override_name flag is
        passed through to the County unchanged.
        """
        county = County(county_id, county_name, self, override_name)
        self.counties.append(county)
52
53
54
class County:
    """
    A county represents either a county or city. It doesn't make
    sense, but 'county-level' data is given for certain cities which
    don't technically belong to any county.
    """

    def __init__(self, initial_id=None,
                       initial_name=None,
                       initial_state=None,
                       override_name=False):
        """
        Store the county's ID, name, containing state, and name
        override flag.

        If this is a city, we should override our name with
        e.g. 'Baltimore city' so that full_name() doesn't transform
        'Baltimore' into 'Baltimore County'.
        """
        self.id = initial_id
        self.name = initial_name
        self.state = initial_state
        self.override_name = override_name


    def state_county_id(self):
        """
        Return the combined state-and-county ID as a string: the
        state's ID followed by this county's ID zero-padded to three
        digits.

        The state part is left-padded with zeros to two characters,
        so state 6 / county 1 gives '06001' rather than '6001'. State
        IDs that are already two characters long are unaffected.
        """
        state_part = str(self.state.id).zfill(2)
        county_part = "%03d" % self.id
        return state_part + county_part


    def full_name(self):
        """
        Return the name with ' County' appended, unless the name has
        been overridden.

        Some of the counties (e.g. Baltimore City, Washington D.C.),
        need to have their names overridden since they aren't
        technically counties, but are treated as such by the Census.
        """
        # The flag is compared against False explicitly (rather than
        # tested for truthiness) so that only False/0 add the suffix.
        if self.override_name == False:
            return self.name + ' County'

        # "Override name" basically means, "use the name I passed
        # you and don't add the word 'County' on to it."
        return self.name


    def tiger_data_url(self):
        """
        Return the URL of this county's edges zip file: the state's
        URL, then a '<state_county_id>_<full name>' directory (spaces
        replaced by underscores), then the zip file name.
        """
        county_dir = self.state_county_id() + '_' + self.full_name().replace(' ', '_')
        return self.state.tiger_data_url() + '/' + county_dir + '/' + self.zipfile_name()


    def zipfile_name(self):
        """
        Return the file name of this county's edges archive.
        """
        return 'tl_2009_' + self.state_county_id() + '_edges.zip'


    def shapefile_path(self):
        """
        Return the path of this county's edges shapefile inside the
        containing state's lines data directory.
        """
        shapefile = 'tl_2009_' + self.state_county_id() + '_edges.shp'
        return self.state.lines_data_path() + '/' + shapefile
111
112
def download_lines(states):
    """
    Download the TIGER/Line 'all lines' files for each county in states.

    For every state, its lines data directory is created if needed.
    For every county whose shapefile does not already exist, the
    county's zip file is downloaded into the current directory,
    extracted into the state's lines data directory, and then removed.

    A failed download or extraction is reported and skipped so that
    the remaining counties are still processed. Only Exception
    subclasses are handled this way; KeyboardInterrupt and SystemExit
    propagate (after the temporary file has been cleaned up) so the
    run can be interrupted.

    states is an iterable of objects providing lines_data_path(),
    name, and counties; each county must provide shapefile_path(),
    tiger_data_url(), zipfile_name(), and full_name().
    """

    # urlretrieve lives in urllib.request on Python 3 and directly in
    # urllib on Python 2; try the new location first and fall back.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    for state in states:
        lines_dir = state.lines_data_path()

        # First, create the lines data path if it doesn't exist.
        FileUtils.mkdir_p(lines_dir, 0o755)

        # Now loop through the counties, and download/unzip the lines
        # data if necessary.
        for county in state.counties:
            if os.path.exists(county.shapefile_path()):
                # Already downloaded and extracted; nothing to do.
                continue

            url = county.tiger_data_url()
            tmpfile = county.zipfile_name()
            print("Grabbing data for %s (%s)." % (county.full_name(), state.name))
            print("Downloading %s to %s..." % (url, tmpfile))

            try:
                # This can fail for a bunch of reasons...
                urlretrieve(url, tmpfile)
                print("Unzipping %s to %s..." % (tmpfile, lines_dir))
                archive = zipfile.ZipFile(tmpfile)
                try:
                    archive.extractall(lines_dir)
                finally:
                    # Release the file handle before the archive is
                    # deleted below.
                    archive.close()
            except Exception as e:
                # ...none of which should stop the other counties from
                # being fetched, so report the problem and carry on.
                print("Failed to fetch %s: %s" % (url, e))
            finally:
                # But we always clean up after ourselves.
                print("Removing %s..." % tmpfile)
                FileUtils.rm_f(tmpfile)

            print("Done.\n")