]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Add SportInfo support for NFLInterceptionLeadersXML.dtd.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassLeadXML/*.xml
153 schemagen/nflpreviewxml/*.xml
154 schemagen/NFLQBStartsXML/*.xml
155 schemagen/NFL_Roster_XML/*.xml
156 schemagen/NFLSackLeadersXML/*.xml
157 schemagen/nflstandxml/*.xml
158 schemagen/NFLTeamRankingsXML/*.xml
159 schemagen/NFL_Team_Stats_XML/*.xml
160 schemagen/NFLTopPerformanceXML/*.xml
161 schemagen/NFLTotalYardageXML/*.xml
162 schemagen/nhlpreviewxml/*.xml
163 schemagen/Odds_XML/*.xml
164 schemagen/recapxml/*.xml
165 schemagen/Schedule_Changes_XML/*.xml
166 schemagen/scoresxml/*.xml
167 schemagen/Transactions_XML/*.xml
168 schemagen/weatherxml/*.xml
169 schemagen/Weekly_Sched_XML/*.xml
170 schemagen/WNBA3PPctXML/*.xml
171 schemagen/WNBAAssistsXML/*.xml
172 schemagen/WNBABlocksXML/*.xml
173 schemagen/WNBAFGPctXML/*.xml
174 schemagen/WNBAFoulsXML/*.xml
175 schemagen/WNBAFTPctXML/*.xml
176 schemagen/WNBAMinutesXML/*.xml
177 schemagen/WNBAReboundsXML/*.xml
178 schemagen/WNBAScorersXML/*.xml
179 schemagen/wnbastandxml/*.xml
180 schemagen/WNBAStealsXML/*.xml
181 schemagen/WNBA_Team_Leaders_XML/*.xml
182 schemagen/WNBATurnoversXML/*.xml
183 schemagen/WorldBaseballPreviewXML/*.xml
184 test/shell/*.test
185 test/xml/*.xml
186 test/xml/*.dtd
187 test/xml/gameinfo/*.xml
188 test/xml/gameinfo/*.dtd
189 test/xml/sportinfo/*.xml
190 test/xml/sportinfo/*.dtd
191 synopsis:
192 Import XML files from The Sports Network into an RDBMS.
193 description:
194 /Usage/:
195 .
196 @
197 htsn-import [OPTIONS] [FILES]
198 @
199 .
200 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
201 containing various sports news and statistics. Our sister program
202 /htsn/ is capable of retrieving the feed and saving the individual
203 XML documents contained therein. But what to do with them?
204 .
205 The purpose of /htsn-import/ is to take these XML documents and
206 get them into something we can use, a relational database management
207 system (RDBMS), i.e. \"a SQL database\". The structure of a
208 relational database is, well, relational, and the feed XML is not. So
209 there is some work to do before the data can be inserted.
210 .
211 First, we must parse the XML. Each supported document type (see below)
212 has a full pickle/unpickle implementation (\"pickle\" is simply a
213 synonym for serialize here). That means that we parse the entire
214 document into a data structure, and if we pickle (serialize) that data
215 structure, we get the exact same XML document that we started with.
216 .
217 This is important for two reasons. First, it serves as a second level
218 of validation. The first validation is performed by the XML parser,
219 but if that succeeds and unpickling fails, we know that something is
220 fishy. Second, we don't ever want to be surprised by some new element
221 or attribute showing up in the XML. The fact that we can unpickle the
222 whole thing now means that we won't be surprised in the future.
223 .
224 The aforementioned feature is especially important because we
225 automatically migrate the database schema every time we import a
226 document. If you attempt to import a \"newsxml.dtd\" document, all
227 database objects relating to the news will be created if they do not
228 exist. We don't want the schema to change out from under us without
229 warning, so it's important that no XML be parsed that would result in
230 a different schema than we had previously. Since we can
231 pickle/unpickle everything already, this should be impossible.
232 .
233 Examples and usage documentation are available in the man page.
234
235 executable htsn-import
236 build-depends:
237 base >= 4.6 && < 5,
238 cmdargs >= 0.10.6,
239 configurator >= 0.2,
240 directory >= 1.2,
241 filepath >= 1.3,
242 hslogger >= 1.2,
243 htsn-common >= 0.0.1,
244 hxt >= 9.3,
245 groundhog >= 0.5,
246 groundhog-postgresql >= 0.5,
247 groundhog-sqlite >= 0.5,
248 groundhog-th >= 0.5,
249 MissingH >= 1.2,
250 old-locale >= 1,
251 split >= 0.2,
252 tasty >= 0.8,
253 tasty-hunit >= 0.8,
254 time >= 1.4,
255 transformers >= 0.3,
256 tuple >= 0.2
257
258 main-is:
259 Main.hs
260
261 hs-source-dirs:
262 src/
263
264 other-modules:
265 Backend
266 CommandLine
267 Configuration
268 ConnectionString
269 ExitCodes
270 OptionalConfiguration
271 TSN.Codegen
272 TSN.Database
273 TSN.DbImport
274 TSN.Location
275 TSN.Parse
276 TSN.Picklers
277 TSN.Team
278 TSN.XmlImport
279 TSN.XML.AutoRacingResults
280 TSN.XML.AutoRacingSchedule
281 TSN.XML.EarlyLine
282 TSN.XML.GameInfo
283 TSN.XML.Heartbeat
284 TSN.XML.Injuries
285 TSN.XML.InjuriesDetail
286 TSN.XML.JFile
287 TSN.XML.MLBEarlyLine
288 TSN.XML.News
289 TSN.XML.Odds
290 TSN.XML.ScheduleChanges
291 TSN.XML.Scores
292 TSN.XML.SportInfo
293 TSN.XML.Weather
294 Xml
295
296 ghc-options:
297 -Wall
298 -fwarn-hi-shadowing
299 -fwarn-missing-signatures
300 -fwarn-name-shadowing
301 -fwarn-orphans
302 -fwarn-type-defaults
303 -fwarn-tabs
304 -fwarn-incomplete-record-updates
305 -fwarn-monomorphism-restriction
306 -fwarn-unused-do-bind
307 -O2
308
309 ghc-prof-options:
310 -prof
311 -fprof-auto
312 -fprof-cafs
313 -- The following unbreak profiling with template haskell. We have
314 -- to build the program twice; once without profiling and again with
315 -- these flags.
316 -hisuf hi_p
317 -osuf o_p
318
319
320 test-suite testsuite
321 type: exitcode-stdio-1.0
322 hs-source-dirs: src test
323 main-is: TestSuite.hs
324 build-depends:
325 base >= 4.6 && < 5,
326 cmdargs >= 0.10.6,
327 configurator >= 0.2,
328 directory >= 1.2,
329 filepath >= 1.3,
330 hslogger >= 1.2,
331 htsn-common >= 0.0.1,
332 hxt >= 9.3,
333 groundhog >= 0.5,
334 groundhog-postgresql >= 0.5,
335 groundhog-sqlite >= 0.5,
336 groundhog-th >= 0.5,
337 MissingH >= 1.2,
338 old-locale >= 1,
339 split >= 0.2,
340 tasty >= 0.8,
341 tasty-hunit >= 0.8,
342 time >= 1.4,
343 transformers >= 0.3,
344 tuple >= 0.2
345
346 -- It's not entirely clear to me why I have to reproduce all of this.
347 ghc-options:
348 -Wall
349 -fwarn-hi-shadowing
350 -fwarn-missing-signatures
351 -fwarn-name-shadowing
352 -fwarn-orphans
353 -fwarn-type-defaults
354 -fwarn-tabs
355 -fwarn-incomplete-record-updates
356 -fwarn-monomorphism-restriction
357 -fwarn-unused-do-bind
358 -O2
359
360
361 test-suite doctests
362 type: exitcode-stdio-1.0
363 hs-source-dirs: test
364 main-is: Doctests.hs
365 build-depends:
366 base >= 4.6 && < 5,
367 -- Additional test dependencies.
368 doctest >= 0.9
369
370 -- It's not entirely clear to me why I have to reproduce all of this.
371 ghc-options:
372 -Wall
373 -fwarn-hi-shadowing
374 -fwarn-missing-signatures
375 -fwarn-name-shadowing
376 -fwarn-orphans
377 -fwarn-type-defaults
378 -fwarn-tabs
379 -fwarn-incomplete-record-updates
380 -fwarn-monomorphism-restriction
381 -fwarn-unused-do-bind
382 -rtsopts
383 -threaded
384 -optc-O3
385 -optc-march=native
386 -O2
387
388
389 -- These won't work without shelltestrunner installed in your
390 -- $PATH. Maybe there is some way to tell Cabal that.
391 test-suite shelltests
392 type: exitcode-stdio-1.0
393 hs-source-dirs: test
394 main-is: ShellTests.hs
395
396 build-depends:
397 base >= 4.6 && < 5,
398 cmdargs >= 0.10.6,
399 configurator >= 0.2,
400 directory >= 1.2,
401 filepath >= 1.3,
402 hslogger >= 1.2,
403 htsn-common >= 0.0.1,
404 hxt >= 9.3,
405 groundhog >= 0.5,
406 groundhog-postgresql >= 0.5,
407 groundhog-sqlite >= 0.5,
408 groundhog-th >= 0.5,
409 MissingH >= 1.2,
410 old-locale >= 1,
411 split >= 0.2,
412 process >= 1.1,
413 tasty >= 0.8,
414 tasty-hunit >= 0.8,
415 time >= 1.4,
416 transformers >= 0.3,
417 tuple >= 0.2
418
419
420
421 source-repository head
422 type: git
423 location: http://michael.orlitzky.com/git/htsn-import.git
424 branch: master