]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Add SportInfo support for NFLPassingLeadersXML.dtd.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassingLeadersXML/*.xml
153 schemagen/NFLPassLeadXML/*.xml
154 schemagen/nflpreviewxml/*.xml
155 schemagen/NFLQBStartsXML/*.xml
156 schemagen/NFL_Roster_XML/*.xml
157 schemagen/NFLSackLeadersXML/*.xml
158 schemagen/nflstandxml/*.xml
159 schemagen/NFLTeamRankingsXML/*.xml
160 schemagen/NFL_Team_Stats_XML/*.xml
161 schemagen/NFLTopPerformanceXML/*.xml
162 schemagen/NFLTotalYardageXML/*.xml
163 schemagen/nhlpreviewxml/*.xml
164 schemagen/Odds_XML/*.xml
165 schemagen/recapxml/*.xml
166 schemagen/Schedule_Changes_XML/*.xml
167 schemagen/scoresxml/*.xml
168 schemagen/Transactions_XML/*.xml
169 schemagen/weatherxml/*.xml
170 schemagen/Weekly_Sched_XML/*.xml
171 schemagen/WNBA3PPctXML/*.xml
172 schemagen/WNBAAssistsXML/*.xml
173 schemagen/WNBABlocksXML/*.xml
174 schemagen/WNBAFGPctXML/*.xml
175 schemagen/WNBAFoulsXML/*.xml
176 schemagen/WNBAFTPctXML/*.xml
177 schemagen/WNBAMinutesXML/*.xml
178 schemagen/WNBAReboundsXML/*.xml
179 schemagen/WNBAScorersXML/*.xml
180 schemagen/wnbastandxml/*.xml
181 schemagen/WNBAStealsXML/*.xml
182 schemagen/WNBA_Team_Leaders_XML/*.xml
183 schemagen/WNBATurnoversXML/*.xml
184 schemagen/WorldBaseballPreviewXML/*.xml
185 test/shell/*.test
186 test/xml/*.xml
187 test/xml/*.dtd
188 test/xml/gameinfo/*.xml
189 test/xml/gameinfo/*.dtd
190 test/xml/sportinfo/*.xml
191 test/xml/sportinfo/*.dtd
192 synopsis:
193 Import XML files from The Sports Network into an RDBMS.
194 description:
195 /Usage/:
196 .
197 @
198 htsn-import [OPTIONS] [FILES]
199 @
200 .
201 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
202 containing various sports news and statistics. Our sister program
203 /htsn/ is capable of retrieving the feed and saving the individual
204 XML documents contained therein. But what to do with them?
205 .
206 The purpose of /htsn-import/ is to take these XML documents and
207 get them into something we can use, a relational database management
208 system (RDBMS), i.e. \"a SQL database\". The structure of a
209 relational database is, well, relational, and the feed XML is not. So
210 there is some work to do before the data can be inserted.
211 .
212 First, we must parse the XML. Each supported document type (see below)
213 has a full pickle/unpickle implementation (\"pickle\" is simply a
214 synonym for serialize here). That means that we parse the entire
215 document into a data structure, and if we pickle (serialize) that data
216 structure, we get the exact same XML document that we started with.
217 .
218 This is important for two reasons. First, it serves as a second level
219 of validation. The first validation is performed by the XML parser,
220 but if that succeeds and unpickling fails, we know that something is
221 fishy. Second, we don't ever want to be surprised by some new element
222 or attribute showing up in the XML. The fact that we can unpickle the
223 whole thing now means that we won't be surprised in the future.
224 .
225 The aforementioned feature is especially important because we
226 automatically migrate the database schema every time we import a
227 document. If you attempt to import a \"newsxml.dtd\" document, all
228 database objects relating to the news will be created if they do not
229 exist. We don't want the schema to change out from under us without
230 warning, so it's important that no XML be parsed that would result in
231 a different schema than we had previously. Since we can
232 pickle/unpickle everything already, this should be impossible.
233 .
234 Examples and usage documentation are available in the man page.
235
236 executable htsn-import
237 build-depends:
238 base >= 4.6 && < 5,
239 cmdargs >= 0.10.6,
240 configurator >= 0.2,
241 directory >= 1.2,
242 filepath >= 1.3,
243 hslogger >= 1.2,
244 htsn-common >= 0.0.1,
245 hxt >= 9.3,
246 groundhog >= 0.5,
247 groundhog-postgresql >= 0.5,
248 groundhog-sqlite >= 0.5,
249 groundhog-th >= 0.5,
250 MissingH >= 1.2,
251 old-locale >= 1,
252 split >= 0.2,
253 tasty >= 0.8,
254 tasty-hunit >= 0.8,
255 time >= 1.4,
256 transformers >= 0.3,
257 tuple >= 0.2
258
259 main-is:
260 Main.hs
261
262 hs-source-dirs:
263 src/
264
265 other-modules:
266 Backend
267 CommandLine
268 Configuration
269 ConnectionString
270 ExitCodes
271 OptionalConfiguration
272 TSN.Codegen
273 TSN.Database
274 TSN.DbImport
275 TSN.Location
276 TSN.Parse
277 TSN.Picklers
278 TSN.Team
279 TSN.XmlImport
280 TSN.XML.AutoRacingResults
281 TSN.XML.AutoRacingSchedule
282 TSN.XML.EarlyLine
283 TSN.XML.GameInfo
284 TSN.XML.Heartbeat
285 TSN.XML.Injuries
286 TSN.XML.InjuriesDetail
287 TSN.XML.JFile
288 TSN.XML.MLBEarlyLine
289 TSN.XML.News
290 TSN.XML.Odds
291 TSN.XML.ScheduleChanges
292 TSN.XML.Scores
293 TSN.XML.SportInfo
294 TSN.XML.Weather
295 Xml
296
297 ghc-options:
298 -Wall
299 -fwarn-hi-shadowing
300 -fwarn-missing-signatures
301 -fwarn-name-shadowing
302 -fwarn-orphans
303 -fwarn-type-defaults
304 -fwarn-tabs
305 -fwarn-incomplete-record-updates
306 -fwarn-monomorphism-restriction
307 -fwarn-unused-do-bind
308 -O2
309
310 ghc-prof-options:
311 -prof
312 -fprof-auto
313 -fprof-cafs
314 -- The following unbreak profiling with template haskell. We have
315 -- to build the program twice; once without profiling and again with
316 -- these flags.
317 -hisuf hi_p
318 -osuf o_p
319
320
321 test-suite testsuite
322 type: exitcode-stdio-1.0
323 hs-source-dirs: src test
324 main-is: TestSuite.hs
325 build-depends:
326 base >= 4.6 && < 5,
327 cmdargs >= 0.10.6,
328 configurator >= 0.2,
329 directory >= 1.2,
330 filepath >= 1.3,
331 hslogger >= 1.2,
332 htsn-common >= 0.0.1,
333 hxt >= 9.3,
334 groundhog >= 0.5,
335 groundhog-postgresql >= 0.5,
336 groundhog-sqlite >= 0.5,
337 groundhog-th >= 0.5,
338 MissingH >= 1.2,
339 old-locale >= 1,
340 split >= 0.2,
341 tasty >= 0.8,
342 tasty-hunit >= 0.8,
343 time >= 1.4,
344 transformers >= 0.3,
345 tuple >= 0.2
346
347 -- It's not entirely clear to me why I have to reproduce all of this.
348 ghc-options:
349 -Wall
350 -fwarn-hi-shadowing
351 -fwarn-missing-signatures
352 -fwarn-name-shadowing
353 -fwarn-orphans
354 -fwarn-type-defaults
355 -fwarn-tabs
356 -fwarn-incomplete-record-updates
357 -fwarn-monomorphism-restriction
358 -fwarn-unused-do-bind
359 -O2
360
361
362 test-suite doctests
363 type: exitcode-stdio-1.0
364 hs-source-dirs: test
365 main-is: Doctests.hs
366 build-depends:
367 base >= 4.6 && < 5,
368 -- Additional test dependencies.
369 doctest >= 0.9
370
371 -- It's not entirely clear to me why I have to reproduce all of this.
372 ghc-options:
373 -Wall
374 -fwarn-hi-shadowing
375 -fwarn-missing-signatures
376 -fwarn-name-shadowing
377 -fwarn-orphans
378 -fwarn-type-defaults
379 -fwarn-tabs
380 -fwarn-incomplete-record-updates
381 -fwarn-monomorphism-restriction
382 -fwarn-unused-do-bind
383 -rtsopts
384 -threaded
385 -optc-O3
386 -optc-march=native
387 -O2
388
389
390 -- These won't work without shelltestrunner installed in your
391 -- $PATH. Maybe there is some way to tell Cabal that.
392 test-suite shelltests
393 type: exitcode-stdio-1.0
394 hs-source-dirs: test
395 main-is: ShellTests.hs
396
397 build-depends:
398 base >= 4.6 && < 5,
399 cmdargs >= 0.10.6,
400 configurator >= 0.2,
401 directory >= 1.2,
402 filepath >= 1.3,
403 hslogger >= 1.2,
404 htsn-common >= 0.0.1,
405 hxt >= 9.3,
406 groundhog >= 0.5,
407 groundhog-postgresql >= 0.5,
408 groundhog-sqlite >= 0.5,
409 groundhog-th >= 0.5,
410 MissingH >= 1.2,
411 old-locale >= 1,
412 split >= 0.2,
413 process >= 1.1,
414 tasty >= 0.8,
415 tasty-hunit >= 0.8,
416 time >= 1.4,
417 transformers >= 0.3,
418 tuple >= 0.2
419
420
421
422 source-repository head
423 type: git
424 location: http://michael.orlitzky.com/git/htsn-import.git
425 branch: master