]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Add SportInfo support for NFLTopPuntReturnXML.dtd.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassingLeadersXML/*.xml
153 schemagen/NFLPassLeadXML/*.xml
154 schemagen/nflpreviewxml/*.xml
155 schemagen/NFLQBStartsXML/*.xml
156 schemagen/NFLReceivingLeadersXML/*.xml
157 schemagen/NFL_Roster_XML/*.xml
158 schemagen/NFLRushingLeadersXML/*.xml
159 schemagen/NFLSackLeadersXML/*.xml
160 schemagen/nflstandxml/*.xml
161 schemagen/NFLTackleFFLeadersXML/*.xml
162 schemagen/NFLTeamRankingsXML/*.xml
163 schemagen/NFL_Team_Stats_XML/*.xml
164 schemagen/NFLTopKickoffReturnXML/*.xml
165 schemagen/NFLTopPerformanceXML/*.xml
166 schemagen/NFLTopPuntReturnXML/*.xml
167 schemagen/NFLTotalYardageXML/*.xml
168 schemagen/nhlpreviewxml/*.xml
169 schemagen/Odds_XML/*.xml
170 schemagen/recapxml/*.xml
171 schemagen/Schedule_Changes_XML/*.xml
172 schemagen/scoresxml/*.xml
173 schemagen/Transactions_XML/*.xml
174 schemagen/weatherxml/*.xml
175 schemagen/Weekly_Sched_XML/*.xml
176 schemagen/WNBA3PPctXML/*.xml
177 schemagen/WNBAAssistsXML/*.xml
178 schemagen/WNBABlocksXML/*.xml
179 schemagen/WNBAFGPctXML/*.xml
180 schemagen/WNBAFoulsXML/*.xml
181 schemagen/WNBAFTPctXML/*.xml
182 schemagen/WNBAMinutesXML/*.xml
183 schemagen/WNBAReboundsXML/*.xml
184 schemagen/WNBAScorersXML/*.xml
185 schemagen/wnbastandxml/*.xml
186 schemagen/WNBAStealsXML/*.xml
187 schemagen/WNBA_Team_Leaders_XML/*.xml
188 schemagen/WNBATurnoversXML/*.xml
189 schemagen/WorldBaseballPreviewXML/*.xml
190 test/shell/*.test
191 test/xml/*.xml
192 test/xml/*.dtd
193 test/xml/gameinfo/*.xml
194 test/xml/gameinfo/*.dtd
195 test/xml/sportinfo/*.xml
196 test/xml/sportinfo/*.dtd
197 synopsis:
198 Import XML files from The Sports Network into an RDBMS.
199 description:
200 /Usage/:
201 .
202 @
203 htsn-import [OPTIONS] [FILES]
204 @
205 .
206 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
207 containing various sports news and statistics. Our sister program
208 /htsn/ is capable of retrieving the feed and saving the individual
209 XML documents contained therein. But what to do with them?
210 .
211 The purpose of /htsn-import/ is to take these XML documents and
212 get them into something we can use, a relational database management
213 system (RDBMS), i.e. \"a SQL database\". The structure of a
214 relational database is, well, relational, and the feed XML is not. So
215 there is some work to do before the data can be inserted.
216 .
217 First, we must parse the XML. Each supported document type (see below)
218 has a full pickle/unpickle implementation (\"pickle\" is simply a
219 synonym for serialize here). That means that we parse the entire
220 document into a data structure, and if we pickle (serialize) that data
221 structure, we get the exact same XML document that we started with.
222 .
223 This is important for two reasons. First, it serves as a second level
224 of validation. The first validation is performed by the XML parser,
225 but if that succeeds and unpickling fails, we know that something is
226 fishy. Second, we don't ever want to be surprised by some new element
227 or attribute showing up in the XML. The fact that we can unpickle the
228 whole thing now means that we won't be surprised in the future.
229 .
230 The aforementioned feature is especially important because we
231 automatically migrate the database schema every time we import a
232 document. If you attempt to import a \"newsxml.dtd\" document, all
233 database objects relating to the news will be created if they do not
234 exist. We don't want the schema to change out from under us without
235 warning, so it's important that no XML be parsed that would result in
236 a different schema than we had previously. Since we can
237 pickle/unpickle everything already, this should be impossible.
238 .
239 Examples and usage documentation are available in the man page.
240
241 executable htsn-import
242 build-depends:
243 base >= 4.6 && < 5,
244 cmdargs >= 0.10.6,
245 configurator >= 0.2,
246 directory >= 1.2,
247 filepath >= 1.3,
248 hslogger >= 1.2,
249 htsn-common >= 0.0.1,
250 hxt >= 9.3,
251 groundhog >= 0.5,
252 groundhog-postgresql >= 0.5,
253 groundhog-sqlite >= 0.5,
254 groundhog-th >= 0.5,
255 MissingH >= 1.2,
256 old-locale >= 1,
257 split >= 0.2,
258 tasty >= 0.8,
259 tasty-hunit >= 0.8,
260 time >= 1.4,
261 transformers >= 0.3,
262 tuple >= 0.2
263
264 main-is:
265 Main.hs
266
267 hs-source-dirs:
268 src/
269
270 other-modules:
271 Backend
272 CommandLine
273 Configuration
274 ConnectionString
275 ExitCodes
276 OptionalConfiguration
277 TSN.Codegen
278 TSN.Database
279 TSN.DbImport
280 TSN.Location
281 TSN.Parse
282 TSN.Picklers
283 TSN.Team
284 TSN.XmlImport
285 TSN.XML.AutoRacingResults
286 TSN.XML.AutoRacingSchedule
287 TSN.XML.EarlyLine
288 TSN.XML.GameInfo
289 TSN.XML.Heartbeat
290 TSN.XML.Injuries
291 TSN.XML.InjuriesDetail
292 TSN.XML.JFile
293 TSN.XML.MLBEarlyLine
294 TSN.XML.News
295 TSN.XML.Odds
296 TSN.XML.ScheduleChanges
297 TSN.XML.Scores
298 TSN.XML.SportInfo
299 TSN.XML.Weather
300 Xml
301
302 ghc-options:
303 -Wall
304 -fwarn-hi-shadowing
305 -fwarn-missing-signatures
306 -fwarn-name-shadowing
307 -fwarn-orphans
308 -fwarn-type-defaults
309 -fwarn-tabs
310 -fwarn-incomplete-record-updates
311 -fwarn-monomorphism-restriction
312 -fwarn-unused-do-bind
313 -O2
314
315 ghc-prof-options:
316 -prof
317 -fprof-auto
318 -fprof-cafs
319 -- The following unbreak profiling with template haskell. We have
320 -- to build the program twice; once without profiling and again with
321 -- these flags.
322 -hisuf hi_p
323 -osuf o_p
324
325
326 test-suite testsuite
327 type: exitcode-stdio-1.0
328 hs-source-dirs: src test
329 main-is: TestSuite.hs
330 build-depends:
331 base >= 4.6 && < 5,
332 cmdargs >= 0.10.6,
333 configurator >= 0.2,
334 directory >= 1.2,
335 filepath >= 1.3,
336 hslogger >= 1.2,
337 htsn-common >= 0.0.1,
338 hxt >= 9.3,
339 groundhog >= 0.5,
340 groundhog-postgresql >= 0.5,
341 groundhog-sqlite >= 0.5,
342 groundhog-th >= 0.5,
343 MissingH >= 1.2,
344 old-locale >= 1,
345 split >= 0.2,
346 tasty >= 0.8,
347 tasty-hunit >= 0.8,
348 time >= 1.4,
349 transformers >= 0.3,
350 tuple >= 0.2
351
352 -- It's not entirely clear to me why I have to reproduce all of this.
353 ghc-options:
354 -Wall
355 -fwarn-hi-shadowing
356 -fwarn-missing-signatures
357 -fwarn-name-shadowing
358 -fwarn-orphans
359 -fwarn-type-defaults
360 -fwarn-tabs
361 -fwarn-incomplete-record-updates
362 -fwarn-monomorphism-restriction
363 -fwarn-unused-do-bind
364 -O2
365
366
367 test-suite doctests
368 type: exitcode-stdio-1.0
369 hs-source-dirs: test
370 main-is: Doctests.hs
371 build-depends:
372 base >= 4.6 && < 5,
373 -- Additional test dependencies.
374 doctest >= 0.9
375
376 -- It's not entirely clear to me why I have to reproduce all of this.
377 ghc-options:
378 -Wall
379 -fwarn-hi-shadowing
380 -fwarn-missing-signatures
381 -fwarn-name-shadowing
382 -fwarn-orphans
383 -fwarn-type-defaults
384 -fwarn-tabs
385 -fwarn-incomplete-record-updates
386 -fwarn-monomorphism-restriction
387 -fwarn-unused-do-bind
388 -rtsopts
389 -threaded
390 -optc-O3
391 -optc-march=native
392 -O2
393
394
395 -- These won't work without shelltestrunner installed in your
396 -- $PATH. Maybe there is some way to tell Cabal that.
397 test-suite shelltests
398 type: exitcode-stdio-1.0
399 hs-source-dirs: test
400 main-is: ShellTests.hs
401
402 build-depends:
403 base >= 4.6 && < 5,
404 cmdargs >= 0.10.6,
405 configurator >= 0.2,
406 directory >= 1.2,
407 filepath >= 1.3,
408 hslogger >= 1.2,
409 htsn-common >= 0.0.1,
410 hxt >= 9.3,
411 groundhog >= 0.5,
412 groundhog-postgresql >= 0.5,
413 groundhog-sqlite >= 0.5,
414 groundhog-th >= 0.5,
415 MissingH >= 1.2,
416 old-locale >= 1,
417 split >= 0.2,
418 process >= 1.1,
419 tasty >= 0.8,
420 tasty-hunit >= 0.8,
421 time >= 1.4,
422 transformers >= 0.3,
423 tuple >= 0.2
424
425
426
427 source-repository head
428 type: git
429 location: http://michael.orlitzky.com/git/htsn-import.git
430 branch: master