]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Add SportInfo support for NFLYardsXML.dtd.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassingLeadersXML/*.xml
153 schemagen/NFLPassLeadXML/*.xml
154 schemagen/nflpreviewxml/*.xml
155 schemagen/NFLQBStartsXML/*.xml
156 schemagen/NFLReceivingLeadersXML/*.xml
157 schemagen/NFL_Roster_XML/*.xml
158 schemagen/NFLRushingLeadersXML/*.xml
159 schemagen/NFLSackLeadersXML/*.xml
160 schemagen/nflstandxml/*.xml
161 schemagen/NFLTackleFFLeadersXML/*.xml
162 schemagen/NFLTeamRankingsXML/*.xml
163 schemagen/NFL_Team_Stats_XML/*.xml
164 schemagen/NFLTopKickoffReturnXML/*.xml
165 schemagen/NFLTopPerformanceXML/*.xml
166 schemagen/NFLTopPuntReturnXML/*.xml
167 schemagen/NFLTotalYardageXML/*.xml
168 schemagen/NFLYardsXML/*.xml
169 schemagen/nhlpreviewxml/*.xml
170 schemagen/Odds_XML/*.xml
171 schemagen/recapxml/*.xml
172 schemagen/Schedule_Changes_XML/*.xml
173 schemagen/scoresxml/*.xml
174 schemagen/Transactions_XML/*.xml
175 schemagen/weatherxml/*.xml
176 schemagen/Weekly_Sched_XML/*.xml
177 schemagen/WNBA3PPctXML/*.xml
178 schemagen/WNBAAssistsXML/*.xml
179 schemagen/WNBABlocksXML/*.xml
180 schemagen/WNBAFGPctXML/*.xml
181 schemagen/WNBAFoulsXML/*.xml
182 schemagen/WNBAFTPctXML/*.xml
183 schemagen/WNBAMinutesXML/*.xml
184 schemagen/WNBAReboundsXML/*.xml
185 schemagen/WNBAScorersXML/*.xml
186 schemagen/wnbastandxml/*.xml
187 schemagen/WNBAStealsXML/*.xml
188 schemagen/WNBA_Team_Leaders_XML/*.xml
189 schemagen/WNBATurnoversXML/*.xml
190 schemagen/WorldBaseballPreviewXML/*.xml
191 test/shell/*.test
192 test/xml/*.xml
193 test/xml/*.dtd
194 test/xml/gameinfo/*.xml
195 test/xml/gameinfo/*.dtd
196 test/xml/sportinfo/*.xml
197 test/xml/sportinfo/*.dtd
198 synopsis:
199 Import XML files from The Sports Network into an RDBMS.
200 description:
201 /Usage/:
202 .
203 @
204 htsn-import [OPTIONS] [FILES]
205 @
206 .
207 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
208 containing various sports news and statistics. Our sister program
209 /htsn/ is capable of retrieving the feed and saving the individual
210 XML documents contained therein. But what to do with them?
211 .
212 The purpose of /htsn-import/ is to take these XML documents and
213 get them into something we can use, a relational database management
214 system (RDBMS), i.e. \"a SQL database\". The structure of a
215 relational database is, well, relational, and the feed XML is not. So
216 there is some work to do before the data can be inserted.
217 .
218 First, we must parse the XML. Each supported document type (see below)
219 has a full pickle/unpickle implementation (\"pickle\" is simply a
220 synonym for serialize here). That means that we parse the entire
221 document into a data structure, and if we pickle (serialize) that data
222 structure, we get the exact same XML document that we started with.
223 .
224 This is important for two reasons. First, it serves as a second level
225 of validation. The first validation is performed by the XML parser,
226 but if that succeeds and unpickling fails, we know that something is
227 fishy. Second, we don't ever want to be surprised by some new element
228 or attribute showing up in the XML. The fact that we can unpickle the
229 whole thing now means that we won't be surprised in the future.
230 .
231 The aforementioned feature is especially important because we
232 automatically migrate the database schema every time we import a
233 document. If you attempt to import a \"newsxml.dtd\" document, all
234 database objects relating to the news will be created if they do not
235 exist. We don't want the schema to change out from under us without
236 warning, so it's important that no XML be parsed that would result in
237 a different schema than we had previously. Since we can
238 pickle/unpickle everything already, this should be impossible.
239 .
240 Examples and usage documentation are available in the man page.
241
242 executable htsn-import
243 build-depends:
244 base >= 4.6 && < 5,
245 cmdargs >= 0.10.6,
246 configurator >= 0.2,
247 directory >= 1.2,
248 filepath >= 1.3,
249 hslogger >= 1.2,
250 htsn-common >= 0.0.1,
251 hxt >= 9.3,
252 groundhog >= 0.5,
253 groundhog-postgresql >= 0.5,
254 groundhog-sqlite >= 0.5,
255 groundhog-th >= 0.5,
256 MissingH >= 1.2,
257 old-locale >= 1,
258 split >= 0.2,
259 tasty >= 0.8,
260 tasty-hunit >= 0.8,
261 time >= 1.4,
262 transformers >= 0.3,
263 tuple >= 0.2
264
265 main-is:
266 Main.hs
267
268 hs-source-dirs:
269 src/
270
271 other-modules:
272 Backend
273 CommandLine
274 Configuration
275 ConnectionString
276 ExitCodes
277 OptionalConfiguration
278 TSN.Codegen
279 TSN.Database
280 TSN.DbImport
281 TSN.Location
282 TSN.Parse
283 TSN.Picklers
284 TSN.Team
285 TSN.XmlImport
286 TSN.XML.AutoRacingResults
287 TSN.XML.AutoRacingSchedule
288 TSN.XML.EarlyLine
289 TSN.XML.GameInfo
290 TSN.XML.Heartbeat
291 TSN.XML.Injuries
292 TSN.XML.InjuriesDetail
293 TSN.XML.JFile
294 TSN.XML.MLBEarlyLine
295 TSN.XML.News
296 TSN.XML.Odds
297 TSN.XML.ScheduleChanges
298 TSN.XML.Scores
299 TSN.XML.SportInfo
300 TSN.XML.Weather
301 Xml
302
303 ghc-options:
304 -Wall
305 -fwarn-hi-shadowing
306 -fwarn-missing-signatures
307 -fwarn-name-shadowing
308 -fwarn-orphans
309 -fwarn-type-defaults
310 -fwarn-tabs
311 -fwarn-incomplete-record-updates
312 -fwarn-monomorphism-restriction
313 -fwarn-unused-do-bind
314 -O2
315
316 ghc-prof-options:
317 -prof
318 -fprof-auto
319 -fprof-cafs
320 -- The following unbreak profiling with template haskell. We have
321 -- to build the program twice; once without profiling and again with
322 -- these flags.
323 -hisuf hi_p
324 -osuf o_p
325
326
327 test-suite testsuite
328 type: exitcode-stdio-1.0
329 hs-source-dirs: src test
330 main-is: TestSuite.hs
331 build-depends:
332 base >= 4.6 && < 5,
333 cmdargs >= 0.10.6,
334 configurator >= 0.2,
335 directory >= 1.2,
336 filepath >= 1.3,
337 hslogger >= 1.2,
338 htsn-common >= 0.0.1,
339 hxt >= 9.3,
340 groundhog >= 0.5,
341 groundhog-postgresql >= 0.5,
342 groundhog-sqlite >= 0.5,
343 groundhog-th >= 0.5,
344 MissingH >= 1.2,
345 old-locale >= 1,
346 split >= 0.2,
347 tasty >= 0.8,
348 tasty-hunit >= 0.8,
349 time >= 1.4,
350 transformers >= 0.3,
351 tuple >= 0.2
352
353 -- It's not entirely clear to me why I have to reproduce all of this.
354 ghc-options:
355 -Wall
356 -fwarn-hi-shadowing
357 -fwarn-missing-signatures
358 -fwarn-name-shadowing
359 -fwarn-orphans
360 -fwarn-type-defaults
361 -fwarn-tabs
362 -fwarn-incomplete-record-updates
363 -fwarn-monomorphism-restriction
364 -fwarn-unused-do-bind
365 -O2
366
367
368 test-suite doctests
369 type: exitcode-stdio-1.0
370 hs-source-dirs: test
371 main-is: Doctests.hs
372 build-depends:
373 base >= 4.6 && < 5,
374 -- Additional test dependencies.
375 doctest >= 0.9
376
377 -- It's not entirely clear to me why I have to reproduce all of this.
378 ghc-options:
379 -Wall
380 -fwarn-hi-shadowing
381 -fwarn-missing-signatures
382 -fwarn-name-shadowing
383 -fwarn-orphans
384 -fwarn-type-defaults
385 -fwarn-tabs
386 -fwarn-incomplete-record-updates
387 -fwarn-monomorphism-restriction
388 -fwarn-unused-do-bind
389 -rtsopts
390 -threaded
391 -optc-O3
392 -optc-march=native
393 -O2
394
395
396 -- These won't work without shelltestrunner installed in your
397 -- $PATH. Maybe there is some way to tell Cabal that.
398 test-suite shelltests
399 type: exitcode-stdio-1.0
400 hs-source-dirs: test
401 main-is: ShellTests.hs
402
403 build-depends:
404 base >= 4.6 && < 5,
405 cmdargs >= 0.10.6,
406 configurator >= 0.2,
407 directory >= 1.2,
408 filepath >= 1.3,
409 hslogger >= 1.2,
410 htsn-common >= 0.0.1,
411 hxt >= 9.3,
412 groundhog >= 0.5,
413 groundhog-postgresql >= 0.5,
414 groundhog-sqlite >= 0.5,
415 groundhog-th >= 0.5,
416 MissingH >= 1.2,
417 old-locale >= 1,
418 split >= 0.2,
419 process >= 1.1,
420 tasty >= 0.8,
421 tasty-hunit >= 0.8,
422 time >= 1.4,
423 transformers >= 0.3,
424 tuple >= 0.2
425
426
427
428 source-repository head
429 type: git
430 location: http://michael.orlitzky.com/git/htsn-import.git
431 branch: master