]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Begin to add EarlyLine (earlylineXML) support. Still very incomplete.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.0.8
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/README.development
15 doc/TODO
16 makefile
17 schema/*.dtd
18 schemagen/AutoRacingResultsXML/*.xml
19 schemagen/Auto_Racing_Schedule_XML/*.xml
20 schemagen/CBASK_3PPctXML/*.xml
21 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
22 schemagen/CBASK_AssistsXML/*.xml
23 schemagen/Cbask_Awards_XML/*.xml
24 schemagen/CBASK_BlocksXML/*.xml
25 schemagen/Cbask_Conf_Standings_XML/*.xml
26 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
27 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
28 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
29 schemagen/CBASK_FGPctXML/*.xml
30 schemagen/CBASK_FoulsXML/*.xml
31 schemagen/CBASK_FTPctXML/*.xml
32 schemagen/Cbask_Indv_Scoring_XML/*.xml
33 schemagen/CBASK_Lineup_XML/*.xml
34 schemagen/CBASK_MinutesXML/*.xml
35 schemagen/Cbask_Polls_XML/*.xml
36 schemagen/cbaskpreviewxml/*.xml
37 schemagen/CBASK_ReboundsXML/*.xml
38 schemagen/CBASK_ScoringLeadersXML/*.xml
39 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
40 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
41 schemagen/Cbask_Team_Win_Pct_XML/*.xml
42 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
43 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
44 schemagen/Cbask_Tourn_Awards_XML/*.xml
45 schemagen/Cbask_Tourn_Champs_XML/*.xml
46 schemagen/Cbask_Tourn_Indiv_XML/*.xml
47 schemagen/Cbask_Tourn_Leaders_XML/*.xml
48 schemagen/Cbask_Tourn_MVP_XML/*.xml
49 schemagen/Cbask_Tourn_Records_XML/*.xml
50 schemagen/cflpreviewxml/*.xml
51 schemagen/earlylineXML/*.xml
52 schemagen/Heartbeat/*.xml
53 schemagen/Injuries_Detail_XML/*.xml
54 schemagen/injuriesxml/*.xml
55 schemagen/jfilexml/*.xml
56 schemagen/LeagueScheduleXML/*.xml
57 schemagen/Matchup_NBA_NHL_XML/*.xml
58 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
59 schemagen/Minor_Baseball_Standings_XML/*.xml
60 schemagen/Minor_Baseball_Transactions_XML/*.xml
61 schemagen/minorscoresxml/*.xml
62 schemagen/mlbbattingavgxml/*.xml
63 schemagen/mlbdoublesleadersxml/*.xml
64 schemagen/MLB_ERA_Leaders/*.xml
65 schemagen/MLB_Fielding_XML/*.xml
66 schemagen/MLBGamesPlayedXML/*.xml
67 schemagen/MLB_Gaming_Matchup_XML/*.xml
68 schemagen/MLBGIDPXML/*.xml
69 schemagen/MLBHitByPitchXML/*.xml
70 schemagen/mlbhitsleadersxml/*.xml
71 schemagen/mlbhomerunsxml/*.xml
72 schemagen/MLBHRFreqXML/*.xml
73 schemagen/MLBIntWalksXML/*.xml
74 schemagen/MLBKORateXML/*.xml
75 schemagen/MLB_Lineup_XML/*.xml
76 schemagen/MLB_Matchup_XML/*.xml
77 schemagen/mlbonbasepctxml/*.xml
78 schemagen/MLBOPSXML/*.xml
79 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
80 schemagen/MLB_Pitching_Balks_Leaders/*.xml
81 schemagen/MLB_Pitching_CG_Leaders/*.xml
82 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
83 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
84 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
86 schemagen/MLB_Pitching_IP_Leaders/*.xml
87 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_Saves_Leaders/*.xml
89 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
90 schemagen/MLB_Pitching_Starts_Leaders/*.xml
91 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Walks_Leaders/*.xml
93 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
94 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
95 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
96 schemagen/MLB_Pitching_WL_Leaders/*.xml
97 schemagen/MLBPlateAppsXML/*.xml
98 schemagen/mlbpreviewxml/*.xml
99 schemagen/mlbrbisxml/*.xml
100 schemagen/mlbrunsleadersxml/*.xml
101 schemagen/MLBSacFliesXML/*.xml
102 schemagen/MLBSacrificesXML/*.xml
103 schemagen/MLBSBSuccessXML/*.xml
104 schemagen/mlbsluggingpctxml/*.xml
105 schemagen/mlbstandxml/*.xml
106 schemagen/mlbstandxml_preseason/*.xml
107 schemagen/mlbstolenbasexml/*.xml
108 schemagen/mlbtotalbasesleadersxml/*.xml
109 schemagen/mlbtriplesleadersxml/*.xml
110 schemagen/MLBWalkRateXML/*.xml
111 schemagen/mlbwalksleadersxml/*.xml
112 schemagen/MLBXtraBaseHitsXML/*.xml
113 schemagen/MLS_Preview_XML/*.xml
114 schemagen/NBA3PPctXML/*.xml
115 schemagen/NBAAssistsXML/*.xml
116 schemagen/NBABlocksXML/*.xml
117 schemagen/nbaconfrecxml/*.xml
118 schemagen/nbadaysxml/*.xml
119 schemagen/nbadivisionsxml/*.xml
120 schemagen/NBAFGPctXML/*.xml
121 schemagen/NBAFoulsXML/*.xml
122 schemagen/NBAFTPctXML/*.xml
123 schemagen/NBA_Gaming_Matchup_XML/*.xml
124 schemagen/NBALineupXML/*.xml
125 schemagen/NBAMinutesXML/*.xml
126 schemagen/NBA_Playoff_Matchup_XML/*.xml
127 schemagen/nbapreviewxml/*.xml
128 schemagen/NBAReboundsXML/*.xml
129 schemagen/NBAScorersXML/*.xml
130 schemagen/nbastandxml/*.xml
131 schemagen/NBAStealsXML/*.xml
132 schemagen/nbateamleadersxml/*.xml
133 schemagen/NBA_Team_Stats_XML/*.xml
134 schemagen/nbatripledoublexml/*.xml
135 schemagen/NBATurnoversXML/*.xml
136 schemagen/NCAA_Conference_Schedule_XML/*.xml
137 schemagen/NCAA_FB_Preview_XML/*.xml
138 schemagen/newsxml/*.xml
139 schemagen/nflfirstdownxml/*.xml
140 schemagen/NFLFumbleLeaderXML/*.xml
141 schemagen/NFLGiveTakeXML/*.xml
142 schemagen/NFLInside20XML/*.xml
143 schemagen/NFL_KickingLeaders_XML/*.xml
144 schemagen/NFLKickoffsXML/*.xml
145 schemagen/NFLMondayNightXML/*.xml
146 schemagen/NFL_NBA_Draft_XML/*.xml
147 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
148 schemagen/NFLPassLeadXML/*.xml
149 schemagen/nflpreviewxml/*.xml
150 schemagen/NFLQBStartsXML/*.xml
151 schemagen/NFL_Roster_XML/*.xml
152 schemagen/NFLSackLeadersXML/*.xml
153 schemagen/nflstandxml/*.xml
154 schemagen/NFLTeamRankingsXML/*.xml
155 schemagen/NFL_Team_Stats_XML/*.xml
156 schemagen/NFLTopPerformanceXML/*.xml
157 schemagen/NFLTotalYardageXML/*.xml
158 schemagen/nhlpreviewxml/*.xml
159 schemagen/Odds_XML/*.xml
160 schemagen/recapxml/*.xml
161 schemagen/Schedule_Changes_XML/*.xml
162 schemagen/scoresxml/*.xml
163 schemagen/Transactions_XML/*.xml
164 schemagen/weatherxml/*.xml
165 schemagen/Weekly_Sched_XML/*.xml
166 schemagen/WNBA3PPctXML/*.xml
167 schemagen/WNBAAssistsXML/*.xml
168 schemagen/WNBABlocksXML/*.xml
169 schemagen/WNBAFGPctXML/*.xml
170 schemagen/WNBAFoulsXML/*.xml
171 schemagen/WNBAFTPctXML/*.xml
172 schemagen/WNBAMinutesXML/*.xml
173 schemagen/WNBAReboundsXML/*.xml
174 schemagen/WNBAScorersXML/*.xml
175 schemagen/wnbastandxml/*.xml
176 schemagen/WNBAStealsXML/*.xml
177 schemagen/WNBA_Team_Leaders_XML/*.xml
178 schemagen/WNBATurnoversXML/*.xml
179 schemagen/WorldBaseballPreviewXML/*.xml
180 test/shell/*.test
181 test/xml/*.xml
182 test/xml/*.dtd
183 test/xml/gameinfo/*.xml
184 test/xml/gameinfo/*.dtd
185 test/xml/sportinfo/*.xml
186 test/xml/sportinfo/*.dtd
187 synopsis:
188 Import XML files from The Sports Network into an RDBMS.
189 description:
190 /Usage/:
191 .
192 @
193 htsn-import [OPTIONS] [FILES]
194 @
195 .
196 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
197 containing various sports news and statistics. Our sister program
198 /htsn/ is capable of retrieving the feed and saving the individual
199 XML documents contained therein. But what to do with them?
200 .
201 The purpose of /htsn-import/ is to take these XML documents and
202 get them into something we can use, a relational database management
203 system (RDBMS), i.e. \"a SQL database\". The structure of a
204 relational database is, well, relational, and the feed XML is not. So
205 there is some work to do before the data can be inserted.
206 .
207 First, we must parse the XML. Each supported document type (see below)
208 has a full pickle/unpickle implementation (\"pickle\" is simply a
209 synonym for serialize here). That means that we parse the entire
210 document into a data structure, and if we pickle (serialize) that data
211 structure, we get the exact same XML document that we started with.
212 .
213 This is important for two reasons. First, it serves as a second level
214 of validation. The first validation is performed by the XML parser,
215 but if that succeeds and unpickling fails, we know that something is
216 fishy. Second, we don't ever want to be surprised by some new element
217 or attribute showing up in the XML. The fact that we can unpickle the
218 whole thing now means that we won't be surprised in the future.
219 .
220 The aforementioned feature is especially important because we
221 automatically migrate the database schema every time we import a
222 document. If you attempt to import a \"newsxml.dtd\" document, all
223 database objects relating to the news will be created if they do not
224 exist. We don't want the schema to change out from under us without
225 warning, so it's important that no XML be parsed that would result in
226 a different schema than we had previously. Since we can
227 pickle/unpickle everything already, this should be impossible.
228 .
229 Examples and usage documentation are available in the man page.
230
231 executable htsn-import
232 build-depends:
233 base >= 4.6 && < 5,
234 cmdargs >= 0.10.6,
235 configurator >= 0.2,
236 directory >= 1.2,
237 filepath >= 1.3,
238 hslogger >= 1.2,
239 htsn-common >= 0.0.1,
240 hxt >= 9.3,
241 groundhog >= 0.5,
242 groundhog-postgresql >= 0.5,
243 groundhog-sqlite >= 0.5,
244 groundhog-th >= 0.5,
245 MissingH >= 1.2,
246 old-locale >= 1,
247 split >= 0.2,
248 tasty >= 0.8,
249 tasty-hunit >= 0.8,
250 time >= 1.4,
251 transformers >= 0.3,
252 tuple >= 0.2
253
254 main-is:
255 Main.hs
256
257 hs-source-dirs:
258 src/
259
260 other-modules:
261 Backend
262 CommandLine
263 Configuration
264 ConnectionString
265 ExitCodes
266 OptionalConfiguration
267 TSN.Codegen
268 TSN.Database
269 TSN.DbImport
270 TSN.Location
271 TSN.Parse
272 TSN.Picklers
273 TSN.Team
274 TSN.XmlImport
275 TSN.XML.AutoRacingResults
276 TSN.XML.AutoRacingSchedule
277 TSN.XML.EarlyLine
278 TSN.XML.GameInfo
279 TSN.XML.Heartbeat
280 TSN.XML.Injuries
281 TSN.XML.InjuriesDetail
282 TSN.XML.JFile
283 TSN.XML.News
284 TSN.XML.Odds
285 TSN.XML.ScheduleChanges
286 TSN.XML.Scores
287 TSN.XML.SportInfo
288 TSN.XML.Weather
289 Xml
290
291 ghc-options:
292 -Wall
293 -fwarn-hi-shadowing
294 -fwarn-missing-signatures
295 -fwarn-name-shadowing
296 -fwarn-orphans
297 -fwarn-type-defaults
298 -fwarn-tabs
299 -fwarn-incomplete-record-updates
300 -fwarn-monomorphism-restriction
301 -fwarn-unused-do-bind
302 -O2
303
304 ghc-prof-options:
305 -prof
306 -fprof-auto
307 -fprof-cafs
308 -- The following unbreak profiling with template haskell. We have
309 -- to build the program twice; once without profiling and again with
310 -- these flags.
311 -hisuf hi_p
312 -osuf o_p
313
314
315 test-suite testsuite
316 type: exitcode-stdio-1.0
317 hs-source-dirs: src test
318 main-is: TestSuite.hs
319 build-depends:
320 base >= 4.6 && < 5,
321 cmdargs >= 0.10.6,
322 configurator >= 0.2,
323 directory >= 1.2,
324 filepath >= 1.3,
325 hslogger >= 1.2,
326 htsn-common >= 0.0.1,
327 hxt >= 9.3,
328 groundhog >= 0.5,
329 groundhog-postgresql >= 0.5,
330 groundhog-sqlite >= 0.5,
331 groundhog-th >= 0.5,
332 MissingH >= 1.2,
333 old-locale >= 1,
334 split >= 0.2,
335 tasty >= 0.8,
336 tasty-hunit >= 0.8,
337 time >= 1.4,
338 transformers >= 0.3,
339 tuple >= 0.2
340
341 -- It's not entirely clear to me why I have to reproduce all of this.
342 ghc-options:
343 -Wall
344 -fwarn-hi-shadowing
345 -fwarn-missing-signatures
346 -fwarn-name-shadowing
347 -fwarn-orphans
348 -fwarn-type-defaults
349 -fwarn-tabs
350 -fwarn-incomplete-record-updates
351 -fwarn-monomorphism-restriction
352 -fwarn-unused-do-bind
353 -O2
354
355
356 test-suite doctests
357 type: exitcode-stdio-1.0
358 hs-source-dirs: test
359 main-is: Doctests.hs
360 build-depends:
361 base >= 4.6 && < 5,
362 -- Additional test dependencies.
363 doctest >= 0.9
364
365 -- It's not entirely clear to me why I have to reproduce all of this.
366 ghc-options:
367 -Wall
368 -fwarn-hi-shadowing
369 -fwarn-missing-signatures
370 -fwarn-name-shadowing
371 -fwarn-orphans
372 -fwarn-type-defaults
373 -fwarn-tabs
374 -fwarn-incomplete-record-updates
375 -fwarn-monomorphism-restriction
376 -fwarn-unused-do-bind
377 -rtsopts
378 -threaded
379 -optc-O3
380 -optc-march=native
381 -O2
382
383
384 -- These won't work without shelltestrunner installed in your
385 -- $PATH. Maybe there is some way to tell Cabal that.
386 test-suite shelltests
387 type: exitcode-stdio-1.0
388 hs-source-dirs: test
389 main-is: ShellTests.hs
390
391 build-depends:
392 base >= 4.6 && < 5,
393 cmdargs >= 0.10.6,
394 configurator >= 0.2,
395 directory >= 1.2,
396 filepath >= 1.3,
397 hslogger >= 1.2,
398 htsn-common >= 0.0.1,
399 hxt >= 9.3,
400 groundhog >= 0.5,
401 groundhog-postgresql >= 0.5,
402 groundhog-sqlite >= 0.5,
403 groundhog-th >= 0.5,
404 MissingH >= 1.2,
405 old-locale >= 1,
406 split >= 0.2,
407 process >= 1.1,
408 tasty >= 0.8,
409 tasty-hunit >= 0.8,
410 time >= 1.4,
411 transformers >= 0.3,
412 tuple >= 0.2
413
414
415
416 source-repository head
417 type: git
418 location: http://michael.orlitzky.com/git/htsn-import.git
419 branch: master