]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
Add SportInfo support for NFLGrassTurfDomeOutsideXML.dtd.
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFL_KickingLeaders_XML/*.xml
147 schemagen/NFLKickoffsXML/*.xml
148 schemagen/NFLMondayNightXML/*.xml
149 schemagen/NFL_NBA_Draft_XML/*.xml
150 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
151 schemagen/NFLPassLeadXML/*.xml
152 schemagen/nflpreviewxml/*.xml
153 schemagen/NFLQBStartsXML/*.xml
154 schemagen/NFL_Roster_XML/*.xml
155 schemagen/NFLSackLeadersXML/*.xml
156 schemagen/nflstandxml/*.xml
157 schemagen/NFLTeamRankingsXML/*.xml
158 schemagen/NFL_Team_Stats_XML/*.xml
159 schemagen/NFLTopPerformanceXML/*.xml
160 schemagen/NFLTotalYardageXML/*.xml
161 schemagen/nhlpreviewxml/*.xml
162 schemagen/Odds_XML/*.xml
163 schemagen/recapxml/*.xml
164 schemagen/Schedule_Changes_XML/*.xml
165 schemagen/scoresxml/*.xml
166 schemagen/Transactions_XML/*.xml
167 schemagen/weatherxml/*.xml
168 schemagen/Weekly_Sched_XML/*.xml
169 schemagen/WNBA3PPctXML/*.xml
170 schemagen/WNBAAssistsXML/*.xml
171 schemagen/WNBABlocksXML/*.xml
172 schemagen/WNBAFGPctXML/*.xml
173 schemagen/WNBAFoulsXML/*.xml
174 schemagen/WNBAFTPctXML/*.xml
175 schemagen/WNBAMinutesXML/*.xml
176 schemagen/WNBAReboundsXML/*.xml
177 schemagen/WNBAScorersXML/*.xml
178 schemagen/wnbastandxml/*.xml
179 schemagen/WNBAStealsXML/*.xml
180 schemagen/WNBA_Team_Leaders_XML/*.xml
181 schemagen/WNBATurnoversXML/*.xml
182 schemagen/WorldBaseballPreviewXML/*.xml
183 test/shell/*.test
184 test/xml/*.xml
185 test/xml/*.dtd
186 test/xml/gameinfo/*.xml
187 test/xml/gameinfo/*.dtd
188 test/xml/sportinfo/*.xml
189 test/xml/sportinfo/*.dtd
190 synopsis:
191 Import XML files from The Sports Network into an RDBMS.
192 description:
193 /Usage/:
194 .
195 @
196 htsn-import [OPTIONS] [FILES]
197 @
198 .
199 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
200 containing various sports news and statistics. Our sister program
201 /htsn/ is capable of retrieving the feed and saving the individual
202 XML documents contained therein. But what to do with them?
203 .
204 The purpose of /htsn-import/ is to take these XML documents and
205 get them into something we can use, a relational database management
206 system (RDBMS), i.e. \"a SQL database\". The structure of a
207 relational database is, well, relational, and the feed XML is not. So
208 there is some work to do before the data can be inserted.
209 .
210 First, we must parse the XML. Each supported document type (see below)
211 has a full pickle/unpickle implementation (\"pickle\" is simply a
212 synonym for serialize here). That means that we parse the entire
213 document into a data structure, and if we pickle (serialize) that data
214 structure, we get the exact same XML document that we started with.
215 .
216 This is important for two reasons. First, it serves as a second level
217 of validation. The first validation is performed by the XML parser,
218 but if that succeeds and unpickling fails, we know that something is
219 fishy. Second, we don't ever want to be surprised by some new element
220 or attribute showing up in the XML. The fact that we can unpickle the
221 whole thing now means that we won't be surprised in the future.
222 .
223 The aforementioned feature is especially important because we
224 automatically migrate the database schema every time we import a
225 document. If you attempt to import a \"newsxml.dtd\" document, all
226 database objects relating to the news will be created if they do not
227 exist. We don't want the schema to change out from under us without
228 warning, so it's important that no XML be parsed that would result in
229 a different schema than we had previously. Since we can
230 pickle/unpickle everything already, this should be impossible.
231 .
232 Examples and usage documentation are available in the man page.
233
234 executable htsn-import
235 build-depends:
236 base >= 4.6 && < 5,
237 cmdargs >= 0.10.6,
238 configurator >= 0.2,
239 directory >= 1.2,
240 filepath >= 1.3,
241 hslogger >= 1.2,
242 htsn-common >= 0.0.1,
243 hxt >= 9.3,
244 groundhog >= 0.5,
245 groundhog-postgresql >= 0.5,
246 groundhog-sqlite >= 0.5,
247 groundhog-th >= 0.5,
248 MissingH >= 1.2,
249 old-locale >= 1,
250 split >= 0.2,
251 tasty >= 0.8,
252 tasty-hunit >= 0.8,
253 time >= 1.4,
254 transformers >= 0.3,
255 tuple >= 0.2
256
257 main-is:
258 Main.hs
259
260 hs-source-dirs:
261 src/
262
263 other-modules:
264 Backend
265 CommandLine
266 Configuration
267 ConnectionString
268 ExitCodes
269 OptionalConfiguration
270 TSN.Codegen
271 TSN.Database
272 TSN.DbImport
273 TSN.Location
274 TSN.Parse
275 TSN.Picklers
276 TSN.Team
277 TSN.XmlImport
278 TSN.XML.AutoRacingResults
279 TSN.XML.AutoRacingSchedule
280 TSN.XML.EarlyLine
281 TSN.XML.GameInfo
282 TSN.XML.Heartbeat
283 TSN.XML.Injuries
284 TSN.XML.InjuriesDetail
285 TSN.XML.JFile
286 TSN.XML.MLBEarlyLine
287 TSN.XML.News
288 TSN.XML.Odds
289 TSN.XML.ScheduleChanges
290 TSN.XML.Scores
291 TSN.XML.SportInfo
292 TSN.XML.Weather
293 Xml
294
295 ghc-options:
296 -Wall
297 -fwarn-hi-shadowing
298 -fwarn-missing-signatures
299 -fwarn-name-shadowing
300 -fwarn-orphans
301 -fwarn-type-defaults
302 -fwarn-tabs
303 -fwarn-incomplete-record-updates
304 -fwarn-monomorphism-restriction
305 -fwarn-unused-do-bind
306 -O2
307
308 ghc-prof-options:
309 -prof
310 -fprof-auto
311 -fprof-cafs
312 -- The following unbreak profiling with template haskell. We have
313 -- to build the program twice; once without profiling and again with
314 -- these flags.
315 -hisuf hi_p
316 -osuf o_p
317
318
319 test-suite testsuite
320 type: exitcode-stdio-1.0
321 hs-source-dirs: src test
322 main-is: TestSuite.hs
323 build-depends:
324 base >= 4.6 && < 5,
325 cmdargs >= 0.10.6,
326 configurator >= 0.2,
327 directory >= 1.2,
328 filepath >= 1.3,
329 hslogger >= 1.2,
330 htsn-common >= 0.0.1,
331 hxt >= 9.3,
332 groundhog >= 0.5,
333 groundhog-postgresql >= 0.5,
334 groundhog-sqlite >= 0.5,
335 groundhog-th >= 0.5,
336 MissingH >= 1.2,
337 old-locale >= 1,
338 split >= 0.2,
339 tasty >= 0.8,
340 tasty-hunit >= 0.8,
341 time >= 1.4,
342 transformers >= 0.3,
343 tuple >= 0.2
344
345 -- It's not entirely clear to me why I have to reproduce all of this.
346 ghc-options:
347 -Wall
348 -fwarn-hi-shadowing
349 -fwarn-missing-signatures
350 -fwarn-name-shadowing
351 -fwarn-orphans
352 -fwarn-type-defaults
353 -fwarn-tabs
354 -fwarn-incomplete-record-updates
355 -fwarn-monomorphism-restriction
356 -fwarn-unused-do-bind
357 -O2
358
359
360 test-suite doctests
361 type: exitcode-stdio-1.0
362 hs-source-dirs: test
363 main-is: Doctests.hs
364 build-depends:
365 base >= 4.6 && < 5,
366 -- Additional test dependencies.
367 doctest >= 0.9
368
369 -- It's not entirely clear to me why I have to reproduce all of this.
370 ghc-options:
371 -Wall
372 -fwarn-hi-shadowing
373 -fwarn-missing-signatures
374 -fwarn-name-shadowing
375 -fwarn-orphans
376 -fwarn-type-defaults
377 -fwarn-tabs
378 -fwarn-incomplete-record-updates
379 -fwarn-monomorphism-restriction
380 -fwarn-unused-do-bind
381 -rtsopts
382 -threaded
383 -optc-O3
384 -optc-march=native
385 -O2
386
387
388 -- These won't work without shelltestrunner installed in your
389 -- $PATH. Maybe there is some way to tell Cabal that.
390 test-suite shelltests
391 type: exitcode-stdio-1.0
392 hs-source-dirs: test
393 main-is: ShellTests.hs
394
395 build-depends:
396 base >= 4.6 && < 5,
397 cmdargs >= 0.10.6,
398 configurator >= 0.2,
399 directory >= 1.2,
400 filepath >= 1.3,
401 hslogger >= 1.2,
402 htsn-common >= 0.0.1,
403 hxt >= 9.3,
404 groundhog >= 0.5,
405 groundhog-postgresql >= 0.5,
406 groundhog-sqlite >= 0.5,
407 groundhog-th >= 0.5,
408 MissingH >= 1.2,
409 old-locale >= 1,
410 split >= 0.2,
411 process >= 1.1,
412 tasty >= 0.8,
413 tasty-hunit >= 0.8,
414 time >= 1.4,
415 transformers >= 0.3,
416 tuple >= 0.2
417
418
419
420 source-repository head
421 type: git
422 location: http://michael.orlitzky.com/git/htsn-import.git
423 branch: master