]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
07d9ac1cb44e7796b17c3aa9cf00f4bcf6d4b770
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassingLeadersXML/*.xml
153 schemagen/NFLPassLeadXML/*.xml
154 schemagen/nflpreviewxml/*.xml
155 schemagen/NFLQBStartsXML/*.xml
156 schemagen/NFLReceivingLeadersXML/*.xml
157 schemagen/NFL_Roster_XML/*.xml
158 schemagen/NFLRushingLeadersXML/*.xml
159 schemagen/NFLSackLeadersXML/*.xml
160 schemagen/nflstandxml/*.xml
161 schemagen/NFLTackleFFLeadersXML/*.xml
162 schemagen/NFLTeamRankingsXML/*.xml
163 schemagen/NFL_Team_Stats_XML/*.xml
164 schemagen/NFLTopKickoffReturnXML/*.xml
165 schemagen/NFLTopPerformanceXML/*.xml
166 schemagen/NFLTotalYardageXML/*.xml
167 schemagen/nhlpreviewxml/*.xml
168 schemagen/Odds_XML/*.xml
169 schemagen/recapxml/*.xml
170 schemagen/Schedule_Changes_XML/*.xml
171 schemagen/scoresxml/*.xml
172 schemagen/Transactions_XML/*.xml
173 schemagen/weatherxml/*.xml
174 schemagen/Weekly_Sched_XML/*.xml
175 schemagen/WNBA3PPctXML/*.xml
176 schemagen/WNBAAssistsXML/*.xml
177 schemagen/WNBABlocksXML/*.xml
178 schemagen/WNBAFGPctXML/*.xml
179 schemagen/WNBAFoulsXML/*.xml
180 schemagen/WNBAFTPctXML/*.xml
181 schemagen/WNBAMinutesXML/*.xml
182 schemagen/WNBAReboundsXML/*.xml
183 schemagen/WNBAScorersXML/*.xml
184 schemagen/wnbastandxml/*.xml
185 schemagen/WNBAStealsXML/*.xml
186 schemagen/WNBA_Team_Leaders_XML/*.xml
187 schemagen/WNBATurnoversXML/*.xml
188 schemagen/WorldBaseballPreviewXML/*.xml
189 test/shell/*.test
190 test/xml/*.xml
191 test/xml/*.dtd
192 test/xml/gameinfo/*.xml
193 test/xml/gameinfo/*.dtd
194 test/xml/sportinfo/*.xml
195 test/xml/sportinfo/*.dtd
196 synopsis:
197 Import XML files from The Sports Network into an RDBMS.
198 description:
199 /Usage/:
200 .
201 @
202 htsn-import [OPTIONS] [FILES]
203 @
204 .
205 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
206 containing various sports news and statistics. Our sister program
207 /htsn/ is capable of retrieving the feed and saving the individual
208 XML documents contained therein. But what to do with them?
209 .
210 The purpose of /htsn-import/ is to take these XML documents and
211 get them into something we can use, a relational database management
212 system (RDBMS), i.e. \"a SQL database\". The structure of
213 a relational database is, well, relational, and the feed XML is not. So
214 there is some work to do before the data can be inserted.
215 .
216 First, we must parse the XML. Each supported document type (see below)
217 has a full pickle/unpickle implementation (\"pickle\" is simply a
218 synonym for serialize here). That means that we parse the entire
219 document into a data structure, and if we pickle (serialize) that data
220 structure, we get the exact same XML document that we started with.
221 .
222 This is important for two reasons. First, it serves as a second level
223 of validation. The first validation is performed by the XML parser,
224 but if that succeeds and unpickling fails, we know that something is
225 fishy. Second, we don't ever want to be surprised by some new element
226 or attribute showing up in the XML. The fact that we can unpickle the
227 whole thing now means that we won't be surprised in the future.
228 .
229 The aforementioned feature is especially important because we
230 automatically migrate the database schema every time we import a
231 document. If you attempt to import a \"newsxml.dtd\" document, all
232 database objects relating to the news will be created if they do not
233 exist. We don't want the schema to change out from under us without
234 warning, so it's important that no XML be parsed that would result in
235 a different schema than we had previously. Since we can
236 pickle/unpickle everything already, this should be impossible.
237 .
238 Examples and usage documentation are available in the man page.
239
240 executable htsn-import
241 build-depends:
242 base >= 4.6 && < 5,
243 cmdargs >= 0.10.6,
244 configurator >= 0.2,
245 directory >= 1.2,
246 filepath >= 1.3,
247 hslogger >= 1.2,
248 htsn-common >= 0.0.1,
249 hxt >= 9.3,
250 groundhog >= 0.5,
251 groundhog-postgresql >= 0.5,
252 groundhog-sqlite >= 0.5,
253 groundhog-th >= 0.5,
254 MissingH >= 1.2,
255 old-locale >= 1,
256 split >= 0.2,
257 tasty >= 0.8,
258 tasty-hunit >= 0.8,
259 time >= 1.4,
260 transformers >= 0.3,
261 tuple >= 0.2
262
263 main-is:
264 Main.hs
265
266 hs-source-dirs:
267 src/
268
269 other-modules:
270 Backend
271 CommandLine
272 Configuration
273 ConnectionString
274 ExitCodes
275 OptionalConfiguration
276 TSN.Codegen
277 TSN.Database
278 TSN.DbImport
279 TSN.Location
280 TSN.Parse
281 TSN.Picklers
282 TSN.Team
283 TSN.XmlImport
284 TSN.XML.AutoRacingResults
285 TSN.XML.AutoRacingSchedule
286 TSN.XML.EarlyLine
287 TSN.XML.GameInfo
288 TSN.XML.Heartbeat
289 TSN.XML.Injuries
290 TSN.XML.InjuriesDetail
291 TSN.XML.JFile
292 TSN.XML.MLBEarlyLine
293 TSN.XML.News
294 TSN.XML.Odds
295 TSN.XML.ScheduleChanges
296 TSN.XML.Scores
297 TSN.XML.SportInfo
298 TSN.XML.Weather
299 Xml
300
301 ghc-options:
302 -Wall
303 -fwarn-hi-shadowing
304 -fwarn-missing-signatures
305 -fwarn-name-shadowing
306 -fwarn-orphans
307 -fwarn-type-defaults
308 -fwarn-tabs
309 -fwarn-incomplete-record-updates
310 -fwarn-monomorphism-restriction
311 -fwarn-unused-do-bind
312 -O2
313
314 ghc-prof-options:
315 -prof
316 -fprof-auto
317 -fprof-cafs
318 -- The following unbreak profiling with template haskell. We have
319 -- to build the program twice; once without profiling and again with
320 -- these flags.
321 -hisuf hi_p
322 -osuf o_p
323
324
325 test-suite testsuite
326 type: exitcode-stdio-1.0
327 hs-source-dirs: src test
328 main-is: TestSuite.hs
329 build-depends:
330 base >= 4.6 && < 5,
331 cmdargs >= 0.10.6,
332 configurator >= 0.2,
333 directory >= 1.2,
334 filepath >= 1.3,
335 hslogger >= 1.2,
336 htsn-common >= 0.0.1,
337 hxt >= 9.3,
338 groundhog >= 0.5,
339 groundhog-postgresql >= 0.5,
340 groundhog-sqlite >= 0.5,
341 groundhog-th >= 0.5,
342 MissingH >= 1.2,
343 old-locale >= 1,
344 split >= 0.2,
345 tasty >= 0.8,
346 tasty-hunit >= 0.8,
347 time >= 1.4,
348 transformers >= 0.3,
349 tuple >= 0.2
350
351 -- It's not entirely clear to me why I have to reproduce all of this.
352 ghc-options:
353 -Wall
354 -fwarn-hi-shadowing
355 -fwarn-missing-signatures
356 -fwarn-name-shadowing
357 -fwarn-orphans
358 -fwarn-type-defaults
359 -fwarn-tabs
360 -fwarn-incomplete-record-updates
361 -fwarn-monomorphism-restriction
362 -fwarn-unused-do-bind
363 -O2
364
365
366 test-suite doctests
367 type: exitcode-stdio-1.0
368 hs-source-dirs: test
369 main-is: Doctests.hs
370 build-depends:
371 base >= 4.6 && < 5,
372 -- Additional test dependencies.
373 doctest >= 0.9
374
375 -- It's not entirely clear to me why I have to reproduce all of this.
376 ghc-options:
377 -Wall
378 -fwarn-hi-shadowing
379 -fwarn-missing-signatures
380 -fwarn-name-shadowing
381 -fwarn-orphans
382 -fwarn-type-defaults
383 -fwarn-tabs
384 -fwarn-incomplete-record-updates
385 -fwarn-monomorphism-restriction
386 -fwarn-unused-do-bind
387 -rtsopts
388 -threaded
389 -optc-O3
390 -optc-march=native
391 -O2
392
393
394 -- These won't work without shelltestrunner installed in your
395 -- $PATH. Maybe there is some way to tell Cabal that.
396 test-suite shelltests
397 type: exitcode-stdio-1.0
398 hs-source-dirs: test
399 main-is: ShellTests.hs
400
401 build-depends:
402 base >= 4.6 && < 5,
403 cmdargs >= 0.10.6,
404 configurator >= 0.2,
405 directory >= 1.2,
406 filepath >= 1.3,
407 hslogger >= 1.2,
408 htsn-common >= 0.0.1,
409 hxt >= 9.3,
410 groundhog >= 0.5,
411 groundhog-postgresql >= 0.5,
412 groundhog-sqlite >= 0.5,
413 groundhog-th >= 0.5,
414 MissingH >= 1.2,
415 old-locale >= 1,
416 split >= 0.2,
417 process >= 1.1,
418 tasty >= 0.8,
419 tasty-hunit >= 0.8,
420 time >= 1.4,
421 transformers >= 0.3,
422 tuple >= 0.2
423
424
425
426 source-repository head
427 type: git
428 location: http://michael.orlitzky.com/git/htsn-import.git
429 branch: master