]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - htsn-import.cabal
6d22cab1576adf5f7c5773b92dfbe8bc95f06085
[dead/htsn-import.git] / htsn-import.cabal
1 name: htsn-import
2 version: 0.2.0
3 cabal-version: >= 1.8
4 author: Michael Orlitzky
5 maintainer: Michael Orlitzky <michael@orlitzky.com>
6 category: Utils
7 license: GPL-3
8 license-file: doc/LICENSE
9 build-type: Simple
10 extra-source-files:
11 doc/dbschema/*.png
12 doc/htsn-importrc.example
13 doc/man1/htsn-import.1
14 doc/CHANGES.database
15 doc/README.development
16 doc/TODO
17 makefile
18 schema/*.dtd
19 schemagen/AutoRacingResultsXML/*.xml
20 schemagen/Auto_Racing_Schedule_XML/*.xml
21 schemagen/CBASK_3PPctXML/*.xml
22 schemagen/Cbask_All_Tourn_Teams_XML/*.xml
23 schemagen/CBASK_AssistsXML/*.xml
24 schemagen/Cbask_Awards_XML/*.xml
25 schemagen/CBASK_BlocksXML/*.xml
26 schemagen/Cbask_Conf_Standings_XML/*.xml
27 schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
28 schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
29 schemagen/Cbask_DivII_Team_Stats_XML/*.xml
30 schemagen/CBASK_FGPctXML/*.xml
31 schemagen/CBASK_FoulsXML/*.xml
32 schemagen/CBASK_FTPctXML/*.xml
33 schemagen/Cbask_Indv_Scoring_XML/*.xml
34 schemagen/CBASK_Lineup_XML/*.xml
35 schemagen/CBASK_MinutesXML/*.xml
36 schemagen/Cbask_Polls_XML/*.xml
37 schemagen/cbaskpreviewxml/*.xml
38 schemagen/CBASK_ReboundsXML/*.xml
39 schemagen/CBASK_ScoringLeadersXML/*.xml
40 schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
41 schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
42 schemagen/Cbask_Team_Win_Pct_XML/*.xml
43 schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
44 schemagen/Cbask_Top_Twenty_Five_XML/*.xml
45 schemagen/Cbask_Tourn_Awards_XML/*.xml
46 schemagen/Cbask_Tourn_Champs_XML/*.xml
47 schemagen/Cbask_Tourn_Indiv_XML/*.xml
48 schemagen/Cbask_Tourn_Leaders_XML/*.xml
49 schemagen/Cbask_Tourn_MVP_XML/*.xml
50 schemagen/Cbask_Tourn_Records_XML/*.xml
51 schemagen/cflpreviewxml/*.xml
52 schemagen/earlylineXML/*.xml
53 schemagen/Heartbeat/*.xml
54 schemagen/Injuries_Detail_XML/*.xml
55 schemagen/injuriesxml/*.xml
56 schemagen/jfilexml/*.xml
57 schemagen/LeagueScheduleXML/*.xml
58 schemagen/Matchup_NBA_NHL_XML/*.xml
59 schemagen/Minor_Baseball_League_Leaders_XML/*.xml
60 schemagen/Minor_Baseball_Standings_XML/*.xml
61 schemagen/Minor_Baseball_Transactions_XML/*.xml
62 schemagen/minorscoresxml/*.xml
63 schemagen/mlbbattingavgxml/*.xml
64 schemagen/mlbdoublesleadersxml/*.xml
65 schemagen/MLB_ERA_Leaders/*.xml
66 schemagen/MLB_Fielding_XML/*.xml
67 schemagen/MLBGamesPlayedXML/*.xml
68 schemagen/MLB_Gaming_Matchup_XML/*.xml
69 schemagen/MLBGIDPXML/*.xml
70 schemagen/MLBHitByPitchXML/*.xml
71 schemagen/mlbhitsleadersxml/*.xml
72 schemagen/mlbhomerunsxml/*.xml
73 schemagen/MLBHRFreqXML/*.xml
74 schemagen/MLBIntWalksXML/*.xml
75 schemagen/MLBKORateXML/*.xml
76 schemagen/MLB_Lineup_XML/*.xml
77 schemagen/MLB_Matchup_XML/*.xml
78 schemagen/mlbonbasepctxml/*.xml
79 schemagen/MLBOPSXML/*.xml
80 schemagen/MLB_earlylineXML/*.xml
81 schemagen/MLB_Pitching_Appearances_Leaders/*.xml
82 schemagen/MLB_Pitching_Balks_Leaders/*.xml
83 schemagen/MLB_Pitching_CG_Leaders/*.xml
84 schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
85 schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
86 schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
87 schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
88 schemagen/MLB_Pitching_IP_Leaders/*.xml
89 schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
90 schemagen/MLB_Pitching_Saves_Leaders/*.xml
91 schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
92 schemagen/MLB_Pitching_Starts_Leaders/*.xml
93 schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
94 schemagen/MLB_Pitching_Walks_Leaders/*.xml
95 schemagen/MLB_Pitching_WHIP_Leaders/*.xml
96 schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
97 schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
98 schemagen/MLB_Pitching_WL_Leaders/*.xml
99 schemagen/MLBPlateAppsXML/*.xml
100 schemagen/mlbpreviewxml/*.xml
101 schemagen/mlbrbisxml/*.xml
102 schemagen/mlbrunsleadersxml/*.xml
103 schemagen/MLBSacFliesXML/*.xml
104 schemagen/MLBSacrificesXML/*.xml
105 schemagen/MLBSBSuccessXML/*.xml
106 schemagen/mlbsluggingpctxml/*.xml
107 schemagen/mlbstandxml/*.xml
108 schemagen/mlbstandxml_preseason/*.xml
109 schemagen/mlbstolenbasexml/*.xml
110 schemagen/mlbtotalbasesleadersxml/*.xml
111 schemagen/mlbtriplesleadersxml/*.xml
112 schemagen/MLBWalkRateXML/*.xml
113 schemagen/mlbwalksleadersxml/*.xml
114 schemagen/MLBXtraBaseHitsXML/*.xml
115 schemagen/MLS_Preview_XML/*.xml
116 schemagen/NBA3PPctXML/*.xml
117 schemagen/NBAAssistsXML/*.xml
118 schemagen/NBABlocksXML/*.xml
119 schemagen/nbaconfrecxml/*.xml
120 schemagen/nbadaysxml/*.xml
121 schemagen/nbadivisionsxml/*.xml
122 schemagen/NBAFGPctXML/*.xml
123 schemagen/NBAFoulsXML/*.xml
124 schemagen/NBAFTPctXML/*.xml
125 schemagen/NBA_Gaming_Matchup_XML/*.xml
126 schemagen/NBALineupXML/*.xml
127 schemagen/NBAMinutesXML/*.xml
128 schemagen/NBA_Playoff_Matchup_XML/*.xml
129 schemagen/nbapreviewxml/*.xml
130 schemagen/NBAReboundsXML/*.xml
131 schemagen/NBAScorersXML/*.xml
132 schemagen/nbastandxml/*.xml
133 schemagen/NBAStealsXML/*.xml
134 schemagen/nbateamleadersxml/*.xml
135 schemagen/NBA_Team_Stats_XML/*.xml
136 schemagen/nbatripledoublexml/*.xml
137 schemagen/NBATurnoversXML/*.xml
138 schemagen/NCAA_Conference_Schedule_XML/*.xml
139 schemagen/NCAA_FB_Preview_XML/*.xml
140 schemagen/newsxml/*.xml
141 schemagen/nflfirstdownxml/*.xml
142 schemagen/NFLFumbleLeaderXML/*.xml
143 schemagen/NFLGiveTakeXML/*.xml
144 schemagen/NFLGrassTurfDomeOutsideXML/*.xml
145 schemagen/NFLInside20XML/*.xml
146 schemagen/NFLInterceptionLeadersXML/*.xml
147 schemagen/NFL_KickingLeaders_XML/*.xml
148 schemagen/NFLKickoffsXML/*.xml
149 schemagen/NFLMondayNightXML/*.xml
150 schemagen/NFL_NBA_Draft_XML/*.xml
151 schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
152 schemagen/NFLPassingLeadersXML/*.xml
153 schemagen/NFLPassLeadXML/*.xml
154 schemagen/nflpreviewxml/*.xml
155 schemagen/NFLQBStartsXML/*.xml
156 schemagen/NFLReceivingLeadersXML/*.xml
157 schemagen/NFL_Roster_XML/*.xml
158 schemagen/NFLSackLeadersXML/*.xml
159 schemagen/nflstandxml/*.xml
160 schemagen/NFLTeamRankingsXML/*.xml
161 schemagen/NFL_Team_Stats_XML/*.xml
162 schemagen/NFLTopPerformanceXML/*.xml
163 schemagen/NFLTotalYardageXML/*.xml
164 schemagen/nhlpreviewxml/*.xml
165 schemagen/Odds_XML/*.xml
166 schemagen/recapxml/*.xml
167 schemagen/Schedule_Changes_XML/*.xml
168 schemagen/scoresxml/*.xml
169 schemagen/Transactions_XML/*.xml
170 schemagen/weatherxml/*.xml
171 schemagen/Weekly_Sched_XML/*.xml
172 schemagen/WNBA3PPctXML/*.xml
173 schemagen/WNBAAssistsXML/*.xml
174 schemagen/WNBABlocksXML/*.xml
175 schemagen/WNBAFGPctXML/*.xml
176 schemagen/WNBAFoulsXML/*.xml
177 schemagen/WNBAFTPctXML/*.xml
178 schemagen/WNBAMinutesXML/*.xml
179 schemagen/WNBAReboundsXML/*.xml
180 schemagen/WNBAScorersXML/*.xml
181 schemagen/wnbastandxml/*.xml
182 schemagen/WNBAStealsXML/*.xml
183 schemagen/WNBA_Team_Leaders_XML/*.xml
184 schemagen/WNBATurnoversXML/*.xml
185 schemagen/WorldBaseballPreviewXML/*.xml
186 test/shell/*.test
187 test/xml/*.xml
188 test/xml/*.dtd
189 test/xml/gameinfo/*.xml
190 test/xml/gameinfo/*.dtd
191 test/xml/sportinfo/*.xml
192 test/xml/sportinfo/*.dtd
193 synopsis:
194 Import XML files from The Sports Network into an RDBMS.
195 description:
196 /Usage/:
197 .
198 @
199 htsn-import [OPTIONS] [FILES]
200 @
201 .
202 The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
203 containing various sports news and statistics. Our sister program
204 /htsn/ is capable of retrieving the feed and saving the individual
205 XML documents contained therein. But what to do with them?
206 .
207 The purpose of /htsn-import/ is to take these XML documents and
208 get them into something we can use, a relational database management
209 system (RDBMS), i.e. \"a SQL database\". The structure of a
210 relational database is, well, relational, and the feed XML is not. So
211 there is some work to do before the data can be inserted.
212 .
213 First, we must parse the XML. Each supported document type (see below)
214 has a full pickle/unpickle implementation (\"pickle\" is simply a
215 synonym for serialize here). That means that we parse the entire
216 document into a data structure, and if we pickle (serialize) that data
217 structure, we get the exact same XML document that we started with.
218 .
219 This is important for two reasons. First, it serves as a second level
220 of validation. The first validation is performed by the XML parser,
221 but if that succeeds and unpickling fails, we know that something is
222 fishy. Second, we don't ever want to be surprised by some new element
223 or attribute showing up in the XML. The fact that we can unpickle the
224 whole thing now means that we won't be surprised in the future.
225 .
226 The aforementioned feature is especially important because we
227 automatically migrate the database schema every time we import a
228 document. If you attempt to import a \"newsxml.dtd\" document, all
229 database objects relating to the news will be created if they do not
230 exist. We don't want the schema to change out from under us without
231 warning, so it's important that no XML be parsed that would result in
232 a different schema than we had previously. Since we can
233 pickle/unpickle everything already, this should be impossible.
234 .
235 Examples and usage documentation are available in the man page.
236
237 executable htsn-import
238 build-depends:
239 base >= 4.6 && < 5,
240 cmdargs >= 0.10.6,
241 configurator >= 0.2,
242 directory >= 1.2,
243 filepath >= 1.3,
244 hslogger >= 1.2,
245 htsn-common >= 0.0.1,
246 hxt >= 9.3,
247 groundhog >= 0.5,
248 groundhog-postgresql >= 0.5,
249 groundhog-sqlite >= 0.5,
250 groundhog-th >= 0.5,
251 MissingH >= 1.2,
252 old-locale >= 1,
253 split >= 0.2,
254 tasty >= 0.8,
255 tasty-hunit >= 0.8,
256 time >= 1.4,
257 transformers >= 0.3,
258 tuple >= 0.2
259
260 main-is:
261 Main.hs
262
263 hs-source-dirs:
264 src/
265
266 other-modules:
267 Backend
268 CommandLine
269 Configuration
270 ConnectionString
271 ExitCodes
272 OptionalConfiguration
273 TSN.Codegen
274 TSN.Database
275 TSN.DbImport
276 TSN.Location
277 TSN.Parse
278 TSN.Picklers
279 TSN.Team
280 TSN.XmlImport
281 TSN.XML.AutoRacingResults
282 TSN.XML.AutoRacingSchedule
283 TSN.XML.EarlyLine
284 TSN.XML.GameInfo
285 TSN.XML.Heartbeat
286 TSN.XML.Injuries
287 TSN.XML.InjuriesDetail
288 TSN.XML.JFile
289 TSN.XML.MLBEarlyLine
290 TSN.XML.News
291 TSN.XML.Odds
292 TSN.XML.ScheduleChanges
293 TSN.XML.Scores
294 TSN.XML.SportInfo
295 TSN.XML.Weather
296 Xml
297
298 ghc-options:
299 -Wall
300 -fwarn-hi-shadowing
301 -fwarn-missing-signatures
302 -fwarn-name-shadowing
303 -fwarn-orphans
304 -fwarn-type-defaults
305 -fwarn-tabs
306 -fwarn-incomplete-record-updates
307 -fwarn-monomorphism-restriction
308 -fwarn-unused-do-bind
309 -O2
310
311 ghc-prof-options:
312 -prof
313 -fprof-auto
314 -fprof-cafs
315 -- The following unbreak profiling with template haskell. We have
316 -- to build the program twice; once without profiling and again with
317 -- these flags.
318 -hisuf hi_p
319 -osuf o_p
320
321
322 test-suite testsuite
323 type: exitcode-stdio-1.0
324 hs-source-dirs: src test
325 main-is: TestSuite.hs
326 build-depends:
327 base >= 4.6 && < 5,
328 cmdargs >= 0.10.6,
329 configurator >= 0.2,
330 directory >= 1.2,
331 filepath >= 1.3,
332 hslogger >= 1.2,
333 htsn-common >= 0.0.1,
334 hxt >= 9.3,
335 groundhog >= 0.5,
336 groundhog-postgresql >= 0.5,
337 groundhog-sqlite >= 0.5,
338 groundhog-th >= 0.5,
339 MissingH >= 1.2,
340 old-locale >= 1,
341 split >= 0.2,
342 tasty >= 0.8,
343 tasty-hunit >= 0.8,
344 time >= 1.4,
345 transformers >= 0.3,
346 tuple >= 0.2
347
348 -- It's not entirely clear to me why I have to reproduce all of this.
349 ghc-options:
350 -Wall
351 -fwarn-hi-shadowing
352 -fwarn-missing-signatures
353 -fwarn-name-shadowing
354 -fwarn-orphans
355 -fwarn-type-defaults
356 -fwarn-tabs
357 -fwarn-incomplete-record-updates
358 -fwarn-monomorphism-restriction
359 -fwarn-unused-do-bind
360 -O2
361
362
363 test-suite doctests
364 type: exitcode-stdio-1.0
365 hs-source-dirs: test
366 main-is: Doctests.hs
367 build-depends:
368 base >= 4.6 && < 5,
369 -- Additional test dependencies.
370 doctest >= 0.9
371
372 -- It's not entirely clear to me why I have to reproduce all of this.
373 ghc-options:
374 -Wall
375 -fwarn-hi-shadowing
376 -fwarn-missing-signatures
377 -fwarn-name-shadowing
378 -fwarn-orphans
379 -fwarn-type-defaults
380 -fwarn-tabs
381 -fwarn-incomplete-record-updates
382 -fwarn-monomorphism-restriction
383 -fwarn-unused-do-bind
384 -rtsopts
385 -threaded
386 -optc-O3
387 -optc-march=native
388 -O2
389
390
391 -- These won't work without shelltestrunner installed in your
392 -- $PATH. Maybe there is some way to tell Cabal that.
393 test-suite shelltests
394 type: exitcode-stdio-1.0
395 hs-source-dirs: test
396 main-is: ShellTests.hs
397
398 build-depends:
399 base >= 4.6 && < 5,
400 cmdargs >= 0.10.6,
401 configurator >= 0.2,
402 directory >= 1.2,
403 filepath >= 1.3,
404 hslogger >= 1.2,
405 htsn-common >= 0.0.1,
406 hxt >= 9.3,
407 groundhog >= 0.5,
408 groundhog-postgresql >= 0.5,
409 groundhog-sqlite >= 0.5,
410 groundhog-th >= 0.5,
411 MissingH >= 1.2,
412 old-locale >= 1,
413 split >= 0.2,
414 process >= 1.1,
415 tasty >= 0.8,
416 tasty-hunit >= 0.8,
417 time >= 1.4,
418 transformers >= 0.3,
419 tuple >= 0.2
420
421
422
423 source-repository head
424 type: git
425 location: http://michael.orlitzky.com/git/htsn-import.git
426 branch: master