X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhtsn-import.git;a=blobdiff_plain;f=htsn-import.cabal;h=d470346fae7fbccc3111d18b06129772852d6010;hp=8c651a90e17aa44f6013d9c0658f367b59eb95bb;hb=HEAD;hpb=9d278c8b8eeff1a1317f2c3b0f7fdf5fb759ffb3 diff --git a/htsn-import.cabal b/htsn-import.cabal index 8c651a9..d470346 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -1,5 +1,5 @@ name: htsn-import -version: 0.0.1 +version: 0.2.4 cabal-version: >= 1.8 author: Michael Orlitzky maintainer: Michael Orlitzky @@ -7,29 +7,269 @@ category: Utils license: GPL-3 license-file: doc/LICENSE build-type: Simple +extra-source-files: + doc/dbschema/*.png + doc/htsn-importrc.example + doc/man1/htsn-import.1 + doc/CHANGES.database + doc/README.development + doc/TODO + makefile + schema/*.dtd + schemagen/AutoRacingResultsXML/*.xml + schemagen/Auto_Racing_Schedule_XML/*.xml + schemagen/CBASK_3PPctXML/*.xml + schemagen/Cbask_All_Tourn_Teams_XML/*.xml + schemagen/CBASK_AssistsXML/*.xml + schemagen/Cbask_Awards_XML/*.xml + schemagen/CBASK_BlocksXML/*.xml + schemagen/Cbask_Conf_Standings_XML/*.xml + schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml + schemagen/Cbask_DivIII_Team_Stats_XML/*.xml + schemagen/Cbask_DivII_Team_Stats_XML/*.xml + schemagen/CBASK_FGPctXML/*.xml + schemagen/CBASK_FoulsXML/*.xml + schemagen/CBASK_FTPctXML/*.xml + schemagen/Cbask_Indv_No_Avg_XML/*.xml + schemagen/Cbask_Indv_Scoring_XML/*.xml + schemagen/Cbask_Indv_Shooting_XML/*.xml + schemagen/CBASK_Lineup_XML/*.xml + schemagen/CBASK_MinutesXML/*.xml + schemagen/Cbask_Polls_XML/*.xml + schemagen/cbaskpreviewxml/*.xml + schemagen/CBASK_ReboundsXML/*.xml + schemagen/CBASK_ScoringLeadersXML/*.xml + schemagen/Cbask_Team_Scoring_Rebound_Margin_XML/*.xml + schemagen/Cbask_Team_Scoring_XML/*.xml + schemagen/Cbask_Team_Shooting_Pct_XML/*.xml + schemagen/Cbask_Team_ThreePT_Made_XML/*.xml + schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml + schemagen/Cbask_Team_Win_Pct_XML/*.xml + 
schemagen/CBASK_TopTwentyFiveResult_XML/*.xml + schemagen/Cbask_Top_Twenty_Five_XML/*.xml + schemagen/Cbask_Tourn_Awards_XML/*.xml + schemagen/Cbask_Tourn_Champs_XML/*.xml + schemagen/Cbask_Tourn_Indiv_XML/*.xml + schemagen/Cbask_Tourn_Leaders_XML/*.xml + schemagen/Cbask_Tourn_MVP_XML/*.xml + schemagen/Cbask_Tourn_Records_XML/*.xml + schemagen/cflpreviewxml/*.xml + schemagen/earlylineXML/*.xml + schemagen/Heartbeat/*.xml + schemagen/Injuries_Detail_XML/*.xml + schemagen/injuriesxml/*.xml + schemagen/jfilexml/*.xml + schemagen/LeagueScheduleXML/*.xml + schemagen/Matchup_NBA_NHL_XML/*.xml + schemagen/Minor_Baseball_League_Leaders_XML/*.xml + schemagen/Minor_Baseball_Standings_XML/*.xml + schemagen/Minor_Baseball_Transactions_XML/*.xml + schemagen/minorscoresxml/*.xml + schemagen/mlbbattingavgxml/*.xml + schemagen/mlbdoublesleadersxml/*.xml + schemagen/MLB_Boxscore_XML/*.xml + schemagen/MLB_ERA_Leaders/*.xml + schemagen/MLB_Fielding_XML/*.xml + schemagen/MLBGamesPlayedXML/*.xml + schemagen/MLB_Gaming_Matchup_XML/*.xml + schemagen/MLBGIDPXML/*.xml + schemagen/MLBHitByPitchXML/*.xml + schemagen/mlbhitsleadersxml/*.xml + schemagen/mlbhomerunsxml/*.xml + schemagen/MLBHRFreqXML/*.xml + schemagen/MLBIntWalksXML/*.xml + schemagen/MLBKORateXML/*.xml + schemagen/MLB_Lineup_XML/*.xml + schemagen/MLB_Matchup_XML/*.xml + schemagen/mlbonbasepctxml/*.xml + schemagen/MLBOPSXML/*.xml + schemagen/MLB_earlylineXML/*.xml + schemagen/MLB_Pitching_Appearances_Leaders/*.xml + schemagen/MLB_Pitching_Balks_Leaders/*.xml + schemagen/MLB_Pitching_CG_Leaders/*.xml + schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml + schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml + schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml + schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml + schemagen/MLB_Pitching_IP_Leaders/*.xml + schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml + schemagen/MLB_Pitching_Saves_Leaders/*.xml + schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml + schemagen/MLB_Pitching_Starts_Leaders/*.xml + 
schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml + schemagen/MLB_Pitching_Walks_Leaders/*.xml + schemagen/MLB_Pitching_WHIP_Leaders/*.xml + schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml + schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml + schemagen/MLB_Pitching_WL_Leaders/*.xml + schemagen/MLBPlateAppsXML/*.xml + schemagen/mlbpreviewxml/*.xml + schemagen/mlbrbisxml/*.xml + schemagen/mlbrunsleadersxml/*.xml + schemagen/MLBSacFliesXML/*.xml + schemagen/MLBSacrificesXML/*.xml + schemagen/MLBSBSuccessXML/*.xml + schemagen/mlbsluggingpctxml/*.xml + schemagen/mlbstandxml/*.xml + schemagen/mlbstandxml_preseason/*.xml + schemagen/mlbstolenbasexml/*.xml + schemagen/mlbtotalbasesleadersxml/*.xml + schemagen/mlbtriplesleadersxml/*.xml + schemagen/MLBWalkRateXML/*.xml + schemagen/mlbwalksleadersxml/*.xml + schemagen/MLBXtraBaseHitsXML/*.xml + schemagen/MLS_Preview_XML/*.xml + schemagen/NBA3PPctXML/*.xml + schemagen/NBAAssistsXML/*.xml + schemagen/NBABlocksXML/*.xml + schemagen/nbaconfrecxml/*.xml + schemagen/nbadaysxml/*.xml + schemagen/nbadivisionsxml/*.xml + schemagen/NBAFGPctXML/*.xml + schemagen/NBAFoulsXML/*.xml + schemagen/NBAFTPctXML/*.xml + schemagen/NBA_Gaming_Matchup_XML/*.xml + schemagen/NBALineupXML/*.xml + schemagen/NBAMinutesXML/*.xml + schemagen/NBA_Playoff_Matchup_XML/*.xml + schemagen/nbapreviewxml/*.xml + schemagen/NBAReboundsXML/*.xml + schemagen/NBAScorersXML/*.xml + schemagen/nbastandxml/*.xml + schemagen/NBAStealsXML/*.xml + schemagen/nbateamleadersxml/*.xml + schemagen/NBA_Team_Stats_XML/*.xml + schemagen/nbatripledoublexml/*.xml + schemagen/NBATurnoversXML/*.xml + schemagen/NCAA_Conference_Schedule_XML/*.xml + schemagen/NCAA_FB_Preview_XML/*.xml + schemagen/newsxml/*.xml + schemagen/nflfirstdownxml/*.xml + schemagen/NFLFumbleLeaderXML/*.xml + schemagen/NFLGiveTakeXML/*.xml + schemagen/NFLGrassTurfDomeOutsideXML/*.xml + schemagen/NFLInside20XML/*.xml + schemagen/NFLInterceptionLeadersXML/*.xml + schemagen/NFL_KickingLeaders_XML/*.xml + 
schemagen/NFLKickoffsXML/*.xml + schemagen/NFLMondayNightXML/*.xml + schemagen/NFL_NBA_Draft_XML/*.xml + schemagen/NFL_NCAA_FB_Matchup_XML/*.xml + schemagen/NFLPassingLeadersXML/*.xml + schemagen/NFLPassLeadXML/*.xml + schemagen/nflpreviewxml/*.xml + schemagen/NFL_PuntingLeaders_XML/*.xml + schemagen/NFLQBStartsXML/*.xml + schemagen/NFLReceivingLeadersXML/*.xml + schemagen/NFL_Roster_XML/*.xml + schemagen/NFLRushingLeadersXML/*.xml + schemagen/NFLSackLeadersXML/*.xml + schemagen/nflstandxml/*.xml + schemagen/NFLTackleFFLeadersXML/*.xml + schemagen/NFLTeamRankingsXML/*.xml + schemagen/NFL_Team_Stats_XML/*.xml + schemagen/NFLTopKickoffReturnXML/*.xml + schemagen/NFLTopPerformanceXML/*.xml + schemagen/NFLTopPuntReturnXML/*.xml + schemagen/NFLTotalYardageXML/*.xml + schemagen/NFLYardsXML/*.xml + schemagen/nhlpreviewxml/*.xml + schemagen/Odds_XML/*.xml + schemagen/recapxml/*.xml + schemagen/Schedule_Changes_XML/*.xml + schemagen/scoresxml/*.xml + schemagen/Transactions_XML/*.xml + schemagen/weatherxml/*.xml + schemagen/Weekly_Sched_XML/*.xml + schemagen/WNBA3PPctXML/*.xml + schemagen/WNBAAssistsXML/*.xml + schemagen/WNBABlocksXML/*.xml + schemagen/WNBAFGPctXML/*.xml + schemagen/WNBAFoulsXML/*.xml + schemagen/WNBAFTPctXML/*.xml + schemagen/WNBAMinutesXML/*.xml + schemagen/WNBAReboundsXML/*.xml + schemagen/WNBAScorersXML/*.xml + schemagen/wnbastandxml/*.xml + schemagen/WNBAStealsXML/*.xml + schemagen/WNBA_Team_Leaders_XML/*.xml + schemagen/WNBATurnoversXML/*.xml + schemagen/WorldBaseballPreviewXML/*.xml + test/shell/*.test + test/xml/*.xml + test/xml/*.dtd + test/xml/gameinfo/*.xml + test/xml/gameinfo/*.dtd + test/xml/sportinfo/*.xml + test/xml/sportinfo/*.dtd synopsis: Import XML files from The Sports Network into an RDBMS. description: - Import XML files from The Sports Network into an RDBMS. + /Usage/: + . + @ + htsn-import [OPTIONS] [FILES] + @ + . + The Sports Network offers an XML feed + containing various sports news and statistics. 
Our sister program + /htsn/ is capable of retrieving the feed and saving the individual + XML documents contained therein. But what to do with them? + . + The purpose of /htsn-import/ is to take these XML documents and + get them into something we can use, a relational database management + system (RDBMS), i.e. \"a SQL database\". The structure of a + relational database is, well, relational, and the feed XML is not. So + there is some work to do before the data can be inserted. + . + First, we must parse the XML. Each supported document type (see below) + has a full pickle/unpickle implementation (\"pickle\" is simply a + synonym for serialize here). That means that we parse the entire + document into a data structure, and if we pickle (serialize) that data + structure, we get the exact same XML document that we started with. + . + This is important for two reasons. First, it serves as a second level + of validation. The first validation is performed by the XML parser, + but if that succeeds and unpickling fails, we know that something is + fishy. Second, we don't ever want to be surprised by some new element + or attribute showing up in the XML. The fact that we can unpickle the + whole thing now means that we won't be surprised in the future. + . + The aforementioned feature is especially important because we + automatically migrate the database schema every time we import a + document. If you attempt to import a \"newsxml.dtd\" document, all + database objects relating to the news will be created if they do not + exist. We don't want the schema to change out from under us without + warning, so it's important that no XML be parsed that would result in + a different schema than we had previously. Since we can + pickle/unpickle everything already, this should be impossible. + . + Examples and usage documentation are available in the man page. 
executable htsn-import build-depends: - ansi-terminal == 0.6.*, - base == 4.*, + base >= 4.6 && < 5, cmdargs >= 0.10.6, - configurator == 0.2.*, - directory == 1.2.*, - filepath == 1.3.*, - hslogger == 1.2.*, - htsn-common == 0.0.1, - hxt == 9.3.*, - groundhog == 0.4.*, - groundhog-sqlite == 0.4.*, - groundhog-th == 0.4.*, - old-locale == 1.0.*, - time == 1.4.*, - transformers == 0.3.*, - tuple == 0.2.* + configurator >= 0.2, + containers >= 0.5, + directory >= 1.2, + filepath >= 1.3, + fixed-vector-hetero >= 0.3, + hslogger >= 1.2, + htsn-common >= 0.0.1, + hxt >= 9.3, + groundhog >= 0.7, + groundhog-postgresql >= 0.7, + groundhog-sqlite >= 0.7, + groundhog-th >= 0.7, + MissingH >= 1.2, + old-locale >= 1, + split >= 0.2, + tasty >= 0.8, + tasty-hunit >= 0.8, + time >= 1.4, + transformers >= 0.3, + tuple >= 0.2 main-is: Main.hs @@ -37,8 +277,122 @@ executable htsn-import hs-source-dirs: src/ + other-modules: + Backend + CommandLine + Configuration + ConnectionString + ExitCodes + Misc + OptionalConfiguration + TSN.Codegen + TSN.Database + TSN.DbImport + TSN.Location + TSN.Parse + TSN.Picklers + TSN.Team + TSN.XmlImport + TSN.XML.AutoRacingDriverList + TSN.XML.AutoRacingResults + TSN.XML.AutoRacingSchedule + TSN.XML.EarlyLine + TSN.XML.GameInfo + TSN.XML.Heartbeat + TSN.XML.Injuries + TSN.XML.InjuriesDetail + TSN.XML.JFile + TSN.XML.MLBEarlyLine + TSN.XML.News + TSN.XML.Odds + TSN.XML.ScheduleChanges + TSN.XML.Scores + TSN.XML.SportInfo + TSN.XML.Weather + Xml + + ghc-options: + -Wall + -fcontext-stack=50 + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -O2 + + ghc-prof-options: + -prof + -fprof-auto + -fprof-cafs + -- The following unbreak profiling with template haskell. We have + -- to build the program twice; once without profile and again with + -- these flags. 
+ -hisuf hi_p + -osuf o_p + + +test-suite testsuite + type: exitcode-stdio-1.0 + hs-source-dirs: src test + main-is: TestSuite.hs + build-depends: + base >= 4.6 && < 5, + cmdargs >= 0.10.6, + configurator >= 0.2, + containers >= 0.5, + directory >= 1.2, + filepath >= 1.3, + fixed-vector-hetero >= 0.3, + hslogger >= 1.2, + htsn-common >= 0.0.1, + hxt >= 9.3, + groundhog >= 0.7, + groundhog-postgresql >= 0.7, + groundhog-sqlite >= 0.7, + groundhog-th >= 0.7, + MissingH >= 1.2, + old-locale >= 1, + split >= 0.2, + tasty >= 0.8, + tasty-hunit >= 0.8, + time >= 1.4, + transformers >= 0.3, + tuple >= 0.2 + + -- It's not entirely clear to me why I have to reproduce all of this. + ghc-options: + -Wall + -fcontext-stack=50 + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -O2 + + +test-suite doctests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Doctests.hs + build-depends: + base >= 4.6 && < 5, + -- Additional test dependencies. + doctest >= 0.9 + + -- It's not entirely clear to me why I have to reproduce all of this. ghc-options: -Wall + -fcontext-stack=50 -fwarn-hi-shadowing -fwarn-missing-signatures -fwarn-name-shadowing @@ -54,10 +408,40 @@ executable htsn-import -optc-march=native -O2 - ghc-prof-options: - -prof - -auto-all - -caf-all + +-- These won't work without shelltestrunner installed in your +-- $PATH. Maybe there is some way to tell Cabal that. 
+test-suite shelltests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: ShellTests.hs + + build-depends: + base >= 4.6 && < 5, + cmdargs >= 0.10.6, + configurator >= 0.2, + containers >= 0.5, + directory >= 1.2, + filepath >= 1.3, + fixed-vector-hetero >= 0.3, + hslogger >= 1.2, + htsn-common >= 0.0.1, + hxt >= 9.3, + groundhog >= 0.7, + groundhog-postgresql >= 0.7, + groundhog-sqlite >= 0.7, + groundhog-th >= 0.7, + MissingH >= 1.2, + old-locale >= 1, + split >= 0.2, + process >= 1.1, + tasty >= 0.8, + tasty-hunit >= 0.8, + time >= 1.4, + transformers >= 0.3, + tuple >= 0.2 + + source-repository head type: git