name: htsn-import
-version: 0.0.1
+version: 0.0.8
cabal-version: >= 1.8
author: Michael Orlitzky
maintainer: Michael Orlitzky <michael@orlitzky.com>
license: GPL-3
license-file: doc/LICENSE
build-type: Simple
+extra-source-files:
+ doc/dbschema/*.png
+ doc/htsn-importrc.example
+ doc/man1/htsn-import.1
+ doc/README.development
+ doc/TODO
+ makefile
+ schema/*.dtd
+ schemagen/AutoRacingResultsXML/*.xml
+ schemagen/Auto_Racing_Schedule_XML/*.xml
+ schemagen/CBASK_3PPctXML/*.xml
+ schemagen/Cbask_All_Tourn_Teams_XML/*.xml
+ schemagen/CBASK_AssistsXML/*.xml
+ schemagen/Cbask_Awards_XML/*.xml
+ schemagen/CBASK_BlocksXML/*.xml
+ schemagen/Cbask_Conf_Standings_XML/*.xml
+ schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
+ schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
+ schemagen/Cbask_DivII_Team_Stats_XML/*.xml
+ schemagen/CBASK_FGPctXML/*.xml
+ schemagen/CBASK_FoulsXML/*.xml
+ schemagen/CBASK_FTPctXML/*.xml
+ schemagen/Cbask_Indv_Scoring_XML/*.xml
+ schemagen/CBASK_Lineup_XML/*.xml
+ schemagen/CBASK_MinutesXML/*.xml
+ schemagen/Cbask_Polls_XML/*.xml
+ schemagen/cbaskpreviewxml/*.xml
+ schemagen/CBASK_ReboundsXML/*.xml
+ schemagen/CBASK_ScoringLeadersXML/*.xml
+ schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
+ schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
+ schemagen/Cbask_Team_Win_Pct_XML/*.xml
+ schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
+ schemagen/Cbask_Top_Twenty_Five_XML/*.xml
+ schemagen/Cbask_Tourn_Awards_XML/*.xml
+ schemagen/Cbask_Tourn_Champs_XML/*.xml
+ schemagen/Cbask_Tourn_Indiv_XML/*.xml
+ schemagen/Cbask_Tourn_Leaders_XML/*.xml
+ schemagen/Cbask_Tourn_MVP_XML/*.xml
+ schemagen/Cbask_Tourn_Records_XML/*.xml
+ schemagen/cflpreviewxml/*.xml
+ schemagen/Heartbeat/*.xml
+ schemagen/Injuries_Detail_XML/*.xml
+ schemagen/injuriesxml/*.xml
+ schemagen/jfilexml/*.xml
+ schemagen/LeagueScheduleXML/*.xml
+ schemagen/Matchup_NBA_NHL_XML/*.xml
+ schemagen/Minor_Baseball_League_Leaders_XML/*.xml
+ schemagen/Minor_Baseball_Standings_XML/*.xml
+ schemagen/Minor_Baseball_Transactions_XML/*.xml
+ schemagen/minorscoresxml/*.xml
+ schemagen/mlbbattingavgxml/*.xml
+ schemagen/mlbdoublesleadersxml/*.xml
+ schemagen/MLB_ERA_Leaders/*.xml
+ schemagen/MLB_Fielding_XML/*.xml
+ schemagen/MLBGamesPlayedXML/*.xml
+ schemagen/MLB_Gaming_Matchup_XML/*.xml
+ schemagen/MLBGIDPXML/*.xml
+ schemagen/MLBHitByPitchXML/*.xml
+ schemagen/mlbhitsleadersxml/*.xml
+ schemagen/mlbhomerunsxml/*.xml
+ schemagen/MLBHRFreqXML/*.xml
+ schemagen/MLBIntWalksXML/*.xml
+ schemagen/MLBKORateXML/*.xml
+ schemagen/MLB_Lineup_XML/*.xml
+ schemagen/MLB_Matchup_XML/*.xml
+ schemagen/mlbonbasepctxml/*.xml
+ schemagen/MLBOPSXML/*.xml
+ schemagen/MLB_Pitching_Appearances_Leaders/*.xml
+ schemagen/MLB_Pitching_Balks_Leaders/*.xml
+ schemagen/MLB_Pitching_CG_Leaders/*.xml
+ schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
+ schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
+ schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
+ schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
+ schemagen/MLB_Pitching_IP_Leaders/*.xml
+ schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
+ schemagen/MLB_Pitching_Saves_Leaders/*.xml
+ schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
+ schemagen/MLB_Pitching_Starts_Leaders/*.xml
+ schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
+ schemagen/MLB_Pitching_Walks_Leaders/*.xml
+ schemagen/MLB_Pitching_WHIP_Leaders/*.xml
+ schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
+ schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
+ schemagen/MLB_Pitching_WL_Leaders/*.xml
+ schemagen/MLBPlateAppsXML/*.xml
+ schemagen/mlbpreviewxml/*.xml
+ schemagen/mlbrbisxml/*.xml
+ schemagen/mlbrunsleadersxml/*.xml
+ schemagen/MLBSacFliesXML/*.xml
+ schemagen/MLBSacrificesXML/*.xml
+ schemagen/MLBSBSuccessXML/*.xml
+ schemagen/mlbsluggingpctxml/*.xml
+ schemagen/mlbstandxml/*.xml
+ schemagen/mlbstandxml_preseason/*.xml
+ schemagen/mlbstolenbasexml/*.xml
+ schemagen/mlbtotalbasesleadersxml/*.xml
+ schemagen/mlbtriplesleadersxml/*.xml
+ schemagen/MLBWalkRateXML/*.xml
+ schemagen/mlbwalksleadersxml/*.xml
+ schemagen/MLBXtraBaseHitsXML/*.xml
+ schemagen/MLS_Preview_XML/*.xml
+ schemagen/NBA3PPctXML/*.xml
+ schemagen/NBAAssistsXML/*.xml
+ schemagen/NBABlocksXML/*.xml
+ schemagen/nbaconfrecxml/*.xml
+ schemagen/nbadaysxml/*.xml
+ schemagen/nbadivisionsxml/*.xml
+ schemagen/NBAFGPctXML/*.xml
+ schemagen/NBAFoulsXML/*.xml
+ schemagen/NBAFTPctXML/*.xml
+ schemagen/NBA_Gaming_Matchup_XML/*.xml
+ schemagen/NBALineupXML/*.xml
+ schemagen/NBAMinutesXML/*.xml
+ schemagen/NBA_Playoff_Matchup_XML/*.xml
+ schemagen/nbapreviewxml/*.xml
+ schemagen/NBAReboundsXML/*.xml
+ schemagen/NBAScorersXML/*.xml
+ schemagen/nbastandxml/*.xml
+ schemagen/NBAStealsXML/*.xml
+ schemagen/nbateamleadersxml/*.xml
+ schemagen/NBA_Team_Stats_XML/*.xml
+ schemagen/nbatripledoublexml/*.xml
+ schemagen/NBATurnoversXML/*.xml
+ schemagen/NCAA_Conference_Schedule_XML/*.xml
+ schemagen/NCAA_FB_Preview_XML/*.xml
+ schemagen/newsxml/*.xml
+ schemagen/nflfirstdownxml/*.xml
+ schemagen/NFLFumbleLeaderXML/*.xml
+ schemagen/NFLGiveTakeXML/*.xml
+ schemagen/NFLInside20XML/*.xml
+ schemagen/NFL_KickingLeaders_XML/*.xml
+ schemagen/NFLKickoffsXML/*.xml
+ schemagen/NFLMondayNightXML/*.xml
+ schemagen/NFL_NBA_Draft_XML/*.xml
+ schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
+ schemagen/NFLPassLeadXML/*.xml
+ schemagen/nflpreviewxml/*.xml
+ schemagen/NFLQBStartsXML/*.xml
+ schemagen/NFL_Roster_XML/*.xml
+ schemagen/NFLSackLeadersXML/*.xml
+ schemagen/nflstandxml/*.xml
+ schemagen/NFLTeamRankingsXML/*.xml
+ schemagen/NFL_Team_Stats_XML/*.xml
+ schemagen/NFLTopPerformanceXML/*.xml
+ schemagen/NFLTotalYardageXML/*.xml
+ schemagen/nhlpreviewxml/*.xml
+ schemagen/Odds_XML/*.xml
+ schemagen/recapxml/*.xml
+ schemagen/Schedule_Changes_XML/*.xml
+ schemagen/scoresxml/*.xml
+ schemagen/Transactions_XML/*.xml
+ schemagen/weatherxml/*.xml
+ schemagen/Weekly_Sched_XML/*.xml
+ schemagen/WNBA3PPctXML/*.xml
+ schemagen/WNBAAssistsXML/*.xml
+ schemagen/WNBABlocksXML/*.xml
+ schemagen/WNBAFGPctXML/*.xml
+ schemagen/WNBAFoulsXML/*.xml
+ schemagen/WNBAFTPctXML/*.xml
+ schemagen/WNBAMinutesXML/*.xml
+ schemagen/WNBAReboundsXML/*.xml
+ schemagen/WNBAScorersXML/*.xml
+ schemagen/wnbastandxml/*.xml
+ schemagen/WNBAStealsXML/*.xml
+ schemagen/WNBA_Team_Leaders_XML/*.xml
+ schemagen/WNBATurnoversXML/*.xml
+ schemagen/WorldBaseballPreviewXML/*.xml
+ test/shell/*.test
+ test/xml/*.xml
+ test/xml/*.dtd
+ test/xml/gameinfo/*.xml
+ test/xml/gameinfo/*.dtd
+ test/xml/sportinfo/*.xml
+ test/xml/sportinfo/*.dtd
synopsis:
Import XML files from The Sports Network into an RDBMS.
description:
- Import XML files from The Sports Network into an RDBMS.
+ /Usage/:
+ .
+ @
+ htsn-import [OPTIONS] [FILES]
+ @
+ .
+ The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
+ containing various sports news and statistics. Our sister program
+ /htsn/ is capable of retrieving the feed and saving the individual
+ XML documents contained therein. But what to do with them?
+ .
+ The purpose of /htsn-import/ is to take these XML documents and
+ get them into something we can use, a relational database management
+ system (RDBMS), i.e. \"a SQL database\". The structure of a
+ relational database is, well, relational, and the feed XML is not. So
+ there is some work to do before the data can be inserted.
+ .
+ First, we must parse the XML. Each supported document type (see below)
+ has a full pickle/unpickle implementation (\"pickle\" is simply a
+ synonym for serialize here). That means that we parse the entire
+ document into a data structure, and if we pickle (serialize) that data
+ structure, we get the exact same XML document that we started with.
+ .
+ This is important for two reasons. First, it serves as a second level
+ of validation. The first validation is performed by the XML parser,
+ but if that succeeds and unpickling fails, we know that something is
+ fishy. Second, we don't ever want to be surprised by some new element
+ or attribute showing up in the XML. The fact that we can unpickle the
+ whole thing now means that we won't be surprised in the future.
+ .
+ The aforementioned feature is especially important because we
+ automatically migrate the database schema every time we import a
+ document. If you attempt to import a \"newsxml.dtd\" document, all
+ database objects relating to the news will be created if they do not
+ exist. We don't want the schema to change out from under us without
+ warning, so it's important that no XML be parsed that would result in
+ a different schema than we had previously. Since we can
+ pickle/unpickle everything already, this should be impossible.
+ .
+ Examples and usage documentation are available in the man page.
executable htsn-import
build-depends:
- base == 4.*,
+ base >= 4.6 && < 5,
cmdargs >= 0.10.6,
- configurator == 0.2.*,
- directory == 1.2.*,
- filepath == 1.3.*,
- hslogger == 1.2.*,
- htsn-common == 0.0.1,
- hxt == 9.3.*,
- groundhog == 0.4.*,
- groundhog-postgresql == 0.4.*,
- groundhog-sqlite == 0.4.*,
- groundhog-th == 0.4.*,
- MissingH == 1.2.*,
- old-locale == 1.0.*,
- tasty == 0.7.*,
- tasty-hunit == 0.4.*,
- time == 1.4.*,
- transformers == 0.3.*,
- tuple == 0.2.*
+ configurator >= 0.2,
+ directory >= 1.2,
+ filepath >= 1.3,
+ hslogger >= 1.2,
+ htsn-common >= 0.0.1,
+ hxt >= 9.3,
+ groundhog >= 0.5,
+ groundhog-postgresql >= 0.5,
+ groundhog-sqlite >= 0.5,
+ groundhog-th >= 0.5,
+ MissingH >= 1.2,
+ old-locale >= 1,
+ split >= 0.2,
+ tasty >= 0.8,
+ tasty-hunit >= 0.8,
+ time >= 1.4,
+ transformers >= 0.3,
+ tuple >= 0.2
main-is:
Main.hs
hs-source-dirs:
src/
+ other-modules:
+ Backend
+ CommandLine
+ Configuration
+ ConnectionString
+ ExitCodes
+ OptionalConfiguration
+ TSN.Codegen
+ TSN.Database
+ TSN.DbImport
+ TSN.Location
+ TSN.Parse
+ TSN.Picklers
+ TSN.Team
+ TSN.XmlImport
+ TSN.XML.AutoRacingResults
+ TSN.XML.AutoRacingSchedule
+ TSN.XML.GameInfo
+ TSN.XML.Heartbeat
+ TSN.XML.Injuries
+ TSN.XML.InjuriesDetail
+ TSN.XML.JFile
+ TSN.XML.News
+ TSN.XML.Odds
+ TSN.XML.ScheduleChanges
+ TSN.XML.Scores
+ TSN.XML.SportInfo
+ TSN.XML.Weather
+ Xml
+
ghc-options:
-Wall
-fwarn-hi-shadowing
-fwarn-incomplete-record-updates
-fwarn-monomorphism-restriction
-fwarn-unused-do-bind
- -rtsopts
- -threaded
- -optc-O3
- -optc-march=native
-O2
ghc-prof-options:
-prof
- -auto-all
- -caf-all
+ -fprof-auto
+ -fprof-cafs
+ -- The following unbreak profiling with template haskell. We have
+ -- to build the program twice; once without profiling and again with
+ -- these flags.
+ -hisuf hi_p
+ -osuf o_p
test-suite testsuite
hs-source-dirs: src test
main-is: TestSuite.hs
build-depends:
- base == 4.*,
+ base >= 4.6 && < 5,
cmdargs >= 0.10.6,
- configurator == 0.2.*,
- directory == 1.2.*,
- filepath == 1.3.*,
- hslogger == 1.2.*,
- htsn-common == 0.0.1,
- hxt == 9.3.*,
- groundhog == 0.4.*,
- groundhog-postgresql == 0.4.*,
- groundhog-sqlite == 0.4.*,
- groundhog-th == 0.4.*,
- MissingH == 1.2.*,
- old-locale == 1.0.*,
- tasty == 0.7.*,
- tasty-hunit == 0.4.*,
- time == 1.4.*,
- transformers == 0.3.*,
- tuple == 0.2.*
+ configurator >= 0.2,
+ directory >= 1.2,
+ filepath >= 1.3,
+ hslogger >= 1.2,
+ htsn-common >= 0.0.1,
+ hxt >= 9.3,
+ groundhog >= 0.5,
+ groundhog-postgresql >= 0.5,
+ groundhog-sqlite >= 0.5,
+ groundhog-th >= 0.5,
+ MissingH >= 1.2,
+ old-locale >= 1,
+ split >= 0.2,
+ tasty >= 0.8,
+ tasty-hunit >= 0.8,
+ time >= 1.4,
+ transformers >= 0.3,
+ tuple >= 0.2
+
+ -- It's not entirely clear to me why I have to reproduce all of this.
+ ghc-options:
+ -Wall
+ -fwarn-hi-shadowing
+ -fwarn-missing-signatures
+ -fwarn-name-shadowing
+ -fwarn-orphans
+ -fwarn-type-defaults
+ -fwarn-tabs
+ -fwarn-incomplete-record-updates
+ -fwarn-monomorphism-restriction
+ -fwarn-unused-do-bind
+ -O2
+
+
+test-suite doctests
+ type: exitcode-stdio-1.0
+ hs-source-dirs: test
+ main-is: Doctests.hs
+ build-depends:
+ base >= 4.6 && < 5,
+ -- Additional test dependencies.
+ doctest >= 0.9
-- It's not entirely clear to me why I have to reproduce all of this.
ghc-options:
-O2
+-- These won't work without shelltestrunner installed in your
+-- $PATH. Maybe there is some way to tell Cabal that.
+test-suite shelltests
+ type: exitcode-stdio-1.0
+ hs-source-dirs: test
+ main-is: ShellTests.hs
+
+ build-depends:
+ base >= 4.6 && < 5,
+ cmdargs >= 0.10.6,
+ configurator >= 0.2,
+ directory >= 1.2,
+ filepath >= 1.3,
+ hslogger >= 1.2,
+ htsn-common >= 0.0.1,
+ hxt >= 9.3,
+ groundhog >= 0.5,
+ groundhog-postgresql >= 0.5,
+ groundhog-sqlite >= 0.5,
+ groundhog-th >= 0.5,
+ MissingH >= 1.2,
+ old-locale >= 1,
+ split >= 0.2,
+ process >= 1.1,
+ tasty >= 0.8,
+ tasty-hunit >= 0.8,
+ time >= 1.4,
+ transformers >= 0.3,
+ tuple >= 0.2
+
+
+
source-repository head
type: git
location: http://michael.orlitzky.com/git/htsn-import.git