X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=htsn-import.cabal;h=b7922477be5c5c3fde8eb4c4c4725d77d0aa72b8;hb=1851def5c02f42b90298672e1de89cba417c61ca;hp=4136b1e21ce419c3dbafce35527c9be28ed3d3af;hpb=3e6ba8b8a49258f56d283daf8768ccef99dd4b63;p=dead%2Fhtsn-import.git diff --git a/htsn-import.cabal b/htsn-import.cabal index 4136b1e..b792247 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -1,5 +1,5 @@ name: htsn-import -version: 0.0.1 +version: 0.0.4 cabal-version: >= 1.8 author: Michael Orlitzky maintainer: Michael Orlitzky @@ -7,25 +7,88 @@ category: Utils license: GPL-3 license-file: doc/LICENSE build-type: Simple +extra-source-files: + doc/dbschema/*.png + doc/htsn-importrc.example + doc/man1/htsn-import.1 + doc/README.dbschema + doc/README.schemagen + doc/TODO + makefile + schema/*.dtd + schemagen/Auto_Racing_Schedule_XML/*.xml + schemagen/Heartbeat/*.xml + schemagen/injuriesxml/*.xml + schemagen/Injuries_Detail_XML/*.xml + schemagen/newsxml/*.xml + schemagen/Odds_XML/*.xml + schemagen/scoresxml/*.xml + schemagen/weatherxml/*.xml + test/shell/*.test + test/xml/*.xml + test/xml/*.dtd synopsis: Import XML files from The Sports Network into an RDBMS. description: - Import XML files from The Sports Network into an RDBMS. + /Usage/: + . + @ + htsn-import [OPTIONS] [FILES] + @ + . + The Sports Network offers an XML feed + containing various sports news and statistics. Our sister program + /htsn/ is capable of retrieving the feed and saving the individual + XML documents contained therein. But what to do with them? + . + The purpose of /htsn-import/ is to take these XML documents and + get them into something we can use, a relational database management + system (RDBMS), loosely known as a SQL database. The structure of + relational database, is, well, relational, and the feed XML is not. So + there is some work to do before the data can be inserted. + . + First, we must parse the XML. Each supported document type (see below) + has a full pickle/unpickle implementation (\"pickle\" is simply a + synonym for serialize here). That means that we parse the entire + document into a data structure, and if we pickle (serialize) that data + structure, we get the exact same XML document tha we started with. + . + This is important for two reasons. First, it serves as a second level + of validation. The first validation is performed by the XML parser, + but if that succeeds and unpicking fails, we know that something is + fishy. Second, we don't ever want to be surprised by some new element + or attribute showing up in the XML. The fact that we can unpickle the + whole thing now means that we won't be surprised in the future. + . + The aforementioned feature is especially important because we + automatically migrate the database schema every time we import a + document. If you attempt to import a \"newsxml.dtd\" document, all + database objects relating to the news will be created if they do not + exist. We don't want the schema to change out from under us without + warning, so it's important that no XML be parsed that would result in + a different schema than we had previously. Since we can + pickle/unpickle everything already, this should be impossible. + . + Examples and usage documentation are available in the man page. executable htsn-import build-depends: - ansi-terminal == 0.6.*, base == 4.*, cmdargs >= 0.10.6, configurator == 0.2.*, directory == 1.2.*, filepath == 1.3.*, hslogger == 1.2.*, + htsn-common == 0.0.1, hxt == 9.3.*, groundhog == 0.4.*, + groundhog-postgresql == 0.4.*, groundhog-sqlite == 0.4.*, groundhog-th == 0.4.*, + MissingH == 1.2.*, old-locale == 1.0.*, + tasty == 0.8.*, + tasty-hunit == 0.8.*, time == 1.4.*, transformers == 0.3.*, tuple == 0.2.* @@ -36,6 +99,102 @@ executable htsn-import hs-source-dirs: src/ + other-modules: + Backend + CommandLine + Configuration + ConnectionString + ExitCodes + OptionalConfiguration + TSN.Codegen + TSN.Database + TSN.DbImport + TSN.Picklers + TSN.XmlImport + TSN.XML.AutoRacingSchedule + TSN.XML.Heartbeat + TSN.XML.Injuries + TSN.XML.InjuriesDetail + TSN.XML.News + TSN.XML.Odds + TSN.XML.Scores + TSN.XML.Weather + Xml + + ghc-options: + -Wall + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -O2 + + ghc-prof-options: + -prof + -fprof-auto + -fprof-cafs + -- The following unbreak profiling with template haskell. We have + -- to build the program twice; once without profile and again with + -- these flags. + -hisuf hi_p + -osuf o_p + + +test-suite testsuite + type: exitcode-stdio-1.0 + hs-source-dirs: src test + main-is: TestSuite.hs + build-depends: + base == 4.*, + cmdargs >= 0.10.6, + configurator == 0.2.*, + directory == 1.2.*, + filepath == 1.3.*, + hslogger == 1.2.*, + htsn-common == 0.0.1, + hxt == 9.3.*, + groundhog == 0.4.*, + groundhog-postgresql == 0.4.*, + groundhog-sqlite == 0.4.*, + groundhog-th == 0.4.*, + MissingH == 1.2.*, + old-locale == 1.0.*, + tasty == 0.8.*, + tasty-hunit == 0.8.*, + time == 1.4.*, + transformers == 0.3.*, + tuple == 0.2.* + + -- It's not entirely clear to me why I have to reproduce all of this. + ghc-options: + -Wall + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -O2 + + +test-suite doctests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Doctests.hs + build-depends: + base == 4.*, + -- Additional test dependencies. + doctest == 0.9.* + + -- It's not entirely clear to me why I have to reproduce all of this. ghc-options: -Wall -fwarn-hi-shadowing @@ -53,10 +212,37 @@ executable htsn-import -optc-march=native -O2 - ghc-prof-options: - -prof - -auto-all - -caf-all + +-- These won't work without shelltestrunner installed in your +-- $PATH. Maybe there is some way to tell Cabal that. +test-suite shelltests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: ShellTests.hs + + build-depends: + base == 4.*, + cmdargs >= 0.10.6, + configurator == 0.2.*, + directory == 1.2.*, + filepath == 1.3.*, + hslogger == 1.2.*, + htsn-common == 0.0.1, + hxt == 9.3.*, + groundhog == 0.4.*, + groundhog-postgresql == 0.4.*, + groundhog-sqlite == 0.4.*, + groundhog-th == 0.4.*, + MissingH == 1.2.*, + old-locale == 1.0.*, + process == 1.1.*, + tasty == 0.8.*, + tasty-hunit == 0.8.*, + time == 1.4.*, + transformers == 0.3.*, + tuple == 0.2.* + + source-repository head type: git