X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=htsn-import.cabal;h=ab5bb7c13ae11e033f4aa7d1de62507b1802ff90;hb=6582d0664af4a86919ce6d080dade6f81688d427;hp=8c651a90e17aa44f6013d9c0658f367b59eb95bb;hpb=9d278c8b8eeff1a1317f2c3b0f7fdf5fb759ffb3;p=dead%2Fhtsn-import.git diff --git a/htsn-import.cabal b/htsn-import.cabal index 8c651a9..ab5bb7c 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -7,14 +7,59 @@ category: Utils license: GPL-3 license-file: doc/LICENSE build-type: Simple +extra-source-files: + doc/htsnrc-import.example + doc/man1/htsn-import.1 + makefile + test/xml/*.xml + schema/*.dtd + schemagen/*/*.xml synopsis: Import XML files from The Sports Network into an RDBMS. description: - Import XML files from The Sports Network into an RDBMS. + /Usage/: + . + @ + htsn-import [OPTIONS] [FILES] + @ + . + The Sports Network offers an XML feed + containing various sports news and statistics. Our sister program + /htsn/ is capable of retrieving the feed and saving the individual + XML documents contained therein. But what to do with them? + . + The purpose of /htsn-import/ is to take these XML documents and + get them into something we can use, a relational database management + system (RDBMS), loosely known as a SQL database. The structure of + relational database, is, well, relational, and the feed XML is not. So + there is some work to do before the data can be inserted. + . + First, we must parse the XML. Each supported document type (see below) + has a full pickle/unpickle implementation (\"pickle\" is simply a + synonym for serialize here). That means that we parse the entire + document into a data structure, and if we pickle (serialize) that data + structure, we get the exact same XML document tha we started with. + . + This is important for two reasons. First, it serves as a second level + of validation. The first validation is performed by the XML parser, + but if that succeeds and unpicking fails, we know that something is + fishy. Second, we don't ever want to be surprised by some new element + or attribute showing up in the XML. The fact that we can unpickle the + whole thing now means that we won't be surprised in the future. + . + The aforementioned feature is especially important because we + automatically migrate the database schema every time we import a + document. If you attempt to import a \"newsxml.dtd\" document, all + database objects relating to the news will be created if they do not + exist. We don't want the schema to change out from under us without + warning, so it's important that no XML be parsed that would result in + a different schema than we had previously. Since we can + pickle/unpickle everything already, this should be impossible. + . + Examples and usage documentation are available in the man page. executable htsn-import build-depends: - ansi-terminal == 0.6.*, base == 4.*, cmdargs >= 0.10.6, configurator == 0.2.*, @@ -24,9 +69,13 @@ executable htsn-import htsn-common == 0.0.1, hxt == 9.3.*, groundhog == 0.4.*, + groundhog-postgresql == 0.4.*, groundhog-sqlite == 0.4.*, groundhog-th == 0.4.*, + MissingH == 1.2.*, old-locale == 1.0.*, + tasty == 0.7.*, + tasty-hunit == 0.4.*, time == 1.4.*, transformers == 0.3.*, tuple == 0.2.* @@ -37,6 +86,24 @@ executable htsn-import hs-source-dirs: src/ + other-modules: + Backend + CommandLine + Configuration + ConnectionString + ExitCodes + OptionalConfiguration + TSN.Codegen + TSN.DbImport + TSN.Picklers + TSN.XmlImport + TSN.XML.Heartbeat + TSN.XML.Injuries + TSN.XML.InjuriesDetail + TSN.XML.News + TSN.XML.Odds + Xml + ghc-options: -Wall -fwarn-hi-shadowing @@ -59,6 +126,79 @@ executable htsn-import -auto-all -caf-all + +test-suite testsuite + type: exitcode-stdio-1.0 + hs-source-dirs: src test + main-is: TestSuite.hs + build-depends: + base == 4.*, + cmdargs >= 0.10.6, + configurator == 0.2.*, + directory == 1.2.*, + filepath == 1.3.*, + hslogger == 1.2.*, + htsn-common == 0.0.1, + hxt == 9.3.*, + groundhog == 0.4.*, + groundhog-postgresql == 0.4.*, + groundhog-sqlite == 0.4.*, + groundhog-th == 0.4.*, + MissingH == 1.2.*, + old-locale == 1.0.*, + tasty == 0.7.*, + tasty-hunit == 0.4.*, + time == 1.4.*, + transformers == 0.3.*, + tuple == 0.2.* + + -- It's not entirely clear to me why I have to reproduce all of this. + ghc-options: + -Wall + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -rtsopts + -threaded + -optc-O3 + -optc-march=native + -O2 + + +test-suite doctests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Doctests.hs + build-depends: + base == 4.*, + -- Additional test dependencies. + doctest == 0.9.* + + -- It's not entirely clear to me why I have to reproduce all of this. + ghc-options: + -Wall + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -rtsopts + -threaded + -optc-O3 + -optc-march=native + -O2 + + source-repository head type: git location: http://michael.orlitzky.com/git/htsn-import.git