X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=htsn-import.cabal;h=fd9a17c65d92904fc67b2748f5b71b13d469ca88;hb=1ea954f7e3d673b427368575d4873d968faee0e3;hp=362beac36ef73d053f313ad5737f8a22bb2f8509;hpb=04f468994697d4d983a09a8e612948b23a973848;p=dead%2Fhtsn-import.git diff --git a/htsn-import.cabal b/htsn-import.cabal index 362beac..fd9a17c 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -7,10 +7,66 @@ category: Utils license: GPL-3 license-file: doc/LICENSE build-type: Simple +extra-source-files: + doc/dbschema/*.png + doc/htsn-importrc.example + doc/man1/htsn-import.1 + doc/README.dbschema + doc/README.schemagen + doc/TODO + makefile + test/xml/*.xml + test/xml/*.dtd + schema/*.dtd + schemagen/Heartbeat/*.xml + schemagen/injuriesxml/*.xml + schemagen/Injuries_Detail_XML/*.xml + schemagen/newsxml/*.xml + schemagen/Odds_XML/*.xml + schemagen/weatherxml/*.xml synopsis: Import XML files from The Sports Network into an RDBMS. description: - Import XML files from The Sports Network into an RDBMS. + /Usage/: + . + @ + htsn-import [OPTIONS] [FILES] + @ + . + The Sports Network offers an XML feed + containing various sports news and statistics. Our sister program + /htsn/ is capable of retrieving the feed and saving the individual + XML documents contained therein. But what to do with them? + . + The purpose of /htsn-import/ is to take these XML documents and + get them into something we can use, a relational database management + system (RDBMS), loosely known as a SQL database. The structure of + relational database, is, well, relational, and the feed XML is not. So + there is some work to do before the data can be inserted. + . + First, we must parse the XML. Each supported document type (see below) + has a full pickle/unpickle implementation (\"pickle\" is simply a + synonym for serialize here). That means that we parse the entire + document into a data structure, and if we pickle (serialize) that data + structure, we get the exact same XML document tha we started with. + . + This is important for two reasons. First, it serves as a second level + of validation. The first validation is performed by the XML parser, + but if that succeeds and unpicking fails, we know that something is + fishy. Second, we don't ever want to be surprised by some new element + or attribute showing up in the XML. The fact that we can unpickle the + whole thing now means that we won't be surprised in the future. + . + The aforementioned feature is especially important because we + automatically migrate the database schema every time we import a + document. If you attempt to import a \"newsxml.dtd\" document, all + database objects relating to the news will be created if they do not + exist. We don't want the schema to change out from under us without + warning, so it's important that no XML be parsed that would result in + a different schema than we had previously. Since we can + pickle/unpickle everything already, this should be impossible. + . + Examples and usage documentation are available in the man page. executable htsn-import build-depends: @@ -40,6 +96,26 @@ executable htsn-import hs-source-dirs: src/ + other-modules: + Backend + CommandLine + Configuration + ConnectionString + ExitCodes + OptionalConfiguration + TSN.Codegen + TSN.Database + TSN.DbImport + TSN.Picklers + TSN.XmlImport + TSN.XML.Heartbeat + TSN.XML.Injuries + TSN.XML.InjuriesDetail + TSN.XML.News + TSN.XML.Odds + TSN.XML.Weather + Xml + ghc-options: -Wall -fwarn-hi-shadowing @@ -59,8 +135,13 @@ executable htsn-import ghc-prof-options: -prof - -auto-all - -caf-all + -fprof-auto + -fprof-cafs + -- The following unbreak profiling with template haskell. We have + -- to build the program twice; once without profile and again with + -- these flags. + -hisuf hi_p + -osuf o_p test-suite testsuite @@ -107,6 +188,34 @@ test-suite testsuite -O2 +test-suite doctests + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Doctests.hs + build-depends: + base == 4.*, + -- Additional test dependencies. + doctest == 0.9.* + + -- It's not entirely clear to me why I have to reproduce all of this. + ghc-options: + -Wall + -fwarn-hi-shadowing + -fwarn-missing-signatures + -fwarn-name-shadowing + -fwarn-orphans + -fwarn-type-defaults + -fwarn-tabs + -fwarn-incomplete-record-updates + -fwarn-monomorphism-restriction + -fwarn-unused-do-bind + -rtsopts + -threaded + -optc-O3 + -optc-march=native + -O2 + + source-repository head type: git location: http://michael.orlitzky.com/git/htsn-import.git