htsn-import.cabal

   1 name:           htsn-import
   2 version:        0.0.1
   3 cabal-version:  >= 1.8
   4 author:         Michael Orlitzky
   5 maintainer:     Michael Orlitzky <michael@orlitzky.com>
   6 category:       Utils
   7 license:        GPL-3
   8 license-file:   doc/LICENSE
   9 build-type:     Simple
  10 extra-source-files:
  11   doc/dbschema/*.png
  12   doc/htsn-importrc.example
  13   doc/man1/htsn-import.1
  14   doc/README.dbschema
  15   doc/README.schemagen
  16   doc/TODO
  17   makefile
  18   test/xml/*.xml
  19   test/xml/*.dtd
  20   schema/*.dtd
  21   schemagen/Heartbeat/*.xml
  22   schemagen/injuriesxml/*.xml
  23   schemagen/Injuries_Detail_XML/*.xml
  24   schemagen/newsxml/*.xml
  25   schemagen/Odds_XML/*.xml
  26   schemagen/weatherxml/*.xml
  27 synopsis:
  28   Import XML files from The Sports Network into an RDBMS.
  29 description:
  30   /Usage/:
  31   .
  32   @
  33   htsn-import [OPTIONS] [FILES]
  34   @
  35   .
  36   The Sports Network <http://www.sportsnetwork.com/> offers an XML feed
  37   containing various sports news and statistics. Our sister program
  38   /htsn/ is capable of retrieving the feed and saving the individual
  39   XML documents contained therein. But what to do with them?
  40   .
  41   The purpose of /htsn-import/ is to take these XML documents and
  42   get them into something we can use, a relational database management
  43   system (RDBMS), loosely known as a SQL database. The structure of
  44   a relational database is, well, relational, and the feed XML is not. So
  45   there is some work to do before the data can be inserted.
  46   .
  47   First, we must parse the XML. Each supported document type (see below)
  48   has a full pickle/unpickle implementation (\"pickle\" is simply a
  49   synonym for serialize here). That means that we parse the entire
  50   document into a data structure, and if we pickle (serialize) that data
  51   structure, we get the exact same XML document that we started with.
  52   .
  53   This is important for two reasons. First, it serves as a second level
  54   of validation. The first validation is performed by the XML parser,
  55   but if that succeeds and unpickling fails, we know that something is
  56   fishy. Second, we don't ever want to be surprised by some new element
  57   or attribute showing up in the XML. The fact that we can unpickle the
  58   whole thing now means that we won't be surprised in the future.
  59   .
  60   The aforementioned feature is especially important because we
  61   automatically migrate the database schema every time we import a
  62   document. If you attempt to import a \"newsxml.dtd\" document, all
  63   database objects relating to the news will be created if they do not
  64   exist. We don't want the schema to change out from under us without
  65   warning, so it's important that no XML be parsed that would result in
  66   a different schema than we had previously. Since we can
  67   pickle/unpickle everything already, this should be impossible.
  68   .
  69   Examples and usage documentation are available in the man page.
  70
  71 executable htsn-import
  72   build-depends:
  73     base                        == 4.*,
  74     cmdargs                     >= 0.10.6,
  75     configurator                == 0.2.*,
  76     directory                   == 1.2.*,
  77     filepath                    == 1.3.*,
  78     hslogger                    == 1.2.*,
  79     htsn-common                 == 0.0.1,
  80     hxt                         == 9.3.*,
  81     groundhog                   == 0.4.*,
  82     groundhog-postgresql        == 0.4.*,
  83     groundhog-sqlite            == 0.4.*,
  84     groundhog-th                == 0.4.*,
  85     MissingH                    == 1.2.*,
  86     old-locale                  == 1.0.*,
  87     tasty                       == 0.7.*,
  88     tasty-hunit                 == 0.4.*,
  89     time                        == 1.4.*,
  90     transformers                == 0.3.*,
  91     tuple                       == 0.2.*
  92
  93   main-is:
  94     Main.hs
  95
  96   hs-source-dirs:
  97     src/
  98
  99   other-modules:
 100     Backend
 101     CommandLine
 102     Configuration
 103     ConnectionString
 104     ExitCodes
 105     OptionalConfiguration
 106     TSN.Codegen
 107     TSN.Database
 108     TSN.DbImport
 109     TSN.Picklers
 110     TSN.XmlImport
 111     TSN.XML.Heartbeat
 112     TSN.XML.Injuries
 113     TSN.XML.InjuriesDetail
 114     TSN.XML.News
 115     TSN.XML.Odds
 116     TSN.XML.Weather
 117     Xml
 118
 119   ghc-options:
 120     -Wall
 121     -fwarn-hi-shadowing
 122     -fwarn-missing-signatures
 123     -fwarn-name-shadowing
 124     -fwarn-orphans
 125     -fwarn-type-defaults
 126     -fwarn-tabs
 127     -fwarn-incomplete-record-updates
 128     -fwarn-monomorphism-restriction
 129     -fwarn-unused-do-bind
 130     -O2
 131
 132   ghc-prof-options:
 133     -prof
 134     -fprof-auto
 135     -fprof-cafs
 136     -- The following unbreak profiling with Template Haskell. We have
 137     -- to build the program twice: once without profiling and again with
 138     -- these flags.
 139     -hisuf hi_p
 140     -osuf o_p
 141
 142
 143 test-suite testsuite
 144   type: exitcode-stdio-1.0
 145   hs-source-dirs: src test
 146   main-is: TestSuite.hs
 147   build-depends:
 148     base                        == 4.*,
 149     cmdargs                     >= 0.10.6,
 150     configurator                == 0.2.*,
 151     directory                   == 1.2.*,
 152     filepath                    == 1.3.*,
 153     hslogger                    == 1.2.*,
 154     htsn-common                 == 0.0.1,
 155     hxt                         == 9.3.*,
 156     groundhog                   == 0.4.*,
 157     groundhog-postgresql        == 0.4.*,
 158     groundhog-sqlite            == 0.4.*,
 159     groundhog-th                == 0.4.*,
 160     MissingH                    == 1.2.*,
 161     old-locale                  == 1.0.*,
 162     tasty                       == 0.7.*,
 163     tasty-hunit                 == 0.4.*,
 164     time                        == 1.4.*,
 165     transformers                == 0.3.*,
 166     tuple                       == 0.2.*
 167
 168   -- It's not entirely clear to me why I have to reproduce all of this.
 169   ghc-options:
 170     -Wall
 171     -fwarn-hi-shadowing
 172     -fwarn-missing-signatures
 173     -fwarn-name-shadowing
 174     -fwarn-orphans
 175     -fwarn-type-defaults
 176     -fwarn-tabs
 177     -fwarn-incomplete-record-updates
 178     -fwarn-monomorphism-restriction
 179     -fwarn-unused-do-bind
 180     -O2
 181
 182
 183 test-suite doctests
 184   type: exitcode-stdio-1.0
 185   hs-source-dirs: test
 186   main-is: Doctests.hs
 187   build-depends:
 188     base      == 4.*,
 189     -- Additional test dependencies.
 190     doctest   == 0.9.*
 191
 192   -- It's not entirely clear to me why I have to reproduce all of this.
 193   ghc-options:
 194     -Wall
 195     -fwarn-hi-shadowing
 196     -fwarn-missing-signatures
 197     -fwarn-name-shadowing
 198     -fwarn-orphans
 199     -fwarn-type-defaults
 200     -fwarn-tabs
 201     -fwarn-incomplete-record-updates
 202     -fwarn-monomorphism-restriction
 203     -fwarn-unused-do-bind
 204     -rtsopts
 205     -threaded
 206     -optc-O3
 207     -optc-march=native
 208     -O2
 209
 210
 211 source-repository head
 212   type: git
 213   location: http://michael.orlitzky.com/git/htsn-import.git
 214   branch: master