From 4ab5b57dc58b2b1d75c89c3e7e8bf0e7269ec29e Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 12 Jan 2014 18:32:21 -0500 Subject: [PATCH] Add a description to the cabal file, and begin to define extra modules/source files that will be needed for a release. --- htsn-import.cabal | 66 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/htsn-import.cabal b/htsn-import.cabal index 362beac..99df37f 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -7,10 +7,56 @@ category: Utils license: GPL-3 license-file: doc/LICENSE build-type: Simple +extra-source-files: + doc/htsnrc-import.example + doc/man1/htsn-import.1 + makefile + test/xml/*.xml + schema/*.dtd + schemagen/*/*.xml synopsis: Import XML files from The Sports Network into an RDBMS. description: - Import XML files from The Sports Network into an RDBMS. + /Usage/: + . + @ + htsn-import [OPTIONS] [FILES] + @ + . + The Sports Network offers an XML feed + containing various sports news and statistics. Our sister program + /htsn/ is capable of retrieving the feed and saving the individual + XML documents contained therein. But what to do with them? + . + The purpose of /htsn-import/ is to take these XML documents and + get them into something we can use, a relational database management + system (RDBMS), loosely known as a SQL database. The structure of + a relational database is, well, relational, and the feed XML is not. So + there is some work to do before the data can be inserted. + . + First, we must parse the XML. Each supported document type (see below) + has a full pickle/unpickle implementation (\"pickle\" is simply a + synonym for serialize here). That means that we parse the entire + document into a data structure, and if we pickle (serialize) that data + structure, we get the exact same XML document that we started with. + . + This is important for two reasons. First, it serves as a second level + of validation. 
The first validation is performed by the XML parser, + but if that succeeds and unpickling fails, we know that something is + fishy. Second, we don't ever want to be surprised by some new element + or attribute showing up in the XML. The fact that we can unpickle the + whole thing now means that we won't be surprised in the future. + . + The aforementioned feature is especially important because we + automatically migrate the database schema every time we import a + document. If you attempt to import a \"newsxml.dtd\" document, all + database objects relating to the news will be created if they do not + exist. We don't want the schema to change out from under us without + warning, so it's important that no XML be parsed that would result in + a different schema than we had previously. Since we can + pickle/unpickle everything already, this should be impossible. + . + Examples and usage documentation are available in the man page. executable htsn-import build-depends: @@ -40,6 +86,24 @@ executable htsn-import hs-source-dirs: src/ + other-modules: + Backend + CommandLine + Configuration + ConnectionString + ExitCodes + OptionalConfiguration + TSN.Codegen + TSN.DbImport + TSN.Picklers + TSN.XmlImport + TSN.XML.Heartbeat + TSN.XML.Injuries + TSN.XML.InjuriesDetail + TSN.XML.News + TSN.XML.Odds + Xml + ghc-options: -Wall -fwarn-hi-shadowing -- 2.44.2