]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Main.hs
Add a backend configuration option.
[dead/htsn-import.git] / src / Main.hs
1 module Main
2 where
3
4 import Control.Arrow ( (&&&), arr, returnA )
5 import Control.Monad ( when )
6 import Control.Monad.IO.Class ( liftIO )
7 import Database.Groundhog.Core ( PersistEntity )
8 import Database.Groundhog.Sqlite (
9 defaultMigrationLogger,
10 insert,
11 migrate,
12 runDbConn,
13 runMigration,
14 withSqliteConn )
15 import Data.Maybe ( isNothing )
16 import Data.Monoid ( (<>) )
17 import System.Console.CmdArgs ( def )
18 import System.Exit ( exitWith, ExitCode (ExitFailure) )
19 import Text.XML.HXT.Core (
20 ArrowXml,
21 IOStateArrow,
22 SysConfigList,
23 XmlPickler,
24 XmlTree,
25 (>>>),
26 (/>),
27 getAttrl,
28 getText,
29 hasName,
30 no,
31 readDocument,
32 runX,
33 unpickleDoc,
34 withPreserveComment,
35 withRemoveWS,
36 withSubstDTDEntities,
37 withValidate,
38 xpickle,
39 yes )
40
41 import CommandLine ( get_args )
42 import Configuration ( Configuration(..), merge_optional )
43 import ExitCodes (
44 exit_no_connection_string,
45 exit_no_xml_files )
46 import Network.Services.TSN.Logging ( init_logging )
47 import qualified OptionalConfiguration as OC (
48 OptionalConfiguration ( connection_string, xml_files ),
49 from_rc )
50 import Network.Services.TSN.Report (
51 report_info,
52 report_error )
53 import qualified TSN.Injuries as Injuries (
54 Listing,
55 Message ( listings ) )
56 import qualified TSN.InjuriesDetail as InjuriesDetail (
57 Listing ( player_listings ),
58 Message ( listings ),
59 PlayerListing )
60
61
62
63 -- | A list of options passed to 'readDocument' when we parse an XML
64 -- document. We don't validate because the DTDs from TSN are
65 -- wrong. As a result, we don't want to keep useless DTDs
66 -- areound. Thus we disable 'withSubstDTDEntities' which, when
67 -- combined with "withValidate no", prevents HXT from trying to read
68 -- the DTD at all.
69 --
70 parse_opts :: SysConfigList
71 parse_opts =
72 [ withPreserveComment no,
73 withRemoveWS yes,
74 withSubstDTDEntities no,
75 withValidate no ]
76
77
78 -- | We put the 'XmlTree' argument last so that it's easy to eta
79 -- reduce all of the import_foo functions that call this.
80 --
81 import_generic :: (XmlPickler a, PersistEntity b)
82 => b -- ^ Dummy Listing instance needed for 'migrate'
83 -> (a -> [b]) -- ^ listings getter
84 -> XmlTree
85 -> IO ()
86 import_generic dummy g xml =
87 withSqliteConn "foo.sqlite3" $ runDbConn $ do
88 runMigration defaultMigrationLogger $ do
89 migrate dummy
90 let root_element = unpickleDoc xpickle xml
91 case root_element of
92 Nothing -> let msg = "Could not unpickle document in import_generic."
93 in liftIO $ report_error msg
94 Just elt -> mapM_ (\l -> insert l) (g elt)
95
96 -- | Import TSN.Injuries from an 'XmlTree'.
97 import_injuries :: XmlTree -> IO ()
98 import_injuries =
99 import_generic
100 (undefined :: Injuries.Listing)
101 Injuries.listings
102
103 -- | Import TSN.InjuriesDetail from an 'XmlTree'.
104 import_injuries_detail :: XmlTree -> IO ()
105 import_injuries_detail =
106 import_generic
107 (undefined :: InjuriesDetail.PlayerListing)
108 ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings)
109
110 import_file :: FilePath -> IO ()
111 import_file path = do
112 report_info $ "Attempting to import " ++ path ++ "."
113 results <- runX $ readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd)
114 case results of
115 [] -> report_error $ "Unable to determine DTD for file " ++ path ++ "."
116 (r:_) -> r -- Need to do something with the result or it gets GCed?
117 -- We do only expect one result fortunately.
118 where
119 -- | An arrow that reads a document into an 'XmlTree'.
120 readA :: IOStateArrow s a XmlTree
121 readA = readDocument parse_opts path
122
123 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
124 -- We use these to determine the parser to use.
125 doctypeA :: ArrowXml a => a XmlTree String
126 doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText
127
128 -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to
129 -- determine which function to call on the 'XmlTree'.
130 import_with_dtd :: (String, XmlTree) -> IO ()
131 import_with_dtd (dtd,xml)
132 | dtd == "injuriesxml.dtd" = import_injuries xml
133 | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail xml
134 | otherwise = report_info $
135 "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
136
137
138 main :: IO ()
139 main = do
140 rc_cfg <- OC.from_rc
141 cmd_cfg <- get_args
142
143 -- Merge the config file options with the command-line ones,
144 -- prefering the command-line ones.
145 let opt_config = rc_cfg <> cmd_cfg
146
147 -- Update a default config with any options that have been set in
148 -- either the config file or on the command-line. We initialize
149 -- logging before the missing parameter checks below so that we can
150 -- log the errors.
151 let cfg = (def :: Configuration) `merge_optional` opt_config
152 init_logging (log_file cfg) (log_level cfg) (syslog cfg)
153
154 -- Check the optional config for missing required options.
155 when (null $ OC.xml_files opt_config) $ do
156 report_error "No XML files given."
157 exitWith (ExitFailure exit_no_xml_files)
158
159 -- There's a default connection string, namely the empty string, but
160 -- it's not much use to us. So we make sure that we were given
161 -- something explicitly.
162 when (isNothing (OC.connection_string opt_config)) $ do
163 report_error "No connection string supplied."
164 exitWith (ExitFailure exit_no_connection_string)
165
166
167 return ()