]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Main.hs
Provide a default connection string (for sqlite).
[dead/htsn-import.git] / src / Main.hs
1 {-# LANGUAGE NoMonomorphismRestriction #-}
2 module Main
3 where
4
5 import Control.Arrow ( (&&&), arr, returnA )
6 import Control.Monad ( when )
7 import Control.Monad.IO.Class ( liftIO )
8 import Database.Groundhog (
9 defaultMigrationLogger,
10 insert,
11 migrate,
12 runMigration )
13 import Database.Groundhog.Core ( PersistEntity )
14 import Database.Groundhog.Generic ( runDbConn )
15 import Database.Groundhog.Sqlite (
16 withSqliteConn )
17 import Database.Groundhog.Postgresql (
18 withPostgresqlConn )
19 import Data.Monoid ( (<>) )
20 import System.Console.CmdArgs ( def )
21 import System.Exit ( exitWith, ExitCode (ExitFailure) )
22 import System.IO.Error ( catchIOError )
23 import Text.XML.HXT.Core (
24 ArrowXml,
25 IOStateArrow,
26 SysConfigList,
27 XmlPickler,
28 XmlTree,
29 (>>>),
30 (/>),
31 getAttrl,
32 getText,
33 hasName,
34 no,
35 readDocument,
36 runX,
37 unpickleDoc,
38 withPreserveComment,
39 withRemoveWS,
40 withSubstDTDEntities,
41 withValidate,
42 xpickle,
43 yes )
44
45 import Backend ( Backend(..) )
46 import CommandLine ( get_args )
47 import Configuration ( Configuration(..), merge_optional )
48 import ConnectionString ( ConnectionString(..) )
49 import ExitCodes ( exit_no_xml_files )
50 import Network.Services.TSN.Logging ( init_logging )
51 import qualified OptionalConfiguration as OC (
52 OptionalConfiguration ( xml_files ),
53 from_rc )
54 import Network.Services.TSN.Report (
55 report_info,
56 report_error )
57 import qualified TSN.Injuries as Injuries (
58 Listing,
59 Message ( listings ) )
60 import qualified TSN.InjuriesDetail as InjuriesDetail (
61 Listing ( player_listings ),
62 Message ( listings ),
63 PlayerListing )
64
65
66
67 -- | A list of options passed to 'readDocument' when we parse an XML
68 -- document. We don't validate because the DTDs from TSN are
69 -- wrong. As a result, we don't want to keep useless DTDs
70 -- areound. Thus we disable 'withSubstDTDEntities' which, when
71 -- combined with "withValidate no", prevents HXT from trying to read
72 -- the DTD at all.
73 --
74 parse_opts :: SysConfigList
75 parse_opts =
76 [ withPreserveComment no,
77 withRemoveWS yes,
78 withSubstDTDEntities no,
79 withValidate no ]
80
81
82 -- | We put the 'Configuration' and 'XmlTree' arguments last so that
83 -- it's easy to eta reduce all of the import_foo functions that call
84 -- this.
85 --
86 import_generic :: (XmlPickler a, PersistEntity b)
87 => b -- ^ Dummy Listing instance needed for 'migrate'
88 -> (a -> [b]) -- ^ listings getter
89 -> Configuration
90 -> XmlTree
91 -> IO (Maybe Int) -- ^ Return the number of records inserted.
92 import_generic dummy g cfg xml
93 | backend cfg == Postgres = withPostgresqlConn cs go
94 | otherwise = withSqliteConn cs go
95 where
96 -- | Pull the real connection String out of the configuration.
97 cs :: String
98 cs = get_connection_string $ connection_string cfg
99
100 -- Needs NoMonomorphismRestriction to be allowed to return
101 -- different types in the two cases above.
102 go = runDbConn $ do
103 runMigration defaultMigrationLogger $ migrate dummy
104 let root_element = unpickleDoc xpickle xml
105 case root_element of
106 Nothing -> do
107 let msg = "Could not unpickle document in import_generic."
108 liftIO $ report_error msg
109 return Nothing
110 Just elt -> do
111 ids <- mapM insert (g elt)
112 return $ Just (length ids)
113
114
115
116 -- | Import TSN.Injuries from an 'XmlTree'.
117 import_injuries :: Configuration -> XmlTree -> IO (Maybe Int)
118 import_injuries =
119 import_generic
120 (undefined :: Injuries.Listing)
121 Injuries.listings
122
123 -- | Import TSN.InjuriesDetail from an 'XmlTree'.
124 import_injuries_detail :: Configuration -> XmlTree -> IO (Maybe Int)
125 import_injuries_detail =
126 import_generic
127 (undefined :: InjuriesDetail.PlayerListing)
128 ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings)
129
130 import_file :: Configuration -> FilePath -> IO ()
131 import_file cfg path = do
132 results <- catchIOError
133 parse_and_import
134 (\e -> do
135 report_error (show e)
136 report_error $ "Failed to import file " ++ path ++ "."
137 -- Return a nonempty list so we don't claim incorrectly that
138 -- we couldn't parse the DTD.
139 return [ Nothing ] )
140
141 case results of
142 -- If results' is empty, one of the arrows return "nothing."
143 [] -> report_error $ "Unable to determine DTD for file " ++ path ++ "."
144 (r:_) ->
145 case r of
146 Nothing -> return ()
147 Just cnt -> report_info $ "Successfully imported " ++
148 (show cnt) ++
149 " records from " ++ path ++ "."
150 where
151 -- | An arrow that reads a document into an 'XmlTree'.
152 readA :: IOStateArrow s a XmlTree
153 readA = readDocument parse_opts path
154
155 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
156 -- We use these to determine the parser to use.
157 doctypeA :: ArrowXml a => a XmlTree String
158 doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText
159
160 -- | Combine the arrows above as well as the function below
161 -- (arrowized with 'arr') into an IO action that does everything
162 -- (parses and then runs the import on what was parsed).
163 --
164 -- The result of runX has type IO [IO (Maybe Int)]. We thus use
165 -- bind (>>=) and sequence to combine all of the IOs into one
166 -- big one outside of the list.
167 parse_and_import :: IO [Maybe Int]
168 parse_and_import =
169 runX (readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd))
170 >>=
171 sequence
172
173 -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to
174 -- determine which function to call on the 'XmlTree'.
175 import_with_dtd :: (String, XmlTree) -> IO (Maybe Int)
176 import_with_dtd (dtd,xml)
177 | dtd == "injuriesxml.dtd" = import_injuries cfg xml
178 | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail cfg xml
179 | otherwise = do
180 report_info $ "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
181 return Nothing
182
183 main :: IO ()
184 main = do
185 rc_cfg <- OC.from_rc
186 cmd_cfg <- get_args
187
188 -- Merge the config file options with the command-line ones,
189 -- prefering the command-line ones.
190 let opt_config = rc_cfg <> cmd_cfg
191
192 -- Update a default config with any options that have been set in
193 -- either the config file or on the command-line. We initialize
194 -- logging before the missing parameter checks below so that we can
195 -- log the errors.
196 let cfg = (def :: Configuration) `merge_optional` opt_config
197 init_logging (log_file cfg) (log_level cfg) (syslog cfg)
198
199 -- Check the optional config for missing required options.
200 when (null $ OC.xml_files opt_config) $ do
201 report_error "No XML files given."
202 exitWith (ExitFailure exit_no_xml_files)
203
204 -- We don't do this in parallel (for now?) to keep the error
205 -- messages nice and linear.
206 mapM_ (import_file cfg) (OC.xml_files opt_config)