]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Main.hs
Reorganize the import_foo functions so that the database choice comes sooner and...
[dead/htsn-import.git] / src / Main.hs
1 {-# LANGUAGE NoMonomorphismRestriction #-}
2 module Main
3 where
4
5 import Control.Arrow ( (&&&), arr, returnA )
6 import Control.Monad ( when )
7 import Control.Monad.IO.Class ( MonadIO, liftIO )
8 import Database.Groundhog (
9 defaultMigrationLogger,
10 insert,
11 migrate,
12 runMigration )
13 import Database.Groundhog.Core ( PersistBackend, PersistEntity )
14 import Database.Groundhog.Generic ( runDbConn )
15 import Database.Groundhog.Sqlite (
16 withSqliteConn )
17 import Database.Groundhog.Postgresql (
18 withPostgresqlConn )
19 import Data.Monoid ( (<>) )
20 import System.Console.CmdArgs ( def )
21 import System.Exit ( exitWith, ExitCode (ExitFailure) )
22 import System.IO.Error ( catchIOError )
23 import Text.XML.HXT.Core (
24 ArrowXml,
25 IOStateArrow,
26 XmlPickler,
27 XmlTree,
28 (>>>),
29 (/>),
30 getAttrl,
31 getText,
32 hasName,
33 readDocument,
34 runX,
35 unpickleDoc,
36 xpickle )
37
38 import Backend ( Backend(..) )
39 import CommandLine ( get_args )
40 import Configuration ( Configuration(..), merge_optional )
41 import ConnectionString ( ConnectionString(..) )
42 import ExitCodes ( exit_no_xml_files )
43 import Network.Services.TSN.Logging ( init_logging )
44 import qualified OptionalConfiguration as OC (
45 OptionalConfiguration ( xml_files ),
46 from_rc )
47 import Network.Services.TSN.Report (
48 report_info,
49 report_error )
50 import qualified TSN.Injuries as Injuries (
51 Listing,
52 Message ( listings ) )
53 import qualified TSN.InjuriesDetail as InjuriesDetail (
54 Listing ( player_listings ),
55 Message ( listings ),
56 PlayerListing )
57 import qualified TSN.News as News ( Message )
58 import Xml ( parse_opts )
59
60
61 -- | We put the 'Configuration' and 'XmlTree' arguments last so that
62 -- it's easy to eta reduce all of the import_foo functions that call
63 -- this.
64 --
65 import_generic :: (XmlPickler a, MonadIO m, PersistEntity b, PersistBackend m)
66 => b -- ^ Dummy Listing instance needed for 'migrate'
67 -> (a -> [b]) -- ^ listings getter
68 -> XmlTree
69 -> m (Maybe Int) -- ^ Return the number of records inserted.
70 import_generic dummy g xml = do
71 -- Needs NoMonomorphismRestriction to be allowed to return
72 -- different types in the two cases above.
73 runMigration defaultMigrationLogger $ migrate dummy
74 let root_element = unpickleDoc xpickle xml
75 case root_element of
76 Nothing -> do
77 let msg = "Could not unpickle document in import_generic."
78 liftIO $ report_error msg
79 return Nothing
80 Just elt -> do
81 ids <- mapM insert (g elt)
82 return $ Just (length ids)
83
84
85
86 -- | Import TSN.News from an 'XmlTree'.
87 import_news :: (MonadIO m, PersistBackend m)
88 => XmlTree
89 -> m (Maybe Int)
90 import_news = -- This implementation is wrroooonnnnngggg.
91 import_generic
92 (undefined :: News.Message)
93 (\m -> [m] :: [News.Message]) -- Turn a Message into a [Message]
94
95 -- | Import TSN.Injuries from an 'XmlTree'.
96 import_injuries :: (MonadIO m, PersistBackend m)
97 => XmlTree
98 -> m (Maybe Int)
99 import_injuries =
100 import_generic
101 (undefined :: Injuries.Listing)
102 Injuries.listings
103
104 -- | Import TSN.InjuriesDetail from an 'XmlTree'.
105 import_injuries_detail :: (MonadIO m, PersistBackend m)
106 => XmlTree
107 -> m (Maybe Int)
108 import_injuries_detail =
109 import_generic
110 (undefined :: InjuriesDetail.PlayerListing)
111 ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings)
112
113 import_file :: Configuration -> FilePath -> IO ()
114 import_file cfg path = do
115 results <- catchIOError
116 parse_and_import
117 (\e -> do
118 report_error (show e)
119 report_error $ "Failed to import file " ++ path ++ "."
120 -- Return a nonempty list so we don't claim incorrectly that
121 -- we couldn't parse the DTD.
122 return [ Nothing ] )
123
124 case results of
125 -- If results' is empty, one of the arrows return "nothing."
126 [] -> report_error $ "Unable to determine DTD for file " ++ path ++ "."
127 (r:_) ->
128 case r of
129 Nothing -> return ()
130 Just cnt -> report_info $ "Successfully imported " ++
131 (show cnt) ++
132 " records from " ++ path ++ "."
133 where
134 -- | An arrow that reads a document into an 'XmlTree'.
135 readA :: IOStateArrow s a XmlTree
136 readA = readDocument parse_opts path
137
138 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
139 -- We use these to determine the parser to use.
140 doctypeA :: ArrowXml a => a XmlTree String
141 doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText
142
143 -- | Combine the arrows above as well as the function below
144 -- (arrowized with 'arr') into an IO action that does everything
145 -- (parses and then runs the import on what was parsed).
146 --
147 -- The result of runX has type IO [IO (Maybe Int)]. We thus use
148 -- bind (>>=) and sequence to combine all of the IOs into one
149 -- big one outside of the list.
150 parse_and_import :: IO [Maybe Int]
151 parse_and_import =
152 runX (readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd))
153 >>=
154 sequence
155
156 -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to
157 -- determine which function to call on the 'XmlTree'.
158 import_with_dtd :: (String, XmlTree) -> IO (Maybe Int)
159 import_with_dtd (dtd,xml) =
160 if backend cfg == Postgres
161 then withPostgresqlConn cs $ runDbConn $ importer xml
162 else withSqliteConn cs $ runDbConn $ importer xml
163 where
164 -- | Pull the real connection String out of the configuration.
165 cs :: String
166 cs = get_connection_string $ connection_string cfg
167
168 importer
169 | dtd == "injuriesxml.dtd" = import_injuries
170 | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail
171 | dtd == "newsxml.dtd" = import_news
172 | otherwise = \_ -> do -- Dummy arg simplifies the other cases.
173 let errmsg = "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
174 liftIO $ report_info errmsg
175 return Nothing
176
177
178
179 main :: IO ()
180 main = do
181 rc_cfg <- OC.from_rc
182 cmd_cfg <- get_args
183
184 -- Merge the config file options with the command-line ones,
185 -- prefering the command-line ones.
186 let opt_config = rc_cfg <> cmd_cfg
187
188 -- Update a default config with any options that have been set in
189 -- either the config file or on the command-line. We initialize
190 -- logging before the missing parameter checks below so that we can
191 -- log the errors.
192 let cfg = (def :: Configuration) `merge_optional` opt_config
193 init_logging (log_file cfg) (log_level cfg) (syslog cfg)
194
195 -- Check the optional config for missing required options.
196 when (null $ OC.xml_files opt_config) $ do
197 report_error "No XML files given."
198 exitWith (ExitFailure exit_no_xml_files)
199
200 -- We don't do this in parallel (for now?) to keep the error
201 -- messages nice and linear.
202 mapM_ (import_file cfg) (OC.xml_files opt_config)