]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Main.hs
Make NewsTeams and NewsLocations unique.
[dead/htsn-import.git] / src / Main.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2 {-# LANGUAGE NoMonomorphismRestriction #-}
3 module Main
4 where
5
6 import Control.Arrow ( (&&&), (>>^), arr, returnA )
7 import Control.Concurrent ( threadDelay )
8 import Control.Exception ( SomeException, catch )
9 import Control.Monad ( when )
10 import Database.Groundhog.Generic ( runDbConn )
11 import Database.Groundhog.Sqlite (
12 withSqliteConn )
13 import Database.Groundhog.Postgresql (
14 withPostgresqlConn )
15 import Data.Monoid ( (<>) )
16 import Network.Services.TSN.Logging ( init_logging )
17 import System.Console.CmdArgs ( def )
18 import System.Directory ( removeFile )
19 import System.Exit ( exitWith, ExitCode (ExitFailure) )
20 import System.IO.Error ( catchIOError )
21 import Text.XML.HXT.Core (
22 ArrowXml,
23 IOStateArrow,
24 XmlTree,
25 (>>>),
26 (/>),
27 getAttrl,
28 getText,
29 hasName,
30 readDocument,
31 runX )
32
33 import Backend ( Backend(..) )
34 import CommandLine ( get_args )
35 import Configuration ( Configuration(..), merge_optional )
36 import ConnectionString ( ConnectionString(..) )
37 import ExitCodes ( exit_no_xml_files )
38 import qualified OptionalConfiguration as OC (
39 OptionalConfiguration ( xml_files ),
40 from_rc )
41 import Network.Services.TSN.Report (
42 report_info,
43 report_error )
44 import TSN.DbImport ( DbImport(..), ImportResult(..) )
45 import qualified TSN.XML.Heartbeat as Heartbeat ( verify )
46 import qualified TSN.XML.Injuries as Injuries ( Listing )
47 import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( PlayerListing )
48 import qualified TSN.XML.News as News ( News )
49 import qualified TSN.XML.Odds as Odds ( Message )
50 import Xml ( DtdName(..), parse_opts )
51
52
53 -- | This is where most of the work happens. This function is called
54 -- on every file that we would like to import. It determines which
55 -- importer to use based on the DTD, processes the file, and then
56 -- returns whether or not any records were imported. If the file was
57 -- processed, the number of records imported is returned (wrapped in
58 -- a Just). Otherwise, if the file was not processed, 'Nothing' is
59 -- returned.
60 --
61 -- Since we are already in arrow world with HXT, the
62 -- 'import_with_dtd' function is lifted to an 'Arrow' as well with
63 -- 'arr'. This prevents us from having to do a bunch of unwrapping
64 -- and rewrapping with the associated error checking.
65 --
66 import_file :: Configuration -- ^ A configuration object needed for the
67 -- 'backend' and 'connection_string'.
68
69 -> FilePath -- ^ The path of the XML file to import.
70
71 -> IO (Maybe Int) -- ^ If we processed the file, Just the number
72 -- of records imported. Otherwise, Nothing.
73 import_file cfg path = do
74 results <- parse_and_import `catch` exception_handler
75 case results of
76 [] -> do
77 -- One of the arrows returned "nothing."
78 report_error $ "Unable to determine DTD for file " ++ path ++ "."
79 return Nothing
80 (ImportFailed errmsg:_) -> do
81 report_error errmsg
82 return Nothing
83 (ImportSkipped infomsg:_) -> do
84 -- We processed the message but didn't import anything. Return
85 -- "success" so that the XML file is deleted.
86 report_info infomsg
87 return $ Just 0
88 (ImportSucceeded count:_) -> do
89 report_info $ "Successfully imported " ++ (show count) ++
90 " records from " ++ path ++ "."
91 return $ Just count
92 (ImportUnsupported infomsg:_) -> do
93 -- For now we return "success" for these too, since we know we don't
94 -- support a bunch of DTDs and we want them to get deleted.
95 report_info infomsg
96 return $ Just 0
97 where
98 -- | This will catch *any* exception, even the ones thrown by
99 -- Haskell's 'error' (which should never occur under normal
100 -- circumstances).
101 exception_handler :: SomeException -> IO [ImportResult]
102 exception_handler e = do
103 report_error (show e)
104 let errdesc = "Failed to import file " ++ path ++ "."
105 -- Return a nonempty list so we don't claim incorrectly that
106 -- we couldn't parse the DTD.
107 return [ImportFailed errdesc]
108
109 -- | An arrow that reads a document into an 'XmlTree'.
110 readA :: IOStateArrow s a XmlTree
111 readA = readDocument parse_opts path
112
113 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
114 -- We use these to determine the parser to use.
115 dtdnameA :: ArrowXml a => a XmlTree DtdName
116 dtdnameA = getAttrl >>> hasName "doctype-SYSTEM" /> getText >>^ DtdName
117
118 -- | Combine the arrows above as well as the function below
119 -- (arrowized with 'arr') into an IO action that does everything
120 -- (parses and then runs the import on what was parsed).
121 --
122 -- The result of runX has type IO [IO ImportResult]. We thus use
123 -- bind (>>=) and sequence to combine all of the IOs into one
124 -- big one outside of the list.
125 parse_and_import :: IO [ImportResult]
126 parse_and_import =
127 runX (readA >>> (dtdnameA &&& returnA) >>> (arr import_with_dtd))
128 >>=
129 sequence
130
131 -- | Takes a ('DtdName', 'XmlTree') pair and uses the 'DtdName'
132 -- to determine which function to call on the 'XmlTree'.
133 import_with_dtd :: (DtdName, XmlTree) -> IO ImportResult
134 import_with_dtd (DtdName dtd,xml)
135 -- We special-case the heartbeat so it doesn't have to run in
136 -- the database monad.
137 | dtd == "Heartbeat.dtd" = Heartbeat.verify xml
138 | otherwise =
139 -- We need NoMonomorphismRestriction here.
140 if backend cfg == Postgres
141 then withPostgresqlConn cs $ runDbConn $ importer xml
142 else withSqliteConn cs $ runDbConn $ importer xml
143 where
144 -- | Pull the real connection String out of the configuration.
145 cs :: String
146 cs = get_connection_string $ connection_string cfg
147
148 importer
149 | dtd == "injuriesxml.dtd" =
150 dbimport (undefined :: Injuries.Listing)
151
152 | dtd == "Injuries_Detail_XML.dtd" =
153 dbimport (undefined :: InjuriesDetail.PlayerListing)
154
155 | dtd == "newsxml.dtd" =
156 dbimport (undefined :: News.News)
157
158 | dtd == "Odds_XML.dtd" = undefined
159
160 | otherwise = \_ -> do -- Dummy arg simplifies the other cases.
161 let infomsg =
162 "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
163 return $ ImportUnsupported infomsg
164
165
166 -- | Entry point of the program. It twiddles some knobs for
167 -- configuration options and then calls 'import_file' on each XML file
168 -- given on the command-line.
169 --
170 -- Any file successfully processed is then removed, and we're done.
171 --
172 main :: IO ()
173 main = do
174 rc_cfg <- OC.from_rc
175 cmd_cfg <- get_args
176
177 -- Merge the config file options with the command-line ones,
178 -- prefering the command-line ones.
179 let opt_config = rc_cfg <> cmd_cfg
180
181 -- Update a default config with any options that have been set in
182 -- either the config file or on the command-line. We initialize
183 -- logging before the missing parameter checks below so that we can
184 -- log the errors.
185 let cfg = (def :: Configuration) `merge_optional` opt_config
186 init_logging (log_level cfg) (log_file cfg) (syslog cfg)
187
188 -- Check the optional config for missing required options.
189 when (null $ OC.xml_files opt_config) $ do
190 report_error "No XML files given."
191 exitWith (ExitFailure exit_no_xml_files)
192
193 -- We don't do this in parallel (for now?) to keep the error
194 -- messages nice and linear.
195 results <- mapM (import_file cfg) (OC.xml_files opt_config)
196
197 -- Zip the results with the files list to find out which ones can be
198 -- deleted.
199 let result_pairs = zip (OC.xml_files opt_config) results
200 let victims = [ (p,c) | (p, Just c) <- result_pairs ]
201 let imported_count = sum $ map snd victims
202 report_info $ "Imported " ++ (show imported_count) ++ " records total."
203 mapM_ ((kill True) . fst) victims
204
205 where
206 -- | Wrap these two actions into one function so that we don't
207 -- report that the file was removed if the exception handler is
208 -- run.
209 remove_and_report path = do
210 removeFile path
211 report_info $ "Removed processed file " ++ path ++ "."
212
213 -- | Try to remove @path@ and potentially try again.
214 kill try_again path =
215 (remove_and_report path) `catchIOError` exception_handler
216 where
217 -- | A wrapper around threadDelay which takes seconds instead of
218 -- microseconds as its argument.
219 thread_sleep :: Int -> IO ()
220 thread_sleep seconds = do
221 let microseconds = seconds * (10 ^ (6 :: Int))
222 threadDelay microseconds
223
224 -- | If we can't remove the file, report that, and try once
225 -- more after waiting a few seconds.
226 exception_handler :: IOError -> IO ()
227 exception_handler e = do
228 report_error (show e)
229 report_error $ "Failed to remove imported file " ++ path ++ "."
230 if try_again then do
231 report_info "Waiting 5 seconds to attempt removal again..."
232 thread_sleep 5
233 kill False path
234 else
235 report_info $ "Giving up on " ++ path ++ "."