]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - Main.hs
3ea260642286dc5cc44efa510152eb892c53e271
[dead/htsn-import.git] / Main.hs
1 {-# LANGUAGE NoMonomorphismRestriction #-}
2 module Main
3 where
4
5 import Control.Arrow ( (&&&), arr, returnA )
6 import Control.Monad ( when )
7 import Control.Monad.IO.Class ( MonadIO, liftIO )
8 import Database.Groundhog.Generic ( runDbConn )
9 import Database.Groundhog.Sqlite (
10 withSqliteConn )
11 import Database.Groundhog.Postgresql (
12 withPostgresqlConn )
13 import Data.Monoid ( (<>) )
14 import Network.Services.TSN.Logging ( init_logging )
15 import System.Console.CmdArgs ( def )
16 import System.Exit ( exitWith, ExitCode (ExitFailure) )
17 import System.IO.Error ( catchIOError )
18 import Text.XML.HXT.Core (
19 ArrowXml,
20 IOStateArrow,
21 XmlTree,
22 (>>>),
23 (/>),
24 getAttrl,
25 getText,
26 hasName,
27 readDocument,
28 runX )
29
30 import Backend ( Backend(..) )
31 import CommandLine ( get_args )
32 import Configuration ( Configuration(..), merge_optional )
33 import ConnectionString ( ConnectionString(..) )
34 import ExitCodes ( exit_no_xml_files )
35 import qualified OptionalConfiguration as OC (
36 OptionalConfiguration ( xml_files ),
37 from_rc )
38 import Network.Services.TSN.Report (
39 report_info,
40 report_error )
41 import TSN.DbImport
42 import qualified TSN.XML.Injuries as Injuries ( Listing )
43 import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( PlayerListing )
44 import qualified TSN.XML.News as News ( Message )
45 import Xml ( parse_opts )
46
47
48
49 import_file :: Configuration -> FilePath -> IO ()
50 import_file cfg path = do
51 results <- catchIOError
52 parse_and_import
53 (\e -> do
54 report_error (show e)
55 report_error $ "Failed to import file " ++ path ++ "."
56 -- Return a nonempty list so we don't claim incorrectly that
57 -- we couldn't parse the DTD.
58 return [ Nothing ] )
59
60 case results of
61 -- If results' is empty, one of the arrows return "nothing."
62 [] -> report_error $ "Unable to determine DTD for file " ++ path ++ "."
63 (r:_) ->
64 case r of
65 Nothing -> return ()
66 Just cnt -> report_info $ "Successfully imported " ++
67 (show cnt) ++
68 " records from " ++ path ++ "."
69 where
70 -- | An arrow that reads a document into an 'XmlTree'.
71 readA :: IOStateArrow s a XmlTree
72 readA = readDocument parse_opts path
73
74 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
75 -- We use these to determine the parser to use.
76 doctypeA :: ArrowXml a => a XmlTree String
77 doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText
78
79 -- | Combine the arrows above as well as the function below
80 -- (arrowized with 'arr') into an IO action that does everything
81 -- (parses and then runs the import on what was parsed).
82 --
83 -- The result of runX has type IO [IO (Maybe Int)]. We thus use
84 -- bind (>>=) and sequence to combine all of the IOs into one
85 -- big one outside of the list.
86 parse_and_import :: IO [Maybe Int]
87 parse_and_import =
88 runX (readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd))
89 >>=
90 sequence
91
92 -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to
93 -- determine which function to call on the 'XmlTree'.
94 import_with_dtd :: (String, XmlTree) -> IO (Maybe Int)
95 import_with_dtd (dtd,xml) =
96 -- We need NoMonomorphismRestriction here.
97 if backend cfg == Postgres
98 then withPostgresqlConn cs $ runDbConn $ importer xml
99 else withSqliteConn cs $ runDbConn $ importer xml
100 where
101 -- | Pull the real connection String out of the configuration.
102 cs :: String
103 cs = get_connection_string $ connection_string cfg
104
105 importer
106 | dtd == "injuriesxml.dtd" =
107 dbimport (undefined :: Injuries.Listing)
108
109 | dtd == "Injuries_Detail_XML.dtd" =
110 dbimport (undefined :: InjuriesDetail.PlayerListing)
111
112 | dtd == "newsxml.dtd" =
113 dbimport (undefined :: News.Message)
114
115 | otherwise = \_ -> do -- Dummy arg simplifies the other cases.
116 let errmsg = "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
117 liftIO $ report_info errmsg
118 return Nothing
119
120
121
122 main :: IO ()
123 main = do
124 rc_cfg <- OC.from_rc
125 cmd_cfg <- get_args
126
127 -- Merge the config file options with the command-line ones,
128 -- prefering the command-line ones.
129 let opt_config = rc_cfg <> cmd_cfg
130
131 -- Update a default config with any options that have been set in
132 -- either the config file or on the command-line. We initialize
133 -- logging before the missing parameter checks below so that we can
134 -- log the errors.
135 let cfg = (def :: Configuration) `merge_optional` opt_config
136 init_logging (log_file cfg) (log_level cfg) (syslog cfg)
137
138 -- Check the optional config for missing required options.
139 when (null $ OC.xml_files opt_config) $ do
140 report_error "No XML files given."
141 exitWith (ExitFailure exit_no_xml_files)
142
143 -- We don't do this in parallel (for now?) to keep the error
144 -- messages nice and linear.
145 mapM_ (import_file cfg) (OC.xml_files opt_config)