]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Main.hs
Implement database insertion (generically!).
[dead/htsn-import.git] / src / Main.hs
1 module Main
2 where
3
4 import Control.Arrow ( (&&&), arr, returnA )
5 import Database.Groundhog.Core ( PersistEntity )
6 import Database.Groundhog.Sqlite (
7 defaultMigrationLogger,
8 insert,
9 migrate,
10 runDbConn,
11 runMigration,
12 withSqliteConn )
13 import Text.XML.HXT.Core (
14 ArrowXml,
15 IOStateArrow,
16 SysConfigList,
17 XmlPickler,
18 XmlTree,
19 (>>>),
20 (/>),
21 getAttrl,
22 getText,
23 hasName,
24 no,
25 readDocument,
26 runX,
27 unpickleDoc,
28 withPreserveComment,
29 withRemoveWS,
30 withSubstDTDEntities,
31 withValidate,
32 xpickle,
33 yes )
34 import System.Environment ( getArgs )
35 import qualified TSN.Injuries as Injuries (
36 Listing,
37 Message ( listings ) )
38 import qualified TSN.InjuriesDetail as InjuriesDetail (
39 Listing ( player_listings ),
40 Message ( listings ),
41 PlayerListing )
42
43
44 -- | A list of options passed to 'readDocument' when we parse an XML
45 -- document. We don't validate because the DTDs from TSN are
46 -- wrong. As a result, we don't want to keep useless DTDs
47 -- areound. Thus we disable 'withSubstDTDEntities' which, when
48 -- combined with "withValidate no", prevents HXT from trying to read
49 -- the DTD at all.
50 --
51 parse_opts :: SysConfigList
52 parse_opts =
53 [ withPreserveComment no,
54 withRemoveWS yes,
55 withSubstDTDEntities no,
56 withValidate no ]
57
58
59 -- | We put the 'XmlTree' argument last so that it's easy to eta
60 -- reduce all of the import_foo functions that call this.
61 --
62 import_generic :: (XmlPickler a, PersistEntity b)
63 => b -- ^ Dummy Listing instance needed for 'migrate'
64 -> (a -> [b]) -- ^ listings getter
65 -> XmlTree
66 -> IO ()
67 import_generic dummy g xml =
68 withSqliteConn "foo.sqlite3" $ runDbConn $ do
69 runMigration defaultMigrationLogger $ do
70 migrate dummy
71 let msg = unpickleDoc xpickle xml
72 case msg of
73 Nothing -> error "Should unpickle!"
74 Just m -> mapM_ (\l -> insert l) (g m)
75
76 -- | Import TSN.Injuries from an 'XmlTree'.
77 import_injuries :: XmlTree -> IO ()
78 import_injuries =
79 import_generic
80 (undefined :: Injuries.Listing)
81 Injuries.listings
82
83 -- | Import TSN.InjuriesDetail from an 'XmlTree'.
84 import_injuries_detail :: XmlTree -> IO ()
85 import_injuries_detail =
86 import_generic
87 (undefined :: InjuriesDetail.PlayerListing)
88 ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings)
89
90 import_file :: FilePath -> IO ()
91 import_file path = do
92 results <- runX $ readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd)
93 case results of
94 [] -> error "ERROR: Unable to determine DOCTYPE."
95 (r:_) -> r -- Need to do something with the result or it gets GCed?
96 -- We do only expect one result fortunately.
97 where
98 -- | An arrow that reads a document into an 'XmlTree'.
99 readA :: IOStateArrow s a XmlTree
100 readA = readDocument parse_opts path
101
102 -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'.
103 -- We use these to determine the parser to use.
104 doctypeA :: ArrowXml a => a XmlTree String
105 doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText
106
107 -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to
108 -- determine which function to call on the 'XmlTree'.
109 import_with_dtd :: (String, XmlTree) -> IO ()
110 import_with_dtd (dtd,xml)
111 | dtd == "injuriesxml.dtd" = import_injuries xml
112 | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail xml
113 | otherwise = error "ERROR: Unrecognized DTD."
114
115
116 main :: IO ()
117 main = do
118 args <- getArgs
119 import_file (args !! 0)