]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/Main.hs
655f1bef40c0680bda2c98e2d4ae9bc6f06c2cd8
[dead/lwn-epub.git] / src / Main.hs
1 {-# LANGUAGE ScopedTypeVariables, RecordWildCards #-}
2 module Main
3 where
4
5 import Prelude hiding (readFile)
6 import System.Directory(doesFileExist)
7 import System.IO (
8 Handle,
9 IOMode (WriteMode),
10 openBinaryFile,
11 stdout
12 )
13 import System.IO.UTF8 (readFile)
14 import Text.XML.HXT.Core
15
16 import CommandLine (Args(..), apply_args, show_help)
17 import LWN.Page
18
19
20 my_read_opts :: SysConfigList
21 my_read_opts = [ withValidate no,
22 withParseHTML yes,
23 withWarnings no ]
24
25 -- | My version of HandsomeSoup's parseHTML.
26 my_read :: String -> IOStateArrow s b XmlTree
27 my_read = readString my_read_opts
28
29 -- | Try to parse the given article using HXT. We try a few different
30 -- methods; if none of them work, we return 'Nothing'.
31 get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
32 get_xml_from_article s = do
33 article <- real_article_path s
34 is_file <- doesFileExist article
35 case is_file of
36 True -> do
37 contents <- readFile article
38 return $ Just $ my_read contents
39 False -> do
40 -- Download the URL and try to parse it.
41 return Nothing
42
43 -- | If we're given an empty path, return a handle to
44 -- 'stdout'. Otherwise, open the given file and return a read/write
45 -- handle to that.
46 get_output_handle :: FilePath -> IO Handle
47 get_output_handle path =
48 if (null path) then
49 return stdout
50 else
51 openBinaryFile path WriteMode
52
53
54 -- | Convert the given article to either a URL or a filesystem
55 -- path. If the given article exists on the filesystem, we assume
56 -- it's a file. Otherwise, we check to see if it's a URL. Failing
57 -- that, we try to construct a URL from what we're given and do our
58 -- best.
59 real_article_path :: String -> IO String
60 real_article_path = return . id
61
62 main :: IO ()
63 main = do
64 Args{..} <- apply_args
65 output_handle <- get_output_handle output
66 maybe_html <- get_xml_from_article article
67
68 case maybe_html of
69
70 Just html -> do
71 result <- parse html
72 case result of
73 Just stuff -> epublish stuff output_handle
74 Nothing -> return ()
75
76 Nothing -> do
77 _ <- show_help
78 return ()
79
80 putStrLn "Done."