Add config file parsing.
[dead/lwn-epub.git] / src / Main.hs
1 {-# LANGUAGE ScopedTypeVariables, RecordWildCards #-}
2 module Main
3 where
4
5 import Prelude hiding (readFile)
6 import System.Directory (doesFileExist)
7 import System.IO (
8 Handle,
9 IOMode (WriteMode),
10 openBinaryFile,
11 stdout
12 )
13 import System.IO.UTF8 (readFile)
14 import Text.Regex.Posix ((=~))
15 import Text.XML.HXT.Core
16
17 import CommandLine (show_help)
18 import Configuration (Cfg(..), get_cfg)
19 import LWN.Page
20 import LWN.URI (is_lwn_url, make_absolute_url)
21 import Misc (contains)
22
23
24 my_read_opts :: SysConfigList
25 my_read_opts = [ withValidate no,
26 withParseHTML yes,
27 withWarnings no ]
28
29 -- | My version of HandsomeSoup's parseHTML.
30 my_read :: String -> IOStateArrow s b XmlTree
31 my_read = readString my_read_opts
32
33 -- | Try to parse the given article using HXT. We try a few different
34 -- methods; if none of them work, we return 'Nothing'.
35 get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
36 get_xml_from_article s = do
37 article <- real_article_path s
38 is_file <- doesFileExist article
39 case is_file of
40 True -> do
41 contents <- readFile article
42 return $ Just $ my_read contents
43 False -> do
44 -- Download the URL and try to parse it.
45 return Nothing
46
47 -- | If we're given an empty path, return a handle to
48 -- 'stdout'. Otherwise, open the given file and return a read/write
49 -- handle to that.
50 get_output_handle :: FilePath -> IO Handle
51 get_output_handle path =
52 if (null path) then
53 return stdout
54 else
55 openBinaryFile path WriteMode
56
57
58
59 -- | Convert the given article to either a URL or a filesystem
60 -- path. If the given article exists on the filesystem, we assume
61 -- it's a file. Otherwise, we check to see if it's a URL. Failing
62 -- that, we try to construct a URL from what we're given and do our
63 -- best.
64 real_article_path :: String -> IO String
65 real_article_path s = do
66 is_file <- doesFileExist s
67 return $ if is_file then s else check_cases
68 where
69 abs_current =
70 case make_absolute_url "current" of
71 Nothing -> s
72 Just ac -> ac
73 abs_s =
74 case make_absolute_url s of
75 Nothing -> s
76 Just as -> as
77
78 check_cases :: String
79 check_cases
80 | is_lwn_url s = s
81 | s `contains` "current" = abs_current
82 | s =~ "^[0-9]+$" = abs_s
83 | otherwise = s -- Give up
84
85 main :: IO ()
86 main = do
87 Cfg{..} <- get_cfg
88 output_handle <- get_output_handle output
89 maybe_html <- get_xml_from_article article
90
91 case maybe_html of
92
93 Just html -> do
94 result <- parse html
95 case result of
96 Just stuff -> epublish stuff output_handle
97 Nothing -> do
98 _ <- show_help
99 return ()
100
101 Nothing -> do
102 _ <- show_help
103 return ()