1 {-# LANGUAGE ScopedTypeVariables, RecordWildCards #-}
5 import Prelude hiding (readFile)
6 import System.Directory (doesFileExist)
13 import System.IO.UTF8 (readFile)
14 import Text.Regex.Posix ((=~))
15 import Text.XML.HXT.Core
17 import CommandLine (show_help)
18 import Configuration (Cfg(..), get_cfg)
20 import LWN.URI (is_lwn_url, make_absolute_url)
21 import Misc (contains)
24 my_read_opts :: SysConfigList
25 my_read_opts = [ withValidate no,
29 -- | Local stand-in for HandsomeSoup's parseHTML: feed the given document text to 'readString' configured with 'my_read_opts'.
30 my_read :: String -> IOStateArrow s b XmlTree
31 my_read html = readString my_read_opts html
33 -- | Try to parse the given article using HXT. We try a few different
34 -- methods; if none of them work, we return 'Nothing'.
35 get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
36 get_xml_from_article s = do
37 article <- real_article_path s
38 is_file <- doesFileExist article
41 contents <- readFile article
42 return $ Just $ my_read contents
44 -- Download the URL and try to parse it.
47 -- | If we're given an empty path, return a handle to
48 -- 'stdout'. Otherwise, open the given file and return a write-only
50 get_output_handle :: FilePath -> IO Handle
51 get_output_handle path =
55 openBinaryFile path WriteMode
59 -- | Convert the given article to either a URL or a filesystem
60 -- path. If the given article exists on the filesystem, we assume
61 -- it's a file. Otherwise, we check to see if it's a URL. Failing
62 -- that, we try to construct a URL from what we're given and do our
64 real_article_path :: String -> IO String
65 real_article_path s = do
66 is_file <- doesFileExist s
67 return $ if is_file then s else check_cases
70 case make_absolute_url "current" of
74 case make_absolute_url s of
81 | s `contains` "current" = abs_current
82 | s =~ "^[0-9]+$" = abs_s
83 | otherwise = s -- Give up
88 output_handle <- get_output_handle output
89 maybe_html <- get_xml_from_article article
96 Just stuff -> epublish stuff output_handle