{-# LANGUAGE DeriveDataTypeable #-}
-module CommandLine (Args(..), apply_args)
+module CommandLine (Args(..), apply_args, show_help)
where
-- Get the version from Cabal.
startswith "Requires at least" s
+show_help :: IO (CmdArgs Args)
+show_help = withArgs ["--help"] parse_args
+
parse_args :: IO (CmdArgs Args)
parse_args = do
x <- getArgs
Left err ->
if (is_missing_arg_error err) then
-- Disregard the error message, show help instead.
- withArgs ["--help"] parse_args
+ show_help
else do
hPutStrLn stderr err
exitWith (ExitFailure exit_args_parse_failed)
module Main
where
+import Prelude hiding (readFile)
+import System.Directory(doesFileExist)
import System.IO (
Handle,
IOMode (WriteMode),
openBinaryFile,
stdout
)
+import System.IO.UTF8 (readFile)
import Text.XML.HXT.Core
-import CommandLine (Args(..), apply_args)
+import CommandLine (Args(..), apply_args, show_help)
import LWN.Page
-my_read :: String -> IOStateArrow s b XmlTree
-my_read =
- readDocument [ withValidate no,
+
+my_read_opts :: SysConfigList
+my_read_opts = [ withValidate no,
withParseHTML yes,
- withInputEncoding utf8,
withWarnings no ]
+-- | My version of HandsomeSoup's parseHTML.
+my_read :: String -> IOStateArrow s b XmlTree
+my_read = readString my_read_opts
+
+-- | Try to parse the given article using HXT. We try a few different
+-- methods; if none of them work, we return 'Nothing'.
+get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
+get_xml_from_article s = do
+ article <- real_article_path s
+ is_file <- doesFileExist article
+ case is_file of
+ True -> do
+ contents <- readFile article
+ return $ Just $ my_read contents
+ False -> do
+ -- Download the URL and try to parse it.
+ return Nothing
+
-- | If we're given an empty path, return a handle to
-- 'stdout'. Otherwise, open the given file and return a read/write
-- handle to that.
main = do
Args{..} <- apply_args
output_handle <- get_output_handle output
- input_path <- real_article_path article
- let html = my_read input_path
- result <- parse html
+ maybe_html <- get_xml_from_article article
+
+ case maybe_html of
+
+ Just html -> do
+ result <- parse html
+ case result of
+ Just stuff -> epublish stuff output_handle
+ Nothing -> return ()
- case result of
- Just stuff -> epublish stuff output_handle
- Nothing -> return ()
+ Nothing -> do
+ _ <- show_help
+ return ()
putStrLn "Done."