{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- | Command-line entry point: read an article from disk, parse it with
--   HXT, and publish the result to a file or to 'stdout'.
module Main
where

import Prelude hiding (readFile)

import Control.Exception (bracket)
import System.Directory (doesFileExist)
import System.IO (
  Handle,
  IOMode (WriteMode),
  hClose,
  openBinaryFile,
  stdout )
import System.IO.UTF8 (readFile)
import Text.XML.HXT.Core

import CommandLine (Args(..), apply_args, show_help)
import LWN.Page


-- | The HXT options used for every document we read: validation off,
--   HTML parsing on, warnings off.
my_read_opts :: SysConfigList
my_read_opts =
  [ withValidate no
  , withParseHTML yes
  , withWarnings no
  ]


-- | My version of HandsomeSoup's parseHTML: 'readString' applied to
--   'my_read_opts'.
my_read :: String -> IOStateArrow s b XmlTree
my_read = readString my_read_opts


-- | Try to obtain an HXT arrow for the given article.
--
--   The argument is first passed through 'real_article_path'. If the
--   result names an existing file, the file is read (via the UTF8
--   'readFile') and its contents are wrapped with 'my_read'.
--
--   Otherwise 'Nothing' is returned.
get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
get_xml_from_article s = do
  article <- real_article_path s
  is_file <- doesFileExist article
  if is_file
    then do
      contents <- readFile article
      return $ Just $ my_read contents
    else
      -- TODO: download the URL and try to parse it. Not implemented
      -- yet, so anything that isn't a local file yields Nothing.
      return Nothing


-- | If we're given an empty path, return a handle to
--   'stdout'. Otherwise, open the given file in binary 'WriteMode'
--   and return that (write-only) handle.
--
--   Note that 'WriteMode' truncates an existing file, so callers
--   should only ask for the handle once they have something to write.
get_output_handle :: FilePath -> IO Handle
get_output_handle path =
  if null path
    then return stdout
    else openBinaryFile path WriteMode


-- | Convert the given article to either a URL or a filesystem path.
--
--   The intent is: if the article exists on the filesystem, treat it
--   as a file; otherwise check whether it's a URL; failing that, try
--   to construct a URL from what we're given.
--
--   None of that is implemented yet: the argument is currently
--   returned unchanged.
real_article_path :: String -> IO String
real_article_path = return


-- | Parse the command line, load and parse the requested article, and
--   publish it. If the article can't be loaded or parsed, show the
--   help text instead.
main :: IO ()
main = do
  Args{..} <- apply_args
  maybe_html <- get_xml_from_article article

  case maybe_html of
    Nothing -> usage
    Just html -> do
      result <- parse html
      case result of
        Nothing -> usage
        Just stuff ->
          -- Open the output only now that there is something to
          -- write: opening it earlier would truncate an existing
          -- file even when the input turns out to be unusable.
          -- 'bracket' makes sure the handle is closed (and thereby
          -- flushed) whether or not 'epublish' throws.
          bracket (get_output_handle output) hClose (epublish stuff)
  where
    -- Show the help text, discarding whatever 'show_help' returns.
    usage :: IO ()
    usage = do
      _ <- show_help
      return ()