{-# LANGUAGE ScopedTypeVariables, RecordWildCards #-}
module Main
where

import Prelude hiding (readFile)
import System.Directory (doesFileExist)
import System.IO (
  Handle,
  IOMode (WriteMode),
  openBinaryFile,
  stdout )
import System.IO.UTF8 (readFile)
import Text.Regex.Posix ((=~))
import Text.XML.HXT.Core

import CommandLine (show_help)
import Configuration (Cfg(..), get_cfg)
import LWN.Page
import LWN.URI (is_lwn_url, make_absolute_url)
import Misc (contains)


-- | The HXT system options handed to 'readString' by 'my_read':
--   validation off, HTML parsing on, warnings off.
my_read_opts :: SysConfigList
my_read_opts =
  [ withValidate no
  , withParseHTML yes
  , withWarnings no
  ]


-- | My version of HandsomeSoup's parseHTML. Builds an arrow that
--   reads the given string using 'my_read_opts'.
my_read :: String -> IOStateArrow s b XmlTree
my_read html = readString my_read_opts html


-- | Try to obtain a parseable document for the given article.
--
--   The argument is first resolved with 'real_article_path'. If the
--   result names an existing file, its contents are read and wrapped
--   in a 'my_read' arrow. Otherwise we return 'Nothing'.
get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
get_xml_from_article s = do
  article <- real_article_path s
  on_disk <- doesFileExist article
  if on_disk
    then fmap (Just . my_read) (readFile article)
    else
      -- Download the URL and try to parse it.
      -- (No download is performed in this function; this branch
      -- only reports that nothing was found.)
      return Nothing


-- | If we're given an empty path, return a handle to
--   'stdout'. Otherwise, open the given file in binary mode and
--   return a write handle to it.
get_output_handle :: FilePath -> IO Handle
get_output_handle path
  | null path = return stdout
  | otherwise = openBinaryFile path WriteMode


-- | Convert the given article to either a URL or a filesystem
--   path. If the given article exists on the filesystem, we assume
--   it's a file. Otherwise, we check to see if it's a URL. Failing
--   that, we try to construct a URL from what we're given and do our
--   best.
real_article_path :: String -> IO String
real_article_path s = do
  exists <- doesFileExist s
  if exists
    then return s
    else return guess
  where
    -- Turn the argument into an absolute URL; when that is not
    -- possible, fall back to the original input 's' unchanged.
    absolute_or_input :: String -> String
    absolute_or_input = maybe s id . make_absolute_url

    -- What to use when 's' is not an existing file. The guards are
    -- tried top to bottom, so their order matters.
    guess :: String
    guess
      | is_lwn_url s           = s
      | s `contains` "current" = absolute_or_input "current"
      | s =~ "^[0-9]+$"        = absolute_or_input s
      | otherwise              = s -- Give up


-- | Program entry point.
--
--   Reads the configuration, opens the output handle, and loads the
--   configured article. If the article loads and 'parse' succeeds,
--   the result is passed to 'epublish' together with the output
--   handle. If either step yields 'Nothing', 'show_help' is run
--   instead.
--
--   NOTE(review): the output handle is obtained before the article
--   is loaded, so a failed run has presumably already opened the
--   output file -- confirm this ordering is intended.
main :: IO ()
main = do
  Cfg{..} <- get_cfg
  output_handle <- get_output_handle output
  maybe_html <- get_xml_from_article article
  case maybe_html of
    Nothing -> usage
    Just html -> do
      result <- parse html
      maybe usage (\stuff -> epublish stuff output_handle) result
  where
    -- Run 'show_help' and discard whatever it returns.
    usage :: IO ()
    usage = show_help >> return ()