1 {-# LANGUAGE ScopedTypeVariables, RecordWildCards, DoAndIfThenElse #-}
5 import Control.Monad (when)
6 import Data.Maybe (fromJust)
7 import Prelude hiding (readFile)
8 import System.Directory (doesFileExist)
17 import System.IO.UTF8 (readFile)
18 import Test.HUnit (Assertion, assertEqual)
19 import Test.Framework (Test, testGroup)
20 import Test.Framework.Providers.HUnit (testCase)
21 import Text.Regex.Posix ((=~))
22 import Text.XML.HXT.Core hiding (when)
24 import CommandLine (show_help)
25 import Configuration (Cfg(..), get_cfg, use_account)
26 import LWN.HTTP (get_page, log_in, make_cookie_jar)
27 import LWN.Page (epublish, parse)
28 import LWN.URI (is_lwn_url, make_absolute_url, make_https)
29 import Misc (contains)
-- | HXT system options that 'my_read' hands to 'readString'. Only the
-- first entry (validation switched off) is visible in this excerpt;
-- the remaining entries of the list are not shown here.
32 my_read_opts :: SysConfigList
33 my_read_opts = [ withValidate no,
-- | Our own take on HandsomeSoup's parseHTML: build an HXT arrow that
-- reads the supplied string using the options in 'my_read_opts'.
my_read :: String -> IOStateArrow s b XmlTree
my_read html = readString my_read_opts html
41 -- | Try to parse the given article using HXT. We try a few different
42 -- methods; if none of them work, we return 'Nothing'.
--
-- The article named in the configuration is first resolved with
-- 'real_article_path'. If the result is an existing file, its contents
-- are read and wrapped with 'my_read'. Otherwise the page is fetched
-- with 'get_page', after a 'log_in' attempt when 'use_account' holds.
--
-- NOTE(review): several lines of this function (the file/URL branch
-- header, the cookie jar binding and the failure arms) are not visible
-- in this excerpt; the description above covers only what is shown.
43 get_xml_from_article :: Cfg -> IO (Maybe (IOStateArrow s b XmlTree))
44 get_xml_from_article cfg = do
-- Turn whatever the user supplied into a file path or a URL.
45 my_article <- real_article_path (article cfg)
46 is_file <- doesFileExist my_article
-- Local file: read it and parse the contents directly.
49 contents <- readFile my_article
50 return $ Just $ my_read contents
52 -- Download the URL and try to parse it.
53 if use_account cfg then do
54 -- use_account would be false if these fromJusts would fail.
-- (presumably use_account checks that both credentials are present;
-- confirm in Configuration before relying on it)
56 li_result <- log_in cj
57 (fromJust $ username cfg)
58 (fromJust $ password cfg)
-- A failed login is reported on stderr.
59 when (not li_result) $ do
60 hPutStrLn stderr "Failed to log in."
-- Authenticated fetch: the cookie jar is passed along with the request.
62 html <- get_page (Just cj) my_article
66 Just h -> Just $ my_read h
-- Anonymous fetch: no cookie jar is supplied.
68 html <- get_page Nothing my_article
72 Just h -> Just $ my_read h
74 -- | If we're given an empty path, return a handle to
75 -- 'stdout'. Otherwise, open the given file and return a handle to it.
--
-- The file is opened with 'openBinaryFile' in 'WriteMode', so the
-- handle is for writing only. The branch that selects 'stdout' is not
-- visible in this excerpt.
77 get_output_handle :: FilePath -> IO Handle
78 get_output_handle path =
82 openBinaryFile path WriteMode
86 -- | Convert the given article to either a URL or a filesystem
87 -- path. If the given article exists on the filesystem, we assume
88 -- it's a file. Otherwise, we check to see if it's a URL. Failing
89 -- that, we try to construct a URL from what we're given and do our
-- best.
--
-- When the argument is not an existing file, the cases are tried in
-- this order:
--
-- * an LWN URL (according to 'is_lwn_url') is passed through
--   'make_https';
--
-- * a string containing @current@ yields @abs_current@;
--
-- * a string made up solely of digits yields @abs_article@;
--
-- * anything else is returned unchanged.
--
-- NOTE(review): the bindings of @abs_current@ and @abs_article@ are
-- only partly visible here; they appear to be built from
-- 'make_absolute_url' applied to @"current"@ and @"Articles/" ++ s@
-- respectively. Confirm against the full source.
91 real_article_path :: String -> IO String
92 real_article_path s = do
93 is_file <- doesFileExist s
-- An existing file wins over every URL interpretation.
94 return $ if is_file then s else check_cases
97 case make_absolute_url "current" of
101 case make_absolute_url ("Articles/" ++ s) of
105 check_cases :: String
107 | is_lwn_url s = make_https s
108 | s `contains` "current" = abs_current
109 | s =~ "^[0-9]+$" = abs_article
110 | otherwise = s -- Give up
-- Obtain the handle for the output destination named in the configuration.
115 output_handle <- get_output_handle (output cfg)
-- Fetch and parse the article; the result is wrapped in a 'Maybe'.
117 maybe_html <- get_xml_from_article cfg
-- On success, pass the parsed document and the output handle to 'epublish'.
123 Just stuff -> epublish stuff output_handle
-- | The bare word @current@ should be resolved to
-- @https://lwn.net/current@.
test_current_article_path :: Assertion
test_current_article_path =
  real_article_path "current"
    >>= assertEqual "Current article path constructed" "https://lwn.net/current"
-- | A purely numeric argument should be resolved to the matching
-- @Articles/@ URL.
test_numbered_article_path :: Assertion
test_numbered_article_path =
  assertEqual description wanted =<< real_article_path "69"
  where
    description = "Numbered article path constructed"
    wanted = "https://lwn.net/Articles/69"
-- | A complete https article URL should come back exactly as it was
-- given.
test_full_article_path :: Assertion
test_full_article_path =
  real_article_path full_url
    >>= assertEqual "Full article path left alone" full_url
  where
    full_url = "https://lwn.net/Articles/502979/"
-- | A plain-http article URL should be rewritten to its https form.
test_non_https_article_path :: Assertion
test_non_https_article_path = do
  resolved <- real_article_path "http://lwn.net/Articles/502979/"
  assertEqual
    "Non-https URL made https"
    "https://lwn.net/Articles/502979/"
    resolved
-- Bundle the four 'real_article_path' assertions into a single
-- test-framework group labelled "Main Tests".
160 testGroup "Main Tests" [
161 testCase "Current article path constructed" test_current_article_path,
162 testCase "Numbered article path constructed" test_numbered_article_path,
163 testCase "Full article path left alone" test_full_article_path,
164 testCase "Non-https URL made https" test_non_https_article_path ]