1 {-# LANGUAGE DoAndIfThenElse #-}
5 import Control.Monad (when)
6 import Data.Maybe (fromJust)
7 import Prelude hiding (readFile)
8 import System.Directory (doesFileExist)
17 import System.IO.UTF8 (readFile)
18 import Test.HUnit (Assertion, assertEqual)
19 import Test.Framework (Test, testGroup)
20 import Test.Framework.Providers.HUnit (testCase)
21 import Text.Regex.Posix ((=~))
22 import Text.XML.HXT.Core hiding (when)
24 import CommandLine (show_help)
25 import Configuration (Cfg(..), get_cfg, use_account)
26 import LWN.HTTP (get_page, log_in, make_cookie_jar)
27 import LWN.Page (epublish, parse)
28 import LWN.URI (add_trailing_slash, is_lwn_url, make_absolute_url, make_https)
29 import Misc (contains)
32 my_read_opts :: SysConfigList
33 my_read_opts = [ withValidate no,
37 -- | My version of HandsomeSoup's parseHTML.
38 my_read :: String -> IOStateArrow s b XmlTree
39 my_read = readString my_read_opts
41 -- | Try to parse the given article using HXT. We try a few different
42 -- methods; if none of them work, we return 'Nothing'.
43 get_xml_from_article :: Cfg -> IO (Maybe (IOStateArrow s b XmlTree))
44 get_xml_from_article cfg = do
45 my_article <- real_article_path (article cfg)
46 is_file <- doesFileExist my_article
49 contents <- readFile my_article
50 return $ Just $ my_read contents
52 -- Download the URL and try to parse it.
53 if use_account cfg then do
54 -- use_account would be false if these fromJusts would fail.
56 li_result <- log_in cj
57 (fromJust $ username cfg)
58 (fromJust $ password cfg)
59 when (not li_result) $ do
60 hPutStrLn stderr "Failed to log in."
62 html <- get_page (Just cj) my_article
63 print $ fromJust $ html
67 Just h -> Just $ my_read h
69 html <- get_page Nothing my_article
70 putStrLn "Not logged in."
71 print $ fromJust $ html
75 Just h -> Just $ my_read h
77 -- | If we're given an empty path, return a handle to
78 -- 'stdout'. Otherwise, open the given file and return a read/write
80 get_output_handle :: FilePath -> IO Handle
81 get_output_handle path =
85 openBinaryFile path WriteMode
89 -- | Convert the given article to either a URL or a filesystem
90 -- path. If the given article exists on the filesystem, we assume
91 -- it's a file. Otherwise, we check to see if it's a URL. Failing
92 -- that, we try to construct a URL from what we're given and do our
94 real_article_path :: String -> IO String
95 real_article_path s = do
96 is_file <- doesFileExist s
97 return $ if is_file then s else add_trailing_slash check_cases
100 case make_absolute_url "current" of
104 case make_absolute_url ("Articles/" ++ s) of
108 check_cases :: String
110 | is_lwn_url s = make_https s
111 | s `contains` "current" = abs_current
112 | s =~ "^[0-9]+$" = abs_article
113 | otherwise = s -- Give up
118 output_handle <- get_output_handle (output cfg)
120 when (use_account cfg) $ do
121 putStrLn "Using account."
123 maybe_html <- get_xml_from_article cfg
129 Just stuff -> epublish stuff output_handle
139 test_current_article_path :: Assertion
140 test_current_article_path = do
141 let expected = "https://lwn.net/current/"
142 actual <- real_article_path "current"
143 assertEqual "Current article path constructed" expected actual
145 test_numbered_article_path :: Assertion
146 test_numbered_article_path = do
147 let expected = "https://lwn.net/Articles/69/"
148 actual <- real_article_path "69" -- I'm twelve
149 assertEqual "Numbered article path constructed" expected actual
152 test_full_article_path :: Assertion
153 test_full_article_path = do
154 let expected = "https://lwn.net/Articles/502979/"
155 actual <- real_article_path "https://lwn.net/Articles/502979/"
156 assertEqual "Full article path left alone" expected actual
158 test_non_https_article_path :: Assertion
159 test_non_https_article_path = do
160 let expected = "https://lwn.net/Articles/502979/"
161 actual <- real_article_path "http://lwn.net/Articles/502979/"
162 assertEqual "Non-https URL made https" expected actual
166 testGroup "Main Tests" [
167 testCase "Current article path constructed" test_current_article_path,
168 testCase "Numbered article path constructed" test_numbered_article_path,
169 testCase "Full article path left alone" test_full_article_path,
170 testCase "Non-https URL made https" test_non_https_article_path ]