1 {-# LANGUAGE DoAndIfThenElse #-}
5 import Control.Concurrent (threadDelay)
6 import Control.Monad (when)
7 import Data.Maybe (fromJust, isJust)
8 import Prelude hiding (readFile)
9 import System.Directory (doesFileExist)
18 import System.IO.UTF8 (readFile)
19 import Test.HUnit (Assertion, assertEqual)
20 import Test.Framework (Test, testGroup)
21 import Test.Framework.Providers.HUnit (testCase)
22 import Text.Regex.Posix ((=~))
23 import Text.XML.HXT.Core (
34 import CommandLine (show_help)
35 import Configuration (Cfg(..), get_cfg, use_account)
36 import LWN.HTTP (get_page, log_in, make_cookie_jar)
37 import LWN.Page (epublish, parse)
38 import LWN.URI (add_trailing_slash, is_lwn_url, make_absolute_url, make_https)
39 import Misc (contains)
42 my_read_opts :: SysConfigList
43 my_read_opts = [ withValidate no,
47 -- | My version of HandsomeSoup's parseHTML.
48 my_read :: String -> IOStateArrow s b XmlTree
49 my_read = readString my_read_opts
52 -- |A wrapper around threadDelay which takes seconds instead of
53 -- microseconds as its argument.
54 thread_sleep :: Int -> IO ()
55 thread_sleep seconds = do
56 let microseconds = seconds * (10 ^ (6 :: Int))
57 threadDelay microseconds
60 -- | Try to parse the given article using HXT. We try a few different
61 -- methods; if none of them work, we return 'Nothing'.
62 get_xml_from_article :: Cfg -> IO (Maybe (IOStateArrow s b XmlTree))
63 get_xml_from_article cfg = do
64 my_article <- real_article_path (article cfg)
65 is_file <- doesFileExist my_article
68 contents <- readFile my_article
69 return $ Just $ my_read contents
71 -- Download the URL and try to parse it.
72 if use_account cfg then do
73 -- use_account would be false if these fromJusts would fail.
75 li_result <- log_in cj
76 (fromJust $ username cfg)
77 (fromJust $ password cfg)
79 -- Without this, the cookie file is empty during
83 when (isJust li_result) $ do
84 let msg = "Failed to log in. " ++ (fromJust li_result)
87 html <- get_page (Just cj) my_article
91 let msg = "Failed to retrieve page. " ++ err
94 Right h -> return $ Just $ my_read h
96 html <- get_page Nothing my_article
99 let msg = "Failed to retrieve page. " ++ err
102 Right h -> return $ Just $ my_read h
104 -- | If we're given an empty path, return a handle to
105 -- 'stdout'. Otherwise, open the given file and return a read/write
107 get_output_handle :: FilePath -> IO Handle
108 get_output_handle path =
112 openBinaryFile path WriteMode
116 -- | Convert the given article to either a URL or a filesystem
117 -- path. If the given article exists on the filesystem, we assume
118 -- it's a file. Otherwise, we check to see if it's a URL. Failing
119 -- that, we try to construct a URL from what we're given and do our
121 real_article_path :: String -> IO String
122 real_article_path s = do
123 is_file <- doesFileExist s
124 return $ if is_file then s else add_trailing_slash check_cases
127 case make_absolute_url "current" of
131 case make_absolute_url ("Articles/" ++ s) of
135 check_cases :: String
137 | is_lwn_url s = make_https s
138 | s `contains` "current" = abs_current
139 | s =~ "^[0-9]+$" = abs_article
140 | otherwise = s -- Give up
145 output_handle <- get_output_handle (output cfg)
147 when (use_account cfg) $ do
148 putStrLn "Using account."
150 maybe_html <- get_xml_from_article cfg
156 Just stuff -> epublish stuff output_handle
166 test_current_article_path :: Assertion
167 test_current_article_path = do
168 let expected = "https://lwn.net/current/"
169 actual <- real_article_path "current"
170 assertEqual "Current article path constructed" expected actual
172 test_numbered_article_path :: Assertion
173 test_numbered_article_path = do
174 let expected = "https://lwn.net/Articles/69/"
175 actual <- real_article_path "69" -- I'm twelve
176 assertEqual "Numbered article path constructed" expected actual
179 test_full_article_path :: Assertion
180 test_full_article_path = do
181 let expected = "https://lwn.net/Articles/502979/"
182 actual <- real_article_path "https://lwn.net/Articles/502979/"
183 assertEqual "Full article path left alone" expected actual
185 test_non_https_article_path :: Assertion
186 test_non_https_article_path = do
187 let expected = "https://lwn.net/Articles/502979/"
188 actual <- real_article_path "http://lwn.net/Articles/502979/"
189 assertEqual "Non-https URL made https" expected actual
193 testGroup "Main Tests" [
194 testCase "Current article path constructed" test_current_article_path,
195 testCase "Numbered article path constructed" test_numbered_article_path,
196 testCase "Full article path left alone" test_full_article_path,
197 testCase "Non-https URL made https" test_non_https_article_path ]