Add some tests for the article URL construction.
[dead/lwn-epub.git] / src / Main.hs
1 {-# LANGUAGE ScopedTypeVariables, RecordWildCards #-}
2 module Main
3 where
4
5 import Prelude hiding (readFile)
6 import System.Directory (doesFileExist)
7 import System.IO (
8 Handle,
9 IOMode (WriteMode),
10 openBinaryFile,
11 stdout
12 )
13 import System.IO.UTF8 (readFile)
14 import Test.HUnit (Assertion, assertEqual)
15 import Test.Framework (Test, testGroup)
16 import Test.Framework.Providers.HUnit (testCase)
17 import Text.Regex.Posix ((=~))
18 import Text.XML.HXT.Core
19
20 import CommandLine (show_help)
21 import Configuration (Cfg(..), get_cfg)
22 import LWN.Page
23 import LWN.URI (is_lwn_url, make_absolute_url, make_https)
24 import Misc (contains)
25
26
27 my_read_opts :: SysConfigList
28 my_read_opts = [ withValidate no,
29 withParseHTML yes,
30 withWarnings no ]
31
32 -- | My version of HandsomeSoup's parseHTML.
33 my_read :: String -> IOStateArrow s b XmlTree
34 my_read = readString my_read_opts
35
36 -- | Try to parse the given article using HXT. We try a few different
37 -- methods; if none of them work, we return 'Nothing'.
38 get_xml_from_article :: String -> IO (Maybe (IOStateArrow s b XmlTree))
39 get_xml_from_article s = do
40 article <- real_article_path s
41 is_file <- doesFileExist article
42 case is_file of
43 True -> do
44 contents <- readFile article
45 return $ Just $ my_read contents
46 False -> do
47 -- Download the URL and try to parse it.
48 return Nothing
49
50 -- | If we're given an empty path, return a handle to
51 -- 'stdout'. Otherwise, open the given file and return a read/write
52 -- handle to that.
53 get_output_handle :: FilePath -> IO Handle
54 get_output_handle path =
55 if (null path) then
56 return stdout
57 else
58 openBinaryFile path WriteMode
59
60
61
62 -- | Convert the given article to either a URL or a filesystem
63 -- path. If the given article exists on the filesystem, we assume
64 -- it's a file. Otherwise, we check to see if it's a URL. Failing
65 -- that, we try to construct a URL from what we're given and do our
66 -- best.
67 real_article_path :: String -> IO String
68 real_article_path s = do
69 is_file <- doesFileExist s
70 return $ if is_file then s else check_cases
71 where
72 abs_current =
73 case make_absolute_url "current" of
74 Nothing -> s
75 Just ac -> ac
76 abs_article =
77 case make_absolute_url ("Articles/" ++ s) of
78 Nothing -> s
79 Just as -> as
80
81 check_cases :: String
82 check_cases
83 | is_lwn_url s = make_https s
84 | s `contains` "current" = abs_current
85 | s =~ "^[0-9]+$" = abs_article
86 | otherwise = s -- Give up
87
88 main :: IO ()
89 main = do
90 Cfg{..} <- get_cfg
91 output_handle <- get_output_handle output
92 maybe_html <- get_xml_from_article article
93
94 case maybe_html of
95
96 Just html -> do
97 result <- parse html
98 case result of
99 Just stuff -> epublish stuff output_handle
100 Nothing -> do
101 _ <- show_help
102 return ()
103
104 Nothing -> do
105 _ <- show_help
106 return ()
107
108
109
110 test_current_article_path :: Assertion
111 test_current_article_path = do
112 let expected = "https://lwn.net/current"
113 actual <- real_article_path "current"
114 assertEqual "Current article path constructed" expected actual
115
116 test_numbered_article_path :: Assertion
117 test_numbered_article_path = do
118 let expected = "https://lwn.net/Articles/69"
119 actual <- real_article_path "69" -- I'm twelve
120 assertEqual "Numbered article path constructed" expected actual
121
122
123 test_full_article_path :: Assertion
124 test_full_article_path = do
125 let expected = "https://lwn.net/Articles/502979/"
126 actual <- real_article_path "https://lwn.net/Articles/502979/"
127 assertEqual "Full article path left alone" expected actual
128
129 test_non_https_article_path :: Assertion
130 test_non_https_article_path = do
131 let expected = "https://lwn.net/Articles/502979/"
132 actual <- real_article_path "http://lwn.net/Articles/502979/"
133 assertEqual "Non-https URL made https" expected actual
134
135 main_tests :: Test
136 main_tests =
137 testGroup "Main Tests" [
138 testCase "Current article path constructed" test_current_article_path,
139 testCase "Numbered article path constructed" test_numbered_article_path,
140 testCase "Full article path left alone" test_full_article_path,
141 testCase "Non-https URL made https" test_non_https_article_path ]
142