gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/Article.hs
Fix the duplicated full story body bug.
[dead/lwn-epub.git] / src / LWN / Article.hs
1 module LWN.Article (
2 Article(..),
3 Byline(..),
4 Title(..),
5 BodyHtml(..),
6 article_tests,
7 real_article_path
8 )
9 where
10
11 import Data.List (isPrefixOf)
12 import System.Directory (doesFileExist)
13 import Test.HUnit (Assertion, assertEqual)
14 import Test.Framework (Test, testGroup)
15 import Test.Framework.Providers.HUnit (testCase)
16 import Text.Regex.Posix ((=~))
17 import Text.XML.HXT.Core (
18 selem,
19 none,
20 runX,
21 txt,
22 xshow)
23
24 import LWN.URI (
25 add_trailing_slash,
26 is_lwn_url,
27 try_make_absolute_url,
28 make_https)
29
30 import LWN.XHTML (XHTML, XML, to_xhtml, to_xml)
31
-- | The title of an article. Wrapping the 'String' lets the title
--   carry its own 'Show', 'XHTML' and 'XML' instances.
newtype Title =
  Title
    { getTitle :: String -- ^ Unwrap the underlying title text.
    }
-- | The (optional) byline of an article. 'Nothing' means the article
--   has no byline at all.
newtype Byline =
  Byline
    { getByline :: Maybe String -- ^ Unwrap the underlying byline, if any.
    }
-- | The body of an article, kept as a raw string (presumably an HTML
--   fragment, going by the name).
newtype BodyHtml =
  BodyHtml
    { getBodyHtml :: String -- ^ Unwrap the underlying body string.
    }
35
-- | Showing a 'Title' yields the bare title text, without quotes or
--   constructor name.
instance Show Title where
  show (Title text) = text
38
-- | Showing a 'Byline' yields the byline text, or the empty string
--   when there is no byline.
instance Show Byline where
  show = maybe "" id . getByline
42
-- | Showing a 'BodyHtml' yields the wrapped string unchanged.
instance Show BodyHtml where
  show (BodyHtml html) = html
45
-- | A 'Title' is rendered as a second-level heading. The text is
--   inserted as-is; no escaping is performed here.
instance XHTML Title where
  to_xhtml heading = concat ["<h2>", getTitle heading, "</h2>"]
48
-- | A 'Byline' is rendered as an emphasized paragraph. A missing
--   byline renders as the empty string.
instance XHTML Byline where
  to_xhtml = maybe "" emphasized . getByline
    where
      emphasized bl = concat ["<p><em>", bl, "</em></p>"]
52
-- | A 'BodyHtml' is rendered by returning the wrapped string
--   verbatim.
instance XHTML BodyHtml where
  to_xhtml (BodyHtml html) = html
55
56
-- | A 'Title' becomes an @h2@ element containing a single text node.
instance XML Title where
  to_xml heading = selem "h2" [txt (getTitle heading)]
60
-- | A 'Byline' becomes a @p@ element wrapping an @em@ element that
--   holds the byline text. A missing byline produces no nodes.
instance XML Byline where
  to_xml = maybe none emphasized . getByline
    where
      emphasized bl = selem "p" [selem "em" [txt bl]]
65
-- | The body is deliberately inserted as one text node holding the
--   whole string, rather than being parsed into a tree.
instance XML BodyHtml where
  -- Leave this alone: an earlier attempt to do it "correctly" made
  -- everything fall apart.
  to_xml = txt . getBodyHtml
70
-- | An article: a title, an optional byline, and a body.
data Article =
  Article
    { title     :: Title    -- ^ The article's title.
    , byline    :: Byline   -- ^ The article's byline, possibly absent.
    , body_html :: BodyHtml -- ^ The article's body.
    }
74
-- | An 'Article' is rendered as a @div@ containing the rendered
--   title, byline and body, in that order.
instance XHTML Article where
  to_xhtml article =
    concat
      [ "<div>"
      , to_xhtml (title article)
      , to_xhtml (byline article)
      , to_xhtml (body_html article)
      , "</div>"
      ]
82
-- | An 'Article' becomes a @div@ element whose children are the XML
--   for the title, byline and body, in that order.
instance XML Article where
  to_xml article =
    selem
      "div"
      [ to_xml (title article)
      , to_xml (byline article)
      , to_xml (body_html article)
      ]
86
-- | Turn whatever the user gave us into either a filesystem path or
--   a URL. If the argument names an existing file, it is returned
--   untouched. Otherwise we make our best guess at a URL (see
--   @url_guess@ below) and pass the guess through
--   'add_trailing_slash'.
real_article_path :: String -> IO String
real_article_path path = fmap choose (doesFileExist path)
  where
    -- An existing file wins; anything else is treated as a URL.
    choose :: Bool -> String
    choose True  = path
    choose False = add_trailing_slash url_guess

    -- The first matching case decides how the URL is built.
    url_guess :: String
    url_guess
      -- Already an LWN URL: just force https.
      | is_lwn_url path = make_https path
      -- Something like "current" or "current/bigpage".
      | "current" `isPrefixOf` path = try_make_absolute_url ("/" ++ path)
      -- A bare article number.
      | path =~ "^[0-9]+$" = try_make_absolute_url ("Articles/" ++ path)
      -- A site-relative article path.
      | path =~ "^/Articles/[0-9]+/?$" = try_make_absolute_url path
      -- Nothing matched; hand back what we were given.
      | otherwise = path
108
109
110
-- | The bare word \"current\" should resolve to the current-edition
--   URL.
test_current_article_path :: Assertion
test_current_article_path =
  real_article_path "current" >>=
    assertEqual
      "Current article path constructed"
      "https://lwn.net/current/"
116
-- | \"current/bigpage\" should resolve to the bigpage URL of the
--   current edition.
test_current_bigpage_article_path :: Assertion
test_current_bigpage_article_path =
  real_article_path "current/bigpage" >>=
    assertEqual
      "Current bigpage article path constructed"
      "https://lwn.net/current/bigpage"
122
-- | A bare article number should resolve to that article's URL.
test_numbered_article_path :: Assertion
test_numbered_article_path =
  real_article_path "69" >>=
    assertEqual
      "Numbered article path constructed"
      "https://lwn.net/Articles/69/"
128
129
-- | A complete https article URL should come back unchanged.
test_full_article_path :: Assertion
test_full_article_path =
  real_article_path "https://lwn.net/Articles/502979/" >>=
    assertEqual
      "Full article path left alone"
      "https://lwn.net/Articles/502979/"
135
-- | A plain http article URL should be upgraded to https.
test_non_https_article_path :: Assertion
test_non_https_article_path =
  real_article_path "http://lwn.net/Articles/502979/" >>=
    assertEqual
      "Non-https URL made https"
      "https://lwn.net/Articles/502979/"
141
142
143
-- | Compares the output of (xshow . to_xml) and to_xhtml; they should
--   match.
test_to_xml :: Assertion
test_to_xml = do
  -- runX hands back every result produced by the arrow as a list.
  actual_xml <- runX . xshow $ to_xml input_article

  -- Compare the whole result list against the one string we expect,
  -- rather than indexing into it with (!!). Indexing is partial: an
  -- empty result would blow up with an index error instead of
  -- producing a readable assertion failure.
  assertEqual
    "The to_xml function works on a trivial example"
    [expected_xml]
    actual_xml
  where
    t = Title "Hello, world!"
    bl = Byline $ Just "Breaking News"
    b = BodyHtml "<p>Hello, world!</p>"
    input_article = Article t bl b

    -- The string rendering is the reference the XML must agree with.
    expected_xml = to_xhtml input_article
162
163
164
-- | All of the tests defined in this module, bundled into one group
--   for the test runner.
article_tests :: Test
article_tests =
  testGroup "Article Tests" (map (uncurry testCase) named_assertions)
  where
    -- Each test's display name paired with the assertion it runs.
    named_assertions :: [(String, Assertion)]
    named_assertions =
      [ ("Current article path constructed", test_current_article_path)
      , ( "Current bigpage article path constructed"
        , test_current_bigpage_article_path )
      , ("Numbered article path constructed", test_numbered_article_path)
      , ("Full article path left alone", test_full_article_path)
      , ("Non-https URL made https", test_non_https_article_path)
      , ("The to_xml function works on a trivial example", test_to_xml)
      ]