]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/FullPage.hs
cc89737d18e933227847181fedf31b98b514fa6a
[dead/lwn-epub.git] / src / LWN / FullPage.hs
1 module LWN.FullPage
2 where
3
4 import Data.String.Utils (strip)
5 import Data.Maybe (fromJust)
6 import Data.Tree.NTree.TypeDefs (NTree)
7 import Text.XML.HXT.Core (
8 IOSArrow,
9 XmlTree,
10 XNode,
11 (>>>),
12 (/>),
13 getText,
14 runX,
15 xshow
16 )
17 import Text.HandsomeSoup (css)
18
19 import Epublishable
20 import LWN.Article
21 import XHTML
22
23 -- | An LWN page with more than one article on it. These require
24 -- different parsing and display functions than the single-article
25 -- pages.
26 data FullPage = FullPage { articles :: [Article] }
27
28 articles_xhtml :: FullPage -> String
29 articles_xhtml fp = concatMap show (articles x)
30
31 instance XHTML FullPage where
32 to_xhtml fp =
33 "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
34 "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
35 "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
36 "\n<head>\n" ++
37 " <meta http-equiv=\"Content-Type\"" ++
38 " content=\"application/xhtml+xml; charset=utf-8\" />" ++
39 " <title>" ++ (headline a) ++ "</title>\n" ++
40 "</head>\n" ++
41 "<body>\n" ++
42 "<div>\n\n" ++
43 (articles_xhtml fp) ++
44 "\n\n</div>\n" ++
45 "\n</body>\n" ++
46 "</html>"
47
48 instance Epublishable FullPage where
49 parse xml = do
50 articles <- parse_articles xml
51 case articles of
52 (x:xs)@all -> return $ Just $ FullPage all
53 _ -> return Nothing
54
55 title _ = "LWN.net"
56
57
58 -- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
59 -- parse_headline xml = do
60 -- let element_filter = xml >>> css "div.PageHeadline h1"
61 -- let element_text_filter = element_filter /> getText
62 -- element_text <- runX element_text_filter
63 -- return $ case element_text of
64 -- [x] -> Just $ strip x
65 -- [] -> Nothing
66 -- _ -> error "Found more than one headline."
67
68 -- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
69 -- parse_byline xml = do
70 -- let element_filter = xml >>> css "div.Byline"
71 -- let element_text_filter = element_filter /> getText
72 -- element_text <- runX element_text_filter
73 -- return $ case element_text of
74 -- [x] -> Just $ strip x
75 -- [] -> Nothing
76 -- _ -> error "Found more than one byline."
77
78
79 -- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
80 -- parse_body xml = do
81 -- let element_filter = xml >>> css "div.ArticleText"
82 -- let element_html_filter = xshow element_filter
83 -- element_html <- runX element_html_filter
84 -- return $ case element_html of
85 -- [x] -> Just x
86 -- [] -> Nothing
87 -- _ -> error "Found more than one article."
88
89
90 -- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
91 -- parse_articles xml = do
92 -- parsed_headline <- parse_headline xml
93 -- parsed_byline <- parse_byline xml
94 -- parsed_body <- parse_body xml
95 -- let headline' = fromJust parsed_headline
96 -- let byline' = fromJust parsed_byline
97 -- let body' = fromJust parsed_body
98 -- return $ Just $ [Article headline' byline' body']
99