4 import Data.String.Utils (strip)
5 import Data.Maybe (fromJust)
6 import Data.Tree.NTree.TypeDefs (NTree)
7 import Text.XML.HXT.Core (
17 import Text.HandsomeSoup (css)
-- | An LWN page that carries more than one article. Such pages are
--   parsed and displayed by different functions than pages holding
--   a single article.
data FullPage = FullPage
  { articles :: [Article] -- ^ The articles contained in the page.
  }
-- | Render all of the articles on a page as one string.
--
--   Each article in the page's 'articles' list is converted with
--   'show', and the results are concatenated in list order. A page
--   with no articles yields the empty string.
articles_xhtml :: FullPage -> String
articles_xhtml fp = concatMap show (articles fp)
-- | 'XHTML' instance for 'FullPage'. The visible lines build the
--   document by string concatenation: an XML declaration, the
--   XHTML 1.1 DOCTYPE, a Content-Type meta element, a title element,
--   and then the output of 'articles_xhtml' for the page.
31 instance XHTML FullPage where
33 "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
34 "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
35 "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
37 " <meta http-equiv=\"Content-Type\"" ++
38 " content=\"application/xhtml+xml; charset=utf-8\" />" ++
-- NOTE(review): `a` is not bound in the lines visible here, and
-- 'FullPage' declares only an 'articles' field -- confirm which value
-- the page title is supposed to come from.
39 " <title>" ++ (headline a) ++ "</title>\n" ++
43 (articles_xhtml fp) ++
-- | 'Epublishable' instance for 'FullPage'. The articles are obtained
--   from 'parse_articles'; when the result is a non-empty list it is
--   wrapped in a 'FullPage' and returned inside 'Just'.
--
--   NOTE(review): the result of 'parse_articles' is matched as a plain
--   list here -- confirm that this agrees with its return type.
instance Epublishable FullPage where
    articles <- parse_articles xml
      -- An as-pattern puts the name before the '@'. The head and tail
      -- are not needed individually, so they are matched with wildcards.
      found@(_:_) -> return $ Just $ FullPage found
58 -- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
59 -- parse_headline xml = do
60 -- let element_filter = xml >>> css "div.PageHeadline h1"
61 -- let element_text_filter = element_filter /> getText
62 -- element_text <- runX element_text_filter
63 -- return $ case element_text of
64 -- [x] -> Just $ strip x
66 -- _ -> error "Found more than one headline."
68 -- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
69 -- parse_byline xml = do
70 -- let element_filter = xml >>> css "div.Byline"
71 -- let element_text_filter = element_filter /> getText
72 -- element_text <- runX element_text_filter
73 -- return $ case element_text of
74 -- [x] -> Just $ strip x
76 -- _ -> error "Found more than one byline."
79 -- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
80 -- parse_body xml = do
81 -- let element_filter = xml >>> css "div.ArticleText"
82 -- let element_html_filter = xshow element_filter
83 -- element_html <- runX element_html_filter
84 -- return $ case element_html of
87 -- _ -> error "Found more than one article."
90 -- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
91 -- parse_articles xml = do
92 -- parsed_headline <- parse_headline xml
93 -- parsed_byline <- parse_byline xml
94 -- parsed_body <- parse_body xml
95 -- let headline' = fromJust parsed_headline
96 -- let byline' = fromJust parsed_byline
97 -- let body' = fromJust parsed_body
98 -- return $ Just $ [Article headline' byline' body']