4 import Data.String.Utils (strip)
5 import Data.Maybe (fromJust)
6 import Data.Tree.NTree.TypeDefs (NTree)
7 import Text.XML.HXT.Core (
17 import Text.HandsomeSoup (css)
23 -- | Defines the ArticlePage data type, containing one 'Article'.
24 data ArticlePage = ArticlePage { article :: Article }
27 instance XHTML ArticlePage where
28 to_xhtml (ArticlePage a) =
29 "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
30 "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
31 "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
33 " <meta http-equiv=\"Content-Type\"" ++
34 " content=\"application/xhtml+xml; charset=utf-8\" />" ++
35 " <title>" ++ (headline a) ++ "</title>\n" ++
45 instance Epublishable ArticlePage where
47 articles <- parse_articles xml
49 Just [x] -> return $ Just $ ArticlePage x
52 title (ArticlePage x) = headline x
55 -- | Takes data from an LWN page and determines whether or not it's a
56 -- single article (as opposed to a page with multiple articles).
57 is_article_page :: String -> IO Bool
58 is_article_page _ = return True
61 parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
62 parse_headline xml = do
63 let element_filter = xml >>> css "div.PageHeadline h1"
64 let element_text_filter = element_filter /> getText
65 element_text <- runX element_text_filter
66 return $ case element_text of
69 _ -> error "Found more than one headline."
71 parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
73 let element_filter = xml >>> css "div.Byline"
74 let element_text_filter = element_filter /> getText
75 element_text <- runX element_text_filter
76 return $ case element_text of
79 _ -> error "Found more than one byline."
82 parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
84 let element_filter = xml >>> css "div.ArticleText"
85 let element_html_filter = xshow element_filter
86 element_html <- runX element_html_filter
87 return $ case element_html of
90 _ -> error "Found more than one article."
93 parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
94 parse_articles xml = do
95 parsed_headline <- parse_headline xml
96 parsed_byline <- parse_byline xml
97 parsed_body <- parse_body xml
98 let headline' = fromJust parsed_headline
99 let byline' = fromJust parsed_byline
100 let body' = fromJust parsed_body
101 return $ Just $ [Article headline' byline' body']