+++ /dev/null
-module LWN.ArticlePage
-where
-
-import Data.String.Utils (strip)
-import Data.Maybe (fromJust)
-import Data.Tree.NTree.TypeDefs (NTree)
-import Text.XML.HXT.Core (
- IOSArrow,
- XmlTree,
- XNode,
- (>>>),
- (/>),
- getText,
- runX,
- xshow
- )
-import Text.HandsomeSoup (css)
-
-import Epublishable
-import LWN.Article
-import XHTML
-
--- | Defines the ArticlePage data type, containing one 'Article'.
-data ArticlePage = ArticlePage { article :: Article }
-
-
-instance XHTML ArticlePage where
- to_xhtml (ArticlePage a) =
- "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
- "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
- "<head>" ++
- " <meta http-equiv=\"Content-Type\"" ++
- " content=\"application/xhtml+xml; charset=utf-8\" />" ++
- " <title>" ++ (show $ LWN.Article.title a) ++ "</title>" ++
- "</head>" ++
- "<body>" ++
- "<div>" ++
- (to_xhtml a) ++
- "</div>" ++
- "</body>" ++
- "</html>"
-
-
-instance Epublishable ArticlePage where
- parse xml = do
- articles <- parse_articles xml
- case articles of
- Just [x] -> return $ Just $ ArticlePage x
- _ -> return Nothing
-
- title (ArticlePage x) = show $ LWN.Article.title x
-
-
--- | Takes data from an LWN page and determines whether or not it's a
--- single article (as opposed to a page with multiple articles).
-is_article_page :: String -> IO Bool
-is_article_page _ = return True
-
-
-parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
-parse_headline xml = do
- let element_filter = xml >>> css "div.PageHeadline h1"
- let element_text_filter = element_filter /> getText
- element_text <- runX element_text_filter
- return $ case element_text of
- [x] -> Just $ strip x
- [] -> Nothing
- _ -> error "Found more than one headline."
-
-parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
-parse_byline xml = do
- let element_filter = xml >>> css "div.Byline"
- let element_text_filter = element_filter /> getText
- element_text <- runX element_text_filter
- return $ case element_text of
- [x] -> Just $ strip x
- [] -> Nothing
- _ -> error "Found more than one byline."
-
-
-parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
-parse_body xml = do
- let element_filter = xml >>> css "div.ArticleText"
- let element_html_filter = xshow element_filter
- element_html <- runX element_html_filter
- return $ case element_html of
- [x] -> Just x
- [] -> Nothing
- _ -> error "Found more than one article."
-
-
-parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
-parse_articles xml = do
- parsed_headline <- parse_headline xml
- parsed_byline <- parse_byline xml
- parsed_body <- parse_body xml
- let title' = Title (fromJust parsed_headline)
- let byline' = Byline parsed_byline
- let body' = BodyHtml (fromJust parsed_body)
- return $ Just $ [Article title' byline' body']