module LWN.ArticlePage where import Data.String.Utils (strip) import Data.Maybe (fromJust) import Data.Tree.NTree.TypeDefs (NTree) import Text.XML.HXT.Core ( IOSArrow, XmlTree, XNode, (>>>), (/>), getText, runX, xshow ) import Text.HandsomeSoup (css) import Epublishable import LWN.Article import XHTML -- | Defines the ArticlePage data type, containing one 'Article'. data ArticlePage = ArticlePage { article :: Article } instance XHTML ArticlePage where to_xhtml (ArticlePage a) = "" ++ "" ++ "" ++ " " ++ " " ++ (show $ LWN.Article.title a) ++ "" ++ "" ++ "" ++ "
" ++ (to_xhtml a) ++ "
" ++ "" ++ "" instance Epublishable ArticlePage where parse xml = do articles <- parse_articles xml case articles of Just [x] -> return $ Just $ ArticlePage x _ -> return Nothing title (ArticlePage x) = show $ LWN.Article.title x -- | Takes data from an LWN page and determines whether or not it's a -- single article (as opposed to a page with multiple articles). is_article_page :: String -> IO Bool is_article_page _ = return True parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) parse_headline xml = do let element_filter = xml >>> css "div.PageHeadline h1" let element_text_filter = element_filter /> getText element_text <- runX element_text_filter return $ case element_text of [x] -> Just $ strip x [] -> Nothing _ -> error "Found more than one headline." parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) parse_byline xml = do let element_filter = xml >>> css "div.Byline" let element_text_filter = element_filter /> getText element_text <- runX element_text_filter return $ case element_text of [x] -> Just $ strip x [] -> Nothing _ -> error "Found more than one byline." parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) parse_body xml = do let element_filter = xml >>> css "div.ArticleText" let element_html_filter = xshow element_filter element_html <- runX element_html_filter return $ case element_html of [x] -> Just x [] -> Nothing _ -> error "Found more than one article." parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) parse_articles xml = do parsed_headline <- parse_headline xml parsed_byline <- parse_byline xml parsed_body <- parse_body xml let title' = Title (fromJust parsed_headline) let byline' = Byline parsed_byline let body' = BodyHtml (fromJust parsed_body) return $ Just $ [Article title' byline' body']