module LWN.FullPage where import Data.String.Utils (strip) import Data.Maybe (fromJust) import Data.Tree.NTree.TypeDefs (NTree) import Text.XML.HXT.Core ( IOSArrow, XmlTree, XNode, (>>>), (/>), getText, runX, xshow ) import Text.HandsomeSoup (css) import Epublishable import LWN.Article import XHTML -- | An LWN page with more than one article on it. These require -- different parsing and display functions than the single-article -- pages. data FullPage = FullPage { articles :: [Article] } articles_xhtml :: FullPage -> String articles_xhtml fp = concatMap show (articles x) instance XHTML FullPage where to_xhtml fp = "" ++ "" ++ "\n\n" ++ " " ++ " " ++ (headline a) ++ "\n" ++ "\n" ++ "\n" ++ "
\n\n" ++ (articles_xhtml fp) ++ "\n\n
\n" ++ "\n\n" ++ "" instance Epublishable FullPage where parse xml = do articles <- parse_articles xml case articles of (x:xs)@all -> return $ Just $ FullPage all _ -> return Nothing title _ = "LWN.net" -- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -- parse_headline xml = do -- let element_filter = xml >>> css "div.PageHeadline h1" -- let element_text_filter = element_filter /> getText -- element_text <- runX element_text_filter -- return $ case element_text of -- [x] -> Just $ strip x -- [] -> Nothing -- _ -> error "Found more than one headline." -- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -- parse_byline xml = do -- let element_filter = xml >>> css "div.Byline" -- let element_text_filter = element_filter /> getText -- element_text <- runX element_text_filter -- return $ case element_text of -- [x] -> Just $ strip x -- [] -> Nothing -- _ -> error "Found more than one byline." -- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -- parse_body xml = do -- let element_filter = xml >>> css "div.ArticleText" -- let element_html_filter = xshow element_filter -- element_html <- runX element_html_filter -- return $ case element_html of -- [x] -> Just x -- [] -> Nothing -- _ -> error "Found more than one article." -- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) -- parse_articles xml = do -- parsed_headline <- parse_headline xml -- parsed_byline <- parse_byline xml -- parsed_body <- parse_body xml -- let headline' = fromJust parsed_headline -- let byline' = fromJust parsed_byline -- let body' = fromJust parsed_body -- return $ Just $ [Article headline' byline' body']