X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FFullPage.hs;h=cc89737d18e933227847181fedf31b98b514fa6a;hp=9ff782c89bc0b7e60cda43a7d47379b97470083b;hb=d5bab2c486d30e54d3e94cc3bdbb230f2cd1f3f3;hpb=b591d5f5c3c47d253c24144beae1edf8648cd94b diff --git a/src/LWN/FullPage.hs b/src/LWN/FullPage.hs index 9ff782c..cc89737 100644 --- a/src/LWN/FullPage.hs +++ b/src/LWN/FullPage.hs @@ -1,6 +1,21 @@ module LWN.FullPage where +import Data.String.Utils (strip) +import Data.Maybe (fromJust) +import Data.Tree.NTree.TypeDefs (NTree) +import Text.XML.HXT.Core ( + IOSArrow, + XmlTree, + XNode, + (>>>), + (/>), + getText, + runX, + xshow + ) +import Text.HandsomeSoup (css) + import Epublishable import LWN.Article import XHTML @@ -10,8 +25,25 @@ import XHTML -- pages. data FullPage = FullPage { articles :: [Article] } +articles_xhtml :: FullPage -> String +articles_xhtml fp = concatMap show (articles x) + instance XHTML FullPage where - to_xhtml fp = "" + to_xhtml fp = + "" ++ + "" ++ + "\n\n" ++ + " " ++ + " " ++ (headline a) ++ "\n" ++ + "\n" ++ + "\n" ++ + "
\n\n" ++ + (articles_xhtml fp) ++ + "\n\n
\n" ++ + "\n\n" ++ + "" instance Epublishable FullPage where parse xml = do @@ -21,3 +53,47 @@ instance Epublishable FullPage where _ -> return Nothing title _ = "LWN.net" + + +-- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +-- parse_headline xml = do +-- let element_filter = xml >>> css "div.PageHeadline h1" +-- let element_text_filter = element_filter /> getText +-- element_text <- runX element_text_filter +-- return $ case element_text of +-- [x] -> Just $ strip x +-- [] -> Nothing +-- _ -> error "Found more than one headline." + +-- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +-- parse_byline xml = do +-- let element_filter = xml >>> css "div.Byline" +-- let element_text_filter = element_filter /> getText +-- element_text <- runX element_text_filter +-- return $ case element_text of +-- [x] -> Just $ strip x +-- [] -> Nothing +-- _ -> error "Found more than one byline." + + +-- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +-- parse_body xml = do +-- let element_filter = xml >>> css "div.ArticleText" +-- let element_html_filter = xshow element_filter +-- element_html <- runX element_html_filter +-- return $ case element_html of +-- [x] -> Just x +-- [] -> Nothing +-- _ -> error "Found more than one article." + + +-- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) +-- parse_articles xml = do +-- parsed_headline <- parse_headline xml +-- parsed_byline <- parse_byline xml +-- parsed_body <- parse_body xml +-- let headline' = fromJust parsed_headline +-- let byline' = fromJust parsed_byline +-- let body' = fromJust parsed_body +-- return $ Just $ [Article headline' byline' body'] +