X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FArticlePage.hs;fp=src%2FLWN%2FArticlePage.hs;h=0000000000000000000000000000000000000000;hp=5964013f418303db52ce1ebe7dfdea123cdfe70c;hb=6f0e6cbece7e1b1a3c6b43d19eb2f29088af981c;hpb=ed2ed8abd62ba3dec7f799253de1133732f8c153 diff --git a/src/LWN/ArticlePage.hs b/src/LWN/ArticlePage.hs deleted file mode 100644 index 5964013..0000000 --- a/src/LWN/ArticlePage.hs +++ /dev/null @@ -1,101 +0,0 @@ -module LWN.ArticlePage -where - -import Data.String.Utils (strip) -import Data.Maybe (fromJust) -import Data.Tree.NTree.TypeDefs (NTree) -import Text.XML.HXT.Core ( - IOSArrow, - XmlTree, - XNode, - (>>>), - (/>), - getText, - runX, - xshow - ) -import Text.HandsomeSoup (css) - -import Epublishable -import LWN.Article -import XHTML - --- | Defines the ArticlePage data type, containing one 'Article'. -data ArticlePage = ArticlePage { article :: Article } - - -instance XHTML ArticlePage where - to_xhtml (ArticlePage a) = - "" ++ - "" ++ - "" ++ - " " ++ - " " ++ (show $ LWN.Article.title a) ++ "" ++ - "" ++ - "" ++ - "
" ++ - (to_xhtml a) ++ - "
" ++ - "" ++ - "" - - -instance Epublishable ArticlePage where - parse xml = do - articles <- parse_articles xml - case articles of - Just [x] -> return $ Just $ ArticlePage x - _ -> return Nothing - - title (ArticlePage x) = show $ LWN.Article.title x - - --- | Takes data from an LWN page and determines whether or not it's a --- single article (as opposed to a page with multiple articles). -is_article_page :: String -> IO Bool -is_article_page _ = return True - - -parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_headline xml = do - let element_filter = xml >>> css "div.PageHeadline h1" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one headline." - -parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_byline xml = do - let element_filter = xml >>> css "div.Byline" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one byline." - - -parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_body xml = do - let element_filter = xml >>> css "div.ArticleText" - let element_html_filter = xshow element_filter - element_html <- runX element_html_filter - return $ case element_html of - [x] -> Just x - [] -> Nothing - _ -> error "Found more than one article." - - -parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) -parse_articles xml = do - parsed_headline <- parse_headline xml - parsed_byline <- parse_byline xml - parsed_body <- parse_body xml - let title' = Title (fromJust parsed_headline) - let byline' = Byline parsed_byline - let body' = BodyHtml (fromJust parsed_body) - return $ Just $ [Article title' byline' body']