+module LWN.ArticlePage
+where
+
+import Data.String.Utils (strip)
+import Data.Maybe (fromJust)
+import Data.Tree.NTree.TypeDefs (NTree)
+import Text.XML.HXT.Core (
+ IOSArrow,
+ XmlTree,
+ XNode,
+ (>>>),
+ (/>),
+ getText,
+ runX,
+ xshow
+ )
+import Text.HandsomeSoup (css)
+
+import Epublishable
+import LWN.Article
+import XHTML
+
+-- | Defines the ArticlePage data type, containing one 'Article'.
+data ArticlePage = ArticlePage { article :: Article }
+
+
+instance XHTML ArticlePage where
+ to_xhtml (ArticlePage a) =
+ "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
+ "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
+ "\n<head>\n" ++
+ " <meta http-equiv=\"Content-Type\"" ++
+ " content=\"application/xhtml+xml; charset=utf-8\" />" ++
+ " <title>" ++ (headline a) ++ "</title>\n" ++
+ "</head>\n" ++
+ "<body>\n" ++
+ "<div>\n\n" ++
+ (to_xhtml a) ++
+ "\n\n</div>\n" ++
+ "\n</body>\n" ++
+ "</html>"
+
+
+instance Epublishable ArticlePage where
+ parse xml = do
+ articles <- parse_articles xml
+ case articles of
+ Just [x] -> return $ Just $ ArticlePage x
+ _ -> return Nothing
+
+ title (ArticlePage x) = headline x
+
+
+-- | Takes data from an LWN page and determines whether or not it's a
+-- single article (as opposed to a page with multiple articles).
+is_article_page :: String -> IO Bool
+is_article_page _ = return True
+
+
+parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+parse_headline xml = do
+ let element_filter = xml >>> css "div.PageHeadline h1"
+ let element_text_filter = element_filter /> getText
+ element_text <- runX element_text_filter
+ return $ case element_text of
+ [x] -> Just $ strip x
+ [] -> Nothing
+ _ -> error "Found more than one headline."
+
+parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+parse_byline xml = do
+ let element_filter = xml >>> css "div.Byline"
+ let element_text_filter = element_filter /> getText
+ element_text <- runX element_text_filter
+ return $ case element_text of
+ [x] -> Just $ strip x
+ [] -> Nothing
+ _ -> error "Found more than one byline."
+
+
+parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+parse_body xml = do
+ let element_filter = xml >>> css "div.ArticleText"
+ let element_html_filter = xshow element_filter
+ element_html <- runX element_html_filter
+ return $ case element_html of
+ [x] -> Just x
+ [] -> Nothing
+ _ -> error "Found more than one article."
+
+
+parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
+parse_articles xml = do
+ parsed_headline <- parse_headline xml
+ parsed_byline <- parse_byline xml
+ parsed_body <- parse_body xml
+ let headline' = fromJust parsed_headline
+ let byline' = fromJust parsed_byline
+ let body' = fromJust parsed_body
+ return $ Just $ [Article headline' byline' body']