src/LWN/FullPage.hs

   1 module LWN.FullPage
   2 where
   3
   4 import Data.String.Utils (strip)
   5 import Data.Maybe (fromJust)
   6 import Data.Tree.NTree.TypeDefs (NTree)
   7 import Text.XML.HXT.Core (
   8   IOSArrow,
   9   XmlTree,
  10   XNode,
  11   (>>>),
  12   (/>),
  13   getText,
  14   runX,
  15   xshow
  16   )
  17 import Text.HandsomeSoup (css)
  18
  19 import Epublishable
  20 import LWN.Article
  21 import XHTML
  22
  23 -- | An LWN page with more than one article on it. These require
  24 --   different parsing and display functions than the single-article
  25 --   pages.
  26 data FullPage = FullPage { articles :: [Article] }
  27
  28 articles_xhtml :: FullPage -> String
  29 articles_xhtml fp = concatMap show (articles x)
  30
  31 instance XHTML FullPage where
  32   to_xhtml fp =
  33     "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
  34     "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
  35     "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
  36     "\n<head>\n" ++
  37     "  <meta http-equiv=\"Content-Type\"" ++
  38     " content=\"application/xhtml+xml; charset=utf-8\" />" ++
  39     "  <title>" ++ (headline a) ++ "</title>\n" ++
  40     "</head>\n" ++
  41     "<body>\n" ++
  42     "<div>\n\n" ++
  43     (articles_xhtml fp) ++
  44     "\n\n</div>\n" ++
  45     "\n</body>\n" ++
  46     "</html>"
  47
  48 instance Epublishable FullPage where
  49   parse xml = do
  50     articles <- parse_articles xml
  51     case articles of
  52       (x:xs)@all -> return $ Just $ FullPage all
  53       _          -> return Nothing
  54
  55   title _ = "LWN.net"
  56
  57
  58 -- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
  59 -- parse_headline xml = do
  60 --   let element_filter = xml >>> css "div.PageHeadline h1"
  61 --   let element_text_filter = element_filter /> getText
  62 --   element_text <- runX element_text_filter
  63 --   return $ case element_text of
  64 --             [x] -> Just $ strip x
  65 --             []  -> Nothing
  66 --             _   -> error "Found more than one headline."
  67
  68 -- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
  69 -- parse_byline xml = do
  70 --   let element_filter = xml >>> css "div.Byline"
  71 --   let element_text_filter = element_filter /> getText
  72 --   element_text <- runX element_text_filter
  73 --   return $ case element_text of
  74 --             [x] -> Just $ strip x
  75 --             []  -> Nothing
  76 --             _   -> error "Found more than one byline."
  77
  78
  79 -- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
  80 -- parse_body xml = do
  81 --   let element_filter = xml >>> css "div.ArticleText"
  82 --   let element_html_filter = xshow element_filter
  83 --   element_html <- runX element_html_filter
  84 --   return $ case element_html of
  85 --             [x] -> Just x
  86 --             []  -> Nothing
  87 --             _   -> error "Found more than one article."
  88
  89
  90 -- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
  91 -- parse_articles xml = do
  92 --   parsed_headline <- parse_headline xml
  93 --   parsed_byline   <- parse_byline xml
  94 --   parsed_body     <- parse_body xml
  95 --   let headline' = fromJust parsed_headline
  96 --   let byline'   = fromJust parsed_byline
  97 --   let body'     = fromJust parsed_body
  98 --   return $ Just $ [Article headline' byline' body']
  99