{-# LANGUAGE DoAndIfThenElse #-}

module LWN.Page
where

import qualified Data.Map as Map (lookup)
import Data.Time (getCurrentTime)
import System.IO (Handle, hClose, hFlush)
import qualified Data.ByteString.Lazy as B (ByteString, hPut)
import Data.String.Utils (split, strip)
import Data.Maybe (catMaybes, fromJust, isNothing)
import Test.HUnit (Assertion, assertEqual)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Text.Pandoc (
  defaultParserState,
  defaultWriterOptions,
  readHtml,
  writeEPUB,
  writerEPUBMetadata)
import Text.XML.HXT.Core (
  ArrowXml,
  IOSArrow,
  XmlTree,
  (>>>),
  (/>),
  (//>),
  changeAttrValue,
  getAttrValue,
  getChildren,
  getText,
  hasAttrValue,
  hasName,
  isElem,
  mkName,
  none,
  processAttrl,
  processTopDown,
  runX,
  setElemName,
  xshow,
  when)
import Text.HandsomeSoup (css, parseHtml)

import LWN.Article
import LWN.HTTP (ImageMap, download_image_urls)
import LWN.URI (URL, try_make_absolute_url)
import LWN.XHTML (XHTML, to_xhtml)
import Misc (contains)


-- | Run the given document arrow composed with @image_srcs@ and hand
--   the resulting list to 'download_image_urls', returning the
--   'ImageMap' it produces.
--
--   Should be called *after* preprocessing.
--
--   NOTE(review): @image_srcs@ is defined outside this part of the
--   file; presumably it yields the image source URLs of the
--   document -- confirm against its definition.
download_images :: IOSArrow XmlTree XmlTree -> IO ImageMap
download_images xml =
  runX (xml >>> image_srcs) >>= download_image_urls


-- | A page, which holds either a single article or a headline
--   together with a list of articles.
data Page
  = -- | An LWN page with one article on it.
    ArticlePage
      { article :: Article
      }
  | -- | An LWN page with more than one article on it. These require
    --   different parsing and display functions than the single-article
    --   pages.
    FullPage
      { headline :: String
      , articles :: [Article]
      }


instance XHTML Page where
  to_xhtml (ArticlePage a) =
    "" ++
    "" ++
    "
" ++ " " ++ "