{-# LANGUAGE DoAndIfThenElse #-}

module LWN.Page
where

import Control.Concurrent.ParallelIO (parallel)
import qualified Data.Map as Map (lookup)
import Data.Time (getCurrentTime)
import qualified Data.ByteString.Lazy as B (ByteString, hPut)
import Data.String.Utils (split, strip)
import qualified Data.Map as Map (Map, empty, insert)
import Data.Maybe (catMaybes, fromJust, isNothing)
import Prelude hiding (readFile)
import System.IO (Handle, hClose, hFlush)
import Test.HUnit (Assertion, assertEqual)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Text.Pandoc (
  defaultParserState,
  defaultWriterOptions,
  readHtml,
  writeEPUB,
  writerEPUBMetadata)
import Text.XML.HXT.Core (
  ArrowXml,
  IOSArrow,
  XmlTree,
  ($<),
  (>>>),
  (/>),
  (//>),
  changeAttrValue,
  getAttrValue,
  getChildren,
  getText,
  hasName,
  none,
  processAttrl,
  processTopDown,
  this,
  runX,
  xshow,
  when)
import Text.HandsomeSoup (css, parseHtml)

import Configuration (Cfg, full_stories)
import LWN.Article
import LWN.HTTP (
  ImageMap,
  download_image_urls,
  get_article_contents)
import LWN.URI (URL)
import LWN.XHTML (
  XHTML,
  full_story_urls,
  image_srcs,
  full_story_link,
  full_story_paragraph,
  is_image,
  preprocess,
  remove_byline,
  remove_full_story_paragraphs,
  remove_title,
  to_xhtml,
  to_xml,
  xml_from_contents)

-- | A page from LWN. A page is either a single-article page
--   ('ArticlePage') or a page carrying several articles ('FullPage').
data Page =
  -- | An LWN page with one article on it.
  ArticlePage {
    -- | The one article on the page.
    article :: Article } |

  -- | An LWN page with more than one article on it. These require
  --   different parsing and display functions than the single-article
  --   pages.
  FullPage {
    -- | NOTE(review): presumably the title shown for the page as a
    --   whole -- confirm against the code that constructs 'FullPage'.
    headline :: String,
    -- | The articles on the page.
    articles :: [Article] }

instance XHTML Page where to_xhtml (ArticlePage a) = "" ++ "" ++ "
" ++ " " ++ "