{-# LANGUAGE DoAndIfThenElse #-}

module LWN.Page
where

import qualified Data.Map as Map
import Data.Time (getCurrentTime)
import System.IO (Handle)
import qualified Data.ByteString.Lazy as B (ByteString, hPut)
import Data.String.Utils (split, strip)
import Data.Maybe (catMaybes, fromJust, isNothing)
import Data.Tree.NTree.TypeDefs (NTree)
import Test.HUnit (Assertion, assertEqual)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Text.Pandoc
import Text.XML.HXT.Core (
  ArrowXml,
  IOSArrow,
  XmlTree,
  XNode,
  (>>>),
  (/>),
  (//>),
  changeAttrValue,
  getAttrValue,
  getChildren,
  getText,
  hasAttrValue,
  hasName,
  isElem,
  mkName,
  none,
  processAttrl,
  processTopDown,
  runX,
  setElemName,
  xshow,
  when )
import Text.HandsomeSoup (css, parseHtml)

import LWN.Article
import LWN.HTTP (save_image)
import LWN.URI (URL, try_make_absolute_url)
import Misc (contains)
import XHTML

-- | Map absolute image URLs to local system file paths where the
--   image referenced by the URL is stored.
type ImageMap = Map.Map URL FilePath

-- | Collect the image URLs produced by running the given arrow through
--   'image_srcs', pass each one to 'save_image', and return a map from
--   every URL that yielded a file path to that path.
--
--   URLs for which 'save_image' returned 'Nothing' are left out of the
--   result. If the same URL occurs more than once, the path from its
--   last occurrence is the one kept.
--
--   Should be called *after* preprocessing.
download_images :: IOSArrow XmlTree (NTree XNode) -> IO ImageMap
download_images xml = do
  image_urls <- runX $ xml >>> image_srcs
  files <- mapM save_image image_urls
  -- The pattern match in the generator silently drops the (_, Nothing)
  -- pairs; Map.fromList keeps the last value for a duplicated key,
  -- which is the same outcome as inserting the pairs left to right.
  return $ Map.fromList
    [ (url, path) | (url, Just path) <- zip image_urls files ]

data Page
  = -- | An LWN page with one article on it.
    ArticlePage { article :: Article }
  | -- | An LWN page with more than one article on it. These require
    --   different parsing and display functions than the single-article
    --   pages.
    FullPage { headline :: String,
               articles :: [Article] }

instance XHTML Page where
  to_xhtml (ArticlePage a) = "" ++ "" ++ "
" ++ " " ++ "