getChildren,
getText,
hasName,
+ none,
processAttrl,
processTopDown,
this,
is_image,
preprocess,
remove_byline,
+ remove_full_story_paragraphs,
remove_title,
to_xhtml,
to_xml,
lookup_func :: (ArrowXml a) => URL -> a XmlTree XmlTree
lookup_func href =
case Map.lookup href story_map of
- -- Leave it alone if we don't have the full story.
- Nothing -> this
+ -- Drop the paragraph if we don't have the contents.
+ Nothing -> none
Just v -> to_xml v
article_xml :: (ArrowXml a) => a XmlTree XmlTree
article_xml =
lookup_func
- $<
+ $< -- From HXT's Control.Arrow.ArrowList
(this /> full_story_link >>> getAttrValue "href")
replace_remote_img_srcs :: (ArrowXml a) => ImageMap -> a XmlTree XmlTree
parse :: Cfg -> IOSArrow XmlTree XmlTree -> IO (Maybe Page)
parse cfg xml = do
- story_map <- download_full_stories cfg xml
- let fs_xml = if (full_stories cfg) then
- xml >>> insert_full_stories story_map
- else
- xml
+ fs_xml <- if (full_stories cfg) then do
+ story_map <- download_full_stories cfg xml
+ return $ xml >>> insert_full_stories story_map
+ else do
+ -- Get rid of them if we don't want them.
+ return $ xml >>> remove_full_story_paragraphs
let clean_xml = fs_xml >>> preprocess
image_map <- download_images clean_xml