15 import Text.HandsomeSoup (css)
16 import Text.XML.HXT.Core (
40 import LWN.URI (URL, try_make_absolute_url)
41 import Misc (contains)
45 to_xhtml :: a -> String
48 to_xml :: (ArrowXml b) => a -> (b XmlTree XmlTree)
50 -- | Options used when parsing HTML.
51 my_read_opts :: SysConfigList
52 my_read_opts = [ withValidate no,
56 -- | My version of HandsomeSoup's parseHTML.
57 parse_lwn :: String -> IOStateArrow s b XmlTree
58 parse_lwn = readString my_read_opts
61 -- | Takes the result of get_article_contents and calls parse_lwn on
62 -- the contained value.
63 xml_from_contents :: (Maybe String) -> Maybe (IOStateArrow s b XmlTree)
69 -- | Preprocessing common to both page types.
70 preprocess :: (ArrowXml a) => a XmlTree XmlTree
72 make_image_srcs_absolute
76 replace_links_with_spans
79 is_link :: (ArrowXml a) => a XmlTree XmlTree
81 isElem >>> hasName "a"
84 remove_comment_links :: (ArrowXml a) => a XmlTree XmlTree
85 remove_comment_links =
86 processTopDown $ kill_comments `when` is_link
89 hasAttrValue "href" (contains "#Comments")
92 none `when` is_comment_link
94 replace_links_with_spans :: (ArrowXml a) => a XmlTree XmlTree
95 replace_links_with_spans =
96 processTopDown $ (make_span >>> remove_attrs) `when` is_link
98 make_span = setElemName $ mkName "span"
99 remove_attrs = processAttrl none
102 is_title :: (ArrowXml a) => a XmlTree XmlTree
106 (hasAttrValue "class" (== "SummaryHL"))
109 is_byline :: (ArrowXml a) => a XmlTree XmlTree
113 (hasAttrValue "class" (== "FeatureByLine"))
116 is_image :: (ArrowXml a) => a XmlTree XmlTree
117 is_image = isElem >>> hasName "img"
119 remove_title :: (ArrowXml a) => a XmlTree XmlTree
121 processTopDown ((none) `when` is_title)
124 remove_byline :: (ArrowXml a) => a XmlTree XmlTree
126 processTopDown ((none) `when` is_byline)
129 image_srcs :: (ArrowXml a) => a XmlTree URL
135 make_image_srcs_absolute :: (ArrowXml a) => a XmlTree XmlTree
136 make_image_srcs_absolute =
137 processTopDown (make_srcs_absolute `when` is_image)
139 change_src :: (ArrowXml a) => a XmlTree XmlTree
141 changeAttrValue try_make_absolute_url
143 make_srcs_absolute :: (ArrowXml a) => a XmlTree XmlTree
145 processAttrl $ change_src `when` hasName "src"