module Epublishable
where
-import Codec.EBook
-import qualified Data.ByteString.Lazy as B (writeFile)
-import Data.List (foldl')
+import Text.Pandoc
+import qualified Data.ByteString.Lazy as B (ByteString, writeFile)
+import Data.Time (getCurrentTime)
import Data.Tree.NTree.TypeDefs (NTree)
import System.FilePath (normalise)
import Text.XML.HXT.Core (IOSArrow, XNode, XmlTree)
-import Misc (string_to_bytestring)
import XHTML
title :: a -> String
- epublish :: a -> FilePath -> Integer -> IO ()
- epublish obj path time = do
- let book_name = title obj
- let book =
- emptyBook {
- bookID = "http://lwn.net/" ++ book_name,
- bookAuthor = "LWN <http://lwn.net> Copyright Eklektix, Inc.",
- bookTitle = book_name
- }
+ metadata :: a -> IO String
+ metadata obj = do
+ date <- getCurrentTime
+ return $
+ "<dc:creator>http://lwn.net/</dc:creator>\n" ++
+ "<dc:date>" ++ (show date) ++ "</dc:date>\n" ++
+ "<dc:language>en-US</dc:language>\n" ++
+ "<dc:rights>Copyright Eklektix, Inc.</dc:rights>\n" ++
+ "<dc:title>" ++ (title obj) ++ "</dc:title>\n"
+
+ epublish :: a -> FilePath -> IO ()
+ epublish obj path = do
let xhtml = to_xhtml obj
- bs_xhtml <- string_to_bytestring xhtml
- let iid = "iid-1"
+ epmd <- metadata obj
+ epub <- xhtml_to_epub epmd xhtml
let normalized_path = normalise path
- let metadata = Just (ChapterMetadata book_name)
- let bi = BookItem iid normalized_path bs_xhtml opsMediatype metadata
- let bookFull = foldl' addItem2Book book [bi]
- let outdata = book2Bin bookFull time
- B.writeFile normalized_path outdata
+ B.writeFile normalized_path epub
+
+xhtml_to_epub :: String -> String -> IO B.ByteString
+xhtml_to_epub epmd =
+ write_epub . read_html
+ where
+ my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+ write_epub = writeEPUB Nothing [] my_writer_options
+ read_html = readHtml defaultParserState
module LWN.FullPage
where
+import Data.String.Utils (strip)
+import Data.Maybe (fromJust)
+import Data.Tree.NTree.TypeDefs (NTree)
+import Text.XML.HXT.Core (
+ IOSArrow,
+ XmlTree,
+ XNode,
+ (>>>),
+ (/>),
+ getText,
+ runX,
+ xshow
+ )
+import Text.HandsomeSoup (css)
+
import Epublishable
import LWN.Article
import XHTML
-- pages.
data FullPage = FullPage { articles :: [Article] }
+articles_xhtml :: FullPage -> String
+articles_xhtml fp = concatMap show (articles x)
+
instance XHTML FullPage where
- to_xhtml fp = ""
+ to_xhtml fp =
+ "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" ++
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" ++
+ "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" ++
+ "\n<head>\n" ++
+ " <meta http-equiv=\"Content-Type\"" ++
+ " content=\"application/xhtml+xml; charset=utf-8\" />" ++
+ " <title>" ++ (headline a) ++ "</title>\n" ++
+ "</head>\n" ++
+ "<body>\n" ++
+ "<div>\n\n" ++
+ (articles_xhtml fp) ++
+ "\n\n</div>\n" ++
+ "\n</body>\n" ++
+ "</html>"
instance Epublishable FullPage where
parse xml = do
_ -> return Nothing
title _ = "LWN.net"
+
+
+-- parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+-- parse_headline xml = do
+-- let element_filter = xml >>> css "div.PageHeadline h1"
+-- let element_text_filter = element_filter /> getText
+-- element_text <- runX element_text_filter
+-- return $ case element_text of
+-- [x] -> Just $ strip x
+-- [] -> Nothing
+-- _ -> error "Found more than one headline."
+
+-- parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+-- parse_byline xml = do
+-- let element_filter = xml >>> css "div.Byline"
+-- let element_text_filter = element_filter /> getText
+-- element_text <- runX element_text_filter
+-- return $ case element_text of
+-- [x] -> Just $ strip x
+-- [] -> Nothing
+-- _ -> error "Found more than one byline."
+
+
+-- parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String)
+-- parse_body xml = do
+-- let element_filter = xml >>> css "div.ArticleText"
+-- let element_html_filter = xshow element_filter
+-- element_html <- runX element_html_filter
+-- return $ case element_html of
+-- [x] -> Just x
+-- [] -> Nothing
+-- _ -> error "Found more than one article."
+
+
+-- parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article])
+-- parse_articles xml = do
+-- parsed_headline <- parse_headline xml
+-- parsed_byline <- parse_byline xml
+-- parsed_body <- parse_body xml
+-- let headline' = fromJust parsed_headline
+-- let byline' = fromJust parsed_byline
+-- let body' = fromJust parsed_body
+-- return $ Just $ [Article headline' byline' body']
+
+{-# LANGUAGE ScopedTypeVariables #-}
module Main
where
import Data.Maybe (fromJust)
import Text.HandsomeSoup (parseHtml)
-import System.Time (ClockTime( TOD ), getClockTime)
import Epublishable
import LWN.ArticlePage
article_html <- readFile "test/fixtures/501317-article.html"
ioap <- parse $ parseHtml article_html
let article_page :: ArticlePage = fromJust $ ioap
- (TOD t _) <- getClockTime
- epublish article_page "out.epub" t
+ epublish article_page "out.epub"
putStrLn "Done."