]> gitweb.michael.orlitzky.com - dead/htnef.git/commitdiff
Initial commit. master
authorMichael Orlitzky <michael@orlitzky.com>
Tue, 18 Aug 2009 20:00:06 +0000 (16:00 -0400)
committerMichael Orlitzky <michael@orlitzky.com>
Tue, 18 Aug 2009 20:00:06 +0000 (16:00 -0400)
13 files changed:
makefile [new file with mode: 0644]
src/htnef.hs [new file with mode: 0644]
src/tnef/attachment.hs [new file with mode: 0644]
src/tnef/file.hs [new file with mode: 0644]
src/tnef/object.hs [new file with mode: 0644]
src/tnef/object_lvl.hs [new file with mode: 0644]
src/tnef/object_name.hs [new file with mode: 0644]
test/body.tnef [new file with mode: 0644]
test/data-before-name.tnef [new file with mode: 0644]
test/multi-name-property.tnef [new file with mode: 0644]
test/two-files.tnef [new file with mode: 0644]
test/ytnef_outlook_2k3_appointment.tnef [new file with mode: 0644]
test/ytnef_random_sample.tnef [new file with mode: 0644]

diff --git a/makefile b/makefile
new file mode 100644 (file)
index 0000000..94bbbdc
--- /dev/null
+++ b/makefile
@@ -0,0 +1,15 @@
+all: htnef
+
+htnef: src/*.hs src/tnef/*.hs
+       ghc -O2 -Wall --make -fglasgow-exts -o bin/htnef src/*.hs src/tnef/*.hs
+
+profile: src/*.hs src/tnef/*.hs
+       ghc -O2 -Wall -prof -auto-all --make -fglasgow-exts -o bin/htnef src/*.hs src/tnef/*.hs
+
+clean:
+       rm -f bin/htnef
+       rm -f src/*.hi
+       rm -f src/*.o
+       rm -f src/tnef/*.hi
+       rm -f src/tnef/*.o
+       rm -f htnef.prof
diff --git a/src/htnef.hs b/src/htnef.hs
new file mode 100644 (file)
index 0000000..8163644
--- /dev/null
@@ -0,0 +1,44 @@
+--
+-- htnef, a program to do things to TNEF files.
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+
+import Data.Binary
+import System.Environment (getArgs)
+import Tnef.File
+           
+
+-- Sequential IO
+-- Perform x, ignoring the result.
+-- Then return the result of doing the rest of the operations,
+-- which will be a single IO (). This forces evaluation
+-- of a list of IOs, i.e. [IO ()]
+sequential_io :: [IO ()] -> IO ()
+sequential_io [] = return ()
+sequential_io (x:xs) = do
+  x 
+  sequential_io xs
+
+
+-- Decode whatever file is passed on the command line.
+main :: IO ()
+main = do
+  args <- getArgs
+  tf :: TnefFile <- decodeFile (args !! 0)
+  putStrLn (show tf) -- Dump the data...
+  sequential_io (write_tnef_file tf) -- And write the attachments
+  
\ No newline at end of file
diff --git a/src/tnef/attachment.hs b/src/tnef/attachment.hs
new file mode 100644 (file)
index 0000000..7526d0e
--- /dev/null
@@ -0,0 +1,65 @@
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+--
+
+module Tnef.Attachment where
+
+import Tnef.Object
+
+data TnefAttachment = TnefAttachment { att_mod_date :: TnefObject,
+                                       att_data :: TnefObject,
+                                       att_file_name :: TnefObject,
+                                       att_meta_file :: TnefObject,
+                                       att_attachment :: TnefObject }
+                    | NilAttachment
+                      deriving (Show)
+
+
+get_attachment_list :: [TnefObject] -> [TnefAttachment]
+get_attachment_list [] = []
+get_attachment_list [Nil] = []
+get_attachment_list (x:xs)
+    | (show (obj_name x) == "Attachment Rendering Data") = (make_attachment xs) : get_attachment_list xs
+    | otherwise = get_attachment_list xs
+    
+
+make_attachment :: [TnefObject] -> TnefAttachment
+make_attachment xs
+    | length xs < 5 = NilAttachment
+    | otherwise = TnefAttachment m d f mf a
+                  where
+                    m = get_first_with_name xs "Attachment Modification Date"
+                    d = get_first_with_name xs "Attachment Data"
+                    f = get_first_with_name xs "Attachment File Name"
+                    mf = get_first_with_name xs "Attachment Meta File"
+                    a = get_first_with_name xs "Attachment"
+
+safe_head :: [TnefObject] -> TnefObject
+safe_head xs
+    | ((length xs) == 0) = Nil
+    | otherwise = head xs
+
+
+get_first_with_name :: [TnefObject] -> String -> TnefObject
+get_first_with_name xs name =
+    safe_head (dropWhile ((/= name) . show . obj_name) xs)
+
+
+write_attachment :: TnefAttachment -> IO ()
+write_attachment NilAttachment = putStrLn "NilAttachment"
+write_attachment x = do
+  write_tnef_object (show_data (att_file_name x)) (att_data x)
diff --git a/src/tnef/file.hs b/src/tnef/file.hs
new file mode 100644 (file)
index 0000000..d0288b0
--- /dev/null
@@ -0,0 +1,85 @@
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+--
+
+module Tnef.File where
+
+import Data.Binary
+import qualified Data.Binary.Get as BinaryGet
+import qualified Data.Binary.Put as BinaryPut
+import Data.Int (Int64)
+import Tnef.Attachment
+import Tnef.Object
+
+
+-- A TNEF "file" has a signature at the top which should be the same
+-- for all TNEF files. There's also a "key," which probably does
+-- something, and looks to be unique. After that, there's a variable
+-- number of objects (whose structure can be seen in TnefObject). The
+-- "total bytes" thing is not part of the file. I'm just using it to
+-- keep track of the bytes read for now (to make sure it equals the
+-- size of the file when we're done).
+data TnefFile = TnefFile { signature    :: Word32,
+                           key          :: Word16,
+                           first_object :: TnefObject,
+                           total_bytes  :: Int64 }
+
+
+-- Magicest of numbers
+-- Important: Int doesn't guarantee 32 bits, so we
+-- need to use the Integer type here.
+tnef_signature :: Integer
+tnef_signature = 0x223e9f78                  
+
+
+
+instance Show TnefFile where
+    show x = "Signature: " ++ (show (signature x)) ++ " (Correct: " ++ (show tnef_signature) ++ ")\n" ++
+             "Key: " ++ (show (key x)) ++ "\n" ++
+             concat (map show (get_object_list x)) -- Show the objects, one at a time
+
+
+-- Recursively collect an array of objects. It dies
+-- on the last one, which has no next_object property.
+-- Help?
+get_object_list :: TnefFile -> [TnefObject]
+get_object_list x = get_successive_objects (first_object x)
+
+
+get_successive_objects :: TnefObject -> [TnefObject]
+get_successive_objects Nil = [Nil]
+get_successive_objects x = (x : (get_successive_objects (next_object x)))
+
+
+instance Binary TnefFile where
+    put _ = do BinaryPut.putWord8 0
+    get = do
+      signature_word    <- BinaryGet.getWord32host
+      key_word          <- BinaryGet.getWord16host
+      first_object_word <- get :: Get TnefObject
+      total_bytes_word  <- BinaryGet.bytesRead
+      return (TnefFile { signature      = signature_word,
+                         key            = key_word,
+                         first_object   = first_object_word,
+                         total_bytes    = total_bytes_word })
+
+
+write_tnef_file :: TnefFile -> [IO ()]
+write_tnef_file x = do
+  let objects = get_object_list x
+  let attachments = get_attachment_list objects
+  map write_attachment attachments
diff --git a/src/tnef/object.hs b/src/tnef/object.hs
new file mode 100644 (file)
index 0000000..0011ee5
--- /dev/null
@@ -0,0 +1,119 @@
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+--
+
+module Tnef.Object where
+
+import Control.Monad (replicateM)
+import Data.Binary
+import qualified Data.Binary.Get as BinaryGet
+import qualified Data.Binary.Put as BinaryPut
+import qualified Data.ByteString.Lazy as BS
+import Data.Char
+import Data.Word
+import Tnef.ObjectName
+import Tnef.ObjectLvl
+
+
+-- The only tricky part here is the "chained" TnefObjects.
+-- Each object contains the next one. It's like that because
+-- I couldn't figure out how to parse an unknown number of
+-- objects using Binary.Get.
+data TnefObject = TnefObject { obj_lvl      :: TnefObjectLvl,
+                               obj_name     :: TnefObjectName,
+                               obj_type     :: Word16,
+                               obj_length   :: Word32,
+                               obj_data     :: [Word8],
+                               obj_checksum :: Word16,
+                               next_object  :: TnefObject }
+                | Nil
+
+                                      
+instance Show TnefObject where
+    show Nil = "Nil"
+    show x = "<object>" ++ "\n" ++
+             "  Level: " ++ (show (obj_lvl x)) ++ "\n" ++
+             "  Name: " ++ (show (obj_name x)) ++ "\n" ++
+             "  Type: " ++ (show (obj_type x)) ++ "\n" ++
+             "  Data: " ++ (show_data x) ++ "\n" ++
+             "  Checksum: " ++ (show (obj_checksum x)) ++ " (Actual: " ++ (show (checksum (obj_data x))) ++ ")\n" ++
+             "</object>\n"
+
+drop_null_byte :: [Word8] -> [Word8]
+drop_null_byte = takeWhile (/= 0)
+                 
+show_data :: TnefObject -> String
+show_data Nil = ""
+show_data x
+    | ((show n) == "Attachment Data") = "(Binary)"
+    | ((show n) == "Date Sent") = show (obj_data x)
+    | ((show n) == "Date Received") = show (obj_data x)
+    | ((show n) == "Date Modified") = show (obj_data x)
+    | ((show n) == "Attachment Modification Date") = show (obj_data x)
+    | otherwise = t
+      where
+        n = (obj_name x)
+        t = (map (chr . word8_to_int) (drop_null_byte (obj_data x)))
+
+
+-- Just sum up the values of the bytes, and take that
+-- value mod 65536 at the end. (checksum obj_data) should
+-- equal the value of obj_checksum for each TnefObject.
+checksum :: [Word8] -> Int
+checksum x = (foldr (+) 0 (map word8_to_int x)) `mod` 0x10000
+
+
+-- Just for convenience
+word8_to_int :: Word8 -> Int
+word8_to_int x = (fromIntegral x)
+
+
+instance Binary TnefObject where
+    put _ = do BinaryPut.putWord8 0
+    get = do
+      obj_lvl_word    <- BinaryGet.getWord8      
+      obj_name_word   <- BinaryGet.getWord16host
+      obj_type_word   <- BinaryGet.getWord16host
+      obj_length_word <- BinaryGet.getWord32host
+      -- This pulls in obj_length bytes of data as a [Word8]
+      obj_data_word  <- replicateM (fromIntegral obj_length_word) BinaryGet.getWord8
+      obj_checksum_word <- BinaryGet.getWord16host
+      next_object_word  <- get_next_tnef_object
+      return (TnefObject { obj_lvl = (word_to_lvl obj_lvl_word),
+                           obj_name = (word_to_name obj_name_word),
+                           obj_type = obj_type_word,
+                           obj_length = obj_length_word,
+                           obj_data = obj_data_word,
+                           obj_checksum = obj_checksum_word,
+                           next_object = next_object_word })
+
+
+-- If there's no more data, return Nil as the "next" object.
+get_next_tnef_object :: Get TnefObject
+get_next_tnef_object = do
+  is_empty <- BinaryGet.isEmpty
+  if is_empty
+    then return Nil
+    else (get :: Get TnefObject)
+
+
+write_tnef_object :: String -> TnefObject -> IO ()
+write_tnef_object _ Nil = return ()
+write_tnef_object path x
+    | ((show n) == "Attachment Data") = BS.writeFile path (BS.pack (obj_data x))
+    | otherwise = return ()
+      where n = (obj_name x)
diff --git a/src/tnef/object_lvl.hs b/src/tnef/object_lvl.hs
new file mode 100644 (file)
index 0000000..6690c1c
--- /dev/null
@@ -0,0 +1,37 @@
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+--
+
+module Tnef.ObjectLvl where
+
+import Data.Word
+
+-- TNEF "Levels". Not sure why they're special, but
+-- one is attached to every TnefObject.
+data TnefObjectLvl = TnefObjectLvl { code :: Integer,
+                                     desc :: String }
+instance Show TnefObjectLvl where
+    show = desc
+
+
+tnef_object_lvls :: [TnefObjectLvl]
+tnef_object_lvls = [ TnefObjectLvl 0x01 "Message",
+                     TnefObjectLvl 0x02 "Attachment" ]
+
+-- I want to marry list comprehensions
+word_to_lvl :: Word8 -> TnefObjectLvl
+word_to_lvl c = head [ x | x <- tnef_object_lvls, (code x) == read (show c)]
diff --git a/src/tnef/object_name.hs b/src/tnef/object_name.hs
new file mode 100644 (file)
index 0000000..4042aa8
--- /dev/null
@@ -0,0 +1,68 @@
+--
+-- Copyright Michael Orlitzky
+--
+-- http://michael.orlitzky.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- http://www.fsf.org/licensing/licenses/gpl.html
+--
+
+module Tnef.ObjectName where
+
+import Data.Word
+                                         
+-- These are the "names" for TNEF objects.  They more or less specify
+-- what type of object you've got, so I'm not sure what the "type"
+-- field is for as a result.
+data TnefObjectName = TnefObjectName { code :: Integer,
+                                       desc :: String }
+instance Show TnefObjectName where
+    show = desc
+
+
+tnef_object_names :: [TnefObjectName]
+tnef_object_names = [ TnefObjectName 0x0000 "Owner",
+                      TnefObjectName 0x0001 "Sent For",
+                      TnefObjectName 0x0002 "Delegate",
+                      TnefObjectName 0x0006 "Date Start",
+                      TnefObjectName 0x0007 "Date End",
+                      TnefObjectName 0x0008 "Owner Appointment ID",
+                      TnefObjectName 0x0009 "Response Requested",
+                      TnefObjectName 0x8000 "From",
+                      TnefObjectName 0x8004 "Subject",
+                      TnefObjectName 0x8005 "Date Sent",
+                      TnefObjectName 0x8006 "Date Received",
+                      TnefObjectName 0x8007 "Message Status",
+                      TnefObjectName 0x8008 "Message Class",
+                      TnefObjectName 0x8009 "Message ID",
+                      TnefObjectName 0x800a "Parent ID",
+                      TnefObjectName 0x800b "Conversation ID",
+                      TnefObjectName 0x800c "Body",
+                      TnefObjectName 0x800d "Priority",
+                      TnefObjectName 0x800f "Attachment Data",
+                      TnefObjectName 0x8010 "Attachment File Name",
+                      TnefObjectName 0x8011 "Attachment Meta File",
+                      TnefObjectName 0x8012 "Attachment Creation Date",
+                      TnefObjectName 0x8013 "Attachment Modification Date",
+                      TnefObjectName 0x8020 "Date Modified",
+                      TnefObjectName 0x9001 "Attachment Transport Filename",
+                      TnefObjectName 0x9002 "Attachment Rendering Data",
+                      TnefObjectName 0x9003 "MAPI Properties",
+                      TnefObjectName 0x9004 "Recipients",
+                      TnefObjectName 0x9005 "Attachment",
+                      TnefObjectName 0x9006 "TNEF Version",
+                      TnefObjectName 0x9007 "OEM Codepage",
+                      TnefObjectName 0x9008 "Original Message Class" ]
+
+
+word_to_name :: Word16 -> TnefObjectName
+word_to_name c = head [ x | x <- tnef_object_names, (code x) == read (show c)]
diff --git a/test/body.tnef b/test/body.tnef
new file mode 100644 (file)
index 0000000..06ac6e1
Binary files /dev/null and b/test/body.tnef differ
diff --git a/test/data-before-name.tnef b/test/data-before-name.tnef
new file mode 100644 (file)
index 0000000..81ca5d6
Binary files /dev/null and b/test/data-before-name.tnef differ
diff --git a/test/multi-name-property.tnef b/test/multi-name-property.tnef
new file mode 100644 (file)
index 0000000..5d70e3a
Binary files /dev/null and b/test/multi-name-property.tnef differ
diff --git a/test/two-files.tnef b/test/two-files.tnef
new file mode 100644 (file)
index 0000000..b33b64d
Binary files /dev/null and b/test/two-files.tnef differ
diff --git a/test/ytnef_outlook_2k3_appointment.tnef b/test/ytnef_outlook_2k3_appointment.tnef
new file mode 100644 (file)
index 0000000..91496f7
Binary files /dev/null and b/test/ytnef_outlook_2k3_appointment.tnef differ
diff --git a/test/ytnef_random_sample.tnef b/test/ytnef_random_sample.tnef
new file mode 100644 (file)
index 0000000..b9f6295
Binary files /dev/null and b/test/ytnef_random_sample.tnef differ