1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
{-# LANGUAGE ViewPatterns #-}
-----------------------------------------------------------------------------
{- |
Module : Data.MBox
Copyright : (c) Gershom Bazerman, 2009; ported to Text by Alexander Jerneck, 2012
License : BSD 3 Clause
Maintainer : gershomb@gmail.com
Stability : experimental
Reads and writes mboxrd files as per <http://www.qmail.org/man/man5/mbox.html>.
This module uses Lazy Text pervasively, and should be able to operate as a streaming parser. That is to say, given a lazy stream of Text, and a streaming processing function, you should be able to analyze large mbox files in constant space.
-}
-------------------------------------------------------------------------
module Data.MBox (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where
import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read)
import Control.Arrow
import Data.Char
import Data.Maybe
import Data.Time
import Safe
import qualified Data.Text.Lazy as T
import qualified Data.Time.Locale.Compat as LC
type MBox = [Message]
data Message = Message {fromLine :: T.Text, headers :: [Header], body :: T.Text} deriving (Read, Show)
type Header = (T.Text, T.Text)
-- | Reads a date header as a UTCTime
parseDateHeader :: T.Text -> Maybe UTCTime
parseDateHeader txt = listToMaybe . catMaybes $ map tryParse formats where
header = T.unpack txt
tryParse f = parseTime LC.defaultTimeLocale f header
formats =
[ "%a, %_d %b %Y %T %z"
, "%a, %_d %b %Y %T %Z"
, "%a, %d %b %Y %T %z"
, "%a, %d %b %Y %T %Z"
, "%a, %_d %b %Y %T %z (%Z)"
, "%a, %_d %b %Y %T %z (GMT%:-z)"
, "%a, %_d %b %Y %T %z (UTC%:-z)"
, "%a, %_d %b %Y %T %z (GMT%:z)"
, "%a, %_d %b %Y %T %z (UTC%:z)"
, "%A, %B %e, %Y %l:%M %p"
, "%e %b %Y %T %z"
]
-- | Attempts to retrieve the contents of a forwarded message from an enclosing message.
parseForward :: Message -> Message
parseForward origMsg@(Message f _ b) =
case drop 1 $ dropWhile (/= T.pack "-----Original Message-----") (T.lines b) of
[] -> origMsg
xs -> headDef origMsg . parseMBox . T.unlines $ f:xs
-- | Parses Text as an mbox file.
parseMBox :: T.Text -> MBox
parseMBox = go . T.lines
where
go [] = []
go (x:xs) = uncurry (:) . (readMsg x *** go) . break ((T.pack "From ") `T.isPrefixOf`) $ xs
readMsg :: T.Text -> [T.Text] -> Message
readMsg x xs = uncurry (Message x) . second (T.unlines . map unquoteFrom). readHeaders $ xs
readHeaders :: [T.Text] -> ([Header], [T.Text])
readHeaders [] = ([],[])
readHeaders (x:xs)
| T.null x || T.all isSpace x || not (T.any (==':') x) = ([],xs)
| otherwise = first ((second (T.strip . sanHeader . (`T.append` headerCont) . T.drop 1) . T.break (==':') $ x):) $ readHeaders xs'
where (headerCont, xs') = first ((T.pack " " `T.append`) . T.unlines . map T.strip) . break notCont $ xs
notCont :: T.Text -> Bool
notCont s = doesNotStartSpace s || allSpace s
allSpace = T.all isSpace
doesNotStartSpace s = case T.length s of
0 -> True
_ -> not (isSpace $ T.head s)
unquoteFrom :: T.Text -> T.Text
unquoteFrom xs'@(T.stripPrefix (T.pack ">") -> Just suf) = if (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') suf
then suf
else xs'
unquoteFrom xs = xs
sanHeader :: T.Text -> T.Text
sanHeader = T.replace (T.pack "\n") (T.pack " ")
-- | Renders an MBox into Text
showMBox :: MBox -> T.Text
showMBox = T.concat . map showMessage
-- | Renders an individual message into Text.
showMessage :: Message -> T.Text
showMessage (Message f hs b) = T.unlines $ f : formatHeaders hs ++ [(T.pack "\n")] ++ formatBody b
where
formatHeaders = map (\(x,y) -> x `T.append` (T.pack ": ") `T.append` y)
formatBody = map unFrom . T.lines
unFrom x
| isFrom x = '>' `T.cons` x
| otherwise = x
isFrom x = (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') x
-- | Return True if header is a Message-ID header.
isID :: Header -> Bool
isID (x, _) = x == T.pack "Message-ID"
-- | Return True if header is a Date header.
isDate :: Header -> Bool
isDate (x, _) = x == T.pack "Date"
-- | Return the values of headers for which predicate is True
getHeader :: (Header -> Bool) -> Message -> [T.Text]
getHeader predFunc = map snd . filter predFunc . headers
|