{-# LANGUAGE ViewPatterns #-}

-----------------------------------------------------------------------------
{- |
Module      :  Data.MBox
Copyright   :  (c) Gershom Bazerman, 2009; ported to Text by Alexander Jerneck, 2012
License     :  BSD 3 Clause
Maintainer  :  gershomb@gmail.com
Stability   :  experimental

Reads and writes mboxrd files as per <http://www.qmail.org/man/man5/mbox.html>.

This module uses Lazy Text pervasively, and should be able to operate as a streaming parser. That is to say, given a lazy stream of Text, and a streaming processing function, you should be able to analyze large mbox files in constant space.

-}
-------------------------------------------------------------------------

module Data.MBox (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where

import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read)
import Control.Arrow
import Data.Char
import Data.Maybe
import Data.Time
import Safe
import qualified Data.Text.Lazy as T
import qualified Data.Time.Locale.Compat as LC

type MBox = [Message]
data Message = Message {Message -> Text
fromLine :: T.Text, Message -> [Header]
headers :: [Header], Message -> Text
body :: T.Text} deriving (ReadPrec [Message]
ReadPrec Message
Int -> ReadS Message
ReadS [Message]
(Int -> ReadS Message)
-> ReadS [Message]
-> ReadPrec Message
-> ReadPrec [Message]
-> Read Message
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Message]
$creadListPrec :: ReadPrec [Message]
readPrec :: ReadPrec Message
$creadPrec :: ReadPrec Message
readList :: ReadS [Message]
$creadList :: ReadS [Message]
readsPrec :: Int -> ReadS Message
$creadsPrec :: Int -> ReadS Message
Read, Int -> Message -> ShowS
[Message] -> ShowS
Message -> String
(Int -> Message -> ShowS)
-> (Message -> String) -> ([Message] -> ShowS) -> Show Message
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Message] -> ShowS
$cshowList :: [Message] -> ShowS
show :: Message -> String
$cshow :: Message -> String
showsPrec :: Int -> Message -> ShowS
$cshowsPrec :: Int -> Message -> ShowS
Show)
type Header = (T.Text, T.Text)

-- | Reads a date header as a UTCTime
parseDateHeader :: T.Text -> Maybe UTCTime
parseDateHeader :: Text -> Maybe UTCTime
parseDateHeader txt :: Text
txt = [UTCTime] -> Maybe UTCTime
forall a. [a] -> Maybe a
listToMaybe ([UTCTime] -> Maybe UTCTime)
-> ([Maybe UTCTime] -> [UTCTime])
-> [Maybe UTCTime]
-> Maybe UTCTime
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe UTCTime] -> [UTCTime]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe UTCTime] -> Maybe UTCTime)
-> [Maybe UTCTime] -> Maybe UTCTime
forall a b. (a -> b) -> a -> b
$ (String -> Maybe UTCTime) -> [String] -> [Maybe UTCTime]
forall a b. (a -> b) -> [a] -> [b]
map String -> Maybe UTCTime
forall t. ParseTime t => String -> Maybe t
tryParse [String]
formats where
  header :: String
header = Text -> String
T.unpack Text
txt
  tryParse :: String -> Maybe t
tryParse f :: String
f = TimeLocale -> String -> String -> Maybe t
forall t. ParseTime t => TimeLocale -> String -> String -> Maybe t
parseTime TimeLocale
LC.defaultTimeLocale String
f String
header
  formats :: [String]
formats =
    [ "%a, %_d %b %Y %T %z"
    , "%a, %_d %b %Y %T %Z"
    , "%a, %d %b %Y %T %z"
    , "%a, %d %b %Y %T %Z"
    , "%a, %_d %b %Y %T %z (%Z)"
    , "%a, %_d %b %Y %T %z (GMT%:-z)"
    , "%a, %_d %b %Y %T %z (UTC%:-z)"
    , "%a, %_d %b %Y %T %z (GMT%:z)"
    , "%a, %_d %b %Y %T %z (UTC%:z)"
    , "%A, %B %e, %Y %l:%M %p"
    , "%e %b %Y %T %z"
    ]

-- | Attempts to retrieve the contents of a forwarded message from an enclosing message.
parseForward :: Message -> Message
parseForward :: Message -> Message
parseForward origMsg :: Message
origMsg@(Message f :: Text
f _ b :: Text
b) =
    case Int -> [Text] -> [Text]
forall a. Int -> [a] -> [a]
drop 1 ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= String -> Text
T.pack "-----Original Message-----") (Text -> [Text]
T.lines Text
b) of
      [] -> Message
origMsg
      xs :: [Text]
xs -> Message -> [Message] -> Message
forall a. a -> [a] -> a
headDef Message
origMsg ([Message] -> Message)
-> ([Text] -> [Message]) -> [Text] -> Message
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Message]
parseMBox (Text -> [Message]) -> ([Text] -> Text) -> [Text] -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Message) -> [Text] -> Message
forall a b. (a -> b) -> a -> b
$ Text
fText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
xs

-- | Parses Text as an mbox file.
parseMBox :: T.Text -> MBox
parseMBox :: Text -> [Message]
parseMBox = [Text] -> [Message]
go ([Text] -> [Message]) -> (Text -> [Text]) -> Text -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
    where
      go :: [Text] -> [Message]
go [] = []
      go (x :: Text
x:xs :: [Text]
xs) = (Message -> [Message] -> [Message])
-> (Message, [Message]) -> [Message]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (:) ((Message, [Message]) -> [Message])
-> ([Text] -> (Message, [Message])) -> [Text] -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> [Text] -> Message
readMsg Text
x ([Text] -> Message)
-> ([Text] -> [Message])
-> ([Text], [Text])
-> (Message, [Message])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** [Text] -> [Message]
go) (([Text], [Text]) -> (Message, [Message]))
-> ([Text] -> ([Text], [Text])) -> [Text] -> (Message, [Message])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break ((String -> Text
T.pack "From ") Text -> Text -> Bool
`T.isPrefixOf`) ([Text] -> [Message]) -> [Text] -> [Message]
forall a b. (a -> b) -> a -> b
$ [Text]
xs
      readMsg :: T.Text -> [T.Text] -> Message
      readMsg :: Text -> [Text] -> Message
readMsg x :: Text
x xs :: [Text]
xs = ([Header] -> Text -> Message) -> ([Header], Text) -> Message
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (Text -> [Header] -> Text -> Message
Message Text
x) (([Header], Text) -> Message)
-> ([Text] -> ([Header], Text)) -> [Text] -> Message
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text] -> Text) -> ([Header], [Text]) -> ([Header], Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second ([Text] -> Text
T.unlines ([Text] -> Text) -> ([Text] -> [Text]) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
unquoteFrom)(([Header], [Text]) -> ([Header], Text))
-> ([Text] -> ([Header], [Text])) -> [Text] -> ([Header], Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> ([Header], [Text])
readHeaders ([Text] -> Message) -> [Text] -> Message
forall a b. (a -> b) -> a -> b
$ [Text]
xs
      readHeaders :: [T.Text] -> ([Header], [T.Text])
      readHeaders :: [Text] -> ([Header], [Text])
readHeaders [] = ([],[])
      readHeaders (x :: Text
x:xs :: [Text]
xs)
          | Text -> Bool
T.null Text
x Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
x Bool -> Bool -> Bool
|| Bool -> Bool
not ((Char -> Bool) -> Text -> Bool
T.any (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==':') Text
x) = ([],[Text]
xs)
          | Bool
otherwise = ([Header] -> [Header]) -> ([Header], [Text]) -> ([Header], [Text])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (((Text -> Text) -> Header -> Header
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanHeader (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text -> Text
`T.append` Text
headerCont) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> Text -> Text
T.drop 1) (Header -> Header) -> (Text -> Header) -> Text -> Header
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Header
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==':') (Text -> Header) -> Text -> Header
forall a b. (a -> b) -> a -> b
$ Text
x)Header -> [Header] -> [Header]
forall a. a -> [a] -> [a]
:) (([Header], [Text]) -> ([Header], [Text]))
-> ([Header], [Text]) -> ([Header], [Text])
forall a b. (a -> b) -> a -> b
$ [Text] -> ([Header], [Text])
readHeaders [Text]
xs'
          where (headerCont :: Text
headerCont, xs' :: [Text]
xs') = ([Text] -> Text) -> ([Text], [Text]) -> (Text, [Text])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((String -> Text
T.pack " " Text -> Text -> Text
`T.append`) (Text -> Text) -> ([Text] -> Text) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Text) -> ([Text] -> [Text]) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
T.strip) (([Text], [Text]) -> (Text, [Text]))
-> ([Text] -> ([Text], [Text])) -> [Text] -> (Text, [Text])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break Text -> Bool
notCont ([Text] -> (Text, [Text])) -> [Text] -> (Text, [Text])
forall a b. (a -> b) -> a -> b
$ [Text]
xs
                notCont :: T.Text -> Bool
                notCont :: Text -> Bool
notCont s :: Text
s = Text -> Bool
doesNotStartSpace Text
s Bool -> Bool -> Bool
|| Text -> Bool
allSpace Text
s
                allSpace :: Text -> Bool
allSpace = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace
                doesNotStartSpace :: Text -> Bool
doesNotStartSpace s :: Text
s = case Text -> Int64
T.length Text
s of
                                        0 -> Bool
True
                                        _ -> Bool -> Bool
not (Char -> Bool
isSpace (Char -> Bool) -> Char -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Char
T.head Text
s)


      unquoteFrom :: T.Text -> T.Text
      unquoteFrom :: Text -> Text
unquoteFrom xs' :: Text
xs'@(Text -> Text -> Maybe Text
T.stripPrefix (String -> Text
T.pack ">") -> Just suf :: Text
suf) = if (String -> Text
T.pack "From ") Text -> Text -> Bool
`T.isPrefixOf` (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') Text
suf
                                                                 then Text
suf
                                                                 else Text
xs'
      unquoteFrom xs :: Text
xs = Text
xs

sanHeader :: T.Text -> T.Text
sanHeader :: Text -> Text
sanHeader = Text -> Text -> Text -> Text
T.replace (String -> Text
T.pack "\n") (String -> Text
T.pack " ")

-- | Renders an MBox into Text
showMBox :: MBox -> T.Text
showMBox :: [Message] -> Text
showMBox = [Text] -> Text
T.concat ([Text] -> Text) -> ([Message] -> [Text]) -> [Message] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Message -> Text) -> [Message] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Message -> Text
showMessage

-- | Renders an individual message into Text.
showMessage :: Message -> T.Text
showMessage :: Message -> Text
showMessage (Message f :: Text
f hs :: [Header]
hs b :: Text
b) = [Text] -> Text
T.unlines ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ Text
f Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Header] -> [Text]
formatHeaders [Header]
hs [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [(String -> Text
T.pack "\n")] [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ Text -> [Text]
formatBody Text
b
                               where
                                 formatHeaders :: [Header] -> [Text]
formatHeaders = (Header -> Text) -> [Header] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\(x :: Text
x,y :: Text
y) -> Text
x Text -> Text -> Text
`T.append` (String -> Text
T.pack ": ") Text -> Text -> Text
`T.append` Text
y)
                                 formatBody :: Text -> [Text]
formatBody = (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
unFrom ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
                                 unFrom :: Text -> Text
unFrom x :: Text
x
                                     | Text -> Bool
isFrom Text
x = '>' Char -> Text -> Text
`T.cons` Text
x
                                     | Bool
otherwise = Text
x
                                 isFrom :: Text -> Bool
isFrom x :: Text
x = (String -> Text
T.pack "From ") Text -> Text -> Bool
`T.isPrefixOf` (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') Text
x

-- | Return True if header is a Message-ID header.
isID :: Header -> Bool
isID :: Header -> Bool
isID (x :: Text
x, _) = Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== String -> Text
T.pack "Message-ID"

-- | Return True if header is a Date header.
isDate :: Header -> Bool
isDate :: Header -> Bool
isDate (x :: Text
x, _) = Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== String -> Text
T.pack "Date"

-- | Return the values of headers for which predicate is True
getHeader :: (Header -> Bool) -> Message -> [T.Text]
getHeader :: (Header -> Bool) -> Message -> [Text]
getHeader predFunc :: Header -> Bool
predFunc = (Header -> Text) -> [Header] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Header -> Text
forall a b. (a, b) -> b
snd ([Header] -> [Text]) -> (Message -> [Header]) -> Message -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Header -> Bool) -> [Header] -> [Header]
forall a. (a -> Bool) -> [a] -> [a]
filter Header -> Bool
predFunc ([Header] -> [Header])
-> (Message -> [Header]) -> Message -> [Header]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Message -> [Header]
headers