{-# LANGUAGE BangPatterns, CPP #-}
module Data.Csv.Parser
( DecodeOptions(..)
, defaultDecodeOptions
, csv
, csvWithHeader
, header
, record
, name
, field
) where
import Data.ByteString.Builder (byteString, toLazyByteString, charUtf8)
import Control.Applicative (optional)
import Data.Attoparsec.ByteString.Char8 (char, endOfInput)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Csv.Types
import Data.Csv.Util ((<$!>), blankLine, endOfLine, liftM2', cr, newline, doubleQuote, toStrict)
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (*>), (<*), pure)
import Data.Monoid (mappend, mempty)
#endif
data DecodeOptions = DecodeOptions
{
DecodeOptions -> Word8
decDelimiter :: {-# UNPACK #-} !Word8
} deriving (DecodeOptions -> DecodeOptions -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecodeOptions -> DecodeOptions -> Bool
$c/= :: DecodeOptions -> DecodeOptions -> Bool
== :: DecodeOptions -> DecodeOptions -> Bool
$c== :: DecodeOptions -> DecodeOptions -> Bool
Eq, Int -> DecodeOptions -> ShowS
[DecodeOptions] -> ShowS
DecodeOptions -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DecodeOptions] -> ShowS
$cshowList :: [DecodeOptions] -> ShowS
show :: DecodeOptions -> String
$cshow :: DecodeOptions -> String
showsPrec :: Int -> DecodeOptions -> ShowS
$cshowsPrec :: Int -> DecodeOptions -> ShowS
Show)
defaultDecodeOptions :: DecodeOptions
defaultDecodeOptions :: DecodeOptions
defaultDecodeOptions = DecodeOptions
{ decDelimiter :: Word8
decDelimiter = Word8
44
}
csv :: DecodeOptions -> AL.Parser Csv
csv :: DecodeOptions -> Parser Csv
csv !DecodeOptions
opts = do
[Record]
vals <- forall a. Parser a -> Parser [a]
sepByEndOfLine1' (Word8 -> Parser Record
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts))
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
endOfLine
forall t. Chunk t => Parser t ()
endOfInput
let nonEmpty :: [Record]
nonEmpty = [Record] -> [Record]
removeBlankLines [Record]
vals
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a. [a] -> Vector a
V.fromList [Record]
nonEmpty
{-# INLINE csv #-}
sepByDelim1' :: AL.Parser a
-> Word8
-> AL.Parser [a]
sepByDelim1' :: forall a. Parser a -> Word8 -> Parser [a]
sepByDelim1' Parser a
p !Word8
delim = forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) Parser a
p Parser ByteString [a]
loop
where
loop :: Parser ByteString [a]
loop = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b forall a. Eq a => a -> a -> Bool
== Word8
delim -> forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
Maybe Word8
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure []
{-# INLINE sepByDelim1' #-}
sepByEndOfLine1' :: AL.Parser a
-> AL.Parser [a]
sepByEndOfLine1' :: forall a. Parser a -> Parser [a]
sepByEndOfLine1' Parser a
p = forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) Parser a
p Parser ByteString [a]
loop
where
loop :: Parser ByteString [a]
loop = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b forall a. Eq a => a -> a -> Bool
== Word8
cr ->
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8 -> Parser Word8
A.word8 Word8
newline forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
| Word8
b forall a. Eq a => a -> a -> Bool
== Word8
newline ->
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
Maybe Word8
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure []
{-# INLINE sepByEndOfLine1' #-}
csvWithHeader :: DecodeOptions -> AL.Parser (Header, V.Vector NamedRecord)
!DecodeOptions
opts = do
!Record
hdr <- Word8 -> Parser Record
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
[NamedRecord]
vals <- forall a b. (a -> b) -> [a] -> [b]
map (Record -> Record -> NamedRecord
toNamedRecord Record
hdr) forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Record] -> [Record]
removeBlankLines forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
forall a. Parser a -> Parser [a]
sepByEndOfLine1' (Word8 -> Parser Record
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts))
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
endOfLine
forall t. Chunk t => Parser t ()
endOfInput
let !v :: Vector NamedRecord
v = forall a. [a] -> Vector a
V.fromList [NamedRecord]
vals
forall (m :: * -> *) a. Monad m => a -> m a
return (Record
hdr, Vector NamedRecord
v)
header :: Word8
-> AL.Parser Header
!Word8
delim = forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> Word8 -> Parser ByteString
name Word8
delim forall a. Parser a -> Word8 -> Parser [a]
`sepByDelim1'` Word8
delim forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine
name :: Word8 -> AL.Parser Name
name :: Word8 -> Parser ByteString
name !Word8
delim = Word8 -> Parser ByteString
field Word8
delim
removeBlankLines :: [Record] -> [Record]
removeBlankLines :: [Record] -> [Record]
removeBlankLines = forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Record -> Bool
blankLine)
record :: Word8
-> AL.Parser Record
record :: Word8 -> Parser Record
record !Word8
delim = forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> Word8 -> Parser ByteString
field Word8
delim forall a. Parser a -> Word8 -> Parser [a]
`sepByDelim1'` Word8
delim
{-# INLINE record #-}
field :: Word8 -> AL.Parser Field
field :: Word8 -> Parser ByteString
field !Word8
delim = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote -> Parser ByteString
escapedField
Maybe Word8
_ -> Word8 -> Parser ByteString
unescapedField Word8
delim
{-# INLINE field #-}
escapedField :: AL.Parser S.ByteString
escapedField :: Parser ByteString
escapedField = do
Char
_ <- Parser Char
dquote
ByteString
s <- HasCallStack => ByteString -> ByteString
S.init forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall s. s -> (s -> Word8 -> Maybe s) -> Parser ByteString
A.scan Bool
False forall a b. (a -> b) -> a -> b
$ \Bool
s Word8
c -> if Word8
c forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote
then forall a. a -> Maybe a
Just (Bool -> Bool
not Bool
s)
else if Bool
s then forall a. Maybe a
Nothing
else forall a. a -> Maybe a
Just Bool
False)
if Word8
doubleQuote Word8 -> ByteString -> Bool
`S.elem` ByteString
s
then case forall a. Parser a -> ByteString -> Either String a
Z.parse Parser ByteString
unescape ByteString
s of
Right ByteString
r -> forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
r
Left String
err -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err
else forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
s
unescapedField :: Word8 -> AL.Parser S.ByteString
unescapedField :: Word8 -> Parser ByteString
unescapedField !Word8
delim = (Word8 -> Bool) -> Parser ByteString
A.takeWhile (\ Word8
c -> Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
doubleQuote Bool -> Bool -> Bool
&&
Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
newline Bool -> Bool -> Bool
&&
Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
delim Bool -> Bool -> Bool
&&
Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
cr)
dquote :: AL.Parser Char
dquote :: Parser Char
dquote = Char -> Parser Char
char Char
'"'
unescape :: Z.Parser S.ByteString
unescape :: Parser ByteString
unescape = (ByteString -> ByteString
toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString) forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> forall {m :: * -> *}. Monad m => Builder -> ZeptoT m Builder
go forall a. Monoid a => a
mempty where
go :: Builder -> ZeptoT m Builder
go Builder
acc = do
ByteString
h <- forall (m :: * -> *).
Monad m =>
(Word8 -> Bool) -> ZeptoT m ByteString
Z.takeWhile (forall a. Eq a => a -> a -> Bool
/= Word8
doubleQuote)
let rest :: ZeptoT m Builder
rest = do
ByteString
start <- forall (m :: * -> *). Monad m => Int -> ZeptoT m ByteString
Z.take Int
2
if (ByteString -> Word8
S.unsafeHead ByteString
start forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote Bool -> Bool -> Bool
&&
ByteString -> Int -> Word8
S.unsafeIndex ByteString
start Int
1 forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote)
then Builder -> ZeptoT m Builder
go (Builder
acc forall a. Monoid a => a -> a -> a
`mappend` ByteString -> Builder
byteString ByteString
h forall a. Monoid a => a -> a -> a
`mappend` Char -> Builder
charUtf8 Char
'"')
else forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"invalid CSV escape sequence"
Bool
done <- forall (m :: * -> *). Monad m => ZeptoT m Bool
Z.atEnd
if Bool
done
then forall (m :: * -> *) a. Monad m => a -> m a
return (Builder
acc forall a. Monoid a => a -> a -> a
`mappend` ByteString -> Builder
byteString ByteString
h)
else ZeptoT m Builder
rest