{-# LANGUAGE BangPatterns, OverloadedStrings, ScopedTypeVariables #-}

-- Module:      Data.Csv.Encoding
-- Copyright:   (c) 2011 MailRank, Inc.
--              (c) 2012 Johan Tibell
-- License:     BSD3
-- Maintainer:  Johan Tibell <[email protected]>
-- Stability:   experimental
-- Portability: portable
--
-- Encoding and decoding of data types into CSV.
module Data.Csv.Encoding
    (
    -- * Encoding and decoding
      HasHeader(..)
    , decode
    , decodeByName
    , Quoting(..)
    , encode
    , encodeByName
    , encodeDefaultOrderedByName

    -- ** Encoding and decoding options
    , DecodeOptions(..)
    , defaultDecodeOptions
    , decodeWith
    , decodeWithP
    , decodeByNameWith
    , decodeByNameWithP
    , EncodeOptions(..)
    , defaultEncodeOptions
    , encodeWith
    , encodeByNameWith
    , encodeDefaultOrderedByNameWith

    -- ** Encoding and decoding single records
    , encodeRecord
    , encodeNamedRecord
    , recordSep
    ) where

import Data.ByteString.Builder
import Control.Applicative as AP (Applicative(..), (<|>))
import Data.Attoparsec.ByteString.Char8 (endOfInput)
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.HashMap.Strict as HM
import Data.Vector (Vector)
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Monoid
import Prelude hiding (unlines)

import qualified Data.Csv.Conversion as Conversion
import Data.Csv.Conversion (FromNamedRecord, FromRecord, ToNamedRecord,
                            ToRecord, parseNamedRecord, parseRecord, runParser,
                            toNamedRecord, toRecord)
import Data.Csv.Parser hiding (csv, csvWithHeader)
import qualified Data.Csv.Parser as Parser
import Data.Csv.Types hiding (toNamedRecord)
import qualified Data.Csv.Types as Types
import Data.Csv.Util (blankLine, endOfLine, toStrict)


-- TODO: 'encode' isn't as efficient as it could be.

------------------------------------------------------------------------
-- * Encoding and decoding

-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a
       => HasHeader     -- ^ Data contains header that should be
                        -- skipped
       -> L.ByteString  -- ^ CSV data
       -> Either String (Vector a)
decode :: forall a.
FromRecord a =>
HasHeader -> ByteString -> Either String (Vector a)
decode = forall a.
FromRecord a =>
DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector a)
decodeWith DecodeOptions
defaultDecodeOptions
{-# INLINE decode #-}

-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. The data is assumed to be preceded by a header.
-- Equivalent to @'decodeByNameWith' 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a
             => L.ByteString  -- ^ CSV data
             -> Either String (Header, Vector a)
decodeByName :: forall a.
FromNamedRecord a =>
ByteString -> Either String (Header, Vector a)
decodeByName = forall a.
FromNamedRecord a =>
DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWith DecodeOptions
defaultDecodeOptions
{-# INLINE decodeByName #-}

-- | Efficiently serialize CSV records as a lazy 'L.ByteString'.
encode :: ToRecord a => [a] -> L.ByteString
encode :: forall a. ToRecord a => [a] -> ByteString
encode = forall a. ToRecord a => EncodeOptions -> [a] -> ByteString
encodeWith EncodeOptions
defaultEncodeOptions
{-# INLINE encode #-}

-- | Efficiently serialize CSV records as a lazy 'L.ByteString'. The
-- header is written before any records and dictates the field order.
encodeByName :: ToNamedRecord a => Header -> [a] -> L.ByteString
encodeByName :: forall a. ToNamedRecord a => Header -> [a] -> ByteString
encodeByName = forall a.
ToNamedRecord a =>
EncodeOptions -> Header -> [a] -> ByteString
encodeByNameWith EncodeOptions
defaultEncodeOptions
{-# INLINE encodeByName #-}

-- | Like 'encodeByName', but header and field order is dictated by
-- the 'Conversion.header' method.
encodeDefaultOrderedByName :: (Conversion.DefaultOrdered a, ToNamedRecord a) =>
                              [a] -> L.ByteString
encodeDefaultOrderedByName :: forall a. (DefaultOrdered a, ToNamedRecord a) => [a] -> ByteString
encodeDefaultOrderedByName = forall a.
(DefaultOrdered a, ToNamedRecord a) =>
EncodeOptions -> [a] -> ByteString
encodeDefaultOrderedByNameWith EncodeOptions
defaultEncodeOptions
{-# INLINE encodeDefaultOrderedByName #-}

------------------------------------------------------------------------
-- ** Encoding and decoding options

-- | Like 'decode', but lets you customize how the CSV data is parsed.
decodeWith :: FromRecord a
           => DecodeOptions  -- ^ Decoding options
           -> HasHeader      -- ^ Data contains header that should be
                             -- skipped
           -> L.ByteString   -- ^ CSV data
           -> Either String (Vector a)
decodeWith :: forall a.
FromRecord a =>
DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector a)
decodeWith = forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC (forall a.
(Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv forall a. FromRecord a => Header -> Parser a
parseRecord)
{-# INLINE [1] decodeWith #-}

{-# RULES
    "idDecodeWith" decodeWith = idDecodeWith
 #-}

-- | Same as 'decodeWith', but more efficient as no type
-- conversion is performed.
idDecodeWith :: DecodeOptions -> HasHeader -> L.ByteString
             -> Either String (Vector (Vector B.ByteString))
idDecodeWith :: DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector Header)
idDecodeWith = forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC DecodeOptions -> Parser (Vector Header)
Parser.csv

-- | Like 'decodeWith'', but lets you specify a parser function.
--
-- @since 0.5.2.0
decodeWithP :: (Record -> Conversion.Parser a)
            -- ^ Custom parser function
            -> DecodeOptions -- ^ Decoding options
            -> HasHeader     -- ^ Data contains header that should be
                             -- skipped
            -> L.ByteString  -- ^ CSV data
            -> Either String (Vector a)
decodeWithP :: forall a.
(Header -> Parser a)
-> DecodeOptions
-> HasHeader
-> ByteString
-> Either String (Vector a)
decodeWithP Header -> Parser a
_parseRecord = forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC (forall a.
(Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv Header -> Parser a
_parseRecord)
{-# INLINE [1] decodeWithP #-}

-- | Decode CSV data using the provided parser, skipping a leading
-- header if 'hasHeader' is 'HasHeader'. Returns 'Left' @errMsg@ on
-- failure.
decodeWithC :: (DecodeOptions -> AL.Parser a) -> DecodeOptions -> HasHeader
            -> BL8.ByteString -> Either String a
decodeWithC :: forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC DecodeOptions -> Parser a
p !DecodeOptions
opts HasHeader
hasHeader = forall a. Parser a -> ByteString -> Either String a
decodeWithP' Parser a
parser
  where parser :: Parser a
parser = case HasHeader
hasHeader of
            HasHeader
HasHeader -> Word8 -> Parser Header
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> DecodeOptions -> Parser a
p DecodeOptions
opts
            HasHeader
NoHeader  -> DecodeOptions -> Parser a
p DecodeOptions
opts
{-# INLINE decodeWithC #-}

-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed.
decodeByNameWith :: FromNamedRecord a
                 => DecodeOptions  -- ^ Decoding options
                 -> L.ByteString   -- ^ CSV data
                 -> Either String (Header, Vector a)
decodeByNameWith :: forall a.
FromNamedRecord a =>
DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWith !DecodeOptions
opts = forall a. Parser a -> ByteString -> Either String a
decodeWithP' (forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord DecodeOptions
opts)

-- | Like 'decodeByNameWith', but lets you specify a parser function.
--
-- @since 0.5.2.0
decodeByNameWithP :: (NamedRecord -> Conversion.Parser a)
                  -- ^ Custom parser function
                  -> DecodeOptions -- ^ Decoding options
                  -> L.ByteString  -- ^ CSV data
                  -> Either String (Header, Vector a)
decodeByNameWithP :: forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWithP NamedRecord -> Parser a
_parseNamedRecord !DecodeOptions
opts =
  forall a. Parser a -> ByteString -> Either String a
decodeWithP' (forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader NamedRecord -> Parser a
_parseNamedRecord DecodeOptions
opts)

-- | Should quoting be applied to fields, and at which level?
data Quoting
    = QuoteNone        -- ^ No quotes.
    | QuoteMinimal     -- ^ Quotes according to RFC 4180.
    | QuoteAll         -- ^ Always quote.
    deriving (Quoting -> Quoting -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Quoting -> Quoting -> Bool
$c/= :: Quoting -> Quoting -> Bool
== :: Quoting -> Quoting -> Bool
$c== :: Quoting -> Quoting -> Bool
Eq, Int -> Quoting -> ShowS
[Quoting] -> ShowS
Quoting -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Quoting] -> ShowS
$cshowList :: [Quoting] -> ShowS
show :: Quoting -> String
$cshow :: Quoting -> String
showsPrec :: Int -> Quoting -> ShowS
$cshowsPrec :: Int -> Quoting -> ShowS
Show)

-- | Options that controls how data is encoded. These options can be
-- used to e.g. encode data in a tab-separated format instead of in a
-- comma-separated format.
--
-- To avoid having your program stop compiling when new fields are
-- added to 'EncodeOptions', create option records by overriding
-- values in 'defaultEncodeOptions'. Example:
--
-- > myOptions = defaultEncodeOptions {
-- >       encDelimiter = fromIntegral (ord '\t')
-- >     }
--
-- /N.B./ The 'encDelimiter' must /not/ be the quote character (i.e.
-- @\"@) or one of the record separator characters (i.e. @\\n@ or
-- @\\r@).
data EncodeOptions = EncodeOptions
    { -- | Field delimiter.
      EncodeOptions -> Word8
encDelimiter  :: {-# UNPACK #-} !Word8

      -- | Record separator selection.  @True@ for CRLF (@\\r\\n@) and
      -- @False@ for LF (@\\n@).
    , EncodeOptions -> Bool
encUseCrLf :: !Bool

      -- | Include a header row when encoding @ToNamedRecord@
      -- instances.
    , EncodeOptions -> Bool
encIncludeHeader :: !Bool

      -- | What kind of quoting should be applied to text fields.
    , EncodeOptions -> Quoting
encQuoting :: !Quoting
    } deriving (EncodeOptions -> EncodeOptions -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EncodeOptions -> EncodeOptions -> Bool
$c/= :: EncodeOptions -> EncodeOptions -> Bool
== :: EncodeOptions -> EncodeOptions -> Bool
$c== :: EncodeOptions -> EncodeOptions -> Bool
Eq, Int -> EncodeOptions -> ShowS
[EncodeOptions] -> ShowS
EncodeOptions -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EncodeOptions] -> ShowS
$cshowList :: [EncodeOptions] -> ShowS
show :: EncodeOptions -> String
$cshow :: EncodeOptions -> String
showsPrec :: Int -> EncodeOptions -> ShowS
$cshowsPrec :: Int -> EncodeOptions -> ShowS
Show)

-- | Encoding options for CSV files.
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions
    { encDelimiter :: Word8
encDelimiter     = Word8
44  -- comma
    , encUseCrLf :: Bool
encUseCrLf       = Bool
True
    , encIncludeHeader :: Bool
encIncludeHeader = Bool
True
    , encQuoting :: Quoting
encQuoting       = Quoting
QuoteMinimal
    }

-- | Like 'encode', but lets you customize how the CSV data is
-- encoded.
encodeWith :: ToRecord a => EncodeOptions -> [a] -> L.ByteString
encodeWith :: forall a. ToRecord a => EncodeOptions -> [a] -> ByteString
encodeWith EncodeOptions
opts
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString
        forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
        forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
              forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToRecord a => a -> Header
toRecord)
    | Bool
otherwise = forall a. a
encodeOptionsError
{-# INLINE encodeWith #-}

-- | Check if the delimiter is valid.
validDelim :: Word8 -> Bool
validDelim :: Word8 -> Bool
validDelim Word8
delim = Word8
delim forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Word8
cr, Word8
nl, Word8
dquote]
  where
    nl :: Word8
nl = Word8
10
    cr :: Word8
cr = Word8
13
    dquote :: Word8
dquote = Word8
34

-- | Raises an exception indicating that the provided delimiter isn't
-- valid. See 'validDelim'.
--
-- Keep this message consistent with the documentation of
-- 'EncodeOptions'.
encodeOptionsError :: a
encodeOptionsError :: forall a. a
encodeOptionsError = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"Data.Csv: " forall a. [a] -> [a] -> [a]
++
        String
"The 'encDelimiter' must /not/ be the quote character (i.e. " forall a. [a] -> [a] -> [a]
++
        String
"\") or one of the record separator characters (i.e. \\n or " forall a. [a] -> [a] -> [a]
++
        String
"\\r)"

-- | Encode a single record, without the trailing record separator
-- (i.e. newline).
encodeRecord :: Quoting -> Word8 -> Record -> Builder
encodeRecord :: Quoting -> Word8 -> Header -> Builder
encodeRecord Quoting
qtng Word8
delim = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> [Builder] -> [Builder]
intersperse (Word8 -> Builder
word8 Word8
delim)
                     forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Builder
byteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (Quoting -> Word8 -> ByteString -> ByteString
escape Quoting
qtng Word8
delim) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList
{-# INLINE encodeRecord #-}

-- | Encode a single named record, without the trailing record
-- separator (i.e. newline), using the given field order.
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr Quoting
qtng Word8
delim =
    Quoting -> Word8 -> Header -> Builder
encodeRecord Quoting
qtng Word8
delim forall b c a. (b -> c) -> (a -> b) -> a -> c
. Header -> NamedRecord -> Header
namedRecordToRecord Header
hdr

-- TODO: Optimize
escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString
escape :: Quoting -> Word8 -> ByteString -> ByteString
escape !Quoting
qtng !Word8
delim !ByteString
s
    | (Quoting
qtng forall a. Eq a => a -> a -> Bool
== Quoting
QuoteMinimal Bool -> Bool -> Bool
&&
        (Word8 -> Bool) -> ByteString -> Bool
B.any (\ Word8
b -> Word8
b forall a. Eq a => a -> a -> Bool
== Word8
dquote Bool -> Bool -> Bool
|| Word8
b forall a. Eq a => a -> a -> Bool
== Word8
delim Bool -> Bool -> Bool
|| Word8
b forall a. Eq a => a -> a -> Bool
== Word8
nl Bool -> Bool -> Bool
|| Word8
b forall a. Eq a => a -> a -> Bool
== Word8
cr) ByteString
s
      ) Bool -> Bool -> Bool
|| Quoting
qtng forall a. Eq a => a -> a -> Bool
== Quoting
QuoteAll
         = ByteString -> ByteString
toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString forall a b. (a -> b) -> a -> b
$
            Word8 -> Builder
word8 Word8
dquote
            forall a. Semigroup a => a -> a -> a
<> forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl
                (\ Builder
acc Word8
b -> Builder
acc forall a. Semigroup a => a -> a -> a
<> if Word8
b forall a. Eq a => a -> a -> Bool
== Word8
dquote
                    then ByteString -> Builder
byteString ByteString
"\"\""
                    else Word8 -> Builder
word8 Word8
b)
                forall a. Monoid a => a
mempty
                ByteString
s
            forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
word8 Word8
dquote
    | Bool
otherwise = ByteString
s
  where
    dquote :: Word8
dquote = Word8
34
    nl :: Word8
nl     = Word8
10
    cr :: Word8
cr     = Word8
13

-- | Like 'encodeByName', but lets you customize how the CSV data is
-- encoded.
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a]
                 -> L.ByteString
encodeByNameWith :: forall a.
ToNamedRecord a =>
EncodeOptions -> Header -> [a] -> ByteString
encodeByNameWith EncodeOptions
opts Header
hdr [a]
v
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString (Bool -> Builder
rows (EncodeOptions -> Bool
encIncludeHeader EncodeOptions
opts))
    | Bool
otherwise = forall a. a
encodeOptionsError
  where
    rows :: Bool -> Builder
rows Bool
False = Builder
records
    rows Bool
True  = Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) Header
hdr forall a. Semigroup a => a -> a -> a
<>
                 Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts) forall a. Semigroup a => a -> a -> a
<> Builder
records
    records :: Builder
records = Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
              forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
                     forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord)
              forall a b. (a -> b) -> a -> b
$ [a]
v
{-# INLINE encodeByNameWith #-}

-- | Like 'encodeDefaultOrderedByNameWith', but lets you customize how
-- the CSV data is encoded.
encodeDefaultOrderedByNameWith ::
    forall a. (Conversion.DefaultOrdered a, ToNamedRecord a) =>
    EncodeOptions -> [a] -> L.ByteString
encodeDefaultOrderedByNameWith :: forall a.
(DefaultOrdered a, ToNamedRecord a) =>
EncodeOptions -> [a] -> ByteString
encodeDefaultOrderedByNameWith EncodeOptions
opts [a]
v
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString (Bool -> Builder
rows (EncodeOptions -> Bool
encIncludeHeader EncodeOptions
opts))
    | Bool
otherwise = forall a. a
encodeOptionsError
  where
    hdr :: Header
hdr = (forall a. DefaultOrdered a => a -> Header
Conversion.headerOrder (forall a. HasCallStack => a
undefined :: a))
    rows :: Bool -> Builder
rows Bool
False = Builder
records
    rows Bool
True  = Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) Header
hdr forall a. Semigroup a => a -> a -> a
<>
                 Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts) forall a. Semigroup a => a -> a -> a
<> Builder
records
    records :: Builder
records = Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
              forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
                     forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord)
              forall a b. (a -> b) -> a -> b
$ [a]
v
{-# INLINE encodeDefaultOrderedByNameWith #-}

namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord :: Header -> NamedRecord -> Header
namedRecordToRecord Header
hdr NamedRecord
nr = forall a b. (a -> b) -> Vector a -> Vector b
V.map ByteString -> ByteString
find Header
hdr
  where
    find :: ByteString -> ByteString
find ByteString
n = case forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup ByteString
n NamedRecord
nr of
        Maybe ByteString
Nothing -> forall a. String -> String -> a
moduleError String
"namedRecordToRecord" forall a b. (a -> b) -> a -> b
$
                   String
"header contains name " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (ByteString -> String
B8.unpack ByteString
n) forall a. [a] -> [a] -> [a]
++
                   String
" which is not present in the named record"
        Just ByteString
v  -> ByteString
v

moduleError :: String -> String -> a
moduleError :: forall a. String -> String -> a
moduleError String
func String
msg = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"Data.Csv.Encoding." forall a. [a] -> [a] -> [a]
++ String
func forall a. [a] -> [a] -> [a]
++ String
": " forall a. [a] -> [a] -> [a]
++ String
msg
{-# NOINLINE moduleError #-}

recordSep :: Bool -> Builder
recordSep :: Bool -> Builder
recordSep Bool
False = Word8 -> Builder
word8 Word8
10 -- new line (\n)
recordSep Bool
True  = String -> Builder
string8 String
"\r\n"

unlines :: Builder -> [Builder] -> Builder
unlines :: Builder -> [Builder] -> Builder
unlines Builder
_ [] = forall a. Monoid a => a
mempty
unlines Builder
sep (Builder
b:[Builder]
bs) = Builder
b forall a. Semigroup a => a -> a -> a
<> Builder
sep forall a. Semigroup a => a -> a -> a
<> Builder -> [Builder] -> Builder
unlines Builder
sep [Builder]
bs

intersperse :: Builder -> [Builder] -> [Builder]
intersperse :: Builder -> [Builder] -> [Builder]
intersperse Builder
_   []      = []
intersperse Builder
sep (Builder
x:[Builder]
xs)  = Builder
x forall a. a -> [a] -> [a]
: Builder -> [Builder] -> [Builder]
prependToAll Builder
sep [Builder]
xs

prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll Builder
_   []     = []
prependToAll Builder
sep (Builder
x:[Builder]
xs) = Builder
sep forall a. Semigroup a => a -> a -> a
<> Builder
x forall a. a -> [a] -> [a]
: Builder -> [Builder] -> [Builder]
prependToAll Builder
sep [Builder]
xs

decodeWithP' :: AL.Parser a -> L.ByteString -> Either String a
decodeWithP' :: forall a. Parser a -> ByteString -> Either String a
decodeWithP' Parser a
p ByteString
s =
    case forall a. Parser a -> ByteString -> Result a
AL.parse Parser a
p ByteString
s of
      AL.Done ByteString
_ a
v     -> forall a b. b -> Either a b
Right a
v
      AL.Fail ByteString
left [String]
_ String
msg -> forall a b. a -> Either a b
Left String
errMsg
        where
          errMsg :: String
errMsg = String
"parse error (" forall a. [a] -> [a] -> [a]
++ String
msg forall a. [a] -> [a] -> [a]
++ String
") at " forall a. [a] -> [a] -> [a]
++
                   (if ByteString -> Int64
BL8.length ByteString
left forall a. Ord a => a -> a -> Bool
> Int64
100
                    then (forall a. Int -> [a] -> [a]
take Int
100 forall a b. (a -> b) -> a -> b
$ ByteString -> String
BL8.unpack ByteString
left) forall a. [a] -> [a] -> [a]
++ String
" (truncated)"
                    else forall a. Show a => a -> String
show (ByteString -> String
BL8.unpack ByteString
left))
{-# INLINE decodeWithP' #-}

-- These alternative implementation of the 'csv' and 'csvWithHeader'
-- parsers from the 'Parser' module performs the
-- 'FromRecord'/'FromNamedRecord' conversions on-the-fly, thereby
-- avoiding the need to hold a big 'CSV' value in memory. The 'CSV'
-- type has a quite large memory overhead due to high constant
-- overheads of 'B.ByteString' and 'V.Vector'.

-- TODO: Check that the error messages don't duplicate prefixes, as in
-- "parse error: conversion error: ...".

-- | Parse a CSV file that does not include a header.
csv :: (Record -> Conversion.Parser a) -> DecodeOptions
    -> AL.Parser (V.Vector a)
csv :: forall a.
(Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv Header -> Parser a
_parseRecord !DecodeOptions
opts = do
    [a]
vals <- Parser ByteString [a]
records
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a. [a] -> Vector a
V.fromList [a]
vals
  where
    records :: Parser ByteString [a]
records = do
        !Header
r <- Word8 -> Parser Header
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
        if Header -> Bool
blankLine Header
r
            then (forall t. Chunk t => Parser t ()
endOfInput forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
endOfLine forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString [a]
records)
            else case forall a. Parser a -> Either String a
runParser (Header -> Parser a
_parseRecord Header
r) of
                Left String
msg  -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"conversion error: " forall a. [a] -> [a] -> [a]
++ String
msg
                Right a
val -> do
                    ![a]
vals <- (forall t. Chunk t => Parser t ()
endOfInput forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
AP.pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
endOfLine forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString [a]
records)
                    forall (m :: * -> *) a. Monad m => a -> m a
return (a
val forall a. a -> [a] -> [a]
: [a]
vals)
{-# INLINE csv #-}

-- | Parse a CSV file that includes a header.
csvWithHeader :: (NamedRecord -> Conversion.Parser a) -> DecodeOptions
              -> AL.Parser (Header, V.Vector a)
csvWithHeader :: forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader NamedRecord -> Parser a
_parseNamedRecord !DecodeOptions
opts = do
    !Header
hdr <- Word8 -> Parser Header
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
    [a]
vals <- Header -> Parser ByteString [a]
records Header
hdr
    let !v :: Vector a
v = forall a. [a] -> Vector a
V.fromList [a]
vals
    forall (m :: * -> *) a. Monad m => a -> m a
return (Header
hdr, Vector a
v)
  where
    records :: Header -> Parser ByteString [a]
records Header
hdr = do
        !Header
r <- Word8 -> Parser Header
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
        if Header -> Bool
blankLine Header
r
            then (forall t. Chunk t => Parser t ()
endOfInput forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
endOfLine forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Header -> Parser ByteString [a]
records Header
hdr)
            else case forall a. Parser a -> Either String a
runParser (Header -> Header -> Parser a
convert Header
hdr Header
r) of
                Left String
msg  -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"conversion error: " forall a. [a] -> [a] -> [a]
++ String
msg
                Right a
val -> do
                    ![a]
vals <- (forall t. Chunk t => Parser t ()
endOfInput forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
endOfLine forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Header -> Parser ByteString [a]
records Header
hdr)
                    forall (m :: * -> *) a. Monad m => a -> m a
return (a
val forall a. a -> [a] -> [a]
: [a]
vals)

    convert :: Header -> Header -> Parser a
convert Header
hdr = NamedRecord -> Parser a
_parseNamedRecord forall b c a. (b -> c) -> (a -> b) -> a -> c
. Header -> Header -> NamedRecord
Types.toNamedRecord Header
hdr