{-# LANGUAGE Safe #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}

-- |
-- Module      :  Text.Megaparsec.Byte.Lexer
-- Copyright   :  © 2015–present Megaparsec contributors
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <[email protected]>
-- Stability   :  experimental
-- Portability :  portable
--
-- Stripped-down version of "Text.Megaparsec.Char.Lexer" for streams of
-- bytes.
--
-- This module is intended to be imported qualified:
--
-- > import qualified Text.Megaparsec.Byte.Lexer as L
module Text.Megaparsec.Byte.Lexer
  ( -- * White space
    space,
    lexeme,
    symbol,
    symbol',
    skipLineComment,
    skipBlockComment,
    skipBlockCommentNested,

    -- * Numbers
    decimal,
    binary,
    octal,
    hexadecimal,
    scientific,
    float,
    signed,
  )
where

import Control.Applicative
import Data.Functor (void)
import Data.List (foldl')
import Data.Proxy
import Data.Scientific (Scientific)
import qualified Data.Scientific as Sci
import Data.Word (Word8)
import Text.Megaparsec
import qualified Text.Megaparsec.Byte as B
import Text.Megaparsec.Lexer

----------------------------------------------------------------------------
-- White space

-- | Given a comment prefix this function returns a parser that skips line
-- comments. Note that it stops just before the newline character but
-- doesn't consume the newline. Newline is either supposed to be consumed by
-- 'space' parser or picked up manually.
skipLineComment ::
  (MonadParsec e s m, Token s ~ Word8) =>
  -- | Line comment prefix
  Tokens s ->
  m ()
skipLineComment :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Tokens s -> m ()
skipLineComment Tokens s
prefix =
  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
B.string Tokens s
prefix forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (forall a. a -> Maybe a
Just String
"character") (forall a. Eq a => a -> a -> Bool
/= Token s
10))
{-# INLINEABLE skipLineComment #-}

-- | @'skipBlockComment' start end@ skips non-nested block comment starting
-- with @start@ and ending with @end@.
skipBlockComment ::
  (MonadParsec e s m, Token s ~ Word8) =>
  -- | Start of block comment
  Tokens s ->
  -- | End of block comment
  Tokens s ->
  m ()
skipBlockComment :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Tokens s -> Tokens s -> m ()
skipBlockComment Tokens s
start Tokens s
end = m (Tokens s)
p forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle m (Tokens s)
n)
  where
    p :: m (Tokens s)
p = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
B.string Tokens s
start
    n :: m (Tokens s)
n = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
B.string Tokens s
end
{-# INLINEABLE skipBlockComment #-}

-- | @'skipBlockCommentNested' start end@ skips possibly nested block
-- comment starting with @start@ and ending with @end@.
--
-- @since 5.0.0
skipBlockCommentNested ::
  (MonadParsec e s m, Token s ~ Word8) =>
  -- | Start of block comment
  Tokens s ->
  -- | End of block comment
  Tokens s ->
  m ()
skipBlockCommentNested :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Tokens s -> Tokens s -> m ()
skipBlockCommentNested Tokens s
start Tokens s
end = m (Tokens s)
p forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill m ()
e m (Tokens s)
n)
  where
    e :: m ()
e = forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Tokens s -> Tokens s -> m ()
skipBlockCommentNested Tokens s
start Tokens s
end forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Functor f => f a -> f ()
void forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle
    p :: m (Tokens s)
p = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
B.string Tokens s
start
    n :: m (Tokens s)
n = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
B.string Tokens s
end
{-# INLINEABLE skipBlockCommentNested #-}

----------------------------------------------------------------------------
-- Numbers

-- | Parse an integer in the decimal representation according to the format
-- of integer literals described in the Haskell report.
--
-- If you need to parse signed integers, see the 'signed' combinator.
--
-- __Warning__: this function does not perform range checks.
decimal ::
  forall e s m a.
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  m a
decimal :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal = forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal_ forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"integer"
{-# INLINEABLE decimal #-}

-- | A non-public helper to parse decimal integers.
decimal_ ::
  forall e s m a.
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  m a
decimal_ :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal_ = Tokens s -> a
mkNum forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"digit") Word8 -> Bool
isDigit
  where
    mkNum :: Tokens s -> a
mkNum = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {a} {a}. (Integral a, Num a) => a -> a -> a
step a
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Stream s => Proxy s -> Tokens s -> [Token s]
chunkToTokens (forall {k} (t :: k). Proxy t
Proxy :: Proxy s)
    step :: a -> a -> a
step a
a a
w = a
a forall a. Num a => a -> a -> a
* a
10 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
48)
{-# INLINE decimal_ #-}

-- | Parse an integer in the binary representation. The binary number is
-- expected to be a non-empty sequence of zeroes “0” and ones “1”.
--
-- You could of course parse some prefix before the actual number:
--
-- > binary = char 48 >> char' 98 >> L.binary
--
-- __Warning__: this function does not perform range checks.
--
-- @since 7.0.0
binary ::
  forall e s m a.
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  m a
binary :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
binary =
  Tokens s -> a
mkNum
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing forall {a}. (Eq a, Num a) => a -> Bool
isBinDigit
    forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"binary integer"
  where
    mkNum :: Tokens s -> a
mkNum = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {a} {a}. (Integral a, Num a) => a -> a -> a
step a
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Stream s => Proxy s -> Tokens s -> [Token s]
chunkToTokens (forall {k} (t :: k). Proxy t
Proxy :: Proxy s)
    step :: a -> a -> a
step a
a a
w = a
a forall a. Num a => a -> a -> a
* a
2 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
48)
    isBinDigit :: a -> Bool
isBinDigit a
w = a
w forall a. Eq a => a -> a -> Bool
== a
48 Bool -> Bool -> Bool
|| a
w forall a. Eq a => a -> a -> Bool
== a
49
{-# INLINEABLE binary #-}

-- | Parse an integer in the octal representation. The format of the octal
-- number is expected to be according to the Haskell report except for the
-- fact that this parser doesn't parse “0o” or “0O” prefix. It is a
-- responsibility of the programmer to parse correct prefix before parsing
-- the number itself.
--
-- For example you can make it conform to the Haskell report like this:
--
-- > octal = char 48 >> char' 111 >> L.octal
--
-- __Warning__: this function does not perform range checks.
octal ::
  forall e s m a.
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  m a
octal :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
octal =
  Tokens s -> a
mkNum
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing forall {a}. (Ord a, Num a) => a -> Bool
isOctDigit
    forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"octal integer"
  where
    mkNum :: Tokens s -> a
mkNum = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {a} {a}. (Integral a, Num a) => a -> a -> a
step a
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Stream s => Proxy s -> Tokens s -> [Token s]
chunkToTokens (forall {k} (t :: k). Proxy t
Proxy :: Proxy s)
    step :: a -> a -> a
step a
a a
w = a
a forall a. Num a => a -> a -> a
* a
8 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
48)
    isOctDigit :: a -> Bool
isOctDigit a
w = a
w forall a. Num a => a -> a -> a
- a
48 forall a. Ord a => a -> a -> Bool
< a
8
{-# INLINEABLE octal #-}

-- | Parse an integer in the hexadecimal representation. The format of the
-- hexadecimal number is expected to be according to the Haskell report
-- except for the fact that this parser doesn't parse “0x” or “0X” prefix.
-- It is a responsibility of the programmer to parse correct prefix before
-- parsing the number itself.
--
-- For example you can make it conform to the Haskell report like this:
--
-- > hexadecimal = char 48 >> char' 120 >> L.hexadecimal
--
-- __Warning__: this function does not perform range checks.
hexadecimal ::
  forall e s m a.
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  m a
hexadecimal :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
hexadecimal =
  Tokens s -> a
mkNum
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing forall {a}. (Ord a, Num a) => a -> Bool
isHexDigit
    forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"hexadecimal integer"
  where
    mkNum :: Tokens s -> a
mkNum = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {a} {a}. (Num a, Integral a) => a -> a -> a
step a
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Stream s => Proxy s -> Tokens s -> [Token s]
chunkToTokens (forall {k} (t :: k). Proxy t
Proxy :: Proxy s)
    step :: a -> a -> a
step a
a a
w
      | a
w forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
57 = a
a forall a. Num a => a -> a -> a
* a
16 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
48)
      | a
w forall a. Ord a => a -> a -> Bool
>= a
97 = a
a forall a. Num a => a -> a -> a
* a
16 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
87)
      | Bool
otherwise = a
a forall a. Num a => a -> a -> a
* a
16 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
55)
    isHexDigit :: a -> Bool
isHexDigit a
w =
      (a
w forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
57)
        Bool -> Bool -> Bool
|| (a
w forall a. Ord a => a -> a -> Bool
>= a
97 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
102)
        Bool -> Bool -> Bool
|| (a
w forall a. Ord a => a -> a -> Bool
>= a
65 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
70)
{-# INLINEABLE hexadecimal #-}

-- | Parse a floating point value as a 'Scientific' number. 'Scientific' is
-- great for parsing of arbitrary precision numbers coming from an untrusted
-- source. See documentation in "Data.Scientific" for more information.
--
-- The parser can be used to parse integers or floating point values. Use
-- functions like 'Data.Scientific.floatingOrInteger' from "Data.Scientific"
-- to test and extract integer or real values.
--
-- This function does not parse sign, if you need to parse signed numbers,
-- see 'signed'.
scientific ::
  forall e s m.
  (MonadParsec e s m, Token s ~ Word8) =>
  m Scientific
scientific :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m Scientific
scientific = do
  Integer
c' <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal_
  SP Integer
c Int
e' <- forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option (Integer -> Int -> SP
SP Integer
c' Int
0) (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Proxy s -> Integer -> m SP
dotDecimal_ (forall {k} (t :: k). Proxy t
Proxy :: Proxy s) Integer
c')
  Int
e <- forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option Int
e' (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Int -> m Int
exponent_ Int
e')
  forall (m :: * -> *) a. Monad m => a -> m a
return (Integer -> Int -> Scientific
Sci.scientific Integer
c Int
e)
{-# INLINEABLE scientific #-}

data SP = SP !Integer {-# UNPACK #-} !Int

-- | Parse a floating point number according to the syntax for floating
-- point literals described in the Haskell report.
--
-- This function does not parse sign, if you need to parse signed numbers,
-- see 'signed'.
--
-- __Note__: in versions /6.0.0/–/6.1.1/ this function accepted plain integers.
float :: (MonadParsec e s m, Token s ~ Word8, RealFloat a) => m a
float :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, RealFloat a) =>
m a
float = do
  Integer
c' <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal_
  forall a. RealFloat a => Scientific -> a
Sci.toRealFloat
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( ( do
              SP Integer
c Int
e' <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Proxy s -> Integer -> m SP
dotDecimal_ (forall {k} (t :: k). Proxy t
Proxy :: Proxy s) Integer
c'
              Int
e <- forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option Int
e' (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Int -> m Int
exponent_ Int
e')
              forall (m :: * -> *) a. Monad m => a -> m a
return (Integer -> Int -> Scientific
Sci.scientific Integer
c Int
e)
          )
            forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Integer -> Int -> Scientific
Sci.scientific Integer
c' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Int -> m Int
exponent_ Int
0)
        )
{-# INLINEABLE float #-}

dotDecimal_ ::
  (MonadParsec e s m, Token s ~ Word8) =>
  Proxy s ->
  Integer ->
  m SP
dotDecimal_ :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Proxy s -> Integer -> m SP
dotDecimal_ Proxy s
pxy Integer
c' = do
  forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
B.char Token s
46)
  let mkNum :: Tokens s -> SP
mkNum = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {a}. Integral a => SP -> a -> SP
step (Integer -> Int -> SP
SP Integer
c' Int
0) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Stream s => Proxy s -> Tokens s -> [Token s]
chunkToTokens Proxy s
pxy
      step :: SP -> a -> SP
step (SP Integer
a Int
e') a
w =
        Integer -> Int -> SP
SP
          (Integer
a forall a. Num a => a -> a -> a
* Integer
10 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
w forall a. Num a => a -> a -> a
- a
48))
          (Int
e' forall a. Num a => a -> a -> a
- Int
1)
  Tokens s -> SP
mkNum forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"digit") Word8 -> Bool
isDigit
{-# INLINE dotDecimal_ #-}

exponent_ ::
  (MonadParsec e s m, Token s ~ Word8) =>
  Int ->
  m Int
exponent_ :: forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Int -> m Int
exponent_ Int
e' = do
  forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
B.char' Token s
101)
  (forall a. Num a => a -> a -> a
+ Int
e') forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m () -> m a -> m a
signed (forall (m :: * -> *) a. Monad m => a -> m a
return ()) forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
decimal_
{-# INLINE exponent_ #-}

-- | @'signed' space p@ parser parses an optional sign character (“+” or
-- “-”), then if there is a sign it consumes optional white space (using
-- @space@ parser), then it runs parser @p@ which should return a number.
-- Sign of the number is changed according to the previously parsed sign
-- character.
--
-- For example, to parse signed integer you can write:
--
-- > lexeme        = L.lexeme spaceConsumer
-- > integer       = lexeme L.decimal
-- > signedInteger = L.signed spaceConsumer integer
signed ::
  (MonadParsec e s m, Token s ~ Word8, Num a) =>
  -- | How to consume white space after the sign
  m () ->
  -- | How to parse the number itself
  m a ->
  -- | Parser for signed numbers
  m a
signed :: forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m () -> m a -> m a
signed m ()
spc m a
p = forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option forall a. a -> a
id (forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
lexeme m ()
spc m (a -> a)
sign) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> m a
p
  where
    sign :: m (a -> a)
sign = (forall a. a -> a
id forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
B.char Token s
43) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. Num a => a -> a
negate forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
B.char Token s
45)
{-# INLINEABLE signed #-}

----------------------------------------------------------------------------
-- Helpers

-- | A fast predicate to check if the given 'Word8' is a digit in ASCII.
isDigit :: Word8 -> Bool
isDigit :: Word8 -> Bool
isDigit Word8
w = Word8
w forall a. Num a => a -> a -> a
- Word8
48 forall a. Ord a => a -> a -> Bool
< Word8
10
{-# INLINE isDigit #-}