Copyright | © 2015–present Megaparsec contributors © 2007 Paolo Martini © 1999–2001 Daan Leijen |
---|---|
License | FreeBSD |
Maintainer | Mark Karpov <[email protected]> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | Safe |
Language | Haskell2010 |
Commonly used character parsers.
Synopsis
- newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
- eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
- tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- space :: (MonadParsec e s m, Token s ~ Char) => m ()
- hspace :: (MonadParsec e s m, Token s ~ Char) => m ()
- space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
- hspace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
- controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m (Token s)
- categoryName :: GeneralCategory -> String
- char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
- char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
- string :: MonadParsec e s m => Tokens s -> m (Tokens s)
- string' :: (MonadParsec e s m, FoldCase (Tokens s)) => Tokens s -> m (Tokens s)
Simple parsers
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s) Source #
Parse a carriage return character followed by a newline character. Return the sequence of characters parsed.
hspace :: (MonadParsec e s m, Token s ~ Char) => m () Source #
Like `space`, but does not accept newlines and carriage returns.
Since: 9.0.0
hspace1 :: (MonadParsec e s m, Token s ~ Char) => m () Source #
Like `space1`, but does not accept newlines and carriage returns.
Since: 9.0.0
Categories of characters
controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a control character (a non-printing character of the Latin-1 subset of Unicode).
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode space character, and the control characters: tab, newline, carriage return, form feed, and vertical tab.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an upper-case or title-case alphabetic Unicode character. Title case is used by a small number of letter ligatures like the single-character form of Lj.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a lower-case alphabetic Unicode character.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an alphabetic Unicode character: lower-case, upper-case, or title-case letter, or a letter of case-less scripts/modifier letter.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an alphabetic or numeric Unicode character.
Note that numeric digits outside the ASCII range are parsed by this
parser but not by `digitChar`. Such digits may be part of identifiers but
are not used by the printer and reader to represent numbers.
printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a printable Unicode character: letter, number, mark, punctuation, symbol or space.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an ASCII digit, i.e. between “0” and “9”.
binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a binary digit, i.e. "0" or "1".
Since: 7.0.0
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an octal digit, i.e. between “0” and “7”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or “A” and “F”.
markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode mark character (accents and the like), which combines with preceding characters.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode numeric character, including digits from various scripts, Roman numerals, etc.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode punctuation character, including various kinds of connectors, brackets and quotes.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode symbol character, including mathematical and currency symbols.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode space or separator character.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a character from the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a character from the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m (Token s) Source #
`charCategory cat` parses a character in the Unicode General Category `cat`; see `GeneralCategory`.
categoryName :: GeneralCategory -> String Source #
Return the human-readable name of Unicode General Category.
Single character
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) Source #
A type-constrained version of `single`.
semicolon = char ';'
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) Source #
The same as `char`, but case-insensitive. This parser returns the
actually parsed character, preserving its case.
>>> parseTest (char' 'e') "E"
'E'
>>> parseTest (char' 'e') "G"
1:1: unexpected 'G' expecting 'E' or 'e'