Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
RIO.Char
Description
Unicode Char
. Import as:
import qualified RIO.Char as C
This module does not export any partial functions. For those, see RIO.Char.Partial
Synopsis
- data Char
- isControl :: Char -> Bool
- isSpace :: Char -> Bool
- isLower :: Char -> Bool
- isUpper :: Char -> Bool
- isAlpha :: Char -> Bool
- isAlphaNum :: Char -> Bool
- isPrint :: Char -> Bool
- isDigit :: Char -> Bool
- isOctDigit :: Char -> Bool
- isHexDigit :: Char -> Bool
- isLetter :: Char -> Bool
- isMark :: Char -> Bool
- isNumber :: Char -> Bool
- isPunctuation :: Char -> Bool
- isSymbol :: Char -> Bool
- isSeparator :: Char -> Bool
- isAscii :: Char -> Bool
- isLatin1 :: Char -> Bool
- isAsciiUpper :: Char -> Bool
- isAsciiLower :: Char -> Bool
- data GeneralCategory
- = UppercaseLetter
- | LowercaseLetter
- | TitlecaseLetter
- | ModifierLetter
- | OtherLetter
- | NonSpacingMark
- | SpacingCombiningMark
- | EnclosingMark
- | DecimalNumber
- | LetterNumber
- | OtherNumber
- | ConnectorPunctuation
- | DashPunctuation
- | OpenPunctuation
- | ClosePunctuation
- | InitialQuote
- | FinalQuote
- | OtherPunctuation
- | MathSymbol
- | CurrencySymbol
- | ModifierSymbol
- | OtherSymbol
- | Space
- | LineSeparator
- | ParagraphSeparator
- | Control
- | Format
- | Surrogate
- | PrivateUse
- | NotAssigned
- generalCategory :: Char -> GeneralCategory
- toUpper :: Char -> Char
- toLower :: Char -> Char
- toTitle :: Char -> Char
- ord :: Char -> Int
- showLitChar :: Char -> ShowS
- lexLitChar :: ReadS String
- readLitChar :: ReadS Char
Documentation
The character type Char
represents Unicode codespace
and its elements are code points as in definitions
D9 and D10 of the Unicode Standard.
Character literals in Haskell are single-quoted: 'Q'
, 'Я'
or 'Ω'
.
To represent a single quote itself use '\''
, and to represent a backslash
use '\\'
. The full grammar can be found in the section 2.6 of the
Haskell 2010 Language Report.
To specify a character by its code point one can use decimal, hexadecimal
or octal notation: '\65'
, '\x41'
and '\o101'
are all alternative forms
of 'A'
. The largest code point is '\x10ffff'
.
There is a special escape syntax for ASCII control characters:
Escape | Alternatives | Meaning |
---|---|---|
'\NUL' | '\0' | null character |
'\SOH' | '\1' | start of heading |
'\STX' | '\2' | start of text |
'\ETX' | '\3' | end of text |
'\EOT' | '\4' | end of transmission |
'\ENQ' | '\5' | enquiry |
'\ACK' | '\6' | acknowledge |
'\BEL' | '\7' , '\a' | bell (alert) |
'\BS' | '\8' , '\b' | backspace |
'\HT' | '\9' , '\t' | horizontal tab |
'\LF' | '\10' , '\n' | line feed (new line) |
'\VT' | '\11' , '\v' | vertical tab |
'\FF' | '\12' , '\f' | form feed |
'\CR' | '\13' , '\r' | carriage return |
'\SO' | '\14' | shift out |
'\SI' | '\15' | shift in |
'\DLE' | '\16' | data link escape |
'\DC1' | '\17' | device control 1 |
'\DC2' | '\18' | device control 2 |
'\DC3' | '\19' | device control 3 |
'\DC4' | '\20' | device control 4 |
'\NAK' | '\21' | negative acknowledge |
'\SYN' | '\22' | synchronous idle |
'\ETB' | '\23' | end of transmission block |
'\CAN' | '\24' | cancel |
'\EM' | '\25' | end of medium |
'\SUB' | '\26' | substitute |
'\ESC' | '\27' | escape |
'\FS' | '\28' | file separator |
'\GS' | '\29' | group separator |
'\RS' | '\30' | record separator |
'\US' | '\31' | unit separator |
'\SP' | '\32' , ' ' | space |
'\DEL' | '\127' | delete |
Instances
IsChar Char | Since: base-2.1 | ||||
PrintfArg Char | Since: base-2.1 | ||||
Defined in Text.Printf | |||||
NFData Char | |||||
Defined in Control.DeepSeq | |||||
Data Char | @since base-4.0.0.0 | ||||
Defined in GHC.Internal.Data.Data Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Char -> c Char # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Char # dataTypeOf :: Char -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Char) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Char) # gmapT :: (forall b. Data b => b -> b) -> Char -> Char # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Char -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Char -> r # gmapQ :: (forall d. Data d => d -> u) -> Char -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Char -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Char -> m Char # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Char -> m Char # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Char -> m Char # | |||||
Bounded Char | @since base-2.01 | ||||
Enum Char | @since base-2.01 | ||||
Storable Char | @since base-2.01 | ||||
Defined in GHC.Internal.Foreign.Storable | |||||
Ix Char | @since base-2.01 | ||||
Read Char | @since base-2.01 | ||||
Show Char | @since base-2.01 | ||||
Eq Char | |||||
Ord Char | |||||
Hashable Char | |||||
Defined in Data.Hashable.Class | |||||
Prim Char | |||||
Defined in Data.Primitive.Types Methods sizeOfType# :: Proxy Char -> Int# # alignmentOfType# :: Proxy Char -> Int# # alignment# :: Char -> Int# # indexByteArray# :: ByteArray# -> Int# -> Char # readByteArray# :: MutableByteArray# s -> Int# -> State# s -> (# State# s, Char #) # writeByteArray# :: MutableByteArray# s -> Int# -> Char -> State# s -> State# s # setByteArray# :: MutableByteArray# s -> Int# -> Int# -> Char -> State# s -> State# s # indexOffAddr# :: Addr# -> Int# -> Char # readOffAddr# :: Addr# -> Int# -> State# s -> (# State# s, Char #) # writeOffAddr# :: Addr# -> Int# -> Char -> State# s -> State# s # setOffAddr# :: Addr# -> Int# -> Int# -> Char -> State# s -> State# s # | |||||
Display Char # | Since: 0.1.0.0 | ||||
Defined in RIO.Prelude.Display | |||||
Unbox Char | |||||
Defined in Data.Vector.Unboxed.Base | |||||
Lift Char | |||||
Vector Vector Char | |||||
Defined in Data.Vector.Unboxed.Base Methods basicUnsafeFreeze :: Mutable Vector s Char -> ST s (Vector Char) # basicUnsafeThaw :: Vector Char -> ST s (Mutable Vector s Char) # basicLength :: Vector Char -> Int # basicUnsafeSlice :: Int -> Int -> Vector Char -> Vector Char # basicUnsafeIndexM :: Vector Char -> Int -> Box Char # basicUnsafeCopy :: Mutable Vector s Char -> Vector Char -> ST s () # | |||||
MVector MVector Char | |||||
Defined in Data.Vector.Unboxed.Base Methods basicLength :: MVector s Char -> Int basicUnsafeSlice :: Int -> Int -> MVector s Char -> MVector s Char basicOverlaps :: MVector s Char -> MVector s Char -> Bool basicUnsafeNew :: Int -> ST s (MVector s Char) basicInitialize :: MVector s Char -> ST s () basicUnsafeReplicate :: Int -> Char -> ST s (MVector s Char) basicUnsafeRead :: MVector s Char -> Int -> ST s Char basicUnsafeWrite :: MVector s Char -> Int -> Char -> ST s () basicClear :: MVector s Char -> ST s () basicSet :: MVector s Char -> Char -> ST s () basicUnsafeCopy :: MVector s Char -> MVector s Char -> ST s () basicUnsafeMove :: MVector s Char -> MVector s Char -> ST s () basicUnsafeGrow :: MVector s Char -> Int -> ST s (MVector s Char) | |||||
Generic1 (URec Char :: k -> Type) | |||||
Defined in GHC.Internal.Generics Associated Types
| |||||
Foldable (UChar :: Type -> Type) | @since base-4.9.0.0 | ||||
Defined in GHC.Internal.Data.Foldable Methods fold :: Monoid m => UChar m -> m # foldMap :: Monoid m => (a -> m) -> UChar a -> m # foldMap' :: Monoid m => (a -> m) -> UChar a -> m # foldr :: (a -> b -> b) -> b -> UChar a -> b # foldr' :: (a -> b -> b) -> b -> UChar a -> b # foldl :: (b -> a -> b) -> b -> UChar a -> b # foldl' :: (b -> a -> b) -> b -> UChar a -> b # foldr1 :: (a -> a -> a) -> UChar a -> a # foldl1 :: (a -> a -> a) -> UChar a -> a # elem :: Eq a => a -> UChar a -> Bool # maximum :: Ord a => UChar a -> a # minimum :: Ord a => UChar a -> a # | |||||
Traversable (UChar :: Type -> Type) | @since base-4.9.0.0 | ||||
Functor (URec Char :: Type -> Type) | @since base-4.9.0.0 | ||||
Generic (URec Char p) | |||||
Defined in GHC.Internal.Generics Associated Types
| |||||
Show (URec Char p) | @since base-4.9.0.0 | ||||
Eq (URec Char p) | @since base-4.9.0.0 | ||||
Ord (URec Char p) | @since base-4.9.0.0 | ||||
Defined in GHC.Internal.Generics | |||||
newtype Vector Char | |||||
Defined in Data.Vector.Unboxed.Base | |||||
data URec Char (p :: k) | Used for marking occurrences of @since base-4.9.0.0 | ||||
newtype MVector s Char | |||||
Defined in Data.Vector.Unboxed.Base | |||||
type Compare (a :: Char) (b :: Char) | |||||
Defined in GHC.Internal.Data.Type.Ord | |||||
type Rep1 (URec Char :: k -> Type) | @since base-4.9.0.0 | ||||
Defined in GHC.Internal.Generics | |||||
type Rep (URec Char p) | @since base-4.9.0.0 | ||||
Defined in GHC.Internal.Generics |
Character classification
Unicode characters are divided into letters, Data.Char.numbers, marks, punctuation, Data.Char.symbols, separators (including spaces) and others (including control characters).
Selects control characters, which are the non-printing characters of the Latin-1 subset of Unicode.
Returns True
for any Unicode space character, and the control
characters \t
, \n
, \r
, \f
, \v
.
Selects lower-case alphabetic Unicode characters (letters).
Note: this predicate does not work for letter-like characters such as:
'ⓐ'
(U+24D0
circled Latin small letter a) and
'ⅳ'
(U+2173
small Roman numeral four). This is due to selecting only
characters with the GeneralCategory
LowercaseLetter
.
See isLowerCase
for a more intuitive predicate.
Selects upper-case or title-case alphabetic Unicode characters (letters). Title case is used by a small number of letter ligatures like the single-character form of Lj.
Note: this predicate does not work for letter-like characters such as:
'Ⓐ'
(U+24B6
circled Latin capital letter A) and
'Ⅳ'
(U+2163
Roman numeral four). This is due to selecting only
characters with the GeneralCategory
UppercaseLetter
or TitlecaseLetter
.
See isUpperCase
for a more intuitive predicate. Note that
unlike isUpperCase
, isUpper
does select title-case characters such as
'Dž'
(U+01C5
Latin capital letter d with small letter z with caron) or
'ᾯ'
(U+1FAF
Greek capital letter omega with dasia and perispomeni and
prosgegrammeni).
Selects alphabetic Unicode characters (lower-case, upper-case and
title-case letters, plus letters of caseless scripts and modifiers letters).
This function is equivalent to isLetter
.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Letter".
isAlphaNum :: Char -> Bool #
Selects alphabetic or numeric Unicode characters.
Note that numeric digits outside the ASCII range, as well as numeric
characters which aren't digits, are selected by this function but not by
isDigit
. Such characters may be part of identifiers but are not used by
the printer and reader to represent numbers, e.g., Roman numerals like
,
full-width digits like V
'1'
(aka '65297'
).
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
Selects printable Unicode characters (letters, numbers, marks, punctuation, symbols and spaces).
This function returns False
if its argument has one of the
following GeneralCategory
s, or True
otherwise:
isOctDigit :: Char -> Bool #
Selects ASCII octal digits, i.e. '0'
..'7'
.
isHexDigit :: Char -> Bool #
Selects ASCII hexadecimal digits,
i.e. '0'
..'9'
, 'a'
..'f'
, 'A'
..'F'
.
Selects alphabetic Unicode characters (lower-case, upper-case and
title-case letters, plus letters of caseless scripts and
modifiers letters). This function is equivalent to
isAlpha
.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Letter".
Examples
Basic usage:
>>>
isLetter 'a'
True>>>
isLetter 'A'
True>>>
isLetter 'λ'
True>>>
isLetter '0'
False>>>
isLetter '%'
False>>>
isLetter '♥'
False>>>
isLetter '\31'
False
Ensure that isLetter
and isAlpha
are equivalent.
>>>
let chars = [(chr 0)..]
>>>
let letters = map isLetter chars
>>>
let alphas = map isAlpha chars
>>>
letters == alphas
True
Selects Unicode mark characters, for example accents and the like, which combine with preceding characters.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Mark".
Examples
Basic usage:
>>>
isMark 'a'
False>>>
isMark '0'
False
Combining marks such as accent characters usually need to follow another character before they become printable:
>>>
map isMark "ò"
[False,True]
Puns are not necessarily supported:
>>>
isMark '✓'
False
Selects Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Number".
Examples
Basic usage:
>>>
isNumber 'a'
False>>>
isNumber '%'
False>>>
isNumber '3'
True
ASCII '0'
through '9'
are all numbers:
>>>
and $ map isNumber ['0'..'9']
True
Unicode Roman numerals are "numbers" as well:
>>>
isNumber 'Ⅸ'
True
isPunctuation :: Char -> Bool #
Selects Unicode punctuation characters, including various kinds of connectors, brackets and quotes.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Punctuation".
Examples
Basic usage:
>>>
isPunctuation 'a'
False>>>
isPunctuation '7'
False>>>
isPunctuation '♥'
False>>>
isPunctuation '"'
True>>>
isPunctuation '?'
True>>>
isPunctuation '—'
True
Selects Unicode symbol characters, including mathematical and currency symbols.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Symbol".
Examples
Basic usage:
>>>
isSymbol 'a'
False>>>
isSymbol '6'
False>>>
isSymbol '='
True
The definition of "math symbol" may be a little counter-intuitive depending on one's background:
>>>
isSymbol '+'
True>>>
isSymbol '-'
False
isSeparator :: Char -> Bool #
Selects Unicode space and separator characters.
This function returns True
if its argument has one of the
following GeneralCategory
s, or False
otherwise:
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Separator".
Examples
Basic usage:
>>>
isSeparator 'a'
False>>>
isSeparator '6'
False>>>
isSeparator ' '
True
Warning: newlines and tab characters are not considered separators.
>>>
isSeparator '\n'
False>>>
isSeparator '\t'
False
But some more exotic characters are (like HTML's
):
>>>
isSeparator '\160'
True
Subranges
Selects the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
Selects the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
isAsciiUpper :: Char -> Bool #
isAsciiLower :: Char -> Bool #
Unicode general categories
data GeneralCategory #
Unicode General Categories (column 2 of the UnicodeData table) in the order they are listed in the Unicode standard (the Unicode Character Database, in particular).
Examples
Basic usage:
>>>
:t OtherLetter
OtherLetter :: GeneralCategory
Eq
instance:
>>>
UppercaseLetter == UppercaseLetter
True>>>
UppercaseLetter == LowercaseLetter
False
Ord
instance:
>>>
NonSpacingMark <= MathSymbol
True
Enum
instance:
>>>
enumFromTo ModifierLetter SpacingCombiningMark
[ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]
Read
instance:
>>>
read "DashPunctuation" :: GeneralCategory
DashPunctuation>>>
read "17" :: GeneralCategory
*** Exception: Prelude.read: no parse
Show
instance:
>>>
show EnclosingMark
"EnclosingMark"
Bounded
instance:
>>>
minBound :: GeneralCategory
UppercaseLetter>>>
maxBound :: GeneralCategory
NotAssigned
Ix
instance:
>>>
import GHC.Internal.Data.Ix ( index )
>>>
index (OtherLetter,Control) FinalQuote
12>>>
index (OtherLetter,Control) Format
*** Exception: Error in array index
Constructors
UppercaseLetter | Lu: Letter, Uppercase |
LowercaseLetter | Ll: Letter, Lowercase |
TitlecaseLetter | Lt: Letter, Titlecase |
ModifierLetter | Lm: Letter, Modifier |
OtherLetter | Lo: Letter, Other |
NonSpacingMark | Mn: Mark, Non-Spacing |
SpacingCombiningMark | Mc: Mark, Spacing Combining |
EnclosingMark | Me: Mark, Enclosing |
DecimalNumber | Nd: Number, Decimal |
LetterNumber | Nl: Number, Letter |
OtherNumber | No: Number, Other |
ConnectorPunctuation | Pc: Punctuation, Connector |
DashPunctuation | Pd: Punctuation, Dash |
OpenPunctuation | Ps: Punctuation, Open |
ClosePunctuation | Pe: Punctuation, Close |
InitialQuote | Pi: Punctuation, Initial quote |
FinalQuote | Pf: Punctuation, Final quote |
OtherPunctuation | Po: Punctuation, Other |
MathSymbol | Sm: Symbol, Math |
CurrencySymbol | Sc: Symbol, Currency |
ModifierSymbol | Sk: Symbol, Modifier |
OtherSymbol | So: Symbol, Other |
Space | Zs: Separator, Space |
LineSeparator | Zl: Separator, Line |
ParagraphSeparator | Zp: Separator, Paragraph |
Control | Cc: Other, Control |
Format | Cf: Other, Format |
Surrogate | Cs: Other, Surrogate |
PrivateUse | Co: Other, Private Use |
NotAssigned | Cn: Other, Not Assigned |
Instances
generalCategory :: Char -> GeneralCategory #
The Unicode general category of the character. This relies on the
Enum
instance of GeneralCategory
, which must remain in the
same order as the categories are presented in the Unicode
standard.
Examples
Basic usage:
>>>
generalCategory 'a'
LowercaseLetter>>>
generalCategory 'A'
UppercaseLetter>>>
generalCategory '0'
DecimalNumber>>>
generalCategory '%'
OtherPunctuation>>>
generalCategory '♥'
OtherSymbol>>>
generalCategory '\31'
Control>>>
generalCategory ' '
Space
Case conversion
Convert a letter to the corresponding upper-case letter, if any. Any other character is returned unchanged.
Convert a letter to the corresponding lower-case letter, if any. Any other character is returned unchanged.
Convert a letter to the corresponding title-case or upper-case letter, if any. (Title case differs from upper case only for a small number of ligature letters.) Any other character is returned unchanged.
Numeric representations
String representations
showLitChar :: Char -> ShowS #
Convert a character to a string using only printable characters, using Haskell source-language escape conventions. For example:
showLitChar '\n' s = "\\n" ++ s
lexLitChar :: ReadS String #
Read a string representation of a character, using Haskell source-language escape conventions. For example:
lexLitChar "\\nHello" = [("\\n", "Hello")]
readLitChar :: ReadS Char #
Read a string representation of a character, using Haskell source-language escape conventions, and convert it to the character that it encodes. For example:
readLitChar "\\nHello" = [('\n', "Hello")]