Copyright | Copyright (C) 2006-2023 John MacFarlane |
---|---|
License | GNU GPL, version 2 or above |
Maintainer | John MacFarlane <jgm@berkeley.edu> |
Stability | alpha |
Portability | portable |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Text.Pandoc.Shared
Description
Utility functions and definitions used by the various Pandoc modules.
Synopsis
- splitBy :: (a -> Bool) -> [a] -> [[a]]
- splitTextBy :: (Char -> Bool) -> Text -> [Text]
- splitTextByIndices :: [Int] -> Text -> [Text]
- inquotes :: Text -> Text
- tshow :: Show a => a -> Text
- stripTrailingNewlines :: Text -> Text
- trim :: Text -> Text
- triml :: Text -> Text
- trimr :: Text -> Text
- trimMath :: Text -> Text
- stripFirstAndLast :: Text -> Text
- camelCaseToHyphenated :: Text -> Text
- camelCaseStrToHyphenated :: String -> String
- toRomanNumeral :: Int -> Text
- tabFilter :: Int -> Text -> Text
- normalizeDate :: Text -> Maybe Text
- addPandocAttributes :: forall b. HasAttributes (Cm () b) => [(Text, Text)] -> b -> b
- orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [Text]
- extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
- removeFormatting :: Walkable Inline a => a -> [Inline]
- deNote :: Inline -> Inline
- stringify :: Walkable Inline a => a -> Text
- capitalize :: Walkable Inline a => a -> a
- compactify :: [Blocks] -> [Blocks]
- compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
- linesToPara :: [[Inline]] -> Block
- figureDiv :: Attr -> Caption -> [Block] -> Block
- makeSections :: Bool -> Maybe Int -> [Block] -> [Block]
- uniqueIdent :: Extensions -> [Inline] -> Set Text -> Text
- inlineListToIdentifier :: Extensions -> [Inline] -> Text
- textToIdentifier :: Extensions -> Text -> Text
- isHeaderBlock :: Block -> Bool
- headerShift :: Int -> Pandoc -> Pandoc
- stripEmptyParagraphs :: Pandoc -> Pandoc
- onlySimpleTableCells :: [[[Block]]] -> Bool
- isTightList :: [[Block]] -> Bool
- taskListItemFromAscii :: Extensions -> [Block] -> [Block]
- taskListItemToAscii :: Extensions -> [Block] -> [Block]
- handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block]
- addMetaField :: ToMetaValue a => Text -> a -> Meta -> Meta
- eastAsianLineBreakFilter :: Pandoc -> Pandoc
- htmlSpanLikeElements :: Set Text
- filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
- formatCode :: Attr -> Inlines -> Inlines
- renderTags' :: [Tag Text] -> Text
- inDirectory :: FilePath -> IO a -> IO a
- makeCanonical :: FilePath -> FilePath
- collapseFilePath :: FilePath -> FilePath
- filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, ByteString)]
- blocksToInlines :: [Block] -> [Inline]
- blocksToInlines' :: [Block] -> Inlines
- blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
- defaultBlocksSeparator :: Inlines
- safeRead :: (MonadPlus m, Read a) => Text -> m a
- safeStrRead :: (MonadPlus m, Read a) => String -> m a
List processing
splitTextBy :: (Char -> Bool) -> Text -> [Text] #
Split text by groups of one or more separators.
splitTextByIndices :: [Int] -> Text -> [Text] #
Split text at the given widths. Note that the break points are not indices but text widths, which will be different for East Asian characters, emojis, etc.
Text processing
stripTrailingNewlines :: Text -> Text #
Strip trailing newlines from string.
stripFirstAndLast :: Text -> Text #
Strip the first and last characters from a string.
camelCaseToHyphenated :: Text -> Text #
Change CamelCase word to hyphenated lowercase (e.g., camel-case).
toRomanNumeral :: Int -> Text #
Convert number < 4000 to uppercase roman numeral.
tabFilter
Arguments
:: Int | Tab stop |
-> Text | Input |
-> Text |
Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
Date/time
normalizeDate :: Text -> Maybe Text #
Parse a date and convert (if possible) to "YYYY-MM-DD" format. We limit years to the range 1601-9999 (ISO 8601 accepts greater than or equal to 1583, but MS Word only accepts dates starting 1601).
Pandoc block and inline list processing
addPandocAttributes :: forall b. HasAttributes (Cm () b) => [(Text, Text)] -> b -> b #
Add key-value attributes to a pandoc element. If the element does not have a slot for attributes, create an enclosing Span (for Inlines) or Div (for Blocks). Note that both 'Cm () Inlines' and 'Cm () Blocks' are instances of `HasAttributes`.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [Text] #
Generate infinite lazy list of markers for an ordered list, depending on list attributes.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines #
Extract the leading and trailing spaces from inside an inline element and place them outside the element. SoftBreaks count as Spaces for these purposes.
removeFormatting :: Walkable Inline a => a -> [Inline] #
Extract inlines, removing formatting.
stringify :: Walkable Inline a => a -> Text #
Convert pandoc structure to a string with formatting removed. Footnotes are skipped (since we don't want their contents in link labels).
capitalize :: Walkable Inline a => a -> a #
Bring all regular text in a pandoc structure to uppercase.
This function correctly handles cases where a lowercase character doesn't map to a single uppercase character – e.g. “Straße” would be converted to “STRASSE”, not “STRAßE”.
compactify :: [Blocks] -> [Blocks] #
Change final list item from `Para` to `Plain` if the list contains no other `Para` blocks. Otherwise (if the list items contain `Para` blocks besides possibly at the end), turn any `Plain`s into `Para`s (#5285).
compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] #
Like `compactify`, but acts on items of definition lists.
linesToPara :: [[Inline]] -> Block #
Convert a list of lines into a paragraph with hard line breaks. This is useful e.g. for rudimentary support of LineBlock elements in writers.
figureDiv :: Attr -> Caption -> [Block] -> Block #
Creates a Div block from figure components. The intended use is in writers of formats that do not have markup support for figures.
The resulting div is given the class `figure` and contains the figure body and the figure caption. The latter is wrapped in a `Div` of class `caption`, with the stringified `short-caption` as attribute.
makeSections :: Bool -> Maybe Int -> [Block] -> [Block] #
Put a list of Pandoc blocks into a hierarchical structure: a list of sections (each a Div with class "section" and first element a Header). If the `numbering` parameter is True, `Header` numbers are added via the number attribute on the header. If the baseLevel parameter is Just n, Header levels are adjusted to be gapless starting at level n.
uniqueIdent :: Extensions -> [Inline] -> Set Text -> Text #
Generate a unique identifier from a list of inlines. The third argument is a set of already used identifiers.
inlineListToIdentifier :: Extensions -> [Inline] -> Text #
Convert Pandoc inline list to plain text identifier.
textToIdentifier :: Extensions -> Text -> Text #
Convert string to plain text identifier.
isHeaderBlock :: Block -> Bool #
True if block is a Header block.
headerShift :: Int -> Pandoc -> Pandoc #
Shift header levels up or down.
stripEmptyParagraphs :: Pandoc -> Pandoc #
Remove empty paragraphs.
onlySimpleTableCells :: [[[Block]]] -> Bool #
Detect if table rows contain only cells consisting of a single paragraph that has no `LineBreak`.
isTightList :: [[Block]] -> Bool #
Detect if a list is tight.
taskListItemFromAscii :: Extensions -> [Block] -> [Block] #
Convert a list item containing tasklist syntax (e.g. `[x]`) to using `U+2610 BALLOT BOX` or `U+2612 BALLOT BOX WITH X`.
taskListItemToAscii :: Extensions -> [Block] -> [Block] #
Convert a list item containing text starting with `U+2610 BALLOT BOX` or `U+2612 BALLOT BOX WITH X` to tasklist syntax (e.g. `[x]`).
handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block] #
addMetaField :: ToMetaValue a => Text -> a -> Meta -> Meta #
Set a field of a `Meta` object. If the field already has a value, convert it into a list with the new value appended to the old value(s).
eastAsianLineBreakFilter :: Pandoc -> Pandoc #
Remove soft breaks between East Asian characters.
htmlSpanLikeElements :: Set Text #
Set of HTML elements that are represented as Span with a class equal to the element tag itself.
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc #
Process ipynb output cells. If mode is Nothing, remove all output. If mode is Just format, select best output for the format. If format is not ipynb, strip out ANSI escape sequences from CodeBlocks (see #5633).
formatCode :: Attr -> Inlines -> Inlines #
Reformat `Inlines` as code, putting the stringlike parts in `Code` elements while bringing other inline formatting outside. The idea is that e.g. `[Str "a",Space,Strong [Str "b"]]` should turn into `[Code ("",[],[]) "a ", Strong [Code ("",[],[]) "b"]]`. This helps work around the limitation that pandoc's Code element can only contain string content (see issue #7525).
TagSoup HTML handling
renderTags' :: [Tag Text] -> Text #
Render HTML tags.
File handling
inDirectory :: FilePath -> IO a -> IO a #
Perform an IO action in a directory, returning to starting directory.
makeCanonical :: FilePath -> FilePath #
Canonicalizes a file path by removing redundant `.` and `..`.
collapseFilePath :: FilePath -> FilePath #
Remove intermediate "." and ".." directories from a path.
collapseFilePath "./foo" == "foo"
collapseFilePath "/bar/../baz" == "/baz"
collapseFilePath "/../baz" == "/../baz"
collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
collapseFilePath "parent/foo/.." == "parent"
collapseFilePath "/parent/foo/../../bar" == "/bar"
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, ByteString)] #
for squashing blocks
blocksToInlines :: [Block] -> [Inline] #
blocksToInlines' :: [Block] -> Inlines #
blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines #
defaultBlocksSeparator :: Inlines #
Inline elements used to separate blocks when squashing blocks into inlines.
Safe read
safeStrRead :: (MonadPlus m, Read a) => String -> m a #