regex-tdfa-1.1.1: Replaces/Enhances Text.RegexContentsIndex
Text.Regex.TDFA.Common
Description
Common provides simple functions to the backend. It defines most of the data types. All modules should call error via the common_error function below.
Synopsis
look :: Int -> IntMap a -> a
common_error :: String -> String -> a
on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
norep :: Eq a => [a] -> [a]
norepBy :: (a -> a -> Bool) -> [a] -> [a]
mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1)
mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2)
fst3 :: (a, b, c) -> a
snd3 :: (a, b, c) -> b
thd3 :: (a, b, c) -> c
flipOrder :: Ordering -> Ordering
noWin :: WinTags -> Bool
newtype DoPa = DoPa {
dopaIndex :: Int
}
data CompOption = CompOption {
caseSensitive :: Bool
multiline :: Bool
rightAssoc :: Bool
newSyntax :: Bool
lastStarGreedy :: Bool
}
data ExecOption = ExecOption {
captureGroups :: Bool
}
type Tag = Int
data OP
= Maximize
| Minimize
| Orbit
| Ignore
type Index = Int
type SetIndex = IntSet
type Position = Int
type GroupIndex = Int
data GroupInfo = GroupInfo {
thisIndex :: GroupIndex
parentIndex :: GroupIndex
startTag :: Tag
stopTag :: Tag
flagTag :: Tag
}
data Regex = Regex {
regex_dfa :: DFA
regex_init :: Index
regex_b_index :: (Index, Index)
regex_b_tags :: (Tag, Tag)
regex_trie :: TrieSet DFA
regex_tags :: Array Tag OP
regex_groups :: Array GroupIndex [GroupInfo]
regex_isFrontAnchored :: Bool
regex_compOptions :: CompOption
regex_execOptions :: ExecOption
}
data WinEmpty
= WinEmpty Instructions
| WinTest WhichTest (Maybe WinEmpty) (Maybe WinEmpty)
data QNFA = QNFA {
q_id :: Index
q_qt :: QT
}
data QT
= Simple {
qt_win :: WinTags
qt_trans :: CharMap QTrans
qt_other :: QTrans
}
| Testing {
qt_test :: WhichTest
qt_dopas :: EnumSet DoPa
qt_a :: QT
qt_b :: QT
}
type QTrans = IntMap [TagCommand]
data WhichTest
= Test_BOL
| Test_EOL
| Test_BOB
| Test_EOB
| Test_BOW
| Test_EOW
| Test_EdgeWord
| Test_NotEdgeWord
data TagTask
= TagTask
| ResetGroupStopTask
| SetGroupStopTask
| ResetOrbitTask
| EnterOrbitTask
| LeaveOrbitTask
type TagTasks = [(Tag, TagTask)]
data TagUpdate
= PreUpdate TagTask
| PostUpdate TagTask
type TagList = [(Tag, TagUpdate)]
type TagCommand = (DoPa, TagList)
type WinTags = TagList
data DFA = DFA {
d_id :: SetIndex
d_dt :: DT
}
data Transition = Transition {
trans_many :: DFA
trans_single :: DFA
trans_how :: DTrans
}
data DT
= Simple' {
dt_win :: IntMap Instructions
dt_trans :: CharMap Transition
dt_other :: Transition
}
| Testing' {
dt_test :: WhichTest
dt_dopas :: EnumSet DoPa
dt_a :: DT
dt_b :: DT
}
type DTrans = IntMap (IntMap (DoPa, Instructions))
type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])]
data Orbits = Orbits {
inOrbit :: !Bool
basePos :: Position
ordinal :: Maybe Int
getOrbits :: !(Seq Position)
}
data Instructions = Instructions {
newPos :: ![(Tag, Action)]
newOrbits :: !(Maybe (Position -> OrbitTransformer))
}
data Action
= SetPre
| SetPost
| SetVal Int
type OrbitTransformer = OrbitLog -> OrbitLog
type OrbitLog = IntMap Orbits
showQT :: QT -> String
indent :: [String] -> String
showDT :: DT -> String
seeDTrans :: DTrans -> String
Documentation
look :: Int -> IntMap a -> a
common_error :: String -> String -> a
on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
norep :: Eq a => [a] -> [a]
after sort or sortBy the use of nubnubBy can be replaced by norepnorepBy
norepBy :: (a -> a -> Bool) -> [a] -> [a]
after sort or sortBy the use of nubnubBy can be replaced by norepnorepBy
mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1)
mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2)
fst3 :: (a, b, c) -> a
snd3 :: (a, b, c) -> b
thd3 :: (a, b, c) -> c
flipOrder :: Ordering -> Ordering
noWin :: WinTags -> Bool
newtype DoPa
Used to track elements of the pattern that accept characters or are anchors
Constructors
DoPa
dopaIndex :: Int
show/hide Instances
data CompOption
Control whether the pattern is multiline or case-sensitive like Text.Regex and whether to capture the subgroups (1, 2, etc). Controls enabling extra anchor syntax.
Constructors
CompOption
caseSensitive :: BoolTrue in blankCompOpt and defaultCompOpt
multiline :: BoolFalse in blankCompOpt, True in defaultCompOpt. Compile for newline-sensitive matching. By default, newline is a completely ordinary character with no special meaning in either REs or strings. With this flag, inverted bracket expressions and . never match newline, a ^ anchor matches the null string after any newline in the string in addition to its normal function, and the $ anchor matches the null string before any newline in the string in addition to its normal function.
rightAssoc :: BoolTrue (and therefore Right associative) in blankCompOpt and defaultCompOpt
newSyntax :: BoolFalse in blankCompOpt, True in defaultCompOpt. Add the extended non-POSIX syntax described in Text.Regex.TDFA haddock documentation.
lastStarGreedy :: BoolFalse by default. This is POSIX correct but it takes space and is slower. Setting this to true will improve performance, and should be done if you plan to set the captureGroups execoption to False.
show/hide Instances
data ExecOption
Constructors
ExecOption
captureGroups :: BoolTrue by default. Set to False to improve speed (and space).
show/hide Instances
type Tag = Int
Used by implementation to name certain Postions during matching. Identity of Position tag to set during a transition
data OP
Internal use to indicate type of tag and preference for larger or smaller Positions
Constructors
Maximize
Minimize
Orbit
Ignore
show/hide Instances
type Index = Int
Internal NFA node identity number
type SetIndex = IntSet
Internal DFA identity is this Set of NFA Index
type Position = Int
Index into the text being searched
type GroupIndex = Int
GroupIndex is for indexing submatches from capturing parenthesized groups (PGroup/Group)
data GroupInfo
GroupInfo collects the parent and tag information for an instance of a group
Constructors
GroupInfo
thisIndex :: GroupIndex
parentIndex :: GroupIndex
startTag :: Tag
stopTag :: Tag
flagTag :: Tag
show/hide Instances
data Regex
The TDFA backend specific Regex type, used by this module's RegexOptions and RegexMaker
Constructors
Regex
regex_dfa :: DFAstarting DFA state
regex_init :: Indexindex of starting state
regex_b_index :: (Index, Index)indexes of smallest and largest states
regex_b_tags :: (Tag, Tag)indexes of smallest and largest tags
regex_trie :: TrieSet DFAAll DFA states
regex_tags :: Array Tag OPinformation about each tag
regex_groups :: Array GroupIndex [GroupInfo]information about each group
regex_isFrontAnchored :: Boolused for optimizing execution
regex_compOptions :: CompOption
regex_execOptions :: ExecOption
show/hide Instances
data WinEmpty
Constructors
WinEmpty Instructions
WinTest WhichTest (Maybe WinEmpty) (Maybe WinEmpty)
show/hide Instances
data QNFA
Internal NFA node type
Constructors
QNFA
q_id :: Index
q_qt :: QT
show/hide Instances
data QT
Internal to QNFA type.
Constructors
Simple
qt_win :: WinTagsempty transitions to the virtual winning state
qt_trans :: CharMap QTransall ways to leave this QNFA to other or the same QNFA
qt_other :: QTransdefault ways to leave this QNFA to other or the same QNFA
Testing
qt_test :: WhichTestThe test to perform
qt_dopas :: EnumSet DoPalocation(s) of the anchor(s) in the original regexp
qt_a :: QTuse qt_a if test is True, else use qt_b
qt_b :: QTuse qt_a if test is True, else use qt_b
show/hide Instances
type QTrans = IntMap [TagCommand]
Internal type to represent the tagged transition from one QNFA to another (or itself). The key is the Index of the destination QNFA.
data WhichTest
Known predicates, just Beginning of Line (^) and End of Line ($). Also support for GNU extensions is being added: ` beginning of buffer, ' end of buffer, < and > for begin and end of words, b and B for word boundary and not word boundary.
Constructors
Test_BOL
Test_EOL
Test_BOB
Test_EOB
Test_BOW
Test_EOW
Test_EdgeWord
Test_NotEdgeWord
show/hide Instances
data TagTask
The things that can be done with a Tag. TagTask and ResetGroupStopTask are for tags with Maximize or Minimize OP values. ResetOrbitTask and EnterOrbitTask and LeaveOrbitTask are for tags with Orbit OP value.
Constructors
TagTask
ResetGroupStopTask
SetGroupStopTask
ResetOrbitTask
EnterOrbitTask
LeaveOrbitTask
show/hide Instances
type TagTasks = [(Tag, TagTask)]
Ordered list of tags and their associated Task
data TagUpdate
When attached to a QTrans the TagTask can be done before or after accepting the character.
Constructors
PreUpdate TagTask
PostUpdate TagTask
show/hide Instances
type TagList = [(Tag, TagUpdate)]
Ordered list of tags and their associated update operation.
type TagCommand = (DoPa, TagList)
A TagList and the location of the item in the original pattern that is being accepted.
type WinTags = TagList
Ordered list of tags and their associated update operation to perform on an empty transition to the virtual winning state.
data DFA
Internal DFA node, identified by the Set of indices of the QNFA nodes it represents.
Constructors
DFA
d_id :: SetIndex
d_dt :: DT
show/hide Instances
data Transition
Constructors
Transition
trans_many :: DFAwhere to go (maximal), including respawning
trans_single :: DFAwhere to go, not including respawning
trans_how :: DTranshow to go, including respawning
data DT
Internal to the DFA node
Constructors
Simple'
dt_win :: IntMap InstructionsActions to perform to win
dt_trans :: CharMap TransitionTransition to accept Char
dt_other :: Transitiondefault accepting transition
Testing'
dt_test :: WhichTestThe test to perform
dt_dopas :: EnumSet DoPalocation(s) of the anchor(s) in the original regexp
dt_a :: DTuse dt_a if test is True else use dt_b
dt_b :: DTuse dt_a if test is True else use dt_b
show/hide Instances
type DTrans = IntMap (IntMap (DoPa, Instructions))

Internal type to repesent the commands for the tagged transition. The outer IntMap is for the destination Index and the inner IntMap is for the Source Index. This is convenient since all runtime data going to the same destination must be compared to find the best.

A Destination IntMap entry may have an empty Source IntMap if and only if the destination is the starting index and the NFA/DFA. This instructs the matching engine to spawn a new entry starting at the post-update position.

type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])]
Internal convenience type for the text display code
data Orbits

Positions for which a * was re-started while looping. Need to append locations at back but compare starting with front, so use Seq as a Queue. The initial position is saved in basePos (and a Maximize Tag), the middle positions in the Seq, and the final position is NOT saved in the Orbits (only in a Maximize Tag).

The orderinal code is being written XXX TODO document it.

Constructors
Orbits
inOrbit :: !Bool
basePos :: Position
ordinal :: Maybe Int
getOrbits :: !(Seq Position)
show/hide Instances
data Instructions
The newPos and newFlags lists in Instructions are sorted by, and unique in, the Tag values
Constructors
Instructions
newPos :: ![(Tag, Action)]
newOrbits :: !(Maybe (Position -> OrbitTransformer))
show/hide Instances
data Action
Constructors
SetPre
SetPost
SetVal Int
show/hide Instances
type OrbitTransformer = OrbitLog -> OrbitLog
type OrbitLog = IntMap Orbits
showQT :: QT -> String
indent :: [String] -> String
showDT :: DT -> String
seeDTrans :: DTrans -> String
Produced by Haddock version 2.3.0