#include <regex.h>
Inheritance diagram for RegexMatcher:
Public Member Functions | |
RegexMatcher (const UnicodeString &regexp, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
RegexMatcher (const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
virtual | ~RegexMatcher () |
Destructor. | |
virtual UBool | matches (UErrorCode &status) |
Attempts to match the entire input region against the pattern. | |
virtual UBool | matches (int32_t startIndex, UErrorCode &status) |
Resets the matcher, then attempts to match the input beginning at the specified startIndex, and extending to the end of the input. | |
virtual UBool | lookingAt (UErrorCode &status) |
Attempts to match the input string, starting from the beginning of the region, against the pattern. | |
virtual UBool | lookingAt (int32_t startIndex, UErrorCode &status) |
Attempts to match the input string, starting from the specified index, against the pattern. | |
virtual UBool | find () |
Find the next pattern match in the input string. | |
virtual UBool | find (int32_t start, UErrorCode &status) |
Resets this RegexMatcher and then attempts to find the next substring of the input string that matches the pattern, starting at the specified index. | |
virtual UnicodeString | group (UErrorCode &status) const |
Returns a string containing the text matched by the previous match. | |
virtual UnicodeString | group (int32_t groupNum, UErrorCode &status) const |
Returns a string containing the text captured by the given group during the previous match operation. | |
virtual int32_t | groupCount () const |
Returns the number of capturing groups in this matcher's pattern. | |
virtual int32_t | start (UErrorCode &status) const |
Returns the index in the input string of the start of the text matched during the previous match operation. | |
virtual int32_t | start (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. | |
virtual int32_t | end (UErrorCode &status) const |
Returns the index in the input string of the first character following the text matched during the previous match operation. | |
virtual int32_t | end (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation. | |
virtual RegexMatcher & | reset () |
Resets this matcher. | |
virtual RegexMatcher & | reset (int32_t index, UErrorCode &status) |
Resets this matcher, and sets the current input position. | |
virtual RegexMatcher & | reset (const UnicodeString &input) |
Resets this matcher with a new input string. | |
virtual const UnicodeString & | input () const |
Returns the input string being matched. | |
virtual RegexMatcher & | region (int32_t start, int32_t limit, UErrorCode &status) |
Sets the limits of this matcher's region. | |
virtual int32_t | regionStart () const |
Reports the start index of this matcher's region. | |
virtual int32_t | regionEnd () const |
Reports the end (limit) index (exclusive) of this matcher's region. | |
virtual UBool | hasTransparentBounds () const |
Queries the transparency of region bounds for this matcher. | |
virtual RegexMatcher & | useTransparentBounds (UBool b) |
Sets the transparency of region bounds for this matcher. | |
virtual UBool | hasAnchoringBounds () const |
Return true if this matcher is using anchoring bounds. | |
virtual RegexMatcher & | useAnchoringBounds (UBool b) |
Set whether this matcher is using Anchoring Bounds for its region. | |
virtual UBool | hitEnd () const |
Return TRUE if the most recent matching operation touched the end of the text being processed. | |
virtual UBool | requireEnd () const |
Return TRUE if the most recent match succeeded and additional input could cause it to fail. | |
virtual const RegexPattern & | pattern () const |
Returns the pattern that is interpreted by this matcher. | |
virtual UnicodeString | replaceAll (const UnicodeString &replacement, UErrorCode &status) |
Replaces every substring of the input that matches the pattern with the given replacement string. | |
virtual UnicodeString | replaceFirst (const UnicodeString &replacement, UErrorCode &status) |
Replaces the first substring of the input that matches the pattern with the replacement string. | |
virtual RegexMatcher & | appendReplacement (UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status) |
Implements a replace operation intended to be used as part of an incremental find-and-replace. | |
virtual UnicodeString & | appendTail (UnicodeString &dest) |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string. | |
virtual int32_t | split (const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) |
Split a string into fields. | |
virtual void | setTimeLimit (int32_t limit, UErrorCode &status) |
Set a processing time limit for match operations with this Matcher. | |
virtual int32_t | getTimeLimit () const |
Get the time limit, if any, for match operations made with this Matcher. | |
virtual void | setStackLimit (int32_t limit, UErrorCode &status) |
Set the amount of heap storage available for use by the match backtracking stack. | |
virtual int32_t | getStackLimit () const |
Get the size of the heap storage available for use by the back tracking stack. | |
virtual void | setMatchCallback (URegexMatchCallback *callback, const void *context, UErrorCode &status) |
Set a callback function for use with this Matcher. | |
virtual void | getMatchCallback (URegexMatchCallback *&callback, const void *&context, UErrorCode &status) |
Get the callback function for this Matcher. | |
void | setTrace (UBool state) |
setTrace Debug function, enable/disable tracing of the matching engine. | |
virtual UClassID | getDynamicClassID () const |
ICU "poor man's RTTI", returns a UClassID for the actual class. | |
void | resetPreserveRegion () |
Static Public Member Functions | |
UClassID | getStaticClassID () |
ICU "poor man's RTTI", returns a UClassID for this class. | |
Friends | |
class | RegexPattern |
class | RegexCImpl |
It includes methods for testing for matches, and for find and replace operations.
Class RegexMatcher is not intended to be subclassed.
Definition at line 451 of file regex.h.
|
Construct a RegexMatcher for a regular expression. This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects.
|
|
Construct a RegexMatcher for a regular expression. This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects. The matcher will retain a reference to the supplied input string, and all regexp pattern matching operations happen directly on the original string. It is critical that the string not be altered or deleted before use by the regular expression operations is complete.
|
|
Destructor.
|
|
Implements a replace operation intended to be used as part of an incremental find-and-replace. The input string, starting from the end of the previous replacement and ending at the start of the current match, is appended to the destination string. Then the replacement string is appended to the output string, including handling any substitutions of captured text. For simple, prepackaged, non-incremental find-and-replace operations, see replaceFirst() or replaceAll().
|
|
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string.
|
|
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation.
|
|
Returns the index in the input string of the first character following the text matched during the previous match operation.
|
|
Resets this RegexMatcher and then attempts to find the next substring of the input string that matches the pattern, starting at the specified index.
|
|
Find the next pattern match in the input string.
The find begins searching the input at the location following the end of the previous match, or at the start of the string if there is no previous match. If a match is found, start(), end() and group() will provide more information regarding the match. Note that if the input string is changed by the application, use find(startPos, status) instead of find(), because the saved starting position may not be valid with the altered input string.
|
|
ICU "poor man's RTTI", returns a UClassID for the actual class.
Implements UObject. |
|
Get the callback function for this Matcher.
|
|
Get the size of the heap storage available for use by the back tracking stack.
|
|
ICU "poor man's RTTI", returns a UClassID for this class.
|
|
Get the time limit, if any, for match operations made with this Matcher.
|
|
Returns a string containing the text captured by the given group during the previous match operation. Group(0) is the entire match.
|
|
Returns a string containing the text matched by the previous match. If the pattern can match an empty string, an empty string may be returned.
|
|
Returns the number of capturing groups in this matcher's pattern.
|
|
Return true if this matcher is using anchoring bounds. By default, matchers use anchoring region bounds.
|
|
Queries the transparency of region bounds for this matcher. See useTransparentBounds for a description of transparent and opaque bounds. By default, a matcher uses opaque region boundaries.
|
|
Return TRUE if the most recent matching operation touched the end of the text being processed. In this case, additional input text could change the results of that match. hitEnd() is defined for both successful and unsuccessful matches. In either case hitEnd() will return TRUE if the end of the text was reached at any point during the matching process.
|
|
Returns the input string being matched. The returned string is not a copy, but the live input string. It should not be altered or deleted.
|
|
Attempts to match the input string, starting from the specified index, against the pattern. The match may be of any length, and is not required to extend to the end of the input string. Contrast with matches().
If the match succeeds then more information can be obtained via the start(), end(), and group() functions.
|
|
Attempts to match the input string, starting from the beginning of the region, against the pattern. Like the matches() method, this function always starts at the beginning of the input region; unlike that function, it does not require that the entire region be matched.
If the match succeeds then more information can be obtained via the start(), end(), and group() functions.
|
|
Resets the matcher, then attempts to match the input beginning at the specified startIndex, and extending to the end of the input. The input region is reset to include the entire input string. A successful match must extend to the end of the input.
|
|
Attempts to match the entire input region against the pattern.
|
|
Returns the pattern that is interpreted by this matcher.
|
|
Sets the limits of this matcher's region. The region is the part of the input string that will be searched to find a match. Invoking this method resets the matcher, and then sets the region to start at the index specified by the start parameter and end at the index specified by the limit parameter. Depending on the transparency and anchoring being used (see useTransparentBounds and useAnchoringBounds), certain constructs such as anchors may behave differently at or around the boundaries of the region. The function will fail if start is greater than limit, or if either index is less than zero or greater than the length of the string being matched.
|
|
Reports the end (limit) index (exclusive) of this matcher's region. The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
|
|
Reports the start index of this matcher's region. The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
|
|
Replaces every substring of the input that matches the pattern with the given replacement string. This is a convenience function that provides a complete find-and-replace-all operation. This method first resets this matcher. It then scans the input string looking for matches of the pattern. Input that is not part of any match is left unchanged; each match is replaced in the result by the replacement string. The replacement string may contain references to capture groups.
|
|
Replaces the first substring of the input that matches the pattern with the replacement string. This is a convenience function that provides a complete find-and-replace operation. This function first resets this RegexMatcher. It then scans the input string looking for a match of the pattern. Input that is not part of the match is appended directly to the result string; the match is replaced in the result by the replacement string. The replacement string may contain references to captured groups. The state of the matcher (the position at which a subsequent find() would begin) after completing a replaceFirst() is not specified. The RegexMatcher should be reset before doing additional find() operations.
|
|
Return TRUE if the most recent match succeeded and additional input could cause it to fail. If this method returns false and a match was found, then more input might change the match but the match won't be lost. If a match was not found, then requireEnd has no meaning.
|
|
Resets this matcher with a new input string. This allows instances of RegexMatcher to be reused, which is more efficient than creating a new RegexMatcher for each input string to be processed.
|
|
Resets this matcher, and sets the current input position. The effect is to remove any memory of previous matches, and to cause subsequent find() operations to begin at the specified position in the input string. The matcher's region is reset to its default, which is the entire input string. An alternative to this function is to set a match region beginning at the desired index.
|
|
Resets this matcher. The effect is to remove any memory of previous matches, and to cause subsequent find() operations to begin at the beginning of the input string.
|
|
|
|
Set a callback function for use with this Matcher. During matching operations the function will be called periodically, giving the application the opportunity to terminate a long-running match.
|
|
Set the amount of heap storage available for use by the match backtracking stack. The matcher is also reset, discarding any results from previous matches. ICU uses a backtracking regular expression engine, with the backtrack stack maintained on the heap. This function sets the limit to the amount of memory that can be used for this purpose. A backtracking stack overflow will result in an error from the match operation that caused it. A limit is desirable because a malicious or poorly designed pattern can use excessive memory, potentially crashing the process. A limit is enabled by default.
|
|
Set a processing time limit for match operations with this Matcher. Some patterns, when matching certain strings, can run in exponential time. For practical purposes, the match operation may appear to be in an infinite loop. When a limit is set a match operation will fail with an error if the limit is exceeded. The units of the limit are steps of the match engine. Correspondence with actual processor time will depend on the speed of the processor and the details of the specific pattern, but will typically be on the order of milliseconds. By default, the matching time is not limited.
|
|
setTrace Debug function, enable/disable tracing of the matching engine. For internal ICU development use only. DO NOT USE!!!!
|
|
Split a string into fields. Somewhat like split() from Perl. The pattern matches identify delimiters that separate the input into fields. The input data between the matches becomes the fields themselves.
|
|
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. Return -1 if the capture group exists in the pattern, but was not part of the last match.
|
|
Returns the index in the input string of the start of the text matched during the previous match operation.
|
|
Set whether this matcher is using Anchoring Bounds for its region. With anchoring bounds, pattern anchors such as ^ and $ will match at the start and end of the region. Without Anchoring Bounds, anchors will only match at the positions they would in the complete text. Anchoring Bounds are the default for regions.
|
|
Sets the transparency of region bounds for this matcher. Invoking this function with an argument of true will set this matcher to use transparent bounds. If the boolean argument is false, then opaque bounds will be used. Using transparent bounds, the boundaries of this matcher's region are transparent to lookahead, lookbehind, and boundary matching constructs. Those constructs can see text beyond the boundaries of the region while checking for a match. With opaque bounds, no text outside of the matcher's region is visible to lookahead, lookbehind, and boundary matching constructs. By default, a matcher uses opaque bounds.
|