#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable:4996)
#define str_icmp stricmp
#else
#define str_icmp strcasecmp
#endif
#include "regexp/Pattern.h"
#include "regexp/Matcher.h"
#include <cstdio>
#include <cstring>
#include <typeinfo>
#include <algorithm>
std::map<std::string, Pattern *> Pattern::compiledPatterns;
std::map<std::string, std::pair<std::string, unsigned
long
> > Pattern::registeredPatterns;
const
int
Pattern::MIN_QMATCH = 0x00000000;
const
int
Pattern::MAX_QMATCH = 0x7FFFFFFF;
const
unsigned
long
Pattern::CASE_INSENSITIVE = 0x01;
const
unsigned
long
Pattern::LITERAL = 0x02;
const
unsigned
long
Pattern::DOT_MATCHES_ALL = 0x04;
const
unsigned
long
Pattern::MULTILINE_MATCHING = 0x08;
const
unsigned
long
Pattern::UNIX_LINE_MODE = 0x10;
Pattern::Pattern(
const
std::string & rhs)
{
matcher = NULL;
pattern = rhs;
curInd = 0;
groupCount = 0;
nonCapGroupCount = 0;
error = 0;
head = NULL;
}
void
Pattern::raiseError()
{
switch
(pattern[curInd - 1])
{
case
'*'
:
case
')'
:
case
'+'
:
case
'?'
:
case
']'
:
case
'}'
:
fprintf
(stderr,
"%s\n%*c^\n"
, pattern.c_str(), curInd - 1,
' '
);
fprintf
(stderr,
"Syntax Error near here. Possible unescaped meta character.\n"
);
break
;
default
:
fprintf
(stderr,
"%s\n%*c^\n"
, pattern.c_str(), curInd - 1,
' '
);
fprintf
(stderr,
"Syntax Error near here. \n"
);
break
;
}
error = 1;
}
NFANode * Pattern::registerNode(NFANode * node)
{
nodes[node] = 1;
return
node;
}
std::string Pattern::classUnion (std::string s1, std::string s2)
const
{
char
out[300];
std::sort(s1.begin(), s1.end());
std::sort(s2.begin(), s2.end());
*std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
return
out;
}
std::string Pattern::classIntersect (std::string s1, std::string s2)
const
{
char
out[300];
std::sort(s1.begin(), s1.end());
std::sort(s2.begin(), s2.end());
*std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0;
return
out;
}
std::string Pattern::classNegate (std::string s1)
const
{
char
out[300];
int
i, ind = 0;
std::map<
char
,
bool
> m;
for
(i = 0; i < (
int
)s1.size(); ++i)
{
m[s1[i]] = 1;
if
(flags & CASE_INSENSITIVE)
{
if
(s1[i] >=
'a'
&& s1[i] <=
'z'
) m[s1[i] -
'a'
+
'A'
] = 1;
else
if
(s1[i] >=
'A'
&& s1[i] <=
'Z'
) m[s1[i] -
'A'
+
'a'
] = 1;
}
}
for
(i = 0xFF; i >= 0; --i)
if
(m.find((
char
)i) == m.end()) out[ind++] = (
char
)i;
out[ind] = 0;
return
std::string(out, ind);
}
std::string Pattern::classCreateRange(
char
low,
char
hi)
const
{
char
out[300];
int
ind = 0;
while
(low != hi) out[ind++] = low++;
out[ind++] = low;
return
std::string(out, ind);
}
int
Pattern::getInt(
int
start,
int
end)
{
int
ret = 0;
for
(; start <= end; ++start) ret = ret * 10 + (pattern[start] -
'0'
);
return
ret;
}
bool
Pattern::quantifyCurly(
int
& sNum,
int
& eNum)
{
bool
good = 1;
int
i, ci = curInd + 1;
int
commaInd = ci, endInd = ci, len = pattern.size();
sNum = eNum = 0;
while
(endInd < len && pattern[endInd ] !=
'}'
) ++endInd;
while
(commaInd < endInd && pattern[commaInd] !=
','
) ++commaInd;
if
(endInd >= len) { raiseError();
return
0; }
for
(i = ci; good && i < endInd; ++i)
if
(i != commaInd && !
isdigit
(pattern[i])) good = 0;
if
(!good && commaInd < endInd) { raiseError();
return
0; }
if
(!good)
return
0;
if
(commaInd == ci)
{
if
(endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; }
else
{ sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); }
}
else
if
(commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; }
else
if
(commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; }
else
{ sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); }
curInd = endInd + 1;
return
1;
}
NFANode * Pattern::quantifyGroup(NFANode * start, NFANode * stop,
const
int
gn)
{
NFANode * newNode = NULL;
int
type = 0;
if
(curInd < (
int
)pattern.size())
{
char
ch = (curInd + 1 >= (
int
)pattern.size()) ? -1 : pattern[curInd + 1];
switch
(pattern[curInd])
{
case
'*'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; type = 1;
break
;
case
'+'
: ++curInd; type = 2;
break
;
}
newNode = registerNode(
new
NFAGroupLoopPrologueNode(gn));
newNode->next = registerNode(
new
NFAGroupLoopNode(start, MIN_QMATCH, MAX_QMATCH, gn, type));
stop->next = newNode->next;
return
newNode;
case
'?'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; type = 1;
break
;
case
'+'
: ++curInd; type = 2;
break
;
}
newNode = registerNode(
new
NFAGroupLoopPrologueNode(gn));
newNode->next = registerNode(
new
NFAGroupLoopNode(start, MIN_QMATCH, 1, gn, type));
stop->next = newNode->next;
return
newNode;
case
'+'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; type = 1;
break
;
case
'+'
: ++curInd; type = 2;
break
;
}
newNode = registerNode(
new
NFAGroupLoopPrologueNode(gn));
newNode->next = registerNode(
new
NFAGroupLoopNode(start, 1, MAX_QMATCH, gn, type));
stop->next = newNode->next;
return
newNode;
case
'{'
:
{
int
s, e;
if
(quantifyCurly(s, e))
{
ch = (curInd < (
int
)pattern.size()) ? pattern[curInd] : -1;
switch
(ch)
{
case
'?'
: ++curInd; type = 1;
break
;
case
'+'
: ++curInd; type = 2;
break
;
}
newNode = registerNode(
new
NFAGroupLoopPrologueNode(gn));
newNode->next = registerNode(
new
NFAGroupLoopNode(start, s, e, gn, type));
stop->next = newNode->next;
return
newNode;
}
}
default
:
break
;
}
}
return
NULL;
}
NFANode * Pattern::quantify(NFANode * newNode)
{
if
(curInd < (
int
)pattern.size())
{
char
ch = (curInd + 1 >= (
int
)pattern.size()) ? -1 : pattern[curInd + 1];
switch
(pattern[curInd])
{
case
'*'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; newNode = registerNode(
new
NFALazyQuantifierNode (
this
, newNode, MIN_QMATCH, MAX_QMATCH));
break
;
case
'+'
: ++curInd; newNode = registerNode(
new
NFAPossessiveQuantifierNode(
this
, newNode, MIN_QMATCH, MAX_QMATCH));
break
;
default
: newNode = registerNode(
new
NFAGreedyQuantifierNode (
this
, newNode, MIN_QMATCH, MAX_QMATCH));
break
;
}
break
;
case
'?'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; newNode = registerNode(
new
NFALazyQuantifierNode (
this
, newNode, MIN_QMATCH, 1));
break
;
case
'+'
: ++curInd; newNode = registerNode(
new
NFAPossessiveQuantifierNode(
this
, newNode, MIN_QMATCH, 1));
break
;
default
: newNode = registerNode(
new
NFAGreedyQuantifierNode (
this
, newNode, MIN_QMATCH, 1));
break
;
}
break
;
case
'+'
:
++curInd;
switch
(ch)
{
case
'?'
: ++curInd; newNode = registerNode(
new
NFALazyQuantifierNode (
this
, newNode, 1, MAX_QMATCH));
break
;
case
'+'
: ++curInd; newNode = registerNode(
new
NFAPossessiveQuantifierNode(
this
, newNode, 1, MAX_QMATCH));
break
;
default
: newNode = registerNode(
new
NFAGreedyQuantifierNode (
this
, newNode, 1, MAX_QMATCH));
break
;
}
break
;
case
'{'
:
{
int
s, e;
if
(quantifyCurly(s, e))
{
ch = (curInd < (
int
)pattern.size()) ? pattern[curInd] : -1;
switch
(ch)
{
case
'?'
: ++curInd; newNode = registerNode(
new
NFALazyQuantifierNode (
this
, newNode, s, e));
break
;
case
'+'
: ++curInd; newNode = registerNode(
new
NFAPossessiveQuantifierNode(
this
, newNode, s, e));
break
;
default
: newNode = registerNode(
new
NFAGreedyQuantifierNode (
this
, newNode, s, e));
break
;
}
}
}
break
;
default
:
break
;
}
}
return
newNode;
}
std::string Pattern::parseClass()
{
std::string t, ret =
""
;
char
ch, c1, c2;
bool
inv = 0, neg = 0, quo = 0;
if
(curInd < (
int
)pattern.size() && pattern[curInd] ==
'^'
)
{
++curInd;
neg = 1;
}
while
(curInd < (
int
)pattern.size() && pattern[curInd] !=
']'
)
{
ch = pattern[curInd++];
if
(ch ==
'['
)
{
t = parseClass();
ret = classUnion(ret, t);
}
else
if
(ch ==
'&'
&& curInd < (
int
)pattern.size() && pattern[curInd] ==
'&'
)
{
if
(pattern[++curInd] !=
'['
)
{
raiseError();
curInd = pattern.size();
}
else
{
++curInd;
t = parseClass();
ret = classIntersect(ret, t);
}
}
else
if
(ch ==
'\\'
)
{
t = parseEscape(inv, quo);
if
(quo)
{
raiseError();
curInd = pattern.size();
}
else
if
(inv || t.size() > 1)
{
if
(inv) t = classNegate(t);
ret = classUnion(ret, t);
}
else
if
(curInd < (
int
)pattern.size() && pattern[curInd] ==
'-'
)
{
c1 = t[0];
++curInd;
if
(curInd >= (
int
)pattern.size()) raiseError();
else
{
c2 = pattern[curInd++];
if
(c2 ==
'\\'
)
{
t = parseEscape(inv, quo);
if
(quo)
{
raiseError();
curInd = pattern.size();
}
else
if
(inv || t.size() > 1) raiseError();
else
ret = classUnion(ret, classCreateRange(c1, c2));
}
else
if
(c2 ==
'['
|| c2 ==
']'
|| c2 ==
'-'
|| c2 ==
'&'
)
{
raiseError();
curInd = pattern.size();
}
else
ret = classUnion(ret, classCreateRange(c1, c2));
}
}
else
{
ret = classUnion(ret, t);
}
}
else
if
(curInd < (
int
)pattern.size() && pattern[curInd] ==
'-'
)
{
c1 = ch;
++curInd;
if
(curInd >= (
int
)pattern.size()) raiseError();
else
{
c2 = pattern[curInd++];
if
(c2 ==
'\\'
)
{
t = parseEscape(inv, quo);
if
(quo)
{
raiseError();
curInd = pattern.size();
}
else
if
(inv || t.size() > 1) raiseError();
else
ret = classUnion(ret, classCreateRange(c1, c2));
}
else
if
(c2 ==
'['
|| c2 ==
']'
|| c2 ==
'-'
|| c2 ==
'&'
)
{
raiseError();
curInd = pattern.size();
}
else
{
ret = classUnion(ret, classCreateRange(c1, c2));
}
}
}
else
{
ret +=
" "
;
ret[ret.size() - 1] = ch;
}
}
if
(curInd >= (
int
)pattern.size() || pattern[curInd] !=
']'
)
{
raiseError();
ret =
""
;
}
else
{
++curInd;
if
(neg) ret = classNegate(ret);
}
return
ret;
}
std::string Pattern::parsePosix()
{
std::string s7 = pattern.substr(curInd, 7);
if
(s7 ==
"{Lower}"
) { curInd += 7;
return
"abcdefghijklmnopqrstuvwxyz"
; }
if
(s7 ==
"{Upper}"
) { curInd += 7;
return
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
; }
if
(s7 ==
"{Alpha}"
) { curInd += 7;
return
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
; }
if
(s7 ==
"{Digit}"
) { curInd += 7;
return
"0123456789"
; }
if
(s7 ==
"{Alnum}"
) { curInd += 7;
return
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
; }
if
(s7 ==
"{Punct}"
) { curInd += 7;
return
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
; }
if
(s7 ==
"{Graph}"
) { curInd += 7;
return
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
; }
if
(s7 ==
"{Print}"
) { curInd += 7;
return
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
; }
if
(s7 ==
"{Blank}"
) { curInd += 7;
return
" \t"
; }
if
(s7 ==
"{Space}"
) { curInd += 7;
return
" \t\n\x0B\f\r"
; }
if
(s7 ==
"{Cntrl}"
)
{
int
i;
std::string s =
" "
;
for
(i = 0; i < 5; ++i) s += s;
s +=
" "
;
for
(i = 0; i <= 0x1F; ++i) s[i] = i;
s[0x20] = 0x7F;
curInd += 7;
return
s;
}
if
(s7 ==
"{ASCII}"
)
{
std::string s(0x80,
' '
);
for
(
int
i = 0; i < 0x80; ++i) s[i] = i;
curInd += 7;
return
s;
}
if
(pattern.substr(curInd, 8) ==
"{XDigit}"
) { curInd += 8;
return
"abcdefABCDEF0123456789"
; }
raiseError();
return
""
;
}
NFANode * Pattern::parseBackref()
{
#define is_dig(x) ((x) >= '0' && (x) <= '9')
#define to_int(x) ((x) - '0')
int
ci = curInd;
int
oldRef = 0, ref = 0;
while
(ci < (
int
)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount))
{
oldRef = ref;
ref = ref * 10 + to_int(pattern[ci++]);
}
if
(ci == (
int
)pattern.size())
{
oldRef = ref;
++ci;
}
if
(oldRef < 0 || ci <= curInd)
{
raiseError();
return
registerNode(
new
NFAReferenceNode(-1));
}
curInd = ci;
return
registerNode(
new
NFAReferenceNode(ref));
#undef is_dig
#undef to_int
}
std::string Pattern::parseOctal()
{
#define islowoc(x) ((x) >= '0' && (x) <= '3')
#define isoc(x) ((x) >= '0' && (x) <= '7')
#define fromoc(x) ((x) - '0')
int
ci = curInd;
char
ch1 = (ci + 0 < (
int
)pattern.size()) ? pattern[ci + 0] : -1;
char
ch2 = (ci + 1 < (
int
)pattern.size()) ? pattern[ci + 1] : -1;
char
ch3 = (ci + 2 < (
int
)pattern.size()) ? pattern[ci + 2] : -1;
std::string s =
" "
;
if
(islowoc(ch1) && isoc(ch2))
{
curInd += 2;
s[0] = fromoc(ch1) * 8 + fromoc(ch2);
if
(isoc(ch3))
{
++curInd;
s[0] = s[0] * 8 + fromoc(ch3);
}
}
else
if
(isoc(ch1) && isoc(ch2))
{
curInd += 2;
s[0] = fromoc(ch1) * 8 + fromoc(ch2);
}
else
raiseError();
return
s;
#undef islowoc
#undef isoc
#undef fromoc
}
std::string Pattern::parseHex()
{
#define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x))
#define is_dig(x) ((x) >= '0' && (x) <= '9')
#define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f'))
#define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10))
int
ci = curInd;
char
ch1 = (ci + 0 < (
int
)pattern.size()) ? pattern[ci + 0] : -1;
char
ch2 = (ci + 1 < (
int
)pattern.size()) ? pattern[ci + 1] : -1;
std::string s =
" "
;
if
(is_hex(ch1) && is_hex(ch2))
{
curInd += 2;
s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F);
}
return
s;
#undef to_low
#undef is_dig
#undef is_hex
#undef to_int
}
std::string Pattern::parseEscape(
bool
& inv,
bool
& quo)
{
char
ch = pattern[curInd++];
std::string classes =
""
;
if
(curInd > (
int
)pattern.size())
{
raiseError();
return
NULL;
}
quo = 0;
inv = 0;
switch
(ch)
{
case
'p'
: classes = parsePosix();
break
;
case
'P'
: classes =
"!!"
; classes += parsePosix();
break
;
case
'd'
: classes =
"0123456789"
;
break
;
case
'D'
: classes =
"!!0123456789"
;
break
;
case
's'
: classes =
" \t\r\n\f"
;
break
;
case
'S'
: classes =
"!! \t\r\n\f"
;
break
;
case
'w'
: classes =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
;
break
;
case
'W'
: classes =
"!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
;
break
;
case
'0'
: classes = parseOctal();
break
;
case
'x'
: classes = parseHex();
break
;
case
'Q'
: quo = 1;
break
;
case
't'
: classes =
"\t"
;
break
;
case
'r'
: classes =
"\r"
;
break
;
case
'n'
: classes =
"\n"
;
break
;
case
'f'
: classes =
"\f"
;
break
;
case
'a'
: classes =
"\a"
;
break
;
case
'e'
: classes =
"\r"
;
break
;
default
: classes =
" "
; classes[0] = ch;
break
;
}
if
(classes.substr(0, 2) ==
"!!"
)
{
classes = classes.substr(2);
inv = 1;
}
return
classes;
}
NFANode * Pattern::parseRegisteredPattern(NFANode ** end)
{
int
i, j;
std::string s;
NFANode * ret = NULL;
for
(i = curInd; i < (
int
)pattern.size() && pattern[i] !=
'}'
; ++i) { }
if
(pattern[i] !=
'}'
) { raiseError();
return
NULL; }
if
(i == curInd + 1) { raiseError();
return
NULL; }
if
(
!(
(pattern[curInd] >=
'a'
&& pattern[curInd] <=
'z'
) ||
(pattern[curInd] >=
'A'
&& pattern[curInd] <=
'Z'
) ||
(pattern[curInd] ==
'_'
)
)
)
{
raiseError();
return
NULL;
}
for
(j = curInd; !error && j < i; ++j)
{
if
(
!(
(pattern[j] >=
'a'
&& pattern[j] <=
'z'
) ||
(pattern[j] >=
'A'
&& pattern[j] <=
'Z'
) ||
(pattern[j] >=
'0'
&& pattern[j] <=
'9'
) ||
(pattern[j] ==
'_'
)
)
)
{
raiseError();
return
NULL;
}
}
s = pattern.substr(curInd, i - curInd);
if
(registeredPatterns.find(s) == registeredPatterns.end()) raiseError();
else
{
unsigned
long
oflags = flags;
std::string op = pattern;
int
ci = i + 1;
pattern = registeredPatterns[s].first;
curInd = 0;
flags = registeredPatterns[s].second;
--groupCount;
ret = parse(0, 0, end);
pattern = op;
curInd = ci;
flags = oflags;
}
if
(error) { *end = ret = NULL; }
return
ret;
}
NFANode * Pattern::parseBehind(
const
bool
pos, NFANode ** end)
{
std::string t =
""
;
while
(curInd < (
int
)pattern.size() && pattern[curInd] !=
')'
)
{
char
ch = pattern[curInd++];
t +=
" "
;
if
(ch ==
'\\'
)
{
if
(curInd + 1 >= (
int
)pattern.size())
{
raiseError();
return
*end = registerNode(
new
NFACharNode(
' '
));
}
ch = pattern[curInd++];
}
t[t.size() - 1] = ch;
}
if
(curInd >= (
int
)pattern.size() || pattern[curInd] !=
')'
) raiseError();
else
++curInd;
return
*end = registerNode(
new
NFALookBehindNode(t, pos));
}
NFANode * Pattern::parseQuote()
{
bool
done = 0;
std::string s =
""
;
while
(!done)
{
if
(curInd >= (
int
)pattern.size())
{
raiseError();
done = 1;
}
else
if
(pattern.substr(curInd, 2) ==
"\\E"
)
{
curInd += 2;
done = 1;
}
else
if
(pattern[curInd] ==
'\\'
)
{
s +=
" "
;
s[s.size() - 1] = pattern[++curInd];
++curInd;
}
else
{
s +=
" "
;
s[s.size() - 1] = pattern[curInd++];
}
}
if
((flags & Pattern::CASE_INSENSITIVE) != 0)
return
registerNode(
new
NFACIQuoteNode(s));
return
registerNode(
new
NFAQuoteNode(s));
}
NFANode * Pattern::parse(
const
bool
inParen,
const
bool
inOr, NFANode ** end,
const
int
orGroup,
const
bool
inOrCap)
{
NFANode * start, * cur, * next = NULL;
std::string t;
int
grc = groupCount++;
bool
inv, quo;
bool
ahead =
false
, pos =
false
, noncap =
false
, indep =
false
;
unsigned
long
oldFlags = flags;
if
(inParen)
{
if
(inOr)
{
--groupCount;
noncap = !inOrCap;
grc = orGroup;
if
(noncap) cur = start = registerNode(
new
NFAGroupHeadNode(grc));
else
cur = start = registerNode(
new
NFASubStartNode);
}
else
if
(pattern[curInd] ==
'?'
)
{
++curInd;
--groupCount;
if
(pattern[curInd] ==
':'
) { noncap = 1; ++curInd; grc = --nonCapGroupCount; }
else
if
(pattern[curInd] ==
'='
) { ++curInd; ahead = 1; pos = 1; }
else
if
(pattern[curInd] ==
'!'
) { ++curInd; ahead = 1; pos = 0; }
else
if
(pattern.substr(curInd, 2) ==
"<="
) { curInd += 2;
return
parseBehind(1, end); }
else
if
(pattern.substr(curInd, 2) ==
"<!"
) { curInd += 2;
return
parseBehind(0, end); }
else
if
(pattern[curInd] ==
'>'
) { ++curInd; indep = 1; }
else
{
bool
negate =
false
, done =
false
;
while
(!done)
{
if
(curInd >= (
int
)pattern.size())
{
raiseError();
return
NULL;
}
else
if
(negate)
{
switch
(pattern[curInd])
{
case
'i'
: flags &= ~Pattern::CASE_INSENSITIVE;
break
;
case
'd'
: flags &= ~Pattern::UNIX_LINE_MODE;
break
;
case
'm'
: flags &= ~Pattern::MULTILINE_MATCHING;
break
;
case
's'
: flags &= ~Pattern::DOT_MATCHES_ALL;
break
;
case
':'
: done =
true
;
break
;
case
')'
:
++curInd;
*end = registerNode(
new
NFALookBehindNode(
""
,
true
));
return
*end;
case
'-'
:
default
: raiseError();
return
NULL;
}
}
else
{
switch
(pattern[curInd])
{
case
'i'
: flags |= Pattern::CASE_INSENSITIVE;
break
;
case
'd'
: flags |= Pattern::UNIX_LINE_MODE;
break
;
case
'm'
: flags |= Pattern::MULTILINE_MATCHING;
break
;
case
's'
: flags |= Pattern::DOT_MATCHES_ALL;
break
;
case
':'
: done =
true
;
break
;
case
'-'
: negate =
true
;
break
;
case
')'
:
++curInd;
*end = registerNode(
new
NFALookBehindNode(
""
,
true
));
return
*end;
default
: raiseError();
return
NULL;
}
}
++curInd;
}
noncap = 1;
grc = --nonCapGroupCount;
}
if
(noncap) cur = start = registerNode(
new
NFAGroupHeadNode(grc));
else
cur = start = registerNode(
new
NFASubStartNode);
}
else
cur = start = registerNode(
new
NFAGroupHeadNode(grc));
}
else
cur = start = registerNode(
new
NFASubStartNode);
while
(curInd < (
int
)pattern.size())
{
char
ch = pattern[curInd++];
next = NULL;
if
(error)
return
NULL;
switch
(ch)
{
case
'^'
:
if
((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(
new
NFAStartOfLineNode);
else
next = registerNode(
new
NFAStartOfInputNode);
break
;
case
'$'
:
if
((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(
new
NFAEndOfLineNode);
else
next = registerNode(
new
NFAEndOfInputNode(0));
break
;
case
'|'
:
cur->next = registerNode(
new
NFAAcceptNode);
cur = start = registerNode(
new
NFAOrNode(start, parse(inParen,
true
, NULL, grc, !noncap)));
break
;
case
'\\'
:
if
(curInd < (
int
)pattern.size())
{
bool
eoi = 0;
switch
(pattern[curInd])
{
case
'1'
:
case
'2'
:
case
'3'
:
case
'4'
:
case
'5'
:
case
'6'
:
case
'7'
:
case
'8'
:
case
'9'
: next = parseBackref();
break
;
case
'A'
: ++curInd; next = registerNode(
new
NFAStartOfInputNode);
break
;
case
'B'
: ++curInd; next = registerNode(
new
NFAWordBoundaryNode(0));
break
;
case
'b'
: ++curInd; next = registerNode(
new
NFAWordBoundaryNode(1));
break
;
case
'G'
: ++curInd; next = registerNode(
new
NFAEndOfMatchNode);
break
;
case
'Z'
: eoi = 1;
case
'z'
: ++curInd; next = registerNode(
new
NFAEndOfInputNode(eoi));
break
;
default
:
t = parseEscape(inv, quo);
if
(!quo)
{
if
(t.size() > 1 || inv)
{
if
((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(
new
NFACIClassNode(t, inv));
else
next = registerNode(
new
NFAClassNode(t, inv));
}
else
{
next = registerNode(
new
NFACharNode(t[0]));
}
}
else
{
next = parseQuote();
}
}
}
else
raiseError();
break
;
case
'['
:
if
((flags & Pattern::CASE_INSENSITIVE) == 0)
{
NFAClassNode * clazz =
new
NFAClassNode();
std::string s = parseClass();
for
(
int
i = 0; i < (
int
)s.size(); ++i) clazz->vals[s[i]] = 1;
next = registerNode(clazz);
}
else
{
NFACIClassNode * clazz =
new
NFACIClassNode();
std::string s = parseClass();
for
(
int
i = 0; i < (
int
)s.size(); ++i) clazz->vals[
tolower
(s[i])] = 1;
next = registerNode(clazz);
}
break
;
case
'.'
:
{
bool
useN = 1, useR = 1;
NFAClassNode * clazz =
new
NFAClassNode(1);
if
((flags & Pattern::UNIX_LINE_MODE) != 0) useR = 0;
if
((flags & Pattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
if
(useN) clazz->vals[
'\n'
] = 1;
if
(useR) clazz->vals[
'\r'
] = 1;
next = registerNode(clazz);
}
break
;
case
'('
:
{
NFANode * end, * t1, * t2;
t1 = parse(
true
,
false
, &end);
if
(!t1) raiseError();
else
if
(t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
{
cur->next = t2;
cur = t2->next;
}
else
{
cur->next = t1;
cur = end;
}
}
break
;
case
')'
:
if
(!inParen) raiseError();
else
if
(inOr)
{
--curInd;
cur = cur->next = registerNode(
new
NFAAcceptNode);
flags = oldFlags;
return
start;
}
else
{
if
(ahead)
{
cur = cur->next = registerNode(
new
NFAAcceptNode);
flags = oldFlags;
return
*end = registerNode(
new
NFALookAheadNode(start, pos));
}
else
if
(indep)
{
cur = cur->next = registerNode(
new
NFAAcceptNode);
flags = oldFlags;
return
*end = registerNode(
new
NFAPossessiveQuantifierNode(
this
, start, 1, 1));
}
else
{
*end = cur = cur->next = registerNode(
new
NFAGroupTailNode(grc));
next = quantifyGroup(start, *end, grc);
if
(next)
{
start = next;
*end = next->next;
}
flags = oldFlags;
return
start;
}
}
break
;
case
'{'
:
cur->next = parseRegisteredPattern(&next);
if
(cur->next) cur = next;
break
;
case
'*'
:
case
'+'
:
case
'?'
:
case
'}'
:
case
']'
:
raiseError();
break
;
default
:
if
((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(
new
NFACICharNode(ch));
else
next = registerNode(
new
NFACharNode(ch));
break
;
}
NFAOrNode * orNode =
dynamic_cast
<NFAOrNode *>(cur);
if
(orNode)
{
NFANode * one = orNode->one, * two = orNode->two;
while
(one->next) one = one->next;
while
(two->next) two = two->next;
one->next = two->next = cur = registerNode(
new
NFAAcceptNode);
}
if
(next)
{
cur = cur->next = quantify(next);
}
}
if
(inParen) raiseError();
else
{
if
(inOr) cur = cur->next = registerNode(
new
NFAAcceptNode);
if
(end) *end = cur;
}
flags = oldFlags;
if
(error)
return
NULL;
return
start;
}
Pattern * Pattern::compile(
const
std::string & pattern,
const
unsigned
long
mode)
{
Pattern * p =
new
Pattern(pattern);
NFANode * end;
p->flags = mode;
if
((mode & Pattern::LITERAL) != 0)
{
p->head = p->registerNode(
new
NFAStartNode);
if
((mode & Pattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(
new
NFACIQuoteNode(pattern));
else
p->head->next = p->registerNode(
new
NFAQuoteNode(pattern));
p->head->next->next = p->registerNode(
new
NFAEndNode);
}
else
{
p->head = p->parse(0, 0, &end);
if
(!p->head)
{
delete
p;
p = NULL;
}
else
{
if
(!(p->head && p->head->isStartOfInputNode()))
{
NFANode * n = p->registerNode(
new
NFAStartNode);
n->next = p->head;
p->head = n;
}
end->next = p->registerNode(
new
NFAEndNode);
}
}
if
(p != NULL)
{
p->matcher =
new
Matcher(p,
""
);
}
return
p;
}
Pattern * Pattern::compileAndKeep(
const
std::string & pattern,
const
unsigned
long
mode)
{
Pattern * ret = NULL;
std::map<std::string, Pattern*>::iterator it = compiledPatterns.find(pattern);
if
(it != compiledPatterns.end())
{
ret = it->second;
}
else
{
ret = compile(pattern, mode);
compiledPatterns[pattern] = ret;
}
return
ret;
}
std::string Pattern::replace(
const
std::string & pattern,
const
std::string & str,
const
std::string & replacementText,
const
unsigned
long
mode)
{
std::string ret;
Pattern * p = Pattern::compile(pattern, mode);
if
(p)
{
ret = p->replace(str, replacementText);
delete
p;
}
return
ret;
}
std::vector<std::string> Pattern::split(
const
std::string & pattern,
const
std::string & str,
const
bool
keepEmptys,
const
unsigned
long
limit,
const
unsigned
long
mode)
{
std::vector<std::string> ret;
Pattern * p = Pattern::compile(pattern, mode);
if
(p)
{
ret = p->split(str, keepEmptys, limit);
delete
p;
}
return
ret;
}
std::vector<std::string> Pattern::findAll(
const
std::string & pattern,
const
std::string & str,
const
unsigned
long
mode)
{
std::vector<std::string> ret;
Pattern * p = Pattern::compile(pattern, mode);
if
(p)
{
ret = p->findAll(str);
delete
p;
}
return
ret;
}
bool
Pattern::matches(
const
std::string & pattern,
const
std::string & str,
const
unsigned
long
mode)
{
bool
ret = 0;
Pattern * p = compile(pattern, mode);
if
(p)
{
ret = p->matches(str);
delete
p;
}
return
ret;
}
bool
Pattern::registerPattern(
const
std::string & name,
const
std::string & pattern,
const
unsigned
long
mode)
{
Pattern * p = Pattern::compile(pattern, mode);
if
(!p)
return
0;
Pattern::registeredPatterns[name] = std::make_pair(pattern, mode);
delete
p;
return
1;
}
void
Pattern::unregisterPatterns()
{
registeredPatterns.clear();
}
void
Pattern::clearPatternCache()
{
std::map<std::string, Pattern*>::iterator it;
for
(it = compiledPatterns.begin(); it != compiledPatterns.end(); ++it)
{
delete
it->second;
}
compiledPatterns.clear();
}
std::pair<std::string,
int
> Pattern::findNthMatch(
const
std::string & pattern,
const
std::string & str,
const
int
matchNum,
const
unsigned
long
mode)
{
std::pair<std::string,
int
> ret;
Pattern * p = Pattern::compile(pattern, mode);
ret.second = -1;
if
(p)
{
int
i = -1;
p->matcher->setString(str);
while
(i < matchNum && p->matcher->findNextMatch()) { ++i; }
if
(i == matchNum && p->matcher->getStartingIndex() >= 0)
{
ret.first = p->matcher->getGroup(0);
ret.second = p->matcher->getStartingIndex();
}
delete
p;
}
return
ret;
}
Pattern::~Pattern()
{
if
(matcher)
delete
matcher;
for
(std::map<NFANode*,
bool
>::iterator it = nodes.begin(); it != nodes.end(); ++it)
{
delete
it->first;
}
}
std::string Pattern::replace(
const
std::string & str,
const
std::string & replacementText)
{
int
li = 0;
std::string ret =
""
;
matcher->setString(str);
while
(matcher->findNextMatch())
{
ret += str.substr(li, matcher->getStartingIndex() - li);
ret += matcher->replaceWithGroups(replacementText);
li = matcher->getEndingIndex();
}
ret += str.substr(li);
return
ret;
}
std::vector<std::string> Pattern::split(
const
std::string & str,
const
bool
keepEmptys,
const
unsigned
long
limit)
{
unsigned
long
lim = (limit == 0 ? MAX_QMATCH : limit);
int
li = 0;
std::vector<std::string> ret;
matcher->setString(str);
while
(matcher->findNextMatch() && ret.size() < lim)
{
if
(matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(
""
);
if
((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys)
{
if
(li != matcher->getStartingIndex() || keepEmptys)
{
ret.push_back(str.substr(li, matcher->getStartingIndex() - li));
}
li = matcher->getEndingIndex();
}
}
if
(li < (
int
)str.size()) ret.push_back(str.substr(li));
return
ret;
}
std::vector<std::string> Pattern::findAll(
const
std::string & str)
{
matcher->setString(str);
return
matcher->findAll();
}
bool
Pattern::matches(
const
std::string & str)
{
matcher->setString(str);
return
matcher->matches();
}
unsigned
long
Pattern::getFlags()
const
{
return
flags;
}
std::string Pattern::getPattern()
const
{
return
pattern;
}
Matcher * Pattern::createMatcher(
const
std::string & str)
{
return
new
Matcher(
this
, str);
}
void
Pattern::print()
{
printf
(
"Pattern(%s):\n"
, pattern.c_str());
if
(head) head->print(1);
}
static
char
* getPrintChar(
const
char
ch)
{
static
char
buf[1024];
if
(ch ==
'0'
)
strcpy
(buf,
"\\0"
);
else
if
(ch ==
'\r'
)
strcpy
(buf,
"\\r"
);
else
if
(ch ==
'\t'
)
strcpy
(buf,
"\\t"
);
else
if
(ch ==
'\n'
)
strcpy
(buf,
"\\n"
);
else
if
(ch == 0x0C)
strcpy
(buf,
"\\p"
);
else
if
(ch <
' '
|| ch >= 0x80)
sprintf
(buf,
"0x%02X"
, ch);
else
sprintf
(buf,
"%c"
, ch);
return
buf;
}
static
void
appendRawToClass(
const
int
ch,
char
*
const
buf,
int
& curInd)
{
switch
(ch)
{
case
'('
:
case
')'
:
case
'\\'
:
case
'-'
:
case
'['
:
case
']'
:
buf[curInd++] =
'\\'
;
default
:;
buf[curInd++] = (
char
)ch;
}
}
static
void
appendToClass0(
const
int
j,
int
& start,
const
bool
raw,
const
bool
allowRanges,
char
*
const
buf,
int
& curInd)
{
if
(start != -1)
{
if
(start + 1 == j)
{
if
(raw)
{
appendRawToClass(start, buf, curInd);
}
else
{
sprintf
(buf + curInd,
"\\x%02X"
, start);
curInd += 4;
}
}
else
if
(start + 2 == j)
{
if
(raw)
{
appendRawToClass(start, buf, curInd);
appendRawToClass(start + 1, buf, curInd);
}
else
{
sprintf
(buf + curInd,
"\\x%02X\\x%02X"
, start, start + 1);
curInd += 8;
}
}
else
{
if
(allowRanges)
{
if
(raw)
{
appendRawToClass(start, buf, curInd);
buf[curInd++] =
'-'
;
appendRawToClass(j - 1, buf, curInd);
}
else
{
sprintf
(buf + curInd,
"\\x%02X-\\x%02X"
, start, j - 1);
curInd += 9;
}
}
else
{
for
(
int
k = start; k < j; ++k)
{
appendRawToClass(k, buf, curInd);
}
}
}
}
start = -1;
}
static
void
appendToClass(
const
std::map<
char
,
bool
> & chars,
const
int
*
const
range,
const
bool
raw,
const
bool
allowRanges,
char
*
const
buf,
int
& curInd)
{
int
start = -1, j;
for
(j = range[0]; j < range[1]; ++j)
{
if
(chars.find((
char
)j) != chars.end())
{
if
(start == -1) start = j;
}
else
{
appendToClass0(j, start, raw, allowRanges, buf, curInd);
}
}
appendToClass0(j, start, raw, allowRanges, buf, curInd);
}
static
char
* getClassDesc(
const
std::map<
char
,
bool
> & chars,
const
bool
inv,
const
bool
firstPass =
true
)
{
static
char
buf[1024];
int
curInd = 0;
bool
ok[256] = {
false
};
buf[curInd++] =
'['
;
if
(inv)
{
buf[curInd++] =
'^'
;
}
if
(chars.find(
' '
) != chars.end()) { buf[curInd++] =
' '
; }
if
(chars.find(0x00) != chars.end()) {
strcpy
(buf + curInd,
"\\0"
); curInd += 2; }
if
(chars.find(
'\t'
) != chars.end()) {
strcpy
(buf + curInd,
"\\t"
); curInd += 2; }
if
(chars.find(
'\r'
) != chars.end()) {
strcpy
(buf + curInd,
"\\r"
); curInd += 2; }
if
(chars.find(
'\n'
) != chars.end()) {
strcpy
(buf + curInd,
"\\n"
); curInd += 2; }
if
(chars.find(0x0c) != chars.end()) {
strcpy
(buf + curInd,
"\\p"
); curInd += 2; }
const
int
HEX_RANGES[][2] =
{
{ 0x01, 0x09 },
{ 0x0B, 0x0B },
{ 0x0E, 0x20 },
{ 0x80, 0x100 },
};
const
int
RAW_RANGES[][2] =
{
{ 0x20,
'0'
},
{
'0'
,
'9'
+ 1 },
{
'9'
+ 1,
'A'
},
{
'A'
,
'Z'
+ 1 },
{
'Z'
+ 1,
'a'
},
{
'a'
,
'z'
+ 1 },
{
'z'
+ 1, 0x80 },
};
const
bool
ALLOW_RANGES[] =
{
false
,
true
,
false
,
true
,
false
,
true
,
false
,
};
const
int
NUM_HEX =
sizeof
(HEX_RANGES) /
sizeof
(HEX_RANGES[0]);
const
int
NUM_RAW =
sizeof
(RAW_RANGES) /
sizeof
(RAW_RANGES[0]);
for
(
int
i = 0; i < NUM_HEX; ++i)
{
appendToClass(chars, HEX_RANGES[i],
false
,
true
, buf, curInd);
}
for
(
int
i = 0; i < NUM_RAW; ++i)
{
appendToClass(chars, RAW_RANGES[i],
true
, ALLOW_RANGES[i], buf, curInd);
}
buf[curInd++] =
']'
;
buf[curInd++] = 0;
if
(firstPass)
{
char
newBuf[1024];
std::map<
char
,
bool
> invChars;
for
(
int
i = 0; i < 256; ++i)
{
if
(chars.find((
char
)i) == chars.end()) invChars[(
char
)i] =
true
;
}
strcpy
(newBuf, buf);
getClassDesc(invChars, !inv,
false
);
if
(
strlen
(buf) >
strlen
(newBuf))
strcpy
(buf, newBuf);
}
return
buf;
}
NFANode::NFANode() { next = NULL; }
NFANode::~NFANode() { }
void
NFANode::findAllNodes(std::map<NFANode*,
bool
> & soFar)
{
if
(soFar.find(
this
) == soFar.end())
return
;
soFar[
this
] = 1;
if
(next) next->findAllNodes(soFar);
}
void
NFANode::print(
const
int
indent)
{
printf
(
"%*c%s\n"
, indent * 2,
' '
,
typeid
(*
this
).name());
if
(next) next->print(indent);
}
NFACharNode::NFACharNode(
const
char
c) { ch = c; }
int
NFACharNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd < (
int
)str.size() && str[curInd] == ch)
return
next->match(str, matcher, curInd + 1, depth + 1);
return
-1;
}
void
NFACharNode::print(
const
int
indent)
{
printf
(
"%*c%s(0x%02X)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), ch);
if
(next) next->print(indent);
}
NFACICharNode::NFACICharNode(
const
char
c) { ch =
tolower
(c); }
int
NFACICharNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd < (
int
)str.size() &&
tolower
(str[curInd]) == ch)
return
next->match(str, matcher, curInd + 1, depth + 1);
return
-1;
}
void
NFACICharNode::print(
const
int
indent)
{
printf
(
"%*c%s(0x%02X)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), ch);
if
(next) next->print(indent);
}
NFAStartNode::NFAStartNode() { }
int
NFAStartNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ret = -1, ci = curInd;
matcher->starts[0] = curInd;
if
((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) == (unsigned
int
)Matcher::MATCH_ENTIRE_STRING)
{
if
(curInd != 0)
{
matcher->starts[0] = -1;
return
-1;
}
return
next->match(str, matcher, 0, depth + 1);
}
while
((ret = next->match(str, matcher, ci)) == -1 && ci < (
int
)str.size())
{
matcher->clearGroups();
matcher->starts[0] = ++ci;
}
if
(ret < 0) matcher->starts[0] = -1;
return
ret;
}
NFAEndNode::NFAEndNode() { }
int
NFAEndNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
matcher->ends[0] = curInd;
if
((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) != 0)
{
if
(curInd == (
int
)str.size())
return
curInd;
matcher->ends[0] = -1;
return
-1;
}
return
curInd;
}
void
NFAQuantifierNode::findAllNodes(std::map<NFANode*,
bool
> & soFar)
{
inner->findAllNodes(soFar);
NFANode::findAllNodes(soFar);
}
NFAQuantifierNode::NFAQuantifierNode(Pattern * pat, NFANode * internal,
const
int
minMatch,
const
int
maxMatch)
{
inner = internal;
inner->next = pat->registerNode(
new
NFAAcceptNode);
min = (minMatch < Pattern::MIN_QMATCH) ? Pattern::MIN_QMATCH : minMatch;
max = (maxMatch > Pattern::MAX_QMATCH) ? Pattern::MAX_QMATCH : maxMatch;
}
int
NFAQuantifierNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
i0, i1, i2 = 0;
i0 = i1 = curInd;
while
(i2 < min)
{
++i2;
i1 = inner->match(str, matcher, i0, depth + 1);
if
(i1 <= i0)
return
i1;
i0 = i1;
}
return
i1;
}
void
NFAQuantifierNode::print(
const
int
indent)
{
printf
(
"%*c%s(min=%d, max=%d)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), min, max);
printf
(
"%*cinner\n"
, (indent + 1) * 2,
' '
);
if
(inner) inner->print(indent + 2);
if
(next) next->print(indent);
}
NFAGreedyQuantifierNode::NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
const
int
minMatch,
const
int
maxMatch)
: NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
int
NFAGreedyQuantifierNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
t = NFAQuantifierNode::match(str, matcher, curInd, depth + 1);
if
(t != -1)
return
matchInternal(str, matcher, t, min, depth + 1);
return
t;
}
int
NFAGreedyQuantifierNode::matchInternal(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
soFar,
const
int
depth)
const
{
if
(soFar >= max)
return
next->match(str, matcher, curInd, depth + 1);
int
i, j;
i = inner->match(str, matcher, curInd, depth + 1);
if
(i != -1)
{
j = matchInternal(str, matcher, i, soFar + 1, depth + 1);
if
(j != -1)
return
j;
}
return
next->match(str, matcher, curInd, depth + 1);
}
NFALazyQuantifierNode::NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
const
int
minMatch,
const
int
maxMatch)
: NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
int
NFALazyQuantifierNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
i, j, m = NFAQuantifierNode::match(str, matcher, curInd, depth + 1);
if
(m == -1)
return
-1;
for
(i = min; i < max; ++i)
{
j = next->match(str, matcher, m, depth + 1);
if
(j == -1)
{
j = inner->match(str, matcher, m, depth + 1);
if
(j <= m)
return
-1;
m = j;
}
else
return
j;
}
return
next->match(str, matcher, m, depth + 1);
}
NFAPossessiveQuantifierNode::NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
const
int
minMatch,
const
int
maxMatch)
: NFAQuantifierNode(pat, internal, minMatch, maxMatch) { }
int
NFAPossessiveQuantifierNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
i, j, m = NFAQuantifierNode::match(str, matcher, curInd, depth + 1);
if
(m == -1)
return
-1;
for
(i = min; i < max; ++i)
{
j = inner->match(str, matcher, m, depth + 1);
if
(j <= m)
return
next->match(str, matcher, m, depth + 1);
m = j;
}
return
next->match(str, matcher, m, depth + 1);
}
NFAAcceptNode::NFAAcceptNode() { }
int
NFAAcceptNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(!next)
return
curInd;
else
return
next->match(str, matcher, curInd, depth + 1);
}
NFAClassNode::NFAClassNode(
const
bool
invert)
{
inv = invert;
}
NFAClassNode::NFAClassNode(
const
std::string & clazz,
const
bool
invert)
{
inv = invert;
for
(
int
i = 0; i < (
int
)clazz.size(); ++i) vals[clazz[i]] = 1;
}
int
NFAClassNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd < (
int
)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv))
{
return
next->match(str, matcher, curInd + 1, depth + 1);
}
return
-1;
}
void
NFAClassNode::print(
const
int
indent)
{
printf
(
"%*c%s(%d vals, inv=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), (
int
)vals.size(), inv ?
"true"
:
"false"
);
if
(next) next->print(indent);
}
NFACIClassNode::NFACIClassNode(
const
bool
invert)
{
inv = invert;
}
NFACIClassNode::NFACIClassNode(
const
std::string & clazz,
const
bool
invert)
{
inv = invert;
for
(
int
i = 0; i < (
int
)clazz.size(); ++i) vals[
tolower
(clazz[i])] = 1;
}
int
NFACIClassNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd < (
int
)str.size() && ((vals.find(
tolower
(str[curInd])) != vals.end()) ^ inv))
{
return
next->match(str, matcher, curInd + 1, depth + 1);
}
return
-1;
}
void
NFACIClassNode::print(
const
int
indent)
{
if
(next) next->print(indent);
}
#undef to_lower
NFASubStartNode::NFASubStartNode() { }
int
NFASubStartNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
return
next->match(str, matcher, curInd, depth + 1);
}
NFAOrNode::NFAOrNode(NFANode * first, NFANode * second) : one(first), two(second) { }
void
NFAOrNode::findAllNodes(std::map<NFANode*,
bool
> & soFar)
{
if
(one) one->findAllNodes(soFar);
if
(two) two->findAllNodes(soFar);
NFANode::findAllNodes(soFar);
}
int
NFAOrNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ci = one->match(str, matcher, curInd, depth + 1);
if
(ci != -1)
return
ci;
return
two->match(str, matcher, curInd, depth + 1);
}
void
NFAOrNode::print(
const
int
indent)
{
printf
(
"%*c%s\n"
, indent * 2,
' '
,
typeid
(*
this
).name());
printf
(
"%*cone\n"
, (indent + 1) * 2,
' '
);
one->print(indent + 2);
printf
(
"%*ctwo\n"
, (indent + 1) * 2,
' '
);
two->print(indent + 2);
}
NFAQuoteNode::NFAQuoteNode(
const
std::string & quoted) : qStr(quoted) { }
int
NFAQuoteNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd + qStr.size() > str.size())
return
-1;
if
(str.substr(curInd, qStr.size()) != qStr)
return
-1;
return
next->match(str, matcher, curInd + qStr.size(), depth + 1);
}
void
NFAQuoteNode::print(
const
int
indent)
{
printf
(
"%*c%s(%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), qStr.c_str());
if
(next) next->print(indent);
}
NFACIQuoteNode::NFACIQuoteNode(
const
std::string & quoted) : qStr(quoted) { }
int
NFACIQuoteNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd + qStr.size() > str.size())
return
-1;
if
(str_icmp(str.substr(curInd, qStr.size()).c_str(), qStr.c_str()))
return
-1;
return
next->match(str, matcher, qStr.size(), depth + 1);
}
void
NFACIQuoteNode::print(
const
int
indent)
{
printf
(
"%*c%s(%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), qStr.c_str());
if
(next) next->print(indent);
}
NFALookAheadNode::NFALookAheadNode(NFANode * internal,
const
bool
positive) : NFANode(), pos(positive), inner(internal) { }
void
NFALookAheadNode::findAllNodes(std::map<NFANode*,
bool
> & soFar)
{
if
(inner) inner->findAllNodes(soFar);
NFANode::findAllNodes(soFar);
}
int
NFALookAheadNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
return
((inner->match(str, matcher, curInd, depth + 1) == -1) ^ pos) ? next->match(str, matcher, curInd, depth + 1) : -1;
}
void
NFALookAheadNode::print(
const
int
indent)
{
printf
(
"%*c%s(pos=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), pos ?
"true"
:
"false"
);
inner->print(indent + 1);
if
(next) next->print(indent);
}
NFALookBehindNode::NFALookBehindNode(
const
std::string & str,
const
bool
positive) : pos(positive), mStr(str) { }
int
NFALookBehindNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(pos)
{
if
(curInd < (
int
)mStr.size())
return
-1;
if
(str.substr(curInd - mStr.size(), mStr.size()) == mStr)
return
next->match(str, matcher, curInd, depth + 1);
}
else
{
if
(curInd < (
int
)mStr.size())
return
next->match(str, matcher, curInd, depth + 1);
if
(str.substr(curInd - mStr.size(), mStr.size()) == mStr)
return
-1;
return
next->match(str, matcher, curInd, depth + 1);
}
return
-1;
}
void
NFALookBehindNode::print(
const
int
indent)
{
printf
(
"%*c%s(str=%s, pos=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), mStr.c_str(), pos ?
"true"
:
"false"
);
if
(next) next->print(indent);
}
NFAStartOfLineNode::NFAStartOfLineNode() { }
int
NFAStartOfLineNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd == 0 || str[curInd - 1] ==
'\n'
|| str[curInd - 1] ==
'\r'
)
{
return
next->match(str, matcher, curInd, depth + 1);
}
return
-1;
}
NFAEndOfLineNode::NFAEndOfLineNode() { }
int
NFAEndOfLineNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd >= (
int
)str.size() || str[curInd] ==
'\n'
|| str[curInd] ==
'\r'
)
{
return
next->match(str, matcher, curInd, depth + 1);
}
return
-1;
}
NFAReferenceNode::NFAReferenceNode(
const
int
groupIndex) : gi(groupIndex) { }
int
NFAReferenceNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
len = matcher->ends[gi] - matcher->starts[gi];
int
ni = -1;
if
(gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd;
else
if
(curInd + len > (
int
)str.size())
return
-1;
else
if
(str.substr(curInd, len) != str.substr(matcher->starts[gi], len))
return
-1;
else
ni = curInd + len;
return
next->match(str, matcher, ni, depth + 1);
}
void
NFAReferenceNode::print(
const
int
indent)
{
printf
(
"%*c%s(gi=%d)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), gi);
if
(next) next->print(indent);
}
NFAStartOfInputNode::NFAStartOfInputNode() { }
int
NFAStartOfInputNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd == 0)
return
next->match(str, matcher, curInd, depth + 1);
return
-1;
}
NFAEndOfInputNode::NFAEndOfInputNode(
const
bool
lookForTerm) : term(lookForTerm) { }
int
NFAEndOfInputNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
len = (
int
)str.size();
if
(curInd == len)
return
next->match(str, matcher, curInd, depth + 1);
else
if
(term)
{
if
(curInd == len - 1 && (str[curInd] ==
'\r'
|| str[curInd] ==
'\n'
))
{
return
next->match(str, matcher, curInd, depth + 1);
}
else
if
(curInd == len - 2 && str.substr(curInd, 2) ==
"\r\n"
)
{
return
next->match(str, matcher, curInd, depth + 1);
}
}
return
-1;
}
void
NFAEndOfInputNode::print(
const
int
indent)
{
printf
(
"%*c%s(term=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), term ?
"true"
:
"false"
);
if
(next) next->print(indent);
}
NFAWordBoundaryNode::NFAWordBoundaryNode(
const
bool
positive) : pos(positive) { }
int
NFAWordBoundaryNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
#define is_alpha(x) (((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z'))
int
len = (
int
)str.size();
bool
ok = 0;
char
c1 = (curInd - 1 < len) ? str[curInd - 1] : -1;
char
c2 = (curInd < len) ? str[curInd ] : -1;
if
(curInd == len)
return
next->match(str, matcher, curInd, depth + 1);
if
(is_alpha(c1) ^ is_alpha(c2)) ok = 1;
if
(ok && pos)
return
next->match(str, matcher, curInd, depth + 1);
return
-1;
#undef is_alpha
}
void
NFAWordBoundaryNode::print(
const
int
indent)
{
printf
(
"%*c%s(pos=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), pos ?
"true"
:
"false"
);
if
(next) next->print(indent);
}
NFAEndOfMatchNode::NFAEndOfMatchNode() { }
int
NFAEndOfMatchNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
if
(curInd == matcher->lm)
return
next->match(str, matcher, curInd, depth + 1);
return
-1;
}
NFAGroupHeadNode::NFAGroupHeadNode(
const
int
groupIndex) : gi(groupIndex) { }
int
NFAGroupHeadNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ret, o = matcher->starts[gi];
matcher->starts[gi] = curInd;
ret = next->match(str, matcher, curInd, depth + 1);
if
(ret < 0) matcher->starts[gi] = o;
return
ret;
}
void
NFAGroupHeadNode::print(
const
int
indent)
{
printf
(
"%*c%s(gi=%d)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), gi);
if
(next) next->print(indent);
}
NFAGroupTailNode::NFAGroupTailNode(
const
int
groupIndex) : gi(groupIndex) { }
int
NFAGroupTailNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ret, o = matcher->ends[gi];
matcher->ends[gi] = curInd;
ret = next->match(str, matcher, curInd, depth + 1);
if
(ret < 0) matcher->ends[gi] = o;
return
ret;
}
void
NFAGroupTailNode::print(
const
int
indent)
{
printf
(
"%*c%s(gi=%d)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), gi);
if
(next) next->print(indent);
}
NFAGroupLoopPrologueNode::NFAGroupLoopPrologueNode(
const
int
groupIndex) : gi(groupIndex) { }
int
NFAGroupLoopPrologueNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi];
matcher->groups[gi] = 0;
matcher->groupPos[gi] = 0;
matcher->groupIndeces[gi] = -1;
ret = next->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
matcher->groups[gi] = o1;
matcher->groupPos[gi] = o2;
matcher->groupIndeces[gi] = o3;
}
return
ret;
}
void
NFAGroupLoopPrologueNode::print(
const
int
indent)
{
printf
(
"%*c%s(gi=%d)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), gi);
if
(next) next->print(indent);
}
NFAGroupLoopNode::NFAGroupLoopNode(NFANode * internal,
const
int
minMatch,
const
int
maxMatch,
const
int
groupIndex,
const
int
matchType)
{
inner = internal;
min = minMatch;
max = maxMatch;
gi = groupIndex;
type = matchType;
}
void
NFAGroupLoopNode::findAllNodes(std::map<NFANode*,
bool
> & soFar)
{
if
(inner) inner->findAllNodes(soFar);
NFANode::findAllNodes(soFar);
}
int
NFAGroupLoopNode::match(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
bool
b = (curInd > matcher->groupIndeces[gi]);
if
(b && matcher->groups[gi] < min)
{
++matcher->groups[gi];
int
o = matcher->groupIndeces[gi];
matcher->groupIndeces[gi] = curInd;
int
ret = inner->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
matcher->groupIndeces[gi] = o;
--matcher->groups[gi];
}
return
ret;
}
else
if
(!b || matcher->groups[gi] >= max)
{
return
next->match(str, matcher, curInd, depth + 1);
}
else
{
switch
(type)
{
case
0:
return
matchGreedy(str, matcher, curInd, depth + 1);
case
1:
return
matchLazy(str, matcher, curInd, depth + 1);
case
2:
return
matchPossessive(str, matcher, curInd, depth + 1);
}
}
return
-1;
}
int
NFAGroupLoopNode::matchGreedy(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
o = matcher->groupIndeces[gi];
matcher->groupIndeces[gi] = curInd;
++matcher->groups[gi];
int
ret = inner->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
--matcher->groups[gi];
matcher->groupIndeces[gi] = o;
ret = next->match(str, matcher, curInd, depth + 1);
}
return
ret;
}
int
NFAGroupLoopNode::matchLazy(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
ret = next->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
int
o = matcher->groupIndeces[gi];
matcher->groupIndeces[gi] = curInd;
++matcher->groups[gi];
ret = inner->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
--matcher->groups[gi];
matcher->groupIndeces[gi] = o;
}
}
return
ret;
}
int
NFAGroupLoopNode::matchPossessive(
const
std::string & str, Matcher * matcher,
const
int
curInd,
const
int
depth)
const
{
int
o = matcher->groupIndeces[gi];
matcher->groupPos[gi] = matcher->groups[gi];
matcher->groupIndeces[gi] = curInd;
++matcher->groups[gi];
int
ret = inner->match(str, matcher, curInd, depth + 1);
if
(ret < 0)
{
--matcher->groups[gi];
matcher->groupIndeces[gi] = o;
if
(matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd, depth + 1);
}
return
ret;
}
void
NFAGroupLoopNode::print(
const
int
indent)
{
printf
(
"%*c%s(gi=%d, min=%d, max=%d, type=%s)\n"
, indent * 2,
' '
,
typeid
(*
this
).name(), gi, min, max, type == 0 ?
"greedy"
: type == 1 ?
"reluctant"
:
"possessive"
);
if
(next) next->print(indent);
}
#ifdef _WIN32
#pragma warning(pop)
#endif