diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa license.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/license.txt Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,53 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS \ No newline at end of file diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa linux/makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux/makefile Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,25 @@ +all: nfw + +nfw: main.o nfw.o Matcher.o Pattern.o + g++ main.o nfw.o Matcher.o Pattern.o -o nfw + +main.o: ../src/main.cpp + g++ -c ../src/main.cpp + +nfw.o: ../src/nfw.cpp + g++ -c ../src/nfw.cpp + +Matcher.o: ../src/Matcher.cpp + g++ -c ../src/Matcher.cpp + +Pattern.o: ../src/Pattern.cpp + g++ -c ../src/Pattern.cpp + +clean: + rm -rf *.o nfw + +install: nfw + install -m 0755 nfw /usr/local/bin + +uninstall: nfw + rm /usr/local/bin/nfw diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa src/Matcher.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Matcher.cpp Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,178 @@ +#include "regexp/Matcher.h" +#include "regexp/Pattern.h" + +const int Matcher::MATCH_ENTIRE_STRING = 0x01; + +/* + Detailed documentation is provided in this class' header file + + @author Jeffery Stuart + @since November 2004 + @version 1.07.00 +*/ + +Matcher::Matcher(Pattern * pattern, const std::string & text) +{ + pat = pattern; + str = text; + gc = pattern->groupCount; + ncgc = -pattern->nonCapGroupCount; + flags = 0; + matchedSomething = false; + starts = new int[gc + ncgc]; + ends = new int[gc + ncgc]; + groups = new int[gc + ncgc]; + groupPos = new int[gc + ncgc]; + groupIndeces = new int[gc + ncgc]; + starts = starts + ncgc; + ends = ends + ncgc; + groups = groups + ncgc; + groupPos = groupPos + ncgc; + groupIndeces = groupIndeces + ncgc; + for (int i = 0; i < gc; ++i) starts[i] = ends[i] = 0; +} +Matcher::~Matcher() +{ + delete [] (starts - ncgc); + delete [] (ends - ncgc); + delete [] (groups - ncgc); + delete [] (groupIndeces - ncgc); + delete [] (groupPos - ncgc); +} +void Matcher::clearGroups() +{ + int i; + lm = 0; + for (i = 0; i < gc; ++i) groups[i] = starts[i] = ends[i] = -1; + for (i = 1; i <= ncgc; ++i) groups[0 - i] = -1; +} +std::string Matcher::replaceWithGroups(const std::string & str) +{ + std::string ret = ""; + + std::string t = str; + while (t.size() > 0) + { + if (t[0] == '\\') + { + t = t.substr(1); + if (t.size() == 0) + { + ret += "\\"; + } + else if (t[0] < '0' || t[0] > '9') + { + ret += t.substr(0, 1); + t = t.substr(1); + } + else + { + int gn = 0; + while (t.size() > 0 && t[0] >= '0' && t[0] <= '9') + { + gn = gn * 10 + (t[0] - '0'); + t = t.substr(1); + } + ret += getGroup(gn); + } + } + else + { + ret += t.substr(0, 1); + t = t.substr(1); + } + } + + return ret; +} +unsigned long Matcher::getFlags() const +{ + return flags; +} +std::string Matcher::getText() const +{ + return str; +} + +bool Matcher::matches() +{ + flags = MATCH_ENTIRE_STRING; + matchedSomething = false; + clearGroups(); + lm = 0; + return pat->head->match(str, this, 0) == (int)str.size(); +} +bool Matcher::findFirstMatch() +{ + starts[0] = 0; + flags = 0; + clearGroups(); + start = 0; + lm = 0; + ends[0] = pat->head->match(str, this, 0); + if (ends[0] >= 0) + { + matchedSomething = true; + return 1; + } + return 0; +} +bool Matcher::findNextMatch() +{ + int s = starts[0], e = ends[0]; + + if (!matchedSomething) return findFirstMatch(); + if (s == e) ++e; + flags = 0; + clearGroups(); + + starts[0] = e; + if (e >= (int)str.size()) return 0; + start = e; + lm = e; + ends[0] = pat->head->match(str, this, e); + return ends[0] >= 0; +} +std::vector Matcher::findAll() +{ + std::vector ret; + reset(); + while (findNextMatch()) + { + ret.push_back(getGroup()); + } + return ret; +} + +void Matcher::reset() +{ + lm = 0; + clearGroups(); + matchedSomething = false; +} + +int Matcher::getStartingIndex(const int groupNum) const +{ + if (groupNum < 0 || groupNum >= gc) return -1; + return starts[groupNum]; +} +int Matcher::getEndingIndex(const int groupNum) const +{ + if (groupNum < 0 || groupNum >= gc) return -1; + return ends[groupNum]; +} +std::string Matcher::getGroup(const int groupNum) const +{ + if (groupNum < 0 || groupNum >= gc) return ""; + if (starts[groupNum] < 0 || ends[groupNum] < 0) return ""; + return str.substr(starts[groupNum], ends[groupNum] - starts[groupNum]); +} +std::vector Matcher::getGroups(const bool includeGroupZero) const +{ + int i, start = (includeGroupZero ? 0 : 1); + std::vector ret; + + for (i = start; i < gc; ++i) ret.push_back(getGroup(i)); + return ret; +} + diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa src/Pattern.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pattern.cpp Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,2020 @@ +/** + From the author (Jeff Stuart) + " + Let me start by saying this file is pretty big. If you feel up to it, you can + try making changes yourself, but you would be better off to just email me at + stuart@cs.ucdavis.edu if you think there is a bug, or have something useful you + would like added. This project is very "near and dear" to me, so I am fairly + quick to make bug fixes. The header files for Pattern and Matcher are fairly + well documented and the function names are pretty self-explanatory, but if you + are having any trouble, feel free to email me at stuart@cs.ucdavis.edu. + + If you email me, make sure you put something like C++RE in the subject because + I tend to delete email if I don't recognize the name and the subject is + something like "I Need Your Help" or "Got A Second" or "I Found It". + " + */ + +/* + Detailed documentation is provided in this class' header file + + @author Jeffery Stuart + @since November 24th, 2007 + @version 1.09.00 +*/ + +#ifdef _WIN32 + #pragma warning(push) + #pragma warning(disable:4996) + #define str_icmp stricmp +#else + #define str_icmp strcasecmp +#endif + +#include "regexp/Pattern.h" +#include "regexp/Matcher.h" +#include +#include +#include +#include + +std::map Pattern::compiledPatterns; +std::map > Pattern::registeredPatterns; + +const int Pattern::MIN_QMATCH = 0x00000000; +const int Pattern::MAX_QMATCH = 0x7FFFFFFF; + +const unsigned long Pattern::CASE_INSENSITIVE = 0x01; +const unsigned long Pattern::LITERAL = 0x02; +const unsigned long Pattern::DOT_MATCHES_ALL = 0x04; +const unsigned long Pattern::MULTILINE_MATCHING = 0x08; +const unsigned long Pattern::UNIX_LINE_MODE = 0x10; + +Pattern::Pattern(const std::string & rhs) +{ + matcher = NULL; + pattern = rhs; + curInd = 0; + groupCount = 0; + nonCapGroupCount = 0; + error = 0; + head = NULL; +} +// convenience function in case we want to add any extra debugging output +void Pattern::raiseError() +{ + switch (pattern[curInd - 1]) + { + case '*': + case ')': + case '+': + case '?': + case ']': + case '}': + fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' '); + fprintf(stderr, "Syntax Error near here. Possible unescaped meta character.\n"); + break; + default: + fprintf(stderr, "%s\n%*c^\n", pattern.c_str(), curInd - 1, ' '); + fprintf(stderr, "Syntax Error near here. \n"); + break; + } + error = 1; +} +NFANode * Pattern::registerNode(NFANode * node) +{ + nodes[node] = 1; + return node; +} + +std::string Pattern::classUnion (std::string s1, std::string s2) const +{ + char out[300]; + std::sort(s1.begin(), s1.end()); + std::sort(s2.begin(), s2.end()); + *std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0; + return out; +} +std::string Pattern::classIntersect (std::string s1, std::string s2) const +{ + char out[300]; + std::sort(s1.begin(), s1.end()); + std::sort(s2.begin(), s2.end()); + *std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), out) = 0; + return out; +} +std::string Pattern::classNegate (std::string s1) const +{ + char out[300]; + int i, ind = 0; + std::map m; + + for (i = 0; i < (int)s1.size(); ++i) + { + m[s1[i]] = 1; + if (flags & CASE_INSENSITIVE) + { + if (s1[i] >= 'a' && s1[i] <= 'z') m[s1[i] - 'a' + 'A'] = 1; + else if (s1[i] >= 'A' && s1[i] <= 'Z') m[s1[i] - 'A' + 'a'] = 1; + } + } + for (i = 0xFF; i >= 0; --i) if (m.find((char)i) == m.end()) out[ind++] = (char)i; + out[ind] = 0; + return std::string(out, ind); +} +std::string Pattern::classCreateRange(char low, char hi) const +{ + char out[300]; + int ind = 0; + while (low != hi) out[ind++] = low++; + out[ind++] = low; + return std::string(out, ind); +} + +int Pattern::getInt(int start, int end) +{ + int ret = 0; + for (; start <= end; ++start) ret = ret * 10 + (pattern[start] - '0'); + return ret; +} +bool Pattern::quantifyCurly(int & sNum, int & eNum) +{ + bool good = 1; + int i, ci = curInd + 1; + int commaInd = ci, endInd = ci, len = pattern.size(); + sNum = eNum = 0; + + while (endInd < len && pattern[endInd ] != '}') ++endInd; + while (commaInd < endInd && pattern[commaInd] != ',') ++commaInd; + if (endInd >= len) { raiseError(); return 0; } + for (i = ci; good && i < endInd; ++i) if (i != commaInd && !isdigit(pattern[i])) good = 0; + if (!good && commaInd < endInd) { raiseError(); return 0; } + if (!good) return 0; + /* so now everything in here is either a comma (and there is at most one comma) or a digit */ + if (commaInd == ci) // {,*} + { + if (endInd == commaInd + 1) { sNum = MIN_QMATCH; eNum = MAX_QMATCH; } // {,} = * + else { sNum = MIN_QMATCH; eNum = getInt(commaInd + 1, endInd - 1); } // {,+} + } + else if (commaInd == endInd - 1) { sNum = getInt(ci, commaInd - 1); eNum = MAX_QMATCH; } // {+,} + else if (commaInd == endInd) { sNum = getInt(ci, endInd - 1); eNum = sNum; } // {+} + else { sNum = getInt(ci, commaInd - 1); eNum = getInt(commaInd + 1, endInd - 1); } // {+,+} + curInd = endInd + 1; + return 1; +} +NFANode * Pattern::quantifyGroup(NFANode * start, NFANode * stop, const int gn) +{ + NFANode * newNode = NULL; + int type = 0; + + if (curInd < (int)pattern.size()) + { + char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1]; + switch (pattern[curInd]) + { + case '*': + ++curInd; + switch (ch) + { + case '?': ++curInd; type = 1; break; + case '+': ++curInd; type = 2; break; + } + newNode = registerNode(new NFAGroupLoopPrologueNode(gn)); + newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, MAX_QMATCH, gn, type)); + stop->next = newNode->next; + return newNode; + case '?': + ++curInd; + switch (ch) + { + case '?': ++curInd; type = 1; break; + case '+': ++curInd; type = 2; break; + } + newNode = registerNode(new NFAGroupLoopPrologueNode(gn)); + newNode->next = registerNode(new NFAGroupLoopNode(start, MIN_QMATCH, 1, gn, type)); + stop->next = newNode->next; + return newNode; + case '+': + ++curInd; + switch (ch) + { + case '?': ++curInd; type = 1; break; + case '+': ++curInd; type = 2; break; + } + newNode = registerNode(new NFAGroupLoopPrologueNode(gn)); + newNode->next = registerNode(new NFAGroupLoopNode(start, 1, MAX_QMATCH, gn, type)); + stop->next = newNode->next; + return newNode; + case '{': + { + int s, e; + if (quantifyCurly(s, e)) + { + ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1; + switch (ch) + { + case '?': ++curInd; type = 1; break; + case '+': ++curInd; type = 2; break; + } + newNode = registerNode(new NFAGroupLoopPrologueNode(gn)); + newNode->next = registerNode(new NFAGroupLoopNode(start, s, e, gn, type)); + stop->next = newNode->next; + return newNode; + } + } + default: + break; + } + } + return NULL; +} + +NFANode * Pattern::quantify(NFANode * newNode) +{ + if (curInd < (int)pattern.size()) + { + char ch = (curInd + 1 >= (int)pattern.size()) ? -1 : pattern[curInd + 1]; + switch (pattern[curInd]) + { + case '*': + ++curInd; + switch (ch) + { + case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break; + case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, MAX_QMATCH)); break; + default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, MIN_QMATCH, MAX_QMATCH)); break; + } + break; + case '?': + ++curInd; + switch (ch) + { + case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, MIN_QMATCH, 1)); break; + case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, MIN_QMATCH, 1)); break; + default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, MIN_QMATCH, 1)); break; + } + break; + case '+': + ++curInd; + switch (ch) + { + case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, 1, MAX_QMATCH)); break; + case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, 1, MAX_QMATCH)); break; + default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, 1, MAX_QMATCH)); break; + } + break; + case '{': + { + int s, e; + if (quantifyCurly(s, e)) + { + ch = (curInd < (int)pattern.size()) ? pattern[curInd] : -1; + switch (ch) + { + case '?': ++curInd; newNode = registerNode(new NFALazyQuantifierNode (this, newNode, s, e)); break; + case '+': ++curInd; newNode = registerNode(new NFAPossessiveQuantifierNode(this, newNode, s, e)); break; + default: newNode = registerNode(new NFAGreedyQuantifierNode (this, newNode, s, e)); break; + } + } + } + break; + default: + break; + } + } + return newNode; +} +std::string Pattern::parseClass() +{ + std::string t, ret = ""; + char ch, c1, c2; + bool inv = 0, neg = 0, quo = 0; + + if (curInd < (int)pattern.size() && pattern[curInd] == '^') + { + ++curInd; + neg = 1; + } + while (curInd < (int)pattern.size() && pattern[curInd] != ']') + { + ch = pattern[curInd++]; + if (ch == '[') + { + t = parseClass(); + ret = classUnion(ret, t); + } + /*else if (ch == '-') + { + raiseError(); + curInd = pattern.size(); + }*/ + else if (ch == '&' && curInd < (int)pattern.size() && pattern[curInd] == '&') + { + if (pattern[++curInd] != '[') + { + raiseError(); + curInd = pattern.size(); + } + else + { + ++curInd; + t = parseClass(); + ret = classIntersect(ret, t); + } + } + else if (ch == '\\') + { + t = parseEscape(inv, quo); + if (quo) + { + raiseError(); + curInd = pattern.size(); + } + else if (inv || t.size() > 1) // cant be part of a range (a-z) + { + if (inv) t = classNegate(t); + ret = classUnion(ret, t); + } + else if (curInd < (int)pattern.size() && pattern[curInd] == '-') // part of a range (a-z) + { + c1 = t[0]; + ++curInd; + if (curInd >= (int)pattern.size()) raiseError(); + else + { + c2 = pattern[curInd++]; + if (c2 == '\\') + { + t = parseEscape(inv, quo); + if (quo) + { + raiseError(); + curInd = pattern.size(); + } + else if (inv || t.size() > 1) raiseError(); + else ret = classUnion(ret, classCreateRange(c1, c2)); + } + else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') + { + raiseError(); + curInd = pattern.size(); + } + else ret = classUnion(ret, classCreateRange(c1, c2)); + } + } + else + { + ret = classUnion(ret, t); + } + } + else if (curInd < (int)pattern.size() && pattern[curInd] == '-') + { + c1 = ch; + ++curInd; + if (curInd >= (int)pattern.size()) raiseError(); + else + { + c2 = pattern[curInd++]; + if (c2 == '\\') + { + t = parseEscape(inv, quo); + if (quo) + { + raiseError(); + curInd = pattern.size(); + } + else if (inv || t.size() > 1) raiseError(); + else ret = classUnion(ret, classCreateRange(c1, c2)); + } + else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') + { + raiseError(); + curInd = pattern.size(); + } + else + { + ret = classUnion(ret, classCreateRange(c1, c2)); + } + } + } + else + { + ret += " "; + ret[ret.size() - 1] = ch; + } + } + if (curInd >= (int)pattern.size() || pattern[curInd] != ']') + { + raiseError(); + ret = ""; + } + else + { + ++curInd; + if (neg) ret = classNegate(ret); + } + return ret; +} +std::string Pattern::parsePosix() +{ + std::string s7 = pattern.substr(curInd, 7); + if (s7 == "{Lower}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyz"; } + if (s7 == "{Upper}") { curInd += 7; return "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; } + if (s7 == "{Alpha}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; } + if (s7 == "{Digit}") { curInd += 7; return "0123456789"; } + if (s7 == "{Alnum}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; } + if (s7 == "{Punct}") { curInd += 7; return "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } + if (s7 == "{Graph}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } + if (s7 == "{Print}") { curInd += 7; return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; } + if (s7 == "{Blank}") { curInd += 7; return " \t"; } + if (s7 == "{Space}") { curInd += 7; return " \t\n\x0B\f\r"; } + if (s7 == "{Cntrl}") + { + int i; + std::string s = " "; + + for (i = 0; i < 5; ++i) s += s; + s += " "; + for (i = 0; i <= 0x1F; ++i) s[i] = i; + s[0x20] = 0x7F; + curInd += 7; + return s; + } + if (s7 == "{ASCII}") + { + std::string s(0x80, ' '); + for (int i = 0; i < 0x80; ++i) s[i] = i; + curInd += 7; + return s; + } + if (pattern.substr(curInd, 8) == "{XDigit}") { curInd += 8; return "abcdefABCDEF0123456789"; } + raiseError(); + return ""; +} +NFANode * Pattern::parseBackref() +{ + #define is_dig(x) ((x) >= '0' && (x) <= '9') + #define to_int(x) ((x) - '0') + int ci = curInd; + int oldRef = 0, ref = 0; + + while (ci < (int)pattern.size() && is_dig(pattern[ci]) && (ref < 10 || ref < groupCount)) + { + oldRef = ref; + ref = ref * 10 + to_int(pattern[ci++]); + } + if (ci == (int)pattern.size()) + { + oldRef = ref; + ++ci; + } + if (oldRef < 0 || ci <= curInd) + { + raiseError(); + return registerNode(new NFAReferenceNode(-1)); + } + curInd = ci; + return registerNode(new NFAReferenceNode(ref)); + + #undef is_dig + #undef to_int +} +std::string Pattern::parseOctal() +{ + #define islowoc(x) ((x) >= '0' && (x) <= '3') + #define isoc(x) ((x) >= '0' && (x) <= '7') + #define fromoc(x) ((x) - '0') + int ci = curInd; + char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1; + char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1; + char ch3 = (ci + 2 < (int)pattern.size()) ? pattern[ci + 2] : -1; + std::string s = " "; + + if (islowoc(ch1) && isoc(ch2)) + { + curInd += 2; + s[0] = fromoc(ch1) * 8 + fromoc(ch2); + if (isoc(ch3)) + { + ++curInd; + s[0] = s[0] * 8 + fromoc(ch3); + } + } + else if (isoc(ch1) && isoc(ch2)) + { + curInd += 2; + s[0] = fromoc(ch1) * 8 + fromoc(ch2); + } + else raiseError(); + + return s; + #undef islowoc + #undef isoc + #undef fromoc +} +std::string Pattern::parseHex() +{ + #define to_low(x) (((x) >= 'A' && (x) <= 'Z') ? ((x) - 'A' + 'a') : (x)) + #define is_dig(x) ((x) >= '0' && (x) <= '9') + #define is_hex(x) (is_dig(x) || (to_low(x) >= 'a' && to_low(x) <= 'f')) + #define to_int(x) ((is_dig(x)) ? ((x) - '0') : (to_low(x) - 'a' + 10)) + + int ci = curInd; + char ch1 = (ci + 0 < (int)pattern.size()) ? pattern[ci + 0] : -1; + char ch2 = (ci + 1 < (int)pattern.size()) ? pattern[ci + 1] : -1; + std::string s = " "; + + if (is_hex(ch1) && is_hex(ch2)) + { + curInd += 2; + s[0] = (to_int(ch1) << 4 & 0xF0) | (to_int(ch2) & 0x0F); + } + + return s; + #undef to_low + #undef is_dig + #undef is_hex + #undef to_int +} +std::string Pattern::parseEscape(bool & inv, bool & quo) +{ + char ch = pattern[curInd++]; + std::string classes = ""; + + if (curInd > (int)pattern.size()) + { + raiseError(); + return NULL; + } + + quo = 0; + inv = 0; + switch (ch) + { + case 'p': classes = parsePosix(); break; + case 'P': classes = "!!"; classes += parsePosix(); break; + case 'd': classes = "0123456789"; break; + case 'D': classes = "!!0123456789"; break; + case 's': classes = " \t\r\n\f"; break; + case 'S': classes = "!! \t\r\n\f"; break; + case 'w': classes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; + case 'W': classes = "!!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; break; + case '0': classes = parseOctal(); break; + case 'x': classes = parseHex(); break; + + case 'Q': quo = 1; break; + case 't': classes = "\t"; break; + case 'r': classes = "\r"; break; + case 'n': classes = "\n"; break; + case 'f': classes = "\f"; break; + case 'a': classes = "\a"; break; + case 'e': classes = "\r"; break; + default: classes = " "; classes[0] = ch; break; + } + if (classes.substr(0, 2) == "!!") + { + classes = classes.substr(2); + inv = 1; + } + return classes; +} +NFANode * Pattern::parseRegisteredPattern(NFANode ** end) +{ + int i, j; + std::string s; + NFANode * ret = NULL; + for (i = curInd; i < (int)pattern.size() && pattern[i] != '}'; ++i) { } + if (pattern[i] != '}') { raiseError(); return NULL; } + if (i == curInd + 1) { raiseError(); return NULL; } // {} + if ( + !( + (pattern[curInd] >= 'a' && pattern[curInd] <= 'z') || + (pattern[curInd] >= 'A' && pattern[curInd] <= 'Z') || + (pattern[curInd] == '_') + ) + ) + { + raiseError(); + return NULL; + } + for (j = curInd; !error && j < i; ++j) + { + if ( + !( + (pattern[j] >= 'a' && pattern[j] <= 'z') || + (pattern[j] >= 'A' && pattern[j] <= 'Z') || + (pattern[j] >= '0' && pattern[j] <= '9') || + (pattern[j] == '_') + ) + ) + { + raiseError(); + return NULL; + } + } + s = pattern.substr(curInd, i - curInd); + if (registeredPatterns.find(s) == registeredPatterns.end()) raiseError(); + else + { + unsigned long oflags = flags; + std::string op = pattern; + int ci = i + 1; + + pattern = registeredPatterns[s].first; + curInd = 0; + flags = registeredPatterns[s].second; + + --groupCount; + ret = parse(0, 0, end); + + pattern = op; + curInd = ci; + flags = oflags; + } + if (error) { *end = ret = NULL; } + return ret; +} + +// look behind should interpret everything as a literal (except \\) since the +// pattern must have a concrete length +NFANode * Pattern::parseBehind(const bool pos, NFANode ** end) +{ + std::string t = ""; + while (curInd < (int)pattern.size() && pattern[curInd] != ')') + { + char ch = pattern[curInd++]; + t += " "; + if (ch == '\\') + { + if (curInd + 1 >= (int)pattern.size()) + { + raiseError(); + return *end = registerNode(new NFACharNode(' ')); + } + ch = pattern[curInd++]; + } + t[t.size() - 1] = ch; + } + if (curInd >= (int)pattern.size() || pattern[curInd] != ')') raiseError(); + else ++curInd; + return *end = registerNode(new NFALookBehindNode(t, pos)); +} +NFANode * Pattern::parseQuote() +{ + bool done = 0; + std::string s = ""; + + while (!done) + { + if (curInd >= (int)pattern.size()) + { + raiseError(); + done = 1; + } + else if (pattern.substr(curInd, 2) == "\\E") + { + curInd += 2; + done = 1; + } + else if (pattern[curInd] == '\\') + { + s += " "; + s[s.size() - 1] = pattern[++curInd]; + ++curInd; + } + else + { + s += " "; + s[s.size() - 1] = pattern[curInd++]; + } + } + if ((flags & Pattern::CASE_INSENSITIVE) != 0) return registerNode(new NFACIQuoteNode(s)); + return registerNode(new NFAQuoteNode(s)); +} +NFANode * Pattern::parse(const bool inParen, const bool inOr, NFANode ** end, const int orGroup, const bool inOrCap) +{ + NFANode * start, * cur, * next = NULL; + std::string t; + int grc = groupCount++; + bool inv, quo; + bool ahead = false, pos = false, noncap = false, indep = false; + unsigned long oldFlags = flags; + + if (inParen) + { + if (inOr) + { + --groupCount; + noncap = !inOrCap; + grc = orGroup; + if (noncap) cur = start = registerNode(new NFAGroupHeadNode(grc)); + else cur = start = registerNode(new NFASubStartNode); + } + else if (pattern[curInd] == '?') + { + ++curInd; + --groupCount; + if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; } + else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; } + else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; } + else if (pattern.substr(curInd, 2) == "<=") { curInd += 2; return parseBehind(1, end); } + else if (pattern.substr(curInd, 2) == "') { ++curInd; indep = 1; } + else + { + bool negate = false, done = false; + while (!done) + { + if (curInd >= (int)pattern.size()) + { + raiseError(); + return NULL; + } + else if (negate) + { + switch (pattern[curInd]) + { + case 'i': flags &= ~Pattern::CASE_INSENSITIVE; break; + case 'd': flags &= ~Pattern::UNIX_LINE_MODE; break; + case 'm': flags &= ~Pattern::MULTILINE_MATCHING; break; + case 's': flags &= ~Pattern::DOT_MATCHES_ALL; break; + case ':': done = true; break; + case ')': + ++curInd; + *end = registerNode(new NFALookBehindNode("", true)); + return *end; + case '-': + default: raiseError(); return NULL; + } + } + else + { + switch (pattern[curInd]) + { + case 'i': flags |= Pattern::CASE_INSENSITIVE; break; + case 'd': flags |= Pattern::UNIX_LINE_MODE; break; + case 'm': flags |= Pattern::MULTILINE_MATCHING; break; + case 's': flags |= Pattern::DOT_MATCHES_ALL; break; + case ':': done = true; break; + case '-': negate = true; break; + case ')': + ++curInd; + *end = registerNode(new NFALookBehindNode("", true)); + return *end; + default: raiseError(); return NULL; + } + } + ++curInd; + } + noncap = 1; + grc = --nonCapGroupCount; + } + if (noncap) cur = start = registerNode(new NFAGroupHeadNode(grc)); + else cur = start = registerNode(new NFASubStartNode); + } + else cur = start = registerNode(new NFAGroupHeadNode(grc)); + } + else cur = start = registerNode(new NFASubStartNode); + while (curInd < (int)pattern.size()) + { + char ch = pattern[curInd++]; + + next = NULL; + if (error) return NULL; + switch (ch) + { + case '^': + if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineNode); + else next = registerNode(new NFAStartOfInputNode); + break; + case '$': + if ((flags & Pattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineNode); + else next = registerNode(new NFAEndOfInputNode(0)); + break; + case '|': + cur->next = registerNode(new NFAAcceptNode); + cur = start = registerNode(new NFAOrNode(start, parse(inParen, true, NULL, grc, !noncap))); + break; + case '\\': + if (curInd < (int)pattern.size()) + { + bool eoi = 0; + switch (pattern[curInd]) + { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': next = parseBackref(); break; + case 'A': ++curInd; next = registerNode(new NFAStartOfInputNode); break; + case 'B': ++curInd; next = registerNode(new NFAWordBoundaryNode(0)); break; + case 'b': ++curInd; next = registerNode(new NFAWordBoundaryNode(1)); break; + case 'G': ++curInd; next = registerNode(new NFAEndOfMatchNode); break; + case 'Z': eoi = 1; + case 'z': ++curInd; next = registerNode(new NFAEndOfInputNode(eoi)); break; + default: + t = parseEscape(inv, quo); + if (!quo) + { + if (t.size() > 1 || inv) + { + if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassNode(t, inv)); + else next = registerNode(new NFAClassNode(t, inv)); + } + else + { + next = registerNode(new NFACharNode(t[0])); + } + } + else + { + next = parseQuote(); + } + } + } + else raiseError(); + break; + case '[': + if ((flags & Pattern::CASE_INSENSITIVE) == 0) + { + NFAClassNode * clazz = new NFAClassNode(); + std::string s = parseClass(); + for (int i = 0; i < (int)s.size(); ++i) clazz->vals[s[i]] = 1; + next = registerNode(clazz); + } + else + { + NFACIClassNode * clazz = new NFACIClassNode(); + std::string s = parseClass(); + for (int i = 0; i < (int)s.size(); ++i) clazz->vals[tolower(s[i])] = 1; + next = registerNode(clazz); + } + break; + case '.': + { + bool useN = 1, useR = 1; + NFAClassNode * clazz = new NFAClassNode(1); + if ((flags & Pattern::UNIX_LINE_MODE) != 0) useR = 0; + if ((flags & Pattern::DOT_MATCHES_ALL) != 0) useN = useR = 0; + if (useN) clazz->vals['\n'] = 1; + if (useR) clazz->vals['\r'] = 1; + next = registerNode(clazz); + } + break; + case '(': + { + NFANode * end, * t1, * t2; + t1 = parse(true, false, &end); + if (!t1) raiseError(); + else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL) + { + cur->next = t2; + cur = t2->next; + } + else + { + cur->next = t1; + cur = end; + } + } + break; + case ')': + if (!inParen) raiseError(); + else if (inOr) + { + --curInd; + cur = cur->next = registerNode(new NFAAcceptNode); + flags = oldFlags; + return start; + } + else + { + if (ahead) + { + cur = cur->next = registerNode(new NFAAcceptNode); + flags = oldFlags; + return *end = registerNode(new NFALookAheadNode(start, pos)); + } + else if (indep) + { + cur = cur->next = registerNode(new NFAAcceptNode); + flags = oldFlags; + return *end = registerNode(new NFAPossessiveQuantifierNode(this, start, 1, 1)); + } + else // capping or noncapping, it doesnt matter + { + *end = cur = cur->next = registerNode(new NFAGroupTailNode(grc)); + next = quantifyGroup(start, *end, grc); + if (next) + { + start = next; + *end = next->next; + } + flags = oldFlags; + return start; + } + } + break; + case '{': // registered pattern + cur->next = parseRegisteredPattern(&next); + if (cur->next) cur = next; + break; + case '*': + case '+': + case '?': + case '}': + case ']': + raiseError(); + break; + default: + if ((flags & Pattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharNode(ch)); + else next = registerNode(new NFACharNode(ch)); + break; + } + NFAOrNode * orNode = dynamic_cast(cur); + if (orNode) // an orNode will never have a "next" set. thus we don't have to worry about problems with + { // quantifying an or node. + NFANode * one = orNode->one, * two = orNode->two; + while (one->next) one = one->next; + while (two->next) two = two->next; + one->next = two->next = cur = registerNode(new NFAAcceptNode); + } + if (next) + { + cur = cur->next = quantify(next); + } + } + if (inParen) raiseError(); + else + { + if (inOr) cur = cur->next = registerNode(new NFAAcceptNode); + if (end) *end = cur; + } + + flags = oldFlags; + if (error) return NULL; + + return start; +} + +Pattern * Pattern::compile(const std::string & pattern, const unsigned long mode) +{ + Pattern * p = new Pattern(pattern); + NFANode * end; + + p->flags = mode; + if ((mode & Pattern::LITERAL) != 0) + { + p->head = p->registerNode(new NFAStartNode); + if ((mode & Pattern::CASE_INSENSITIVE) != 0) p->head->next = p->registerNode(new NFACIQuoteNode(pattern)); + else p->head->next = p->registerNode(new NFAQuoteNode(pattern)); + p->head->next->next = p->registerNode(new NFAEndNode); + } + else + { + p->head = p->parse(0, 0, &end); + if (!p->head) + { + delete p; + p = NULL; + } + else + { + if (!(p->head && p->head->isStartOfInputNode())) + { + NFANode * n = p->registerNode(new NFAStartNode); + n->next = p->head; + p->head = n; + } + end->next = p->registerNode(new NFAEndNode); + } + } + if (p != NULL) + { + p->matcher = new Matcher(p, ""); + } + + return p; +} + +Pattern * Pattern::compileAndKeep(const std::string & pattern, const unsigned long mode) +{ + Pattern * ret = NULL; + std::map::iterator it = compiledPatterns.find(pattern); + + if (it != compiledPatterns.end()) + { + ret = it->second; + } + else + { + ret = compile(pattern, mode); + compiledPatterns[pattern] = ret; + } + + return ret; +} +std::string Pattern::replace(const std::string & pattern, const std::string & str, + const std::string & replacementText, const unsigned long mode) +{ + std::string ret; + Pattern * p = Pattern::compile(pattern, mode); + if (p) + { + ret = p->replace(str, replacementText); + delete p; + } + return ret; +} + +std::vector Pattern::split(const std::string & pattern, const std::string & str, const bool keepEmptys, + const unsigned long limit, const unsigned long mode) +{ + std::vector ret; + Pattern * p = Pattern::compile(pattern, mode); + if (p) + { + ret = p->split(str, keepEmptys, limit); + delete p; + } + return ret; +} + +std::vector Pattern::findAll(const std::string & pattern, const std::string & str, const unsigned long mode) +{ + std::vector ret; + Pattern * p = Pattern::compile(pattern, mode); + if (p) + { + ret = p->findAll(str); + delete p; + } + return ret; +} + +bool Pattern::matches(const std::string & pattern, const std::string & str, const unsigned long mode) +{ + bool ret = 0; + Pattern * p = compile(pattern, mode); + + if (p) + { + ret = p->matches(str); + delete p; + } + + return ret; +} + +bool Pattern::registerPattern(const std::string & name, const std::string & pattern, const unsigned long mode) +{ + Pattern * p = Pattern::compile(pattern, mode); + if (!p) return 0; + Pattern::registeredPatterns[name] = std::make_pair(pattern, mode); + delete p; + return 1; +} + +void Pattern::unregisterPatterns() +{ + registeredPatterns.clear(); +} +void Pattern::clearPatternCache() +{ + std::map::iterator it; + for (it = compiledPatterns.begin(); it != compiledPatterns.end(); ++it) + { + delete it->second; + } + compiledPatterns.clear(); +} + +std::pair Pattern::findNthMatch(const std::string & pattern, const std::string & str, + const int matchNum, const unsigned long mode) +{ + std::pair ret; + Pattern * p = Pattern::compile(pattern, mode); + + ret.second = -1; + if (p) + { + int i = -1; + p->matcher->setString(str); + while (i < matchNum && p->matcher->findNextMatch()) { ++i; } + if (i == matchNum && p->matcher->getStartingIndex() >= 0) + { + ret.first = p->matcher->getGroup(0); + ret.second = p->matcher->getStartingIndex(); + } + delete p; + } + + return ret; +} + +Pattern::~Pattern() +{ + if (matcher) delete matcher; + for (std::map::iterator it = nodes.begin(); it != nodes.end(); ++it) + { + delete it->first; + } +} +std::string Pattern::replace(const std::string & str, const std::string & replacementText) +{ + int li = 0; + std::string ret = ""; + + matcher->setString(str); + while (matcher->findNextMatch()) + { + ret += str.substr(li, matcher->getStartingIndex() - li); + ret += matcher->replaceWithGroups(replacementText); + li = matcher->getEndingIndex(); + } + ret += str.substr(li); + + return ret; +} +std::vector Pattern::split(const std::string & str, const bool keepEmptys, const unsigned long limit) +{ + unsigned long lim = (limit == 0 ? MAX_QMATCH : limit); + int li = 0; + std::vector ret; + + matcher->setString(str); + + while (matcher->findNextMatch() && ret.size() < lim) + { + if (matcher->getStartingIndex() == 0 && keepEmptys) ret.push_back(""); + if ((matcher->getStartingIndex() != matcher->getEndingIndex()) || keepEmptys) + { + if (li != matcher->getStartingIndex() || keepEmptys) + { + ret.push_back(str.substr(li, matcher->getStartingIndex() - li)); + } + li = matcher->getEndingIndex(); + } + } + if (li < (int)str.size()) ret.push_back(str.substr(li)); + + return ret; +} +std::vector Pattern::findAll(const std::string & str) +{ + matcher->setString(str); + return matcher->findAll(); +} +bool Pattern::matches(const std::string & str) +{ + matcher->setString(str); + return matcher->matches(); +} +unsigned long Pattern::getFlags() const +{ + return flags; +} +std::string Pattern::getPattern() const +{ + return pattern; +} +Matcher * Pattern::createMatcher(const std::string & str) +{ + return new Matcher(this, str); +} +void Pattern::print() +{ + printf("Pattern(%s):\n", pattern.c_str()); + if (head) head->print(1); +} + + + + + + + +static char * getPrintChar(const char ch) +{ + static char buf[1024]; + if (ch == '0') strcpy(buf, "\\0"); + else if (ch == '\r') strcpy(buf, "\\r"); + else if (ch == '\t') strcpy(buf, "\\t"); + else if (ch == '\n') strcpy(buf, "\\n"); + else if (ch == 0x0C) strcpy(buf, "\\p"); + else if (ch < ' ' || ch >= 0x80) sprintf(buf, "0x%02X", ch); + else sprintf(buf, "%c", ch); + return buf; +} +static void appendRawToClass(const int ch, char * const buf, int & curInd) +{ + switch (ch) + { + case '(': + case ')': + case '\\': + case '-': + case '[': + case ']': + buf[curInd++] = '\\'; + default:; + buf[curInd++] = (char)ch; + } +} +static void appendToClass0(const int j, int & start, const bool raw, const bool allowRanges, char * const buf, int & curInd) +{ + if (start != -1) + { + if (start + 1 == j) + { + if (raw) + { + appendRawToClass(start, buf, curInd); + } + else + { + sprintf(buf + curInd, "\\x%02X", start); + curInd += 4; + } + } + else if (start + 2 == j) + { + if (raw) + { + appendRawToClass(start, buf, curInd); + appendRawToClass(start + 1, buf, curInd); + } + else + { + sprintf(buf + curInd, "\\x%02X\\x%02X", start, start + 1); + curInd += 8; + } + } + else + { + if (allowRanges) + { + if (raw) + { + appendRawToClass(start, buf, curInd); + buf[curInd++] = '-'; + appendRawToClass(j - 1, buf, curInd); + } + else + { + sprintf(buf + curInd, "\\x%02X-\\x%02X", start, j - 1); + curInd += 9; + } + } + else + { + for (int k = start; k < j; ++k) + { + appendRawToClass(k, buf, curInd); + } + } + } + } + start = -1; +} +static void appendToClass(const std::map & chars, const int * const range, const bool raw, const bool allowRanges, char * const buf, int & curInd) +{ + int start = -1, j; + for (j = range[0]; j < range[1]; ++j) + { + if (chars.find((char)j) != chars.end()) + { + if (start == -1) start = j; + } + else + { + appendToClass0(j, start, raw, allowRanges, buf, curInd); + } + } + appendToClass0(j, start, raw, allowRanges, buf, curInd); +} +static char * getClassDesc(const std::map & chars, const bool inv, const bool firstPass = true) +{ + static char buf[1024]; + int curInd = 0; + bool ok[256] = { false }; + + buf[curInd++] = '['; + if (inv) + { + buf[curInd++] = '^'; + } + // check for lower case letters + if (chars.find(' ') != chars.end()) { buf[curInd++] = ' '; } + if (chars.find(0x00) != chars.end()) { strcpy(buf + curInd, "\\0"); curInd += 2; } + if (chars.find('\t') != chars.end()) { strcpy(buf + curInd, "\\t"); curInd += 2; } + if (chars.find('\r') != chars.end()) { strcpy(buf + curInd, "\\r"); curInd += 2; } + if (chars.find('\n') != chars.end()) { strcpy(buf + curInd, "\\n"); curInd += 2; } + if (chars.find(0x0c) != chars.end()) { strcpy(buf + curInd, "\\p"); curInd += 2; } + const int HEX_RANGES[][2] = + { + { 0x01, 0x09 }, + { 0x0B, 0x0B }, + { 0x0E, 0x20 }, + { 0x80, 0x100 }, + }; + const int RAW_RANGES[][2] = + { + { 0x20, '0' }, + { '0', '9' + 1 }, + { '9' + 1, 'A' }, + { 'A', 'Z' + 1 }, + { 'Z' + 1, 'a' }, + { 'a', 'z' + 1 }, + { 'z' + 1, 0x80 }, + }; + const bool ALLOW_RANGES[] = + { + false, + true, + false, + true, + false, + true, + false, + }; + const int NUM_HEX = sizeof(HEX_RANGES) / sizeof(HEX_RANGES[0]); + const int NUM_RAW = sizeof(RAW_RANGES) / sizeof(RAW_RANGES[0]); + + for (int i = 0; i < NUM_HEX; ++i) + { + appendToClass(chars, HEX_RANGES[i], false, true, buf, curInd); + } + for (int i = 0; i < NUM_RAW; ++i) + { + appendToClass(chars, RAW_RANGES[i], true, ALLOW_RANGES[i], buf, curInd); + } + buf[curInd++] = ']'; + buf[curInd++] = 0; + if (firstPass) + { + char newBuf[1024]; + std::map invChars; + for (int i = 0; i < 256; ++i) + { + if (chars.find((char)i) == chars.end()) invChars[(char)i] = true; + } + strcpy(newBuf, buf); + getClassDesc(invChars, !inv, false); + if (strlen(buf) > strlen(newBuf)) strcpy(buf, newBuf); + } + return buf; +} + +// NFANode + +NFANode::NFANode() { next = NULL; } +NFANode::~NFANode() { } +void NFANode::findAllNodes(std::map & soFar) +{ + if (soFar.find(this) == soFar.end()) return; + soFar[this] = 1; + if (next) next->findAllNodes(soFar); +} +void NFANode::print(const int indent) +{ + printf("%*c%s\n", indent * 2, ' ', typeid(*this).name()); + if (next) next->print(indent); +} + +// NFACharNode + +NFACharNode::NFACharNode(const char c) { ch = c; } +int NFACharNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd < (int)str.size() && str[curInd] == ch) return next->match(str, matcher, curInd + 1, depth + 1); + return -1; +} +void NFACharNode::print(const int indent) +{ + printf("%*c%s(0x%02X)\n", indent * 2, ' ', typeid(*this).name(), ch); + if (next) next->print(indent); +} + +// NFACICharNode + +NFACICharNode::NFACICharNode(const char c) { ch = tolower(c); } +int NFACICharNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd < (int)str.size() && tolower(str[curInd]) == ch) return next->match(str, matcher, curInd + 1, depth + 1); + return -1; +} +void NFACICharNode::print(const int indent) +{ + printf("%*c%s(0x%02X)\n", indent * 2, ' ', typeid(*this).name(), ch); + if (next) next->print(indent); +} + +// NFAStartNode + +NFAStartNode::NFAStartNode() { } +int NFAStartNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ret = -1, ci = curInd; + + matcher->starts[0] = curInd; + if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) == (unsigned int)Matcher::MATCH_ENTIRE_STRING) + { + if (curInd != 0) + { + matcher->starts[0] = -1; + return -1; + } + return next->match(str, matcher, 0, depth + 1); + } + while ((ret = next->match(str, matcher, ci)) == -1 && ci < (int)str.size()) + { + matcher->clearGroups(); + matcher->starts[0] = ++ci; + } + if (ret < 0) matcher->starts[0] = -1; + return ret; +} + +// NFAEndNode + +NFAEndNode::NFAEndNode() { } +int NFAEndNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + matcher->ends[0] = curInd; + if ((matcher->getFlags() & Matcher::MATCH_ENTIRE_STRING) != 0) + { + if (curInd == (int)str.size()) return curInd; + matcher->ends[0] = -1; + return -1; + } + return curInd; +} + +// NFAQuantifierNode + +void NFAQuantifierNode::findAllNodes(std::map & soFar) +{ + inner->findAllNodes(soFar); + NFANode::findAllNodes(soFar); +} +NFAQuantifierNode::NFAQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch) +{ + inner = internal; + inner->next = pat->registerNode(new NFAAcceptNode); + min = (minMatch < Pattern::MIN_QMATCH) ? Pattern::MIN_QMATCH : minMatch; + max = (maxMatch > Pattern::MAX_QMATCH) ? Pattern::MAX_QMATCH : maxMatch; +} + +int NFAQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int i0, i1, i2 = 0; + + i0 = i1 = curInd; + while (i2 < min) + { + ++i2; + i1 = inner->match(str, matcher, i0, depth + 1); + if (i1 <= i0) return i1; // i1 < i0 means i1 is -1 + i0 = i1; + } + + return i1; +} +void NFAQuantifierNode::print(const int indent) +{ + printf("%*c%s(min=%d, max=%d)\n", indent * 2, ' ', typeid(*this).name(), min, max); + printf("%*cinner\n", (indent + 1) * 2, ' '); + if (inner) inner->print(indent + 2); + if (next) next->print(indent); +} + +// NFAGreedyQuantifierNode +NFAGreedyQuantifierNode::NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch) + : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { } +int NFAGreedyQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int t = NFAQuantifierNode::match(str, matcher, curInd, depth + 1); + if (t != -1) return matchInternal(str, matcher, t, min, depth + 1); + return t; +} +int NFAGreedyQuantifierNode::matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar, const int depth) const +{ + if (soFar >= max) return next->match(str, matcher, curInd, depth + 1); + + int i, j; + + i = inner->match(str, matcher, curInd, depth + 1); + if (i != -1) + { + j = matchInternal(str, matcher, i, soFar + 1, depth + 1); + if (j != -1) return j; + } + return next->match(str, matcher, curInd, depth + 1); +} + +// NFALazyQuantifierNode + +NFALazyQuantifierNode::NFALazyQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch) + : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { } +int NFALazyQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int i, j, m = NFAQuantifierNode::match(str, matcher, curInd, depth + 1); + + if (m == -1) return -1; + + for (i = min; i < max; ++i) + { + j = next->match(str, matcher, m, depth + 1); + if (j == -1) + { + j = inner->match(str, matcher, m, depth + 1); + // if j < m, then j is -1, so we bail. + // if j == m, then we would just go and call next->match on the same index, + // but it already failed trying to match right there, so we know we can + // just bail + if (j <= m) return -1; + m = j; + } + else return j; + } + return next->match(str, matcher, m, depth + 1); +} + +// NFAPossessiveQuantifierNode + +NFAPossessiveQuantifierNode::NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, const int minMatch, const int maxMatch) + : NFAQuantifierNode(pat, internal, minMatch, maxMatch) { } +int NFAPossessiveQuantifierNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int i, j, m = NFAQuantifierNode::match(str, matcher, curInd, depth + 1); + + if (m == -1) return -1; + for (i = min; i < max; ++i) + { + j = inner->match(str, matcher, m, depth + 1); + if (j <= m) return next->match(str, matcher, m, depth + 1); + m = j; + } + return next->match(str, matcher, m, depth + 1); +} + +// NFAAcceptNode + +NFAAcceptNode::NFAAcceptNode() { } +int NFAAcceptNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (!next) return curInd; + else return next->match(str, matcher, curInd, depth + 1); +} + +// NFAClassNode + +NFAClassNode::NFAClassNode(const bool invert) +{ + inv = invert; +} +NFAClassNode::NFAClassNode(const std::string & clazz, const bool invert) +{ + inv = invert; + for (int i = 0; i < (int)clazz.size(); ++i) vals[clazz[i]] = 1; +} +int NFAClassNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd < (int)str.size() && ((vals.find(str[curInd]) != vals.end()) ^ inv)) + { + return next->match(str, matcher, curInd + 1, depth + 1); + } + return -1; +} +void NFAClassNode::print(const int indent) +{ + printf("%*c%s(%d vals, inv=%s)\n", indent * 2, ' ', typeid(*this).name(), (int)vals.size(), inv ? "true" : "false"); + if (next) next->print(indent); +} + +// NFACIClassNode + +NFACIClassNode::NFACIClassNode(const bool invert) +{ + inv = invert; +} +NFACIClassNode::NFACIClassNode(const std::string & clazz, const bool invert) +{ + inv = invert; + for (int i = 0; i < (int)clazz.size(); ++i) vals[tolower(clazz[i])] = 1; +} +int NFACIClassNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd < (int)str.size() && ((vals.find(tolower(str[curInd])) != vals.end()) ^ inv)) + { + return next->match(str, matcher, curInd + 1, depth + 1); + } + return -1; +} +void NFACIClassNode::print(const int indent) +{ + if (next) next->print(indent); +} + +#undef to_lower + +// NFASubStartNode + +NFASubStartNode::NFASubStartNode() { } +int NFASubStartNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + return next->match(str, matcher, curInd, depth + 1); +} + +// NFAOrNode + +NFAOrNode::NFAOrNode(NFANode * first, NFANode * second) : one(first), two(second) { } +void NFAOrNode::findAllNodes(std::map & soFar) +{ + if (one) one->findAllNodes(soFar); + if (two) two->findAllNodes(soFar); + NFANode::findAllNodes(soFar); +} +int NFAOrNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ci = one->match(str, matcher, curInd, depth + 1); + if (ci != -1) return ci; + return two->match(str, matcher, curInd, depth + 1); +/* + int ci = one->match(str, matcher, curInd, depth + 1); + if (ci != -1) + { + ci = next->match(str, matcher, ci, depth + 1); + if (ci != -1) return ci; + } + ci = two->match(str, matcher, curInd, depth + 1); + if (ci != -1) ci = next->match(str, matcher, ci, depth + 1); + return -1; +*/ +} +void NFAOrNode::print(const int indent) +{ + printf("%*c%s\n", indent * 2, ' ', typeid(*this).name()); + printf("%*cone\n", (indent + 1) * 2, ' '); + one->print(indent + 2); + printf("%*ctwo\n", (indent + 1) * 2, ' '); + two->print(indent + 2); +} + +// NFAQuoteNode + +NFAQuoteNode::NFAQuoteNode(const std::string & quoted) : qStr(quoted) { } +int NFAQuoteNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd + qStr.size() > str.size()) return -1; + if (str.substr(curInd, qStr.size()) != qStr) return -1; + return next->match(str, matcher, curInd + qStr.size(), depth + 1); +} +void NFAQuoteNode::print(const int indent) +{ + printf("%*c%s(%s)\n", indent * 2, ' ', typeid(*this).name(), qStr.c_str()); + if (next) next->print(indent); +} + +// NFACIQuoteNode + +NFACIQuoteNode::NFACIQuoteNode(const std::string & quoted) : qStr(quoted) { } +int NFACIQuoteNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd + qStr.size() > str.size()) return -1; + if (str_icmp(str.substr(curInd, qStr.size()).c_str(), qStr.c_str())) return -1; + return next->match(str, matcher, qStr.size(), depth + 1); +} +void NFACIQuoteNode::print(const int indent) +{ + printf("%*c%s(%s)\n", indent * 2, ' ', typeid(*this).name(), qStr.c_str()); + if (next) next->print(indent); +} + +// NFALookAheadNode + +NFALookAheadNode::NFALookAheadNode(NFANode * internal, const bool positive) : NFANode(), pos(positive), inner(internal) { } +void NFALookAheadNode::findAllNodes(std::map & soFar) +{ + if (inner) inner->findAllNodes(soFar); + NFANode::findAllNodes(soFar); +} +int NFALookAheadNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + return ((inner->match(str, matcher, curInd, depth + 1) == -1) ^ pos) ? next->match(str, matcher, curInd, depth + 1) : -1; +} +void NFALookAheadNode::print(const int indent) +{ + printf("%*c%s(pos=%s)\n", indent * 2, ' ', typeid(*this).name(), pos ? "true" : "false"); + inner->print(indent + 1); + if (next) next->print(indent); +} + +// NFALookBehindNode + +NFALookBehindNode::NFALookBehindNode(const std::string & str, const bool positive) : pos(positive), mStr(str) { } +int NFALookBehindNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (pos) + { + if (curInd < (int)mStr.size()) return -1; + if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return next->match(str, matcher, curInd, depth + 1); + } + else + { + if (curInd < (int)mStr.size()) return next->match(str, matcher, curInd, depth + 1); + if (str.substr(curInd - mStr.size(), mStr.size()) == mStr) return -1; + return next->match(str, matcher, curInd, depth + 1); + } + return -1; +} +void NFALookBehindNode::print(const int indent) +{ + printf("%*c%s(str=%s, pos=%s)\n", indent * 2, ' ', typeid(*this).name(), mStr.c_str(), pos ? "true" : "false"); + if (next) next->print(indent); +} + +// NFAStartOfLineNode + +NFAStartOfLineNode::NFAStartOfLineNode() { } +int NFAStartOfLineNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd == 0 || str[curInd - 1] == '\n' || str[curInd - 1] == '\r') + { + return next->match(str, matcher, curInd, depth + 1); + } + return -1; +} + +// NFAEndOfLineNode + +NFAEndOfLineNode::NFAEndOfLineNode() { } +int NFAEndOfLineNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd >= (int)str.size() || str[curInd] == '\n' || str[curInd] == '\r') + { + return next->match(str, matcher, curInd, depth + 1); + } + return -1; +} + +// NFAReferenceNode + +NFAReferenceNode::NFAReferenceNode(const int groupIndex) : gi(groupIndex) { } +int NFAReferenceNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int len = matcher->ends[gi] - matcher->starts[gi]; + int ni = -1; + if (gi < 1 || matcher->ends[gi] < matcher->starts[gi] || len == 0) ni = curInd; + else if (curInd + len > (int)str.size()) return -1; + else if (str.substr(curInd, len) != str.substr(matcher->starts[gi], len)) return -1; + else ni = curInd + len; + + return next->match(str, matcher, ni, depth + 1); +} +void NFAReferenceNode::print(const int indent) +{ + printf("%*c%s(gi=%d)\n", indent * 2, ' ', typeid(*this).name(), gi); + if (next) next->print(indent); +} + +// NFAStartOfInputNode + +NFAStartOfInputNode::NFAStartOfInputNode() { } +int NFAStartOfInputNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd == 0) return next->match(str, matcher, curInd, depth + 1); + return -1; +} + +// NFAEndOfInputNode + +NFAEndOfInputNode::NFAEndOfInputNode(const bool lookForTerm) : term(lookForTerm) { } +int NFAEndOfInputNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int len = (int)str.size(); + if (curInd == len) return next->match(str, matcher, curInd, depth + 1); + else if (term) + { + if (curInd == len - 1 && (str[curInd] == '\r' || str[curInd] == '\n')) + { + return next->match(str, matcher, curInd, depth + 1); + } + else if (curInd == len - 2 && str.substr(curInd, 2) == "\r\n") + { + return next->match(str, matcher, curInd, depth + 1); + } + } + return -1; +} +void NFAEndOfInputNode::print(const int indent) +{ + printf("%*c%s(term=%s)\n", indent * 2, ' ', typeid(*this).name(), term ? "true" : "false"); + if (next) next->print(indent); +} + +// NFAWordBoundaryNode + +NFAWordBoundaryNode::NFAWordBoundaryNode(const bool positive) : pos(positive) { } +int NFAWordBoundaryNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + #define is_alpha(x) (((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z')) + + int len = (int)str.size(); + bool ok = 0; + char c1 = (curInd - 1 < len) ? str[curInd - 1] : -1; + char c2 = (curInd < len) ? str[curInd ] : -1; + + if (curInd == len) return next->match(str, matcher, curInd, depth + 1); + if (is_alpha(c1) ^ is_alpha(c2)) ok = 1; + if (ok && pos) return next->match(str, matcher, curInd, depth + 1); + return -1; + + #undef is_alpha +} +void NFAWordBoundaryNode::print(const int indent) +{ + printf("%*c%s(pos=%s)\n", indent * 2, ' ', typeid(*this).name(), pos ? "true" : "false"); + if (next) next->print(indent); +} + +// NFAEndOfMatchNode + +NFAEndOfMatchNode::NFAEndOfMatchNode() { } +int NFAEndOfMatchNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + if (curInd == matcher->lm) return next->match(str, matcher, curInd, depth + 1); + return -1; +} + +// NFAGroupHeadNode + +NFAGroupHeadNode::NFAGroupHeadNode(const int groupIndex) : gi(groupIndex) { } +int NFAGroupHeadNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ret, o = matcher->starts[gi]; + + matcher->starts[gi] = curInd; + ret = next->match(str, matcher, curInd, depth + 1); + if (ret < 0) matcher->starts[gi] = o; + + return ret; +} +void NFAGroupHeadNode::print(const int indent) +{ + printf("%*c%s(gi=%d)\n", indent * 2, ' ', typeid(*this).name(), gi); + if (next) next->print(indent); +} + +// NFAGroupTailNode + +NFAGroupTailNode::NFAGroupTailNode(const int groupIndex) : gi(groupIndex) { } +int NFAGroupTailNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ret, o = matcher->ends[gi]; + matcher->ends[gi] = curInd; + ret = next->match(str, matcher, curInd, depth + 1); + if (ret < 0) matcher->ends[gi] = o; + + return ret; +} +void NFAGroupTailNode::print(const int indent) +{ + printf("%*c%s(gi=%d)\n", indent * 2, ' ', typeid(*this).name(), gi); + if (next) next->print(indent); +} + +// NFAGroupLoopPrologueNode + +NFAGroupLoopPrologueNode::NFAGroupLoopPrologueNode(const int groupIndex) : gi(groupIndex) { } +int NFAGroupLoopPrologueNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ret, o1 = matcher->groups[gi], o2 = matcher->groupPos[gi], o3 = matcher->groupIndeces[gi]; + + matcher->groups[gi] = 0; + matcher->groupPos[gi] = 0; + matcher->groupIndeces[gi] = -1; + ret = next->match(str, matcher, curInd, depth + 1); + if (ret < 0) + { + matcher->groups[gi] = o1; + matcher->groupPos[gi] = o2; + matcher->groupIndeces[gi] = o3; + } + + return ret; +} +void NFAGroupLoopPrologueNode::print(const int indent) +{ + printf("%*c%s(gi=%d)\n", indent * 2, ' ', typeid(*this).name(), gi); + if (next) next->print(indent); +} + +// NFAGroupLoopNode + +NFAGroupLoopNode::NFAGroupLoopNode(NFANode * internal, const int minMatch, const int maxMatch, + const int groupIndex, const int matchType) +{ + inner = internal; + min = minMatch; + max = maxMatch; + gi = groupIndex; + type = matchType; +} +void NFAGroupLoopNode::findAllNodes(std::map & soFar) +{ + if (inner) inner->findAllNodes(soFar); + NFANode::findAllNodes(soFar); +} +int NFAGroupLoopNode::match(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + bool b = (curInd > matcher->groupIndeces[gi]); + + if (b && matcher->groups[gi] < min) + { + ++matcher->groups[gi]; + int o = matcher->groupIndeces[gi]; + matcher->groupIndeces[gi] = curInd; + int ret = inner->match(str, matcher, curInd, depth + 1); + if (ret < 0) + { + matcher->groupIndeces[gi] = o; + --matcher->groups[gi]; + } + return ret; + } + else if (!b || matcher->groups[gi] >= max) + { + return next->match(str, matcher, curInd, depth + 1); + } + else + { + switch (type) + { + case 0: return matchGreedy(str, matcher, curInd, depth + 1); + case 1: return matchLazy(str, matcher, curInd, depth + 1); + case 2: return matchPossessive(str, matcher, curInd, depth + 1); + } + } + return -1; +} +int NFAGroupLoopNode::matchGreedy(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int o = matcher->groupIndeces[gi]; // save our info for backtracking + matcher->groupIndeces[gi] = curInd; // move along + ++matcher->groups[gi]; + int ret = inner->match(str, matcher, curInd, depth + 1); // match internally + if (ret < 0) + { // if we failed, then restore info and match next + --matcher->groups[gi]; + matcher->groupIndeces[gi] = o; + ret = next->match(str, matcher, curInd, depth + 1); + } + return ret; +} +int NFAGroupLoopNode::matchLazy(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int ret = next->match(str, matcher, curInd, depth + 1); // be lazy, just go on + if (ret < 0) + { + int o = matcher->groupIndeces[gi]; // save info for backtracking + matcher->groupIndeces[gi] = curInd; // advance our position + ++matcher->groups[gi]; + ret = inner->match(str, matcher, curInd, depth + 1); // match our internal stuff + if (ret < 0) // if we failed, then restore the info + { + --matcher->groups[gi]; + matcher->groupIndeces[gi] = o; + } + } + return ret; +} +int NFAGroupLoopNode::matchPossessive(const std::string & str, Matcher * matcher, const int curInd, const int depth) const +{ + int o = matcher->groupIndeces[gi]; // save info for backtracking + matcher->groupPos[gi] = matcher->groups[gi]; // set a flag stating we have matcher at least this much + matcher->groupIndeces[gi] = curInd; // move along + ++matcher->groups[gi]; + int ret = inner->match(str, matcher, curInd, depth + 1); // try and match again + if (ret < 0) + { // if we fail, back off, but to an extent + --matcher->groups[gi]; + matcher->groupIndeces[gi] = o; + if (matcher->groups[gi] == matcher->groupPos[gi]) ret = next->match(str, matcher, curInd, depth + 1); + } + return ret; +} +void NFAGroupLoopNode::print(const int indent) +{ + printf("%*c%s(gi=%d, min=%d, max=%d, type=%s)\n", indent * 2, ' ', typeid(*this).name(), gi, min, max, type == 0 ? "greedy" : type == 1 ? "reluctant" : "possessive"); + if (next) next->print(indent); +} + +#ifdef _WIN32 + #pragma warning(pop) +#endif diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa src/main.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/main.cpp Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,18 @@ +#include +#include +#include "nfw.h" + +using namespace std; + +int main(int argc, char *argv[]) +{ + auto_ptr nfwvar(new nfw(argc, argv)); + if (nfwvar->isValid()) + { + return 0; + } else { + cout << "Invalid rule!" << endl; + cout << nfwvar->getFalidRule() << endl; + return 1; + } +} \ No newline at end of file diff -r 0000000000000000000000000000000000000000 -r ad86853a2662e23621f1159c34656ee3921af5fa src/nfw.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/nfw.cpp Wed Sep 11 22:16:55 2013 -0500 @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include "regexp/Pattern.h" +#include "regexp/Matcher.h" +#include "nfw.h" + +using namespace std; + +nfw::nfw(int argc, char *argv[]) +{ + for (int i = 0; iparamters.push_back(argv[i]); + this->populateGrammar(); + this->parseArguments(); +} + +void nfw::parseArguments() +{ + vector rule_arr; + vector words; + ifstream rulefile; + string line; + string rule; + if (this->paramters.size() == 1) + { + cerr << "Wrong number of arguments" << endl; + } else if ( this->paramters.size() == 2 && this->paramters.at(1) == "help" ) + { + cout << "Some help information..." << endl; + this->valid = true; + } else if ( this->paramters.size() == 2 && (this->paramters.at(1) == "version" || this->paramters.at(1) == "ver" ) ) + { + cout << "nfw version - " << NFW_VERSION << endl; + cout << "regex library by Jeff Stuart (GPL) http://freshmeat.net/projects/cpp_regex/" << endl; + this->valid = true; + } else if ( this->paramters.size() == 3 && this->paramters.at(1) == "file" ) { + //open file + rulefile.open(this->paramters.at(2).c_str()); + if (rulefile.is_open()) + { + while (! rulefile.eof() ) + { + getline(rulefile, line); + Pattern * p = Pattern::compile("([a-z0-9.\\-])+ (\\w|\"([^\"]*)\")+"); + rule_arr = p->findAll(line); + delete p; + this->split(line, ' ', words); + if (words.at(0) == "custom") + { + //this->customRule(rule); + for(unsigned int i = 1; i < words.size(); i++) + { + cout << words.at(i) << " "; + } + cout << endl; + } else { + if (this->validateRules(rule_arr)) + { + this->valid = true; + this->parseRule(rule_arr); + } else { + this->valid = false; + } + } + words.clear(); + rule_arr.clear(); + } + rulefile.close(); + } + } else if ( this->paramters.size() > 3) { + // parse rule from parameter + for(unsigned int i = 1; i < this->paramters.size(); i++) + { + if (i == this->paramters.size()) + rule += this->paramters.at(i); + else + rule += this->paramters.at(i) + " "; + } + Pattern * p = Pattern::compile("([a-z0-9.\\-])+ (\\w|\"([^\"]*)\")+"); + rule_arr = p->findAll(rule); + delete p; + this->split(rule, ' ', words); + if (words.at(0) == "custom") + { + //this->customRule(rule); + for(unsigned int i = 1; i < words.size(); i++) + { + cout << words.at(i) << " "; + } + cout << endl; + } else { + if (this->validateRules(rule_arr)) + { + this->valid = true; + this->parseRule(rule_arr); + } else { + this->valid = false; + } + } + } else { + cerr << "Wrong number of arguments" << endl; + } +} + +void nfw::parseRule(vector& rule_arr) +{ + // Parses a single rule + string p1; + string p2; + string rule = ""; + string act = ""; + string chain = ""; + vector parts; + + auto_ptr split(Pattern::compile("([a-z0-9.\\-]|\\w|\"([^\"]*)\")+")); + auto_ptr action(Pattern::compile("(drop|deny|accept|log)")); + auto_ptr ip(Pattern::compile("(?:\\d{1,3}\\.){3}\\d{1,3}")); + auto_ptr iprange(Pattern::compile("(?:\\d{1,3}\\.){3}\\d{1,3}-(?:\\d{1,3}\\.){3}\\d{1,3}")); + auto_ptr port(Pattern::compile("(\\d)*")); + + //auto_ptr actionmatch = action->createMatcher(); + //auto_ptr ipmatch = ip->createMatcher(); + //auto_ptr iprangematch = iprange->createMatcher(); + for(unsigned int i = 0; i < rule_arr.size(); i++) + { + parts = split->findAll(rule_arr.at(i)); + //this->split(rule_arr.at(i), ' ', parts); + //rule_arr.spl + p1 = parts.at(0); + p2 = parts.at(1); + parts.clear(); + Matcher * actionmatch = action->createMatcher(p1); + Matcher * ipmatch = ip->createMatcher(p1); + Matcher * iprangematch = iprange->createMatcher(p1); + Matcher * portmatch = port->createMatcher(p1); + if (actionmatch->matches()) + { + act = p1; + chain = p2; + } else if ( iprangematch->matches() ) + { + rule += "-m iprange "; + if (p2 == "source") + rule += "--src-range "; + else + rule += "--dst-range "; + rule += p1 + " "; + } else if ( ipmatch->matches() ) + { + if (p2 == "source") + rule += "-s "; + else + rule += "-d "; + rule += p1 + " "; + } else if ( portmatch->matches() ) + { + if (p2 == "source") + rule += "--sport "; + else + rule += "--dport "; + rule += p1 + " "; + } else { + //parse the other rules that we won't bother matching with regex + if (p1 == "comment") + { + //strip qoutes if exists + if (p2.at(0) == '"') + { + p2.erase(0,1); + p2.erase(p2.size() - 1, 1); + } + //p2.replace(p2.find("\""), 1, ""); + //p2.replace(p2.find("\""), 1, ""); + rule += "-m comment --comment \"" + p2 + "\" "; + } else if (p1 == "protocol") + { + rule += "-p " + p2 + " "; + } + } + delete actionmatch; + delete ipmatch; + delete iprangematch; + } + transform(chain.begin(), chain.end(), chain.begin(), ::toupper); + transform(act.begin(), act.end(), act.begin(), ::toupper); + cout << "-A " << chain << " " << rule << "-j " << act << endl; +} + +void nfw::populateGrammar() +{ + // It doesn't seem to like (word|word) regex syntax, perhaps time to find a new regex library? + //this->grammar.push_back("(\\w)* (input|output|forward){1}"); // + this->grammar.push_back("(\\w)* (\\w)*"); // + this->grammar.push_back("(?:\\d{1,3}\\.){3}\\d{1,3} (source|destination)"); // =: + this->grammar.push_back("(?:\\d{1,3}\\.){3}\\d{1,3}-(?:\\d{1,3}\\.){3}\\d{1,3} (source|destitation)"); // =: - + //this->grammar.push_back("(\\d)* (source|destitation)");// =: # + //this->grammar.push_back("recent (day|year|month)-(\\d)*-(\\d)*"); // =: recent