/// <summary>
/// Combines two character-set nodes into a new node whose set holds the ranges of both operands.
/// </summary>
/// <param name="node1">Left operand; its <c>Exp</c> is read as a <c>CharSetExpression</c>.</param>
/// <param name="node2">Right operand; its <c>Exp</c> is read as a <c>CharSetExpression</c>.</param>
/// <returns>A new <c>RegexNode</c> wrapping the combined, non-reversed set.</returns>
/// <remarks>
/// The operand checks are debug assertions only: both expressions must be character sets
/// and neither may have <c>IsReverse</c> set.
/// </remarks>
public static RegexNode operator %(RegexNode node1, RegexNode node2)
{
    var lhsSet = node1.Exp as CharSetExpression;
    var rhsSet = node2.Exp as CharSetExpression;

    // Precondition (debug builds only): two plain, non-reversed character sets.
    Debug.Assert(lhsSet != null && rhsSet != null && !(lhsSet.IsReverse || rhsSet.IsReverse));

    var merged = new CharSetExpression { IsReverse = false };

    // The left operand's ranges are appended as they are.
    foreach (var range in lhsSet.Ranges)
    {
        merged.Ranges.Add(range);
    }

    // The right operand's ranges go through AddRangeWithConflict; a false result
    // is only reported through a debug assertion.
    foreach (var range in rhsSet.Ranges)
    {
        bool added = merged.AddRangeWithConflict(range.Begin, range.End);
        Debug.Assert(added, "Failed");
    }

    return new RegexNode(merged);
}
/// <summary>
/// Creates a node wrapping a fresh <c>CharSetExpression</c> to which the pair
/// (<paramref name="a"/>, <paramref name="b"/>) has been passed via <c>AddRangeWithConflict</c>.
/// </summary>
/// <param name="a">First argument handed to <c>AddRangeWithConflict</c> (presumably the range start).</param>
/// <param name="b">Second argument handed to <c>AddRangeWithConflict</c> (presumably the range end).</param>
/// <returns>A new <c>RegexNode</c> wrapping the created character set.</returns>
public static RegexNode GetCharSetExpression(char a, char b)
{
    var charSet = new CharSetExpression();

    // The boolean returned by AddRangeWithConflict is not inspected here.
    charSet.AddRangeWithConflict(a, b);

    var node = new RegexNode(charSet);
    return node;
}
/// <summary>
/// Combines two character-set nodes into a new node whose set holds the ranges of both operands.
/// </summary>
/// <param name="node1">Left operand; its <c>Exp</c> is read as a <c>CharSetExpression</c>.</param>
/// <param name="node2">Right operand; its <c>Exp</c> is read as a <c>CharSetExpression</c>.</param>
/// <returns>A new <c>RegexNode</c> wrapping the combined, non-reversed set.</returns>
/// <remarks>
/// The operand checks are debug assertions only: both expressions must be character sets
/// and neither may have <c>IsReverse</c> set.
/// </remarks>
public static RegexNode operator %(RegexNode node1, RegexNode node2)
{
    var lhsSet = node1.Exp as CharSetExpression;
    var rhsSet = node2.Exp as CharSetExpression;

    // Precondition (debug builds only): two plain, non-reversed character sets.
    Debug.Assert(lhsSet != null && rhsSet != null && !(lhsSet.IsReverse || rhsSet.IsReverse));

    var merged = new CharSetExpression { IsReverse = false };

    // The left operand's ranges are appended as they are.
    foreach (var range in lhsSet.Ranges)
    {
        merged.Ranges.Add(range);
    }

    // The right operand's ranges go through AddRangeWithConflict; a false result
    // is only reported through a debug assertion.
    foreach (var range in rhsSet.Ranges)
    {
        bool added = merged.AddRangeWithConflict(range.Begin, range.End);
        Debug.Assert(added, "Failed");
    }

    return new RegexNode(merged);
}
/// <summary>
/// Parses a single atom from the source window: the anchors ^ and $, the dot,
/// a backslash (or forward-slash) escape, a bracketed set such as [a-z], or one literal character.
/// </summary>
/// <returns>
/// A <c>BeginExpression</c>, an <c>EndExpression</c> or a <c>CharSetExpression</c> for the atom read.
/// Returns <c>null</c> when the window's next character is <c>SlidingTextWindow.InvalidCharacter</c>,
/// or when the next character is one of <c>()+*?{}|</c> (that character is left unconsumed).
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown for an unsupported escape, or for malformed content inside a bracketed set.
/// </exception>
public Expression ParseCharSet()
{
    if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
    {
        return null;
    }

    if (_sourceWindow.AdvanceIfMatches('^'))
    {
        return new BeginExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('$'))
    {
        return new EndExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('.'))
    {
        // The dot covers every character from (char)1 up to char.MaxValue.
        var ret = new CharSetExpression();
        ret.Add((char)1, char.MaxValue);
        return ret;
    }
    else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
    {
        // Escapes such as \d; both '\' and '/' introduce an escape.
        var exp = new CharSetExpression();
        var c2 = _sourceWindow.PeekChar();
        switch (c2)
        {
            case 'r':
                exp.Add('\r');
                break;
            case 'n':
                exp.Add('\n');
                break;
            case 't':
                exp.Add('\t');
                break;

            // Characters that must be escaped to be taken literally.
            case '\\':
            case '/':
            case '(':
            case ')':
            case '+':
            case '*':
            case '?':
            case '|':
            case '{':
            case '}':
            case '[':
            case ']':
            case '<':
            case '>':
            case '^':
            case '$':
            case '!':
            case '=':
            case '.':
                exp.Add(c2);
                break;

            // Upper-case class letters reverse the set, then reuse the lower-case definition.
            case 'S':
                exp.IsReverse = true;
                goto case 's';
            case 's':
                // Whitespace: space, CR, LF, tab.
                exp.Add(' ');
                exp.Add('\r');
                exp.Add('\n');
                exp.Add('\t');
                break;
            case 'D':
                exp.IsReverse = true;
                goto case 'd';
            case 'd':
                exp.Add('0', '9');
                break;
            case 'L':
                exp.IsReverse = true;
                goto case 'l';
            case 'l':
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            case 'W':
                exp.IsReverse = true;
                goto case 'w';
            case 'w':
                exp.Add('0', '9');
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            default:
                throw new ArgumentException("Error character after \\");
        }

        // The escape letter was only peeked; consume it now.
        _sourceWindow.AdvanceChar();
        return exp;
    }
    else if (_sourceWindow.AdvanceIfMatches('['))
    {
        // Bracketed set such as [a-z]; a leading '^' reverses the set.
        var exp = new CharSetExpression();
        exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

        // midState is true while lhs has been read and the range is not yet complete.
        bool midState = false;
        char lhs = default(char), rhs = default(char);

        while (true)
        {
            // Step 1: read one member character (escaped or literal) into lhs or rhs.
            if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                var c = _sourceWindow.PeekChar();
                char decoded = default(char);
                switch (c)
                {
                    case 'r':
                        decoded = '\r';
                        break;
                    case 'n':
                        decoded = '\n';
                        break;
                    case 't':
                        decoded = '\t';
                        break;

                    // Characters that must be escaped to be taken literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        decoded = c;
                        break;
                    default:
                        throw new ArgumentException("Error syntax in []");
                }

                _sourceWindow.AdvanceChar();

                // Store the decoded character, not the raw escape letter, so that
                // e.g. [\n] contains a newline rather than the letter 'n'.
                if (midState)
                {
                    rhs = decoded;
                }
                else
                {
                    lhs = decoded;
                }

                midState = !midState;
            }
            else if (_sourceWindow.AdvanceIfMatches("-]"))
            {
                throw new ArgumentException("-] occurred.");
            }
            else
            {
                var literal = _sourceWindow.NextChar();
                if (!_sourceWindow.IsValid(literal))
                {
                    throw new ArgumentException("Error in []");
                }

                if (midState)
                {
                    rhs = literal;
                }
                else
                {
                    lhs = literal;
                }

                midState = !midState;
            }

            // Step 2: decide what follows the member just read.
            if (_sourceWindow.AdvanceIfMatches(']'))
            {
                // End of set: a lone pending character becomes a one-character range.
                if (midState)
                {
                    rhs = lhs;
                }

                if (!exp.AddRangeWithConflict(lhs, rhs))
                {
                    throw new ArgumentException();
                }

                break;
            }
            else if (_sourceWindow.AdvanceIfMatches('-'))
            {
                // A '-' is only legal right after the left end of a range.
                if (!midState)
                {
                    throw new ArgumentException("Invalid - in []");
                }
            }
            else
            {
                // Another member follows. Flush the pending range but only PEEK at the
                // next character: it must stay in the window so the next iteration reads
                // it as a member (consuming it here silently dropped it from the set).
                var upcoming = _sourceWindow.PeekChar();
                if (_sourceWindow.IsValid(upcoming))
                {
                    if (midState)
                    {
                        rhs = lhs;
                    }

                    if (!exp.AddRangeWithConflict(lhs, rhs))
                    {
                        throw new ArgumentException();
                    }

                    midState = false;
                }
            }
        }

        return exp;
    }
    else
    {
        char c = default(char);
        if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out c))
        {
            // Not an atom: step back over the operator character and return null.
            _sourceWindow.AdvanceChar(-1);
            return null;
        }
        else
        {
            // The character itself.
            var exp = new CharSetExpression();
            exp.IsReverse = false;
            exp.Add(_sourceWindow.NextChar());
            return exp;
        }
    }
}
/// <summary>
/// Parses a single atom from the source window: the anchors ^ and $, the dot,
/// a backslash (or forward-slash) escape, a bracketed set such as [a-z], or one literal character.
/// </summary>
/// <returns>
/// A <c>BeginExpression</c>, an <c>EndExpression</c> or a <c>CharSetExpression</c> for the atom read.
/// Returns <c>null</c> when the window's next character is <c>SlidingTextWindow.InvalidCharacter</c>,
/// or when the next character is one of <c>()+*?{}|</c> (that character is left unconsumed).
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown for an unsupported escape, or for malformed content inside a bracketed set.
/// </exception>
public Expression ParseCharSet()
{
    if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
    {
        return null;
    }

    if (_sourceWindow.AdvanceIfMatches('^'))
    {
        return new BeginExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('$'))
    {
        return new EndExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('.'))
    {
        // The dot covers every character from (char)1 up to char.MaxValue.
        var ret = new CharSetExpression();
        ret.Add((char)1, char.MaxValue);
        return ret;
    }
    else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
    {
        // Escapes such as \d; both '\' and '/' introduce an escape.
        var exp = new CharSetExpression();
        var c2 = _sourceWindow.PeekChar();
        switch (c2)
        {
            case 'r':
                exp.Add('\r');
                break;
            case 'n':
                exp.Add('\n');
                break;
            case 't':
                exp.Add('\t');
                break;

            // Characters that must be escaped to be taken literally.
            case '\\':
            case '/':
            case '(':
            case ')':
            case '+':
            case '*':
            case '?':
            case '|':
            case '{':
            case '}':
            case '[':
            case ']':
            case '<':
            case '>':
            case '^':
            case '$':
            case '!':
            case '=':
            case '.':
                exp.Add(c2);
                break;

            // Upper-case class letters reverse the set, then reuse the lower-case definition.
            case 'S':
                exp.IsReverse = true;
                goto case 's';
            case 's':
                // Whitespace: space, CR, LF, tab.
                exp.Add(' ');
                exp.Add('\r');
                exp.Add('\n');
                exp.Add('\t');
                break;
            case 'D':
                exp.IsReverse = true;
                goto case 'd';
            case 'd':
                exp.Add('0', '9');
                break;
            case 'L':
                exp.IsReverse = true;
                goto case 'l';
            case 'l':
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            case 'W':
                exp.IsReverse = true;
                goto case 'w';
            case 'w':
                exp.Add('0', '9');
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            default:
                throw new ArgumentException("Error character after \\");
        }

        // The escape letter was only peeked; consume it now.
        _sourceWindow.AdvanceChar();
        return exp;
    }
    else if (_sourceWindow.AdvanceIfMatches('['))
    {
        // Bracketed set such as [a-z]; a leading '^' reverses the set.
        var exp = new CharSetExpression();
        exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

        // midState is true while lhs has been read and the range is not yet complete.
        bool midState = false;
        char lhs = default(char), rhs = default(char);

        while (true)
        {
            // Step 1: read one member character (escaped or literal) into lhs or rhs.
            if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                var c = _sourceWindow.PeekChar();
                char decoded = default(char);
                switch (c)
                {
                    case 'r':
                        decoded = '\r';
                        break;
                    case 'n':
                        decoded = '\n';
                        break;
                    case 't':
                        decoded = '\t';
                        break;

                    // Characters that must be escaped to be taken literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        decoded = c;
                        break;
                    default:
                        throw new ArgumentException("Error syntax in []");
                }

                _sourceWindow.AdvanceChar();

                // Store the decoded character, not the raw escape letter, so that
                // e.g. [\n] contains a newline rather than the letter 'n'.
                if (midState)
                {
                    rhs = decoded;
                }
                else
                {
                    lhs = decoded;
                }

                midState = !midState;
            }
            else if (_sourceWindow.AdvanceIfMatches("-]"))
            {
                throw new ArgumentException("-] occurred.");
            }
            else
            {
                var literal = _sourceWindow.NextChar();
                if (!_sourceWindow.IsValid(literal))
                {
                    throw new ArgumentException("Error in []");
                }

                if (midState)
                {
                    rhs = literal;
                }
                else
                {
                    lhs = literal;
                }

                midState = !midState;
            }

            // Step 2: decide what follows the member just read.
            if (_sourceWindow.AdvanceIfMatches(']'))
            {
                // End of set: a lone pending character becomes a one-character range.
                if (midState)
                {
                    rhs = lhs;
                }

                if (!exp.AddRangeWithConflict(lhs, rhs))
                {
                    throw new ArgumentException();
                }

                break;
            }
            else if (_sourceWindow.AdvanceIfMatches('-'))
            {
                // A '-' is only legal right after the left end of a range.
                if (!midState)
                {
                    throw new ArgumentException("Invalid - in []");
                }
            }
            else
            {
                // Another member follows. Flush the pending range but only PEEK at the
                // next character: it must stay in the window so the next iteration reads
                // it as a member (consuming it here silently dropped it from the set).
                var upcoming = _sourceWindow.PeekChar();
                if (_sourceWindow.IsValid(upcoming))
                {
                    if (midState)
                    {
                        rhs = lhs;
                    }

                    if (!exp.AddRangeWithConflict(lhs, rhs))
                    {
                        throw new ArgumentException();
                    }

                    midState = false;
                }
            }
        }

        return exp;
    }
    else
    {
        char c = default(char);
        if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out c))
        {
            // Not an atom: step back over the operator character and return null.
            _sourceWindow.AdvanceChar(-1);
            return null;
        }
        else
        {
            // The character itself.
            var exp = new CharSetExpression();
            exp.IsReverse = false;
            exp.Add(_sourceWindow.NextChar());
            return exp;
        }
    }
}
/// <summary>
/// Creates a node wrapping a fresh <c>CharSetExpression</c> to which the pair
/// (<paramref name="a"/>, <paramref name="b"/>) has been passed via <c>AddRangeWithConflict</c>.
/// </summary>
/// <param name="a">First argument handed to <c>AddRangeWithConflict</c> (presumably the range start).</param>
/// <param name="b">Second argument handed to <c>AddRangeWithConflict</c> (presumably the range end).</param>
/// <returns>A new <c>RegexNode</c> wrapping the created character set.</returns>
public static RegexNode GetCharSetExpression(char a, char b)
{
    var charSet = new CharSetExpression();

    // The boolean returned by AddRangeWithConflict is not inspected here.
    charSet.AddRangeWithConflict(a, b);

    var node = new RegexNode(charSet);
    return node;
}