/// <summary>
/// Combines two character-set nodes into a single node holding the ranges of both.
/// </summary>
/// <param name="node1">Left operand; its <c>Exp</c> is asserted (debug builds) to be a non-reversed <c>CharSetExpression</c>.</param>
/// <param name="node2">Right operand; same expectation as <paramref name="node1"/>.</param>
/// <returns>A new <c>RegexNode</c> wrapping a non-reversed set built from both operands.</returns>
public static RegexNode operator %(RegexNode node1, RegexNode node2)
{
    var lhsSet = node1.Exp as CharSetExpression;
    var rhsSet = node2.Exp as CharSetExpression;

    // Debug-only contract check: both operands are plain (non-reversed) character sets.
    Debug.Assert(lhsSet != null && rhsSet != null && !lhsSet.IsReverse && !rhsSet.IsReverse);

    var union = new CharSetExpression { IsReverse = false };

    // Ranges of the left operand are added as-is.
    foreach (var range in lhsSet.Ranges)
    {
        union.Ranges.Add(range);
    }

    // Ranges of the right operand go through AddRangeWithConflict; a false result is
    // reported through a debug assertion only.
    foreach (var range in rhsSet.Ranges)
    {
        bool added = union.AddRangeWithConflict(range.Begin, range.End);
        Debug.Assert(added, "Failed");
    }

    return new RegexNode(union);
}
/// <summary>
/// Creates a node wrapping a character set that contains the single range
/// from <paramref name="a"/> to <paramref name="b"/>.
/// </summary>
/// <param name="a">First character of the range.</param>
/// <param name="b">Last character of the range.</param>
/// <returns>A new <c>RegexNode</c> wrapping the freshly built <c>CharSetExpression</c>.</returns>
public static RegexNode GetCharSetExpression(char a, char b)
{
    var expression = new CharSetExpression();

    // AddRangeWithConflict reports failure through its bool result. The result used to be
    // dropped silently; surface it in debug builds (release behaviour is unchanged).
    bool added = expression.AddRangeWithConflict(a, b);
    System.Diagnostics.Debug.Assert(added, "Failed");

    return new RegexNode(expression);
}
/// <summary>
/// Builds the epsilon-NFA fragment for a character set.
/// </summary>
/// <param name="expression">Character set whose ranges become edges.</param>
/// <param name="param">Automaton that receives the new fragment and its edges.</param>
/// <returns>The fragment whose start and end states are connected by the range edges.</returns>
public override EpsilonNfa Apply(CharSetExpression expression, Automaton param)
{
    // [a-z A-Z] is an alternation: the two states are linked by one edge per range.
    // NOTE(review): expression.IsReverse is not consulted here - presumably handled elsewhere; confirm.
    var fragment = new EpsilonNfa(param);

    foreach (var range in expression.Ranges)
    {
        param.AddCharRange(fragment.Start, fragment.End, range);
    }

    return fragment;
}
/// <summary>
/// Produces a node with the same ranges as <paramref name="node"/> but with the
/// <c>IsReverse</c> flag flipped.
/// </summary>
/// <param name="node">Node to negate.</param>
/// <returns>
/// A new <c>RegexNode</c>, or <c>null</c> when <c>node.Exp</c> is not a <c>CharSetExpression</c>.
/// </returns>
public static RegexNode operator !(RegexNode node)
{
    var source = node.Exp as CharSetExpression;
    bool isCharSet = source != null;
    if (!isCharSet)
    {
        // Only character sets can be negated by this operator.
        return null;
    }

    var flipped = new CharSetExpression();
    foreach (var range in source.Ranges)
    {
        flipped.Ranges.Add(range);
    }

    flipped.IsReverse = !source.IsReverse;
    return new RegexNode(flipped);
}
/// <summary>
/// Negates a character-set node: the result carries the same ranges and the opposite
/// <c>IsReverse</c> value.
/// </summary>
/// <param name="node">Node whose <c>Exp</c> is inspected.</param>
/// <returns>
/// The negated node, or <c>null</c> if <c>node.Exp</c> is not a <c>CharSetExpression</c>.
/// </returns>
public static RegexNode operator !(RegexNode node)
{
    RegexNode result = null;
    CharSetExpression original = node.Exp as CharSetExpression;

    if (original != null)
    {
        CharSetExpression negated = new CharSetExpression();
        foreach (var item in original.Ranges)
        {
            negated.Ranges.Add(item);
        }

        negated.IsReverse = !original.IsReverse;
        result = new RegexNode(negated);
    }

    return result;
}
/// <summary>
/// Compares this character set with another one.
/// </summary>
/// <param name="obj">The set to compare against; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when both sets have the same <c>IsReverse</c> flag, the same number of ranges,
/// and pairwise-equal ranges at every index; <c>false</c> otherwise, including when
/// <paramref name="obj"/> is <c>null</c>.
/// </returns>
public bool Equals(CharSetExpression obj)
{
    // A null argument used to cause a NullReferenceException; it is simply "not equal".
    // ReferenceEquals is used so the check does not depend on any operator overloads.
    if (object.ReferenceEquals(obj, null))
    {
        return false;
    }

    if (IsReverse != obj.IsReverse)
    {
        return false;
    }

    if (Ranges.Count != obj.Ranges.Count)
    {
        return false;
    }

    // Position-by-position comparison of the two range collections.
    for (int i = 0; i < Ranges.Count; ++i)
    {
        if (Ranges[i] != obj.Ranges[i])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Merges the ranges of two character-set nodes into a new, non-reversed character set.
/// </summary>
/// <param name="node1">First operand; debug builds assert that it wraps a non-reversed <c>CharSetExpression</c>.</param>
/// <param name="node2">Second operand; same expectation as <paramref name="node1"/>.</param>
/// <returns>A new <c>RegexNode</c> wrapping the merged set.</returns>
public static RegexNode operator %(RegexNode node1, RegexNode node2)
{
    CharSetExpression first = node1.Exp as CharSetExpression;
    CharSetExpression second = node2.Exp as CharSetExpression;
    Debug.Assert(first != null && second != null && !first.IsReverse && !second.IsReverse);

    CharSetExpression merged = new CharSetExpression();
    merged.IsReverse = false;

    // First operand: ranges are appended directly.
    foreach (var item in first.Ranges)
    {
        merged.Ranges.Add(item);
    }

    // Second operand: ranges are inserted via AddRangeWithConflict; a rejected range
    // only trips a debug assertion.
    foreach (var item in second.Ranges)
    {
        bool accepted = merged.AddRangeWithConflict(item.Begin, item.End);
        if (accepted)
        {
            continue;
        }

        Debug.Assert(false, "Failed");
    }

    return new RegexNode(merged);
}
/// <summary>
/// Handles a character set by returning a new <c>CharSetExpression</c> created from it
/// through the copy constructor.
/// </summary>
/// <param name="expression">Character set to duplicate.</param>
/// <param name="param">Not used by this overload.</param>
/// <returns>The newly constructed <c>CharSetExpression</c>.</returns>
public override Expression Apply(CharSetExpression expression, MergeParameter param)
{
    Expression duplicate = new CharSetExpression(expression);
    return duplicate;
}
/// <summary>
/// Parses one atom at the current position of the source window: the anchors ^ and $,
/// the wildcard '.', an escape sequence (introduced by '\' or '/'), a bracketed set such
/// as [a-z], or a single literal character.
/// </summary>
/// <returns>
/// The parsed expression, or <c>null</c> when the window is at its end or the next
/// character is one of <c>()+*?{}|</c> (that character is left unconsumed).
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown for an unknown escape sequence or a malformed bracketed set.
/// </exception>
public Expression ParseCharSet()
{
    if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
    {
        return null;
    }

    if (_sourceWindow.AdvanceIfMatches('^'))
    {
        return new BeginExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('$'))
    {
        return new EndExpression();
    }
    else if (_sourceWindow.AdvanceIfMatches('.'))
    {
        // '.' covers every character from (char)1 up to char.MaxValue.
        var ret = new CharSetExpression();
        ret.Add((char)1, char.MaxValue);
        return ret;
    }
    else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
    {
        // Escapes such as \d; the upper-case variants set IsReverse on the same set.
        var exp = new CharSetExpression();
        var c2 = _sourceWindow.PeekChar();
        switch (c2)
        {
            case 'r':
                exp.Add('\r');
                break;
            case 'n':
                exp.Add('\n');
                break;
            case 't':
                exp.Add('\t');
                break;
            // Characters that must be escaped to be taken literally.
            case '\\':
            case '/':
            case '(':
            case ')':
            case '+':
            case '*':
            case '?':
            case '|':
            case '{':
            case '}':
            case '[':
            case ']':
            case '<':
            case '>':
            case '^':
            case '$':
            case '!':
            case '=':
            case '.':
                exp.Add(c2);
                break;
            case 'S':
                exp.IsReverse = true;
                goto case 's';
            case 's':
                // Spaces.
                exp.Add(' ');
                exp.Add('\r');
                exp.Add('\n');
                exp.Add('\t');
                break;
            case 'D':
                exp.IsReverse = true;
                goto case 'd';
            case 'd':
                exp.Add('0', '9');
                break;
            case 'L':
                exp.IsReverse = true;
                goto case 'l';
            case 'l':
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            case 'W':
                exp.IsReverse = true;
                goto case 'w';
            case 'w':
                exp.Add('0', '9');
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            default:
                throw new ArgumentException("Error character after \\");
        }

        // The escape letter was only peeked; consume it now.
        _sourceWindow.AdvanceChar();
        return exp;
    }
    else if (_sourceWindow.AdvanceIfMatches('['))
    {
        // Bracketed set such as [a-z]; a leading '^' marks the set as reversed.
        var exp = new CharSetExpression();
        exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

        // midState is true while an element has been read into lhs but not yet added.
        bool midState = false;
        char lhs = default(char), rhs = default(char);
        while (true)
        {
            // Step 1: read one element (escaped or literal) into lhs or rhs.
            if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                var c = _sourceWindow.PeekChar();
                char tmp = default(char);
                switch (c)
                {
                    case 'r':
                        tmp = '\r';
                        break;
                    case 'n':
                        tmp = '\n';
                        break;
                    case 't':
                        tmp = '\t';
                        break;
                    // Characters that must be escaped to be taken literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        tmp = c;
                        break;
                    default:
                        throw new ArgumentException("Error syntax in []");
                }

                _sourceWindow.AdvanceChar();

                // Fix: store the translated character. The raw escape letter was stored
                // before, so [\n] produced the letter 'n' instead of a line feed.
                if (midState)
                {
                    rhs = tmp;
                }
                else
                {
                    lhs = tmp;
                }

                midState = !midState;
            }
            else if (_sourceWindow.AdvanceIfMatches("-]"))
            {
                throw new ArgumentException("-] occurred.");
            }
            else
            {
                var c2 = _sourceWindow.NextChar();
                if (_sourceWindow.IsValid(c2))
                {
                    if (midState)
                    {
                        rhs = c2;
                    }
                    else
                    {
                        lhs = c2;
                    }

                    midState = !midState;
                }
                else
                {
                    throw new ArgumentException("Error in []");
                }
            }

            // Step 2: decide what follows the element just read.
            if (_sourceWindow.AdvanceIfMatches(']'))
            {
                // End of the set: a pending single element becomes the range lhs..lhs.
                if (midState)
                {
                    rhs = lhs;
                }

                if (!exp.AddRangeWithConflict(lhs, rhs))
                {
                    throw new ArgumentException();
                }

                break;
            }
            else if (_sourceWindow.AdvanceIfMatches('-'))
            {
                // A '-' is only legal directly after the left end of a range.
                if (!midState)
                {
                    throw new ArgumentException("Invalid - in []");
                }
            }
            else
            {
                // Fix: look ahead without consuming. This branch used to call NextChar(),
                // which dropped the following element (for example the 'b' in [abc]).
                var lookahead = _sourceWindow.PeekChar();
                if (_sourceWindow.IsValid(lookahead))
                {
                    if (midState)
                    {
                        rhs = lhs;
                    }

                    if (exp.AddRangeWithConflict(lhs, rhs))
                    {
                        midState = false;
                    }
                    else
                    {
                        throw new ArgumentException();
                    }
                }
            }
        }

        return exp;
    }
    else
    {
        char c = default(char);
        if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out c))
        {
            // Not ours to handle: step back over the consumed character.
            _sourceWindow.AdvanceChar(-1);
            return null;
        }
        else
        {
            // The character itself.
            var exp = new CharSetExpression();
            exp.IsReverse = false;
            exp.Add(_sourceWindow.NextChar());
            return exp;
        }
    }
}
/// <summary>
/// Copy constructor: builds a new <c>FlatSet</c> from the source's <c>Ranges</c> and
/// takes over its <c>IsReverse</c> flag.
/// </summary>
/// <param name="expression">The character set to copy from.</param>
public CharSetExpression(CharSetExpression expression)
{
    var copiedRanges = new FlatSet<CharRange>(expression.Ranges);
    this.Ranges = copiedRanges;
    this.IsReverse = expression.IsReverse;
}
/// <summary>
/// Visitor entry point for character sets: calls <c>Apply</c> with the stored parameter
/// and keeps the result in <c>_returnValue</c>.
/// </summary>
/// <param name="expression">The character set being visited.</param>
public void Visit(CharSetExpression expression)
{
    var result = Apply(expression, _paramValue);
    _returnValue = result;
}
/// <summary>
/// Processes a character-set expression; implemented by each concrete algorithm.
/// </summary>
/// <param name="expression">The character set to process.</param>
/// <param name="param">Algorithm-specific argument.</param>
/// <returns>The algorithm-specific result for <paramref name="expression"/>.</returns>
public abstract ReturnT Apply(CharSetExpression expression, ParamT param);
/// <summary>
/// Parses a single atom from the source window. Handles the anchors ^ and $, the
/// wildcard '.', escape sequences (after '\' or '/'), bracketed sets like [a-z], and
/// plain literal characters.
/// </summary>
/// <returns>
/// The expression for the atom, or <c>null</c> at the end of the input or when the next
/// character is one of <c>()+*?{}|</c>, which is left in the window.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when an escape sequence is unknown or a bracketed set is malformed.
/// </exception>
public Expression ParseCharSet()
{
    if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
    {
        return null;
    }

    // Every branch below returns or throws, so the alternatives are written as a
    // sequence of guards.
    if (_sourceWindow.AdvanceIfMatches('^'))
    {
        return new BeginExpression();
    }

    if (_sourceWindow.AdvanceIfMatches('$'))
    {
        return new EndExpression();
    }

    if (_sourceWindow.AdvanceIfMatches('.'))
    {
        // The wildcard spans (char)1 through char.MaxValue.
        var any = new CharSetExpression();
        any.Add((char)1, char.MaxValue);
        return any;
    }

    if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
    {
        // Escapes such as \d; an upper-case class letter produces the reversed set.
        var exp = new CharSetExpression();
        var c2 = _sourceWindow.PeekChar();
        switch (c2)
        {
            case 'r':
                exp.Add('\r');
                break;
            case 'n':
                exp.Add('\n');
                break;
            case 't':
                exp.Add('\t');
                break;
            // Characters that need escaping to be matched literally.
            case '\\':
            case '/':
            case '(':
            case ')':
            case '+':
            case '*':
            case '?':
            case '|':
            case '{':
            case '}':
            case '[':
            case ']':
            case '<':
            case '>':
            case '^':
            case '$':
            case '!':
            case '=':
            case '.':
                exp.Add(c2);
                break;
            case 'S':
                exp.IsReverse = true;
                goto case 's';
            case 's':
                // Spaces.
                exp.Add(' ');
                exp.Add('\r');
                exp.Add('\n');
                exp.Add('\t');
                break;
            case 'D':
                exp.IsReverse = true;
                goto case 'd';
            case 'd':
                exp.Add('0', '9');
                break;
            case 'L':
                exp.IsReverse = true;
                goto case 'l';
            case 'l':
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            case 'W':
                exp.IsReverse = true;
                goto case 'w';
            case 'w':
                exp.Add('0', '9');
                exp.Add('_');
                exp.Add('A', 'Z');
                exp.Add('a', 'z');
                break;
            default:
                throw new ArgumentException("Error character after \\");
        }

        // Consume the escape letter that was only peeked above.
        _sourceWindow.AdvanceChar();
        return exp;
    }

    if (_sourceWindow.AdvanceIfMatches('['))
    {
        // Bracketed set; '^' right after '[' marks it as reversed.
        var exp = new CharSetExpression();
        exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

        // midState: an element sits in lhs and has not been added to the set yet.
        bool midState = false;
        char lhs = default(char), rhs = default(char);
        while (true)
        {
            // Phase 1: read the next element.
            if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                var c = _sourceWindow.PeekChar();
                char tmp = default(char);
                switch (c)
                {
                    case 'r':
                        tmp = '\r';
                        break;
                    case 'n':
                        tmp = '\n';
                        break;
                    case 't':
                        tmp = '\t';
                        break;
                    // Characters that need escaping to be matched literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        tmp = c;
                        break;
                    default:
                        throw new ArgumentException("Error syntax in []");
                }

                _sourceWindow.AdvanceChar();

                // Fix: use the translated value (tmp). Previously the raw letter was
                // stored, so \n inside brackets matched 'n' rather than a line feed.
                if (midState)
                {
                    rhs = tmp;
                }
                else
                {
                    lhs = tmp;
                }

                midState = !midState;
            }
            else if (_sourceWindow.AdvanceIfMatches("-]"))
            {
                throw new ArgumentException("-] occurred.");
            }
            else
            {
                var c2 = _sourceWindow.NextChar();
                if (!_sourceWindow.IsValid(c2))
                {
                    throw new ArgumentException("Error in []");
                }

                if (midState)
                {
                    rhs = c2;
                }
                else
                {
                    lhs = c2;
                }

                midState = !midState;
            }

            // Phase 2: look at what follows that element.
            if (_sourceWindow.AdvanceIfMatches(']'))
            {
                // Closing bracket: a lone pending element is added as lhs..lhs.
                if (midState)
                {
                    rhs = lhs;
                }

                if (!exp.AddRangeWithConflict(lhs, rhs))
                {
                    throw new ArgumentException();
                }

                break;
            }
            else if (_sourceWindow.AdvanceIfMatches('-'))
            {
                // '-' must directly follow the left end of a range.
                if (!midState)
                {
                    throw new ArgumentException("Invalid - in []");
                }
            }
            else
            {
                // Fix: peek instead of NextChar(). Consuming here swallowed the next
                // element of the set (for example the 'b' in [abc]).
                var lookahead = _sourceWindow.PeekChar();
                if (_sourceWindow.IsValid(lookahead))
                {
                    if (midState)
                    {
                        rhs = lhs;
                    }

                    if (!exp.AddRangeWithConflict(lhs, rhs))
                    {
                        throw new ArgumentException();
                    }

                    midState = false;
                }
            }
        }

        return exp;
    }

    char meta = default(char);
    if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out meta))
    {
        // Belongs to another parsing routine: undo the advance and report "nothing parsed".
        _sourceWindow.AdvanceChar(-1);
        return null;
    }

    // The character itself.
    var literal = new CharSetExpression();
    literal.IsReverse = false;
    literal.Add(_sourceWindow.NextChar());
    return literal;
}
/// <summary>
/// Builds a node for the character range <paramref name="a"/>..<paramref name="b"/>.
/// </summary>
/// <param name="a">Start of the range.</param>
/// <param name="b">End of the range.</param>
/// <returns>A new <c>RegexNode</c> wrapping a <c>CharSetExpression</c> with that range.</returns>
public static RegexNode GetCharSetExpression(char a, char b)
{
    var charSet = new CharSetExpression();

    // The bool result of AddRangeWithConflict was previously ignored. Check it in debug
    // builds so a rejected range is noticed; release builds behave as before.
    bool accepted = charSet.AddRangeWithConflict(a, b);
    System.Diagnostics.Debug.Assert(accepted, "Failed");

    return new RegexNode(charSet);
}
/// <summary>
/// Determines whether another character set has the same <c>IsReverse</c> flag and the
/// same ranges in the same order.
/// </summary>
/// <param name="obj">The set to compare with; <c>null</c> is allowed.</param>
/// <returns>
/// <c>true</c> if the flag, the range count, and every range at the same index match;
/// otherwise <c>false</c>. A <c>null</c> argument yields <c>false</c>.
/// </returns>
public bool Equals(CharSetExpression obj)
{
    // Guard against null (previously a NullReferenceException). ReferenceEquals keeps the
    // check independent of any operator overloads on the type.
    if (object.ReferenceEquals(obj, null))
    {
        return false;
    }

    if (IsReverse != obj.IsReverse || Ranges.Count != obj.Ranges.Count)
    {
        return false;
    }

    // Compare the ranges index by index.
    for (int index = 0; index < Ranges.Count; ++index)
    {
        if (Ranges[index] != obj.Ranges[index])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Creates a character set from an existing one: <c>Ranges</c> is a new <c>FlatSet</c>
/// constructed from the source's ranges, and <c>IsReverse</c> is taken from the source.
/// </summary>
/// <param name="expression">The source character set.</param>
public CharSetExpression(CharSetExpression expression)
{
    FlatSet<CharRange> sourceRanges = expression.Ranges;
    Ranges = new FlatSet<CharRange>(sourceRanges);
    IsReverse = expression.IsReverse;
}