/// <summary>
/// Parses a single atom together with an optional repeat modifier
/// (<c>?</c>, <c>*</c>, <c>+</c> or <c>{...}</c>) and connects the result
/// to <paramref name="start"/>.
/// </summary>
/// <param name="start">The state the parsed fragment is attached to.</param>
/// <returns>The state at the end of the parsed fragment.</returns>
private NFAState ParseFact(NFAState start)
{
    // Build the atom on a scratch state first; it is connected to the
    // real start state only after any modifier has been applied.
    NFAState scratch = new NFAState();
    NFAState tail = ParseAtom(scratch);

    var lookahead = PeekChar(0);
    if (lookahead == '?' || lookahead == '*' || lookahead == '+' || lookahead == '{')
    {
        tail = ParseAtomModifier(scratch, tail);
    }

    // When the scratch state has incoming transitions and the start state
    // already has outgoing ones, the two are kept apart and joined by an
    // epsilon transition instead of being merged.
    bool keepSeparate = scratch.Incoming.Length > 0 && start.Outgoing.Length > 0;
    if (keepSeparate)
    {
        start.AddOut(new NFAEpsilonTransition(scratch));
        return tail;
    }

    scratch.MergeInto(start);

    // If the fragment ended on the scratch state itself, the merged start
    // state now plays that role.
    if (tail == scratch)
    {
        return start;
    }
    return tail;
}
/// <summary>
/// Parses one or more terms separated by <c>|</c> and wires every
/// alternative between <paramref name="start"/> and a common end state.
/// </summary>
/// <param name="start">The state every alternative branches out from.</param>
/// <returns>The newly created end state shared by all alternatives.</returns>
private NFAState ParseExpr(NFAState start)
{
    NFAState join = new NFAState();

    while (true)
    {
        // Consume the separator in front of the second and later alternatives.
        if (PeekChar(0) == '|')
        {
            ReadChar('|');
        }

        NFAState branchStart = new NFAState();
        var branchEnd = ParseTerm(branchStart);

        // Attach the head of the branch: merge it into the start state when
        // nothing points at it, otherwise link to it with an epsilon transition.
        if (branchStart.Incoming.Length != 0)
        {
            start.AddOut(new NFAEpsilonTransition(branchStart));
        }
        else
        {
            branchStart.MergeInto(start);
        }

        // Attach the tail of the branch: merge it into the shared end state
        // when it has no outgoing transitions, or when the end state has no
        // transitions yet and no further alternative follows.
        bool mergeTail = branchEnd.Outgoing.Length == 0
            || (!join.HasTransitions() && PeekChar(0) != '|');
        if (mergeTail)
        {
            branchEnd.MergeInto(join);
        }
        else
        {
            branchEnd.AddOut(new NFAEpsilonTransition(join));
        }

        if (PeekChar(0) != '|')
        {
            break;
        }
    }

    return join;
}
/// <summary>
/// Adds a chain of character transitions that matches the text
/// <paramref name="str"/> and stores <paramref name="value"/> on the
/// final state of that chain.
/// </summary>
/// <param name="str">The text to match; its first character is read unconditionally.</param>
/// <param name="ignoreCase">Passed on to every character transition that is added.</param>
/// <param name="value">The token pattern assigned to the last state.</param>
public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
{
    char first = str[0];
    NFAState current;

    if (first >= 128 || ignoreCase)
    {
        // Case-insensitive or non-ASCII first character: start from the
        // general initial state.
        current = _initial.AddOut(first, ignoreCase, null);
    }
    else
    {
        // Case-sensitive ASCII first character: use the per-character
        // start-state table, creating the entry on first use.
        current = _initialChar[first];
        if (current == null)
        {
            current = new NFAState();
            _initialChar[first] = current;
        }
    }

    int index = 1;
    while (index < str.Length)
    {
        current = current.AddOut(str[index], ignoreCase, null);
        index++;
    }

    current.Value = value;
}
/// <summary>
/// Moves all transitions of this state onto <paramref name="state"/>.
/// Incoming transitions are retargeted to <paramref name="state"/>;
/// afterwards this state's <c>Incoming</c> and <c>Outgoing</c> are set to null.
/// </summary>
/// <param name="state">The state that receives the transitions.</param>
public void MergeInto(NFAState state)
{
    // Incoming transitions: register each on the target, then redirect it.
    int inIndex = 0;
    while (inIndex < Incoming.Length)
    {
        var incomingTransition = Incoming[inIndex];
        state.AddIn(incomingTransition);
        incomingTransition.State = state;
        inIndex++;
    }
    Incoming = null;

    // Outgoing transitions: hand each over to the target state.
    int outIndex = 0;
    while (outIndex < Outgoing.Length)
    {
        state.AddOut(Outgoing[outIndex]);
        outIndex++;
    }
    Outgoing = null;
}
/// <summary>
/// Parses the contents of a character set (the part after <c>[</c>) and
/// adds a single character-range transition from <paramref name="start"/>
/// to a new end state. The closing <c>]</c> is not consumed here.
/// </summary>
/// <param name="start">The state the range transition leaves from.</param>
/// <returns>The new state the range transition leads to.</returns>
private NFAState ParseCharSet(NFAState start)
{
    NFAState end = new NFAState();

    // A leading '^' is consumed and passed as the first constructor flag.
    bool leadingCaret = PeekChar(0) == '^';
    if (leadingCaret)
    {
        ReadChar('^');
    }
    NFACharRangeTransition range = new NFACharRangeTransition(leadingCaret, _ignoreCase, end);
    start.AddOut(range);

    while (PeekChar(0) > 0)
    {
        char current = (char)PeekChar(0);

        if (current == ']')
        {
            return end;
        }

        if (current == '\\')
        {
            range.AddCharacter(ReadEscapeChar());
            continue;
        }

        ReadChar(current);

        // "a-z" form: a '-' followed by something other than the closing
        // bracket (or end of input) denotes a range; otherwise the
        // character stands for itself.
        bool isRange = PeekChar(0) == '-' && PeekChar(1) > 0 && PeekChar(1) != ']';
        if (!isRange)
        {
            range.AddCharacter(current);
            continue;
        }

        ReadChar('-');
        range.AddRange(current, ReadChar());
    }

    return end;
}
/// <summary>
/// Parses a single pattern character. A backslash is delegated to
/// <c>ParseEscapeChar</c>; any other accepted character is read and added
/// as a transition from <paramref name="start"/> to a new state.
/// </summary>
/// <param name="start">The state the new transition leaves from.</param>
/// <returns>The state returned by the added transition or escape parser.</returns>
/// <exception cref="RegExpException">
/// Thrown with <c>UNSUPPORTED_SPECIAL_CHARACTER</c> when the next character
/// is <c>^</c> or <c>$</c>.
/// </exception>
private NFAState ParseChar(NFAState start)
{
    var next = PeekChar(0);

    if (next == '\\')
    {
        return ParseEscapeChar(start);
    }

    if (next == '^' || next == '$')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            _pos,
            _pattern);
    }

    var literal = ReadChar();
    return start.AddOut(literal, _ignoreCase, new NFAState());
}
/// <summary>
/// Parses one atom: a dot, a parenthesised sub-expression, a bracketed
/// character set, or a single character.
/// </summary>
/// <param name="start">The state the atom is attached to.</param>
/// <returns>The state at the end of the parsed atom.</returns>
/// <exception cref="RegExpException">
/// Thrown with <c>UNEXPECTED_CHARACTER</c> when the next character is one of
/// <c>) ] { } ? * + |</c> or when <c>PeekChar(0)</c> returns -1.
/// </exception>
private NFAState ParseAtom(NFAState start)
{
    var next = PeekChar(0);

    if (next == '.')
    {
        ReadChar('.');
        NFAState target = new NFAState();
        return start.AddOut(new NFADotTransition(target));
    }

    if (next == '(')
    {
        ReadChar('(');
        NFAState groupEnd = ParseExpr(start);
        ReadChar(')');
        return groupEnd;
    }

    if (next == '[')
    {
        ReadChar('[');
        NFAState setEnd = ParseCharSet(start);
        ReadChar(']');
        return setEnd;
    }

    switch (next)
    {
        // These cannot begin an atom.
        case -1:
        case ')':
        case ']':
        case '{':
        case '}':
        case '?':
        case '*':
        case '+':
        case '|':
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos,
                _pattern);

        default:
            return ParseChar(start);
    }
}
/// <summary>
/// Parses a backslash escape. The class escapes <c>\d \D \s \S \w \W</c>
/// each add their dedicated transition; anything else is read through
/// <c>ReadEscapeChar</c> and added as an ordinary character transition.
/// </summary>
/// <param name="start">The state the new transition leaves from.</param>
/// <returns>The state returned by the added transition.</returns>
private NFAState ParseEscapeChar(NFAState start)
{
    NFAState target = new NFAState();

    // Only look at the character after the backslash when there is one;
    // '\0' never matches a class escape and so selects the fallback.
    bool hasFollower = PeekChar(0) == '\\' && PeekChar(1) > 0;
    char selector = hasFollower ? (char)PeekChar(1) : '\0';

    switch (selector)
    {
        case 'd':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFADigitTransition(target));

        case 'D':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFANonDigitTransition(target));

        case 's':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFAWhitespaceTransition(target));

        case 'S':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFANonWhitespaceTransition(target));

        case 'w':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFAWordTransition(target));

        case 'W':
            ReadChar();
            ReadChar();
            return start.AddOut(new NFANonWordTransition(target));

        default:
            return start.AddOut(ReadEscapeChar(), _ignoreCase, target);
    }
}
/// <summary>
/// Parses a repeat modifier (<c>?</c>, <c>*</c>, <c>+</c> or
/// <c>{min[,[max]]}</c>) and rewires the atom spanning
/// <paramref name="start"/> to <paramref name="end"/> accordingly.
/// Only the repeat counts 0..1, 0..unbounded and 1..unbounded are handled.
/// </summary>
/// <param name="start">The first state of the atom being modified.</param>
/// <param name="end">The last state of the atom being modified.</param>
/// <returns>The state at the end of the modified atom.</returns>
/// <exception cref="RegExpException">
/// Thrown with <c>UNEXPECTED_CHARACTER</c> when the character read is not a
/// modifier, with <c>UNSUPPORTED_SPECIAL_CHARACTER</c> when the modifier is
/// followed by <c>?</c> or <c>+</c>, and with <c>INVALID_REPEAT_COUNT</c>
/// for an inconsistent or unhandled repeat count.
/// </exception>
private NFAState ParseAtomModifier(NFAState start, NFAState end)
{
    // Defaults correspond to '*': zero or more, with -1 meaning "no upper bound".
    int min = 0;
    int max = -1;
    int firstPos = _pos;

    // Read min and max
    switch (ReadChar())
    {
        case '?':
            max = 1;
            break;

        case '*':
            break;

        case '+':
            min = 1;
            break;

        case '{':
            min = ReadNumber();
            max = min;
            if (PeekChar(0) == ',')
            {
                ReadChar(',');
                // "{n,}" leaves the upper bound open; "{n,m}" reads it.
                max = (PeekChar(0) != '}') ? ReadNumber() : -1;
            }
            ReadChar('}');
            if (max == 0 || (max > 0 && min > max))
            {
                throw new RegExpException(
                    RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                    firstPos,
                    _pattern);
            }
            break;

        default:
            // The offending character has already been consumed, hence _pos - 1.
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos - 1,
                _pattern);
    }

    // Read possessive or reluctant modifiers (both are rejected)
    if (PeekChar(0) == '?' || PeekChar(0) == '+')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            _pos,
            _pattern);
    }

    // Handle supported repeaters
    bool unbounded = max == -1;

    if (min == 0 && max == 1)
    {
        // Optional: add an epsilon transition from start straight to end.
        return start.AddOut(new NFAEpsilonTransition(end));
    }

    if (min == 0 && unbounded)
    {
        // Zero or more: fold end back onto start, either by merging the
        // two states or with an epsilon transition from end to start.
        if (end.Outgoing.Length == 0)
        {
            end.MergeInto(start);
        }
        else
        {
            end.AddOut(new NFAEpsilonTransition(start));
        }
        return start;
    }

    if (min == 1 && unbounded)
    {
        // One or more: when the atom is exactly one transition from start
        // to end, add a copy of that transition on end (Copy(end)
        // presumably retargets the copy to end -- confirm); otherwise add
        // an epsilon transition from end back to start.
        bool singleTransition = start.Outgoing.Length == 1
            && end.Outgoing.Length == 0
            && end.Incoming.Length == 1
            && start.Outgoing[0] == end.Incoming[0];
        if (singleTransition)
        {
            end.AddOut(start.Outgoing[0].Copy(end));
        }
        else
        {
            end.AddOut(new NFAEpsilonTransition(start));
        }
        return end;
    }

    throw new RegExpException(
        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
        firstPos,
        _pattern);
}