/**
 * Builds a tokenizer reading from the given input. The three
 * token matchers (string DFA, NFA and regular expression) are
 * all created with the same case-sensitivity flag, and the
 * input is wrapped in a new reader buffer.
 *
 * @param input          the input stream to read
 * @param ignoreCase     the character case ignore flag
 *
 * @since 1.5
 */
public Tokenizer(TextReader input, bool ignoreCase) {
    stringDfaMatcher = new StringDFAMatcher(ignoreCase);
    nfaMatcher = new NFAMatcher(ignoreCase);
    regExpMatcher = new RegExpMatcher(ignoreCase);
    buffer = new ReaderBuffer(input);
}
/**
 * Builds a matcher around a regular expression element. The
 * start position is set to zero and Reset() is invoked last, once
 * all fields have been assigned.
 *
 * @param e              the base regular expression element
 * @param buffer         the input character buffer to work with
 * @param ignoreCase     the character case ignore flag
 */
internal Matcher(Element e, ReaderBuffer buffer, bool ignoreCase) {
    element = e;
    start = 0;
    this.buffer = buffer;
    this.ignoreCase = ignoreCase;

    // Must run after the assignments above
    Reset();
}
/**
 * Prepares this tokenizer for a new input stream. The current
 * reader buffer is disposed and replaced by one wrapping the new
 * input, the previous token reference is dropped and the last
 * match is cleared. This allows a parser and tokenizer pair to be
 * reused for several inputs without rebuilding the grammar
 * structures.
 *
 * @param input          the new input stream to read
 *
 * @see Parser#reset(Reader)
 *
 * @since 1.5
 */
public void Reset(TextReader input) {
    // Release the old buffer before replacing it
    buffer.Dispose();
    buffer = new ReaderBuffer(input);

    previousToken = null;
    lastMatch.Clear();
}
/**
 * Runs the string automaton against the start of the input
 * buffer. When the automaton reports a token pattern, the token
 * match is updated using the length of the pattern string as the
 * match length.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    TokenPattern found = automaton.Match(buffer, ignoreCase);

    if (found == null) {
        return;
    }
    match.Update(found.Pattern.Length, found);
}
/**
 * Tries every regular expression against the start of the input
 * buffer. Each expression that matches at least one character is
 * reported to the token match together with the pattern stored
 * at the same index.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    int index = 0;

    while (index < regExps.Length) {
        int matched = regExps[index].Match(buffer);

        // Zero-length results are not reported
        if (matched > 0) {
            match.Update(matched, patterns[index]);
        }
        index++;
    }
}
/// <summary>
/// Checks if this NFA matches the specified input text. The
/// matching is performed from position zero (0) in the buffer,
/// using only peek operations on the buffer. Every accepting
/// state reached is reported to the token match together with
/// the number of characters peeked so far.
/// </summary>
/// <param name="buffer">The input buffer to check</param>
/// <param name="match">The token match to update</param>
/// <returns>
/// The largest match length reported to the token match, or
/// zero (0) if no accepting state was reached
/// </returns>
/// <exception cref="System.IO.IOException">
/// If an I/O error occurred
/// </exception>
public int Match(ReaderBuffer buffer, TokenMatch match) {
    int length = 0;
    int pos = 1;
    int peekChar;
    NFAState state;

    // The first step of the match loop has been unrolled and
    // optimized for performance below.
    this.queue.Clear();
    peekChar = buffer.Peek(0);
    if (peekChar >= 0 && peekChar < 128) {
        state = this.initialChar[peekChar];
        if (state != null) {
            this.queue.AddLast(state);
        }
    }
    if (peekChar >= 0) {
        this.initial.MatchTransitions((char)peekChar, this.queue, true);
    }
    this.queue.MarkEnd();
    peekChar = buffer.Peek(1);

    // The remaining match loop processes all subsequent states
    while (!this.queue.Empty) {
        if (this.queue.Marked) {
            pos++;
            peekChar = buffer.Peek(pos);
            this.queue.MarkEnd();
        }
        state = this.queue.RemoveFirst();
        if (state.Value != null) {
            match.Update(pos, state.Value);

            // pos only ever increases, so the most recent accepting
            // position is also the longest one seen. Previously the
            // return value was never assigned and was always zero.
            length = pos;
        }
        if (peekChar >= 0) {
            state.MatchTransitions((char)peekChar, this.queue, false);
        }
    }
    return length;
}
/// <summary>
/// Walks the automaton over the characters at the start of the
/// buffer and returns the value of the last state visited that
/// carried one. Only peek operations are used on the buffer. The
/// first character is lower-cased in case-insensitive mode and
/// looked up in the ASCII table when below 128; otherwise the
/// walk starts from the non-ASCII state without consuming it.
/// </summary>
/// <param name="buffer">The buffer to read from</param>
/// <param name="caseInsensitive">The case-insensitive flag</param>
/// <returns>The match value, or null if no match found</returns>
/// <exception cref="System.IO.IOException">
/// If an I/O error occurred
/// </exception>
public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive) {
    TokenPattern longest = null;
    DFAState current;
    int offset = 0;
    int ch = buffer.Peek(0);

    if (ch < 0) {
        return null;
    }
    if (caseInsensitive) {
        ch = char.ToLower((char)ch);
    }
    if (ch >= 128) {
        // The first character is re-read by the loop below
        current = this.nonAscii;
    } else {
        current = this.ascii[ch];
        if (current == null) {
            return null;
        }
        if (current.Value != null) {
            longest = current.Value;
        }
        offset = 1;
    }

    // Follow transitions until the input or the automaton ends
    for (ch = buffer.Peek(offset); ch >= 0; ch = buffer.Peek(offset)) {
        current = current.Tree.Find((char)ch, caseInsensitive);
        if (current == null) {
            break;
        }
        if (current.Value != null) {
            longest = current.Value;
        }
        offset++;
    }
    return longest;
}
/**
 * Matches this regular expression against the start of the
 * input buffer. The matcher is created on first use and reset
 * onto the supplied buffer on later calls.
 *
 * @param buffer         the input buffer to check
 *
 * @return the matcher length if it matched from the beginning, or
 *         zero (0) otherwise
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    if (matcher != null) {
        matcher.Reset(buffer);
    } else {
        matcher = regExp.Matcher(buffer);
    }

    if (matcher.MatchFromBeginning()) {
        return matcher.Length();
    }
    return 0;
}
/// <summary>
/// Matches this regular expression against the start of the
/// input buffer. The matcher is created on first use and reset
/// onto the supplied buffer on later calls.
/// </summary>
/// <param name="buffer">The input buffer to check</param>
/// <returns>
/// The matcher length if it matched from the beginning, or 0
/// otherwise
/// </returns>
/// <exception cref="IOException">If an I/O error occurred</exception>
public override int Match(ReaderBuffer buffer) {
    if (this.matcher != null) {
        this.matcher.Reset(buffer);
    } else {
        this.matcher = this.regExp.Matcher(buffer);
    }

    if (this.matcher.MatchFromBeginning()) {
        return this.matcher.Length;
    }
    return 0;
}
/**
 * Matches the system regular expression against the buffer
 * contents, starting the search at the current buffer position.
 * A result is only accepted when it begins exactly at that
 * position.
 *
 * @param buffer         the input buffer to check
 *
 * @return the length of the match found at the buffer position, or
 *         zero (0) if no match was found there
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    // Workaround: .NET gives no indication that the end of the
    // input string was reached, so peek far ahead first.
    // NOTE(review): presumably this loads up to 16K characters
    // into the buffer -- confirm against ReaderBuffer.
    buffer.Peek(1024 * 16);

    // The search cannot be restricted to the start position
    // through the API, so the match index is verified afterwards.
    Match found = reg.Match(buffer.ToString(), buffer.Position);
    bool anchored = found.Success && found.Index == buffer.Position;

    return(anchored ? found.Length : 0);
}
/**
 * Points this matcher at a new look-ahead character buffer and
 * then invokes the parameterless Reset() to reinitialize the
 * match state.
 *
 * @param buffer         the character input buffer
 *
 * @since 1.5
 */
public void Reset(ReaderBuffer buffer) {
    this.buffer = buffer;
    this.Reset();
}
/**
 * Checks if this NFA matches the specified input text. The
 * matching is performed from position zero (0) in the buffer,
 * using only peek operations on the buffer. Every accepting
 * state reached is reported to the token match together with
 * the number of characters peeked so far.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @return the largest match length reported to the token match, or
 *         zero (0) if no accepting state was reached
 *
 * @throws IOException if an I/O error occurred
 */
public int Match(ReaderBuffer buffer, TokenMatch match) {
    int length = 0;
    int pos = 1;
    int peekChar;
    NFAState state;

    // The first step of the match loop has been unrolled and
    // optimized for performance below.
    this.queue.Clear();
    peekChar = buffer.Peek(0);
    if (0 <= peekChar && peekChar < 128) {
        state = this.initialChar[peekChar];
        if (state != null) {
            this.queue.AddLast(state);
        }
    }
    if (peekChar >= 0) {
        this.initial.MatchTransitions((char) peekChar, this.queue, true);
    }
    this.queue.MarkEnd();
    peekChar = buffer.Peek(1);

    // The remaining match loop processes all subsequent states
    while (!this.queue.Empty) {
        if (this.queue.Marked) {
            pos++;
            peekChar = buffer.Peek(pos);
            this.queue.MarkEnd();
        }
        state = this.queue.RemoveFirst();
        if (state.value != null) {
            match.Update(pos, state.value);

            // pos only ever increases, so the most recent accepting
            // position is also the longest one seen. Previously the
            // return value was never assigned and was always zero.
            length = pos;
        }
        if (peekChar >= 0) {
            state.MatchTransitions((char) peekChar, this.queue, false);
        }
    }
    return length;
}
/**
 * Matches the system regular expression against the buffer
 * contents, starting the search at the current buffer position.
 * A result is only accepted when it begins exactly at that
 * position.
 *
 * @param buffer         the input buffer to check
 *
 * @return the length of the match found at the buffer position, or
 *         zero (0) if no match was found there
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    // Workaround: .NET gives no indication that the end of the
    // input string was reached, so peek far ahead first.
    // NOTE(review): presumably this loads up to 16K characters
    // into the buffer -- confirm against ReaderBuffer.
    buffer.Peek(1024 * 16);

    // The search cannot be restricted to the start position
    // through the API, so the match index is verified afterwards.
    Match found = reg.Match(buffer.ToString(), buffer.Position);
    bool anchored = found.Success && found.Index == buffer.Position;

    return anchored ? found.Length : 0;
}
/**
 * Checks if the start of the input stream matches this
 * regular expression. Implementations are supplied by the
 * concrete subclasses.
 *
 * @param buffer         the input buffer to check
 *
 * @return the longest match found, or
 *         zero (0) if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public abstract int Match(ReaderBuffer buffer);
/**
 * Delegates the token search to the NFA automaton, which updates
 * the token match itself. The integer returned by the automaton
 * is not used here.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    this.automaton.Match(buffer, match);
}
/**
 * Searches for matching token patterns at the start of the
 * input stream. If a match is found, the token match object
 * is updated. Implementations are supplied by the concrete
 * subclasses.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public abstract void Match(ReaderBuffer buffer, TokenMatch match);
/**
 * Recursively explores the ways the repeated element can match
 * and flags every resulting total length in the matches bit
 * array. The array is grown by ten entries beyond the required
 * index whenever it is too short.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param length         the match length at the start position
 * @param count          the number of sub-elements matched
 * @param attempt        the number of match attempts here
 *
 * @throws IOException if an I/O error occurred
 */
private void FindMatches(Matcher m,
                         ReaderBuffer buffer,
                         int start,
                         int length,
                         int count,
                         int attempt) {

    // Too many repetitions, nothing to record
    if (count > max) {
        return;
    }

    // Record a match ending here, once per position
    if (attempt == 0 && min <= count) {
        if (matches.Length <= length) {
            matches.Length = length + 10;
        }
        matches[length] = true;
    }

    // Try the element once more at this position
    int elemLength = elem.Match(m, buffer, start, attempt);
    if (elemLength < 0) {
        return;
    }
    if (elemLength == 0) {
        // An empty element match is only recorded when it
        // supplies the final required repetition; recursion
        // stops here in either case.
        if (count + 1 == min) {
            if (matches.Length <= length) {
                matches.Length = length + 10;
            }
            matches[length] = true;
        }
        return;
    }

    // Alternative match at this position, then the next repetition
    FindMatches(m, buffer, start, length, count, attempt + 1);
    FindMatches(m,
                buffer,
                start + elemLength,
                length + elemLength,
                count + 1,
                0);
}
/**
 * Dispatches to the matching strategy selected by the repeat
 * type. A skip count of zero clears the cached match start and
 * match set first. The possessive strategy is only attempted
 * with a zero skip count.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the length of the matching string, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(Matcher m,
                          ReaderBuffer buffer,
                          int start,
                          int skip) {

    bool firstAttempt = (skip == 0);

    if (firstAttempt) {
        matchStart = -1;
        matches = null;
    }

    if (type == RepeatType.GREEDY) {
        return MatchGreedy(m, buffer, start, skip);
    }
    if (type == RepeatType.RELUCTANT) {
        return MatchReluctant(m, buffer, start, skip);
    }
    if (type == RepeatType.POSSESSIVE && firstAttempt) {
        return MatchPossessive(m, buffer, start, 0);
    }
    return -1;
}
/**
 * Matches the first element followed by the second element,
 * starting at the specified position. Each successful combined
 * match uses up one skip; when the second element fails, the
 * next alternative of the first element is tried and the second
 * element's skip counter restarts from zero.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the combined length of both element matches, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(Matcher m,
                          ReaderBuffer buffer,
                          int start,
                          int skip) {

    int firstLength = -1;
    int secondLength = 0;
    int firstSkip = 0;
    int secondSkip = 0;

    while (skip >= 0) {
        firstLength = elem1.Match(m, buffer, start, firstSkip);
        if (firstLength < 0) {
            return -1;
        }
        secondLength = elem2.Match(m,
                                   buffer,
                                   start + firstLength,
                                   secondSkip);
        if (secondLength >= 0) {
            // Combined match found, counts against the skips
            secondSkip++;
            skip--;
        } else {
            // Move on to the next alternative of the first element
            firstSkip++;
            secondSkip = 0;
        }
    }
    return firstLength + secondLength;
}
/**
 * Creates a matcher over the given look-ahead buffer. The
 * matcher is given a clone of this expression's element and
 * this expression's case-sensitivity flag.
 *
 * @param buffer         the character input buffer
 *
 * @return the regular expression matcher
 *
 * @since 1.5
 */
public Matcher Matcher(ReaderBuffer buffer) {
    Element copy = (Element) element.Clone();

    return new Matcher(copy, buffer, ignoreCase);
}
/**
 * Walks the automaton over the characters at the start of the
 * buffer and returns the value of the last state visited that
 * carried one. Only peek operations are used on the buffer. The
 * first character is lower-cased in case-insensitive mode and
 * looked up in the ASCII table when below 128; otherwise the
 * walk starts from the non-ASCII state without consuming it.
 *
 * @param buffer           the input buffer to check
 * @param caseInsensitive  the case-insensitive flag
 *
 * @return the match value, or
 *         null if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive) {
    TokenPattern longest = null;
    DFAState current;
    int offset = 0;
    int ch = buffer.Peek(0);

    if (ch < 0) {
        return null;
    }
    if (caseInsensitive) {
        ch = Char.ToLower((char) ch);
    }
    if (ch >= 128) {
        // The first character is re-read by the loop below
        current = nonAscii;
    } else {
        current = ascii[ch];
        if (current == null) {
            return null;
        }
        if (current.value != null) {
            longest = current.value;
        }
        offset = 1;
    }

    // Follow transitions until the input or the automaton ends
    for (ch = buffer.Peek(offset); ch >= 0; ch = buffer.Peek(offset)) {
        current = current.tree.Find((char) ch, caseInsensitive);
        if (current == null) {
            break;
        }
        if (current.value != null) {
            longest = current.value;
        }
        offset++;
    }
    return longest;
}
/**
 * Returns the length of a matching string starting at the
 * specified position. The number of matches to skip can also
 * be specified, but numbers higher than zero (0) cause a
 * failed match for any element that doesn't attempt to
 * combine other elements. Implementations are supplied by the
 * concrete element subclasses.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the length of the matching string, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public abstract int Match(Matcher m, ReaderBuffer buffer, int start, int skip);
/**
 * Returns the longest match length still available after the
 * given number of skips. A zero skip count is answered directly
 * by the possessive matcher; otherwise the full match set is
 * computed (once per start position) and scanned from the
 * longest length downwards.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the length of the longest matching string, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
private int MatchGreedy(Matcher m,
                        ReaderBuffer buffer,
                        int start,
                        int skip) {

    // Simple case, no backtracking requested
    if (skip == 0) {
        return MatchPossessive(m, buffer, start, 0);
    }

    // Compute the match set unless cached for this position
    if (matchStart != start) {
        matchStart = start;
        matches = new BitArray(10);
        FindMatches(m, buffer, start, 0, 0, 0);
    }

    // Scan downwards for the first match that is not skipped
    int i = matches.Count - 1;
    while (i >= 0) {
        if (matches[i]) {
            if (skip == 0) {
                return i;
            }
            skip--;
        }
        i--;
    }
    return -1;
}
/**
 * Matches this regular expression against the start of the
 * input buffer. The matcher is created on first use and reset
 * onto the supplied buffer on later calls.
 *
 * @param buffer         the input buffer to check
 *
 * @return the matcher length if it matched from the beginning, or
 *         zero (0) otherwise
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    if (matcher != null) {
        matcher.Reset(buffer);
    } else {
        matcher = regExp.Matcher(buffer);
    }

    if (matcher.MatchFromBeginning()) {
        return matcher.Length();
    }
    return 0;
}
/**
 * Compares the element string with the buffer contents at the
 * specified position, character by character. Any non-zero skip
 * count fails immediately. Reaching the end of the buffer flags
 * the matcher as having read the end of the string.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the length of the element string on a full match, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(Matcher m,
                          ReaderBuffer buffer,
                          int start,
                          int skip) {

    if (skip != 0) {
        return -1;
    }

    int offset = 0;
    while (offset < value.Length) {
        int ch = buffer.Peek(start + offset);

        if (ch < 0) {
            m.SetReadEndOfString();
            return -1;
        }
        // Only the input is lower-cased here.
        // NOTE(review): presumably value is already stored in lower
        // case for case-insensitive matching -- confirm.
        if (m.IsCaseInsensitive()) {
            ch = (int) Char.ToLower((char) ch);
        }
        if (ch != (int) value[offset]) {
            return -1;
        }
        offset++;
    }
    return value.Length;
}
/**
 * Matches the element repeatedly, without backtracking, until it
 * fails, matches the empty string, or the maximum repeat count
 * is reached. The accumulated length is returned only if the
 * final count lies within the minimum and maximum bounds.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param count          the start count, normally zero (0)
 *
 * @return the length of the longest matching string, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
private int MatchPossessive(Matcher m,
                            ReaderBuffer buffer,
                            int start,
                            int count) {

    int total = 0;
    int step = 1;

    // Consume as many repetitions as possible
    while (step > 0 && count < max) {
        step = elem.Match(m, buffer, start + total, 0);
        if (step < 0) {
            break;
        }
        count++;
        total += step;
    }

    return (min <= count && count <= max) ? total : -1;
}
/**
 * Tests whether the single character at the specified position
 * belongs to this character set. Any non-zero skip count fails
 * immediately. Reaching the end of the buffer flags the matcher
 * as having read the end of the string. The character is
 * lower-cased before the test in case-insensitive mode.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return one (1) if the character is in the set, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(Matcher m,
                          ReaderBuffer buffer,
                          int start,
                          int skip) {

    if (skip != 0) {
        return -1;
    }

    int ch = buffer.Peek(start);
    if (ch < 0) {
        m.SetReadEndOfString();
        return -1;
    }
    if (m.IsCaseInsensitive()) {
        ch = (int) Char.ToLower((char) ch);
    }

    if (InSet((char) ch)) {
        return 1;
    }
    return -1;
}
/**
 * Returns the shortest match length still available after the
 * given number of skips. The full match set is computed (once
 * per start position) and scanned from the shortest length
 * upwards.
 *
 * @param m              the matcher being used
 * @param buffer         the input character buffer to match
 * @param start          the starting position
 * @param skip           the number of matches to skip
 *
 * @return the length of the shortest matching string, or
 *         -1 if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
private int MatchReluctant(Matcher m,
                           ReaderBuffer buffer,
                           int start,
                           int skip) {

    // Compute the match set unless cached for this position
    if (matchStart != start) {
        matchStart = start;
        matches = new BitArray(10);
        FindMatches(m, buffer, start, 0, 0, 0);
    }

    // Scan upwards for the first match that is not skipped
    int i = 0;
    while (i < matches.Count) {
        if (matches[i]) {
            if (skip == 0) {
                return i;
            }
            skip--;
        }
        i++;
    }
    return -1;
}