/**
 * Resets this tokenizer so that it can be used with another
 * input stream. The current read buffer is disposed and replaced
 * by a fresh one wrapping the new input, and the remembered
 * previous token and last token match are cleared. It is
 * normally called in order to reuse a parser and tokenizer pair
 * with multiple input streams, thereby avoiding the cost of
 * re-analyzing the grammar structures.
 *
 * @param input          the new input stream to read
 *
 * @see Parser#reset(Reader)
 */
public void Reset(TextReader input) {
    // Release the old buffer before replacing it
    buffer.Dispose();
    buffer = new ReaderBuffer(input);

    // Forget all token state left over from the previous input
    previousToken = null;
    lastMatch.Clear();
}
/**
 * Creates a new tokenizer for the specified input stream. The
 * case flag is handed to each of the three token matchers
 * (string DFA, NFA and regular expression), so the tokenizer
 * processes tokens either in case-sensitive or case-insensitive
 * mode.
 *
 * @param input          the input stream to read
 * @param ignoreCase     the character case ignore flag
 */
public Tokenizer(TextReader input, bool ignoreCase) {
    // One matcher per supported pattern implementation
    stringDfaMatcher = new StringDFAMatcher(ignoreCase);
    nfaMatcher = new NFAMatcher(ignoreCase);
    regExpMatcher = new RegExpMatcher(ignoreCase);

    // Wrap the input in the look-ahead buffer used when matching
    buffer = new ReaderBuffer(input);
}
/**
 * Searches for matching token patterns at the start of the
 * input stream by running the string automaton over the buffer.
 * If a pattern is found, the token match object is updated with
 * the length of the pattern string and the pattern itself.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    TokenPattern found = automaton.Match(buffer, ignoreCase);

    // Nothing matched; leave the token match untouched
    if (found == null) {
        return;
    }
    match.Update(found.Pattern.Length, found);
}
/**
 * Searches for matching token patterns at the start of the
 * input stream. Every regular expression is tried in turn, and
 * each one that matches at least one character is reported to
 * the token match object together with its token pattern.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    for (int idx = 0; idx < regExps.Length; ++idx) {
        int matched = regExps[idx].Match(buffer);

        // Zero (or less) means this expression did not match
        if (matched <= 0) {
            continue;
        }
        match.Update(matched, patterns[idx]);
    }
}
/**
 * Checks if the start of the input stream matches this
 * regular expression. The matcher object is created on the
 * first call and then reused (after being reset to the given
 * buffer) on subsequent calls.
 *
 * @param buffer         the input buffer to check
 *
 * @return the longest match found, or
 *         zero (0) if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    if (matcher != null) {
        // Reuse the existing matcher on the supplied buffer
        matcher.Reset(buffer);
    } else {
        // First call: create the matcher lazily
        matcher = regExp.Matcher(buffer);
    }

    if (matcher.MatchFromBeginning()) {
        return matcher.Length();
    }
    return 0;
}
/**
 * Checks if the automaton matches the start of an input buffer.
 * The matching begins at offset zero (0) in the buffer. This
 * method only peeks ahead in the buffer; it does not call any
 * consuming read operation. The comparison can be done either
 * in case-sensitive or case-insensitive mode.
 *
 * @param buffer           the input buffer to check
 * @param caseInsensitive  the case-insensitive flag
 *
 * @return the match value of the last accepting state reached, or
 *         null if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive) {
    TokenPattern longest = null;
    DFAState current;
    int offset = 0;
    int ch = buffer.Peek(0);

    // A negative peek result means there is no character to match
    if (ch < 0) {
        return null;
    }
    if (caseInsensitive) {
        ch = Char.ToLower((char) ch);
    }

    if (ch >= 128) {
        // Non-ASCII first character: start from the shared state
        // and let the loop below look the character up (offset
        // stays at zero).
        current = nonAscii;
    } else {
        // ASCII first character: direct table lookup
        current = ascii[ch];
        if (current == null) {
            return null;
        }
        if (current.value != null) {
            longest = current.value;
        }
        offset++;
    }

    // Follow transitions for as long as characters are available,
    // remembering the value of the most recent accepting state.
    for (ch = buffer.Peek(offset); ch >= 0; ch = buffer.Peek(offset)) {
        current = current.tree.Find((char) ch, caseInsensitive);
        if (current == null) {
            break;
        }
        if (current.value != null) {
            longest = current.value;
        }
        offset++;
    }
    return longest;
}
/**
 * Checks if this NFA matches the specified input text. The
 * matching will be performed from position zero (0) in the
 * buffer. This method only peeks ahead in the buffer; it does
 * not call any consuming read operation. Every accepting state
 * encountered is reported to the token match object.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @return the number of characters matched, or
 *         zero (0) if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public int Match(ReaderBuffer buffer, TokenMatch match) {
    int length = 0;
    int pos = 1;
    int peekChar;
    NFAState state;

    // The first step of the match loop has been unrolled and
    // optimized for performance below.
    this.queue.Clear();
    peekChar = buffer.Peek(0);
    if (0 <= peekChar && peekChar < 128) {
        state = this.initialChar[peekChar];
        if (state != null) {
            this.queue.AddLast(state);
        }
    }
    if (peekChar >= 0) {
        this.initial.MatchTransitions((char) peekChar, this.queue, true);
    }
    this.queue.MarkEnd();
    peekChar = buffer.Peek(1);

    // The remaining match loop processes all subsequent states
    while (!this.queue.Empty) {
        if (this.queue.Marked) {
            pos++;
            peekChar = buffer.Peek(pos);
            this.queue.MarkEnd();
        }
        state = this.queue.RemoveFirst();
        if (state.value != null) {
            match.Update(pos, state.value);
            // Record the same length that is reported to the token
            // match. The position never decreases, so the last value
            // stored here is the longest match. Previously this
            // variable was never assigned and the method always
            // returned zero.
            length = pos;
        }
        if (peekChar >= 0) {
            state.MatchTransitions((char) peekChar, this.queue, false);
        }
    }
    return length;
}
/**
 * Checks if the start of the input stream matches this
 * regular expression. The search is run on the buffer's string
 * form, starting at the current buffer position, and only a
 * match beginning exactly at that position is accepted.
 *
 * @param buffer         the input buffer to check
 *
 * @return the longest match found, or
 *         zero (0) if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public override int Match(ReaderBuffer buffer) {
    // Workaround: .NET regular expressions give no indication that
    // the end of the input string was reached, so peek far ahead
    // (16K characters) before the buffer is converted to a string.
    buffer.Peek(1024 * 16);

    // There is no API to restrict the search to the start position
    // only, so the index of the match is verified afterwards.
    Match found = reg.Match(buffer.ToString(), buffer.Position);
    if (!found.Success || found.Index != buffer.Position) {
        return 0;
    }
    return found.Length;
}
/**
 * Checks if the start of the input stream matches this
 * regular expression. This is the abstract contract; the
 * matching itself is supplied by each concrete subclass.
 *
 * @param buffer         the input buffer to check
 *
 * @return the longest match found, or
 *         zero (0) if no match was found
 *
 * @throws IOException if an I/O error occurred
 */
public abstract int Match(ReaderBuffer buffer);
/**
 * Searches for matching token patterns at the start of the
 * input stream. The work is delegated entirely to the automaton,
 * which receives the token match object to update; nothing
 * returned by the automaton call is used here.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) { automaton.Match(buffer, match); }
/**
 * Searches for matching token patterns at the start of the
 * input stream. If a match is found, the token match object
 * is updated. This is the abstract contract; the matching
 * itself is supplied by each concrete subclass.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public abstract void Match(ReaderBuffer buffer, TokenMatch match);