/// <summary>
/// Turns a tree pattern string into a flat token list.
/// The pattern is first cut into chunks by <c>Split</c>; text chunks are run
/// through the lexer, while tag chunks become a single placeholder token each:
/// a <c>TokenTagToken</c> when the tag name starts with an upper-case letter,
/// or a <c>RuleTagToken</c> when it starts with a lower-case letter.
/// </summary>
/// <param name="pattern">The pattern text to tokenize.</param>
/// <returns>The tokens for the pattern, in source order (the lexer's EOF token is not included).</returns>
/// <exception cref="ArgumentException">
/// A tag names a token type or rule the parser does not know, or the tag name
/// starts with a character that is neither upper- nor lower-case.
/// </exception>
/// <remarks>
/// Side effect: the lexer's input stream is replaced once per text chunk.
/// </remarks>
public virtual IList<IToken> Tokenize(string pattern)
{
    IList<IToken> result = new List<IToken>();

    foreach (Chunk piece in Split(pattern))
    {
        TagChunk tagged = piece as TagChunk;
        if (tagged == null)
        {
            // Raw text between tags: lex it and keep everything up to (not including) EOF.
            TextChunk plain = (TextChunk)piece;
            lexer.SetInputStream(new AntlrInputStream(plain.GetText()));
            for (IToken tok = lexer.NextToken(); tok.Type != TokenConstants.Eof; tok = lexer.NextToken())
            {
                result.Add(tok);
            }
            continue;
        }

        // The case of the first character decides whether the tag names a token or a rule.
        string tagName = tagged.GetTag();
        char leading = tagName[0];

        if (char.IsUpper(leading))
        {
            int tokenType = parser.GetTokenType(tagName);
            if (tokenType == TokenConstants.InvalidType)
            {
                throw new ArgumentException("Unknown token " + tagName + " in pattern: " + pattern);
            }
            result.Add(new TokenTagToken(tagName, tokenType, tagged.GetLabel()));
        }
        else if (char.IsLower(leading))
        {
            int ruleIdx = parser.GetRuleIndex(tagName);
            if (ruleIdx == -1)
            {
                throw new ArgumentException("Unknown rule " + tagName + " in pattern: " + pattern);
            }
            // Rule tags are represented by the token type the bypass-alternative ATN assigns to the rule.
            int bypassTokenType = parser.GetATNWithBypassAlts().ruleToTokenType[ruleIdx];
            result.Add(new RuleTagToken(tagName, bypassTokenType, tagged.GetLabel()));
        }
        else
        {
            throw new ArgumentException("invalid tag: " + tagName + " in pattern: " + pattern);
        }
    }

    return result;
}
/// <summary>
/// Split
/// <code>&lt;ID&gt; = &lt;e:expr&gt; ;</code>
/// into 4 chunks for tokenizing by <see cref="Tokenize(string)"/>.
/// </summary>
/// <param name="pattern">The pattern text to split.</param>
/// <returns>
/// The alternating text and tag chunks in source order. Escape sequences are
/// removed from text chunks; tag chunks are left untouched. A pattern without
/// tags yields a single text chunk.
/// </returns>
/// <exception cref="ArgumentException">
/// The numbers of start and stop delimiters differ, a stop delimiter precedes
/// its start delimiter, or a tag is opened before the previous one is closed.
/// </exception>
internal virtual IList<Chunk> Split(string pattern)
{
    int p = 0;
    int n = pattern.Length;
    IList<Chunk> chunks = new List<Chunk>();

    // find all start and stop indexes first, then collect
    IList<int> starts = new List<int>();
    IList<int> stops = new List<int>();

    // Loop-invariant: the escaped forms of both delimiters.
    string escapedStart = escape + start;
    string escapedStop = escape + stop;

    while (p < n)
    {
        // Escaped delimiters are checked first so that they are skipped
        // instead of being recorded as tag boundaries.
        if (StartsWithAt(pattern, escapedStart, p))
        {
            p += escapedStart.Length;
        }
        else if (StartsWithAt(pattern, escapedStop, p))
        {
            p += escapedStop.Length;
        }
        else if (StartsWithAt(pattern, start, p))
        {
            starts.Add(p);
            p += start.Length;
        }
        else if (StartsWithAt(pattern, stop, p))
        {
            stops.Add(p);
            p += stop.Length;
        }
        else
        {
            p++;
        }
    }

    if (starts.Count > stops.Count)
    {
        throw new ArgumentException("unterminated tag in pattern: " + pattern);
    }
    if (starts.Count < stops.Count)
    {
        throw new ArgumentException("missing start tag in pattern: " + pattern);
    }

    int ntags = starts.Count;
    for (int i = 0; i < ntags; i++)
    {
        if (starts[i] >= stops[i])
        {
            throw new ArgumentException("tag delimiters out of order in pattern: " + pattern);
        }
        // A tag opened before the previous one closed (e.g. "<a <b> >") would
        // make the text-between-tags slice below run backwards; reject it here
        // with a meaningful message instead.
        if (i + 1 < ntags && starts[i + 1] < stops[i])
        {
            throw new ArgumentException("tag delimiters out of order in pattern: " + pattern);
        }
    }

    // collect into chunks now
    // NOTE(review): Sharpen.Runtime.Substring is used with (begin, end) indexes,
    // i.e. Java-style end-exclusive slicing - confirm against its definition.
    if (ntags == 0)
    {
        string text = Sharpen.Runtime.Substring(pattern, 0, n);
        chunks.Add(new TextChunk(text));
    }
    if (ntags > 0 && starts[0] > 0)
    {
        // copy text up to first tag into chunks
        string text = Sharpen.Runtime.Substring(pattern, 0, starts[0]);
        chunks.Add(new TextChunk(text));
    }
    for (int i = 0; i < ntags; i++)
    {
        // copy inside of <tag>
        string tag = Sharpen.Runtime.Substring(pattern, starts[i] + start.Length, stops[i]);
        string ruleOrToken = tag;
        string label = null;
        int colon = tag.IndexOf(':');
        if (colon >= 0)
        {
            // "label:name" - everything before the first colon is the label
            label = Sharpen.Runtime.Substring(tag, 0, colon);
            ruleOrToken = Sharpen.Runtime.Substring(tag, colon + 1, tag.Length);
        }
        chunks.Add(new TagChunk(label, ruleOrToken));
        if (i + 1 < ntags)
        {
            // copy from end of <tag> to start of next
            string text = Sharpen.Runtime.Substring(pattern, stops[i] + stop.Length, starts[i + 1]);
            chunks.Add(new TextChunk(text));
        }
    }
    if (ntags > 0)
    {
        int afterLastTag = stops[ntags - 1] + stop.Length;
        if (afterLastTag < n)
        {
            // copy text from end of last tag to end
            string text = Sharpen.Runtime.Substring(pattern, afterLastTag, n);
            chunks.Add(new TextChunk(text));
        }
    }

    // strip out the escape sequences from text chunks but not tags
    for (int i = 0; i < chunks.Count; i++)
    {
        TextChunk tc = chunks[i] as TextChunk;
        if (tc != null)
        {
            string unescaped = tc.GetText().Replace(escape, string.Empty);
            // Only allocate a replacement chunk when something was actually removed.
            if (unescaped.Length < tc.GetText().Length)
            {
                chunks[i] = new TextChunk(unescaped);
            }
        }
    }
    return chunks;
}

/// <summary>
/// Ordinal test for whether <paramref name="value"/> occurs in
/// <paramref name="text"/> exactly at <paramref name="index"/>.
/// Unlike <c>text.IndexOf(value, index) == index</c>, this inspects at most
/// <c>value.Length</c> characters and is not affected by the current culture.
/// </summary>
private static bool StartsWithAt(string text, string value, int index)
{
    return string.CompareOrdinal(text, index, value, 0, value.Length) == 0;
}