/// <summary>
/// Replaces the current source text with <paramref name="str"/>, discards any previously
/// collected errors, tokens and row positions, and tokenizes the new text from index 0.
/// </summary>
/// <param name="str">text to tokenize; stored as the tokenizer's current source string</param>
/// <param name="parsingRules">rule set handed unchanged to the scanning overload (may be null)</param>
/// <param name="condition">allows parsing to exit early, if the early part of the string is sufficient for example</param>
public void Tokenize(string str, ParseRuleSet parsingRules = null, Func<Tokenizer, bool> condition = null) {
    this.str = str;
    // errors and rows are null-checked where they are used during scanning,
    // so the reset tolerates them being absent as well.
    errors?.Clear();
    tokens.Clear();
    rows?.Clear();
    Tokenize(parsingRules, 0, condition);
}
/// <summary>
/// Scans the current source string character by character starting at <paramref name="index"/>,
/// splitting it into tokens at whitespace and delimiters, then runs the final cleanup and
/// operator passes. Does nothing beyond clearing the token-string list when the source is null or empty.
/// </summary>
/// <param name="parseRules">
/// rule set to start scanning with. When null, <c>CodeRules.Default</c> is used and no initial
/// context entry is pushed; otherwise an entry created from this rule set seeds the context stack.
/// </param>
/// <param name="index">position in the source string at which scanning starts</param>
/// <param name="condition">allows parsing to exit early, if the early part of the string is sufficient for example</param>
/// <exception cref="Exception">thrown when an iteration begins at an index that is not beyond the previous iteration's index</exception>
protected void Tokenize(ParseRuleSet parseRules = null, int index = 0, Func<Tokenizer, bool> condition = null) {
    tokenStrings.Clear();
    if (string.IsNullOrEmpty(str)) {
        return;
    }

    List<SyntaxTree> contextStack = new List<SyntaxTree>();
    if (parseRules == null) {
        parseRules = CodeRules.Default;
    } else {
        contextStack.Add(parseRules.GetEntry(tokens, -1, null));
    }

    int tokenBegin = -1; // start of the token currently being read, or -1 when between tokens
    ParseRuleSet currentContext = parseRules;
    int lastIndex = index - 1;
    while (index < str.Length && (condition == null || condition.Invoke(this))) {
        if (index <= lastIndex) {
            throw new Exception("tokenize algorithm problem, the index isn't advancing");
        }
        // Remember where this iteration started. HandleDelimiter receives index by ref and can
        // move it; without this update the guard above could never detect a stalled index.
        lastIndex = index;

        char c = str[index];
        WhatsThis(currentContext, index, tokenBegin, parseRules, out Delim delim, out bool isWhiteSpace);
        if (delim != null) {
            FinishToken(index, ref tokenBegin); // finish whatever token was being read before this delimiter
            HandleDelimiter(delim, ref index, contextStack, ref currentContext, parseRules);
        } else if (!isWhiteSpace) {
            if (tokenBegin < 0) {
                tokenBegin = index;
            }
        } else {
            FinishToken(index, ref tokenBegin); // whitespace ends the current token
        }

        // c was read before any delimiter handling, while index here is the position
        // after HandleDelimiter may have advanced it.
        if (rows != null && c == '\n') {
            rows.Add(index);
        }
        ++index;
    }

    FinishToken(index, ref tokenBegin); // add the last token that was still being processed
    FinalTokenCleanup();
    ApplyOperators();
}
/// <summary>
/// Classifies the character at <paramref name="index"/> of the source string as whitespace,
/// the start of a delimiter, or neither.
/// </summary>
/// <param name="currentContext">rule set consulted first for whitespace and delimiters</param>
/// <param name="index">position in the source string to inspect</param>
/// <param name="tokenBegin">start of the token being read, forwarded to the delimiter lookup</param>
/// <param name="defaultContext">rule set used when the current one defines no whitespace or no delimiters</param>
/// <param name="delim">the delimiter found at this position, or null (always null for whitespace)</param>
/// <param name="isWhiteSpace">true when the character counts as whitespace</param>
private void WhatsThis(ParseRuleSet currentContext, int index, int tokenBegin, ParseRuleSet defaultContext, out Delim delim, out bool isWhiteSpace) {
    char ch = str[index];

    // Whitespace definition: current context, then the default context, then CodeRules.Default.
    if (currentContext.Whitespace != null) {
        isWhiteSpace = currentContext.IsWhitespace(ch);
    } else if (defaultContext.Whitespace != null) {
        isWhiteSpace = defaultContext.IsWhitespace(ch);
    } else {
        isWhiteSpace = CodeRules.Default.IsWhitespace(ch);
    }

    if (!isWhiteSpace) {
        // Delimiters come from the current context when it has any, otherwise from the default one.
        ParseRuleSet delimiterSource = (currentContext.Delimiters != null) ? currentContext : defaultContext;
        delim = delimiterSource.GetDelimiterAt(str, index, tokenBegin);
    } else {
        delim = null;
    }
}
/// <summary>
/// Creates a syntax tree node covering a run of tokens inside a token list.
/// </summary>
/// <param name="rule">rule set stored as this node's rules</param>
/// <param name="tokenList">token list this node refers into</param>
/// <param name="indexStart">index in <paramref name="tokenList"/> of the node's first token</param>
/// <param name="count">number of tokens the node spans</param>
/// <param name="meta">caller-supplied object stored as the node's source metadata</param>
public SyntaxTree(ParseRuleSet rule, List<Token> tokenList, int indexStart, int count, object meta) {
    this.rules = rule;
    this.tokens = tokenList;
    this.tokenStart = indexStart;
    this.tokenCount = count;
    this.sourceMeta = meta;
}
/// <summary>
/// Turns a delimiter found at <paramref name="index"/> into a token, advances the index past the
/// text it consumed, and opens or closes a syntax context when the delimiter is a <c>DelimCtx</c>.
/// </summary>
/// <param name="delim">delimiter that was recognized at the current position</param>
/// <param name="index">
/// position of the delimiter; on return it points at the last consumed character
/// (the caller's own increment then moves past it)
/// </param>
/// <param name="syntaxStack">stack of currently open contexts, innermost last</param>
/// <param name="currentContext">active rule set; replaced when a context is entered or left</param>
/// <param name="defaultContext">rule set restored when the last open context is closed</param>
private void HandleDelimiter(Delim delim, ref int index, List<SyntaxTree> syntaxStack, ref ParseRuleSet currentContext, ParseRuleSet defaultContext) {
    Token delimToken = new Token(delim, index, delim.text.Length);

    // Work out how many source characters this delimiter consumes.
    int consumed;
    if (delim.parseRule == null) {
        consumed = delim.text.Length;
    } else {
        ParseResult result = delim.parseRule.Invoke(str, index);
        if (result.IsError && errors != null) {
            result.error.OffsetBy(delimToken.index, rows);
            errors.Add(result.error);
        }
        if (result.replacementValue != null) {
            delimToken.length = result.lengthParsed;
            delimToken.meta = new TokenSubstitution(str, result.replacementValue);
        }
        consumed = result.lengthParsed;
    }
    index += consumed - 1;

    SyntaxTree endedSyntax = null;
    DelimCtx dcx = delim as DelimCtx;
    if (dcx != null) {
        int top = syntaxStack.Count - 1;
        if (top >= 0 && dcx.Context == currentContext && dcx.isEnd) {
            // Closing delimiter of the innermost open context: pop it and fall back to its parent.
            endedSyntax = syntaxStack[top];
            endedSyntax.endDelim = dcx;
            delimToken.meta = endedSyntax;
            endedSyntax.tokenCount = (tokens.Count - endedSyntax.tokenStart) + 1;
            syntaxStack.RemoveAt(top);
            currentContext = (top > 0) ? syntaxStack[top - 1].rules : defaultContext;
        } else if (dcx.isStart) {
            // Opening delimiter: push a new context whose parent is the current innermost one.
            SyntaxTree parent = (top >= 0) ? syntaxStack[top] : null;
            SyntaxTree opened = dcx.Context.GetEntry(tokens, tokens.Count, str, parent);
            opened.beginDelim = dcx;
            currentContext = dcx.Context;
            delimToken.meta = opened;
            syntaxStack.Add(opened);
        }
    }

    tokens.Add(delimToken);
    tokenStrings.Add(delim.text);

    // The closing token must already be in the list before the finished context is extracted.
    if (endedSyntax != null) {
        ExtractContextAsSubTokenList(endedSyntax);
    }
}