/// <summary>
/// Fills in this node's fields from its parsed <c>children</c>; which fields are
/// written depends on how many children there are.
/// </summary>
/// <remarks>
/// The first child always provides <c>initialOpTok</c>, and <c>kind</c> starts out as <c>kindE.any</c>.
/// <list type="bullet">
/// <item><description>1 child: <c>name</c> is set to the empty string.</description></item>
/// <item><description>2 children: if the second child's token is a <c>PTokE.CS_name</c> it becomes the name
/// (<c>name</c> / <c>nameTok</c>); otherwise it becomes <c>refineOPTok</c>, <c>kind</c> is derived from it
/// and <c>name</c> is set to the empty string.</description></item>
/// <item><description>3 children: the second child becomes <c>refineOPTok</c> and determines <c>kind</c>,
/// the third child provides <c>name</c> / <c>nameTok</c>.</description></item>
/// <item><description>More than 3 children: nothing beyond the initial assignments is written.</description></item>
/// </list>
/// </remarks>
public override void build()
{
    initialOpTok = TermTok(children[0]);
    kind = kindE.any;

    int childCount = children.Length;
    if (childCount == 1)
    {
        name = "";
        return;
    }

    // The second child's token is fetched for every count above one, before the per-count handling.
    PTok second = TermTok(children[1]);

    switch (childCount)
    {
        case 2:
            if (second.E != PTokE.CS_name)
            {
                // Second child is a refining operator: no name, kind comes from the operator.
                name = "";
                kind = kindE_from_PTokE(TermEnum(children[1]));
                refineOPTok = second;
            }
            else
            {
                // Second child is the name itself; kind keeps the kindE.any assigned above.
                name = second.pay;
                nameTok = second;
            }
            break;

        case 3:
            name = TermPay(children[2]);
            nameTok = TermTok(children[2]);
            kind = kindE_from_PTokE(TermEnum(children[1]));
            refineOPTok = TermTok(children[1]);
            break;
    }
}
// Keyword operators. Each alternative starts with a literal '^'.
// todo: '^' is there for disambiguation; there is currently no sane way to keep these
//       from conflicting with `cname` otherwise.
// NOTE(review): "^1up" (digit one) is accepted alongside "^lup" and both map to the same
//               token below - presumably deliberate typo tolerance; confirm.
const string keywords_S = @"(?'pay'\^lup|\^ldown|\^1up)";

/// <summary>
/// Splits <paramref name="str_in"/> into tokens by repeatedly trying a fixed, ordered list of
/// patterns against the start of the not-yet-consumed input.
/// </summary>
/// <param name="str_in">The text to tokenize. Must not be null.</param>
/// <param name="relaxed">
/// When true, a character that no pattern accepts is consumed as a single <c>PTokE.ErrT</c> token
/// instead of aborting. If a JSON literal fails to parse, the literal prefix plus all remaining
/// input is emitted as one <c>PTokE.ErrT</c> token and tokenization stops there, because the end
/// of the literal is unknown.
/// </param>
/// <returns>The tokens in input order (whitespace is reported as <c>PTokWhitespace</c>).</returns>
/// <exception cref="ArgumentNullException"><paramref name="str_in"/> is null.</exception>
/// <exception cref="Exception">
/// Not in relaxed mode and the input cannot be tokenized or a JSON literal fails to parse;
/// or (in either mode) an operator pattern matched a payload that has no token mapping.
/// </exception>
/// <exception cref="NotImplementedException">The keyword pattern matched a payload that has no token mapping.</exception>
public static PTokBase[] Tokenize(string str_in, bool relaxed = false)
{
    if (str_in == null)
    {
        throw new ArgumentNullException("str_in");
    }

    string rest = str_in;   // input that has not been consumed yet
    string payl = null;     // payload of the last successful Eat; reset to null on a failed Eat and then invalid
    List<PTokBase> R = new List<PTokBase>();

    // Tries to match RE at the very start of `rest`. On success the 'pay' group is stored in
    // `payl` and the matched prefix is removed from `rest`.
    Func<string, bool> Eat = (RE) =>
    {
        Match M = Regex.Match(rest, @"^" + RE + @"(?'REST'.*)");
        if (!M.Success)
        {
            payl = null;
            return false;
        }

        payl = M.Groups["pay"].Value;
        // Slice by the REST group's start index rather than taking its value: '.' does not match
        // '\n', so the group's value ends at the first line break and everything after it would
        // be silently dropped from the input.
        rest = rest.Substring(M.Groups["REST"].Index);
        return true;
    };

    while (true)
    {
        if (Eat(WhitespaceS))
        {
            // todo: len counts characters ("\t".Length == 1), not display columns - could be a problem here
            R.Add(new PTokWhitespace { len = payl.Length });
            continue;
        }

        if (Eat(cSharp_basic_identifierS))
        {
            R.Add(new PTok { E = PTokE.CS_name, pay = payl });
            continue;
        }

        if (Eat(AssignmentOP_S))
        {
            PTok op = new PTok { pay = payl };
            if (payl == "<-")
            {
                op.E = PTokE.OP_arrow_left;
            }
            else
            {
                throw new Exception("assignment op tokenize");
            }
            R.Add(op);
            continue;
        }

        if (Eat(DeclOP_S))
        {
            PTok op = new PTok { pay = payl };
            if (payl == "->")
            {
                op.E = PTokE.OP_arrow_right;
            }
            else
            {
                throw new Exception("decl tokenize");
            }
            R.Add(op);
            continue;
        }

        if (Eat(SG_Operator_S))
        {
            PTok op = new PTok { pay = payl };
            if (payl == ">")
            {
                op.E = PTokE.OP_GT;
            }
            else if (payl == ">>")
            {
                op.E = PTokE.OP_doubleGT;
            }
            else
            {
                throw new Exception("sg op tokenize");
            }
            R.Add(op);
            continue;
        }

        if (Eat(Equals_Operator_S))
        {
            PTok op = new PTok { pay = payl };
            if (payl == "==")
            {
                op.E = PTokE.OP_equals;
            }
            else
            {
                throw new Exception("equals op tokenize");
            }
            R.Add(op);
            continue;
        }

        if (Eat(SpecialPropOP_S))
        {
            R.Add(new PTok { E = PTokE.OP_special_prop, pay = payl });
            continue;
        }

        if (Eat(SingleCharOpS))
        {
            PTok op = new PTok { pay = payl };
            switch (payl)
            {
                case ".": op.E = PTokE.OP_dot; break;
                case "*": op.E = PTokE.OP_star; break;
                case "%": op.E = PTokE.OP_percent; break;
                case ":": op.E = PTokE.OP_colon; break;
                case "[": op.E = PTokE.squareBRL; break;
                case "]": op.E = PTokE.squareBRR; break;
                case "{": op.E = PTokE.curlyBRL; break;
                case "}": op.E = PTokE.curlyBRR; break;
                case "(": op.E = PTokE.plainBRL; break;
                case ")": op.E = PTokE.plainBRR; break;
                case "/": op.E = PTokE.OP_slash; break;
                case "\\": op.E = PTokE.OP_backslash; break;
                case "$": op.E = PTokE.OP_dollar; break;
                case "#": op.E = PTokE.OP_sharp; break;
                case ",": op.E = PTokE.OP_comma; break;
                default: throw new Exception("single char OP tokenize");
            }
            R.Add(op);
            continue;
        }

        if (Eat(JSonLiteral))
        {
            // How much input the literal consumes is decided by the JSON reader, not by the regex.
            object JResult = null;
            string new_rest = "";
            if (readJSON(rest, out JResult, out new_rest))
            {
                // todo: quick guess - assumes new_rest is the tail of rest, so the consumed text is
                //       the leading part of rest that is no longer in new_rest
                string consumed = rest.Substring(0, rest.Length - new_rest.Length);
                R.Add(new PTokJSON { E = PTokE.JSON, pay = payl + consumed, payJSON = JResult });
                rest = new_rest;
                continue;
            }

            // JSON parsing failed: there is no way of knowing where the literal was supposed to
            // end, so tokenization cannot continue past this point.
            if (relaxed)
            {
                R.Add(new PTok { E = PTokE.ErrT, pay = payl + rest });
                return R.ToArray();
            }
            throw new Exception("json literal tokenize");
        }

        if (Eat(keywords_S))
        {
            if (payl == "^lup" || payl == "^1up")
            {
                R.Add(new PTok { E = PTokE.OP_lift_up, pay = payl });
                continue;
            }
            if (payl == "^ldown")
            {
                R.Add(new PTok { E = PTokE.OP_lift_down, pay = payl });
                continue;
            }
            throw new NotImplementedException();
        }

        if (relaxed)
        {
            // Consume one arbitrary character and tag it as a tokenization error.
            // [\s\S] instead of '.' so that a line break can be consumed here as well.
            if (Eat(@"(?'pay'[\s\S])"))
            {
                R.Add(new PTok { E = PTokE.ErrT, pay = payl });
                continue;
            }
        }

        if (rest == "")
        {
            return R.ToArray(); // everything consumed: success
        }

        throw new Exception("untokenizable input :" + rest); // todo : non tokenizable string
    }
}