/// <summary>
/// Recursively tokenises <paramref name="text"/> from <paramref name="currentOffset"/> onwards,
/// attaching the nodes it creates beneath <paramref name="current"/>.
/// </summary>
/// <remarks>
/// Each call asks <c>GetFirstTokenList</c> for the tokens found in the remaining text.
/// When none are returned, any non-whitespace remainder is added as a trailing text node and
/// the recursion ends. Otherwise, non-whitespace text that precedes the tokens is added as an
/// "infill" text node (which then becomes the parent of the token nodes), and every returned
/// token gets its own child node from which parsing continues just past that token.
/// </remarks>
/// <param name="tokenisers">Tokenisers handed to <c>GetFirstTokenList</c>.</param>
/// <param name="tree">Not read by this method; only passed on to the recursive calls.</param>
/// <param name="currentOffset">Index into <paramref name="text"/> at which this call starts parsing.</param>
/// <param name="current">Node that receives the children created by this call.</param>
/// <param name="text">The complete text being tokenised (not just the unparsed remainder).</param>
/// <param name="Properties">Passed unchanged to <c>GetFirstTokenList</c>.</param>
private static void ParseText(IEnumerable<IEntityTokeniser> tokenisers, TokenTree tree, int currentOffset, TokenNode current, string text, Dictionary<string, object> Properties)
{
    var tokens = GetFirstTokenList(tokenisers, text.Substring(currentOffset), Properties);

    // End of parse: no further tokens, so keep whatever non-whitespace text is left.
    if (tokens == null || tokens.Count == 0)
    {
        var trailing = text.Substring(currentOffset).Trim();
        if (trailing.Length > 0)
        {
            var trailingToken = new TextToken { Text = trailing, Pos = currentOffset, Length = trailing.Length };
            current.Children.Add(new TokenNode { Token = trailingToken });
        }

        return;
    }

    // Token positions are relative to the substring handed to GetFirstTokenList, so rebase
    // the first one onto the full text. All returned tokens are assumed to share this start.
    var tokenStart = currentOffset + tokens[0].Pos;

    // Text between the current offset and the tokens is kept as an infill node,
    // unless it is nothing but white space.
    if (tokenStart > currentOffset)
    {
        var infillText = text.Substring(currentOffset, tokenStart - currentOffset);
        if (infillText.Trim().Length > 0)
        {
            // Length must describe the stored Text; the previous "- 1" under-reported it
            // (a one-character infill ended up with Length 0).
            var infillToken = new TextToken { Text = infillText, Pos = currentOffset, Length = infillText.Length };
            var infillNode = new TokenNode { Token = infillToken };
            current.Children.Add(infillNode);

            // The tokens that follow hang off the infill node rather than the original parent.
            current = infillNode;
        }
    }

    // Every token becomes a child node, and parsing resumes immediately after that token.
    // NOTE(review): a token with Pos == 0 and Length == 0 would recurse at the same offset;
    // confirm that tokenisers never produce one.
    foreach (var token in tokens)
    {
        var nextOffset = currentOffset + token.Pos + token.Length;
        var node = new TokenNode { Token = token };
        current.Children.Add(node);
        ParseText(tokenisers, tree, nextOffset, node, text, Properties);
    }
}