Example #1
0
        /// <summary>
        /// Tokenises <paramref name="text"/> from <paramref name="currentOffset"/> onwards and
        /// attaches the resulting nodes beneath <paramref name="current"/>. This method is
        /// recursive: every token found becomes a child node, and parsing resumes immediately
        /// after that token with the new node as the parent.
        /// </summary>
        /// <param name="tokenisers">Tokenisers handed to <c>GetFirstTokenList</c> to find the next token(s).</param>
        /// <param name="tree">Tree being built; not used directly here, only passed on to recursive calls.</param>
        /// <param name="currentOffset">Index into <paramref name="text"/> at which to resume parsing.</param>
        /// <param name="current">Node that receives the children created by this call.</param>
        /// <param name="text">The full text being parsed; token positions are relative to the unparsed remainder.</param>
        /// <param name="Properties">Property bag passed through unchanged to <c>GetFirstTokenList</c>.</param>
        private static void ParseText(IEnumerable <IEntityTokeniser> tokenisers, TokenTree tree, int currentOffset, TokenNode current, string text, Dictionary <string, object> Properties)
        {
            // The unparsed remainder is needed both for tokenising and for the trailing-text case.
            var remaining = text.Substring(currentOffset);
            var tokens    = GetFirstTokenList(tokenisers, remaining, Properties);

            // End of parse: no more tokens, so add the trailing text (if any) and stop recursing.
            if (tokens == null || tokens.Count == 0)
            {
                var trailing = remaining.Trim();

                if (trailing.Length > 0)
                {
                    // Note: Pos is the untrimmed offset, while Text/Length describe the trimmed text.
                    TextToken trailingToken = new TextToken
                    {
                        Text   = trailing,
                        Pos    = currentOffset,
                        Length = trailing.Length
                    };

                    current.Children.Add(new TokenNode { Token = trailingToken });
                }
                return;
            }

            // Token positions are relative to the remainder, so convert to an absolute position.
            // Only the first token is consulted; the list is assumed to share one start position.
            var tokenStart = currentOffset + tokens[0].Pos;

            // *** create infill ***
            // Text lying between the current offset and the first token becomes its own node.
            if (tokenStart > currentOffset)
            {
                var infillText = text.Substring(currentOffset, tokenStart - currentOffset);

                // Only add the infill if it contains more than just white space.
                if (infillText.Trim().Length > 0)
                {
                    TextToken infillToken = new TextToken
                    {
                        Text   = infillText,
                        Pos    = currentOffset,
                        // Length must agree with Text (previously one character short).
                        Length = infillText.Length
                    };

                    TokenNode infillNode = new TokenNode { Token = infillToken };
                    current.Children.Add(infillNode);

                    // The tokens found below are attached beneath the infill node.
                    current = infillNode;
                }
            }

            // Each alternative token becomes a child of the same parent, and the rest of the
            // text is parsed beneath it.
            foreach (var token in tokens)
            {
                // Absolute offset of the first character after this token.
                // NOTE(review): a token with Pos + Length == 0 would recurse at the same offset;
                // presumably tokenisers never return zero-length tokens - confirm.
                var nextOffset = currentOffset + (token.Pos + token.Length);

                TokenNode tokenNode = new TokenNode { Token = token };
                current.Children.Add(tokenNode);

                ParseText(tokenisers, tree, nextOffset, tokenNode, text, Properties);
            }
        }