示例#1
0
        /// <summary>
        /// Builds a token tree for the customer message using the supplied tokenisers
        /// and returns the result of flattening that tree into lists of tokens.
        /// </summary>
        /// <param name="properties">Property bag forwarded to the tree build step.</param>
        /// <param name="customerMessage">The text to tokenise.</param>
        /// <param name="tokenisers">The entity tokenisers used while building the tree.</param>
        /// <returns>The flattened tree: a list of token lists.</returns>
        public static List <List <Token> > ParseText(Dictionary <string, object> properties, string customerMessage, IEnumerable <IEntityTokeniser> tokenisers)
        {
            // Step 1: populate a fresh tree from the message.
            var emptyTree = new TokenTree();
            TokenTree populatedTree = emptyTree.Build(tokenisers, customerMessage, properties);

            // Step 2: collapse the tree into its token lists and hand them back.
            return populatedTree.Flatten();
        }
示例#2
0
        /// <summary>
        /// Flattens the tree, starting at its head node, into lists of tokens.
        /// </summary>
        /// <param name="tree">The tree whose head node is flattened.</param>
        /// <returns>One list of tokens for every node list collected from the head node.</returns>
        internal static List <List <Token> > Flatten(this TokenTree tree)
        {
            var nodeLists   = new List <List <TokenNode> >();
            var workingList = new List <TokenNode>();

            // The node-level Flatten is expected to fill nodeLists (defined elsewhere).
            tree.Head.Flatten(nodeLists, workingList);

            // Project every node list onto the tokens its nodes carry.
            return nodeLists
                   .Select(nodes => nodes.Select(node => node.Token).ToList())
                   .ToList();
        }
示例#3
0
        /// <summary>
        /// Populates the tree from the given text: notifies every tokeniser that a parse is
        /// starting, installs a zero-length start token as the head node, then parses the
        /// text from offset 0 beneath that head.
        /// </summary>
        /// <param name="tree">The tree to populate; its Head is overwritten.</param>
        /// <param name="tokenisers">Tokenisers to notify and to parse with.</param>
        /// <param name="text">The text to parse.</param>
        /// <param name="properties">Property bag forwarded to the tokenisers and the parser.</param>
        /// <returns>The same tree instance that was passed in.</returns>
        static internal TokenTree Build(this TokenTree tree, IEnumerable <IEntityTokeniser> tokenisers, string text, Dictionary <string, object> properties)
        {
            // Give each tokeniser the full text and properties before parsing begins.
            foreach (IEntityTokeniser entityTokeniser in tokenisers)
            {
                entityTokeniser.BeginParse(text, properties);
            }

            // The head of the tree is a start marker at position 0 with no length.
            var startToken = new StartToken {
                Pos = 0, Length = 0
            };
            var root = new TokenNode {
                Token = startToken
            };
            tree.Head = root;

            // Parse the whole text, hanging everything off the root node.
            ParseText(tokenisers, tree, 0, root, text, properties);

            return tree;
        }
示例#4
0
        /// <summary>
        /// Tokenises <paramref name="text"/> from <paramref name="currentOffset"/> onwards and
        /// attaches the resulting nodes beneath <paramref name="current"/>. This method is
        /// recursive: every token returned for the remaining text becomes a child node, and
        /// parsing continues after the end of that token.
        /// </summary>
        /// <param name="tokenisers">Tokenisers passed to GetFirstTokenList.</param>
        /// <param name="tree">The tree being built; only forwarded to the recursive calls.</param>
        /// <param name="currentOffset">Index into <paramref name="text"/> at which parsing resumes.</param>
        /// <param name="current">Node that receives the nodes created by this call.</param>
        /// <param name="text">The complete text being parsed.</param>
        /// <param name="Properties">Property bag passed to GetFirstTokenList.</param>
        static private void ParseText(IEnumerable <IEntityTokeniser> tokenisers, TokenTree tree, int currentOffset, TokenNode current, string text, Dictionary <string, object> Properties)
        {
            // Compute the unparsed remainder once; it is needed for the lookup and the trailing text.
            var remaining = text.Substring(currentOffset);

            var tokens = GetFirstTokenList(tokenisers, remaining, Properties);

            // end of parse.. add trailing text if any
            if (tokens == null || tokens.Count == 0)
            {
                var subtext = remaining.Trim();

                if (subtext.Length > 0)
                {
                    TextToken textToken = new TextToken
                    {
                        Text   = subtext,
                        Pos    = currentOffset,
                        Length = subtext.Length
                    };

                    TokenNode node = new TokenNode {
                        Token = textToken
                    };
                    current.Children.Add(node);
                }
                return;
            }

            // calc start pos of the tokens (all are expected to have the same start pos).
            // Token positions are relative to the remaining text, hence the added offset.
            var tokenpos = currentOffset + tokens[0].Pos;


            // *** create infill ***
            // tokens are offset from start of text so create infilling text token
            if (tokenpos > currentOffset)
            {
                var infill_txt = text.Substring(currentOffset, tokenpos - currentOffset);
                // only add infill if it contains more than just white space
                if (infill_txt.Trim().Length > 0)
                {
                    TextToken textToken = new TextToken
                    {
                        Text   = infill_txt,
                        Pos    = currentOffset,
                        // Length covers exactly the characters held in Text (previously one short).
                        Length = infill_txt.Length
                    };

                    TokenNode infill_node = new TokenNode {
                        Token = textToken
                    };
                    current.Children.Add(infill_node);
                    // The tokens that follow hang off the infill node rather than the original parent.
                    current = infill_node;
                }
            }

            // we have tokens to deal with
            // for each child, find more tokens
            foreach (var token in tokens)
            {
                // update the current offset to work from: just past the end of this token
                // NOTE(review): a token with Pos + Length <= 0 would not advance the offset and
                // this recursion would presumably never terminate - confirm tokenisers never emit one.
                var nextoffset = currentOffset + (token.Pos + token.Length);

                // NOTE(review): token.Pos stays relative to the remaining text here, whereas the
                // text tokens above carry absolute positions - verify consumers expect this.
                TokenNode node = new TokenNode {
                    Token = token
                };
                current.Children.Add(node);

                ParseText(tokenisers, tree, nextoffset, node, text, Properties);
            }
        }
示例#5
0
 /// <summary>
 /// Prints the tree by invoking PrintTree on its head node with an argument of 0
 /// (presumably the starting depth - confirm against the node-level overload).
 /// </summary>
 /// <param name="tree">The tree to print.</param>
 /// <returns>The same tree instance, so calls can be chained.</returns>
 internal static TokenTree PrintTree(this TokenTree tree)
 {
     var head = tree.Head;
     head.PrintTree(0);

     return tree;
 }