Example #1
        /// <summary>
        /// Get the previous token entity, honoring ignore in the context.
        /// </summary>
        public TokenEntity GetPrevTokenEntity(TokenEntity tokenEntity)
        {
            var result = tokenEntity?.Previous;

            while (result != null && this.EntityPattern.Ignore.Contains(result.Text))
            {
                result = result.Previous;
            }
            return result;
        }
Example #2
        /// <summary>
        /// Get the next token entity, honoring ignore in the context.
        /// </summary>
        /// <param name="tokenEntity">The token entity to start from.</param>
        /// <returns>The next non-ignored token entity, or null if there is none.</returns>
        public TokenEntity GetNextTokenEntity(TokenEntity tokenEntity)
        {
            var result = tokenEntity?.Next;

            while (result != null && this.EntityPattern.Ignore.Contains(result.Text))
            {
                result = result.Next;
            }
            return result;
        }
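
Taken together, Examples #1 and #2 walk the doubly linked token chain in either direction while skipping any token whose text appears in the pattern's Ignore set. Below is a minimal, self-contained sketch of the same traversal idea; the Token type and the ignore set are illustrative stand-ins, not the actual Lucy classes.

// Minimal sketch of ignore-aware traversal over a doubly linked token list.
// "Token" and "ignore" are hypothetical stand-ins, not the Lucy types above.
using System;
using System.Collections.Generic;

class Token
{
    public string Text;
    public Token Previous;
    public Token Next;
}

static class TraversalDemo
{
    // Walk forward, skipping tokens whose text is in the ignore set.
    static Token NextSignificant(Token token, ISet<string> ignore)
    {
        var result = token?.Next;
        while (result != null && ignore.Contains(result.Text))
        {
            result = result.Next;
        }
        return result;
    }

    static void Main()
    {
        // Build the chain: "a" <-> "the" <-> "book"
        var a    = new Token { Text = "a" };
        var the  = new Token { Text = "the", Previous = a };
        var book = new Token { Text = "book", Previous = the };
        a.Next   = the;
        the.Next = book;

        var ignore = new HashSet<string> { "the" };
        Console.WriteLine(NextSignificant(a, ignore).Text); // prints "book", skipping "the"
    }
}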
Example #3
        private void ProcessEntityPattern(MatchContext context, TokenEntity textEntity, EntityPattern entityPattern)
        {
            context.EntityPattern = entityPattern;
            context.CurrentEntity = new LucyEntity()
            {
                Type       = entityPattern.Name,
                Resolution = entityPattern.Resolution,
                Start      = textEntity.Start
            };

            // see if it matches at this textEntity starting position.
            var matchResult = entityPattern.PatternMatcher.Matches(context, textEntity, nextPatternMatcher: null);

            //System.Diagnostics.Trace.TraceInformation($"[{textEntity.Start}] {context.EntityPattern} => \"{textEntity}\" {matchResult.Matched}");

            // if it matches
            if (matchResult.Matched && matchResult.NextToken != textEntity)
            {
                // add it to the entities.
                context.CurrentEntity.End  = matchResult.End;
                context.CurrentEntity.Text = context.Text.Substring(context.CurrentEntity.Start, context.CurrentEntity.End - context.CurrentEntity.Start);

                //if (context.CurrentEntity.Children.Any())
                //{
                //    context.CurrentEntity.Resolution = null;
                //}
                //else
                if (context.CurrentEntity.Resolution == null)
                {
                    if (entityPattern.Resolution != null)
                    {
                        context.CurrentEntity.Resolution = entityPattern.Resolution;
                    }
                    context.CurrentEntity.Children.RemoveWhere(et => et.Type == WildcardPatternMatcher.ENTITYTYPE);
                }

                context.MergeEntities(context.CurrentEntity.Children);
                context.ResolveEntities(context.CurrentEntity.Children);

                context.CurrentEntity.Score = context.CurrentEntity.GetAllEntities().Count() + ((float)(context.CurrentEntity.End - context.CurrentEntity.Start) / context.Text.Length);

                context.AddNewEntity(context.CurrentEntity);
                // Trace.TraceInformation($"\n [{textEntity.Start}] {context.EntityPattern} => {matchResult.Matched} {context.CurrentEntity}");

                //foreach (var childEntity in context.CurrentEntity.Children)
                //{
                //    context.AddNewEntity(childEntity);
                //}
            }
        }
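
Worth noting in Example #3: the Score assigned near the end combines the number of entities in the match (via GetAllEntities().Count()) with the fraction of the input text the span covers, so matches that capture more structure and more of the text rank higher when entities are merged. A small, hedged illustration of how that formula orders two competing spans (the numbers below are made up, not taken from a real match):

// Hedged illustration of the scoring formula used above:
//   score = totalEntityCount + (matchLength / textLength)
// All values here are hypothetical.
using System;

static class ScoreDemo
{
    static float Score(int totalEntityCount, int start, int end, int textLength)
        => totalEntityCount + ((float)(end - start) / textLength);

    static void Main()
    {
        int textLength = 40;

        // A match with 2 entities covering 10 characters...
        var shortMatch = Score(totalEntityCount: 2, start: 0, end: 10, textLength: textLength);

        // ...loses to a match with 2 entities covering 25 characters.
        var longMatch = Score(totalEntityCount: 2, start: 0, end: 25, textLength: textLength);

        Console.WriteLine($"{shortMatch:F3} vs {longMatch:F3}"); // 2.250 vs 2.625
    }
}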
Example #4
        public IEnumerable<TokenEntity> Tokenize(string text)
        {
            TokenEntity previous = null;

            using (var tokenStream = _exactAnalyzer.GetTokenStream("name", text))
            {
                var termAtt   = tokenStream.GetAttribute<ICharTermAttribute>();
                var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
                tokenStream.Reset();

                while (tokenStream.IncrementToken())
                {
                    string token     = termAtt.ToString();
                    bool   skipFuzzy = false;
                    var    start     = offsetAtt.StartOffset;
                    var    end       = offsetAtt.EndOffset;

                    // Fold a leading '@' or '$' sigil back into the token and skip fuzzy analysis for it.
                    if (start > 0 && (text[start - 1] == '@' || text[start - 1] == '$'))
                    {
                        token     = text.Substring(start - 1, end - start + 1);
                        skipFuzzy = true;
                    }

                    var resolution = new TokenResolution()
                    {
                        Token = token
                    };

                    var tokenEntity = new TokenEntity()
                    {
                        Type       = TokenPatternMatcher.ENTITYTYPE,
                        Text       = text.Substring(start, end - start),
                        Start      = start,
                        End        = end,
                        Resolution = resolution,
                        Previous   = previous
                    };

                    if (_fuzzyAnalyzer != null && !skipFuzzy)
                    {
                        // get fuzzyText
                        using (var fuzzyTokenStream = _fuzzyAnalyzer.GetTokenStream("name", tokenEntity.Text))
                        {
                            var fuzzyTermAtt = fuzzyTokenStream.GetAttribute<ICharTermAttribute>();
                            fuzzyTokenStream.Reset();
                            while (fuzzyTokenStream.IncrementToken())
                            {
                                resolution.FuzzyTokens.Add(fuzzyTermAtt.ToString());
                            }
                        }
                    }
                    if (previous != null)
                    {
                        previous.Next = tokenEntity;
                    }
                    previous = tokenEntity;
                    yield return tokenEntity;
                }
            }
        }
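
Example #4's tokenization loop can be exercised on its own with a plain Lucene.Net analyzer. The sketch below is an assumption-laden stand-in: it uses StandardAnalyzer where the class above uses its own _exactAnalyzer, and it only reproduces the offset-based handling of a leading '@' or '$'; the real analyzer configuration and the TokenEntity/TokenResolution wiring are not shown.

// Hedged, self-contained sketch of the loop above using Lucene.Net 4.8's
// StandardAnalyzer in place of _exactAnalyzer. Requires the Lucene.Net 4.8 packages.
using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

static class TokenizeSketch
{
    static void Main()
    {
        var text = "send $20 to @alice";

        using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
        using (var tokenStream = analyzer.GetTokenStream("name", text))
        {
            var termAtt   = tokenStream.GetAttribute<ICharTermAttribute>();
            var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
            tokenStream.Reset();

            while (tokenStream.IncrementToken())
            {
                var token = termAtt.ToString();
                var start = offsetAtt.StartOffset;
                var end   = offsetAtt.EndOffset;

                // Same trick as Example #4: fold a leading '@' or '$' back into the token.
                if (start > 0 && (text[start - 1] == '@' || text[start - 1] == '$'))
                {
                    token = text.Substring(start - 1, end - start + 1);
                }

                Console.WriteLine($"[{start},{end}) {token}");
            }
        }
        // Expected output is roughly:
        //   [0,4) send
        //   [6,8) $20
        //   [13,18) @alice
        // ("to" is dropped because StandardAnalyzer filters English stop words.)
    }
}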