/// <summary>
/// Get the previous token entity, skipping any tokens listed in the
/// current pattern's Ignore set.
/// </summary>
/// <param name="tokenEntity">Token to start from (may be null).</param>
/// <returns>The nearest non-ignored preceding token, or null if there is none.</returns>
public TokenEntity GetPrevTokenEntity(TokenEntity tokenEntity)
{
    for (var candidate = tokenEntity?.Previous; candidate != null; candidate = candidate.Previous)
    {
        if (!this.EntityPattern.Ignore.Contains(candidate.Text))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Get the next token entity, honoring ignore in the context.
/// </summary>
/// <param name="tokenEntity">Token to start from (may be null).</param>
/// <returns>The nearest non-ignored following token, or null if there is none.</returns>
public TokenEntity GetNextTokenEntity(TokenEntity tokenEntity)
{
    // Use ?. so a null input yields null instead of throwing a
    // NullReferenceException, matching GetPrevTokenEntity's behavior.
    var result = tokenEntity?.Next;
    while (result != null && this.EntityPattern.Ignore.Contains(result.Text))
    {
        result = result.Next;
    }

    return result;
}
/// <summary>
/// Try to match <paramref name="entityPattern"/> starting at <paramref name="textEntity"/>
/// and, when the pattern consumes at least one token, record the resulting entity on the context.
/// </summary>
/// <param name="context">Match context; receives the current pattern/entity and any new entity found.</param>
/// <param name="textEntity">Token marking the start position of the match attempt.</param>
/// <param name="entityPattern">Pattern to evaluate at this position.</param>
private void ProcessEntityPattern(MatchContext context, TokenEntity textEntity, EntityPattern entityPattern)
{
    context.EntityPattern = entityPattern;
    context.CurrentEntity = new LucyEntity()
    {
        Type = entityPattern.Name,
        Resolution = entityPattern.Resolution,
        Start = textEntity.Start
    };

    // Evaluate the pattern at this starting token.
    var matchResult = entityPattern.PatternMatcher.Matches(context, textEntity, nextPatternMatcher: null);

    // Keep only matches that actually consumed at least one token.
    if (!matchResult.Matched || matchResult.NextToken == textEntity)
    {
        return;
    }

    var entity = context.CurrentEntity;
    entity.End = matchResult.End;
    entity.Text = context.Text.Substring(entity.Start, entity.End - entity.Start);

    if (entityPattern.Resolution != null)
    {
        entity.Resolution = entityPattern.Resolution;
    }

    // Wildcard placeholder entities are internal to matching; drop them from the children.
    entity.Children.RemoveWhere(et => et.Type == WildcardPatternMatcher.ENTITYTYPE);

    context.MergeEntities(entity.Children);
    context.ResolveEntities(entity.Children);

    // Score = number of contained entities plus the fraction of the input text covered.
    entity.Score = entity.GetAllEntities().Count() + ((float)(entity.End - entity.Start) / context.Text.Length);

    context.AddNewEntity(entity);
}
/// <summary>
/// Tokenize text into a lazily-yielded, doubly-linked chain of token entities,
/// optionally attaching fuzzy token alternatives from the fuzzy analyzer.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <returns>Tokens in order; each token's Previous/Next links are wired as it is produced.</returns>
public IEnumerable<TokenEntity> Tokenize(string text)
{
    TokenEntity previous = null;

    using (var tokenStream = _exactAnalyzer.GetTokenStream("name", text))
    {
        var termAtt = tokenStream.GetAttribute<ICharTermAttribute>();
        var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
        tokenStream.Reset();

        while (tokenStream.IncrementToken())
        {
            string token = termAtt.ToString();
            bool skipFuzzy = false;
            var start = offsetAtt.StartOffset;
            var end = offsetAtt.EndOffset;

            // The analyzer emits the token without a leading '@' or '$' sigil;
            // re-attach it from the raw text and skip fuzzy matching for such
            // tokens. (Previously two byte-identical branches handled '@' and
            // '$' separately; merged into one condition.)
            if (start > 0 && (text[start - 1] == '@' || text[start - 1] == '$'))
            {
                token = text.Substring(start - 1, end - start + 1);
                skipFuzzy = true;
            }

            var resolution = new TokenResolution()
            {
                Token = token
            };

            var tokenEntity = new TokenEntity()
            {
                Type = TokenPatternMatcher.ENTITYTYPE,
                Text = text.Substring(start, end - start),
                Start = start,
                End = end,
                Resolution = resolution,
                Previous = previous
            };

            if (_fuzzyAnalyzer != null && !skipFuzzy)
            {
                // Collect alternative token forms produced by the fuzzy analyzer.
                using (var fuzzyTokenStream = _fuzzyAnalyzer.GetTokenStream("name", tokenEntity.Text))
                {
                    var fuzzyTermAtt = fuzzyTokenStream.GetAttribute<ICharTermAttribute>();
                    fuzzyTokenStream.Reset();
                    while (fuzzyTokenStream.IncrementToken())
                    {
                        resolution.FuzzyTokens.Add(fuzzyTermAtt.ToString());
                    }
                }
            }

            // Wire the forward link of the previously yielded token.
            if (previous != null)
            {
                previous.Next = tokenEntity;
            }

            previous = tokenEntity;
            yield return tokenEntity;
        }
    }
}