/// <summary>
/// Creates a window token bound to the owning <see cref="ShingleFilter"/> and the
/// given attribute source, caching the term and offset attributes of that source.
/// </summary>
/// <param name="outerInstance">The filter instance this token belongs to.</param>
/// <param name="attSource">The attribute source the term and offset attributes are read from.</param>
public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
{
    // Look the attributes up first (term before offset), then publish everything to the fields.
    var term = attSource.GetAttribute<ICharTermAttribute>();
    var offset = attSource.GetAttribute<IOffsetAttribute>();

    this.outerInstance = outerInstance;
    this.attSource = attSource;
    termAtt = term;
    offsetAtt = offset;
}
/// <summary>
/// Recursively tries to extend a synonym match by consuming the next token and
/// descending into <paramref name="map"/>'s sub-map.
/// </summary>
/// <param name="map">The synonym map node reached so far.</param>
/// <returns>
/// The node returned by the deeper (longer) match when there is one; otherwise
/// <paramref name="map"/> itself if it has synonyms; otherwise <c>null</c>.
/// </returns>
private SlowSynonymMap Match(SlowSynonymMap map)
{
    SlowSynonymMap longerMatch = null;

    // Only pull another token when this node can be extended at all.
    AttributeSource next = map.Submap != null ? NextTok() : null;
    if (next != null)
    {
        // The token is this filter itself: take a snapshot copy of its attributes.
        if (next == this)
        {
            next = CloneAttributes();
        }

        // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
        var term = next.GetAttribute<ICharTermAttribute>();
        SlowSynonymMap child = map.Submap.Get(term.Buffer, 0, term.Length);
        if (child != null)
        {
            // Recurse to look for an even longer sequence.
            longerMatch = Match(child);
        }

        if (longerMatch == null)
        {
            // Nothing matched through this token: push it back.
            PushTok(next);
        }
        else
        {
            matched.AddFirst(next);
        }
    }

    if (longerMatch != null)
    {
        return longerMatch;
    }

    // No longer sequence matched, so if this node has synonyms, it is the match.
    return map.Synonyms != null ? map : null;
}
/// <summary>
/// Accepts the state when its term text equals "Dogs", ignoring case.
/// </summary>
/// <param name="a">The attribute source whose <see cref="ICharTermAttribute"/> is inspected.</param>
/// <returns><c>true</c> if the term text equals "Dogs" case-insensitively; otherwise <c>false</c>.</returns>
public override bool Accept(AttributeSource a)
{
    ICharTermAttribute termAtt = a.GetAttribute<ICharTermAttribute>();

    // Token text is compared against a fixed literal, so the comparison must not
    // depend on the current thread culture: use an ordinal, case-insensitive match.
    return termAtt.ToString().Equals("Dogs", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Creates a window token over the given attribute source, caching the term and
/// offset attributes of that source.
/// </summary>
/// <param name="attSource">The attribute source the term and offset attributes are read from.</param>
public InputWindowToken(AttributeSource attSource)
{
    // Look the attributes up first (term before offset), then publish everything to the fields.
    var term = attSource.GetAttribute<ICharTermAttribute>();
    var offset = attSource.GetAttribute<IOffsetAttribute>();

    this.attSource = attSource;
    termAtt = term;
    offsetAtt = offset;
}
/// <summary>
/// Advances to the next token. A token whose first character matches
/// <c>thaiPattern</c> is buffered in a clone and emitted piece by piece using the
/// boundaries reported by <c>breaker</c>; any other token is passed through unchanged.
/// </summary>
/// <returns><c>true</c> if a token was produced; otherwise <c>false</c>.</returns>
public override bool IncrementToken()
{
    // Phase 1: keep emitting the remaining pieces of the buffered token.
    if (hasMoreTokensInClone)
    {
        int pieceStart = breaker.Current;
        int pieceEnd = breaker.Next();
        if (pieceEnd == BreakIterator.Done)
        {
            // Buffered token is exhausted; fall through and read from the input.
            hasMoreTokensInClone = false;
        }
        else
        {
            clonedToken.CopyTo(this);
            termAtt.CopyBuffer(clonedTermAtt.Buffer, pieceStart, pieceEnd - pieceStart);
            if (!hasIllegalOffsets)
            {
                offsetAtt.SetOffset(clonedOffsetAtt.StartOffset + pieceStart, clonedOffsetAtt.StartOffset + pieceEnd);
            }
            else
            {
                // Offsets do not line up with the term text: keep the whole-token offsets.
                offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
            }

            if (handlePosIncr)
            {
                posAtt.PositionIncrement = 1;
            }

            return true;
        }
    }

    // Phase 2: pull the next token from the wrapped stream.
    if (!m_input.IncrementToken())
    {
        return false;
    }

    // Empty terms, and terms whose first character does not match thaiPattern, pass through as-is.
    if (termAtt.Length == 0 || !thaiPattern.IsMatch(termAtt[0].ToString()))
    {
        return true;
    }

    hasMoreTokensInClone = true;

    // If the length implied by start + end offsets doesn't match the term text, assume
    // this is a synonym and don't adjust the offsets.
    hasIllegalOffsets = offsetAtt.EndOffset - offsetAtt.StartOffset != termAtt.Length;

    // The cloned token is created lazily, as in the ctor not all attributes may have been added.
    if (clonedToken != null)
    {
        CopyTo(clonedToken);
    }
    else
    {
        clonedToken = CloneAttributes();
        clonedTermAtt = clonedToken.GetAttribute<ICharTermAttribute>();
        clonedOffsetAtt = clonedToken.GetAttribute<IOffsetAttribute>();
    }

    // Re-initialize the character iterator and the breaker over the buffered term text.
    charIterator.SetText(clonedTermAtt.Buffer, 0, clonedTermAtt.Length);
    breaker.SetText(new string(charIterator.Text, charIterator.Start, charIterator.Length));

    int firstEnd = breaker.Next();
    if (firstEnd == BreakIterator.Done)
    {
        return false;
    }

    // Emit the first piece; the position increment is kept as it is for the first token.
    termAtt.Length = firstEnd;
    offsetAtt.SetOffset(
        clonedOffsetAtt.StartOffset,
        hasIllegalOffsets ? clonedOffsetAtt.EndOffset : clonedOffsetAtt.StartOffset + firstEnd);
    return true;
}
/// <summary>
/// Accepts the state when its term text equals "The", using an ordinal,
/// case-insensitive comparison.
/// </summary>
/// <param name="a">The attribute source whose <see cref="ICharTermAttribute"/> is inspected.</param>
/// <returns><c>true</c> if the term text equals "The" ignoring case; otherwise <c>false</c>.</returns>
public override bool Accept(AttributeSource a)
{
    var term = a.GetAttribute<ICharTermAttribute>();
    string text = term.ToString();
    bool isMatch = text.Equals("The", StringComparison.OrdinalIgnoreCase);
    return isMatch;
}
/// <summary>
/// Creates a window token bound to the owning <see cref="ShingleFilter"/> and the
/// given attribute source, caching the term and offset attributes of that source.
/// </summary>
/// <param name="outerInstance">The filter instance this token belongs to.</param>
/// <param name="attSource">The attribute source the term and offset attributes are read from.</param>
public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
{
    this.outerInstance = outerInstance;
    this.attSource = attSource;

    // Attributes are looked up by their interface type through the generic accessor,
    // which returns a typed result, rather than by the concrete implementation class.
    this.termAtt = attSource.GetAttribute<ICharTermAttribute>();
    this.offsetAtt = attSource.GetAttribute<IOffsetAttribute>();
}