public UniqueStems ( String word ) : IEnumerable |
||
word | String | Word to find the stems for. |
Returns | IEnumerable |
/// <summary>
/// Advances to the next token, emitting one token for every stem of each input word.
/// </summary>
/// <remarks>
/// Stems still queued in <c>_buffer</c> are emitted first: the saved state is restored,
/// the position increment is set to 0 and the term is replaced by the queued stem.
/// Otherwise the next input token is read and stemmed through <c>_stemmer</c>
/// (via <c>UniqueStems</c> when <c>_dedup</c> is set, via <c>Stem</c> otherwise).
/// When that yields no stems, a non-empty term is rewritten with the output of
/// <c>_slovakStemmer</c> instead of being passed through unchanged.
/// </remarks>
/// <returns>
/// <c>true</c> when a token was produced; <c>false</c> when the wrapped input has no more tokens.
/// </returns>
public override Boolean IncrementToken()
{
    // Emit stems left over from the previous input token before consuming new input.
    if (_buffer.Count > 0)
    {
        var nextStem = _buffer.Dequeue();
        RestoreState(_savedState);
        _posIncAtt.PositionIncrement = 0;
        _termAtt.SetTermBuffer(nextStem.Stem, 0, nextStem.StemLength);
        return true;
    }

    if (!input.IncrementToken())
    {
        return false;
    }

    var newTerms = _dedup ? _stemmer.UniqueStems(_termAtt.Term) : _stemmer.Stem(_termAtt.Term);
    foreach (var newTerm in newTerms)
    {
        _buffer.Enqueue(newTerm);
    }

    if (_buffer.Count == 0)
    {
        // Originally: we do not know this word, return it unchanged.
        // Changed: apply SlovakStemmer on words not found in the dictionary (possible named entities).
        // The emptiness check uses the term length directly rather than materialising a string.
        var termLength = _termAtt.TermLength();
        if (termLength > 0)
        {
            _slovakStemmer.Stem(_termAtt.TermBuffer(), termLength, out char[] stemmedTerm, out var stemmedLength);
            _termAtt.SetTermBuffer(stemmedTerm, 0, stemmedLength);
            _termAtt.SetTermLength(stemmedLength);
        }

        return true;
    }

    var stem = _buffer.Dequeue();
    _termAtt.SetTermBuffer(stem.Stem, 0, stem.StemLength);

    // Further stems remain queued: capture the state so later calls can restore it.
    if (_buffer.Count > 0)
    {
        _savedState = CaptureState();
    }

    return true;
}
/// <summary>
/// Produces the next token, expanding each input word into its stems.
/// </summary>
/// <remarks>
/// While <c>_buffer</c> holds stems, each call restores the saved state, sets the
/// position increment to 0 and writes the dequeued stem into the term attribute.
/// Once the buffer is empty the next input token is pulled and stemmed with
/// <c>_stemmer</c> (<c>UniqueStems</c> when <c>_dedup</c> is set, <c>Stem</c> otherwise).
/// A word that yields no stems is returned unchanged.
/// </remarks>
/// <returns>
/// <c>true</c> if a token is available; <c>false</c> once the wrapped input is exhausted.
/// </returns>
public override Boolean IncrementToken()
{
    // Drain stems queued by an earlier call first.
    if (_buffer.Count > 0)
    {
        var pending = _buffer.Dequeue();
        RestoreState(_savedState);
        _posIncAtt.SetPositionIncrement(0);
        _termAtt.SetTermBuffer(pending.Stem, 0, pending.StemLength);
        return true;
    }

    var hasInput = input.IncrementToken();
    if (!hasInput)
    {
        return false;
    }

    var stems = _dedup
        ? _stemmer.UniqueStems(_termAtt.Term())
        : _stemmer.Stem(_termAtt.Term());
    foreach (var candidate in stems)
    {
        _buffer.Enqueue(candidate);
    }

    // With no stems the word is unknown and the term is left as it was read.
    if (_buffer.Count != 0)
    {
        var first = _buffer.Dequeue();
        _termAtt.SetTermBuffer(first.Stem, 0, first.StemLength);

        // More stems are waiting: capture the state for the following calls.
        if (_buffer.Count > 0)
        {
            _savedState = CaptureState();
        }
    }

    return true;
}