/// <summary>
/// Appends the current subword of the saved buffer to the supplied
/// <see cref="WordDelimiterConcatenation"/> and extends its end offset.
/// </summary>
/// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that receives the subword </param>
private void Concatenate(WordDelimiterConcatenation concatenation)
{
    // Bounds of the subword the iterator is currently positioned on.
    var subwordStart = iterator.current;
    var subwordLength = iterator.end - subwordStart;

    // The first subword appended to an empty concatenation fixes its start offset.
    if (concatenation.IsEmpty)
    {
        concatenation.startOffset = savedStartOffset + subwordStart;
    }

    concatenation.Append(savedBuffer, subwordStart, subwordLength);

    // The end offset always tracks the most recently appended subword.
    concatenation.endOffset = savedStartOffset + iterator.end;
}
/// <summary>
/// Flushes the supplied <see cref="WordDelimiterConcatenation"/>: it is either written out and
/// cleared, or only cleared. The subword count is stored in <c>lastConcatCount</c> first.
/// </summary>
/// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> to flush </param>
/// <returns>
/// <c>true</c> if the concatenation was written before being cleared; <c>false</c> if it was
/// only cleared
/// </returns>
private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
{
    lastConcatCount = concatenation.subwordCount;

    // A concatenation of exactly one subword whose type has parts generated
    // is discarded without being written.
    bool discardOnly = concatenation.subwordCount == 1 && ShouldGenerateParts(concatenation.type);
    if (discardOnly)
    {
        concatenation.Clear();
        return false;
    }

    concatenation.WriteAndClear();
    return true;
}
/// <summary>
/// Initializes a new <see cref="Lucene47WordDelimiterFilter"/> over the given stream.
/// </summary>
/// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> flags configuring the filter </param>
/// <param name="protWords"> if not <c>null</c>, the set of tokens to protect from being delimited </param>
public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords)
    : base(@in)
{
    // Attributes this filter works with.
    this.termAttribute = AddAttribute<ICharTermAttribute>();
    this.offsetAttribute = AddAttribute<IOffsetAttribute>();
    this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
    this.typeAttribute = AddAttribute<ITypeAttribute>();

    // Helper objects bound to this filter instance.
    this.concat = new WordDelimiterConcatenation(this);
    this.concatAll = new WordDelimiterConcatenation(this);

    this.flags = configurationFlags;
    this.protWords = protWords;

    // Evaluated after the flags are stored, in the same order as the iterator's arguments.
    var splitOnCaseChange = Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE);
    var splitOnNumerics = Has(WordDelimiterFlags.SPLIT_ON_NUMERICS);
    var stemEnglishPossessive = Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE);

    this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange, splitOnNumerics, stemEnglishPossessive);
}
/// <summary>
/// Creates a new <see cref="WordDelimiterFilter"/>
/// </summary>
/// <param name="matchVersion"> lucene compatibility version; must be on or after <see cref="LuceneVersion.LUCENE_48"/> </param>
/// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not <c>null</c> is the set of tokens to protect from being delimited </param>
/// <exception cref="ArgumentException"> if <paramref name="matchVersion"/> is earlier than <see cref="LuceneVersion.LUCENE_48"/> </exception>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords)
    : base(@in)
{
    // Fail fast: reject unsupported versions before any attribute is registered
    // or any helper object is allocated.
    if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
    {
        throw new ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
    }

    this.termAttribute = AddAttribute<ICharTermAttribute>();
    this.offsetAttribute = AddAttribute<IOffsetAttribute>();
    this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
    this.typeAttribute = AddAttribute<ITypeAttribute>();

    // Helper objects bound to this filter instance.
    concat = new WordDelimiterConcatenation(this);
    concatAll = new WordDelimiterConcatenation(this);
    sorter = new OffsetSorter(this);

    this.flags = configurationFlags;
    this.protWords = protWords;

    // The Has(...) calls run after the flags have been stored.
    this.iterator = new WordDelimiterIterator(
        charTypeTable,
        Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE),
        Has(WordDelimiterFlags.SPLIT_ON_NUMERICS),
        Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE));
}
/// <summary>
/// Creates the helper objects held by this filter (<c>concat</c>, <c>concatAll</c> and
/// <c>sorter</c>), each constructed with a reference to this instance.
/// </summary>
private void InitializeInstanceFields()
{
    // Two separate concatenation instances are created.
    this.concat = new WordDelimiterConcatenation(this);
    this.concatAll = new WordDelimiterConcatenation(this);

    this.sorter = new OffsetSorter(this);
}
/// <summary>
/// Flushes the supplied <see cref="WordDelimiterConcatenation"/>: it is either written out and
/// cleared, or only cleared. The subword count is stored in <c>lastConcatCount</c> first.
/// </summary>
/// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> to flush </param>
/// <returns>
/// <c>true</c> if the concatenation was written before being cleared; <c>false</c> if it was
/// only cleared
/// </returns>
private bool flushConcatenation(WordDelimiterConcatenation concatenation)
{
    lastConcatCount = concatenation.subwordCount;

    // A concatenation of exactly one subword whose type has parts generated
    // is discarded without being written.
    bool discardOnly = concatenation.subwordCount == 1 && shouldGenerateParts(concatenation.type);
    if (discardOnly)
    {
        concatenation.clear();
        return false;
    }

    concatenation.writeAndClear();
    return true;
}
/// <summary>
/// Appends the current subword of the saved buffer to the supplied
/// <see cref="WordDelimiterConcatenation"/> and extends its end offset.
/// </summary>
/// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that receives the subword </param>
private void concatenate(WordDelimiterConcatenation concatenation)
{
    // Bounds of the subword the iterator is currently positioned on.
    var subwordStart = iterator.current;
    var subwordLength = iterator.end - subwordStart;

    // The first subword appended to an empty concatenation fixes its start offset.
    if (concatenation.Empty)
    {
        concatenation.startOffset = savedStartOffset + subwordStart;
    }

    concatenation.append(savedBuffer, subwordStart, subwordLength);

    // The end offset always tracks the most recently appended subword.
    concatenation.endOffset = savedStartOffset + iterator.end;
}