예제 #1
0
 /// <summary>
 /// Appends the slice of the saved buffer that lies between <c>iterator.current</c> and
 /// <c>iterator.end</c> to the given <see cref="WordDelimiterConcatenation"/> and updates its offsets.
 /// </summary>
 /// <remarks>
 /// The start offset is assigned only while the concatenation is still empty; the end offset is
 /// overwritten on every call. Both are computed by adding <c>savedStartOffset</c> to the iterator position.
 /// </remarks>
 /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that receives the slice </param>
 private void Concatenate(WordDelimiterConcatenation concatenation)
 {
     var sliceStart = iterator.current;
     var sliceLength = iterator.end - sliceStart;

     // Only the first appended slice determines where the concatenation begins.
     if (concatenation.IsEmpty)
     {
         concatenation.startOffset = savedStartOffset + sliceStart;
     }

     concatenation.Append(savedBuffer, sliceStart, sliceLength);

     // The most recently appended slice always determines where the concatenation ends.
     concatenation.endOffset = savedStartOffset + iterator.end;
 }
예제 #2
0
 /// <summary>
 /// Flushes the given <see cref="WordDelimiterConcatenation"/>: its content is either written and
 /// then cleared, or simply cleared without being written.
 /// </summary>
 /// <remarks>
 /// The subword count of the concatenation is stored in <c>lastConcatCount</c> before anything else happens.
 /// The content is discarded only when it consists of exactly one subword and
 /// <c>ShouldGenerateParts</c> returns <c>true</c> for its type.
 /// </remarks>
 /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> to flush </param>
 /// <returns> <c>true</c> if the concatenation was written before being cleared, <c>false</c> if it was only cleared </returns>
 private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
 {
     var subwords = concatenation.subwordCount;
     lastConcatCount = subwords;

     // NOTE(review): presumably a lone subword is already emitted as a part, so writing it again
     // would duplicate it - confirm against the part-generation path.
     if (subwords == 1 && ShouldGenerateParts(concatenation.type))
     {
         concatenation.Clear();
         return false;
     }

     concatenation.WriteAndClear();
     return true;
 }
예제 #3
0
        /// <summary>
        /// Initializes a new <see cref="Lucene47WordDelimiterFilter"/> on top of the given stream.
        /// </summary>
        /// <remarks>
        /// Registers the term, offset, position-increment and type attributes, creates the two
        /// concatenation helpers, and builds the <see cref="WordDelimiterIterator"/> from the
        /// <c>SPLIT_ON_CASE_CHANGE</c>, <c>SPLIT_ON_NUMERICS</c> and <c>STEM_ENGLISH_POSSESSIVE</c> flags.
        /// </remarks>
        /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
        /// <param name="charTypeTable"> table containing character types </param>
        /// <param name="configurationFlags"> flags configuring the filter </param>
        /// <param name="protWords"> if not <c>null</c>, the set of tokens to protect from being delimited </param>
        public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords)
            : base(@in)
        {
            // Attribute registration.
            this.termAttribute = AddAttribute<ICharTermAttribute>();
            this.offsetAttribute = AddAttribute<IOffsetAttribute>();
            this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
            this.typeAttribute = AddAttribute<ITypeAttribute>();

            // Concatenation helpers bound to this filter instance.
            this.concat = new WordDelimiterConcatenation(this);
            this.concatAll = new WordDelimiterConcatenation(this);

            // Configuration.
            this.flags = configurationFlags;
            this.protWords = protWords;

            // The flag queries are issued only after the flags have been stored above.
            bool splitOnCaseChange = Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE);
            bool splitOnNumerics = Has(WordDelimiterFlags.SPLIT_ON_NUMERICS);
            bool stemEnglishPossessive = Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE);

            this.iterator = new WordDelimiterIterator(
                charTypeTable,
                splitOnCaseChange,
                splitOnNumerics,
                stemEnglishPossessive);
        }
예제 #4
0
        /// <summary>
        /// Creates a new <see cref="WordDelimiterFilter"/>
        /// </summary>
        /// <remarks>
        /// The compatibility version is validated before any attribute is registered or any helper
        /// object is created, so a rejected construction leaves no side effects behind.
        /// </remarks>
        /// <param name="matchVersion"> lucene compatibility version; must be on or after <c>LUCENE_48</c> </param>
        /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
        /// <param name="charTypeTable"> table containing character types </param>
        /// <param name="configurationFlags"> Flags configuring the filter </param>
        /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
        /// <exception cref="ArgumentException"> if <paramref name="matchVersion"/> is older than <c>LUCENE_48</c> </exception>
        public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords)
            : base(@in)
        {
            // Fail fast: reject unsupported versions before touching any state.
            // NOTE(review): the attributes registered below presumably live on the attribute source shared
            // with the input stream, so validating first avoids mutating it on failure - confirm.
            if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
            {
                throw new ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
            }

            this.termAttribute   = AddAttribute <ICharTermAttribute>();
            this.offsetAttribute = AddAttribute <IOffsetAttribute>();
            this.posIncAttribute = AddAttribute <IPositionIncrementAttribute>();
            this.typeAttribute   = AddAttribute <ITypeAttribute>();
            concat    = new WordDelimiterConcatenation(this);
            concatAll = new WordDelimiterConcatenation(this);
            sorter    = new OffsetSorter(this);

            // The flags must be stored before the Has(...) queries used to build the iterator.
            this.flags     = configurationFlags;
            this.protWords = protWords;
            this.iterator  = new WordDelimiterIterator(charTypeTable,
                                                       Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE),
                                                       Has(WordDelimiterFlags.SPLIT_ON_NUMERICS),
                                                       Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE));
        }
예제 #5
0
 /// <summary>
 /// Creates the helper objects that need a reference to this instance: the two
 /// <see cref="WordDelimiterConcatenation"/> buffers and the <see cref="OffsetSorter"/>.
 /// </summary>
 private void InitializeInstanceFields()
 {
     // Each helper receives this instance through its constructor.
     this.concat = new WordDelimiterConcatenation(this);
     this.concatAll = new WordDelimiterConcatenation(this);

     this.sorter = new OffsetSorter(this);
 }
예제 #6
0
 /// <summary>
 /// Flushes the given WordDelimiterConcatenation: its content is either written and then cleared,
 /// or cleared without being written.
 /// </summary>
 /// <remarks>
 /// The subword count is stored in <c>lastConcatCount</c> first. The content is written unless it
 /// consists of exactly one subword and <c>shouldGenerateParts</c> returns <c>true</c> for its type.
 /// </remarks>
 /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
 /// <returns> <c>true</c> if the concatenation was written before it was cleared, <c>false</c> otherwise </returns>
 private bool flushConcatenation(WordDelimiterConcatenation concatenation)
 {
     lastConcatCount = concatenation.subwordCount;

     // Short-circuits exactly like the inline condition: the type is only consulted for a single subword.
     bool mustWrite = concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type);

     if (mustWrite)
     {
         concatenation.writeAndClear();
     }
     else
     {
         concatenation.clear();
     }

     return mustWrite;
 }
예제 #7
0
 /// <summary>
 /// Appends the part of the saved buffer between <c>iterator.current</c> and <c>iterator.end</c>
 /// to the given WordDelimiterConcatenation and refreshes its offsets.
 /// </summary>
 /// <remarks>
 /// The start offset is set only when the concatenation is still empty; the end offset is set on
 /// every call. Both are derived by adding <c>savedStartOffset</c> to the iterator position.
 /// </remarks>
 /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
 private void concatenate(WordDelimiterConcatenation concatenation)
 {
     var from = iterator.current;
     var count = iterator.end - from;

     if (concatenation.Empty)
     {
         // First piece appended: it fixes the start of the concatenation.
         concatenation.startOffset = savedStartOffset + from;
     }

     concatenation.append(savedBuffer, from, count);
     concatenation.endOffset = savedStartOffset + iterator.end;
 }
예제 #8
0
 /// <summary>
 /// Instantiates the fields whose values are constructed from this instance:
 /// <c>concat</c>, <c>concatAll</c> and <c>sorter</c>.
 /// </summary>
 private void InitializeInstanceFields()
 {
     // Concatenation buffers.
     this.concat = new WordDelimiterConcatenation(this);
     this.concatAll = new WordDelimiterConcatenation(this);

     // Sorter.
     this.sorter = new OffsetSorter(this);
 }