Base class for decomposition token filters.

You must specify the required LuceneVersion compatibility when creating CompoundWordTokenFilterBase:

  • As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 supplementary characters in strings and char arrays provided as compound word dictionaries.
  • As of 4.4, CompoundWordTokenFilterBase doesn't update offsets.

Inheritance: TokenFilter
Example 1
0
            /// <summary>
            /// Builds a compound token from a slice of the current
            /// <see cref="CompoundWordTokenFilterBase.termAtt"/>.
            /// </summary>
            /// <param name="outerInstance">Filter whose term, offset and match-version state is read.</param>
            /// <param name="offset">Index within the current term at which the slice begins.</param>
            /// <param name="length">Length of the slice; the slice end handed to <c>subSequence</c> is <c>offset + length</c>.</param>
            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
            {
                this.outerInstance = outerInstance;
                this.txt           = outerInstance.termAtt.subSequence(offset, offset + length);

                // Offsets carried by the original (undecomposed) word.
                int wordStart = outerInstance.offsetAtt.startOffset();
                int wordEnd   = outerInstance.offsetAtt.endOffset();

                // Keep the word's own offsets from 4.4 onwards, or whenever the offset span
                // does not equal the term length (assumed to be a synonym in that case).
                bool keepWordOffsets = outerInstance.matchVersion.onOrAfter(Version.LUCENE_44)
                                       || wordEnd - wordStart != outerInstance.termAtt.length();

                if (!keepWordOffsets)
                {
                    // Offsets cover exactly the term text, so narrow them to the slice.
                    int sliceStart = wordStart + offset;
                    this.startOffset = sliceStart;
                    this.endOffset   = sliceStart + length;
                    return;
                }

                this.startOffset = wordStart;
                this.endOffset   = wordEnd;
            }
            /// <summary>
            /// Builds a compound token from a slice of the current
            /// <see cref="CompoundWordTokenFilterBase.termAtt"/>.
            /// </summary>
            /// <param name="outerInstance">Filter whose term, offset and match-version state is read.</param>
            /// <param name="offset">Index within the current term at which the slice begins.</param>
            /// <param name="length">Length of the slice; the second argument handed to <c>SubSequence</c> is <c>offset + length</c>.</param>
            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
            {
                this.txt = outerInstance.termAtt.SubSequence(offset, offset + length);

                // Offsets carried by the original (undecomposed) word.
                int wordStart = outerInstance.offsetAtt.StartOffset();
                int wordEnd   = outerInstance.offsetAtt.EndOffset();

                if (outerInstance.matchVersion.OnOrAfter(LuceneVersion.LUCENE_44)
                    || wordEnd - wordStart != outerInstance.termAtt.Length)
                {
                    // Either the 4.4+ behaviour applies, or the offset span does not equal the
                    // term length (assumed to be a synonym): keep the word's offsets untouched.
                    this.startOffset = wordStart;
                    this.endOffset   = wordEnd;
                    return;
                }

                // Offsets cover exactly the term text, so narrow them to the slice.
                int sliceStart = wordStart + offset;
                this.startOffset = sliceStart;
                this.endOffset   = sliceStart + length;
            }
            // LUCENENET specific: changed public field into property backed by private field
            /// <summary>
            /// Gets the end offset stored for this compound token.
            /// </summary>
            public int EndOffset
            {
                get { return endOffset; }
            }

            /// <summary>
            /// Builds a compound token from a slice of the current
            /// <see cref="CompoundWordTokenFilterBase.m_termAtt"/>.
            /// </summary>
            /// <param name="outerInstance">Filter whose term, offset and match-version state is read.</param>
            /// <param name="offset">Index within the current term at which the slice begins.</param>
            /// <param name="length">Length of the slice, passed as the second argument to <c>Subsequence</c>.</param>
            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
            {
                // LUCENENET: Corrected 2nd Subsequence parameter
                this.txt = outerInstance.m_termAtt.Subsequence(offset, length);

                // Offsets carried by the original (undecomposed) word.
                int wordStart = outerInstance.m_offsetAtt.StartOffset;
                int wordEnd   = outerInstance.m_offsetAtt.EndOffset;

                // Keep the word's own offsets from 4.4 onwards, or whenever the offset span
                // does not equal the term length (assumed to be a synonym in that case).
                // Warnings 612/618 (obsolete member usage) are suppressed for the version check only.
#pragma warning disable 612, 618
                bool keepWordOffsets = outerInstance.m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_44)
                                       || wordEnd - wordStart != outerInstance.m_termAtt.Length;
#pragma warning restore 612, 618

                // When the offsets are adjusted, they are narrowed to the slice.
                int tokenStart = keepWordOffsets ? wordStart : wordStart + offset;
                int tokenEnd   = keepWordOffsets ? wordEnd : tokenStart + length;

                this.startOffset = tokenStart;
                this.endOffset   = tokenEnd;
            }
            /// <summary>
            /// Builds a compound token from a slice of the current
            /// <see cref="CompoundWordTokenFilterBase.termAtt"/>.
            /// </summary>
            /// <param name="outerInstance">Filter whose term, offset and match-version state is read.</param>
            /// <param name="offset">Index within the current term at which the slice begins.</param>
            /// <param name="length">Length of the slice; the second argument handed to <c>SubSequence</c> is <c>offset + length</c>.</param>
            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
            {
                this.txt = outerInstance.termAtt.SubSequence(offset, offset + length);

                // Offsets carried by the original (undecomposed) word.
                int wordStart = outerInstance.offsetAtt.StartOffset();
                int wordEnd = outerInstance.offsetAtt.EndOffset();

                if (!outerInstance.matchVersion.OnOrAfter(LuceneVersion.LUCENE_44)
                    && wordEnd - wordStart == outerInstance.termAtt.Length)
                {
                    // Pre-4.4 behaviour with offsets that cover exactly the term text:
                    // narrow the offsets to the slice.
                    int sliceStart = wordStart + offset;
                    this.startOffset = sliceStart;
                    this.endOffset = sliceStart + length;
                }
                else
                {
                    // From 4.4 onwards, or when the offset span does not equal the term length
                    // (assumed to be a synonym), the word's offsets are kept as they are.
                    this.startOffset = wordStart;
                    this.endOffset = wordEnd;
                }
            }
		/// <summary>
		/// Builds a compound token from a slice of the current
		/// <see cref="CompoundWordTokenFilterBase.termAtt"/>.
		/// </summary>
		/// <param name="outerInstance">Filter whose term, offset and match-version state is read.</param>
		/// <param name="offset">Index within the current term at which the slice begins.</param>
		/// <param name="length">Length of the slice; the slice end handed to <c>subSequence</c> is <c>offset + length</c>.</param>
		public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
		{
			this.outerInstance = outerInstance;
			this.txt = outerInstance.termAtt.subSequence(offset, offset + length);

			// Offsets carried by the original (undecomposed) word.
			int wordStart = outerInstance.offsetAtt.startOffset();
			int wordEnd = outerInstance.offsetAtt.endOffset();

			// Keep the word's own offsets from 4.4 onwards, or whenever the offset span
			// does not equal the term length (assumed to be a synonym in that case).
			bool keepWordOffsets = outerInstance.matchVersion.onOrAfter(Version.LUCENE_44)
				|| wordEnd - wordStart != outerInstance.termAtt.length();

			if (keepWordOffsets)
			{
				this.startOffset = wordStart;
				this.endOffset = wordEnd;
			}
			else
			{
				// Offsets cover exactly the term text, so narrow them to the slice.
				int sliceStart = wordStart + offset;
				this.startOffset = sliceStart;
				this.endOffset = sliceStart + length;
			}
		}