Ejemplo n.º 1
0
        /// <summary>
        /// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams.
        /// </summary>
        /// <remarks>
        /// The input stream is wrapped in a <see cref="CodepointCountFilter"/> constructed with
        /// <paramref name="minGram"/> as its lower bound and <see cref="int.MaxValue"/> as its upper bound
        /// before it is handed to the base class.
        /// </remarks>
        /// <param name="version"> Lucene version to enable correct position increments.
        ///                See <see cref="NGramTokenFilter"/> for details. </param>
        /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <exception cref="ArgumentOutOfRangeException"> <paramref name="minGram"/> is less than 1 </exception>
        /// <exception cref="ArgumentException"> <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
        public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
            : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
        {
            // Reject bad arguments before touching any instance state.
            if (minGram < 1)
            {
                // ArgumentOutOfRangeException derives from ArgumentException, so existing catch blocks keep working.
                throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
            }
            if (minGram > maxGram)
            {
                throw new ArgumentException("minGram must not be greater than maxGram");
            }

            // Evaluate the (obsolete-flagged) version constant once; both the character utilities
            // and the attribute wiring below depend on the same answer.
#pragma warning disable 612, 618
            bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618

            this.version = version;
            this.charUtils = isLucene44OrLater
                ? CharacterUtils.GetInstance(version)
                : CharacterUtils.GetJava4Instance(version);
            this.minGram = minGram;
            this.maxGram = maxGram;

            if (isLucene44OrLater)
            {
                posIncAtt = AddAttribute<IPositionIncrementAttribute>();
                posLenAtt = AddAttribute<IPositionLengthAttribute>();
            }
            else
            {
                // Pre-4.4 compatibility: the position attributes are stand-alone helper instances
                // created directly instead of being obtained through AddAttribute.
                posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper();
                posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper();
            }
            termAtt = AddAttribute<ICharTermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Validates the arguments, then registers the attributes and allocates the buffers used by this instance.
        /// </summary>
        /// <param name="version"> Lucene compatibility version; must be 4.4 or later </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <param name="edgesOnly"> value stored in the <c>edgesOnly</c> field
        ///                (NOTE(review): presumably limits output to edge n-grams - confirm against the consumer of that field) </param>
        /// <exception cref="ArgumentOutOfRangeException"> <paramref name="minGram"/> is less than 1 </exception>
        /// <exception cref="ArgumentException"> <paramref name="version"/> is older than 4.4,
        ///                or <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
        private void Init(LuceneVersion version, int minGram, int maxGram, bool edgesOnly)
        {
#pragma warning disable 612, 618
            if (!version.OnOrAfter(LuceneVersion.LUCENE_44))
#pragma warning restore 612, 618
            {
                throw new ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
            }
            if (minGram < 1)
            {
                // ArgumentOutOfRangeException derives from ArgumentException, so existing catch blocks keep working.
                throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
            }
            if (minGram > maxGram)
            {
                throw new ArgumentException("minGram must not be greater than maxGram");
            }

            // The version guard above already rejected everything older than 4.4, so the
            // pre-4.4 fallback could never be selected here; use the regular instance directly.
            charUtils = CharacterUtils.GetInstance(version);

            termAtt = AddAttribute<ICharTermAttribute>();
            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = AddAttribute<IPositionLengthAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.edgesOnly = edgesOnly;

            // 2 * maxGram in case all code points require 2 chars, and + 1024 for buffering
            // so the Reader is not polled constantly.
            charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024);
            buffer = new int[charBuffer.Buffer.Length];

            // Make the term att large enough
            termAtt.ResizeBuffer(2 * maxGram);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates an <see cref="EdgeNGramTokenFilter"/> over <paramref name="input"/> for the given side and n-gram size range.
        /// </summary>
        /// <param name="version"> Lucene compatibility version; selects the character utilities and
        ///                decides whether <see cref="Side.BACK"/> is still accepted </param>
        /// <param name="input"> <see cref="TokenStream"/> passed to the base class </param>
        /// <param name="side"> the side to generate n-grams from; <see cref="Side.BACK"/> is rejected for 4.4 and later </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <exception cref="ArgumentOutOfRangeException"> <paramref name="side"/> is not a defined <see cref="Side"/> value,
        ///                or <paramref name="minGram"/> is less than 1 </exception>
        /// <exception cref="ArgumentException"> <see cref="Side.BACK"/> is requested with version 4.4 or later,
        ///                or <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
        public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            // No null check for version: it is a value type and therefore can never be null.

            // Both the BACK-side guard and the character utilities depend on the same version test.
            bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);

            if (isLucene44OrLater && side == Side.BACK)
            {
                throw new ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
            }

            // ArgumentOutOfRangeException derives from ArgumentException, so existing catch blocks keep working.
            if (!Enum.IsDefined(typeof(Side), side))
            {
                throw new ArgumentOutOfRangeException(nameof(side), "sideLabel must be either front or back");
            }

            if (minGram < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
                throw new ArgumentException("minGram must not be greater than maxGram");
            }

            this.version = version;
            this.charUtils = isLucene44OrLater
                ? CharacterUtils.GetInstance(version)
                : CharacterUtils.GetJava4Instance(version);
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;

            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
            this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds an <see cref="EdgeNGramTokenFilter"/> on top of <paramref name="input"/>.
        /// </summary>
        /// <param name="version"> Lucene compatibility version; picks the character utilities and
        ///                determines whether <see cref="Side.BACK"/> is still allowed </param>
        /// <param name="input"> <see cref="TokenStream"/> handed to the base class </param>
        /// <param name="side"> side to generate n-grams from; <see cref="Side.BACK"/> is refused for 4.4 and later </param>
        /// <param name="minGram"> smallest n-gram size; at least 1 </param>
        /// <param name="maxGram"> largest n-gram size; not smaller than <paramref name="minGram"/> </param>
        /// <exception cref="ArgumentOutOfRangeException"> <paramref name="side"/> is not a defined value,
        ///                or <paramref name="minGram"/> is below 1 </exception>
        /// <exception cref="ArgumentException"> <see cref="Side.BACK"/> is combined with version 4.4 or later,
        ///                or <paramref name="minGram"/> exceeds <paramref name="maxGram"/> </exception>
        public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            // LUCENENET specific - a null check on version is unnecessary because it is a value type.

            if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
            {
                throw new ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
            }

            // LUCENENET specific - the two range checks below throw ArgumentOutOfRangeException
            // (.NET convention) where the Java original used IllegalArgumentException.
            if (!side.IsDefined())
            {
                throw new ArgumentOutOfRangeException(nameof(side), "sideLabel must be either front or back");
            }

            if (minGram <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
            }

            if (maxGram < minGram)
            {
                throw new ArgumentException("minGram must not be greater than maxGram");
            }

            // Arguments are valid from here on: record the configuration.
            this.side = side;
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.version = version;

            if (version.OnOrAfter(LuceneVersion.LUCENE_44))
            {
                this.charUtils = CharacterUtils.GetInstance(version);
            }
            else
            {
                this.charUtils = CharacterUtils.GetJava4Instance(version);
            }

            // Attribute registration order is kept exactly as before.
            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
            this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
        }