/// <summary>
/// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams.
/// </summary>
/// <param name="version"> Lucene version to enable correct position increments.
///                       See <see cref="NGramTokenFilter"/> for details. </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate; must be at least 1 and not greater than <paramref name="maxGram"/> </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
/// <exception cref="ArgumentOutOfRangeException"> <paramref name="minGram"/> is less than 1 </exception>
/// <exception cref="ArgumentException"> <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
    : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
{
    // Validate before touching any instance state. ArgumentOutOfRangeException derives from
    // ArgumentException, so callers that catch ArgumentException keep working.
    if (minGram < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new ArgumentException("minGram must not be greater than maxGram");
    }

    // Evaluate the version switch once; it selects both the character utilities and the attribute wiring.
#pragma warning disable 612, 618
    bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618

    this.version = version;
    this.charUtils = isLucene44OrLater
        ? CharacterUtils.GetInstance(version)
        : CharacterUtils.GetJava4Instance(version);
    this.minGram = minGram;
    this.maxGram = maxGram;

    if (isLucene44OrLater)
    {
        posIncAtt = AddAttribute<IPositionIncrementAttribute>();
        posLenAtt = AddAttribute<IPositionLengthAttribute>();
    }
    else
    {
        // Before 4.4 the position attributes are standalone helper instances
        // rather than attributes registered on this stream through AddAttribute.
        posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper();
        posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper();
    }

    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Validates the arguments, then initializes the character utilities, the token
/// attributes and the working buffers.
/// </summary>
/// <param name="version"> Lucene compatibility version; must be 4.4 or later </param>
/// <param name="minGram"> minimum gram size; must be at least 1 and not greater than <paramref name="maxGram"/> </param>
/// <param name="maxGram"> maximum gram size; also determines the buffer sizes </param>
/// <param name="edgesOnly"> value stored in the <c>edgesOnly</c> field </param>
/// <exception cref="ArgumentOutOfRangeException"> <paramref name="minGram"/> is less than 1 </exception>
/// <exception cref="ArgumentException"> <paramref name="version"/> is older than 4.4, or
/// <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
private void Init(LuceneVersion version, int minGram, int maxGram, bool edgesOnly)
{
#pragma warning disable 612, 618
    if (!version.OnOrAfter(LuceneVersion.LUCENE_44))
#pragma warning restore 612, 618
    {
        throw new ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
    }

    // The guard above rejects every version before 4.4, so the pre-4.4 fallback
    // (GetJava4Instance) could never be selected here and has been dropped.
    charUtils = CharacterUtils.GetInstance(version);

    // ArgumentOutOfRangeException derives from ArgumentException, so callers that
    // catch ArgumentException keep working.
    if (minGram < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new ArgumentException("minGram must not be greater than maxGram");
    }

    termAtt = AddAttribute<ICharTermAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = AddAttribute<IPositionLengthAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();

    this.minGram = minGram;
    this.maxGram = maxGram;
    this.edgesOnly = edgesOnly;

    // 2 * maxGram in case all code points require 2 chars, and + 1024 for buffering
    // so the Reader is not polled over and over.
    charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024);
    buffer = new int[charBuffer.Buffer.Length];

    // Make the term att large enough
    termAtt.ResizeBuffer(2 * maxGram);
}
/// <summary>
/// Creates an <see cref="EdgeNGramTokenFilter"/> over <paramref name="input"/> after validating
/// the side and the gram size bounds.
/// </summary>
/// <param name="version"> Lucene compatibility version; from 4.4 on, <see cref="Side.BACK"/> is rejected </param>
/// <param name="input"> <see cref="TokenStream"/> passed to the base filter </param>
/// <param name="side"> a defined <see cref="Side"/> value </param>
/// <param name="minGram"> minimum gram size; must be at least 1 and not greater than <paramref name="maxGram"/> </param>
/// <param name="maxGram"> maximum gram size </param>
/// <exception cref="ArgumentOutOfRangeException"> <paramref name="side"/> is not a defined <see cref="Side"/> value,
/// or <paramref name="minGram"/> is less than 1 </exception>
/// <exception cref="ArgumentException"> <see cref="Side.BACK"/> is requested with version 4.4 or later,
/// or <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
    : base(input)
{
    // No null check on version: it is a value type and can never be null.
    // The version switch is evaluated once and reused for the guard and for charUtils.
    bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);

    if (isLucene44OrLater && side == Side.BACK)
    {
        throw new ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
    }

    // The range failures below use ArgumentOutOfRangeException, which derives from
    // ArgumentException, so callers that catch ArgumentException keep working.
    if (!Enum.IsDefined(typeof(Side), side))
    {
        throw new ArgumentOutOfRangeException(nameof(side), "sideLabel must be either front or back");
    }
    if (minGram < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new ArgumentException("minGram must not be greater than maxGram");
    }

    this.version = version;
    this.charUtils = isLucene44OrLater
        ? CharacterUtils.GetInstance(version)
        : CharacterUtils.GetJava4Instance(version);
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.side = side;

    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
}
/// <summary>
/// Builds an <see cref="EdgeNGramTokenFilter"/> on top of <paramref name="input"/>, rejecting
/// unsupported sides and inconsistent gram bounds before any state is stored.
/// </summary>
/// <param name="version"> Lucene compatibility version; from 4.4 on, <see cref="Side.BACK"/> is rejected </param>
/// <param name="input"> <see cref="TokenStream"/> handed to the base filter </param>
/// <param name="side"> a defined <see cref="Side"/> value </param>
/// <param name="minGram"> minimum gram size; must be at least 1 and not greater than <paramref name="maxGram"/> </param>
/// <param name="maxGram"> maximum gram size </param>
/// <exception cref="ArgumentOutOfRangeException"> <paramref name="side"/> is not a defined value,
/// or <paramref name="minGram"/> is less than 1 </exception>
/// <exception cref="ArgumentException"> <see cref="Side.BACK"/> is requested with version 4.4 or later,
/// or <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
    : base(input)
{
    // LUCENENET specific - version cannot be null because it is a value type.
    bool atLeastLucene44 = version.OnOrAfter(LuceneVersion.LUCENE_44);

    if (atLeastLucene44 && side == Side.BACK)
    {
        throw new ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
    }

    // LUCENENET specific - the two range checks below throw ArgumentOutOfRangeException
    // instead of IllegalArgumentException (.NET convention).
    if (!side.IsDefined())
    {
        throw new ArgumentOutOfRangeException(nameof(side), "sideLabel must be either front or back");
    }

    if (minGram < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(minGram), "minGram must be greater than zero");
    }

    if (minGram > maxGram)
    {
        throw new ArgumentException("minGram must not be greater than maxGram");
    }

    // Configuration captured from the arguments.
    this.side = side;
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.version = version;

    // Choose the character utilities that match the requested compatibility version.
    if (atLeastLucene44)
    {
        this.charUtils = CharacterUtils.GetInstance(version);
    }
    else
    {
        this.charUtils = CharacterUtils.GetJava4Instance(version);
    }

    // Attribute registration order is kept exactly as before.
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
}