/// <summary>
/// Construct a new <see cref="KeywordTokenizer"/> using a given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="input"> the input to turn into a single token </param>
/// <param name="bufferSize"> initial term-buffer capacity; must be greater than zero </param>
/// <exception cref="ArgumentOutOfRangeException"> if <paramref name="bufferSize"/> is not positive </exception>
public KeywordTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int bufferSize) : base(factory, input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    if (bufferSize <= 0)
    {
        // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException
        // (.NET convention; it derives from ArgumentException, so existing catch blocks still match).
        throw new ArgumentOutOfRangeException(nameof(bufferSize), "bufferSize must be > 0");
    }
    termAtt.ResizeBuffer(bufferSize);
}
/// <summary>
/// Construct a new <see cref="KeywordTokenizer"/> using a given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="input"> the input to turn into a single token </param>
/// <param name="bufferSize"> initial term-buffer capacity; must be greater than zero </param>
/// <exception cref="ArgumentOutOfRangeException"> if <paramref name="bufferSize"/> is not positive </exception>
public KeywordTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int bufferSize) : base(factory, input)
{
    // Register the attributes this tokenizer populates.
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();

    if (bufferSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(bufferSize), "bufferSize must be > 0"); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
    }

    // Pre-size the term buffer so the whole input can be captured as one token.
    termAtt.ResizeBuffer(bufferSize);
}
/// <summary>
/// Expert: Creates a token stream for numeric values with the specified
/// <c>precisionStep</c> using the given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>.
/// The stream is not yet initialized; before using, set a value with the
/// various Set<em>???</em>Value() methods.
/// </summary>
/// <param name="factory"> the attribute factory to use for this stream </param>
/// <param name="precisionStep"> the precision step; must be at least 1 </param>
/// <exception cref="ArgumentOutOfRangeException"> if <paramref name="precisionStep"/> is less than 1 </exception>
public NumericTokenStream(AttributeSource.AttributeFactory factory, int precisionStep) : base(new NumericAttributeFactory(factory))
{
    // Converted-from-Java guard: run instance-field initialization exactly once.
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }
    if (precisionStep < 1)
    {
        // LUCENENET: ArgumentOutOfRangeException per .NET convention (derives from
        // ArgumentException, so callers catching ArgumentException are unaffected).
        throw new ArgumentOutOfRangeException(nameof(precisionStep), "precisionStep must be >=1");
    }
    this.PrecisionStep_Renamed = precisionStep;
    // Negative shift presumably marks the stream as uninitialized until a SetValue call — TODO confirm.
    NumericAtt.Shift = -precisionStep;
}
/// <summary>
/// Creates the <see cref="Tokenizer"/>; as of Lucene 4.4 only front (FRONT side) n-grams are supported.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    if (luceneMatchVersion.OnOrAfter(Version.LUCENE_44))
    {
        if (!EdgeNGramTokenFilter.Side.FRONT.Label.Equals(side))
        {
            // .NET's System.Type has no SimpleName property (that is Java's getSimpleName());
            // Type.Name is the equivalent and yields the unqualified type name.
            throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
        }
        return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
    }
    else
    {
        return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
    }
}
/// <summary>
/// Creates the <see cref="Tokenizer"/>; as of Lucene 4.4 only front (FRONT side) n-grams are supported.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    if (luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
    {
        EdgeNGramTokenFilter.Side sideEnum;
        // Reject both unparsable values AND an explicit non-FRONT side: backward n-grams are
        // unsupported from Lucene 4.4 on, and the 4.4 tokenizer below takes no 'side' argument,
        // so a parsed BACK value would otherwise be silently ignored.
        if (!Enum.TryParse(this.side, true, out sideEnum) || sideEnum != EdgeNGramTokenFilter.Side.FRONT)
        {
            throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
        }
        return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
    }
    else
    {
        return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
    }
}
/// <summary>
/// Creates the <see cref="TokenStream"/> of n-grams from the given <see cref="TextReader"/>
/// and <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    // LUCENE_44 is obsolete; suppress the deprecation warnings around its uses.
#pragma warning disable 612, 618
    bool atLeast44 = m_luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618

    if (atLeast44)
    {
        return new NGramTokenizer(m_luceneMatchVersion, factory, input, minGramSize, maxGramSize);
    }

    // Pre-4.4 behavior is preserved via the legacy (deprecated) tokenizer.
#pragma warning disable 612, 618
    return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
#pragma warning restore 612, 618
}
/// <summary>
/// Creates a new <see cref="LowerCaseTokenizer"/> over <paramref name="input"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new LowerCaseTokenizer(m_luceneMatchVersion, factory, input);
/// <summary>
/// Creates a new <see cref="MockTokenizer"/> over <paramref name="input"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new MockTokenizer(factory, input);
// TODO: add support for WikipediaTokenizer's advanced options.
/// <summary>
/// Creates a new <see cref="WikipediaTokenizer"/> over <paramref name="input"/>,
/// emitting tokens only (no untokenized types) and with no untokenized-type set.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    return new WikipediaTokenizer(factory, input, WikipediaTokenizer.TOKENS_ONLY, Collections.EmptySet<string>());
}
/// <summary>
/// Creates a new <see cref="JapaneseTokenizer"/> over <paramref name="input"/>,
/// configured with this factory's user dictionary, punctuation handling, and mode.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
/// <summary>
/// Creates a new <see cref="HMMChineseTokenizer"/> over <paramref name="reader"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader reader)
    => new HMMChineseTokenizer(factory, reader);
/// <summary>
/// <b>Expert</b>: Creates an AttributeFactory that returns <see cref="Token"/> as the instance
/// for the basic attributes, and delegates creation of all other attributes to the supplied factory.
/// </summary>
/// <param name="delegateFactory"> the factory used for all non-basic attributes </param>
public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory)
{
    _delegateFactory = delegateFactory;
}
/// <summary>
/// Construct a new LetterTokenizer using a given
/// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion"> Lucene version to match </param>
/// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in"> the input to split up into tokens </param>
public LetterTokenizer(Version matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
    : base(matchVersion, factory, @in)
{
}
/// <summary>
/// Creates a new <see cref="ICUTokenizer"/> over <paramref name="input"/> using this factory's
/// tokenizer config. The config is populated elsewhere ("inform" must run first).
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    Debug.Assert(config != null, "inform must be called first!");
    return new ICUTokenizer(factory, input, config);
}
/// <summary>
/// Construct a new RussianLetterTokenizer using a given
/// <see cref="AttributeSource.AttributeFactory"/>; passes both arguments straight to the base class.
/// </summary>
/// <param name="factory"> the attribute factory to use for this tokenizer </param>
/// <param name="__in"> the input to split up into tokens
/// (NOTE(review): unconventional name from the Java-to-C# conversion; it is part of the
/// public signature for named-argument callers, so it is left unchanged) </param>
public RussianLetterTokenizer(AttributeSource.AttributeFactory factory, TextReader __in)
    : base(factory, __in)
{
}
/// <summary>
/// Creates a new <see cref="WhitespaceTokenizer"/> over <paramref name="input"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, Reader input)
    => new WhitespaceTokenizer(luceneMatchVersion, factory, input);
/// <summary>
/// <b>Expert</b>: Creates an AttributeFactory that returns <see cref="Token"/> as the instance
/// for the basic attributes, and delegates creation of all other attributes to the supplied factory.
/// </summary>
/// <param name="delegate"> the factory used for all non-basic attributes </param>
public TokenAttributeFactory(AttributeSource.AttributeFactory @delegate)
{
    // 'this.' qualifier is required: the field and parameter share the name '@delegate'.
    this.@delegate = @delegate;
}
/// <summary>
/// Construct a new BlankTokenizer using a given
/// <see cref="AttributeSource.AttributeFactory"/>; passes both arguments straight to the base class.
/// </summary>
/// <param name="factory"> the attribute factory to use for this tokenizer </param>
/// <param name="in_Renamed"> the input to split up into tokens
/// (NOTE(review): name is a Java-to-C# conversion artifact for the reserved word 'in';
/// it is part of the public signature for named-argument callers, so it is left unchanged) </param>
public BlankTokenizer(AttributeSource.AttributeFactory factory, TextReader in_Renamed)
    : base(factory, in_Renamed)
{
}
/// <summary>
/// Construct a new <see cref="LowerCaseTokenizer"/> using a given
/// <see cref="AttributeSource.AttributeFactory"/>. All work is delegated to the base class.
/// </summary>
/// <param name="matchVersion">
/// <see cref="LuceneVersion"/> to match </param>
/// <param name="factory">
/// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
/// the input to split up into tokens </param>
public LowerCaseTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
    : base(matchVersion, factory, @in)
{
}
/// <summary>
/// Wraps the given factory; all attribute creation not handled here is delegated to it.
/// </summary>
/// <param name="delegate"> the factory to delegate to </param>
internal NumericAttributeFactory(AttributeSource.AttributeFactory @delegate)
{
    // 'this.' qualifier is required: the field and parameter share the name '@delegate'.
    this.@delegate = @delegate;
}
/// <summary>
/// Creates a new <see cref="ArabicLetterTokenizer"/> over <paramref name="input"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new ArabicLetterTokenizer(luceneMatchVersion, factory, input);
/// <summary>
/// Create a <see cref="CollationAttributeFactory"/>, using the supplied Attribute Factory
/// as the factory for all other attributes.
/// </summary>
/// <param name="delegate"> Attribute Factory </param>
/// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param>
public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator)
{
    // 'this.' qualifiers are required: fields and parameters share names.
    this.@delegate = @delegate;
    this.collator = collator;
}
/// <summary>
/// Creates a new <see cref="CJKTokenizer"/> over <paramref name="in"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader @in)
    => new CJKTokenizer(factory, @in);
/// <summary>
/// Creates an EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
/// All work is delegated to the base class (with edgesOnly = true).
/// </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
/// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)
    : base(version, factory, input, minGram, maxGram, true)
{
}
/// <summary>
/// Creates a new <see cref="ThaiTokenizer"/> over <paramref name="reader"/>
/// using the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
// NOTE(review): lowercase 'create' violates .NET PascalCase convention, but as an override
// it must match the base member's name — rename at the base declaration, not here.
public override Tokenizer create(AttributeSource.AttributeFactory factory, Reader reader)
{
    return(new ThaiTokenizer(factory, reader));
}
/// <summary>
/// Creates a <see cref="TokenStream"/> of the specified input using the given
/// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="factory"> the attribute factory to use for the new tokenizer </param>
/// <param name="input"> the text to be tokenized </param>
/// <returns> a new <see cref="Tokenizer"/> over <paramref name="input"/> </returns>
public abstract Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input);
/// <summary>
/// Creates a new <see cref="KeywordTokenizer"/> over <paramref name="input"/>
/// with the default term-buffer size.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
/// <summary>
/// Split the input using the configured pattern.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
    => new PatternTokenizer(factory, input, m_pattern, m_group);
/// <summary>
/// Construct a new DotTokeninzer using a given
/// <see cref="AttributeSource.AttributeFactory"/>; all work is delegated to the base class.
/// </summary>
// NOTE(review): "Tokeninzer" looks like a typo for "Tokenizer", but the constructor name must
// match the class name — fix at the class declaration (and all call sites), not here.
/// <param name="matchVersion"> Lucene version to match </param>
/// <param name="factory"> the attribute factory to use for this tokenizer </param>
/// <param name="input"> the input to split up into tokens </param>
public DotTokeninzer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader input)
    : base(matchVersion, factory, input)
{
}