/// <summary>
/// Build a filter that removes words that are too long or too short from the text.
/// </summary>
/// <param name="in_Renamed">The input TokenStream whose tokens are filtered by length</param>
/// <param name="min">Tokens shorter than this length are removed</param>
/// <param name="max">Tokens longer than this length are removed</param>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    this.max = max;
    this.min = min;
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Construct a token stream filtering the given input.
/// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
/// <c>makeStopSet()</c> was used to construct the set) it will be directly used
/// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
/// directly controls case sensitivity.
/// <p/>
/// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
/// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
/// used to specify the case sensitivity of that set.
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="input">Input TokenStream</param>
/// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
/// <param name="ignoreCase">if true, all words are lower cased first</param>
public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
    : base(input)
{
    if (stopWords is CharArraySet)
    {
        // A CharArraySet controls its own case sensitivity, so ignoreCase is
        // deliberately not consulted on this path.
        this.stopWords = (CharArraySet) stopWords;
    }
    else
    {
        // Copy the caller's set into a CharArraySet with the requested case behavior.
        this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
        this.stopWords.AddAll(stopWords);
    }
    this.enablePositionIncrements = enablePositionIncrements;
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Construct an ASCIIFoldingFilter over the given input stream.
/// </summary>
/// <param name="input">The TokenStream to wrap</param>
public ASCIIFoldingFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Construct a PorterStemFilter over the given input stream, creating the
/// PorterStemmer instance it will use.
/// </summary>
/// <param name="in_Renamed">The TokenStream to wrap</param>
public PorterStemFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    termAtt = AddAttribute<ITermAttribute>();
    stemmer = new PorterStemmer();
}
/// <summary>
/// Construct a LowerCaseFilter over the given input stream.
/// </summary>
/// <param name="in">The TokenStream to wrap</param>
public LowerCaseFilter(TokenStream @in)
    : base(@in)
{
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Construct an ISOLatin1AccentFilter over the given input stream.
/// </summary>
/// <param name="input">The TokenStream to wrap</param>
public ISOLatin1AccentFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Constructs a filter which removes words from the input TokenStream that
/// are named in the Set. Case sensitivity defaults to exact matching
/// (delegates with <c>ignoreCase = false</c>).
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="in">Input stream</param>
/// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
/// <seealso cref="MakeStopSet(string[])"/>
public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
    : this(enablePositionIncrements, @in, stopWords, false)
{
}
/// <summary>
/// Instantiates a new TeeSinkTokenFilter wrapping the given input stream.
/// </summary>
/// <param name="input">The TokenStream to wrap</param>
public TeeSinkTokenFilter(TokenStream input)
    : base(input)
{
}
/// <summary> Create a tokenized and indexed field that is not stored, optionally with
/// storing term vectors. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
/// has been called.
///
/// </summary>
/// <param name="name">The name of the field
/// </param>
/// <param name="tokenStream">The TokenStream with the content
/// </param>
/// <param name="termVector">Whether term vector should be stored
/// </param>
/// <throws> ArgumentNullException if name or tokenStream is <c>null</c> </throws>
public Field(System.String name, TokenStream tokenStream, TermVector termVector)
{
    // Argument validation must throw ArgumentNullException; explicitly throwing
    // NullReferenceException is reserved for the runtime (CA2201).
    if (name == null)
        throw new System.ArgumentNullException("name", "name cannot be null");
    if (tokenStream == null)
        throw new System.ArgumentNullException("tokenStream", "tokenStream cannot be null");

    this.internalName = StringHelper.Intern(name); // field names are interned
    this.fieldsData = null;
    this.tokenStream = tokenStream;
    // Pre-analyzed content: never stored, always indexed and tokenized, not binary.
    this.internalIsStored = false;
    this.internalIsIndexed = true;
    this.internalIsTokenized = true;
    this.internalIsBinary = false;

    SetStoreTermVector(termVector);
}
/// <summary>
/// Create a tokenized and indexed field that is not stored. Term vectors will
/// not be stored. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
/// has been called.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="tokenStream">The TokenStream with the content</param>
/// <throws> NullPointerException if name or tokenStream is <c>null</c> </throws>
public Field(System.String name, TokenStream tokenStream)
    : this(name, tokenStream, TermVector.NO)
{
}
/// <summary>
/// Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
/// May be combined with stored values from stringValue() or GetBinaryValue()
/// </summary>
/// <param name="tokenStream">The pre-analyzed token stream to index from</param>
public void SetTokenStream(TokenStream tokenStream)
{
    this.tokenStream = tokenStream;
    this.internalIsTokenized = true;
    this.internalIsIndexed = true;
}
/// <summary>
/// Construct a LowerCaseFilter wrapping the given input stream.
/// </summary>
/// <param name="in">The TokenStream to wrap</param>
public LowerCaseFilter(TokenStream @in)
    : base(@in)
{
    termAtt = AddAttribute<ITermAttribute>();
}