/// <summary>
/// Wraps the supplied token stream in a new <c>LowerCaseFilter</c>.
/// </summary>
/// <param name="input">Token stream handed to the <c>LowerCaseFilter</c> constructor.</param>
/// <returns>The newly constructed <c>LowerCaseFilter</c>, typed as <c>TokenStream</c>.</returns>
public override TokenStream create(TokenStream input)
{
    TokenStream lowerCased = new LowerCaseFilter(input);
    return lowerCased;
}
/// <summary>
/// Builds a filter over <paramref name="source"/> that keeps a reference to the given
/// stemmer and protected-word set, and obtains a <c>TermAttribute</c> via <c>addAttribute</c>.
/// </summary>
/// <param name="source">Upstream token stream; forwarded to the base constructor.</param>
/// <param name="stemmer">Snowball program stored on this instance.</param>
/// <param name="protWords">
/// Word set stored on this instance. NOTE(review): presumably the words exempt from
/// stemming, and presumably allowed to be null - confirm against the token-processing code.
/// </param>
public SnowballPorterFilter(TokenStream source, SnowballProgram stemmer, CharArraySet protWords)
    : base(source)
{
    this.protWords = protWords;
    this.stemmer = stemmer;

    // Look the attribute up once at construction time and keep the typed reference.
    Type termAttributeType = typeof(TermAttribute);
    termAtt = (TermAttribute)addAttribute(termAttributeType);
}
/// <summary>
/// Builds the filter by delegating to the base constructor with a freshly created
/// <c>org.tartarus.snowball.ext.EnglishStemmer</c>.
/// </summary>
/// <param name="source">Upstream token stream; forwarded unchanged to the base constructor.</param>
/// <param name="protWords">Word set forwarded unchanged to the base constructor.</param>
public EnglishPorterFilter(TokenStream source, CharArraySet protWords)
    : base(source, new org.tartarus.snowball.ext.EnglishStemmer(), protWords)
{
    // All initialisation happens in the base constructor.
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void assertCollatesToSame(org.apache.lucene.analysis.TokenStream stream1, org.apache.lucene.analysis.TokenStream stream2) throws java.io.IOException
/// <summary>
/// Asserts that both streams yield exactly one token each and that the string form of
/// the two tokens' <c>CharTermAttribute</c> values is equal.
/// </summary>
/// <remarks>
/// Each stream is driven through reset, incrementToken, end and close, in that order.
/// The method body previously sat on the same physical line as the converter's
/// <c>//</c> comments, so the compiler treated the whole method as a comment.
/// </remarks>
/// <param name="stream1">First stream to consume; closed on successful completion.</param>
/// <param name="stream2">Second stream to consume; closed on successful completion.</param>
private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
{
    stream1.reset();
    stream2.reset();

    // Explicit casts mirror the other typeof-based addAttribute call sites in this code
    // (e.g. "(TermAttribute)addAttribute(typeof(TermAttribute))").
    CharTermAttribute term1 = (CharTermAttribute)stream1.addAttribute(typeof(CharTermAttribute));
    CharTermAttribute term2 = (CharTermAttribute)stream2.addAttribute(typeof(CharTermAttribute));

    // Both streams must produce a first token...
    assertTrue(stream1.incrementToken());
    assertTrue(stream2.incrementToken());

    // ...whose term text is identical...
    assertEquals(term1.ToString(), term2.ToString());

    // ...and neither stream may produce a second one.
    assertFalse(stream1.incrementToken());
    assertFalse(stream2.incrementToken());

    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
}
/// <summary>
/// Convenience constructor: uses <c>defaultWordDelimTable</c> as the character type table
/// and delegates to a longer overload, passing the fixed trailing arguments
/// <c>1, 0, 1, null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the trailing constants presumably mean splitOnCaseChange = 1,
/// preserveOriginal = 0, splitOnNumerics = 1 and no protected-word set - confirm against
/// the target overload's parameter list.
/// </remarks>
/// <param name="input">Token stream to be filtered.</param>
/// <param name="generateWordParts">Forwarded unchanged to the target overload.</param>
/// <param name="generateNumberParts">Forwarded unchanged to the target overload.</param>
/// <param name="catenateWords">Forwarded unchanged to the target overload.</param>
/// <param name="catenateNumbers">Forwarded unchanged to the target overload.</param>
/// <param name="catenateAll">Forwarded unchanged to the target overload.</param>
public WordDelimiterFilter(
    TokenStream input,
    int generateWordParts,
    int generateNumberParts,
    int catenateWords,
    int catenateNumbers,
    int catenateAll)
    : this(
        input,
        defaultWordDelimTable,
        generateWordParts,
        generateNumberParts,
        catenateWords,
        catenateNumbers,
        catenateAll,
        1,
        0,
        1,
        null)
{
}
/// <summary>
/// Wraps the supplied token stream in a new <c>EnglishPorterFilter</c>, passing along
/// this instance's <c>protectedWords</c>.
/// </summary>
/// <param name="input">Token stream handed to the <c>EnglishPorterFilter</c> constructor.</param>
/// <returns>The newly constructed <c>EnglishPorterFilter</c>, typed as <c>TokenStream</c>.</returns>
public override TokenStream create(TokenStream input)
{
    TokenStream stemmed = new EnglishPorterFilter(input, protectedWords);
    return stemmed;
}
/// <summary>
/// Convenience constructor taking a caller-supplied character type table; delegates to a
/// longer overload, passing the fixed trailing arguments <c>1, 0, 1, null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the trailing constants presumably mean splitOnCaseChange = 1,
/// preserveOriginal = 0, splitOnNumerics = 1 and no protected-word set - confirm against
/// the target overload's parameter list.
/// </remarks>
/// <param name="input">Token stream to be filtered.</param>
/// <param name="charTypeTable">Character type table forwarded unchanged to the target overload.</param>
/// <param name="generateWordParts">Forwarded unchanged to the target overload.</param>
/// <param name="generateNumberParts">Forwarded unchanged to the target overload.</param>
/// <param name="catenateWords">Forwarded unchanged to the target overload.</param>
/// <param name="catenateNumbers">Forwarded unchanged to the target overload.</param>
/// <param name="catenateAll">Forwarded unchanged to the target overload.</param>
public WordDelimiterFilter(
    TokenStream input,
    byte[] charTypeTable,
    int generateWordParts,
    int generateNumberParts,
    int catenateWords,
    int catenateNumbers,
    int catenateAll)
    : this(
        input,
        charTypeTable,
        generateWordParts,
        generateNumberParts,
        catenateWords,
        catenateNumbers,
        catenateAll,
        1,
        0,
        1,
        null)
{
}
/// <summary>
/// Convenience constructor: uses <c>defaultWordDelimTable</c> as the character type table
/// and forwards every other argument unchanged to the overload that takes an explicit table.
/// </summary>
/// <param name="input">Token stream to be filtered.</param>
/// <param name="generateWordParts">Forwarded unchanged to the target overload.</param>
/// <param name="generateNumberParts">Forwarded unchanged to the target overload.</param>
/// <param name="catenateWords">Forwarded unchanged to the target overload.</param>
/// <param name="catenateNumbers">Forwarded unchanged to the target overload.</param>
/// <param name="catenateAll">Forwarded unchanged to the target overload.</param>
/// <param name="splitOnCaseChange">Forwarded unchanged to the target overload.</param>
/// <param name="preserveOriginal">Forwarded unchanged to the target overload.</param>
public WordDelimiterFilter(
    TokenStream input,
    int generateWordParts,
    int generateNumberParts,
    int catenateWords,
    int catenateNumbers,
    int catenateAll,
    int splitOnCaseChange,
    int preserveOriginal)
    : this(
        input,
        defaultWordDelimTable,
        generateWordParts,
        generateNumberParts,
        catenateWords,
        catenateNumbers,
        catenateAll,
        splitOnCaseChange,
        preserveOriginal)
{
}
/// <summary>
/// Constructor exposing every option while using <c>defaultWordDelimTable</c> as the
/// character type table; all arguments are forwarded unchanged to the overload that
/// takes an explicit table.
/// </summary>
/// <param name="input">Token stream to be filtered.</param>
/// <param name="generateWordParts">If 1, causes parts of words to be generated: "PowerShot", "Power-Shot" => "Power" "Shot".</param>
/// <param name="generateNumberParts">If 1, causes number subwords to be generated: "500-42" => "500" "42".</param>
/// <param name="catenateWords">If 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi".</param>
/// <param name="catenateNumbers">If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042".</param>
/// <param name="catenateAll">If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000".</param>
/// <param name="splitOnCaseChange">If 1, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts regardless).</param>
/// <param name="preserveOriginal">If 1, includes original words in subwords: "500-42" => "500" "42" "500-42".</param>
/// <param name="splitOnNumerics">If 1, causes "j2se" to be three tokens: "j" "2" "se".</param>
/// <param name="stemEnglishPossessive">If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil".</param>
/// <param name="protWords">If not null, the set of tokens to protect from being delimited.</param>
public WordDelimiterFilter(
    TokenStream input,
    int generateWordParts,
    int generateNumberParts,
    int catenateWords,
    int catenateNumbers,
    int catenateAll,
    int splitOnCaseChange,
    int preserveOriginal,
    int splitOnNumerics,
    int stemEnglishPossessive,
    CharArraySet protWords)
    : this(
        input,
        defaultWordDelimTable,
        generateWordParts,
        generateNumberParts,
        catenateWords,
        catenateNumbers,
        catenateAll,
        splitOnCaseChange,
        preserveOriginal,
        splitOnNumerics,
        stemEnglishPossessive,
        protWords)
{
}
/// <summary>
/// Convenience constructor taking a caller-supplied character type table; delegates to a
/// longer overload, appending the fixed trailing arguments <c>1, null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the trailing constants presumably mean splitOnNumerics = 1 and no
/// protected-word set - confirm against the target overload's parameter list.
/// </remarks>
/// <param name="input">Token stream to be filtered.</param>
/// <param name="charTypeTable">Character type table forwarded unchanged to the target overload.</param>
/// <param name="generateWordParts">Forwarded unchanged to the target overload.</param>
/// <param name="generateNumberParts">Forwarded unchanged to the target overload.</param>
/// <param name="catenateWords">Forwarded unchanged to the target overload.</param>
/// <param name="catenateNumbers">Forwarded unchanged to the target overload.</param>
/// <param name="catenateAll">Forwarded unchanged to the target overload.</param>
/// <param name="splitOnCaseChange">Forwarded unchanged to the target overload.</param>
/// <param name="preserveOriginal">Forwarded unchanged to the target overload.</param>
public WordDelimiterFilter(
    TokenStream input,
    byte[] charTypeTable,
    int generateWordParts,
    int generateNumberParts,
    int catenateWords,
    int catenateNumbers,
    int catenateAll,
    int splitOnCaseChange,
    int preserveOriginal)
    : this(
        input,
        charTypeTable,
        generateWordParts,
        generateNumberParts,
        catenateWords,
        catenateNumbers,
        catenateAll,
        splitOnCaseChange,
        preserveOriginal,
        1,
        null)
{
}
/// <summary>
/// Wraps the supplied token stream in a new <c>LucidKStemFilter</c>, passing along
/// this instance's <c>protectedWords</c>.
/// </summary>
/// <param name="input">Token stream handed to the <c>LucidKStemFilter</c> constructor.</param>
/// <returns>The newly constructed <c>LucidKStemFilter</c>, typed as <c>TokenStream</c>.</returns>
public override TokenStream create(TokenStream input)
{
    TokenStream stemmed = new LucidKStemFilter(input, protectedWords);
    return stemmed;
}