Пример #1
0
 /**
  * Wraps the supplied token stream in a newly constructed LowerCaseFilter.
  *
  * @param input Token stream handed to the LowerCaseFilter constructor.
  * @return The new LowerCaseFilter, typed as a TokenStream.
  */
 public override TokenStream create(TokenStream input)
 {
     TokenStream lowerCased = new LowerCaseFilter(input);
     return lowerCased;
 }
Пример #2
0
 /**
  * Builds the filter on top of source, remembers the stemmer and the
  * protected-word set, and obtains this stream's TermAttribute.
  *
  * @param source Token stream forwarded to the base constructor.
  * @param stemmer Snowball program stored in the stemmer field.
  * @param protWords Set stored in the protWords field; presumably the terms
  *        that must not be stemmed (its use is not visible here) - confirm.
  */
 public SnowballPorterFilter(TokenStream source, SnowballProgram stemmer, CharArraySet protWords)
     : base(source)
 {
     // addAttribute is called with a Type, so the result is cast to the
     // concrete attribute interface before being stored.
     termAtt = (TermAttribute)addAttribute(typeof(TermAttribute));
     this.stemmer = stemmer;
     this.protWords = protWords;
 }
Пример #3
0
 /**
  * Creates the filter by delegating to the base constructor with a fresh
  * org.tartarus.snowball.ext.EnglishStemmer instance.
  *
  * @param source Token stream forwarded to the base constructor.
  * @param protWords Set forwarded unchanged to the base constructor.
  */
 public EnglishPorterFilter(TokenStream source, CharArraySet protWords)
     : base(
         source,
         new org.tartarus.snowball.ext.EnglishStemmer(),
         protWords)
 {
 }
 /**
  * Asserts that each stream produces exactly one token and that the two
  * tokens have the same text.
  *
  * Both streams are reset, read until incrementToken() returns false, then
  * ended and closed.
  *
  * Converted from Java, where the method declared
  * "throws java.io.IOException"; .NET has no throws clauses, so exceptions
  * raised by the streams simply propagate to the caller.
  *
  * @param stream1 First token stream to consume.
  * @param stream2 Second token stream to consume.
  */
 private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
 {
     stream1.reset();
     stream2.reset();

     // addAttribute takes a Type, so the result is cast explicitly to the
     // attribute interface, matching how the filters in this code base
     // obtain their attributes.
     CharTermAttribute term1 = (CharTermAttribute)stream1.addAttribute(typeof(CharTermAttribute));
     CharTermAttribute term2 = (CharTermAttribute)stream2.addAttribute(typeof(CharTermAttribute));

     // Exactly one token is expected from each stream.
     assertTrue(stream1.incrementToken());
     assertTrue(stream2.incrementToken());
     assertEquals(term1.ToString(), term2.ToString());
     assertFalse(stream1.incrementToken());
     assertFalse(stream2.incrementToken());

     stream1.end();
     stream2.end();
     stream1.close();
     stream2.close();
 }
Пример #5
0
 /**
  * Convenience overload: uses defaultWordDelimTable as the character type
  * table and passes the fixed trailing arguments 1, 0, 1, null to the
  * longer constructor.
  *
  * NOTE(review): by position the literals look like splitOnCaseChange = 1,
  * preserveOriginal = 0, splitOnNumerics = 1, protWords = null - confirm
  * against the target overload, which is not visible here.
  *
  * @param input Token stream to be filtered.
  * @param generateWordParts Forwarded unchanged.
  * @param generateNumberParts Forwarded unchanged.
  * @param catenateWords Forwarded unchanged.
  * @param catenateNumbers Forwarded unchanged.
  * @param catenateAll Forwarded unchanged.
  */
 public WordDelimiterFilter(
     TokenStream input,
     int generateWordParts,
     int generateNumberParts,
     int catenateWords,
     int catenateNumbers,
     int catenateAll)
     : this(
         input,
         defaultWordDelimTable,
         generateWordParts,
         generateNumberParts,
         catenateWords,
         catenateNumbers,
         catenateAll,
         1,
         0,
         1,
         null)
 {
 }
Пример #6
0
 /**
  * Wraps the supplied token stream in a new EnglishPorterFilter that is
  * given this factory's protectedWords.
  *
  * @param input Token stream handed to the EnglishPorterFilter constructor.
  * @return The new EnglishPorterFilter, typed as a TokenStream.
  */
 public override TokenStream create(TokenStream input)
 {
     TokenStream stemmed = new EnglishPorterFilter(input, protectedWords);
     return stemmed;
 }
Пример #7
0
 /**
  * Convenience overload taking an explicit character type table; passes the
  * fixed trailing arguments 1, 0, 1, null to the longer constructor.
  *
  * NOTE(review): by position the literals look like splitOnCaseChange = 1,
  * preserveOriginal = 0, splitOnNumerics = 1, protWords = null - confirm
  * against the target overload, which is not visible here.
  *
  * @param input Token stream to be filtered.
  * @param charTypeTable Table forwarded in place of defaultWordDelimTable.
  * @param generateWordParts Forwarded unchanged.
  * @param generateNumberParts Forwarded unchanged.
  * @param catenateWords Forwarded unchanged.
  * @param catenateNumbers Forwarded unchanged.
  * @param catenateAll Forwarded unchanged.
  */
 public WordDelimiterFilter(
     TokenStream input,
     byte[] charTypeTable,
     int generateWordParts,
     int generateNumberParts,
     int catenateWords,
     int catenateNumbers,
     int catenateAll)
     : this(
         input,
         charTypeTable,
         generateWordParts,
         generateNumberParts,
         catenateWords,
         catenateNumbers,
         catenateAll,
         1,
         0,
         1,
         null)
 {
 }
Пример #8
0
 /**
  * Convenience overload: forwards every argument to the overload that takes
  * a character type table, supplying defaultWordDelimTable as that table.
  *
  * @param input Token stream to be filtered.
  * @param generateWordParts Forwarded unchanged.
  * @param generateNumberParts Forwarded unchanged.
  * @param catenateWords Forwarded unchanged.
  * @param catenateNumbers Forwarded unchanged.
  * @param catenateAll Forwarded unchanged.
  * @param splitOnCaseChange Forwarded unchanged.
  * @param preserveOriginal Forwarded unchanged.
  */
 public WordDelimiterFilter(
     TokenStream input,
     int generateWordParts,
     int generateNumberParts,
     int catenateWords,
     int catenateNumbers,
     int catenateAll,
     int splitOnCaseChange,
     int preserveOriginal)
     : this(
         input,
         defaultWordDelimTable,
         generateWordParts,
         generateNumberParts,
         catenateWords,
         catenateNumbers,
         catenateAll,
         splitOnCaseChange,
         preserveOriginal)
 {
 }
Пример #9
0
 /**
  * Creates a filter that uses defaultWordDelimTable as its character type
  * table; every other argument is forwarded unchanged to the overload that
  * takes an explicit table.
  *
  * @param input Token stream to be filtered.
  * @param generateWordParts If 1, causes parts of words to be generated: "PowerShot", "Power-Shot" => "Power" "Shot"
  * @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42"
  * @param catenateWords If 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
  * @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
  * @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
  * @param splitOnCaseChange If 1, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts regardless)
  * @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
  * @param splitOnNumerics If 1, causes "j2se" to be three tokens: "j" "2" "se"
  * @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
  * @param protWords If not null, the set of tokens to protect from being delimited
  */
 public WordDelimiterFilter(
     TokenStream input,
     int generateWordParts,
     int generateNumberParts,
     int catenateWords,
     int catenateNumbers,
     int catenateAll,
     int splitOnCaseChange,
     int preserveOriginal,
     int splitOnNumerics,
     int stemEnglishPossessive,
     CharArraySet protWords)
     : this(
         input,
         defaultWordDelimTable,
         generateWordParts,
         generateNumberParts,
         catenateWords,
         catenateNumbers,
         catenateAll,
         splitOnCaseChange,
         preserveOriginal,
         splitOnNumerics,
         stemEnglishPossessive,
         protWords)
 {
 }
Пример #10
0
 /**
  * Convenience overload taking an explicit character type table; forwards
  * its arguments and appends the fixed trailing arguments 1, null.
  *
  * NOTE(review): by position the literals look like splitOnNumerics = 1 and
  * protWords = null - confirm against the target overload, which is not
  * visible here.
  *
  * @param input Token stream to be filtered.
  * @param charTypeTable Table forwarded in place of defaultWordDelimTable.
  * @param generateWordParts Forwarded unchanged.
  * @param generateNumberParts Forwarded unchanged.
  * @param catenateWords Forwarded unchanged.
  * @param catenateNumbers Forwarded unchanged.
  * @param catenateAll Forwarded unchanged.
  * @param splitOnCaseChange Forwarded unchanged.
  * @param preserveOriginal Forwarded unchanged.
  */
 public WordDelimiterFilter(
     TokenStream input,
     byte[] charTypeTable,
     int generateWordParts,
     int generateNumberParts,
     int catenateWords,
     int catenateNumbers,
     int catenateAll,
     int splitOnCaseChange,
     int preserveOriginal)
     : this(
         input,
         charTypeTable,
         generateWordParts,
         generateNumberParts,
         catenateWords,
         catenateNumbers,
         catenateAll,
         splitOnCaseChange,
         preserveOriginal,
         1,
         null)
 {
 }
Пример #11
0
 /**
  * Wraps the supplied token stream in a new LucidKStemFilter that is given
  * this factory's protectedWords.
  *
  * @param input Token stream handed to the LucidKStemFilter constructor.
  * @return The new LucidKStemFilter, typed as a TokenStream.
  */
 public override TokenStream create(TokenStream input)
 {
     TokenStream stemmed = new LucidKStemFilter(input, protectedWords);
     return stemmed;
 }