/// <summary>
/// Runs <paramref name="s"/> through <paramref name="analyzer"/> and returns all
/// non-empty token terms joined by single spaces.
/// </summary>
/// <param name="s">The text to analyze.</param>
/// <param name="analyzer">The Lucene analyzer used to tokenize the text (field name is a dummy).</param>
/// <returns>The space-separated terms; an empty string when the analyzer yields no tokens.</returns>
public static string AnalyzeString(string s, Analyzer analyzer)
{
    StringBuilder sb = new StringBuilder();
    TokenStream stream = analyzer.TokenStream("DUMMY", new StringReader(s));
    try
    {
        ITermAttribute termAtt = stream.AddAttribute<ITermAttribute>();
        // Lucene consumer contract: Reset before the IncrementToken loop.
        stream.Reset();
        while (stream.IncrementToken())
        {
            if (termAtt.Term.Length > 0)
            {
                if (sb.Length > 0)
                {
                    sb.Append(' ');
                }
                sb.Append(termAtt.Term);
            }
        }
        // Finalize end-of-stream state per the TokenStream contract.
        stream.End();
    }
    finally
    {
        // The original leaked the stream; dispose to release analyzer resources.
        stream.Dispose();
    }
    // No Trim() needed: a space is only ever appended between two terms.
    return sb.ToString();
}
/// <summary>
/// Creates the filter over <paramref name="in_Renamed"/>, allocating the Porter
/// stemmer it uses and registering the term attribute it reads and writes.
/// </summary>
/// <param name="in_Renamed">The upstream token stream to wrap.</param>
public PorterStemFilterAndOrginal(TokenStream in_Renamed)
    : base(in_Renamed)
{
    stemmer = new PorterStemmer();
    termAtt = AddAttribute<ITermAttribute>();
    // No pending saved term yet.
    orginal = null;
}
/// <summary>
/// Creates the filter over <paramref name="in_Renamed"/>, registering the term,
/// position-increment, and offset attributes this filter consumes.
/// </summary>
/// <param name="in_Renamed">The upstream token stream to wrap.</param>
public RemoveDuplicatesTokenFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    termOff = AddAttribute<IOffsetAttribute>();
    // Nothing buffered until the stream is consumed.
    tokenList = null;
}
/// <summary>
/// Creates the tokenizer over <paramref name="input"/>, allocating its term
/// buffer and registering the term and offset attributes it populates.
/// </summary>
/// <param name="input">The character reader to tokenize.</param>
public PartKeywordTokenizer(System.IO.TextReader input)
    : base(input)
{
    done = false;
    sb = new StringBuilder();
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    ClearAttributes();
}
/// <summary>
/// Constructs a filter which tokenizes words from the input stream.
/// </summary>
/// <param name="input">The token stream from a tokenizer.</param>
public WhitespaceFilter(TokenStream input)
    : base(input)
{
    // Tracks terms already emitted by this filter.
    seen = new HashSet<string>();
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    ClearAttributes();
}
/// <summary>
/// Creates the filter over <paramref name="in_Renamed"/>; caches the
/// non-whitespace character set from <c>CDRWhitespaceTokenizer</c> and
/// registers the term, position-increment, and offset attributes it uses.
/// </summary>
/// <param name="in_Renamed">The upstream token stream to wrap.</param>
public SpecialNoneWhiteSpaceFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    noneWhiteSpaceChars = CDRWhitespaceTokenizer.NoneWhiteSpaceChars;
    // Holds terms saved for later re-emission.
    savedTerms = new List<SavedTerm>();
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    termOff = AddAttribute<IOffsetAttribute>();
}