// Resets the end-of-stream flag, registers the attributes this tokenizer
// populates, and pre-sizes the term buffer to avoid early reallocations.
private void Init(int bufferSize)
{
    done = false;
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    termAtt.ResizeTermBuffer(bufferSize);
}
/// <summary>
/// Build a filter that removes words that are too long or too short from the text.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.min = min;
    this.max = max;
}
// Wires up the term, position-increment and offset attributes this test
// token stream emits; InitBlock captures the enclosing test fixture.
public MyTokenStream(TestTermVectorsReader enclosingInstance)
{
    InitBlock(enclosingInstance);
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
/// <summary>
/// With position increments globally enabled, a StopAnalyzer built from a
/// custom stop set must drop exactly those words and report the skipped
/// positions through the expected per-token increments. The global default
/// is restored in a finally block so other tests are unaffected.
/// </summary>
public virtual void TestStopListPositions() { bool defaultEnable = StopFilter.GetEnablePositionIncrementsDefault(); StopFilter.SetEnablePositionIncrementsDefault(true); try { System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable(); stopWordsSet.Add("good", "good"); stopWordsSet.Add("test", "test"); stopWordsSet.Add("analyzer", "analyzer"); StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer with positions"); int[] expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 }; TokenStream stream = newStop.TokenStream("test", reader); Assert.IsNotNull(stream); int i = 0; TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute)); while (stream.IncrementToken()) { System.String text = termAtt.Term(); Assert.IsFalse(stopWordsSet.Contains(text)); Assert.AreEqual(expectedIncr[i++], posIncrAtt.GetPositionIncrement()); } } finally { StopFilter.SetEnablePositionIncrementsDefault(defaultEnable); } }
// Clears the done flag, registers term/offset attributes, and grows the
// term buffer up front so tokenization does not resize it token by token.
private void Init(int bufferSize)
{
    done = false;
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    termAtt.ResizeTermBuffer(bufferSize);
}
/// <summary>
/// Builds a term vector from <paramref name="queryString"/> by running it
/// through <paramref name="analyzer"/> and collecting every produced term.
/// Does nothing when the analyzer is null or yields no stream.
/// </summary>
/// <param name="queryString">raw query text to tokenize</param>
/// <param name="analyzer">analyzer used to tokenize; may be null</param>
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer == null)
        return;

    TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
    if (stream == null)
        return;

    List<string> terms = new List<string>();
    try
    {
        stream.Reset();
        TermAttribute termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
        while (stream.IncrementToken())
        {
            terms.Add(termAtt.Term());
        }
        ProcessTerms(terms.ToArray());
    }
    catch (System.IO.IOException)
    {
        // Best effort: an unreadable query simply produces no terms.
    }
    finally
    {
        // The original leaked the tokenizer; always release its resources.
        try { stream.Close(); }
        catch (System.IO.IOException) { /* ignore close failure */ }
    }
}
// Registers the term, position-increment, offset and type attributes this
// test filter inspects; InitBlock captures the enclosing test fixture.
public TestFilter(TestMultiAnalyzer enclosingInstance, TokenStream in_Renamed)
    : base(in_Renamed)
{
    InitBlock(enclosingInstance);
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
}
// Remembers the field being processed, zeroes the position/token counters,
// and hooks the attributes on the wrapped stream that this filter rewrites.
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    this.fieldName = fieldName;
    pos = 0;
    i = 0;
    posIncrAttr = (PositionIncrementAttribute)input.AddAttribute(typeof(PositionIncrementAttribute));
    payloadAttr = (PayloadAttribute)input.AddAttribute(typeof(PayloadAttribute));
    termAttr = (TermAttribute)input.AddAttribute(typeof(TermAttribute));
}
// Binds this field's term attribute from the shared attribute source, then
// forwards the start event to the consumer and down the per-field chain.
internal override void Start(Fieldable f)
{
    termAtt = (TermAttribute)fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
    consumer.Start(f);
    if (nextPerField != null)
    {
        nextPerField.Start(f);
    }
}
/// <summary>
/// StopFilter constructed with ignoreCase=true must drop "is", "The" and
/// "Time" despite the case differences, leaving only "Now".
/// </summary>
public virtual void TestIgnoreCase() { System.IO.StringReader reader = new System.IO.StringReader("Now is The Time"); System.String[] stopWords = new System.String[] { "is", "the", "Time" }; TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, true); TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); Assert.IsTrue(stream.IncrementToken()); Assert.AreEqual("Now", termAtt.Term()); Assert.IsFalse(stream.IncrementToken()); }
// Leases a payload buffer from the pool, fills it with random data, derives
// the term text from those bytes, and registers the emitted attributes.
internal PoolingPayloadTokenStream(TestPayloads enclosingInstance, ByteArrayPool pool)
{
    InitBlock(enclosingInstance);
    this.pool = pool;
    payload = this.pool.Get();
    Enclosing_Instance.GenerateRandomData(payload);
    term = this.pool.BytesToString(payload);
    first = true;
    payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Seeds the entity/no-payload word lists used by the filter, records the
// field name, and registers the attributes this filter reads and writes.
public PayloadFilter(TestPayloadSpans enclosingInstance, TokenStream input, System.String fieldName)
    : base(input)
{
    InitBlock(enclosingInstance);
    this.fieldName = fieldName;
    pos = 0;
    SupportClass.CollectionsHelper.AddIfNotContains(entities, "xx");
    SupportClass.CollectionsHelper.AddIfNotContains(entities, "one");
    SupportClass.CollectionsHelper.AddIfNotContains(nopayload, "nopayload");
    SupportClass.CollectionsHelper.AddIfNotContains(nopayload, "np");
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
}
/// <summary>
/// Case-sensitive StopFilter built via MakeStopSet removes the exact matches
/// "is" and "Time" but keeps "Now" and "The" ("The" != "the").
/// </summary>
public virtual void TestStopFilt() { System.IO.StringReader reader = new System.IO.StringReader("Now is The Time"); System.String[] stopWords = new System.String[] { "is", "the", "Time" }; System.Collections.Hashtable stopSet = StopFilter.MakeStopSet(stopWords); TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet); TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); Assert.IsTrue(stream.IncrementToken()); Assert.AreEqual("Now", termAtt.Term()); Assert.IsTrue(stream.IncrementToken()); Assert.AreEqual("The", termAtt.Term()); Assert.IsFalse(stream.IncrementToken()); }
/// <summary>
/// LengthFilter(2, 6) must keep only terms whose length lies in [2, 6]:
/// "short", "ab" and "foo" survive; "a" (too short), "toolong" and
/// "evenmuchlongertext" (too long) are dropped.
/// </summary>
public virtual void TestFilter() { TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("short toolong evenmuchlongertext a ab toolong foo")); LengthFilter filter = new LengthFilter(stream, 2, 6); TermAttribute termAtt = (TermAttribute)filter.GetAttribute(typeof(TermAttribute)); Assert.IsTrue(filter.IncrementToken()); Assert.AreEqual("short", termAtt.Term()); Assert.IsTrue(filter.IncrementToken()); Assert.AreEqual("ab", termAtt.Term()); Assert.IsTrue(filter.IncrementToken()); Assert.AreEqual("foo", termAtt.Term()); Assert.IsFalse(filter.IncrementToken()); }
/// <summary>
/// The default StopAnalyzer (field 'stop') must emit no token that appears
/// in the built-in English stop-word list mirrored by inValidTokens.
/// </summary>
public virtual void TestDefaults() { Assert.IsTrue(stop != null); System.IO.StringReader reader = new System.IO.StringReader("This is a test of the english stop analyzer"); TokenStream stream = stop.TokenStream("test", reader); Assert.IsTrue(stream != null); TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); while (stream.IncrementToken()) { Assert.IsFalse(inValidTokens.Contains(termAtt.Term())); } }
/// <summary>
/// NumericTokenStream over an int must emit one prefix-coded term per
/// precision step (shift 0 .. 32), typed FULL_PREC for the first token and
/// LOWER_PREC for the rest, then report exhaustion. GetAttribute (rather
/// than AddAttribute) is used deliberately so missing attributes throw.
/// </summary>
public virtual void TestIntStream() { NumericTokenStream stream = new NumericTokenStream().SetIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be throwed TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); TypeAttribute typeAtt = (TypeAttribute)stream.GetAttribute(typeof(TypeAttribute)); for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT) { Assert.IsTrue(stream.IncrementToken(), "New token is available"); Assert.AreEqual(NumericUtils.IntToPrefixCoded(ivalue, shift), termAtt.Term(), "Term is correctly encoded"); Assert.AreEqual((shift == 0)?NumericTokenStream.TOKEN_TYPE_FULL_PREC:NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct"); } Assert.IsFalse(stream.IncrementToken(), "No more tokens available"); }
// Verifies the stream yields exactly the expected tokens, in order, and
// nothing more.
private void checkTokens(TokenStream stream)
{
    TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    Assert.IsNotNull(termAtt);
    int seen = 0;
    while (stream.IncrementToken())
    {
        Assert.IsTrue(seen < tokens.Length);
        Assert.AreEqual(tokens[seen], termAtt.Term());
        seen++;
    }
    Assert.AreEqual(tokens.Length, seen);
}
/// <summary>
/// Drives a StopFilter whose survivors are every third English number word
/// (i = 0, 3, 6, ...) and checks each survivor's position increment: when
/// increments are enabled the first token carries 1 and every later one 3;
/// when disabled every token carries 1.
/// </summary>
private void DoTestStopPositons(StopFilter stpf, bool enableIcrements) { Log("---> test with enable-increments-" + (enableIcrements?"enabled":"disabled")); stpf.SetEnablePositionIncrements(enableIcrements); TermAttribute termAtt = (TermAttribute)stpf.GetAttribute(typeof(TermAttribute)); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stpf.GetAttribute(typeof(PositionIncrementAttribute)); for (int i = 0; i < 20; i += 3) { Assert.IsTrue(stpf.IncrementToken()); Log("Token " + i + ": " + stpf); System.String w = English.IntToEnglish(i).Trim(); Assert.AreEqual(w, termAtt.Term(), "expecting token " + i + " to be " + w); Assert.AreEqual(enableIcrements?(i == 0?1:3):1, posIncrAtt.GetPositionIncrement(), "all but first token must have position increment of 3"); } Assert.IsFalse(stpf.IncrementToken()); }
/// <summary>
/// PerFieldAnalyzerWrapper must route "field" to the default
/// WhitespaceAnalyzer (token kept as "Qwerty") and "special" to the added
/// SimpleAnalyzer (token lowercased to "qwerty").
/// </summary>
public virtual void TestPerField() { System.String text = "Qwerty"; PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer()); analyzer.AddAnalyzer("special", new SimpleAnalyzer()); TokenStream tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text)); TermAttribute termAtt = (TermAttribute)tokenStream.GetAttribute(typeof(TermAttribute)); Assert.IsTrue(tokenStream.IncrementToken()); Assert.AreEqual("Qwerty", termAtt.Term(), "WhitespaceAnalyzer does not lowercase"); tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text)); termAtt = (TermAttribute)tokenStream.GetAttribute(typeof(TermAttribute)); Assert.IsTrue(tokenStream.IncrementToken()); Assert.AreEqual("qwerty", termAtt.Term(), "SimpleAnalyzer lowercases"); }
/// <summary>
/// A StopAnalyzer built from a custom stop set must drop exactly those
/// words, and — with position increments left at their default — every
/// emitted token must carry an increment of 1.
/// </summary>
public virtual void TestStopList() { System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable(); stopWordsSet.Add("good", "good"); stopWordsSet.Add("test", "test"); stopWordsSet.Add("analyzer", "analyzer"); StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer"); TokenStream stream = newStop.TokenStream("test", reader); Assert.IsNotNull(stream); TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute)); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute)); while (stream.IncrementToken()) { System.String text = termAtt.Term(); Assert.IsFalse(stopWordsSet.Contains(text)); Assert.AreEqual(1, posIncrAtt.GetPositionIncrement()); // by default stop tokenizer does not apply increments. } }
// Registers the term and position-increment attributes this stream emits.
public void Init()
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
// Captures the term attribute so the filter can rewrite token text in place.
public ISOLatin1AccentFilter(TokenStream input)
    : base(input)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Binds the term attribute from the per-field attribute source, then
// propagates the start event to the consumer and the next per-field writer.
internal override void Start(Fieldable f)
{
    termAtt = (TermAttribute)fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
    consumer.Start(f);
    if (nextPerField != null)
    {
        nextPerField.Start(f);
    }
}
/// <summary>
/// Filter which discards the token 'stop' and which expands the token
/// 'phrase' into 'phrase1 phrase2'.
/// </summary>
public QPTestFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
// Registers the term, type and position-increment attributes used by this
// stream.
private void InitBlock()
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
// Shares the given attribute source with this tokenizer and registers the
// offset and term attributes it populates.
public CharTokenizer(AttributeSource source, System.IO.TextReader input)
    : base(source, input)
{
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Hooks up the attributes this test stream emits (term, position increment,
// offset) after capturing the enclosing fixture via InitBlock.
public MyTokenStream(TestTermVectorsReader enclosingInstance)
{
    InitBlock(enclosingInstance);
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
// Registers the term attribute this one-character-at-a-time tokenizer fills.
public SingleCharTokenizer(System.IO.TextReader r)
    : base(r)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
/// <summary>
/// Accepts only tokens whose term text equals "dogs", ignoring case.
/// </summary>
public override bool Accept(AttributeSource a)
{
    TermAttribute termAtt = (TermAttribute)a.GetAttribute(typeof(TermAttribute));
    // Ordinal case-insensitive comparison replaces the old
    // ToUpper().Equals(...) pattern: it allocates no temporary strings and
    // avoids culture-sensitive casing surprises (e.g. the Turkish 'I').
    return System.String.Equals(termAtt.Term(), "Dogs", System.StringComparison.OrdinalIgnoreCase);
}
// Captures the term attribute so the filter can lowercase token text in
// place.
public LowerCaseFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Seeds the entity and no-payload word lists, records the field name, and
// registers the term/position/payload attributes the filter manipulates.
public PayloadFilter(TestPayloadSpans enclosingInstance, TokenStream input, System.String fieldName)
    : base(input)
{
    InitBlock(enclosingInstance);
    this.fieldName = fieldName;
    pos = 0;
    Support.CollectionsHelper.AddIfNotContains(entities, "xx");
    Support.CollectionsHelper.AddIfNotContains(entities, "one");
    Support.CollectionsHelper.AddIfNotContains(nopayload, "nopayload");
    Support.CollectionsHelper.AddIfNotContains(nopayload, "np");
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
}
// Creates the Porter stemmer used per token and captures the term attribute
// whose buffer the stemmer rewrites.
public PorterStemFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    stemmer = new PorterStemmer();
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Stores the value to repeat and registers the term attribute it is emitted
// through.
public RepeatingTokenStream(System.String val)
{
    value_Renamed = val;
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Records the field name, resets the position/token counters, and binds the
// attributes (on the wrapped stream) that this filter rewrites.
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    this.fieldName = fieldName;
    pos = 0;
    i = 0;
    posIncrAttr = (PositionIncrementAttribute)input.AddAttribute(typeof(PositionIncrementAttribute));
    payloadAttr = (PayloadAttribute)input.AddAttribute(typeof(PayloadAttribute));
    termAttr = (TermAttribute)input.AddAttribute(typeof(TermAttribute));
}
/// <summary>
/// ISOLatin1AccentFilter must fold every accented Latin-1 character to its
/// unaccented ASCII form (e.g. "clés" -> "cles", "CHAÎNE" -> "CHAINE",
/// Æ -> AE, ß -> ss, Þ -> TH, and the fi/fl ligatures expanded), asserted
/// token by token until the stream is exhausted.
/// </summary>
public virtual void TestU() { TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream); TermAttribute termAtt = (TermAttribute)filter.GetAttribute(typeof(TermAttribute)); AssertTermEquals("Des", filter, termAtt); AssertTermEquals("mot", filter, termAtt); AssertTermEquals("cles", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("LA", filter, termAtt); AssertTermEquals("CHAINE", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("A", filter, termAtt); AssertTermEquals("AE", filter, termAtt); AssertTermEquals("C", filter, termAtt); AssertTermEquals("E", filter, termAtt); AssertTermEquals("E", filter, termAtt); AssertTermEquals("E", filter, termAtt); AssertTermEquals("E", filter, termAtt); AssertTermEquals("I", filter, termAtt); AssertTermEquals("I", filter, termAtt); AssertTermEquals("I", filter, termAtt); AssertTermEquals("I", filter, termAtt); AssertTermEquals("IJ", filter, termAtt); AssertTermEquals("D", filter, termAtt); AssertTermEquals("N", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("O", filter, termAtt); AssertTermEquals("OE", filter, termAtt); AssertTermEquals("TH", filter, termAtt); AssertTermEquals("U", filter, termAtt); AssertTermEquals("U", filter, termAtt); AssertTermEquals("U", filter, termAtt); AssertTermEquals("U", filter, termAtt); AssertTermEquals("Y", filter, termAtt); AssertTermEquals("Y", filter, termAtt); 
AssertTermEquals("a", filter, termAtt); AssertTermEquals("a", filter, termAtt); AssertTermEquals("a", filter, termAtt); AssertTermEquals("a", filter, termAtt); AssertTermEquals("a", filter, termAtt); AssertTermEquals("a", filter, termAtt); AssertTermEquals("ae", filter, termAtt); AssertTermEquals("c", filter, termAtt); AssertTermEquals("e", filter, termAtt); AssertTermEquals("e", filter, termAtt); AssertTermEquals("e", filter, termAtt); AssertTermEquals("e", filter, termAtt); AssertTermEquals("i", filter, termAtt); AssertTermEquals("i", filter, termAtt); AssertTermEquals("i", filter, termAtt); AssertTermEquals("i", filter, termAtt); AssertTermEquals("ij", filter, termAtt); AssertTermEquals("d", filter, termAtt); AssertTermEquals("n", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("o", filter, termAtt); AssertTermEquals("oe", filter, termAtt); AssertTermEquals("ss", filter, termAtt); AssertTermEquals("th", filter, termAtt); AssertTermEquals("u", filter, termAtt); AssertTermEquals("u", filter, termAtt); AssertTermEquals("u", filter, termAtt); AssertTermEquals("u", filter, termAtt); AssertTermEquals("y", filter, termAtt); AssertTermEquals("y", filter, termAtt); AssertTermEquals("fi", filter, termAtt); AssertTermEquals("fl", filter, termAtt); Assert.IsFalse(filter.IncrementToken()); }
// Takes a buffer from the shared pool, fills it with random payload bytes,
// derives the term text from them, and registers the emitted attributes.
internal PoolingPayloadTokenStream(TestPayloads enclosingInstance, ByteArrayPool pool)
{
    InitBlock(enclosingInstance);
    this.pool = pool;
    payload = this.pool.Get();
    Enclosing_Instance.GenerateRandomData(payload);
    term = this.pool.BytesToString(payload);
    first = true;
    payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
/// <summary>
/// Build a filter that removes words that are too long or too short from the text.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.min = min;
    this.max = max;
}
// Instantiates the Porter stemmer and captures the term attribute whose
// buffer is rewritten with each stemmed token.
public PorterStemFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    stemmer = new PorterStemmer();
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Registers the term and offset attributes carried by the single token this
// stream produces.
internal SingleTokenTokenStream()
{
    termAttribute = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAttribute = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
// Captures the term attribute filled in as each character is tokenized.
public SingleCharTokenizer(System.IO.TextReader r)
    : base(r)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Hooks up the term and offset attributes for the one token this stream
// emits.
internal SingleTokenTokenStream()
{
    termAttribute = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAttribute = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
// Builds attributes through the supplied factory and registers the offset
// and term attributes this tokenizer populates.
public CharTokenizer(AttributeFactory factory, System.IO.TextReader input)
    : base(factory, input)
{
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// JFlex-generated scanner hook: zzBuffer/zzStartRead/zzMarkedPos delimit the
// current match, and that span is copied straight into the term buffer.
/// <summary> Fills TermAttribute with the current token text.</summary> internal void GetText(TermAttribute t) { t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
// Registers the term and position-increment attributes this test filter
// inspects; InitBlock captures the enclosing fixture.
public TestPosIncrementFilter(TestMultiAnalyzer enclosingInstance, TokenStream in_Renamed)
    : base(in_Renamed)
{
    InitBlock(enclosingInstance);
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
// Advances the stream by one token and checks that its term text matches.
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    Assert.IsTrue(stream.IncrementToken());
    Assert.AreEqual(expected, termAtt.Term());
}
// Stores the acronym-handling flag and input reader, then registers the
// term, offset, position-increment and type attributes this tokenizer fills.
private void Init(System.IO.TextReader input, bool replaceInvalidAcronym)
{
    this.replaceInvalidAcronym = replaceInvalidAcronym;
    this.input = input;
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
}
// Remembers the value to repeat and the term attribute it is emitted
// through.
public RepeatingTokenStream(System.String val)
{
    value_Renamed = val;
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
// Pulls the next token from the stream and verifies its term text.
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    Assert.IsTrue(stream.IncrementToken());
    Assert.AreEqual(expected, termAtt.Term());
}
// JFlex-generated scanner hook: the current match spans
// [zzStartRead, zzMarkedPos) in zzBuffer; copy it into the term buffer.
/// <summary> Fills TermAttribute with the current token text.</summary> internal void GetText(TermAttribute t) { t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
// NOTE(review): micro-benchmark, not a functional test. It first checks that
// a TeeSinkTokenFilter sink produces the same tokens as re-analyzing with a
// ModuloTokenFilter, then times "two fields analyzed separately" vs "one
// field plus a tee sink" over several modulo counts, asserting both paths
// accumulate the same position-increment totals. Timings go to stdout.
/// <summary> Not an explicit test, just useful to print out some info on performance /// /// </summary> /// <throws> Exception </throws> public virtual void Performance() { int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 }; int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 }; for (int k = 0; k < tokCount.Length; k++) { System.Text.StringBuilder buffer = new System.Text.StringBuilder(); System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----"); for (int i = 0; i < tokCount[k]; i++) { buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' '); } //make sure we produce the same tokens TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())))); TokenStream sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100)); teeStream.ConsumeAllTokens(); TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100); TermAttribute tfTok = (TermAttribute)stream.AddAttribute(typeof(TermAttribute)); TermAttribute sinkTok = (TermAttribute)sink.AddAttribute(typeof(TermAttribute)); for (int i = 0; stream.IncrementToken(); i++) { Assert.IsTrue(sink.IncrementToken()); Assert.IsTrue(tfTok.Equals(sinkTok) == true, tfTok + " is not equal to " + sinkTok + " at token: " + i); } //simulate two fields, each being analyzed once, for 20 documents for (int j = 0; j < modCounts.Length; j++) { int tfPos = 0; long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); for (int i = 0; i < 20; i++) { stream = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stream.GetAttribute(typeof(PositionIncrementAttribute)); while (stream.IncrementToken()) { tfPos += posIncrAtt.GetPositionIncrement(); } stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new 
System.IO.StringReader(buffer.ToString()))), modCounts[j]); posIncrAtt = (PositionIncrementAttribute)stream.GetAttribute(typeof(PositionIncrementAttribute)); while (stream.IncrementToken()) { tfPos += posIncrAtt.GetPositionIncrement(); } } long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms"); int sinkPos = 0; //simulate one field with one sink start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); for (int i = 0; i < 20; i++) { teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())))); sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j])); PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)teeStream.GetAttribute(typeof(PositionIncrementAttribute)); while (teeStream.IncrementToken()) { sinkPos += posIncrAtt.GetPositionIncrement(); } //System.out.println("Modulo--------"); posIncrAtt = (PositionIncrementAttribute)sink.GetAttribute(typeof(PositionIncrementAttribute)); while (sink.IncrementToken()) { sinkPos += posIncrAtt.GetPositionIncrement(); } } finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms"); Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos); } System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----"); } }
// Hooks up the term and position-increment attributes this stream emits.
public void Init()
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>Construct filtering <i>in</i>. </summary>
public StandardFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
}
// Captures the term attribute so folding can rewrite token text in place.
public ASCIIFoldingFilter(TokenStream input)
    : base(input)
{
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}