/// <summary>
 /// Reads tokens from <c>input</c> until it returns null and appends a clone of
 /// each one to <c>cache</c>.
 /// </summary>
 /// <param name="reusableToken">Token instance handed to every <c>input.Next</c> call.</param>
 private void  FillCache(/* in */ Token reusableToken)
 {
     // The same reusableToken is passed on every call, so a clone is stored rather
     // than the returned reference (presumably the input may hand back that same
     // instance - confirm against the input's implementation).
     Token current = input.Next(reusableToken);
     while (current != null)
     {
         cache.Add(current.Clone());
         current = input.Next(reusableToken);
     }
 }
Beispiel #2
0
 /// <summary>
 /// Stores a clone of the given token in the sink's list. Override this method to
 /// cache only certain tokens, or new tokens based on the old tokens.
 /// </summary>
 /// <param name="t">The <see cref="Lucene.Net.Analysis.Token"/> to add to the sink;
 /// a null token is ignored.
 /// </param>
 public virtual void  Add(Token t)
 {
     // Only non-null tokens are recorded, and the list keeps its own copy.
     if (t != null)
     {
         lst.Add((Token)t.Clone());
     }
 }
        /// <summary>
        /// Creates a stream holding a private clone of <paramref name="token"/> and
        /// registers a <c>TermAttribute</c> on the stream.
        /// </summary>
        /// <param name="token">The token to copy; must not be null.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="token"/> is null.
        /// </exception>
        public SingleTokenTokenStream(Token token)
        {
            // Debug.Assert is compiled out of release builds, so the non-null contract
            // is enforced explicitly; otherwise a null token only shows up as a
            // NullReferenceException from the Clone() call below.
            if (token == null)
            {
                throw new System.ArgumentNullException("token", "Token was null!");
            }

            _singleToken = (Token) token.Clone();

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            _tokenAtt = (AttributeImpl) AddAttribute(typeof (TermAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            // Sanity check (debug builds only): the attribute is expected to be a Token
            // or a type whose simple name matches TokenWrapper.
            Debug.Assert(_tokenAtt is Token || _tokenAtt.GetType().Name.Equals(typeof (TokenWrapper).Name),
                         "Token Attribute is the wrong type! Type was: " + _tokenAtt.GetType().Name + " but expected " +
                         typeof (TokenWrapper).Name);
        }
 /// <summary>
 /// Returns a clone of the next token in the list of cached tokens.
 /// </summary>
 /// <param name="reusableToken">Must not be null (checked with a debug assertion);
 /// it is not otherwise used by this method.</param>
 /// <returns>
 /// A clone of the next <see cref="Lucene.Net.Analysis.Token"/> in the sink, or null
 /// once the enumerator is exhausted.
 /// </returns>
 public override Token Next(/* in */ Token reusableToken)
 {
     System.Diagnostics.Debug.Assert(reusableToken != null);

     // The enumerator over the token list is created on first use.
     if (iter == null)
     {
         iter = lst.GetEnumerator();
     }

     if (!iter.MoveNext())
     {
         return null;
     }

     // Return a copy so the caller never holds the stored instance.
     Token stored = (Token)iter.Current;
     return (Token)stored.Clone();
 }
Beispiel #5
0
 /// <summary>
 /// Returns a clone of the next token held in the list, or null when the
 /// enumerator has no more tokens.
 /// </summary>
 /// <param name="reusableToken">Must not be null (checked with a debug assertion);
 /// it is not otherwise used by this method.</param>
 /// <returns>A clone of the next stored token, or null when exhausted.</returns>
 public override Token Next(Token reusableToken)
 {
     System.Diagnostics.Debug.Assert(reusableToken != null);

     // The enumerator is created on first use.
     if (iter == null)
     {
         iter = lst.GetEnumerator();
     }

     // Since this TokenStream can be reset, the stored tokens must stay immutable,
     // so callers always receive a clone rather than the stored instance.
     return iter.MoveNext() ? (Token)iter.Current.Clone() : null;
 }
        /// <summary>
        /// Returns a clone of the next cached token. On the first call the cache is
        /// created and filled through <c>FillCache</c> before iteration starts.
        /// </summary>
        /// <param name="reusableToken">Must not be null (checked with a debug assertion);
        /// it is forwarded to <c>FillCache</c> when the cache is first built.</param>
        /// <returns>A clone of the next cached token, or null when the cache is exhausted.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            if (cache == null)
            {
                // Lazy fill: the enumerator is taken only after the cache has been
                // populated, so it iterates the complete list.
                cache = new System.Collections.ArrayList();
                FillCache(reusableToken);
                iterator = cache.GetEnumerator();
            }

            if (iterator.MoveNext())
            {
                Token cached = (Token)iterator.Current;
                return (Token)cached.Clone();
            }

            // the cache is exhausted
            return null;
        }
 /// <summary>
 /// Appends a clone of the given token to the sink's list. Override this method to
 /// cache only certain tokens, or new tokens based on the old tokens.
 /// </summary>
 /// <param name="t">The <see cref="Lucene.Net.Analysis.Token"/> to add to the sink;
 /// nothing is added when it is null.
 /// </param>
 public virtual void Add(Token t)
 {
     if (t != null)
     {
         // Store a copy, not the caller's instance.
         lst.Add((Token) t.Clone());
     }
 }
Beispiel #8
0
 /// <summary>
 /// Creates a new <c>TokenWrapper</c> around a clone of the wrapped token.
 /// </summary>
 /// <returns>A new wrapper whose delegate is a copy of this wrapper's delegate.</returns>
 public override System.Object Clone()
 {
     Token delegateCopy = (Token)delegate_Renamed.Clone();
     return new TokenWrapper(delegateCopy);
 }
Beispiel #9
0
        /// <summary>
        /// Not an explicit test, just useful to print out some info on performance.
        /// For each token count it first checks that a <c>TeeTokenFilter</c> feeding a
        /// <c>ModuloSinkTokenizer</c> collects the same terms as a <c>ModuloTokenFilter</c>
        /// over the same text. Then, for each modulo count, it times 20 rounds of
        /// analysing the text twice ("two fields") against 20 rounds of analysing it
        /// once through a tee and then replaying the sink, and asserts that both
        /// approaches add up to the same total position increment.
        /// </summary>
        public virtual void  Performance()
        {
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
                }
                //make sure we produce the same tokens
                ModuloSinkTokenizer sink  = new ModuloSinkTokenizer(this, tokCount[k], 100);
                Token       reusableToken = new Token();
                TokenStream stream        = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
                // Drain the tee so the sink receives every token.
                while (stream.Next(reusableToken) != null)
                {
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100);
                System.Collections.IList tmp = new System.Collections.ArrayList();
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    // Clone because the same reusableToken is passed to every Next call.
                    tmp.Add(nextToken.Clone());
                }
                System.Collections.IList sinkList = sink.GetTokens();
                Assert.IsTrue(tmp.Count == sinkList.Count, "tmp Size: " + tmp.Count + " is not: " + sinkList.Count);
                for (int i = 0; i < tmp.Count; i++)
                {
                    Token tfTok   = (Token)tmp[i];
                    Token sinkTok = (Token)sinkList[i];
                    Assert.IsTrue(tfTok.Term().Equals(sinkTok.Term()), tfTok.Term() + " is not equal to " + sinkTok.Term() + " at token: " + i);
                }
                //simulate two fields, each being analyzed once, for 20 documents

                for (int j = 0; j < modCounts.Length; j++)
                {
                    int  tfPos = 0;
                    // Stopwatch measures elapsed time directly instead of differencing
                    // wall-clock DateTime.Now readings.
                    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
                    for (int i = 0; i < 20; i++)
                    {
                        stream = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())));
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            tfPos += nextToken.GetPositionIncrement();
                        }
                        stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            tfPos += nextToken.GetPositionIncrement();
                        }
                    }
                    timer.Stop();
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + timer.ElapsedMilliseconds + " ms");
                    int sinkPos = 0;
                    //simulate one field with one sink
                    timer = System.Diagnostics.Stopwatch.StartNew();
                    for (int i = 0; i < 20; i++)
                    {
                        sink   = new ModuloSinkTokenizer(this, tokCount[k], modCounts[j]);
                        stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            sinkPos += nextToken.GetPositionIncrement();
                        }
                        //System.out.println("Modulo--------");
                        // Replay what the sink collected while the tee was consumed.
                        stream = sink;
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            sinkPos += nextToken.GetPositionIncrement();
                        }
                    }
                    timer.Stop();
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + timer.ElapsedMilliseconds + " ms");
                    Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
                }
                System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }
 /// <summary>
 /// Replaces the token held by this stream with a clone of <paramref name="token"/>.
 /// </summary>
 /// <param name="token">The token to copy; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="token"/> is null.
 /// </exception>
 public void SetToken(Token token)
 {
     // Report a null argument by name instead of letting Clone() fail with a
     // NullReferenceException.
     if (token == null)
     {
         throw new System.ArgumentNullException("token");
     }
     _singleToken = (Token) token.Clone();
 }
			/// <summary>
			/// Stores a clone of the token when it is non-null and the running call
			/// count is a multiple of <c>modCount</c>. The count is incremented on
			/// every call, whether or not the token was stored.
			/// </summary>
			/// <param name="t">Token offered to the sink; may be null.</param>
			public override void  Add(Token t)
			{
				if (t != null)
				{
					// The modulo is evaluated only for non-null tokens, as in the
					// short-circuit of a combined condition.
					if (count % modCount == 0)
					{
						lst.Add(t.Clone());
					}
				}
				count++;
			}
		/// <summary>
		/// Prints timing information rather than testing a specific behaviour.
		/// For each token count it first checks that a <c>TeeTokenFilter</c> feeding a
		/// <c>ModuloSinkTokenizer</c> collects the same term texts as a
		/// <c>ModuloTokenFilter</c> over the same text. Then, for each modulo count, it
		/// times 20 rounds of analysing the text twice ("two fields") against 20 rounds
		/// of analysing it once through a tee and then replaying the sink, and asserts
		/// that both approaches add up to the same total position increment.
		/// </summary>
		public virtual void  TestPerformance()
		{
			int[] tokCount = new int[]{100, 500, 1000, 2000, 5000, 10000};
			int[] modCounts = new int[]{1, 2, 5, 10, 20, 50, 100, 200, 500};
			for (int k = 0; k < tokCount.Length; k++)
			{
				System.Text.StringBuilder buffer = new System.Text.StringBuilder();
				System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
				for (int i = 0; i < tokCount[k]; i++)
				{
					buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
				}
				//make sure we produce the same tokens
				ModuloSinkTokenizer sink = new ModuloSinkTokenizer(this, tokCount[k], 100);
				Token next = new Token();
				TokenStream result = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
				// Drain the tee so the sink receives every token.
				while ((next = result.Next(next)) != null)
				{
				}
				result = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100);
				// The loop above leaves next == null, so start again with a fresh token.
				next = new Token();
				System.Collections.IList tmp = new System.Collections.ArrayList();
				while ((next = result.Next(next)) != null)
				{
					tmp.Add(next.Clone());
				}
				System.Collections.IList sinkList = sink.GetTokens();
				Assert.IsTrue(tmp.Count == sinkList.Count, "tmp Size: " + tmp.Count + " is not: " + sinkList.Count);
				for (int i = 0; i < tmp.Count; i++)
				{
					Token tfTok = (Token) tmp[i];
					Token sinkTok = (Token) sinkList[i];
					Assert.IsTrue(tfTok.TermText().Equals(sinkTok.TermText()), tfTok.TermText() + " is not equal to " + sinkTok.TermText() + " at token: " + i);
				}
				//simulate two fields, each being analyzed once, for 20 documents
				
				for (int j = 0; j < modCounts.Length; j++)
				{
					int tfPos = 0;
					// Stopwatch measures elapsed time directly; no epoch-offset tick
					// arithmetic on DateTime.Now is needed.
					System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
					for (int i = 0; i < 20; i++)
					{
						next = new Token();
						result = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())));
						while ((next = result.Next(next)) != null)
						{
							tfPos += next.GetPositionIncrement();
						}
						next = new Token();
						result = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
						while ((next = result.Next(next)) != null)
						{
							tfPos += next.GetPositionIncrement();
						}
					}
					timer.Stop();
					System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + timer.ElapsedMilliseconds + " ms");
					int sinkPos = 0;
					//simulate one field with one sink
					timer = System.Diagnostics.Stopwatch.StartNew();
					for (int i = 0; i < 20; i++)
					{
						sink = new ModuloSinkTokenizer(this, tokCount[k], modCounts[j]);
						next = new Token();
						result = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
						while ((next = result.Next(next)) != null)
						{
							sinkPos += next.GetPositionIncrement();
						}
						//System.out.println("Modulo--------");
						result = sink;
						// The previous loop ends only when next is null; re-create the
						// token so the sink is never handed a null reusable token.
						next = new Token();
						while ((next = result.Next(next)) != null)
						{
							sinkPos += next.GetPositionIncrement();
						}
					}
					timer.Stop();
					System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + timer.ElapsedMilliseconds + " ms");
					Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
				}
				System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
			}
		}