Ejemplo n.º 1
0
		/// <summary>Clears the <c>done</c> flag, obtains the term and offset attributes
		/// through <c>AddAttribute</c>, and resizes the term attribute's buffer.</summary>
		/// <param name="bufferSize">Value forwarded to <c>ResizeTermBuffer</c>.</param>
		private void Init(int bufferSize)
		{
			this.done = false;

			TermAttribute term = (TermAttribute) AddAttribute(typeof(TermAttribute));
			this.termAtt = term;
			this.offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));

			term.ResizeTermBuffer(bufferSize);
		}
Ejemplo n.º 2
0
 /// <summary>Builds a filter that removes words from the text when they are
 /// too long or too short.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 /// <param name="min">Stored in the <c>min</c> field.</param>
 /// <param name="max">Stored in the <c>max</c> field.</param>
 public LengthFilter(TokenStream in_Renamed, int min, int max) : base(in_Renamed)
 {
     this.min = min;
     this.max = max;
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
 /// <summary>Runs <c>InitBlock</c> with the enclosing test instance and then obtains
 /// the term, position-increment and offset attributes via <c>AddAttribute</c>.</summary>
 /// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
 public MyTokenStream(TestTermVectorsReader enclosingInstance)
 {
     InitBlock(enclosingInstance);

     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
     this.offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));
 }
Ejemplo n.º 4
0
        /// <summary>Analyzes a sentence with a custom stop-word set while the global
        /// "enable position increments" default is switched on, and checks both that no
        /// stop word is emitted and that each token carries the expected position
        /// increment. The previous default is restored in a <c>finally</c> block.</summary>
        public virtual void  TestStopListPositions()
        {
            bool savedDefault = StopFilter.GetEnablePositionIncrementsDefault();

            StopFilter.SetEnablePositionIncrementsDefault(true);
            try
            {
                System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
                foreach (System.String word in new System.String[] { "good", "test", "analyzer" })
                {
                    stopWordsSet.Add(word, word);
                }

                StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
                System.IO.StringReader reader = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
                int[] expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

                TokenStream stream = newStop.TokenStream("test", reader);
                Assert.IsNotNull(stream);

                TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
                PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.AddAttribute(typeof(PositionIncrementAttribute));

                int position = 0;
                while (stream.IncrementToken())
                {
                    System.String text = termAtt.Term();
                    Assert.IsFalse(stopWordsSet.Contains(text));

                    int expected = expectedIncr[position];
                    position++;
                    Assert.AreEqual(expected, posIncrAtt.GetPositionIncrement());
                }
            }
            finally
            {
                // Put the global default back regardless of how the assertions went.
                StopFilter.SetEnablePositionIncrementsDefault(savedDefault);
            }
        }
Ejemplo n.º 5
0
 /// <summary>Resets <c>done</c> to false, obtains the term and offset attributes via
 /// <c>AddAttribute</c>, and resizes the term buffer.</summary>
 /// <param name="bufferSize">Value forwarded to <c>ResizeTermBuffer</c>.</param>
 private void Init(int bufferSize)
 {
     this.done = false;

     TermAttribute term = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.termAtt = term;
     this.offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));

     term.ResizeTermBuffer(bufferSize);
 }
Ejemplo n.º 6
0
        /// <summary>Tokenizes <paramref name="queryString"/> with <paramref name="analyzer"/>
        /// and passes the collected term texts to <c>ProcessTerms</c>.</summary>
        /// <param name="queryString">Text wrapped in a <c>StringReader</c> and analyzed.</param>
        /// <param name="analyzer">Analyzer producing the token stream; when it is null,
        /// or when it returns a null stream, nothing is processed.</param>
        public QueryTermVector(System.String queryString, Analyzer analyzer)
        {
            if (analyzer == null)
            {
                return;
            }

            TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
            if (stream == null)
            {
                return;
            }

            List <string> terms = new List <string>();
            try
            {
                stream.Reset();
                TermAttribute termAtt = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));

                while (stream.IncrementToken())
                {
                    terms.Add(termAtt.Term());
                }
                ProcessTerms(terms.ToArray());
            }
            catch (System.IO.IOException)
            {
                // I/O failures are ignored here; in that case ProcessTerms is not called.
            }
        }
Ejemplo n.º 7
0
 /// <summary>Wraps <paramref name="in_Renamed"/>, runs <c>InitBlock</c> with the enclosing
 /// test instance, and obtains the term, position-increment, offset and type
 /// attributes via <c>AddAttribute</c>.</summary>
 /// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 public TestFilter(TestMultiAnalyzer enclosingInstance, TokenStream in_Renamed)
     : base(in_Renamed)
 {
     InitBlock(enclosingInstance);

     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
     this.offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));
     this.typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
 }
Ejemplo n.º 8
0
 /// <summary>Wraps <paramref name="input"/>, remembers the field name, zeroes the
 /// <c>pos</c> and <c>i</c> counters, and obtains the position-increment, payload and
 /// term attributes from <paramref name="input"/> itself.</summary>
 /// <param name="input">Token stream handed to the base constructor and used as the
 /// attribute source.</param>
 /// <param name="fieldName">Stored in the <c>fieldName</c> field.</param>
 public PayloadFilter(TokenStream input, System.String fieldName)
     : base(input)
 {
     this.fieldName = fieldName;
     this.pos = 0;
     this.i = 0;

     this.posIncrAttr = (PositionIncrementAttribute) input.AddAttribute(typeof(PositionIncrementAttribute));
     this.payloadAttr = (PayloadAttribute) input.AddAttribute(typeof(PayloadAttribute));
     this.termAttr = (TermAttribute) input.AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 9
0
 /// <summary>Obtains the term attribute from the field state's attribute source, then
 /// starts the consumer and, when one exists, the next per-field handler.</summary>
 /// <param name="f">Field forwarded to each <c>Start</c> call.</param>
 internal override void Start(Fieldable f)
 {
     this.termAtt = (TermAttribute) fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
     consumer.Start(f);

     if (nextPerField == null)
     {
         return;
     }
     nextPerField.Start(f);
 }
Ejemplo n.º 10
0
        /// <summary>Runs "Now is The Time" through a <c>StopFilter</c> built from a string
        /// array with the final constructor argument set to true, and expects "Now" to
        /// be the only token emitted.</summary>
        public virtual void  TestIgnoreCase()
        {
            System.String[] stopWords = new System.String[] { "is", "the", "Time" };
            System.IO.StringReader reader = new System.IO.StringReader("Now is The Time");

            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(reader);
            TokenStream stream = new StopFilter(false, tokenizer, stopWords, true);
            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));

            bool hasFirst = stream.IncrementToken();
            Assert.IsTrue(hasFirst);
            Assert.AreEqual("Now", termAtt.Term());

            bool hasSecond = stream.IncrementToken();
            Assert.IsFalse(hasSecond);
        }
Ejemplo n.º 11
0
 /// <summary>Takes a byte array from <paramref name="pool"/>, has the enclosing test
 /// fill it with random data, converts it to the term string, marks the stream as
 /// being on its first token, and obtains the payload and term attributes.</summary>
 /// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
 /// <param name="pool">Pool supplying the payload bytes and the bytes-to-string conversion.</param>
 internal PoolingPayloadTokenStream(TestPayloads enclosingInstance, ByteArrayPool pool)
 {
     InitBlock(enclosingInstance);

     this.pool = pool;
     this.payload = pool.Get();
     Enclosing_Instance.GenerateRandomData(this.payload);
     this.term = pool.BytesToString(this.payload);
     this.first = true;

     this.payloadAtt = (PayloadAttribute) AddAttribute(typeof(PayloadAttribute));
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 12
0
 /// <summary>Wraps <paramref name="input"/>, records the field name, resets <c>pos</c>,
 /// seeds the <c>entities</c> and <c>nopayload</c> collections with fixed words, and
 /// obtains the term, position-increment and payload attributes.</summary>
 /// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
 /// <param name="input">Token stream handed to the base constructor.</param>
 /// <param name="fieldName">Stored in the <c>fieldName</c> field.</param>
 public PayloadFilter(TestPayloadSpans enclosingInstance, TokenStream input, System.String fieldName)
     : base(input)
 {
     InitBlock(enclosingInstance);
     this.fieldName = fieldName;
     this.pos = 0;

     foreach (System.String entity in new System.String[] { "xx", "one" })
     {
         SupportClass.CollectionsHelper.AddIfNotContains(entities, entity);
     }
     foreach (System.String marker in new System.String[] { "nopayload", "np" })
     {
         SupportClass.CollectionsHelper.AddIfNotContains(nopayload, marker);
     }

     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
     this.payloadAtt = (PayloadAttribute) AddAttribute(typeof(PayloadAttribute));
 }
Ejemplo n.º 13
0
        /// <summary>Runs "Now is The Time" through a <c>StopFilter</c> whose stop set comes
        /// from <c>StopFilter.MakeStopSet</c>, and expects exactly the tokens "Now" and
        /// "The", in that order.</summary>
        public virtual void  TestStopFilt()
        {
            System.String[] stopWords = new System.String[] { "is", "the", "Time" };
            System.Collections.Hashtable stopSet = StopFilter.MakeStopSet(stopWords);

            System.IO.StringReader reader = new System.IO.StringReader("Now is The Time");
            TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));

            foreach (System.String expected in new System.String[] { "Now", "The" })
            {
                Assert.IsTrue(stream.IncrementToken());
                Assert.AreEqual(expected, termAtt.Term());
            }
            Assert.IsFalse(stream.IncrementToken());
        }
Ejemplo n.º 14
0
        /// <summary>Feeds a whitespace-tokenized sentence through a <c>LengthFilter</c>
        /// constructed with bounds 2 and 6 and expects only "short", "ab" and "foo" to
        /// come out, in that order.</summary>
        public virtual void  TestFilter()
        {
            System.IO.StringReader reader = new System.IO.StringReader("short toolong evenmuchlongertext a ab toolong foo");
            TokenStream stream = new WhitespaceTokenizer(reader);
            LengthFilter filter = new LengthFilter(stream, 2, 6);
            TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));

            foreach (System.String expected in new System.String[] { "short", "ab", "foo" })
            {
                Assert.IsTrue(filter.IncrementToken());
                Assert.AreEqual(expected, termAtt.Term());
            }
            Assert.IsFalse(filter.IncrementToken());
        }
Ejemplo n.º 15
0
        /// <summary>Tokenizes a sentence with the <c>stop</c> analyzer and asserts that no
        /// emitted term is contained in <c>inValidTokens</c>.</summary>
        public virtual void  TestDefaults()
        {
            Assert.IsTrue(stop != null);

            TokenStream stream = stop.TokenStream("test", new System.IO.StringReader("This is a test of the english stop analyzer"));
            Assert.IsTrue(stream != null);

            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
            while (stream.IncrementToken())
            {
                System.String emitted = termAtt.Term();
                Assert.IsFalse(inValidTokens.Contains(emitted));
            }
        }
Ejemplo n.º 16
0
        /// <summary>Walks a <c>NumericTokenStream</c> set to <c>ivalue</c> and checks, for
        /// each shift from 0 up to (but excluding) 32 in steps of the default precision
        /// step, that a token exists, that its term equals the prefix-coded value for
        /// that shift, and that its type is full-precision for shift 0 and
        /// lower-precision otherwise. Finally expects the stream to be exhausted.</summary>
        public virtual void  TestIntStream()
        {
            NumericTokenStream stream = new NumericTokenStream().SetIntValue(ivalue);

            // GetAttribute (not AddAttribute) is used on purpose: per the original note it
            // is meant to fail if the stream does not really carry these attributes.
            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
            TypeAttribute typeAtt = (TypeAttribute) stream.GetAttribute(typeof(TypeAttribute));

            int shift = 0;
            while (shift < 32)
            {
                Assert.IsTrue(stream.IncrementToken(), "New token is available");
                Assert.AreEqual(NumericUtils.IntToPrefixCoded(ivalue, shift), termAtt.Term(), "Term is correctly encoded");
                Assert.AreEqual((shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct");

                shift += NumericUtils.PRECISION_STEP_DEFAULT;
            }

            Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
        }
        /// <summary>Consumes <paramref name="stream"/> and asserts that it yields exactly
        /// the terms stored in the <c>tokens</c> array, in order.</summary>
        /// <param name="stream">Token stream to drain; must expose a term attribute.</param>
        private void  checkTokens(TokenStream stream)
        {
            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
            Assert.IsNotNull(termAtt);

            int seen = 0;
            for (; stream.IncrementToken(); seen++)
            {
                Assert.IsTrue(seen < tokens.Length);
                Assert.AreEqual(tokens[seen], termAtt.Term());
            }

            Assert.AreEqual(tokens.Length, seen);
        }
Ejemplo n.º 18
0
        /// <summary>Sets position increments on <paramref name="stpf"/> to
        /// <paramref name="enableIcrements"/> and checks that the filter emits the English
        /// words for 0, 3, 6, ... 18 and nothing more. With increments enabled the first
        /// token must have increment 1 and every later one 3; with increments disabled
        /// every token must have increment 1.</summary>
        /// <param name="stpf">Stop filter under test.</param>
        /// <param name="enableIcrements">Value passed to <c>SetEnablePositionIncrements</c>.</param>
        private void  DoTestStopPositons(StopFilter stpf, bool enableIcrements)
        {
            Log("---> test with enable-increments-" + (enableIcrements?"enabled":"disabled"));
            stpf.SetEnablePositionIncrements(enableIcrements);

            TermAttribute termAtt = (TermAttribute) stpf.GetAttribute(typeof(TermAttribute));
            PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stpf.GetAttribute(typeof(PositionIncrementAttribute));

            for (int i = 0; i < 20; i += 3)
            {
                Assert.IsTrue(stpf.IncrementToken());
                Log("Token " + i + ": " + stpf);

                System.String w = English.IntToEnglish(i).Trim();
                Assert.AreEqual(w, termAtt.Term(), "expecting token " + i + " to be " + w);

                int expectedIncrement;
                if (!enableIcrements)
                {
                    expectedIncrement = 1;
                }
                else if (i == 0)
                {
                    expectedIncrement = 1;
                }
                else
                {
                    expectedIncrement = 3;
                }
                Assert.AreEqual(expectedIncrement, posIncrAtt.GetPositionIncrement(), "all but first token must have position increment of 3");
            }

            Assert.IsFalse(stpf.IncrementToken());
        }
Ejemplo n.º 19
0
        /// <summary>Builds a <c>PerFieldAnalyzerWrapper</c> whose default is a
        /// <c>WhitespaceAnalyzer</c> and whose "special" field uses a
        /// <c>SimpleAnalyzer</c>, then checks that "Qwerty" keeps its case for field
        /// "field" and is lower-cased for field "special".</summary>
        public virtual void  TestPerField()
        {
            System.String text = "Qwerty";

            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
            analyzer.AddAnalyzer("special", new SimpleAnalyzer());

            // Default analyzer path.
            TokenStream tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text));
            TermAttribute termAtt = (TermAttribute) tokenStream.GetAttribute(typeof(TermAttribute));
            bool gotDefaultToken = tokenStream.IncrementToken();
            Assert.IsTrue(gotDefaultToken);
            Assert.AreEqual("Qwerty", termAtt.Term(), "WhitespaceAnalyzer does not lowercase");

            // Field-specific analyzer path.
            tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            termAtt = (TermAttribute) tokenStream.GetAttribute(typeof(TermAttribute));
            bool gotSpecialToken = tokenStream.IncrementToken();
            Assert.IsTrue(gotSpecialToken);
            Assert.AreEqual("qwerty", termAtt.Term(), "SimpleAnalyzer lowercases");
        }
Ejemplo n.º 20
0
        /// <summary>Analyzes a sentence with a <c>StopAnalyzer</c> built from a custom
        /// stop-word set and asserts that no stop word is emitted and that every token
        /// reports a position increment of 1.</summary>
        public virtual void  TestStopList()
        {
            System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
            foreach (System.String word in new System.String[] { "good", "test", "analyzer" })
            {
                stopWordsSet.Add(word, word);
            }
            StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);

            TokenStream stream = newStop.TokenStream("test", new System.IO.StringReader("This is a good test of the english stop analyzer"));
            Assert.IsNotNull(stream);

            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
            PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.AddAttribute(typeof(PositionIncrementAttribute));

            while (stream.IncrementToken())
            {
                System.String text = termAtt.Term();
                bool isStopWord = stopWordsSet.Contains(text);
                Assert.IsFalse(isStopWord);

                // By default the stop tokenizer does not apply increments.
                Assert.AreEqual(1, posIncrAtt.GetPositionIncrement());
            }
        }
Ejemplo n.º 21
0
 /// <summary>Obtains the term and position-increment attributes via
 /// <c>AddAttribute</c> and stores them in the corresponding fields.</summary>
 public void Init()
 {
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
 }
Ejemplo n.º 22
0
 /// <summary>Wraps <paramref name="input"/> and obtains the term attribute via
 /// <c>AddAttribute</c>.</summary>
 /// <param name="input">Token stream handed to the base constructor.</param>
 public ISOLatin1AccentFilter(TokenStream input) : base(input)
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 23
0
		/// <summary>Obtains the term attribute from the field state's attribute source,
		/// starts the consumer, and then starts the next per-field handler if there is one.</summary>
		/// <param name="f">Field forwarded to each <c>Start</c> call.</param>
		internal override void Start(Fieldable f)
		{
			this.termAtt = (TermAttribute)fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
			consumer.Start(f);

			if (nextPerField == null)
			{
				return;
			}
			nextPerField.Start(f);
		}
Ejemplo n.º 24
0
			/// <summary>Filter that discards the token 'stop' and expands the token
			/// 'phrase' into 'phrase1 phrase2'. The constructor wraps
			/// <paramref name="in_Renamed"/> and obtains the term and offset attributes.</summary>
			/// <param name="in_Renamed">Token stream handed to the base constructor.</param>
			public QPTestFilter(TokenStream in_Renamed)
				: base(in_Renamed)
			{
				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
			}
Ejemplo n.º 25
0
 /// <summary>Obtains the term, type and position-increment attributes via
 /// <c>AddAttribute</c> and stores them in the corresponding fields.</summary>
 private void InitBlock()
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     this.typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
     this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
 }
Ejemplo n.º 26
0
 /// <summary>Forwards both arguments to the base constructor and obtains the offset
 /// and term attributes via <c>AddAttribute</c>.</summary>
 /// <param name="source">Attribute source handed to the base constructor.</param>
 /// <param name="input">Reader handed to the base constructor.</param>
 public CharTokenizer(AttributeSource source, System.IO.TextReader input) : base(source, input)
 {
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
			/// <summary>Runs <c>InitBlock</c> with the enclosing test instance, then obtains
			/// the term, position-increment and offset attributes via <c>AddAttribute</c>.</summary>
			/// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
			public MyTokenStream(TestTermVectorsReader enclosingInstance)
			{
				InitBlock(enclosingInstance);

				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
				this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
			}
Ejemplo n.º 28
0
				/// <summary>Hands <paramref name="r"/> to the base constructor and obtains the
				/// term attribute via <c>AddAttribute</c>.</summary>
				/// <param name="r">Reader handed to the base constructor.</param>
				public SingleCharTokenizer(System.IO.TextReader r)
					: base(r)
				{
					this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				}
Ejemplo n.º 29
0
            /// <summary>Accepts the current state when its term text equals "Dogs",
            /// ignoring case.</summary>
            /// <param name="a">Attribute source whose term attribute is inspected.</param>
            /// <returns>True when the term matches "Dogs" case-insensitively; otherwise false.</returns>
            public override bool Accept(AttributeSource a)
            {
                TermAttribute termAtt = (TermAttribute)a.GetAttribute(typeof(TermAttribute));

                // Ordinal, case-insensitive comparison: independent of the current culture's
                // casing rules, allocates no upper-cased copies, and yields false instead of
                // throwing if the term text is null.
                return System.String.Equals(termAtt.Term(), "Dogs", System.StringComparison.OrdinalIgnoreCase);
            }
Ejemplo n.º 30
0
 /// <summary>Wraps <paramref name="in_Renamed"/> and obtains the term attribute via
 /// <c>AddAttribute</c>.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 public LowerCaseFilter(TokenStream in_Renamed) : base(in_Renamed)
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 31
0
			/// <summary>Wraps <paramref name="input"/>, records the field name, resets
			/// <c>pos</c>, seeds the <c>entities</c> and <c>nopayload</c> collections with
			/// fixed words, and obtains the term, position-increment and payload attributes.</summary>
			/// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
			/// <param name="input">Token stream handed to the base constructor.</param>
			/// <param name="fieldName">Stored in the <c>fieldName</c> field.</param>
			public PayloadFilter(TestPayloadSpans enclosingInstance, TokenStream input, System.String fieldName)
				: base(input)
			{
				InitBlock(enclosingInstance);
				this.fieldName = fieldName;
				this.pos = 0;

				// Words added to the entities collection.
				Support.CollectionsHelper.AddIfNotContains(entities, "xx");
				Support.CollectionsHelper.AddIfNotContains(entities, "one");

				// Words added to the nopayload collection.
				Support.CollectionsHelper.AddIfNotContains(nopayload, "nopayload");
				Support.CollectionsHelper.AddIfNotContains(nopayload, "np");

				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
				this.payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
			}
Ejemplo n.º 32
0
 /// <summary>Wraps <paramref name="in_Renamed"/>, creates a new <c>PorterStemmer</c>,
 /// and obtains the term attribute via <c>AddAttribute</c>.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 public PorterStemFilter(TokenStream in_Renamed) : base(in_Renamed)
 {
     this.stemmer = new PorterStemmer();
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 33
0
		/// <summary>Stores <paramref name="val"/> in <c>value_Renamed</c> and obtains the
		/// term attribute via <c>AddAttribute</c>.</summary>
		/// <param name="val">Value kept in the <c>value_Renamed</c> field.</param>
		public RepeatingTokenStream(System.String val)
		{
			value_Renamed = val;
			termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
		}
Ejemplo n.º 34
0
		/// <summary>Wraps <paramref name="input"/>, remembers the field name, zeroes the
		/// <c>pos</c> and <c>i</c> counters, and obtains the position-increment, payload
		/// and term attributes from <paramref name="input"/> itself.</summary>
		/// <param name="input">Token stream handed to the base constructor and used as
		/// the attribute source.</param>
		/// <param name="fieldName">Stored in the <c>fieldName</c> field.</param>
		public PayloadFilter(TokenStream input, System.String fieldName)
			: base(input)
		{
			this.fieldName = fieldName;
			this.pos = 0;
			this.i = 0;

			this.posIncrAttr = (PositionIncrementAttribute)input.AddAttribute(typeof(PositionIncrementAttribute));
			this.payloadAttr = (PayloadAttribute)input.AddAttribute(typeof(PayloadAttribute));
			this.termAttr = (TermAttribute)input.AddAttribute(typeof(TermAttribute));
		}
Ejemplo n.º 35
0
        /// <summary>Runs a whitespace-tokenized string of accented Latin characters and
        /// ligatures through <c>ISOLatin1AccentFilter</c> and checks, token by token, that
        /// each comes out as the expected unaccented replacement, after which the filter
        /// must be exhausted.</summary>
        public virtual void  TestU()
        {
            System.IO.StringReader reader = new System.IO.StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl");
            TokenStream stream = new WhitespaceTokenizer(reader);
            ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
            TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));

            // Expected output, one entry per input token, in input order.
            System.String[] expectedTerms = new System.String[]
            {
                "Des", "mot", "cles", "A", "LA", "CHAINE",
                "A", "A", "A", "A", "A", "A",
                "AE", "C",
                "E", "E", "E", "E",
                "I", "I", "I", "I",
                "IJ", "D", "N",
                "O", "O", "O", "O", "O", "O",
                "OE", "TH",
                "U", "U", "U", "U",
                "Y", "Y",
                "a", "a", "a", "a", "a", "a",
                "ae", "c",
                "e", "e", "e", "e",
                "i", "i", "i", "i",
                "ij", "d", "n",
                "o", "o", "o", "o", "o", "o",
                "oe", "ss", "th",
                "u", "u", "u", "u",
                "y", "y",
                "fi", "fl"
            };

            foreach (System.String expected in expectedTerms)
            {
                AssertTermEquals(expected, filter, termAtt);
            }

            Assert.IsFalse(filter.IncrementToken());
        }
Ejemplo n.º 36
0
			/// <summary>Takes a byte array from <paramref name="pool"/>, has the enclosing
			/// test fill it with random data, converts it to the term string, marks the
			/// stream as being on its first token, and obtains the payload and term attributes.</summary>
			/// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
			/// <param name="pool">Pool supplying the payload bytes and the bytes-to-string conversion.</param>
			internal PoolingPayloadTokenStream(TestPayloads enclosingInstance, ByteArrayPool pool)
			{
				InitBlock(enclosingInstance);

				this.pool = pool;
				this.payload = pool.Get();
				Enclosing_Instance.GenerateRandomData(this.payload);
				this.term = pool.BytesToString(this.payload);
				this.first = true;

				this.payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
			}
Ejemplo n.º 37
0
 /// <summary>Builds a filter that removes words from the text when they are
 /// too long or too short.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 /// <param name="min">Stored in the <c>min</c> field.</param>
 /// <param name="max">Stored in the <c>max</c> field.</param>
 public LengthFilter(TokenStream in_Renamed, int min, int max)
     : base(in_Renamed)
 {
     this.min = min;
     this.max = max;
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 38
0
 /// <summary>Wraps <paramref name="in_Renamed"/>, creates a new <c>PorterStemmer</c>,
 /// and obtains the term attribute via <c>AddAttribute</c>.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 public PorterStemFilter(TokenStream in_Renamed)
     : base(in_Renamed)
 {
     this.stemmer = new PorterStemmer();
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
			/// <summary>Obtains the term and offset attributes via <c>AddAttribute</c> and
			/// stores them in <c>termAttribute</c> and <c>offsetAttribute</c>.</summary>
			internal SingleTokenTokenStream()
			{
				this.termAttribute = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.offsetAttribute = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
			}
Ejemplo n.º 40
0
 /// <summary>Hands <paramref name="r"/> to the base constructor and obtains the term
 /// attribute via <c>AddAttribute</c>.</summary>
 /// <param name="r">Reader handed to the base constructor.</param>
 public SingleCharTokenizer(System.IO.TextReader r)
     : base(r)
 {
     this.termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 41
0
 /// <summary>Obtains the term and offset attributes via <c>AddAttribute</c> and stores
 /// them in <c>termAttribute</c> and <c>offsetAttribute</c>.</summary>
 internal SingleTokenTokenStream()
 {
     this.termAttribute = (TermAttribute) AddAttribute(typeof(TermAttribute));
     this.offsetAttribute = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));
 }
Ejemplo n.º 42
0
 /// <summary>Forwards both arguments to the base constructor and obtains the offset
 /// and term attributes via <c>AddAttribute</c>.</summary>
 /// <param name="factory">Attribute factory handed to the base constructor.</param>
 /// <param name="input">Reader handed to the base constructor.</param>
 public CharTokenizer(AttributeFactory factory, System.IO.TextReader input) : base(factory, input)
 {
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
		/// <summary>Fills the given term attribute with the current token text.</summary>
		/// <param name="t">Attribute that receives the slice of <c>zzBuffer</c> running
		/// from <c>zzStartRead</c> up to <c>zzMarkedPos</c>.</param>
		internal void GetText(TermAttribute t)
		{
			int length = zzMarkedPos - zzStartRead;
			t.SetTermBuffer(zzBuffer, zzStartRead, length);
		}
Ejemplo n.º 44
0
			/// <summary>Wraps <paramref name="in_Renamed"/>, runs <c>InitBlock</c> with the
			/// enclosing test instance, and obtains the term and position-increment
			/// attributes via <c>AddAttribute</c>.</summary>
			/// <param name="enclosingInstance">Instance passed to <c>InitBlock</c>.</param>
			/// <param name="in_Renamed">Token stream handed to the base constructor.</param>
			public TestPosIncrementFilter(TestMultiAnalyzer enclosingInstance, TokenStream in_Renamed)
				: base(in_Renamed)
			{
				InitBlock(enclosingInstance);

				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
			}
Ejemplo n.º 45
0
 /// <summary>Advances <paramref name="stream"/> by one token, asserting that one is
 /// available, and asserts that the term text equals <paramref name="expected"/>.</summary>
 /// <param name="expected">Term text the next token must have.</param>
 /// <param name="stream">Stream to advance.</param>
 /// <param name="termAtt">Term attribute read after advancing.</param>
 internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
 {
     bool advanced = stream.IncrementToken();
     Assert.IsTrue(advanced);

     System.String actual = termAtt.Term();
     Assert.AreEqual(expected, actual);
 }
Ejemplo n.º 46
0
 /// <summary>Stores the acronym flag and the reader, then obtains the term, offset,
 /// position-increment and type attributes via <c>AddAttribute</c>.</summary>
 /// <param name="input">Reader stored in the <c>input</c> field.</param>
 /// <param name="replaceInvalidAcronym">Stored in the <c>replaceInvalidAcronym</c> field.</param>
 private void Init(System.IO.TextReader input, bool replaceInvalidAcronym)
 {
     this.replaceInvalidAcronym = replaceInvalidAcronym;
     this.input = input;

     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
     this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
     this.typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
 }
Ejemplo n.º 47
0
 /// <summary>Stores <paramref name="val"/> in <c>value_Renamed</c> and obtains the term
 /// attribute via <c>AddAttribute</c>.</summary>
 /// <param name="val">Value kept in the <c>value_Renamed</c> field.</param>
 public RepeatingTokenStream(System.String val)
 {
     value_Renamed = val;
     termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
 }
Ejemplo n.º 48
0
		/// <summary>Advances <paramref name="stream"/> by one token, asserting that one is
		/// available, and asserts that the term text equals <paramref name="expected"/>.</summary>
		/// <param name="expected">Term text the next token must have.</param>
		/// <param name="stream">Stream to advance.</param>
		/// <param name="termAtt">Term attribute read after advancing.</param>
		internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
		{
			bool advanced = stream.IncrementToken();
			Assert.IsTrue(advanced);

			System.String actual = termAtt.Term();
			Assert.AreEqual(expected, actual);
		}
Ejemplo n.º 49
0
 /// <summary>Fills the given term attribute with the current token text.</summary>
 /// <param name="t">Attribute that receives the slice of <c>zzBuffer</c> running from
 /// <c>zzStartRead</c> up to <c>zzMarkedPos</c>.</param>
 internal void GetText(TermAttribute t)
 {
     int length = zzMarkedPos - zzStartRead;
     t.SetTermBuffer(zzBuffer, zzStartRead, length);
 }
Ejemplo n.º 50
0
        /// <summary> Not an explicit test, just useful to print out some info on performance.
        /// For each token count it builds a text of upper-cased English number words,
        /// first checks that a tee/sink pair and a plain modulo-filtered stream yield
        /// equal term attributes, and then, for each modulo count, times analyzing the
        /// text twice ("two fields") against analyzing it once through a tee with a sink,
        /// writing the timings to the console and asserting that both approaches add up
        /// to the same total position increment.
        /// </summary>
        /// <throws>  Exception </throws>
        public virtual void  Performance()
        {
            // Token counts of the generated texts, and the modulo values tried for each text.
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                // Build the input: the English words for 0..tokCount[k]-1, upper-cased, each followed by a space.
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
                }
                //make sure we produce the same tokens
                TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))));
                TokenStream        sink      = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
                // Drain the tee so the sink is populated before it is read below.
                teeStream.ConsumeAllTokens();
                TokenStream   stream  = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100);
                TermAttribute tfTok   = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
                TermAttribute sinkTok = (TermAttribute)sink.AddAttribute(typeof(TermAttribute));
                // Walk both streams in lock-step; every token of the filtered stream must have an equal counterpart in the sink.
                for (int i = 0; stream.IncrementToken(); i++)
                {
                    Assert.IsTrue(sink.IncrementToken());
                    Assert.IsTrue(tfTok.Equals(sinkTok) == true, tfTok + " is not equal to " + sinkTok + " at token: " + i);
                }

                //simulate two fields, each being analyzed once, for 20 documents
                for (int j = 0; j < modCounts.Length; j++)
                {
                    // Sum of position increments seen by the two-pass approach.
                    int  tfPos = 0;
                    // Wall-clock start in milliseconds.
                    long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        // First pass: the unfiltered stream.
                        stream = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())));
                        PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stream.GetAttribute(typeof(PositionIncrementAttribute));
                        while (stream.IncrementToken())
                        {
                            tfPos += posIncrAtt.GetPositionIncrement();
                        }
                        // Second pass: the same text re-analyzed through the modulo filter.
                        stream     = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                        posIncrAtt = (PositionIncrementAttribute)stream.GetAttribute(typeof(PositionIncrementAttribute));
                        while (stream.IncrementToken())
                        {
                            tfPos += posIncrAtt.GetPositionIncrement();
                        }
                    }
                    long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
                    // Sum of position increments seen by the tee/sink approach.
                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))));
                        sink      = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                        PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)teeStream.GetAttribute(typeof(PositionIncrementAttribute));
                        // Consume the tee first, then the sink created from it.
                        while (teeStream.IncrementToken())
                        {
                            sinkPos += posIncrAtt.GetPositionIncrement();
                        }
                        //System.out.println("Modulo--------");
                        posIncrAtt = (PositionIncrementAttribute)sink.GetAttribute(typeof(PositionIncrementAttribute));
                        while (sink.IncrementToken())
                        {
                            sinkPos += posIncrAtt.GetPositionIncrement();
                        }
                    }
                    finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
                    // Both approaches must have accumulated the same total position increment.
                    Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
                }
                System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }
Ejemplo n.º 51
0
 /// <summary>Obtains the term and position-increment attributes via
 /// <c>AddAttribute</c> and stores them in the corresponding fields.</summary>
 public void Init()
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     this.posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
 }
Ejemplo n.º 52
0
 /// <summary>Constructs a filter over <paramref name="in_Renamed"/> and obtains the
 /// term and type attributes via <c>AddAttribute</c>.</summary>
 /// <param name="in_Renamed">Token stream handed to the base constructor.</param>
 public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     this.typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
 }
Ejemplo n.º 53
0
 /// <summary>Wraps <paramref name="input"/> and obtains the term attribute via
 /// <c>AddAttribute</c>.</summary>
 /// <param name="input">Token stream handed to the base constructor.</param>
 public ASCIIFoldingFilter(TokenStream input) : base(input)
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }