Пример #1
0
        /// <summary>
        /// Runs "Now is The Time" through a whitespace tokenizer wrapped in a
        /// <c>StopFilter</c> whose stop list is { "is", "the", "Time" } and checks that
        /// only "Now" and "The" come out: the lower-case entry "the" is not expected
        /// to remove the capitalised token "The".
        /// </summary>
        public virtual void TestExactCase()
        {
            string[] stopList = new string[] { "is", "the", "Time" };
            TokenStream filtered = new StopFilter(
                false,
                new WhitespaceTokenizer(new System.IO.StringReader("Now is The Time")),
                stopList);
            TermAttribute term = (TermAttribute)filtered.GetAttribute(typeof(TermAttribute));

            // Tokens expected to survive the filter, in stream order.
            string[] survivors = new string[] { "Now", "The" };
            foreach (string survivor in survivors)
            {
                Assert.IsTrue(filtered.IncrementToken());
                Assert.AreEqual(survivor, term.Term());
            }

            // Nothing may follow the last expected token.
            Assert.IsFalse(filtered.IncrementToken());
        }
Пример #2
0
        /// <summary>
        /// Pushes a whitespace-tokenized sentence through a <c>LengthFilter</c> built with
        /// the arguments 2 and 6 and checks that exactly "short", "ab" and "foo" are
        /// emitted, in that order.
        /// </summary>
        public virtual void TestFilter()
        {
            const string sentence = "short toolong evenmuchlongertext a ab toolong foo";

            TokenStream source = new WhitespaceTokenizer(new System.IO.StringReader(sentence));
            LengthFilter lengthFilter = new LengthFilter(source, 2, 6);
            TermAttribute term = (TermAttribute)lengthFilter.GetAttribute(typeof(TermAttribute));

            // "toolong", "evenmuchlongertext" and "a" are expected to be filtered out.
            foreach (string kept in new string[] { "short", "ab", "foo" })
            {
                Assert.IsTrue(lengthFilter.IncrementToken());
                Assert.AreEqual(kept, term.Term());
            }

            Assert.IsFalse(lengthFilter.IncrementToken());
        }
Пример #3
0
        /// <summary>
        /// Checks the position increments reported by a <c>StopAnalyzer</c> built from a
        /// custom stop set ("good", "test", "analyzer") while the <c>StopFilter</c>
        /// position-increment default is switched on. The previous default is restored
        /// in a <c>finally</c> block so other tests are not affected.
        /// </summary>
        public virtual void  TestStopListPositions()
        {
            bool defaultEnable = StopFilter.GetEnablePositionIncrementsDefault();

            StopFilter.SetEnablePositionIncrementsDefault(true);
            try
            {
                System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
                stopWordsSet.Add("good", "good");
                stopWordsSet.Add("test", "test");
                stopWordsSet.Add("analyzer", "analyzer");
                StopAnalyzer           newStop = new StopAnalyzer(stopWordsSet);
                System.IO.StringReader reader  = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");
                // One entry per token expected to survive; the values 3 and 2 line up with
                // the places in the sentence where stop words are removed.
                int[]       expectedIncr       = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };
                TokenStream stream             = newStop.TokenStream("test", reader);
                Assert.IsNotNull(stream);
                int           i       = 0;
                TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
                PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute));

                while (stream.IncrementToken())
                {
                    System.String text = termAtt.Term();
                    Assert.IsFalse(stopWordsSet.Contains(text));
                    // Fail with a readable message instead of an index-out-of-range error
                    // if the stream yields more tokens than expected.
                    Assert.IsTrue(i < expectedIncr.Length, "stream produced more tokens than expected");
                    Assert.AreEqual(expectedIncr[i++], posIncrAtt.GetPositionIncrement());
                }

                // Without this check a stream that ends early (or is empty) would pass.
                Assert.AreEqual(expectedIncr.Length, i, "stream produced fewer tokens than expected");
            }
            finally
            {
                StopFilter.SetEnablePositionIncrementsDefault(defaultEnable);
            }
        }
Пример #4
0
        /// <summary>
        /// Builds the term vector by running <paramref name="queryString"/> through
        /// <paramref name="analyzer"/> and handing every produced term to
        /// <c>ProcessTerms</c>.
        /// </summary>
        /// <param name="queryString">Text to analyze.</param>
        /// <param name="analyzer">
        /// Analyzer used to tokenize the text. When it is null, or when it returns a
        /// null stream, no terms are processed.
        /// </param>
        public QueryTermVector(System.String queryString, Analyzer analyzer)
        {
            if (analyzer == null)
            {
                return;
            }

            TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
            if (stream == null)
            {
                return;
            }

            List <string> terms = new List <string>();
            try
            {
                stream.Reset();
                TermAttribute termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

                while (stream.IncrementToken())
                {
                    terms.Add(termAtt.Term());
                }
                ProcessTerms(terms.ToArray());
            }
            catch (System.IO.IOException)
            {
                // Deliberately ignored (best effort): if reading the stream fails,
                // ProcessTerms is never reached and no terms are recorded.
            }
        }
        /// <summary>
        /// Checks that a <c>PerFieldAnalyzerWrapper</c> created around a
        /// <c>WhitespaceAnalyzer</c> keeps the case of "Qwerty" for the field "field",
        /// while the field "special", registered with a <c>SimpleAnalyzer</c>, yields
        /// the lower-cased "qwerty".
        /// </summary>
        public virtual void TestPerField()
        {
            const string input = "Qwerty";

            PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
            wrapper.AddAnalyzer("special", new SimpleAnalyzer());

            // Field with no analyzer registered for it.
            TokenStream plainStream = wrapper.TokenStream("field", new System.IO.StringReader(input));
            TermAttribute plainTerm = (TermAttribute)plainStream.GetAttribute(typeof(TermAttribute));
            Assert.IsTrue(plainStream.IncrementToken());
            Assert.AreEqual("Qwerty", plainTerm.Term(), "WhitespaceAnalyzer does not lowercase");

            // Field explicitly mapped to SimpleAnalyzer.
            TokenStream specialStream = wrapper.TokenStream("special", new System.IO.StringReader(input));
            TermAttribute specialTerm = (TermAttribute)specialStream.GetAttribute(typeof(TermAttribute));
            Assert.IsTrue(specialStream.IncrementToken());
            Assert.AreEqual("qwerty", specialTerm.Term(), "SimpleAnalyzer lowercases");
        }
Пример #6
0
 /// <summary>
 /// Advances to the next token of the wrapped input, skipping every "the" token.
 /// A "quick" token is given a position increment of 2; any other token gets 1.
 /// </summary>
 /// <returns>
 /// true when a token was produced; false once the wrapped input has no more tokens.
 /// </returns>
 public override bool IncrementToken()
 {
     while (input.IncrementToken())
     {
         string current = termAtt.Term();
         if (current.Equals("the"))
         {
             // stopword: drop it and look at the next token
             continue;
         }

         int increment = current.Equals("quick") ? 2 : 1;
         posIncrAtt.SetPositionIncrement(increment);
         return true;
     }

     return false;
 }
Пример #7
0
        /// <summary>
        /// Analyzes a fixed English sentence with the <c>stop</c> analyzer and checks
        /// that none of the emitted terms is contained in <c>inValidTokens</c>.
        /// </summary>
        public virtual void TestDefaults()
        {
            Assert.IsTrue(stop != null);

            TokenStream analyzed = stop.TokenStream(
                "test",
                new System.IO.StringReader("This is a test of the english stop analyzer"));
            Assert.IsTrue(analyzed != null);

            TermAttribute term = (TermAttribute)analyzed.GetAttribute(typeof(TermAttribute));
            while (analyzed.IncrementToken())
            {
                string emitted = term.Term();
                Assert.IsFalse(inValidTokens.Contains(emitted));
            }
        }
Пример #8
0
        /// <summary>
        /// Checks the tokens a <c>NumericTokenStream</c> produces for the int
        /// <c>ivalue</c>: one token per shift from 0 up to (but excluding) 32 in steps of
        /// <c>NumericUtils.PRECISION_STEP_DEFAULT</c>, each with the prefix-coded term for
        /// that shift, and with the full-precision type only for shift 0.
        /// </summary>
        public virtual void TestIntStream()
        {
            NumericTokenStream numericStream = new NumericTokenStream().SetIntValue(ivalue);

            // GetAttribute is used on purpose to verify the attributes really exist;
            // if they do not, an IAE will be thrown.
            TermAttribute term = (TermAttribute)numericStream.GetAttribute(typeof(TermAttribute));
            TypeAttribute type = (TypeAttribute)numericStream.GetAttribute(typeof(TypeAttribute));

            for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT)
            {
                Assert.IsTrue(numericStream.IncrementToken(), "New token is available");
                Assert.AreEqual(NumericUtils.IntToPrefixCoded(ivalue, shift), term.Term(), "Term is correctly encoded");

                // Only the first (shift 0) token carries the full-precision type.
                string expectedType;
                if (shift == 0)
                {
                    expectedType = NumericTokenStream.TOKEN_TYPE_FULL_PREC;
                }
                else
                {
                    expectedType = NumericTokenStream.TOKEN_TYPE_LOWER_PREC;
                }
                Assert.AreEqual(expectedType, type.Type(), "Type correct");
            }

            Assert.IsFalse(numericStream.IncrementToken(), "No more tokens available");
        }
        /// <summary>
        /// Consumes <paramref name="stream"/> and checks that it yields exactly the
        /// terms stored in <c>tokens</c>, in the same order and with the same count.
        /// </summary>
        /// <param name="stream">Token stream to verify.</param>
        private void checkTokens(TokenStream stream)
        {
            TermAttribute term = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
            Assert.IsNotNull(term);

            int seen = 0;
            for (; stream.IncrementToken(); seen++)
            {
                // Guard the index before comparing against the expected term.
                Assert.IsTrue(seen < tokens.Length);
                Assert.AreEqual(tokens[seen], term.Term());
            }

            Assert.AreEqual(tokens.Length, seen);
        }
Пример #10
0
        /// <summary>
        /// Verifies the tokens and position increments produced by
        /// <paramref name="stpf"/> after position increments have been switched to
        /// <paramref name="enableIcrements"/>. The filter is expected to emit the English
        /// words for 0, 3, 6, ... 18 and then end.
        /// </summary>
        /// <param name="stpf">Stop filter under test.</param>
        /// <param name="enableIcrements">
        /// When true, every token after the first must report an increment of 3;
        /// when false, every token must report 1.
        /// </param>
        private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
        {
            string mode = enableIcrements ? "enabled" : "disabled";
            Log("---> test with enable-increments-" + mode);
            stpf.SetEnablePositionIncrements(enableIcrements);

            TermAttribute term = (TermAttribute)stpf.GetAttribute(typeof(TermAttribute));
            PositionIncrementAttribute posIncr = (PositionIncrementAttribute)stpf.GetAttribute(typeof(PositionIncrementAttribute));

            int i = 0;
            while (i < 20)
            {
                Assert.IsTrue(stpf.IncrementToken());
                Log("Token " + i + ": " + stpf);

                string expectedWord = English.IntToEnglish(i).Trim();
                Assert.AreEqual(expectedWord, term.Term(), "expecting token " + i + " to be " + expectedWord);

                int expectedIncrement = 1;
                if (enableIcrements && i != 0)
                {
                    expectedIncrement = 3;
                }
                Assert.AreEqual(expectedIncrement, posIncr.GetPositionIncrement(), "all but first token must have position increment of 3");

                i += 3;
            }

            Assert.IsFalse(stpf.IncrementToken());
        }
Пример #11
0
        /// <summary>
        /// Builds a <c>StopAnalyzer</c> from the custom stop set
        /// { "good", "test", "analyzer" } and checks that none of those words is emitted
        /// and that every emitted token reports a position increment of 1.
        /// </summary>
        public virtual void TestStopList()
        {
            System.Collections.Hashtable stopSet = new System.Collections.Hashtable();
            foreach (string word in new string[] { "good", "test", "analyzer" })
            {
                stopSet.Add(word, word);
            }

            StopAnalyzer analyzer = new StopAnalyzer(stopSet);
            TokenStream analyzed = analyzer.TokenStream(
                "test",
                new System.IO.StringReader("This is a good test of the english stop analyzer"));
            Assert.IsNotNull(analyzed);

            TermAttribute term = (TermAttribute)analyzed.GetAttribute(typeof(TermAttribute));
            PositionIncrementAttribute posIncr = (PositionIncrementAttribute)analyzed.AddAttribute(typeof(PositionIncrementAttribute));

            while (analyzed.IncrementToken())
            {
                string emitted = term.Term();
                Assert.IsFalse(stopSet.Contains(emitted));

                // By default the stop tokenizer does not apply increments.
                Assert.AreEqual(1, posIncr.GetPositionIncrement());
            }
        }
Пример #12
0
 /// <summary>
 /// Produces the next token. While <c>TestMultiAnalyzer.multiToken</c> is positive, a
 /// synthetic token "multi" + (multiToken + 1) is emitted with position increment 0
 /// and the offsets and type of the previously read token, and the counter is
 /// decremented. Otherwise the next input token is passed through; reading
 /// "triplemulti" sets the counter to 2 and reading "multi" sets it to 1.
 /// </summary>
 /// <returns>
 /// true when a token (real or synthetic) is available; false when the input is exhausted.
 /// </returns>
 public override bool IncrementToken()
 {
     if (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken > 0)
     {
         // Emit a pending synthetic token at the same position as the previous one.
         termAtt.SetTermBuffer("multi" + (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken + 1));
         offsetAtt.SetOffset(prevStartOffset, prevEndOffset);
         typeAtt.SetType(prevType);
         posIncrAtt.SetPositionIncrement(0);
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken--;
         return true;
     }

     if (!input.IncrementToken())
     {
         return false;
     }

     // Remember this token's type and offsets for any synthetic tokens that follow.
     prevType        = typeAtt.Type();
     prevStartOffset = offsetAtt.StartOffset();
     prevEndOffset   = offsetAtt.EndOffset();

     string current = termAtt.Term();
     if (current.Equals("triplemulti"))
     {
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 2;
     }
     else if (current.Equals("multi"))
     {
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 1;
     }

     return true;
 }
Пример #13
0
            /// <summary>
            /// Accepts the current state of <paramref name="a"/> when its term equals
            /// "Dogs", ignoring case.
            /// </summary>
            /// <param name="a">Attribute source whose <c>TermAttribute</c> is inspected.</param>
            /// <returns>true when the term is a case-insensitive match for "Dogs".</returns>
            public override bool Accept(AttributeSource a)
            {
                TermAttribute termAtt = (TermAttribute)a.GetAttribute(typeof(TermAttribute));

                // Ordinal, culture-independent comparison: avoids the two temporary
                // upper-cased strings and the current-culture casing rules that
                // ToUpper() would apply.
                return termAtt.Term().Equals("Dogs", System.StringComparison.OrdinalIgnoreCase);
            }
Пример #14
0
		/// <summary>
		/// Advances <paramref name="stream"/> by one token and asserts that the term then
		/// exposed by <paramref name="termAtt"/> equals <paramref name="expected"/>.
		/// </summary>
		/// <param name="expected">Term text the next token must have.</param>
		/// <param name="stream">Stream to advance.</param>
		/// <param name="termAtt">Term attribute read after advancing.</param>
		internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
		{
			bool advanced = stream.IncrementToken();
			Assert.IsTrue(advanced);

			string actual = termAtt.Term();
			Assert.AreEqual(expected, actual);
		}
Пример #15
0
 /// <summary>
 /// Moves <paramref name="stream"/> forward one token (which must exist) and checks
 /// that <paramref name="termAtt"/> now reports <paramref name="expected"/>.
 /// </summary>
 /// <param name="expected">Term text the next token must have.</param>
 /// <param name="stream">Stream to advance.</param>
 /// <param name="termAtt">Term attribute read after advancing.</param>
 internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
 {
     bool hasToken = stream.IncrementToken();
     Assert.IsTrue(hasToken);

     string found = termAtt.Term();
     Assert.AreEqual(expected, found);
 }