Example #1
        public virtual void TestMultipleSources()
        {
            TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer1.ToString())));

            TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.NewSinkTokenStream(dogFilter);
            TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.NewSinkTokenStream(theFilter);
            TokenStream source1 = new CachingTokenFilter(tee1);


            tee1.AddAttribute<ICheckClearAttributesAttribute>();
            dogDetector.AddAttribute<ICheckClearAttributesAttribute>();
            theDetector.AddAttribute<ICheckClearAttributesAttribute>();


            TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer2.ToString())));

            tee2.AddSinkTokenStream(dogDetector);
            tee2.AddSinkTokenStream(theDetector);
            TokenStream source2 = tee2;

            AssertTokenStreamContents(source1, tokens1);
            AssertTokenStreamContents(source2, tokens2);

            AssertTokenStreamContents(theDetector, new String[] { "The", "the", "The", "the" });

            source1.Reset();
            TokenStream lowerCasing = new LowerCaseFilter(source1);

            String[] lowerCaseTokens = new String[tokens1.Length];
            for (int i = 0; i < tokens1.Length; i++)
            {
                lowerCaseTokens[i] = tokens1[i].ToLower();
            }
            // Verify the lowercased replay of the cached stream; without this
            // assertion the lowerCasing stream and expected array go unused.
            AssertTokenStreamContents(lowerCasing, lowerCaseTokens);
        }
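
The pattern all of these tests lean on: CachingTokenFilter records tokens on its first pass and replays them from the cache after Reset(), without re-running the wrapped tokenizer. A minimal sketch, assuming the Lucene.Net 3.x API used above (the input text and tokenizer choice are illustrative):

        TokenStream cached = new CachingTokenFilter(
            new WhitespaceTokenizer(new System.IO.StringReader("The dogs The the")));
        while (cached.IncrementToken()) { }   // first pass fills the cache
        cached.Reset();                       // rewinds the cache, not the underlying reader
        while (cached.IncrementToken()) { }   // second pass replays the cached tokens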
        public virtual void  TestMultipleSources()
        {
            SinkTokenizer theDetector   = new AnonymousClassSinkTokenizer1(this, null);
            SinkTokenizer dogDetector   = new AnonymousClassSinkTokenizer2(this, null);
            TokenStream   source1       = new CachingTokenFilter(new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer1.ToString())), theDetector), dogDetector));
            TokenStream   source2       = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer2.ToString())), theDetector), dogDetector);
            int           i             = 0;
            Token         reusableToken = new Token();

            for (Token nextToken = source1.Next(reusableToken); nextToken != null; nextToken = source1.Next(reusableToken))
            {
                Assert.IsTrue(nextToken.Term().Equals(tokens1[i]) == true, nextToken.Term() + " is not equal to " + tokens1[i]);
                i++;
            }
            Assert.IsTrue(i == tokens1.Length, i + " does not equal: " + tokens1.Length);
            Assert.IsTrue(theDetector.GetTokens().Count == 2, "theDetector Size: " + theDetector.GetTokens().Count + " is not: " + 2);
            Assert.IsTrue(dogDetector.GetTokens().Count == 1, "dogDetector Size: " + dogDetector.GetTokens().Count + " is not: " + 1);
            i = 0;
            for (Token nextToken = source2.Next(reusableToken); nextToken != null; nextToken = source2.Next(reusableToken))
            {
                Assert.IsTrue(nextToken.Term().Equals(tokens2[i]) == true, nextToken.Term() + " is not equal to " + tokens2[i]);
                i++;
            }
            Assert.IsTrue(i == tokens2.Length, i + " does not equal: " + tokens2.Length);
            Assert.IsTrue(theDetector.GetTokens().Count == 4, "theDetector Size: " + theDetector.GetTokens().Count + " is not: " + 4);
            Assert.IsTrue(dogDetector.GetTokens().Count == 2, "dogDetector Size: " + dogDetector.GetTokens().Count + " is not: " + 2);
            i = 0;
            for (Token nextToken = theDetector.Next(reusableToken); nextToken != null; nextToken = theDetector.Next(reusableToken))
            {
                Assert.IsTrue(nextToken.Term().ToUpper().Equals("The".ToUpper()) == true, nextToken.Term() + " is not equal to " + "The");
                i++;
            }
            Assert.IsTrue(i == theDetector.GetTokens().Count, i + " does not equal: " + theDetector.GetTokens().Count);
            i = 0;
            for (Token nextToken = dogDetector.Next(reusableToken); nextToken != null; nextToken = dogDetector.Next(reusableToken))
            {
                Assert.IsTrue(nextToken.Term().ToUpper().Equals("Dogs".ToUpper()) == true, nextToken.Term() + " is not equal to " + "Dogs");
                i++;
            }
            Assert.IsTrue(i == dogDetector.GetTokens().Count, i + " does not equal: " + dogDetector.GetTokens().Count);
            source1.Reset();
            TokenStream lowerCasing = new LowerCaseFilter(source1);

            i = 0;
            for (Token nextToken = lowerCasing.Next(reusableToken); nextToken != null; nextToken = lowerCasing.Next(reusableToken))
            {
                Assert.IsTrue(nextToken.Term().Equals(tokens1[i].ToLower()) == true, nextToken.Term() + " is not equal to " + tokens1[i].ToLower());
                i++;
            }
            Assert.IsTrue(i == tokens1.Length, i + " does not equal: " + tokens1.Length);
        }
        // This is a simplified query builder which works for single Terms and single Phrases
        // Returns an empty BooleanQuery (no tokens), a TermQuery (one token), or a PhraseQuery (several tokens)
        public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
        {
            TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
            TokenFilter filter = new CachingTokenFilter(stream);
            filter.Reset();

            // This attribute way of getting token properties isn't very good, but it's the non-obsolete one.
            var attr1 = filter.GetAttribute<ITermAttribute>();
            Func<string> getText = () => attr1 != null ? attr1.Term : null;

            Func<int> getPositionIncrement;
            if (filter.HasAttribute<IPositionIncrementAttribute>())
            {
                var attr = filter.GetAttribute<IPositionIncrementAttribute>();
                getPositionIncrement = () => attr.PositionIncrement;
            }
            else
            {
                getPositionIncrement = () => 1;
            }

            // 0 tokens
            if (!filter.IncrementToken())
            {
                return new BooleanQuery();
            }

            // 1 token?
            string token1 = getText();
            int position = 0;
            if (!filter.IncrementToken())
            {
                return new TermQuery(new Term(field, token1));
            }

            // many tokens - handle first token
            PhraseQuery ret = new PhraseQuery();
            ret.Add(new Term(field, token1));

            do
            {
                // handle rest of tokens
                string tokenNext = getText();
                position += getPositionIncrement();
                ret.Add(new Term(field, tokenNext), position);
            }
            while (filter.IncrementToken());

            return ret;
        }
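
A hypothetical call for orientation (the analyzer, field name, and query texts are illustrative assumptions, not part of the snippet above):

        // Hypothetical usage; WhitespaceAnalyzer and the inputs are assumptions.
        Analyzer analyzer = new WhitespaceAnalyzer();
        var single = GetFieldQuery(analyzer, "body", "fox");        // one token  -> TermQuery
        var phrase = GetFieldQuery(analyzer, "body", "quick fox");  // two tokens -> PhraseQuery
        var empty  = GetFieldQuery(analyzer, "body", "");           // no tokens  -> empty BooleanQuery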
Example #4
        public virtual void TestCaching()
        {
            Directory         dir    = new RAMDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document    doc    = new Document();
            TokenStream stream = new TokenStreamAnonymousInnerClassHelper(this);

            stream = new CachingTokenFilter(stream);

            doc.Add(new TextField("preanalyzed", stream));

            // 1) we consume all tokens twice before we add the doc to the index
            CheckTokens(stream);
            stream.Reset();
            CheckTokens(stream);

            // 2) now add the document to the index and verify if all tokens are indexed
            //    don't reset the stream here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);

            IndexReader          reader        = writer.GetReader();
            DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term1"));

            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, termPositions.Freq);
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term2"));
            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, termPositions.Freq);
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term3"));
            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, termPositions.Freq);
            Assert.AreEqual(2, termPositions.NextPosition());
            reader.Dispose();
            writer.Dispose();
            // 3) reset stream and consume tokens again
            stream.Reset();
            CheckTokens(stream);
            dir.Dispose();
        }
        public virtual void  TestCaching()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();
            TokenStream stream = new AnonymousClassTokenStream(this);

            stream = new CachingTokenFilter(stream);

            doc.Add(new Field("preanalyzed", stream, TermVector.NO));

            // 1) we consume all tokens twice before we add the doc to the index
            checkTokens(stream);
            stream.Reset();
            checkTokens(stream);

            // 2) now add the document to the index and verify if all tokens are indexed
            //    don't reset the stream here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);
            writer.Close();

            IndexReader   reader        = IndexReader.Open(dir);
            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
            reader.Close();

            // 3) reset stream and consume tokens again
            stream.Reset();
            checkTokens(stream);
        }
        private void  TestCachingCustomToken(int api)
        {
            TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));

            stream = new PartOfSpeechTaggingFilter(stream);
            stream = new LowerCaseFilter(stream);
            stream = new StopFilter(stream, stopwords);
            stream = new CachingTokenFilter(stream);             // <- the caching is done before the annotating!
            stream = new PartOfSpeechAnnotatingFilter(stream);

            switch (api)
            {
            case 0:
                ConsumeStreamNewAPI(stream);
                ConsumeStreamNewAPI(stream);
                break;

            case 1:
                ConsumeStreamOldAPI(stream);
                ConsumeStreamOldAPI(stream);
                break;

            case 2:
                ConsumeStreamVeryOldAPI(stream);
                ConsumeStreamVeryOldAPI(stream);
                break;

            case 3:
                ConsumeStreamNewAPI(stream);
                ConsumeStreamOldAPI(stream);
                ConsumeStreamVeryOldAPI(stream);
                ConsumeStreamNewAPI(stream);
                ConsumeStreamVeryOldAPI(stream);
                break;
            }
        }
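
The inline comment above is the point of this test: every filter upstream of CachingTokenFilter runs exactly once, while PartOfSpeechAnnotatingFilter, placed downstream of the cache, re-runs on each consumption. A hypothetical counting filter makes that observable (a sketch assuming the Lucene.Net 3.x TokenFilter API; CountingFilter is not part of the original test):

        // Hypothetical helper: counts IncrementToken calls so we can see
        // which side of the cache a filter sits on.
        sealed class CountingFilter : TokenFilter
        {
            public int Calls;
            public CountingFilter(TokenStream input) : base(input) { }
            public override bool IncrementToken()
            {
                if (!input.IncrementToken()) return false;
                Calls++;
                return true;
            }
        }

Consuming the stream twice with one CountingFilter placed just before the CachingTokenFilter and another just after it leaves the upstream counter at the token count and the downstream counter at twice that.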
        protected override IQueryNode PostProcessNode(IQueryNode node)
        {
            if (node is ITextableQueryNode
                && !(node is WildcardQueryNode)
                && !(node is FuzzyQueryNode)
                && !(node is RegexpQueryNode)
                && !(node.Parent is IRangeQueryNode))
            {
                FieldQueryNode fieldNode = ((FieldQueryNode)node);
                string text = fieldNode.GetTextAsString();
                string field = fieldNode.GetFieldAsString();

                CachingTokenFilter buffer = null;
                IPositionIncrementAttribute posIncrAtt = null;
                int numTokens = 0;
                int positionCount = 0;
                bool severalTokensAtSamePosition = false;

                TokenStream source = null;
                try
                {
                    source = this.analyzer.TokenStream(field, text);
                    source.Reset();
                    buffer = new CachingTokenFilter(source);

                    if (buffer.HasAttribute<IPositionIncrementAttribute>())
                    {
                        posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
                    }

                    try
                    {
                        while (buffer.IncrementToken())
                        {
                            numTokens++;
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt
                                .PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                severalTokensAtSamePosition = true;
                            }
                        }
                    }
#pragma warning disable 168
                    catch (IOException e)
#pragma warning restore 168
                    {
                        // ignore
                    }
                }
                catch (IOException e)
                {
                    throw new Exception(e.Message, e);
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(source);
                }

                // rewind the buffer stream
                buffer.Reset();

                if (!buffer.HasAttribute<ICharTermAttribute>())
                {
                    return new NoTokenFoundQueryNode();
                }

                ICharTermAttribute termAtt = buffer.GetAttribute<ICharTermAttribute>();

                if (numTokens == 0)
                {
                    return new NoTokenFoundQueryNode();

                }
                else if (numTokens == 1)
                {
                    string term = null;
                    try
                    {
                        bool hasNext;
                        hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        term = termAtt.ToString();
                    }
#pragma warning disable 168
                    catch (IOException e)
#pragma warning restore 168
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    fieldNode.Text = term.ToCharSequence();

                    return fieldNode;
                }
                else if (severalTokensAtSamePosition || !(node is QuotedFieldQueryNode))
                {
                    if (positionCount == 1 || !(node is QuotedFieldQueryNode))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            List<IQueryNode> children = new List<IQueryNode>();

                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    term = termAtt.ToString();
                                }
#pragma warning disable 168
                                catch (IOException e)
#pragma warning restore 168
                                {
                                    // safe to ignore, because we know the number of tokens
                                }

                                children.Add(new FieldQueryNode(field, term, -1, -1));

                            }
                            return new GroupQueryNode(
                                new StandardBooleanQueryNode(children, positionCount == 1));
                        }
                        else
                        {
                            // multiple positions
                            IQueryNode q = new StandardBooleanQueryNode(new List<IQueryNode>(), false);
                            IQueryNode currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    term = termAtt.ToString();
                                }
#pragma warning disable 168
                                catch (IOException e)
#pragma warning restore 168
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    if (!(currentQuery is BooleanQueryNode))
                                    {
                                        IQueryNode t = currentQuery;
                                        currentQuery = new StandardBooleanQueryNode(new List<IQueryNode>(), true);
                                        ((BooleanQueryNode)currentQuery).Add(t);
                                    }
                                    ((BooleanQueryNode)currentQuery).Add(new FieldQueryNode(field, term, -1, -1));
                                }
                                else
                                {
                                    if (currentQuery != null)
                                    {
                                        if (this.defaultOperator == Operator.OR)
                                        {
                                            q.Add(currentQuery);
                                        }
                                        else
                                        {
                                            q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                                        }
                                    }
                                    currentQuery = new FieldQueryNode(field, term, -1, -1);
                                }
                            }
                            if (this.defaultOperator == Operator.OR)
                            {
                                q.Add(currentQuery);
                            }
                            else
                            {
                                q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                            }

                            if (q is BooleanQueryNode)
                            {
                                q = new GroupQueryNode(q);
                            }
                            return q;
                        }
                    }
                    else
                    {
                        // phrase query:
                        MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                        List<FieldQueryNode> multiTerms = new List<FieldQueryNode>();
                        int position = -1;
                        int i = 0;
                        int termGroupCount = 0;
                        for (; i < numTokens; i++)
                        {
                            string term = null;
                            int positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                term = termAtt.ToString();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
#pragma warning disable 168
                            catch (IOException e)
#pragma warning restore 168
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                foreach (FieldQueryNode termNode in multiTerms)
                                {
                                    if (this.positionIncrementsEnabled)
                                    {
                                        termNode.PositionIncrement = position;
                                    }
                                    else
                                    {
                                        termNode.PositionIncrement = termGroupCount;
                                    }

                                    mpq.Add(termNode);
                                }

                                // Only increment once for each "group" of
                                // terms that were in the same position:
                                termGroupCount++;

                                multiTerms.Clear();
                            }

                            position += positionIncrement;
                            multiTerms.Add(new FieldQueryNode(field, term, -1, -1));
                        }

                        foreach (FieldQueryNode termNode in multiTerms)
                        {
                            if (this.positionIncrementsEnabled)
                            {
                                termNode.PositionIncrement = position;
                            }
                            else
                            {
                                termNode.PositionIncrement = termGroupCount;
                            }

                            mpq.Add(termNode);
                        }

                        return mpq;
                    }
                }
                else
                {
                    TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        string term = null;
                        int positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            term = termAtt.ToString();

                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
#pragma warning disable 168
                        catch (IOException e)
#pragma warning restore 168
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                        if (this.positionIncrementsEnabled)
                        {
                            position += positionIncrement;
                            newFieldNode.PositionIncrement = position;
                        }
                        else
                        {
                            newFieldNode.PositionIncrement = i;
                        }

                        pq.Add(newFieldNode);
                    }

                    return pq;
                }
            }

            return node;
        }
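
For orientation, the branching above reduces to the following outcomes (hypothetical inputs; the analyzer's behaviour is assumed):

        // Hypothetical outcomes of PostProcessNode, by analyzed token shape:
        //   no tokens                         -> NoTokenFoundQueryNode
        //   one token                         -> the original FieldQueryNode, text replaced
        //   synonyms stacked at one position  -> GroupQueryNode over a StandardBooleanQueryNode
        //   several positions, unquoted       -> boolean combination per this.defaultOperator
        //   quoted, with stacked tokens       -> MultiPhraseQueryNode
        //   quoted, plain tokens              -> TokenizedPhraseQueryNode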
Example #12
        /// <exception cref="ParseException">throw in overridden method to disallow
        /// </exception>
        protected internal virtual Query GetFieldQuery(String field, String queryText)
        {
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count

            TokenStream source;
            try
            {
                source = analyzer.ReusableTokenStream(field, new StringReader(queryText));
                source.Reset();
            }
            catch (IOException)
            {
                source = analyzer.TokenStream(field, new StringReader(queryText));
            }
            CachingTokenFilter buffer = new CachingTokenFilter(source);
            ITermAttribute termAtt = null;
            IPositionIncrementAttribute posIncrAtt = null;
            int numTokens = 0;

            bool success = false;
            try
            {
                buffer.Reset();
                success = true;
            }
            catch (IOException)
            {
                // success==false if we hit an exception
            }
            if (success)
            {
                if (buffer.HasAttribute<ITermAttribute>())
                {
                    termAtt = buffer.GetAttribute<ITermAttribute>();
                }
                if (buffer.HasAttribute<IPositionIncrementAttribute>())
                {
                    posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
                }
            }

            int positionCount = 0;
            bool severalTokensAtSamePosition = false;

            bool hasMoreTokens = false;
            if (termAtt != null)
            {
                try
                {
                    hasMoreTokens = buffer.IncrementToken();
                    while (hasMoreTokens)
                    {
                        numTokens++;
                        int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                        if (positionIncrement != 0)
                        {
                            positionCount += positionIncrement;
                        }
                        else
                        {
                            severalTokensAtSamePosition = true;
                        }
                        hasMoreTokens = buffer.IncrementToken();
                    }
                }
                catch (IOException)
                {
                    // ignore
                }
            }
            try
            {
                // rewind the buffer stream
                buffer.Reset();

                // close original stream - all tokens buffered
                source.Close();
            }
            catch (IOException)
            {
                // ignore
            }

            if (numTokens == 0)
                return null;
            else if (numTokens == 1)
            {
                String term = null;
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext);
                    term = termAtt.Term;
                }
                catch (IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                return NewTermQuery(new Term(field, term));
            }
            else
            {
                if (severalTokensAtSamePosition)
                {
                    if (positionCount == 1)
                    {
                        // no phrase query:
                        BooleanQuery q = NewBooleanQuery(true);
                        for (int i = 0; i < numTokens; i++)
                        {
                            String term = null;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext);
                                term = termAtt.Term;
                            }
                            catch (IOException)
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            Query currentQuery = NewTermQuery(
                                new Term(field, term));
                            q.Add(currentQuery, Occur.SHOULD);
                        }
                        return q;
                    }
                    else
                    {
                        // phrase query:
                        MultiPhraseQuery mpq = NewMultiPhraseQuery();
                        mpq.Slop = phraseSlop;
                        List<Term> multiTerms = new List<Term>();
                        int position = -1;
                        for (int i = 0; i < numTokens; i++)
                        {
                            String term = null;
                            int positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                term = termAtt.Term;
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
                            catch (IOException)
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                if (enablePositionIncrements)
                                {
                                    mpq.Add(multiTerms.ToArray(), position);
                                }
                                else
                                {
                                    mpq.Add(multiTerms.ToArray());
                                }
                                multiTerms.Clear();
                            }
                            position += positionIncrement;
                            multiTerms.Add(new Term(field, term));
                        }
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        return mpq;
                    }
                }
                else
                {
                    PhraseQuery pq = NewPhraseQuery();
                    pq.Slop = phraseSlop;
                    int position = -1;


                    for (int i = 0; i < numTokens; i++)
                    {
                        String term = null;
                        int positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            term = termAtt.Term;
                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
                        catch (IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        if (enablePositionIncrements)
                        {
                            position += positionIncrement;
                            pq.Add(new Term(field, term), position);
                        }
                        else
                        {
                            pq.Add(new Term(field, term));
                        }
                    }
                    return pq;
                }
            }
        }
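
The same decision tree in compact form (hypothetical inputs; analyzer behaviour assumed):

        // Hypothetical outcomes of GetFieldQuery, by analyzed token shape:
        //   no tokens (e.g. only stop words)        -> null
        //   one token                               -> TermQuery
        //   several tokens, distinct positions      -> PhraseQuery (slop = phraseSlop)
        //   synonyms stacked at a single position   -> BooleanQuery of SHOULD TermQuerys
        //   stacked tokens across several positions -> MultiPhraseQuery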
        public void TestTokenStream()
        {
            ShingleMatrixFilter.DefaultSettingsCodec = null;
            //new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();

            // test a plain old token stream with synonyms translated to rows.

            var tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 0, 4));
            tokens.AddLast(TokenFactory("greetings", 0, 0, 4));
            tokens.AddLast(TokenFactory("world", 1, 5, 10));
            tokens.AddLast(TokenFactory("earth", 0, 5, 10));
            tokens.AddLast(TokenFactory("tellus", 0, 5, 10));

            TokenStream tls = new TokenListStream(tokens);

            // bi-grams

            TokenStream ts = new ShingleMatrixFilter(tls, 2, 2, '_', false,
                                                     new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());

            AssertNext(ts, "hello_world");
            AssertNext(ts, "greetings_world");
            AssertNext(ts, "hello_earth");
            AssertNext(ts, "greetings_earth");
            AssertNext(ts, "hello_tellus");
            AssertNext(ts, "greetings_tellus");
            Assert.IsFalse(ts.IncrementToken());

            // bi-grams with no spacer character, start offset, end offset

            tls.Reset();
            ts = new ShingleMatrixFilter(tls, 2, 2, null, false,
                                         new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
            AssertNext(ts, "helloworld", 0, 10);
            AssertNext(ts, "greetingsworld", 0, 10);
            AssertNext(ts, "helloearth", 0, 10);
            AssertNext(ts, "greetingsearth", 0, 10);
            AssertNext(ts, "hellotellus", 0, 10);
            AssertNext(ts, "greetingstellus", 0, 10);
            Assert.IsFalse(ts.IncrementToken());


            // add ^_prefix_and_suffix_$
            //
            // using 3d codec as it supports weights

            ShingleMatrixFilter.DefaultSettingsCodec =
                new SimpleThreeDimensionalTokenSettingsCodec();

            tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("greetings", 0, 1f, 0, 4, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("earth", 0, 1f, 5, 10, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("tellus", 0, 1f, 5, 10, TokenPositioner.NewRow));

            tls = new TokenListStream(tokens);

            // bi-grams, position increment, weight, start offset, end offset

            ts = new PrefixAndSuffixAwareTokenFilter(
                new SingleTokenTokenStream(TokenFactory("^", 1, 100f, 0, 0)),
                tls,
                new SingleTokenTokenStream(TokenFactory("$", 1, 50f, 0, 0))
                );
            tls = new CachingTokenFilter(ts);

            ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);

            //for (Token token = ts.Next(new Token()); token != null; token = ts.Next(token)) {
            //    Console.Out.WriteLine("AssertNext(ts, \"" + token.Term() + "\", " + token.GetPositionIncrement() + ", " + (token.GetPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.GetPayload().GetData()).ToString()) + "f, " + token.StartOffset() + ", " + token.EndOffset() + ");");
            //    token.Clear();
            //}

            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            Assert.IsFalse(ts.IncrementToken());

            // test unlimited size and allow single boundary token as shingle
            tls.Reset();

            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', false);


            //for (Token token = ts.Next(new Token()); token != null; token = ts.Next(token))
            //{
            //    Console.Out.WriteLine("AssertNext(ts, \"" + token.Term() + "\", " + token.GetPositionIncrement() + ", " + (token.GetPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.GetPayload().GetData()).ToString()) + "f, " + token.StartOffset() + ", " + token.EndOffset() + ");");
            //    token.Clear();
            //}

            AssertNext(ts, "^", 1, 10.0f, 0, 0);
            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "world", 1, 1.0f, 5, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "$", 1, 7.071068f, 10, 10);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);

            Assert.IsFalse(ts.IncrementToken());

            // test unlimited size but don't allow single boundary token as shingle

            tls.Reset();
            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', true);

            //  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
            //      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
            //      token.clear();
            //    }

            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "world", 1, 1.0f, 5, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);


            Assert.IsFalse(ts.IncrementToken());

            //System.currentTimeMillis();

            // multi-token synonyms
            //
            // Token[][][] {
            //    {{hello}, {greetings, and, salutations},
            //    {{world}, {earth}, {tellus}}
            // }
            //


            tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("greetings", 1, 1f, 0, 4, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("and", 1, 1f, 0, 4, TokenPositioner.SameRow));
            tokens.AddLast(TokenFactory("salutations", 1, 1f, 0, 4, TokenPositioner.SameRow));
            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("earth", 1, 1f, 5, 10, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("tellus", 1, 1f, 5, 10, TokenPositioner.NewRow));

            tls = new TokenListStream(tokens);

            // 2-3 grams

            ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);

            //  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
            //      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
            //      token.clear();
            //    }

            // shingle, position increment, weight, start offset, end offset

            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_and", 1, 1.4142135f, 0, 4);
            AssertNext(ts, "greetings_and_salutations", 1, 1.7320508f, 0, 4);
            AssertNext(ts, "and_salutations", 1, 1.4142135f, 0, 4);
            AssertNext(ts, "and_salutations_world", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "and_salutations_earth", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "and_salutations_tellus", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_tellus", 1, 1.4142135f, 0, 10);

            Assert.IsFalse(ts.IncrementToken());

            //System.currentTimeMillis();
        }