예제 #1
0
		public virtual void  TestPhrasePrefix()
		{
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc1 = new Document();
			Document doc2 = new Document();
			Document doc3 = new Document();
			Document doc4 = new Document();
			Document doc5 = new Document();
			doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
			doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
			doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
			doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
			doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc1);
			writer.AddDocument(doc2);
			writer.AddDocument(doc3);
			writer.AddDocument(doc4);
			writer.AddDocument(doc5);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			//PhrasePrefixQuery query1 = new PhrasePrefixQuery();
			MultiPhraseQuery query1 = new MultiPhraseQuery();
			//PhrasePrefixQuery query2 = new PhrasePrefixQuery();
			MultiPhraseQuery query2 = new MultiPhraseQuery();
			query1.Add(new Term("body", "blueberry"));
			query2.Add(new Term("body", "strawberry"));
			
			System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
			IndexReader ir = IndexReader.Open(indexStore);
			
			// this TermEnum gives "piccadilly", "pie" and "pizza".
			System.String prefix = "pi";
			TermEnum te = ir.Terms(new Term("body", prefix + "*"));
			do 
			{
				if (te.Term().Text().StartsWith(prefix))
				{
					termsWithPrefix.Add(te.Term());
				}
			}
			while (te.Next());
			
			query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			
			ScoreDoc[] result;
			result = searcher.Search(query1, null, 1000).scoreDocs;
			Assert.AreEqual(2, result.Length);
			
			result = searcher.Search(query2, null, 1000).scoreDocs;
			Assert.AreEqual(0, result.Length);
		}
        public virtual Query Build(IQueryNode queryNode)
        {
            MultiPhraseQueryNode phraseNode = (MultiPhraseQueryNode)queryNode;

            MultiPhraseQuery phraseQuery = new MultiPhraseQuery();

            IList<IQueryNode> children = phraseNode.GetChildren();

            if (children != null)
            {
                IDictionary<int?, List<Term>> positionTermMap = new SortedDictionary<int?, List<Term>>();

                foreach (IQueryNode child in children)
                {
                    FieldQueryNode termNode = (FieldQueryNode)child;
                    TermQuery termQuery = (TermQuery)termNode
                        .GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

                    List<Term> termList;
                    if (!positionTermMap.TryGetValue(termNode.PositionIncrement, out termList) || termList == null)
                    {
                        termList = new List<Term>();
                        positionTermMap[termNode.PositionIncrement] = termList;
                    }

                    termList.Add(termQuery.Term);
                }

                foreach (int positionIncrement in positionTermMap.Keys)
                {
                    List<Term> termList = positionTermMap[positionIncrement];

                    phraseQuery.Add(termList.ToArray(/*new Term[termList.size()]*/),
                                positionIncrement);
                }
            }

            return phraseQuery;
        }
예제 #3
0
        public virtual void TestNoDocs()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("a note", "note", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            MultiPhraseQuery q = new MultiPhraseQuery();

            q.Add(new Term("body", "a"));
            q.Add(new Term[] { new Term("body", "nope"), new Term("body", "nope") });
            Assert.AreEqual(0, searcher.Search(q, null, 1).TotalHits, "Wrong number of hits");

            // just make sure no exc:
            searcher.Explain(q, 0);

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #4
0
        public virtual void TestMultiExactWithRepeats()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexStore);

            Add("a b c d e f g h i k", writer);
            IndexReader r = writer.GetReader();

            writer.Dispose();

            IndexSearcher    searcher = NewSearcher(r);
            MultiPhraseQuery q        = new MultiPhraseQuery();

            q.Add(new Term[] { new Term("body", "a"), new Term("body", "d") }, 0);
            q.Add(new Term[] { new Term("body", "a"), new Term("body", "f") }, 2);
            Assert.AreEqual(1, searcher.Search(q, 1).TotalHits); // should match on "a b"
            r.Dispose();
            indexStore.Dispose();
        }
예제 #5
0
        public virtual void TestMultiSloppyWithRepeats() //LUCENE-3821 fixes sloppy phrase scoring, except for this known problem
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("a b c d e f g h i k", writer);
            IndexReader r = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(r);

            MultiPhraseQuery q = new MultiPhraseQuery();

            // this will fail, when the scorer would propagate [a] rather than [a,b],
            q.Add(new Term[] { new Term("body", "a"), new Term("body", "b") });
            q.Add(new Term[] { new Term("body", "a") });
            q.Slop = 6;
            Assert.AreEqual(1, searcher.Search(q, 1).TotalHits); // should match on "a b"

            r.Dispose();
            indexStore.Dispose();
        }
예제 #6
0
        public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
        {
            // this tests against bug 33161 (now fixed)
            // In order to cause the bug, the outer query must have more than one term
            // and all terms required.
            // The contained PhraseMultiQuery must contain exactly one term array.
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("blueberry pie", writer);
            Add("blueberry chewing gum", writer);
            Add("blue raspberry pie", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);
            // this query will be equivalent to +body:pie +body:"blue*"
            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();

            trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
            q.Add(trouble, BooleanClause.Occur.MUST);

            // exception will be thrown here without fix
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;

            Assert.AreEqual(2, hits.Length, "Wrong number of hits");

            // just make sure no exc:
            searcher.Explain(q, 0);

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #7
0
 public virtual void  TestMPQ6()
 {
     MultiPhraseQuery q = new MultiPhraseQuery();
     q.Add(Ta(new System.String[]{"w1", "w3"}));
     q.Add(Ta(new System.String[]{"w2"}));
     q.Slop = 1;
     Qtest(q, new int[]{0, 1, 2, 3});
 }
예제 #8
0
		public virtual void  TestPhrasePrefixWithBooleanQuery()
		{
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(new System.Collections.Hashtable(0)), true, IndexWriter.MaxFieldLength.LIMITED);
			Add("This is a test", "object", writer);
			Add("a note", "note", writer);
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			// This query will be equivalent to +type:note +body:"a t*"
			BooleanQuery q = new BooleanQuery();
			q.Add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
			
			MultiPhraseQuery trouble = new MultiPhraseQuery();
			trouble.Add(new Term("body", "a"));
			trouble.Add(new Term[]{new Term("body", "test"), new Term("body", "this")});
			q.Add(trouble, BooleanClause.Occur.MUST);
			
			// exception will be thrown here without fix for #35626:
			ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
			Assert.AreEqual(0, hits.Length, "Wrong number of hits");
			searcher.Close();
		}
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open(indexStore, true);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix));

            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            }while (te.Next());

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te     = ir.Terms(new Term("body", prefix));
            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            }while (te.Next());
            query3.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            // okay, all terms must belong to the same field
            Assert.Throws <ArgumentException>(() =>
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
            });

            searcher.Close();
            indexStore.Close();
        }
        public virtual void TestZeroPosIncr()
        {
            Directory dir = new RAMDirectory();
            Token[] tokens = new Token[3];
            tokens[0] = new Token();
            tokens[0].Append("a");
            tokens[0].PositionIncrement = 1;
            tokens[1] = new Token();
            tokens[1].Append("b");
            tokens[1].PositionIncrement = 0;
            tokens[2] = new Token();
            tokens[2].Append("c");
            tokens[2].PositionIncrement = 0;

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(new TextField("field", new CannedTokenStream(tokens)));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new TextField("field", new CannedTokenStream(tokens)));
            writer.AddDocument(doc);
            IndexReader r = writer.Reader;
            writer.Dispose();
            IndexSearcher s = NewSearcher(r);
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            //mpq.setSlop(1);

            // NOTE: not great that if we do the else clause here we
            // get different scores!  MultiPhraseQuery counts that
            // phrase as occurring twice per doc (it should be 1, I
            // think?).  this is because MultipleTermPositions is able to
            // return the same position more than once (0, in this
            // case):
            if (true)
            {
                mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
                mpq.Add(new Term[] { new Term("field", "a") }, 0);
            }
            else
            {
                mpq.Add(new Term[] { new Term("field", "a") }, 0);
                mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
            }
            TopDocs hits = s.Search(mpq, 2);
            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual(hits.ScoreDocs[0].Score, hits.ScoreDocs[1].Score, 1e-5);
            /*
            for(int hit=0;hit<hits.TotalHits;hit++) {
              ScoreDoc sd = hits.ScoreDocs[hit];
              System.out.println("  hit doc=" + sd.Doc + " score=" + sd.Score);
            }
            */
            r.Dispose();
            dir.Dispose();
        }
예제 #11
0
            public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
            {
                InitBlock(enclosingInstance);
                this.similarity = Enclosing_Instance.GetSimilarity(searcher);

                // compute idf
                System.Collections.IEnumerator i = Enclosing_Instance.termArrays.GetEnumerator();
                while (i.MoveNext())
                {
                    Term[] terms = (Term[]) i.Current;
                    for (int j = 0; j < terms.Length; j++)
                    {
                        idf += Enclosing_Instance.GetSimilarity(searcher).Idf(terms[j], searcher);
                    }
                }
            }
예제 #12
0
 private void  InitBlock(MultiPhraseQuery enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
예제 #13
0
        private static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
        {
            TokenStream tokenStream = analyzer.TokenStream(field, new StringReader(text));

            ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();
            IPositionIncrementAttribute positionIncrementAttribute = tokenStream.AddAttribute<IPositionIncrementAttribute>();

            List<List<Term>> terms = new List<List<Term>>();
            List<Term> current = null;
            while (tokenStream.IncrementToken())
            {
                if (positionIncrementAttribute.PositionIncrement > 0)
                {
                    current = new List<Term>();
                    terms.Add(current);
                }
                if (current != null)
                {
                    current.Add(new Term(field, termAttribute.Term));
                }
            }

            if (terms.Count == 1 && terms[0].Count == 1)
            {
                return new TermQuery(terms[0][0]);
            }
            else if (terms.Select(l => l.Count).Sum() == terms.Count)
            {
                PhraseQuery phraseQuery = new PhraseQuery();
                foreach (var positionList in terms)
                {
                    phraseQuery.Add(positionList[0]);
                }
                return phraseQuery;
            }
            else
            {
                MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
                foreach (var positionList in terms)
                {
                    multiPhraseQuery.Add(positionList.ToArray());
                }
                return multiPhraseQuery;
            }
        }
예제 #14
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexStore);
            Document doc1 = new Document();
            Document doc2 = new Document();
            Document doc3 = new Document();
            Document doc4 = new Document();
            Document doc5 = new Document();

            doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
            doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
            doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
            doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
            doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
            writer.AddDocument(doc1);
            writer.AddDocument(doc2);
            writer.AddDocument(doc3);
            writer.AddDocument(doc4);
            writer.AddDocument(doc5);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).GetTerms("body").GetIterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term.Utf8ToString();
                if (s.StartsWith(prefix, StringComparison.Ordinal))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }
 public virtual void TestZeroPosIncrSloppyMpqAndOrNoMatch()
 {
     MultiPhraseQuery mpq = new MultiPhraseQuery();
     foreach (Token[] tap in INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN)
     {
         Term[] terms = TapTerms(tap);
         int pos = tap[0].PositionIncrement - 1;
         mpq.Add(terms, pos); //AND logic in pos, OR across lines
     }
     DoTestZeroPosIncrSloppy(mpq, 0);
     mpq.Slop = 2;
     DoTestZeroPosIncrSloppy(mpq, 0);
 }
 public virtual void TestNegativeSlop()
 {
     MultiPhraseQuery query = new MultiPhraseQuery();
     query.Add(new Term("field", "two"));
     query.Add(new Term("field", "one"));
     try
     {
         query.Slop = -2;
         Assert.Fail("didn't get expected exception");
     }
     catch (System.ArgumentException expected)
     {
         // expected exception
     }
 }
 public virtual void TestZeroPosIncrSloppyMpqAnd()
 {
     MultiPhraseQuery mpq = new MultiPhraseQuery();
     int pos = -1;
     foreach (Token tap in INCR_0_QUERY_TOKENS_AND)
     {
         pos += tap.PositionIncrement;
         mpq.Add(new Term[] { new Term("field", tap.ToString()) }, pos); //AND logic
     }
     DoTestZeroPosIncrSloppy(mpq, 0);
     mpq.Slop = 1;
     DoTestZeroPosIncrSloppy(mpq, 0);
     mpq.Slop = 2;
     DoTestZeroPosIncrSloppy(mpq, 1);
 }
        public virtual void TestPhrasePrefix()
        {
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();
            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList<Term> termsWithPrefix = new LinkedList<Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string prefix = "pi";
            TermsEnum te = MultiFields.GetFields(reader).Terms("body").Iterator(null);
            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term().Utf8ToString();
                if (s.StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();
            termsWithPrefix.Clear();
            prefix = "blue";
            te.SeekCeil(new BytesRef(prefix));

            do
            {
                if (te.Term().Utf8ToString().StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", te.Term().Utf8ToString()));
                }
            } while (te.Next() != null);

            query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result = searcher.Search(query3, null, 1000).ScoreDocs;

            // just make sure no exc:
            searcher.Explain(query3, 0);

            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird
            // foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();
            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // okay, all terms must belong to the same field
            }

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
 public virtual void TestZeroPosIncrSloppyParsedAnd()
 {
     MultiPhraseQuery q = new MultiPhraseQuery();
     q.Add(new Term[] { new Term("field", "a"), new Term("field", "1") }, -1);
     q.Add(new Term[] { new Term("field", "b"), new Term("field", "1") }, 0);
     q.Add(new Term[] { new Term("field", "c") }, 1);
     DoTestZeroPosIncrSloppy(q, 0);
     q.Slop = 1;
     DoTestZeroPosIncrSloppy(q, 0);
     q.Slop = 2;
     DoTestZeroPosIncrSloppy(q, 1);
 }
        public virtual void TestPhrasePrefixWithBooleanQuery()
        {
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
            Add("this is a test", "object", writer);
            Add("a note", "note", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // this query will be equivalent to +type:note +body:"a t*"
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();
            trouble.Add(new Term("body", "a"));
            trouble.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });
            q.Add(trouble, BooleanClause.Occur.MUST);

            // exception will be thrown here without fix for #35626:
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length, "Wrong number of hits");
            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #21
0
        public virtual void TestSetPosition()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            Directory store = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), store, analyzer);
            Document d = new Document();
            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;
            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitely.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();
            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }
예제 #22
0
 public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
 {
     InitBlock(enclosingInstance);
     this.similarity = Enclosing_Instance.GetSimilarity(searcher);
     
     // compute idf
     int maxDoc = searcher.MaxDoc;
     foreach (Term[] terms in enclosingInstance.termArrays)
     {
         foreach (Term term in terms)
         {
             idf += similarity.Idf(searcher.DocFreq(term), maxDoc);
         }
     }
 }
 private static void VisitQuery(MultiPhraseQuery query, AzureQueryLogger.IndentedTextWriter writer)
 {
     writer.WriteLine("Slop: {0}", (object)query.Slop);
     foreach (Term[] termArray in (IEnumerable<Term[]>)query.GetTermArrays())
     {
         writer.WriteLine("array");
         ++writer.Indent;
         foreach (Term term in termArray)
             AzureQueryLogger.VisitTerm(term, writer);
         --writer.Indent;
     }
 }
		public virtual void  TestMPQ7()
		{
			MultiPhraseQuery q = new MultiPhraseQuery();
			q.Add(Ta(new System.String[]{"w1"}));
			q.Add(Ta(new System.String[]{"w2"}));
			q.SetSlop(1);
			q.SetBoost(0.0f);
			Bqtest(q, new int[]{0, 1, 2});
		}
        public virtual void TestNoDocs()
        {
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
            Add("a note", "note", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            MultiPhraseQuery q = new MultiPhraseQuery();
            q.Add(new Term("body", "a"));
            q.Add(new Term[] { new Term("body", "nope"), new Term("body", "nope") });
            Assert.AreEqual(0, searcher.Search(q, null, 1).TotalHits, "Wrong number of hits");

            // just make sure no exc:
            searcher.Explain(q, 0);

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #26
0
 private void InitBlock(MultiPhraseQuery enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
예제 #27
0
 public virtual void TestMPQ7()
 {
     MultiPhraseQuery q = new MultiPhraseQuery();
     q.Add(Ta(new string[] { "w1" }));
     q.Add(Ta(new string[] { "w2" }));
     q.Slop = 1;
     q.Boost = 0.0f;
     Bqtest(q, new int[] { 0, 1, 2 });
 }
예제 #28
0
 private MultiPhraseQuery RandomPhraseQuery(int seed)
 {
     Random random = new Random(seed);
     int length = TestUtil.NextInt(random, 2, 5);
     MultiPhraseQuery pq = new MultiPhraseQuery();
     int position = 0;
     for (int i = 0; i < length; i++)
     {
         int depth = TestUtil.NextInt(random, 1, 3);
         Term[] terms = new Term[depth];
         for (int j = 0; j < depth; j++)
         {
             terms[j] = new Term("field", "" + (char)TestUtil.NextInt(random, 'a', 'z'));
         }
         pq.Add(terms, position);
         position += TestUtil.NextInt(random, 1, 3);
     }
     return pq;
 }
예제 #29
0
		public virtual void  TestPhrasePrefixWithBooleanQuery()
		{
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()), true, IndexWriter.MaxFieldLength.LIMITED);
			Add("This is a test", "object", writer);
			Add("a note", "note", writer);
			writer.Close();

		    IndexSearcher searcher = new IndexSearcher(indexStore, true);
			
			// This query will be equivalent to +type:note +body:"a t*"
			BooleanQuery q = new BooleanQuery();
			q.Add(new TermQuery(new Term("type", "note")), Occur.MUST);
			
			MultiPhraseQuery trouble = new MultiPhraseQuery();
			trouble.Add(new Term("body", "a"));
			trouble.Add(new Term[]{new Term("body", "test"), new Term("body", "this")});
			q.Add(trouble, Occur.MUST);
			
			// exception will be thrown here without fix for #35626:
			ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "Wrong number of hits");
			searcher.Close();
		}
 public virtual void TestMPQ3()
 {
     MultiPhraseQuery q = new MultiPhraseQuery();
     q.Add(Ta(new string[] { "w1", "xx" }));
     q.Add(Ta(new string[] { "w2", "w3" }));
     Qtest(q, new int[] { 0, 1, 2, 3 });
 }
        public virtual void TestCustomIDF()
        {
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
            Add("this is a test", "object", writer);
            Add("a note", "note", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);
            searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this);

            MultiPhraseQuery query = new MultiPhraseQuery();
            query.Add(new Term[] { new Term("body", "this"), new Term("body", "that") });
            query.Add(new Term("body", "is"));
            Weight weight = query.CreateWeight(searcher);
            Assert.AreEqual(10f * 10f, weight.ValueForNormalization, 0.001f);

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #32
0
        public virtual void TestSetPosition()
        {
            Analyzer          analyzer = new AnalyzerAnonymousClass(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(Random, store, analyzer);
            Document          d        = new Document();

            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));

            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitely.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }
예제 #33
0
		public virtual void  TestPhrasePrefix()
		{
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Add("blueberry pie", writer);
			Add("blueberry strudel", writer);
			Add("blueberry pizza", writer);
			Add("blueberry chewing gum", writer);
			Add("bluebird pizza", writer);
			Add("bluebird foobar pizza", writer);
			Add("piccadilly circus", writer);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			
			// search for "blueberry pi*":
			MultiPhraseQuery query1 = new MultiPhraseQuery();
			// search for "strawberry pi*":
			MultiPhraseQuery query2 = new MultiPhraseQuery();
			query1.Add(new Term("body", "blueberry"));
			query2.Add(new Term("body", "strawberry"));
			
			System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
			IndexReader ir = IndexReader.Open(indexStore);
			
			// this TermEnum gives "piccadilly", "pie" and "pizza".
			System.String prefix = "pi";
			TermEnum te = ir.Terms(new Term("body", prefix));
			do 
			{
				if (te.Term().Text().StartsWith(prefix))
				{
					termsWithPrefix.Add(te.Term());
				}
			}
			while (te.Next());
			
			query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
			query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());
			
			ScoreDoc[] result;
			result = searcher.Search(query1, null, 1000).scoreDocs;
			Assert.AreEqual(2, result.Length);
			result = searcher.Search(query2, null, 1000).scoreDocs;
			Assert.AreEqual(0, result.Length);
			
			// search for "blue* pizza":
			MultiPhraseQuery query3 = new MultiPhraseQuery();
			termsWithPrefix.Clear();
			prefix = "blue";
			te = ir.Terms(new Term("body", prefix));
			do 
			{
				if (te.Term().Text().StartsWith(prefix))
				{
					termsWithPrefix.Add(te.Term());
				}
			}
			while (te.Next());
			query3.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
			query3.Add(new Term("body", "pizza"));
			
			result = searcher.Search(query3, null, 1000).scoreDocs;
			Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
			Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());
			
			// test slop:
			query3.SetSlop(1);
			result = searcher.Search(query3, null, 1000).scoreDocs;
			Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird foobar pizza
			
			MultiPhraseQuery query4 = new MultiPhraseQuery();
			try
			{
				query4.Add(new Term("field1", "foo"));
				query4.Add(new Term("field2", "foobar"));
				Assert.Fail();
			}
			catch (System.ArgumentException e)
			{
				// okay, all terms must belong to the same field
			}
			
			searcher.Close();
			indexStore.Close();
		}
예제 #34
0
 public virtual void  TestMPQ4()
 {
     MultiPhraseQuery q = new MultiPhraseQuery();
     q.Add(Ta(new System.String[]{"w1"}));
     q.Add(Ta(new System.String[]{"w2"}));
     Qtest(q, new int[]{0});
 }
        public virtual void  TestSetPosition()
        {
            Analyzer    analyzer = new AnonymousClassAnalyzer(this);
            Directory   store    = new MockRAMDirectory();
            IndexWriter writer   = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            Document    d        = new Document();

            d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d);
            writer.Optimize();
            writer.Close();


            IndexSearcher searcher = new IndexSearcher(store, true);

            TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));

            pos.Next();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
            pos.Next();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitely.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // should not find "1 2" because there is a gap of 1 in the index
            QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));

            q    = (PhraseQuery)qp.Parse("\"1 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // omitted stop word cannot help because stop filter swallows the increments.
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query parser alone won't help, because stop filter swallows the increments.
            qp.EnablePositionIncrements = true;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // stop filter alone won't help, because query parser swallows the increments.
            qp.EnablePositionIncrements = false;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // when both qp qnd stopFilter propagate increments, we should find the doc.
            qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
            qp.EnablePositionIncrements = true;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
        }
예제 #36
0
		public virtual void  TestBooleanQueryContainingSingleTermPrefixQuery()
		{
			// this tests against bug 33161 (now fixed)
			// In order to cause the bug, the outer query must have more than one term 
			// and all terms required.
			// The contained PhraseMultiQuery must contain exactly one term array.
			
			RAMDirectory indexStore = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Add("blueberry pie", writer);
			Add("blueberry chewing gum", writer);
			Add("blue raspberry pie", writer);
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(indexStore);
			// This query will be equivalent to +body:pie +body:"blue*"
			BooleanQuery q = new BooleanQuery();
			q.Add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);
			
			MultiPhraseQuery trouble = new MultiPhraseQuery();
			trouble.Add(new Term[]{new Term("body", "blueberry"), new Term("body", "blue")});
			q.Add(trouble, BooleanClause.Occur.MUST);
			
			// exception will be thrown here without fix
			ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
			
			Assert.AreEqual(2, hits.Length, "Wrong number of hits");
			searcher.Close();
		}
예제 #37
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).Terms("body").Iterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term().Utf8ToString();
                if (s.StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te.SeekCeil(new BytesRef(prefix));

            do
            {
                if (te.Term().Utf8ToString().StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", te.Term().Utf8ToString()));
                }
            } while (te.Next() != null);

            query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000).ScoreDocs;

            // just make sure no exc:
            searcher.Explain(query3, 0);

            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird
            // foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // okay, all terms must belong to the same field
            }

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #38
0
		public virtual void  TestHashCodeAndEquals()
		{
			MultiPhraseQuery query1 = new MultiPhraseQuery();
			MultiPhraseQuery query2 = new MultiPhraseQuery();
			
			Assert.AreEqual(query1.GetHashCode(), query2.GetHashCode());
			Assert.AreEqual(query1, query2);
			
			Term term1 = new Term("someField", "someText");
			
			query1.Add(term1);
			query2.Add(term1);
			
			Assert.AreEqual(query1.GetHashCode(), query2.GetHashCode());
			Assert.AreEqual(query1, query2);
			
			Term term2 = new Term("someField", "someMoreText");
			
			query1.Add(term2);
			
			Assert.IsFalse(query1.GetHashCode() == query2.GetHashCode());
			Assert.IsFalse(query1.Equals(query2));
			
			query2.Add(term2);
			
			Assert.AreEqual(query1.GetHashCode(), query2.GetHashCode());
			Assert.AreEqual(query1, query2);
		}
        public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
        {
            // this tests against bug 33161 (now fixed)
            // In order to cause the bug, the outer query must have more than one term
            // and all terms required.
            // The contained PhraseMultiQuery must contain exactly one term array.
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
            Add("blueberry pie", writer);
            Add("blueberry chewing gum", writer);
            Add("blue raspberry pie", writer);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);
            // this query will be equivalent to +body:pie +body:"blue*"
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();
            trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
            q.Add(trouble, BooleanClause.Occur.MUST);

            // exception will be thrown here without fix
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;

            Assert.AreEqual(2, hits.Length, "Wrong number of hits");

            // just make sure no exc:
            searcher.Explain(q, 0);

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #40
0
		/// <exception cref="ParseException">throw in overridden method to disallow
		/// </exception>
		protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
		{
			// Use the analyzer to get all the tokens, and then build a TermQuery,
			// PhraseQuery, or nothing based on the term count
			
			TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
			System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			Lucene.Net.Analysis.Token t;
			int positionCount = 0;
			bool severalTokensAtSamePosition = false;
			
			while (true)
			{
				try
				{
					t = source.Next();
				}
				catch (System.IO.IOException e)
				{
					t = null;
				}
				if (t == null)
					break;
				v.Add(t);
				if (t.GetPositionIncrement() != 0)
					positionCount += t.GetPositionIncrement();
				else
					severalTokensAtSamePosition = true;
			}
			try
			{
				source.Close();
			}
			catch (System.IO.IOException e)
			{
				// ignore
			}
			
			if (v.Count == 0)
				return null;
			else if (v.Count == 1)
			{
				t = (Lucene.Net.Analysis.Token) v[0];
				return new TermQuery(new Term(field, t.TermText()));
			}
			else
			{
				if (severalTokensAtSamePosition)
				{
					if (positionCount == 1)
					{
						// no phrase query:
						BooleanQuery q = new BooleanQuery(true);
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
							q.Add(currentQuery, BooleanClause.Occur.SHOULD);
						}
						return q;
					}
					else
					{
						// phrase query:
						MultiPhraseQuery mpq = new MultiPhraseQuery();
						System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							if (t.GetPositionIncrement() == 1 && multiTerms.Count > 0)
							{
								mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
								multiTerms.Clear();
							}
							multiTerms.Add(new Term(field, t.TermText()));
						}
						mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
						return mpq;
					}
				}
				else
				{
					PhraseQuery q = new PhraseQuery();
					q.SetSlop(phraseSlop);
					for (int i = 0; i < v.Count; i++)
					{
						q.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
					}
					return q;
				}
			}
		}
예제 #41
0
        public virtual void TestZeroPosIncr()
        {
            Directory dir = new RAMDirectory();

            Token[] tokens = new Token[3];
            tokens[0] = new Token();
            tokens[0].Append("a");
            tokens[0].PositionIncrement = 1;
            tokens[1] = new Token();
            tokens[1].Append("b");
            tokens[1].PositionIncrement = 0;
            tokens[2] = new Token();
            tokens[2].Append("c");
            tokens[2].PositionIncrement = 0;

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            Document          doc    = new Document();

            doc.Add(new TextField("field", new CannedTokenStream(tokens)));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new TextField("field", new CannedTokenStream(tokens)));
            writer.AddDocument(doc);
            IndexReader r = writer.Reader;

            writer.Dispose();
            IndexSearcher    s   = NewSearcher(r);
            MultiPhraseQuery mpq = new MultiPhraseQuery();

            //mpq.setSlop(1);

            // NOTE: not great that if we do the else clause here we
            // get different scores!  MultiPhraseQuery counts that
            // phrase as occurring twice per doc (it should be 1, I
            // think?).  this is because MultipleTermPositions is able to
            // return the same position more than once (0, in this
            // case):
            if (true)
            {
                mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
                mpq.Add(new Term[] { new Term("field", "a") }, 0);
            }
            else
            {
                mpq.Add(new Term[] { new Term("field", "a") }, 0);
                mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
            }
            TopDocs hits = s.Search(mpq, 2);

            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual(hits.ScoreDocs[0].Score, hits.ScoreDocs[1].Score, 1e-5);

            /*
             * for(int hit=0;hit<hits.TotalHits;hit++) {
             * ScoreDoc sd = hits.ScoreDocs[hit];
             * System.out.println("  hit doc=" + sd.Doc + " score=" + sd.Score);
             * }
             */
            r.Dispose();
            dir.Dispose();
        }