public virtual void  TestFilterWorks()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			for (int i = 0; i < 500; i++)
			{
				Document document = new Document();
				document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(document);
			}
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir, true);
			
			SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
			SpanQueryFilter filter = new SpanQueryFilter(query);
			SpanFilterResult result = filter.BitSpans(reader);
			DocIdSet docIdSet = result.DocIdSet;
			Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
			AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
			var spans = result.Positions;
			Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
			int size = GetDocIdSetSize(docIdSet);
			Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
			for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); )
			{
				SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
				Assert.IsTrue(info != null, "info is null and it shouldn't be");
				//The doc should indicate the bit is on
				AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
				//There should be two positions in each
				Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
			}
			reader.Close();
		}
示例#2
0
		public override Query Rewrite(IndexReader reader)
		{
			Query orig = new RegexQuery(term).Rewrite(reader);
			
			// RegexQuery (via MultiTermQuery).rewrite always returns a BooleanQuery
			BooleanQuery bq = (BooleanQuery) orig;
			
			BooleanClause[] clauses = bq.GetClauses();
			SpanQuery[] sqs = new SpanQuery[clauses.Length];
			for (int i = 0; i < clauses.Length; i++)
			{
				BooleanClause clause = clauses[i];
				
				// Clauses from RegexQuery.rewrite are always TermQuery's
				TermQuery tq = (TermQuery) clause.GetQuery();
				
				sqs[i] = new SpanTermQuery(tq.GetTerm());
				sqs[i].SetBoost(tq.GetBoost());
			}
			
			SpanOrQuery query = new SpanOrQuery(sqs);
			query.SetBoost(orig.GetBoost());
			
			return query;
		}
示例#3
0
        public override Query Rewrite(IndexReader reader)
        {
            Query orig = new RegexQuery(term).Rewrite(reader);

            // RegexQuery (via MultiTermQuery).rewrite always returns a BooleanQuery
            BooleanQuery bq = (BooleanQuery)orig;

            BooleanClause[] clauses = bq.GetClauses();
            SpanQuery[]     sqs     = new SpanQuery[clauses.Length];
            for (int i = 0; i < clauses.Length; i++)
            {
                BooleanClause clause = clauses[i];

                // Clauses from RegexQuery.rewrite are always TermQuery's
                TermQuery tq = (TermQuery)clause.GetQuery();

                sqs[i] = new SpanTermQuery(tq.GetTerm());
                sqs[i].SetBoost(tq.GetBoost());
            }

            SpanOrQuery query = new SpanOrQuery(sqs);

            query.SetBoost(orig.GetBoost());

            return(query);
        }
示例#4
0
        public virtual void  TestQuery()
        {
            PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction());

            QueryUtils.Check(boostingFuncTermQuery);

            SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));

            Assert.IsTrue(boostingFuncTermQuery.Equals(spanTermQuery) == spanTermQuery.Equals(boostingFuncTermQuery));

            PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new AveragePayloadFunction());

            QueryUtils.CheckUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
        }
示例#5
0
        public virtual void  TestFilterWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < 500; i++)
            {
                Document document = new Document();
                document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, true, null);

            SpanTermQuery    query    = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
            SpanQueryFilter  filter   = new SpanQueryFilter(query);
            SpanFilterResult result   = filter.BitSpans(reader, null);
            DocIdSet         docIdSet = result.DocIdSet;

            Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
            AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
            var spans = result.Positions;

            Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
            int size = GetDocIdSetSize(docIdSet);

            Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
            for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
            {
                SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current;
                Assert.IsTrue(info != null, "info is null and it shouldn't be");
                //The doc should indicate the bit is on
                AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
                //There should be two positions in each
                Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
            }
            reader.Close();
        }
		public virtual void  TestFilterWorks()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
			for (int i = 0; i < 500; i++)
			{
				Document document = new Document();
				document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.TOKENIZED));
				writer.AddDocument(document);
			}
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			
			SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
			SpanQueryFilter filter = new SpanQueryFilter(query);
			SpanFilterResult result = filter.BitSpans(reader);
			System.Collections.BitArray bits = result.GetBits();
			Assert.IsTrue(bits != null, "bits is null and it shouldn't be");
			Assert.IsTrue(bits.Get(10), "tenth bit is not on");
			System.Collections.IList spans = result.GetPositions();
			Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
			int cardinality = 0;
			for (int i = 0; i < bits.Count; i++)
			{
				if (bits.Get(i)) cardinality++;
			}
			Assert.IsTrue(spans.Count == cardinality, "spans Size: " + spans.Count + " is not: " + cardinality);
			for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); )
			{
				SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
				Assert.IsTrue(info != null, "info is null and it shouldn't be");
				//The doc should indicate the bit is on
				Assert.IsTrue(bits.Get(info.GetDoc()), "Bit is not on and it should be");
				//There should be two positions in each
				Assert.IsTrue(info.GetPositions().Count == 2, "info.getPositions() Size: " + info.GetPositions().Count + " is not: " + 2);
			}
			reader.Close();
		}
		public virtual void  TestQuery()
		{
			PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction());
			QueryUtils.Check(boostingFuncTermQuery);
			
			SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
			
			Assert.IsTrue(boostingFuncTermQuery.Equals(spanTermQuery) == spanTermQuery.Equals(boostingFuncTermQuery));
			
			PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new AveragePayloadFunction());
			
			QueryUtils.CheckUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
		}
        private void QueryToSpanQuery(Query query, ICollection <byte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited())
                    {
                        QueryToSpanQuery(queryClauses[i].GetQuery(), payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[]      phraseQueryTerms = ((PhraseQuery)query).GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                int  slop    = ((PhraseQuery)query).GetSlop();
                bool inorder = false;

                if (slop == 0)
                {
                    inorder = true;
                }

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.SetBoost(query.GetBoost());
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).GetTerm());
                stq.SetBoost(query.GetBoost());
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).GetQuery(), payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery)query).Iterator(); iterator.MoveNext();)
                {
                    QueryToSpanQuery((Query)iterator.Current, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery         mpq        = (MultiPhraseQuery)query;
                System.Collections.IList termArrays = mpq.GetTermArrays();
                int[] positions = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = (Term[])termArrays[i];
                        System.Collections.ArrayList disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length));
                            ++distinctPositions;
                        }
                        for (int j = 0; j < termArray.Length; ++j)
                        {
                            disjuncts.Add(new SpanTermQuery(termArray[j]));
                        }
                    }

                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        System.Collections.ArrayList disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery((SpanQuery[])(disjuncts.ToArray(typeof(SpanQuery[]))));
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.GetSlop();
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.SetBoost(query.GetBoost());
                    GetPayloads(payloads, sp);
                }
            }
        }
        public virtual void TestBooleanSpanQuery()
        {
            bool failed = false;
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1, BooleanClause.Occur.SHOULD);
            query.Add(sq2, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(failed, false, "Bug in boolean query composed of span queries");
            Assert.AreEqual(hits, 1, "Bug in boolean query composed of span queries");
            directory.Dispose();
        }
 public void TestNegativePositions()
 {
     SinkTokenizer tokens = new SinkTokenizer();
     Token t = new Token();
     t.SetTermText("a");
     t.SetPositionIncrement(0);
     tokens.Add(t);
     t.SetTermText("b");
     t.SetPositionIncrement(1);
     tokens.Add(t);
     t.SetTermText("c");
     tokens.Add(t);
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
     Document doc = new Document();
     doc.Add(new Field("field", tokens));
     w.AddDocument(doc);
     w.Close();
     IndexSearcher s = new IndexSearcher(dir);
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("field", "a"));
     pq.Add(new Term("field", "b"));
     pq.Add(new Term("field", "c"));
     Hits hits = s.Search(pq);
     Assert.AreEqual(1, hits.Length());
     Query q = new SpanTermQuery(new Term("field", "a"));
     hits = s.Search(q);
     Assert.AreEqual(1, hits.Length());
     TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a"));
     Assert.IsTrue(tps.Next());
     Assert.AreEqual(1, tps.Freq());
     Assert.AreEqual(-1, tps.NextPosition());
     Assert.IsTrue(_TestUtil.CheckIndex(dir));
     s.Close();
     dir.Close();
 }
        private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited())
                    {
                        QueryToSpanQuery(queryClauses[i].GetQuery(), payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms();
                SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                int slop = ((PhraseQuery) query).GetSlop();
                bool inorder = false;

                if (slop == 0)
                {
                    inorder = true;
                }

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.SetBoost(query.GetBoost());
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).GetTerm());
                stq.SetBoost(query.GetBoost());
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery) query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery) query).GetQuery(), payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {

                for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery) query).Iterator(); iterator.MoveNext(); )
                {
                    QueryToSpanQuery((Query) iterator.Current, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq = (MultiPhraseQuery) query;
                System.Collections.IList termArrays = mpq.GetTermArrays();
                int[] positions = mpq.GetPositions();
                if (positions.Length > 0)
                {

                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = (Term[]) termArrays[i];
                        System.Collections.IList disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length));
                            ++distinctPositions;
                        }
                        for (int j = 0; j < termArray.Length; ++j)
                        {
                            disjuncts.Add(new SpanTermQuery(termArray[j]));
                        }
                    }

                    int positionGaps = 0;
                    int position = 0;
                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        System.Collections.ArrayList disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray(typeof(SpanQuery[]))));
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int slop = mpq.GetSlop();
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.SetBoost(query.GetBoost());
                    GetPayloads(payloads, sp);
                }
            }
        }
示例#12
0
		public virtual void  TestNegativePositions()
		{
			TokenStream tokens = new AnonymousClassTokenStream(this);
			
			MockRAMDirectory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
			Document doc = new Document();
			doc.Add(new Field("field", tokens));
			w.AddDocument(doc);
			w.Commit();
			
			IndexSearcher s = new IndexSearcher(dir);
			PhraseQuery pq = new PhraseQuery();
			pq.Add(new Term("field", "a"));
			pq.Add(new Term("field", "b"));
			pq.Add(new Term("field", "c"));
			ScoreDoc[] hits = s.Search(pq, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			
			Query q = new SpanTermQuery(new Term("field", "a"));
			hits = s.Search(q, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a"));
			Assert.IsTrue(tps.Next());
			Assert.AreEqual(1, tps.Freq());
			Assert.AreEqual(0, tps.NextPosition());
			w.Close();
			
			Assert.IsTrue(_TestUtil.CheckIndex(dir));
			s.Close();
			dir.Close();
		}
示例#13
0
 public override Query VisitSpanTermQuery(Lucene.Net.Search.Spans.SpanTermQuery spanTermq)
 {
     throw new SnNotSupportedException();
 }
		private void  QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
		{
			if (query is BooleanQuery)
			{
				BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses();
				
				for (int i = 0; i < queryClauses.Length; i++)
				{
                    if (!queryClauses[i].IsProhibited)
					{
						QueryToSpanQuery(queryClauses[i].Query, payloads);
					}
				}
			}
			else if (query is PhraseQuery)
			{
				Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms();
				SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
				for (int i = 0; i < phraseQueryTerms.Length; i++)
				{
					clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
				}
				
				int slop = ((PhraseQuery) query).Slop;
				bool inorder = false;
				
				if (slop == 0)
				{
					inorder = true;
				}
				
				SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
				sp.Boost = query.Boost;
				GetPayloads(payloads, sp);
			}
			else if (query is TermQuery)
			{
				SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term);
				stq.Boost = query.Boost;
				GetPayloads(payloads, stq);
			}
			else if (query is SpanQuery)
			{
				GetPayloads(payloads, (SpanQuery) query);
			}
			else if (query is FilteredQuery)
			{
				QueryToSpanQuery(((FilteredQuery) query).Query, payloads);
			}
			else if (query is DisjunctionMaxQuery)
			{

                for (IEnumerator<Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext(); )
                {
                    QueryToSpanQuery(iterator.Current, payloads);
                }
			}
			else if (query is MultiPhraseQuery)
			{
				MultiPhraseQuery mpq = (MultiPhraseQuery) query;
				System.Collections.Generic.IList<Term[]> termArrays = mpq.GetTermArrays();
				int[] positions = mpq.GetPositions();
				if (positions.Length > 0)
				{
					
					int maxPosition = positions[positions.Length - 1];
					for (int i = 0; i < positions.Length - 1; ++i)
					{
						if (positions[i] > maxPosition)
						{
							maxPosition = positions[i];
						}
					}

                    IList<Query>[] disjunctLists = new IList<Query>[maxPosition + 1];
					int distinctPositions = 0;
					
					for (int i = 0; i < termArrays.Count; ++i)
					{
						Term[] termArray = termArrays[i];
						IList<Query> disjuncts = disjunctLists[positions[i]];
						if (disjuncts == null)
						{
							disjuncts = (disjunctLists[positions[i]] = new List<Query>(termArray.Length));
							++distinctPositions;
						}
						foreach(Term term in termArray)
						{
							disjuncts.Add(new SpanTermQuery(term));
						}
					}
					
					int positionGaps = 0;
					int position = 0;
					SpanQuery[] clauses = new SpanQuery[distinctPositions];
					for (int i = 0; i < disjunctLists.Length; ++i)
					{
						IList<Query> disjuncts = disjunctLists[i];
						if (disjuncts != null)
						{
                            clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray()));
						}
						else
						{
							++positionGaps;
						}
					}
					
					int slop = mpq.Slop;
					bool inorder = (slop == 0);
					
					SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
					sp.Boost = query.Boost;
					GetPayloads(payloads, sp);
				}
			}
		}
		public virtual void  TestPayloadsPos0()
		{
			for (int x = 0; x < 2; x++)
			{
				Directory dir = new MockRAMDirectory();
				IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				if (x == 1)
				{
					writer.SetAllowMinus1Position();
				}
				Document doc = new Document();
                System.IO.MemoryStream ms = new System.IO.MemoryStream();
                System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
                sw.Write("a a b c d e a f g h i j a b k k");
                // flush to stream & reset it's position so it can be read
                sw.Flush();
                ms.Position = 0;
                doc.Add(new Field("content", new System.IO.StreamReader(ms)));
				writer.AddDocument(doc);
				
				IndexReader r = writer.GetReader();
				
				TermPositions tp = r.TermPositions(new Term("content", "a"));
				int count = 0;
				Assert.IsTrue(tp.Next());
				// "a" occurs 4 times
				Assert.AreEqual(4, tp.Freq());
				int expected;
				if (x == 1)
				{
					expected = System.Int32.MaxValue;
				}
				else
				{
					expected = 0;
				}
				Assert.AreEqual(expected, tp.NextPosition());
				if (x == 1)
				{
					continue;
				}
				Assert.AreEqual(1, tp.NextPosition());
				Assert.AreEqual(3, tp.NextPosition());
				Assert.AreEqual(6, tp.NextPosition());
				
				// only one doc has "a"
				Assert.IsFalse(tp.Next());
				
				IndexSearcher is_Renamed = new IndexSearcher(r);
				
				SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
				SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
				SpanQuery[] sqs = new SpanQuery[]{stq1, stq2};
				SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
				
				count = 0;
				bool sawZero = false;
				//System.out.println("\ngetPayloadSpans test");
				Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader());
				while (pspans.Next())
				{
					//System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
					System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
					sawZero |= pspans.Start() == 0;
					for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext(); )
					{
						count++;
						System.Object generatedAux2 = it.Current;
						//System.out.println(new String((byte[]) it.next()));
					}
				}
				Assert.AreEqual(5, count);
				Assert.IsTrue(sawZero);
				
				//System.out.println("\ngetSpans test");
				Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader());
				count = 0;
				sawZero = false;
				while (spans.Next())
				{
					count++;
					sawZero |= spans.Start() == 0;
					//System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
				}
				Assert.AreEqual(4, count);
				Assert.IsTrue(sawZero);
				
				//System.out.println("\nPayloadSpanUtil test");
				
				sawZero = false;
				PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader());
				System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
				count = pls.Count;
				for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext(); )
				{
					System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current));
					//System.out.println(s);
					sawZero |= s.Equals("pos: 0");
				}
				Assert.AreEqual(5, count);
				Assert.IsTrue(sawZero);
				writer.Close();
				is_Renamed.GetIndexReader().Close();
				dir.Close();
			}
		}
        public virtual void TestPayloadsPos0()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true,
                                                 IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();

            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
            sw.Write("a a b c d e a f g h i j a b k k");
            // flush to stream & reset it's position so it can be read
            sw.Flush();
            ms.Position = 0;
            doc.Add(new Field("content", new System.IO.StreamReader(ms)));
            writer.AddDocument(doc);

            IndexReader r = writer.GetReader();

            TermPositions tp    = r.TermPositions(new Term("content", "a"));
            int           count = 0;

            Assert.IsTrue(tp.Next());
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq);
            int expected = 0;

            Assert.AreEqual(expected, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.IsFalse(tp.Next());

            IndexSearcher is_Renamed = new IndexSearcher(r);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;

            //System.out.println("\ngetPayloadSpans test");
            Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.IndexReader);
            while (pspans.Next())
            {
                //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
                System.Collections.Generic.ICollection <byte[]> payloads = pspans.GetPayload();
                sawZero |= pspans.Start() == 0;
                for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
                {
                    count++;
                    System.Object generatedAux2 = it.Current;
                    //System.out.println(new String((byte[]) it.next()));
                }
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);

            //System.out.println("\ngetSpans test");
            Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.IndexReader);
            count   = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            //System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.IndexReader);

            System.Collections.Generic.ICollection <byte[]> pls = psu.GetPayloadsForQuery(snq);
            count = pls.Count;
            for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();)
            {
                System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[])it.Current));
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Close();
            is_Renamed.IndexReader.Close();
            dir.Close();
        }
示例#17
0
        private void  QueryToSpanQuery(Query query, ICollection <byte[]> payloads, IState state)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited)
                    {
                        QueryToSpanQuery(queryClauses[i].Query, payloads, state);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[]      phraseQueryTerms = ((PhraseQuery)query).GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                int  slop    = ((PhraseQuery)query).Slop;
                bool inorder = false;

                if (slop == 0)
                {
                    inorder = true;
                }

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                GetPayloads(payloads, sp, state);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
                stq.Boost = query.Boost;
                GetPayloads(payloads, stq, state);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query, state);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).Query, payloads, state);
            }
            else if (query is DisjunctionMaxQuery)
            {
                for (IEnumerator <Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext();)
                {
                    QueryToSpanQuery(iterator.Current, payloads, state);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq = (MultiPhraseQuery)query;
                System.Collections.Generic.IList <Term[]> termArrays = mpq.GetTermArrays();
                int[] positions = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    IList <Query>[] disjunctLists     = new IList <Query> [maxPosition + 1];
                    int             distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[]        termArray = termArrays[i];
                        IList <Query> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List <Query>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (Term term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        IList <Query> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery((SpanQuery[])(disjuncts.ToArray()));
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    GetPayloads(payloads, sp, state);
                }
            }
        }
 private PayloadNearQuery NewPhraseQuery(string fieldName, string phrase, bool inOrder, PayloadFunction function)
 {
     var words = _whiteSpaceRegex.Split(phrase);
     var clauses = new SpanQuery[words.Length];
     for (var i = 0; i < clauses.Length; i++)
     {
         clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
     }
     return new PayloadNearQuery(clauses, 0, inOrder, function);
 }
示例#19
0
        public virtual void TestCrazySpans()
        {
            // The problem: "normal" lucene queries create scorers, returning null if terms dont exist
            // this means they never score a term that does not exist.
            // however with spans, there is only one scorer for the whole hierarchy:
            // inner queries are not real queries, their boosts are ignored, etc.
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            doc.Add(NewField("foo", "bar", ft));
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar"));
                SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz"));
                Query query = new SpanOrQuery(s1, s2);
                TopDocs td = @is.Search(query, 10);
                Assert.AreEqual(1, td.TotalHits);
                float score = td.ScoreDocs[0].Score;
                Assert.IsTrue(score >= 0.0f);
                Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim);
            }
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestPayloadsPos0()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer());
            Document doc = new Document();
            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader readerFromWriter = writer.Reader;
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a"));

            int count = 0;
            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq());
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
            SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;
            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End());
                }
                var payloads = pspans.Payload;
                sawZero |= pspans.Start() == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var pls = psu.GetPayloadsForQuery(snq);
            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
示例#21
0
 public override Query VisitSpanTermQuery(Lucene.Net.Search.Spans.SpanTermQuery spanTermq)
 {
     throw new NotImplementedException();
 }