public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public override Query Rewrite(IndexReader reader) { Query orig = new RegexQuery(term).Rewrite(reader); // RegexQuery (via MultiTermQuery).rewrite always returns a BooleanQuery BooleanQuery bq = (BooleanQuery) orig; BooleanClause[] clauses = bq.GetClauses(); SpanQuery[] sqs = new SpanQuery[clauses.Length]; for (int i = 0; i < clauses.Length; i++) { BooleanClause clause = clauses[i]; // Clauses from RegexQuery.rewrite are always TermQuery's TermQuery tq = (TermQuery) clause.GetQuery(); sqs[i] = new SpanTermQuery(tq.GetTerm()); sqs[i].SetBoost(tq.GetBoost()); } SpanOrQuery query = new SpanOrQuery(sqs); query.SetBoost(orig.GetBoost()); return query; }
public override Query Rewrite(IndexReader reader) { Query orig = new RegexQuery(term).Rewrite(reader); // RegexQuery (via MultiTermQuery).rewrite always returns a BooleanQuery BooleanQuery bq = (BooleanQuery)orig; BooleanClause[] clauses = bq.GetClauses(); SpanQuery[] sqs = new SpanQuery[clauses.Length]; for (int i = 0; i < clauses.Length; i++) { BooleanClause clause = clauses[i]; // Clauses from RegexQuery.rewrite are always TermQuery's TermQuery tq = (TermQuery)clause.GetQuery(); sqs[i] = new SpanTermQuery(tq.GetTerm()); sqs[i].SetBoost(tq.GetBoost()); } SpanOrQuery query = new SpanOrQuery(sqs); query.SetBoost(orig.GetBoost()); return(query); }
public virtual void TestQuery() { PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction()); QueryUtils.Check(boostingFuncTermQuery); SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")); Assert.IsTrue(boostingFuncTermQuery.Equals(spanTermQuery) == spanTermQuery.Equals(boostingFuncTermQuery)); PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new AveragePayloadFunction()); QueryUtils.CheckUnequal(boostingFuncTermQuery, boostingFuncTermQuery2); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document, null); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true, null); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader, null); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(document); } writer.Close(); IndexReader reader = IndexReader.Open(dir); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader); System.Collections.BitArray bits = result.GetBits(); Assert.IsTrue(bits != null, "bits is null and it shouldn't be"); Assert.IsTrue(bits.Get(10), "tenth bit is not on"); System.Collections.IList spans = result.GetPositions(); Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int cardinality = 0; for (int i = 0; i < bits.Count; i++) { if (bits.Get(i)) cardinality++; } Assert.IsTrue(spans.Count == cardinality, "spans Size: " + spans.Count + " is not: " + cardinality); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on Assert.IsTrue(bits.Get(info.GetDoc()), "Bit is not on and it should be"); //There should be two positions in each Assert.IsTrue(info.GetPositions().Count == 2, "info.getPositions() Size: " + info.GetPositions().Count + " is not: " + 2); } reader.Close(); }
private void QueryToSpanQuery(Query query, ICollection <byte[]> payloads) { if (query is BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses(); for (int i = 0; i < queryClauses.Length; i++) { if (!queryClauses[i].IsProhibited()) { QueryToSpanQuery(queryClauses[i].GetQuery(), payloads); } } } else if (query is PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery)query).GetTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; for (int i = 0; i < phraseQueryTerms.Length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery)query).GetSlop(); bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.SetBoost(query.GetBoost()); GetPayloads(payloads, sp); } else if (query is TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).GetTerm()); stq.SetBoost(query.GetBoost()); GetPayloads(payloads, stq); } else if (query is SpanQuery) { GetPayloads(payloads, (SpanQuery)query); } else if (query is FilteredQuery) { QueryToSpanQuery(((FilteredQuery)query).GetQuery(), payloads); } else if (query is DisjunctionMaxQuery) { for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery)query).Iterator(); iterator.MoveNext();) { QueryToSpanQuery((Query)iterator.Current, payloads); } } else if (query is MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; System.Collections.IList termArrays = mpq.GetTermArrays(); int[] positions = mpq.GetPositions(); if (positions.Length > 0) { int maxPosition = positions[positions.Length - 1]; for (int i = 0; i < positions.Length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.Count; ++i) { Term[] termArray = (Term[])termArrays[i]; System.Collections.ArrayList disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length)); ++distinctPositions; } for (int j = 0; j < termArray.Length; ++j) { disjuncts.Add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.Length; ++i) { System.Collections.ArrayList disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery((SpanQuery[])(disjuncts.ToArray(typeof(SpanQuery[])))); } else { ++positionGaps; } } int slop = mpq.GetSlop(); bool inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.SetBoost(query.GetBoost()); GetPayloads(payloads, sp); } } }
public virtual void TestBooleanSpanQuery() { bool failed = false; int hits = 0; Directory directory = NewDirectory(); Analyzer indexerAnalyzer = new MockAnalyzer(Random()); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer); IndexWriter writer = new IndexWriter(directory, config); string FIELD = "content"; Document d = new Document(); d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); IndexReader indexReader = DirectoryReader.Open(directory); IndexSearcher searcher = NewSearcher(indexReader); BooleanQuery query = new BooleanQuery(); SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork")); SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); query.Add(sq1, BooleanClause.Occur.SHOULD); query.Add(sq2, BooleanClause.Occur.SHOULD); TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true); searcher.Search(query, collector); hits = collector.TopDocs().ScoreDocs.Length; foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs) { Console.WriteLine(scoreDoc.Doc); } indexReader.Dispose(); Assert.AreEqual(failed, false, "Bug in boolean query composed of span queries"); Assert.AreEqual(hits, 1, "Bug in boolean query composed of span queries"); directory.Dispose(); }
public void TestNegativePositions() { SinkTokenizer tokens = new SinkTokenizer(); Token t = new Token(); t.SetTermText("a"); t.SetPositionIncrement(0); tokens.Add(t); t.SetTermText("b"); t.SetPositionIncrement(1); tokens.Add(t); t.SetTermText("c"); tokens.Add(t); MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", tokens)); w.AddDocument(doc); w.Close(); IndexSearcher s = new IndexSearcher(dir); PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "a")); pq.Add(new Term("field", "b")); pq.Add(new Term("field", "c")); Hits hits = s.Search(pq); Assert.AreEqual(1, hits.Length()); Query q = new SpanTermQuery(new Term("field", "a")); hits = s.Search(q); Assert.AreEqual(1, hits.Length()); TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a")); Assert.IsTrue(tps.Next()); Assert.AreEqual(1, tps.Freq()); Assert.AreEqual(-1, tps.NextPosition()); Assert.IsTrue(_TestUtil.CheckIndex(dir)); s.Close(); dir.Close(); }
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads) { if (query is BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses(); for (int i = 0; i < queryClauses.Length; i++) { if (!queryClauses[i].IsProhibited()) { QueryToSpanQuery(queryClauses[i].GetQuery(), payloads); } } } else if (query is PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; for (int i = 0; i < phraseQueryTerms.Length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery) query).GetSlop(); bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.SetBoost(query.GetBoost()); GetPayloads(payloads, sp); } else if (query is TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).GetTerm()); stq.SetBoost(query.GetBoost()); GetPayloads(payloads, stq); } else if (query is SpanQuery) { GetPayloads(payloads, (SpanQuery) query); } else if (query is FilteredQuery) { QueryToSpanQuery(((FilteredQuery) query).GetQuery(), payloads); } else if (query is DisjunctionMaxQuery) { for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery) query).Iterator(); iterator.MoveNext(); ) { QueryToSpanQuery((Query) iterator.Current, payloads); } } else if (query is MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery) query; System.Collections.IList termArrays = mpq.GetTermArrays(); int[] positions = mpq.GetPositions(); if (positions.Length > 0) { int maxPosition = positions[positions.Length - 1]; for (int i = 0; i < positions.Length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.Count; ++i) { Term[] termArray = (Term[]) termArrays[i]; System.Collections.IList disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length)); ++distinctPositions; } for (int j = 0; j < termArray.Length; ++j) { disjuncts.Add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.Length; ++i) { System.Collections.ArrayList disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray(typeof(SpanQuery[])))); } else { ++positionGaps; } } int slop = mpq.GetSlop(); bool inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.SetBoost(query.GetBoost()); GetPayloads(payloads, sp); } } }
public virtual void TestNegativePositions() { TokenStream tokens = new AnonymousClassTokenStream(this); MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", tokens)); w.AddDocument(doc); w.Commit(); IndexSearcher s = new IndexSearcher(dir); PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "a")); pq.Add(new Term("field", "b")); pq.Add(new Term("field", "c")); ScoreDoc[] hits = s.Search(pq, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); Query q = new SpanTermQuery(new Term("field", "a")); hits = s.Search(q, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a")); Assert.IsTrue(tps.Next()); Assert.AreEqual(1, tps.Freq()); Assert.AreEqual(0, tps.NextPosition()); w.Close(); Assert.IsTrue(_TestUtil.CheckIndex(dir)); s.Close(); dir.Close(); }
public override Query VisitSpanTermQuery(Lucene.Net.Search.Spans.SpanTermQuery spanTermq) { throw new SnNotSupportedException(); }
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads) { if (query is BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses(); for (int i = 0; i < queryClauses.Length; i++) { if (!queryClauses[i].IsProhibited) { QueryToSpanQuery(queryClauses[i].Query, payloads); } } } else if (query is PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; for (int i = 0; i < phraseQueryTerms.Length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery) query).Slop; bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp); } else if (query is TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term); stq.Boost = query.Boost; GetPayloads(payloads, stq); } else if (query is SpanQuery) { GetPayloads(payloads, (SpanQuery) query); } else if (query is FilteredQuery) { QueryToSpanQuery(((FilteredQuery) query).Query, payloads); } else if (query is DisjunctionMaxQuery) { for (IEnumerator<Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext(); ) { QueryToSpanQuery(iterator.Current, payloads); } } else if (query is MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery) query; System.Collections.Generic.IList<Term[]> termArrays = mpq.GetTermArrays(); int[] positions = mpq.GetPositions(); if (positions.Length > 0) { int maxPosition = positions[positions.Length - 1]; for (int i = 0; i < positions.Length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } IList<Query>[] disjunctLists = new IList<Query>[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.Count; ++i) { Term[] termArray = termArrays[i]; IList<Query> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new List<Query>(termArray.Length)); ++distinctPositions; } foreach(Term term in termArray) { disjuncts.Add(new SpanTermQuery(term)); } } int positionGaps = 0; int position = 0; SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.Length; ++i) { IList<Query> disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray())); } else { ++positionGaps; } } int slop = mpq.Slop; bool inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp); } } }
public virtual void TestPayloadsPos0() { for (int x = 0; x < 2; x++) { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); if (x == 1) { writer.SetAllowMinus1Position(); } Document doc = new Document(); System.IO.MemoryStream ms = new System.IO.MemoryStream(); System.IO.StreamWriter sw = new System.IO.StreamWriter(ms); sw.Write("a a b c d e a f g h i j a b k k"); // flush to stream & reset it's position so it can be read sw.Flush(); ms.Position = 0; doc.Add(new Field("content", new System.IO.StreamReader(ms))); writer.AddDocument(doc); IndexReader r = writer.GetReader(); TermPositions tp = r.TermPositions(new Term("content", "a")); int count = 0; Assert.IsTrue(tp.Next()); // "a" occurs 4 times Assert.AreEqual(4, tp.Freq()); int expected; if (x == 1) { expected = System.Int32.MaxValue; } else { expected = 0; } Assert.AreEqual(expected, tp.NextPosition()); if (x == 1) { continue; } Assert.AreEqual(1, tp.NextPosition()); Assert.AreEqual(3, tp.NextPosition()); Assert.AreEqual(6, tp.NextPosition()); // only one doc has "a" Assert.IsFalse(tp.Next()); IndexSearcher is_Renamed = new IndexSearcher(r); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = new SpanQuery[]{stq1, stq2}; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; bool sawZero = false; //System.out.println("\ngetPayloadSpans test"); Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader()); while (pspans.Next()) { //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end()); System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload(); sawZero |= pspans.Start() == 0; for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext(); ) { count++; System.Object generatedAux2 = it.Current; //System.out.println(new String((byte[]) it.next())); } } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); //System.out.println("\ngetSpans test"); Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader()); count = 0; sawZero = false; while (spans.Next()) { count++; sawZero |= spans.Start() == 0; //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end()); } Assert.AreEqual(4, count); Assert.IsTrue(sawZero); //System.out.println("\nPayloadSpanUtil test"); sawZero = false; PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader()); System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq); count = pls.Count; for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext(); ) { System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current)); //System.out.println(s); sawZero |= s.Equals("pos: 0"); } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); writer.Close(); is_Renamed.GetIndexReader().Close(); dir.Close(); } }
public virtual void TestPayloadsPos0() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); System.IO.MemoryStream ms = new System.IO.MemoryStream(); System.IO.StreamWriter sw = new System.IO.StreamWriter(ms); sw.Write("a a b c d e a f g h i j a b k k"); // flush to stream & reset it's position so it can be read sw.Flush(); ms.Position = 0; doc.Add(new Field("content", new System.IO.StreamReader(ms))); writer.AddDocument(doc); IndexReader r = writer.GetReader(); TermPositions tp = r.TermPositions(new Term("content", "a")); int count = 0; Assert.IsTrue(tp.Next()); // "a" occurs 4 times Assert.AreEqual(4, tp.Freq); int expected = 0; Assert.AreEqual(expected, tp.NextPosition()); Assert.AreEqual(1, tp.NextPosition()); Assert.AreEqual(3, tp.NextPosition()); Assert.AreEqual(6, tp.NextPosition()); // only one doc has "a" Assert.IsFalse(tp.Next()); IndexSearcher is_Renamed = new IndexSearcher(r); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; bool sawZero = false; //System.out.println("\ngetPayloadSpans test"); Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.IndexReader); while (pspans.Next()) { //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end()); System.Collections.Generic.ICollection <byte[]> payloads = pspans.GetPayload(); sawZero |= pspans.Start() == 0; for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();) { count++; System.Object generatedAux2 = it.Current; //System.out.println(new String((byte[]) it.next())); } } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); //System.out.println("\ngetSpans test"); Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.IndexReader); count = 0; sawZero = false; while (spans.Next()) { count++; sawZero |= spans.Start() == 0; //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end()); } Assert.AreEqual(4, count); Assert.IsTrue(sawZero); //System.out.println("\nPayloadSpanUtil test"); sawZero = false; PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.IndexReader); System.Collections.Generic.ICollection <byte[]> pls = psu.GetPayloadsForQuery(snq); count = pls.Count; for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();) { System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[])it.Current)); //System.out.println(s); sawZero |= s.Equals("pos: 0"); } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); writer.Close(); is_Renamed.IndexReader.Close(); dir.Close(); }
private void QueryToSpanQuery(Query query, ICollection <byte[]> payloads, IState state) { if (query is BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses(); for (int i = 0; i < queryClauses.Length; i++) { if (!queryClauses[i].IsProhibited) { QueryToSpanQuery(queryClauses[i].Query, payloads, state); } } } else if (query is PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery)query).GetTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; for (int i = 0; i < phraseQueryTerms.Length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery)query).Slop; bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp, state); } else if (query is TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term); stq.Boost = query.Boost; GetPayloads(payloads, stq, state); } else if (query is SpanQuery) { GetPayloads(payloads, (SpanQuery)query, state); } else if (query is FilteredQuery) { QueryToSpanQuery(((FilteredQuery)query).Query, payloads, state); } else if (query is DisjunctionMaxQuery) { for (IEnumerator <Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext();) { QueryToSpanQuery(iterator.Current, payloads, state); } } else if (query is MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; System.Collections.Generic.IList <Term[]> termArrays = mpq.GetTermArrays(); int[] positions = mpq.GetPositions(); if (positions.Length > 0) { int maxPosition = positions[positions.Length - 1]; for (int i = 0; i < positions.Length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } IList <Query>[] disjunctLists = new IList <Query> [maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.Count; ++i) { Term[] termArray = termArrays[i]; IList <Query> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new List <Query>(termArray.Length)); ++distinctPositions; } foreach (Term term in termArray) { disjuncts.Add(new SpanTermQuery(term)); } } int positionGaps = 0; int position = 0; SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.Length; ++i) { IList <Query> disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery((SpanQuery[])(disjuncts.ToArray())); } else { ++positionGaps; } } int slop = mpq.Slop; bool inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp, state); } } }
private PayloadNearQuery NewPhraseQuery(string fieldName, string phrase, bool inOrder, PayloadFunction function) { var words = _whiteSpaceRegex.Split(phrase); var clauses = new SpanQuery[words.Length]; for (var i = 0; i < clauses.Length; i++) { clauses[i] = new SpanTermQuery(new Term(fieldName, words[i])); } return new PayloadNearQuery(clauses, 0, inOrder, function); }
public virtual void TestCrazySpans() { // The problem: "normal" lucene queries create scorers, returning null if terms dont exist // this means they never score a term that does not exist. // however with spans, there is only one scorer for the whole hierarchy: // inner queries are not real queries, their boosts are ignored, etc. Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); doc.Add(NewField("foo", "bar", ft)); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar")); SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz")); Query query = new SpanOrQuery(s1, s2); TopDocs td = @is.Search(query, 10); Assert.AreEqual(1, td.TotalHits); float score = td.ScoreDocs[0].Score; Assert.IsTrue(score >= 0.0f); Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim); } ir.Dispose(); dir.Dispose(); }
public virtual void TestPayloadsPos0() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer()); Document doc = new Document(); doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k"))); writer.AddDocument(doc); IndexReader readerFromWriter = writer.Reader; AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter); DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a")); int count = 0; Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); // "a" occurs 4 times Assert.AreEqual(4, tp.Freq()); Assert.AreEqual(0, tp.NextPosition()); Assert.AreEqual(1, tp.NextPosition()); Assert.AreEqual(3, tp.NextPosition()); Assert.AreEqual(6, tp.NextPosition()); // only one doc has "a" Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc()); IndexSearcher @is = NewSearcher(readerFromWriter); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); count = 0; bool sawZero = false; if (VERBOSE) { Console.WriteLine("\ngetPayloadSpans test"); } Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq); while (pspans.Next()) { if (VERBOSE) { Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End()); } var payloads = pspans.Payload; sawZero |= pspans.Start() == 0; foreach (var bytes in payloads) { count++; if (VERBOSE) { Console.WriteLine(" payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes)); } } } Assert.IsTrue(sawZero); Assert.AreEqual(5, count); // System.out.println("\ngetSpans test"); Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq); count = 0; sawZero = false; while (spans.Next()) { count++; sawZero |= spans.Start() == 0; // System.out.println(spans.Doc() + " - " + spans.Start() + " - " + // spans.End()); } Assert.AreEqual(4, count); Assert.IsTrue(sawZero); // System.out.println("\nPayloadSpanUtil test"); sawZero = false; PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext); var pls = psu.GetPayloadsForQuery(snq); count = pls.Count; foreach (var bytes in pls) { string s = Encoding.UTF8.GetString(bytes); //System.out.println(s); sawZero |= s.Equals("pos: 0"); } Assert.AreEqual(5, count); Assert.IsTrue(sawZero); writer.Dispose(); @is.IndexReader.Dispose(); dir.Dispose(); }
public override Query VisitSpanTermQuery(Lucene.Net.Search.Spans.SpanTermQuery spanTermq) { throw new NotImplementedException(); }