private float freq; // phrase frequency in the current doc, as computed by phraseFreq()

/// <summary>
/// Creates a phrase scorer over the supplied term position enumerators.
/// </summary>
/// <param name="weight">Weight this scorer is created for; its Value is cached in value_Renamed.</param>
/// <param name="tps">Term position enumerators, one per entry of <paramref name="offsets"/>.</param>
/// <param name="offsets">Offset handed to the PhrasePositions built for the matching entry of <paramref name="tps"/>.</param>
/// <param name="similarity">Similarity forwarded to the base constructor.</param>
/// <param name="norms">Norms array stored on this scorer.</param>
internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms)
    : base(similarity)
{
    this.norms = norms;
    this.weight = weight;
    this.value_Renamed = weight.Value;

    // Turn every TermPositions into a PhrasePositions and chain them into a
    // singly linked list (first ... last). A phrase position is the term
    // position shifted by the term's offset in the phrase (pp.pos = tp.pos - offset),
    // so an exact phrase match is simply "all PhrasePositions share one position".
    int termCount = tps.Length;
    int index = 0;
    while (index < termCount)
    {
        PhrasePositions node = new PhrasePositions(tps[index], offsets[index]);
        if (last == null)
        {
            // first element: it becomes the head of the list
            first = node;
        }
        else
        {
            // append behind the current tail
            last.next = node;
        }
        last = node;
        index++;
    }

    // the queue starts out empty, sized for one entry per term
    pq = new PhraseQueue(termCount);
    first.doc = -1;
}
/// <summary>
/// Creates spans for a single term.
/// </summary>
/// <param name="positions">Position enumerator stored in internalPositions.</param>
/// <param name="term">Term stored on this instance.</param>
public TermSpans(TermPositions positions, Term term)
{
    // presumably -1 means "no document visited yet" (confirm against the Next/SkipTo implementation)
    internalDoc = -1;
    this.term = term;
    this.internalPositions = positions;
}
/// <summary>
/// Creates spans for a single term.
/// </summary>
/// <param name="positions">Position enumerator stored on this instance.</param>
/// <param name="term">Term stored on this instance.</param>
public TermSpans(TermPositions positions, Term term)
{
    // presumably -1 means "no document visited yet" (confirm against the Next/SkipTo implementation)
    doc = -1;
    this.term = term;
    this.positions = positions;
}
/// <summary>
/// Builds the scorer for the enclosing query against <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Index reader the term positions and norms are taken from.</param>
/// <returns>
/// An ExactPhraseScorer when the enclosing slop is 0, otherwise a SloppyPhraseScorer;
/// null when the query has no term arrays or a term position enumerator could not be obtained.
/// </returns>
public virtual Scorer Scorer(IndexReader reader)
{
    int arrayCount = Enclosing_Instance.termArrays.Count;
    if (arrayCount == 0)
    {
        // zero-term shortcut: nothing to score
        return null;
    }

    TermPositions[] tps = new TermPositions[arrayCount];
    for (int slot = 0; slot < tps.Length; slot++)
    {
        Term[] alternatives = (Term[]) Enclosing_Instance.termArrays[slot];

        // several alternatives at one slot are merged into one enumerator;
        // a single term uses the reader's enumerator directly
        TermPositions current = alternatives.Length > 1
            ? (TermPositions) new MultipleTermPositions(reader, alternatives)
            : reader.TermPositions(alternatives[0]);

        if (current == null)
        {
            return null;
        }
        tps[slot] = current;
    }

    int[] phrasePositions = Enclosing_Instance.GetPositions();
    byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);

    if (Enclosing_Instance.slop == 0)
    {
        return new ExactPhraseScorer(this, tps, phrasePositions, similarity, fieldNorms);
    }
    return new SloppyPhraseScorer(this, tps, phrasePositions, similarity, Enclosing_Instance.slop, fieldNorms);
}
/// <summary>
/// Builds the scorer for the enclosing phrase query against <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Index reader the term positions and norms are taken from.</param>
/// <returns>
/// An ExactPhraseScorer when the enclosing slop is 0, otherwise a SloppyPhraseScorer;
/// null when the query has no terms or a term position enumerator could not be obtained.
/// </returns>
public virtual Scorer Scorer(IndexReader reader)
{
    int termCount = Enclosing_Instance.terms.Count;
    if (termCount == 0)
    {
        // zero-term shortcut: nothing to score
        return null;
    }

    TermPositions[] tps = new TermPositions[termCount];
    for (int slot = 0; slot < tps.Length; slot++)
    {
        TermPositions current = reader.TermPositions((Term) Enclosing_Instance.terms[slot]);
        if (current == null)
        {
            return null;
        }
        tps[slot] = current;
    }

    int[] phrasePositions = Enclosing_Instance.GetPositions();
    byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);

    if (Enclosing_Instance.slop == 0)
    {
        // exact phrases get the dedicated scorer
        return new ExactPhraseScorer(this, tps, phrasePositions, similarity, fieldNorms);
    }
    return new SloppyPhraseScorer(this, tps, phrasePositions, similarity, Enclosing_Instance.slop, fieldNorms);
}
/// <summary>
/// Stores the reader and the enclosing query, then opens the position
/// enumerator for the enclosing query's term.
/// </summary>
/// <param name="reader">Index reader stored on this instance and used to open the term positions.</param>
/// <param name="enclosingInstance">Enclosing SpanTermQuery stored on this instance.</param>
private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanTermQuery enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
    this.reader = reader;

    // must run after enclosingInstance is assigned: the term is read through Enclosing_Instance
    positions = reader.TermPositions(Enclosing_Instance.term);
}
/// <summary>
/// Creates a sloppy phrase scorer. Everything except <paramref name="slop"/>
/// is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="weight">Forwarded to the base constructor.</param>
/// <param name="tps">Forwarded to the base constructor.</param>
/// <param name="positions">Forwarded to the base constructor.</param>
/// <param name="similarity">Forwarded to the base constructor.</param>
/// <param name="slop">Slop value stored on this scorer.</param>
/// <param name="norms">Forwarded to the base constructor.</param>
internal SloppyPhraseScorer(
    Weight weight,
    TermPositions[] tps,
    int[] positions,
    Similarity similarity,
    int slop,
    byte[] norms)
    : base(weight, tps, positions, similarity, norms)
{
    this.slop = slop;
}
/// <summary>
/// Creates a span scorer that also keeps the term position enumerator of the spans.
/// </summary>
/// <param name="enclosingInstance">Enclosing weight, handed to InitBlock.</param>
/// <param name="spans">Spans forwarded to the base constructor; its GetPositions() result is stored in positions.</param>
/// <param name="weight">Forwarded to the base constructor.</param>
/// <param name="similarity">Forwarded to the base constructor.</param>
/// <param name="norms">Forwarded to the base constructor.</param>
public PayloadTermSpanScorer(
    PayloadTermWeight enclosingInstance,
    TermSpans spans,
    Weight weight,
    Similarity similarity,
    byte[] norms)
    : base(spans, weight, similarity, norms)
{
    InitBlock(enclosingInstance);
    positions = spans.GetPositions();
}
/// <summary>
/// Indexes a single document through TestPayloadAnalyzer and checks the
/// positions reported for the term "a". The loop runs twice: the second
/// pass (x == 1) calls SetAllowMinus1Position() on the writer and only
/// verifies the first reported position. The first pass additionally checks
/// the spans and payloads produced by a SpanNearQuery over "a" and "k".
/// </summary>
public virtual void TestPayloadsPos0()
{
    for (int x = 0; x < 2; x++)
    {
        Directory dir = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        if (x == 1)
        {
            writer.SetAllowMinus1Position();
        }

        Document doc = new Document();
        System.IO.MemoryStream ms = new System.IO.MemoryStream();
        System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
        sw.Write("a a b c d e a f g h i j a b k k");
        // flush to the stream and rewind it so the field's reader starts at the beginning
        sw.Flush();
        ms.Position = 0;
        doc.Add(new Field("content", new System.IO.StreamReader(ms)));
        writer.AddDocument(doc);

        IndexReader r = writer.GetReader();

        TermPositions tp = r.TermPositions(new Term("content", "a"));
        int count;
        Assert.IsTrue(tp.Next());
        // "a" occurs 4 times
        Assert.AreEqual(4, tp.Freq());
        int expected = (x == 1) ? System.Int32.MaxValue : 0;
        Assert.AreEqual(expected, tp.NextPosition());
        if (x == 1)
        {
            // Release everything before moving on: a bare "continue" here would
            // skip the cleanup at the bottom of the loop and leak the writer,
            // the reader and the directory.
            tp.Close();
            writer.Close();
            r.Close();
            dir.Close();
            continue;
        }
        Assert.AreEqual(1, tp.NextPosition());
        Assert.AreEqual(3, tp.NextPosition());
        Assert.AreEqual(6, tp.NextPosition());

        // only one doc has "a"
        Assert.IsFalse(tp.Next());
        tp.Close();

        IndexSearcher is_Renamed = new IndexSearcher(r);

        SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
        SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
        SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
        SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

        // payloads reachable through the spans
        count = 0;
        bool sawZero = false;
        Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader());
        while (pspans.Next())
        {
            System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
            sawZero |= pspans.Start() == 0;
            count += payloads.Count;
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);

        // plain span enumeration
        Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader());
        count = 0;
        sawZero = false;
        while (spans.Next())
        {
            count++;
            sawZero |= spans.Start() == 0;
        }
        Assert.AreEqual(4, count);
        Assert.IsTrue(sawZero);

        // payloads collected by PayloadSpanUtil
        sawZero = false;
        PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader());
        System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
        count = pls.Count;
        foreach (byte[] payload in pls)
        {
            System.String s = System.Text.Encoding.UTF8.GetString(payload);
            sawZero |= s.Equals("pos: 0");
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);

        writer.Close();
        is_Renamed.GetIndexReader().Close();
        dir.Close();
    }
}
/// <summary>
/// Indexes one document through AnonymousClassAnalyzer, then verifies the
/// positions reported for individual terms and the hit counts of phrase,
/// multi-phrase and parsed phrase queries run against that index.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer indexAnalyzer = new AnonymousClassAnalyzer(this);
    Directory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, indexAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document document = new Document();
    document.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(document);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory);

    // token "1" is expected at position 0
    TermPositions tp = searcher.GetIndexReader().TermPositions(new Term("field", "1"));
    tp.Next();
    Assert.AreEqual(0, tp.NextPosition());

    // token "2" is expected at position 2
    tp = searcher.GetIndexReader().TermPositions(new Term("field", "2"));
    tp.Next();
    Assert.AreEqual(2, tp.NextPosition());

    // "1 2" with default positions: no hit
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"));
    phrase.Add(new Term("field", "2"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // the same query with the positions 0 and 1 spelled out: still no hit
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 1);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // with the positions the index actually holds (0 and 2) the phrase is found
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 2);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "3"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // "3" and "4" are found once both are requested at the same position
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "4"), 0);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // a phrase query must fail when one of its terms does not exist,
    // even though another searched term sits at the same searched position
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "9"), 0);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // a multi-phrase query succeeds despite the non-existing term,
    // because another term of the same array exists at that position
    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    Assert.AreEqual(1, searcher.Search(multiPhrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "4"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // parsed "1 2" must not match: the index has a gap of 1 between the two tokens
    QueryParser parser = new QueryParser("field", new StopWhitespaceAnalyzer(false));
    phrase = (PhraseQuery) parser.Parse("\"1 2\"");
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // adding the stop word does not help: the stop filter swallows the increments
    phrase = (PhraseQuery) parser.Parse("\"1 stop 2\"");
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // enabling increments on the parser alone does not help either,
    // the stop filter still swallows them
    parser.SetEnablePositionIncrements(true);
    phrase = (PhraseQuery) parser.Parse("\"1 stop 2\"");
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // with increments disabled on the parser there is still no match
    parser.SetEnablePositionIncrements(false);
    phrase = (PhraseQuery) parser.Parse("\"1 stop 2\"");
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).scoreDocs.Length);

    // only when both the parser and the stop filter propagate increments is the document found
    parser = new QueryParser("field", new StopWhitespaceAnalyzer(true));
    parser.SetEnablePositionIncrements(true);
    phrase = (PhraseQuery) parser.Parse("\"1 stop 2\"");
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).scoreDocs.Length);
}
/// <summary>
/// Creates spans for a single term.
/// </summary>
/// <param name="positions">Position enumerator stored on this instance.</param>
/// <param name="term">Term stored on this instance.</param>
public TermSpans(TermPositions positions, Term term)
{
    // presumably -1 means "no document visited yet" (confirm against the Next/SkipTo implementation)
    doc = -1;
    this.term = term;
    this.positions = positions;
}
/// <summary>
/// Creates spans for a single term.
/// </summary>
/// <param name="positions">Position enumerator stored in internalPositions.</param>
/// <param name="term">Term stored on this instance.</param>
public TermSpans(TermPositions positions, Term term)
{
    // presumably -1 means "no document visited yet" (confirm against the Next/SkipTo implementation)
    internalDoc = -1;
    this.term = term;
    this.internalPositions = positions;
}