/// <summary>
/// Checks that a freshly obtained DocsAndPositionsEnum is positioned at -1,
/// exposes the expected position/offset data, and that the enum can be reused
/// across MemoryIndex.Reset() cycles.
/// NOTE(review): the name looks like a typo for "TestDocsAndPositionsEnumStart" —
/// confirm whether the test framework ever discovers/runs this method.
/// </summary>
public void RestDocsAndPositionsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random());
    int iterations = AtLeast(3);
    MemoryIndex memory = new MemoryIndex(true, Random().nextInt(50) * 1024 * 1024);

    for (int iter = 0; iter < iterations; iter++)
    {
        // check reuse
        memory.AddField("foo", "bar", analyzer);
        AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
        assertEquals(1, reader.Terms("foo").SumTotalTermFreq);

        DocsAndPositionsEnum postings = reader.TermPositionsEnum(new Term("foo", "bar"));
        assertEquals(-1, postings.DocID()); // unpositioned enums must start at -1
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, postings.NextPosition());
        assertEquals(0, postings.StartOffset());
        assertEquals(3, postings.EndOffset());

        // now reuse and check again
        TermsEnum termsEnum = reader.Terms("foo").Iterator(null);
        assertTrue(termsEnum.SeekExact(new BytesRef("bar")));
        postings = termsEnum.DocsAndPositions(null, postings);
        assertEquals(-1, postings.DocID());
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

        reader.Dispose();
        memory.Reset();
    }
}
/// <summary>
/// Verifies positions, offsets and payloads returned through the sorting reader,
/// first via NextDoc() iteration, then again via Advance() with random strides,
/// and additionally checks that enum reuse is honored by the sorting wrapper.
/// </summary>
public virtual void TestDocsAndPositionsEnum()
{
    TermsEnum termsEnum = reader.Terms(DOC_POSITIONS_FIELD).Iterator(null);
    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
    DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);
    int doc;

    // test nextDoc()
    while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = sortedPositions.Freq();
        // expected freq is derived from the doc's sorted value: value/10 + 1
        // (presumably matching how the index fixture was built — confirm against setup)
        assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
        for (int i = 0; i < freq; i++)
        {
            assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
            // offsets are only asserted when the postings format supports them
            if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
            {
                assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
            }
            // payload encodes freq - i as a decimal string
            assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
        }
    }

    // test advance()
    DocsAndPositionsEnum reuse = sortedPositions;
    sortedPositions = termsEnum.DocsAndPositions(null, reuse);
    if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
    {
        assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
    }
    doc = 0;
    // advance by random small strides (1..5) and re-check the same invariants
    while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt(Random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = sortedPositions.Freq();
        assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
        for (int i = 0; i < freq; i++)
        {
            assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
            if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
            {
                assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
            }
            assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
        }
    }
}
/// <summary>
/// With the Lucene40 postings format, passing a null reuse enum to Docs()
/// must yield a distinct DocsEnum instance per term — one per term in "body".
/// </summary>
public virtual void TestReuseDocsEnumNoReuse()
{
    Directory dir = NewDirectory();
    Codec codec = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(codec));

    int numDocs = AtLeast(20);
    CreateRandomIndex(numDocs, writer, Random());
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves)
    {
        AtomicReader segmentReader = (AtomicReader)ctx.Reader;
        Terms terms = segmentReader.Terms("body");
        TermsEnum termsEnum = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> seenEnums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(segmentReader.MaxDoc);
        while (termsEnum.Next() != null)
        {
            // null reuse enum => the codec must hand back a fresh instance each time
            DocsEnum docs = termsEnum.Docs(Random().NextBoolean() ? bits : new MatchNoBits(segmentReader.MaxDoc), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            seenEnums[docs] = true;
        }
        // identity map: one unique enum per term
        Assert.AreEqual(terms.Size(), seenEnums.Count);
    }
    IOUtils.Close(writer, open, dir);
}
/// <summary>
/// Takes the categories from the given taxonomy directory, and adds the
/// missing ones to this taxonomy. Additionally, it fills the given
/// <seealso cref="OrdinalMap"/> with a mapping from the original ordinal to the new
/// ordinal.
/// </summary>
/// <param name="taxoDir">directory containing the source taxonomy index</param>
/// <param name="map">receives the original-ordinal to new-ordinal mappings</param>
public virtual void AddTaxonomy(Directory taxoDir, OrdinalMap map)
{
    EnsureOpen();
    DirectoryReader r = DirectoryReader.Open(taxoDir);
    try
    {
        int size = r.NumDocs;
        OrdinalMap ordinalMap = map;
        ordinalMap.Size = size;
        int @base = 0;
        TermsEnum te = null;
        DocsEnum docs = null;
        foreach (AtomicReaderContext ctx in r.Leaves)
        {
            AtomicReader ar = ctx.AtomicReader;
            Terms terms = ar.Terms(Consts.FULL);
            // Defensive null check: a segment without the categories field would
            // otherwise throw a NullReferenceException on terms.Iterator below.
            if (terms != null)
            {
                te = terms.Iterator(te);
                while (te.Next() != null)
                {
                    FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(te.Term().Utf8ToString()));
                    int ordinal = AddCategory(cp);
                    docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
                    // the first (only) document holding the label gives the source ordinal
                    ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
                }
            }
            @base += ar.MaxDoc; // no deletions, so we're ok
        }
        ordinalMap.AddDone();
    }
    finally
    {
        r.Dispose();
    }
}
/// <summary>
/// Exercises enum reuse under the nested pulsing postings format and asserts
/// how many distinct enum instances end up being created.
/// </summary>
public virtual void TestNestedPulsing()
{
    // we always run this test with pulsing codec.
    Codec codec = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(codec));

    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
    // this is because we only track the 'last' enum we reused (not all).
    // but this seems 'good enough' for now.
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();

    DocsEnum docsReuse = null;
    TermsEnum termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        docsReuse = termsEnum.Docs(null, docsReuse, DocsEnum.FLAG_NONE);
        allEnums[docsReuse] = true;
    }
    assertEquals(4, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        posReuse = termsEnum.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(4, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exercises enum reuse with the Pulsing41 postings format (freq cutoff 1)
/// and asserts that only two distinct enum instances are created.
/// </summary>
public virtual void TestSophisticatedReuse()
{
    // we always run this test with pulsing codec.
    Codec codec = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(codec));

    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();

    DocsEnum docsReuse = null;
    TermsEnum termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        docsReuse = termsEnum.Docs(null, docsReuse, DocsEnum.FLAG_NONE);
        allEnums[docsReuse] = true;
    }
    assertEquals(2, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        posReuse = termsEnum.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(2, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Every MemoryIndex reader accessor must report "absent" (null) when asked
/// about a field that was never added to the index.
/// </summary>
public void TestNonExistingsField()
{
    MemoryIndex mindex = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;

    assertNull(reader.GetNumericDocValues("not-in-index"));
    assertNull(reader.GetNormValues("not-in-index"));
    assertNull(reader.TermDocsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.TermPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.Terms("not-in-index"));
}
/// <summary>
/// Captures the filter context and, when the filter's field exists in this
/// segment, positions a TermsEnum over it; otherwise termsEnum stays null.
/// </summary>
public BaseTermsEnumTraverser(AbstractPrefixTreeFilter outerInstance, AtomicReaderContext context, Bits acceptDocs)
{
    this.outerInstance = outerInstance;
    this.context = context;
    this.acceptDocs = acceptDocs;

    AtomicReader atomicReader = context.AtomicReader;
    maxDoc = atomicReader.MaxDoc;
    Terms fieldTerms = atomicReader.Terms(outerInstance.fieldName);
    if (fieldTerms != null)
    {
        termsEnum = fieldTerms.Iterator(null);
    }
}
/// <summary>
/// Captures the enclosing filter's context; termsEnum is only initialized
/// when the segment actually contains the filter's field.
/// </summary>
/// <exception cref="System.IO.IOException"></exception>
public BaseTermsEnumTraverser(AbstractPrefixTreeFilter _enclosing, AtomicReaderContext context, IBits acceptDocs)
{
    this._enclosing = _enclosing;
    //remember to check for null in getDocIdSet
    this.context = context;
    this.acceptDocs = acceptDocs;

    AtomicReader atomicReader = context.AtomicReader;
    maxDoc = atomicReader.MaxDoc;
    Terms fieldTerms = atomicReader.Terms(this._enclosing.fieldName);
    if (fieldTerms != null)
    {
        termsEnum = fieldTerms.Iterator(null);
    }
}
/// <summary>
/// Returns the per-reader shape cache, lazily building it on first request.
/// Iterates every term of the shape field, decodes it into a shape, and
/// records the shape against every matching document id. Thread-safe via
/// the shared lock object.
/// </summary>
/// <exception cref="System.IO.IOException"></exception>
public virtual ShapeFieldCache<T> GetCache(AtomicReader reader)
{
    lock (locker)
    {
        ShapeFieldCache<T> idx;
        if (sidx.TryGetValue(reader, out idx) && idx != null)
        {
            return idx; // already built for this reader
        }

        idx = new ShapeFieldCache<T>(reader.MaxDoc, defaultSize);
        int count = 0;
        DocsEnum docs = null;
        Terms terms = reader.Terms(shapeField);
        TermsEnum te = null;
        if (terms != null)
        {
            te = terms.Iterator(te);
            for (BytesRef term = te.Next(); term != null; term = te.Next())
            {
                T shape = ReadShape(term);
                if (shape == null)
                {
                    continue; // term does not decode to a shape; skip it
                }
                docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
                for (int docid = docs.NextDoc(); docid != DocIdSetIterator.NO_MORE_DOCS; docid = docs.NextDoc())
                {
                    idx.Add(docid, shape);
                    count++;
                }
            }
        }
        sidx.Add(reader, idx);
        return idx;
    }
}
/// <summary>
/// Returns a DocsEnum for the given term from a randomly picked segment,
/// or null ~10% of the time, when the field is missing, or when the term
/// is not present in the chosen segment.
/// </summary>
public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList<AtomicReaderContext> readers, Bits bits)
{
    if (Random().Next(10) == 0)
    {
        return null; // occasionally hand back nothing on purpose
    }
    AtomicReader segmentReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
    Terms terms = segmentReader.Terms(field);
    if (terms == null)
    {
        return null;
    }
    TermsEnum termsEnum = terms.Iterator(null);
    if (!termsEnum.SeekExact(term))
    {
        return null;
    }
    return termsEnum.Docs(bits, null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
}
/// <summary>
/// Checks that a fresh DocsEnum starts positioned at -1 and that reusing
/// the enum through TermsEnum.Docs() resets it to the same initial state.
/// </summary>
public void TestDocsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random());
    MemoryIndex memory = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
    memory.AddField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;

    DocsEnum docs = TestUtil.Docs(Random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
    assertEquals(-1, docs.DocID()); // unpositioned enums must start at -1
    assertTrue(docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = reader.Terms("foo").Iterator(null);
    assertTrue(te.SeekExact(new BytesRef("bar")));
    docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
    assertEquals(-1, docs.DocID());
    assertTrue(docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    reader.Dispose();
}
/// <summary>
/// Adding the same field twice should concatenate the token streams; a phrase
/// spanning the boundary matches with the default (zero) position increment
/// gap but not once a positive gap is configured — unless slop allows it.
/// </summary>
public void TestSameFieldAddedMultipleTimes()
{
    MemoryIndex mindex = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    mindex.AddField("field", "jumps over the", mockAnalyzer);

    AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;
    assertEquals(7, reader.Terms("field").SumTotalTermFreq); // 4 + 3 tokens total

    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "fox"));
    query.Add(new Term("field", "jumps"));
    // with no position gap the phrase crosses the field-instance boundary
    assertTrue(mindex.Search(query) > 0.1);

    mindex.Reset();
    mockAnalyzer.PositionIncrementGap = 1 + Random().nextInt(10);
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    mindex.AddField("field", "jumps over the", mockAnalyzer);
    // a positive gap breaks the exact phrase...
    assertEquals(0, mindex.Search(query), 0.00001f);
    // ...but enough slop lets it match again
    query.Slop = 10;
    assertTrue("posGap" + mockAnalyzer.GetPositionIncrementGap("field"), mindex.Search(query) > 0.0001);
}
/// <summary>
/// Opens the directory and, per leaf, asserts pairwise equivalence between
/// the test fields: first docs-vs-freqs, then the chain from positions
/// through the payload/offset variants.
/// </summary>
private void Verify(Directory dir)
{
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in ir.Leaves)
    {
        AtomicReader r = (AtomicReader)leaf.Reader;
        AssertTerms(r.Terms("field1docs"), r.Terms("field2freqs"), true);
        // remaining fields are compared as adjacent pairs along this chain
        string[] chain =
        {
            "field3positions",
            "field4offsets",
            "field5payloadsFixed",
            "field6payloadsVariable",
            "field7payloadsFixedOffsets",
            "field8payloadsVariableOffsets"
        };
        for (int i = 0; i + 1 < chain.Length; i++)
        {
            AssertTerms(r.Terms(chain[i]), r.Terms(chain[i + 1]), true);
        }
    }
    ir.Dispose();
}
/// <summary>
/// Builds the join-test fixture: generates unique random values, writes nDocs
/// documents (split randomly between the "from" and "to" writers, linked via
/// "from"/"to" fields), then pre-computes the expected join scores for every
/// unique value so individual tests can compare against them.
/// </summary>
/// <param name="nDocs">number of documents to index</param>
/// <param name="fromWriter">writer receiving "from"-side documents</param>
/// <param name="toWriter">writer receiving "to"-side documents</param>
/// <param name="multipleValuesPerDocument">if true, each doc gets 2-11 link values instead of 1</param>
/// <param name="scoreDocsInOrder">if true, expected scores are computed via ordered terms iteration</param>
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder)
{
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.RandomUniqueValues = new string[numRandomValues];
    ISet<string> trackSet = new HashSet<string>();
    context.RandomFrom = new bool[numRandomValues];
    for (int i = 0; i < numRandomValues; i++)
    {
        string uniqueRandomValue;
        do
        {
            uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
            // uniqueRandomValue = TestUtil.randomSimpleString(random);
        } while ("".Equals(uniqueRandomValue) || trackSet.Contains(uniqueRandomValue));
        // Generate unique values and empty strings aren't allowed.
        trackSet.Add(uniqueRandomValue);
        context.RandomFrom[i] = Random().NextBoolean();
        context.RandomUniqueValues[i] = uniqueRandomValue;
    }

    // Index nDocs documents, each tagged with an id, a random unique value,
    // and one or more link values pointing at other random unique values.
    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++)
    {
        string id = Convert.ToString(i);
        int randomI = Random().Next(context.RandomUniqueValues.Length);
        string value = context.RandomUniqueValues[randomI];
        Document document = new Document();
        document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
        document.Add(NewTextField(Random(), "value", value, Field.Store.NO));

        bool from = context.RandomFrom[randomI];
        int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1;
        docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
        for (int j = 0; j < numberOfLinkValues; j++)
        {
            string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
            docs[i].LinkValues.Add(linkValue);
            if (from)
            {
                // book-keeping so expected hits can be recomputed later
                if (!context.FromDocuments.ContainsKey(linkValue))
                {
                    context.FromDocuments[linkValue] = new List<RandomDoc>();
                }
                if (!context.RandomValueFromDocs.ContainsKey(value))
                {
                    context.RandomValueFromDocs[value] = new List<RandomDoc>();
                }
                context.FromDocuments[linkValue].Add(docs[i]);
                context.RandomValueFromDocs[value].Add(docs[i]);
                document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
            }
            else
            {
                if (!context.ToDocuments.ContainsKey(linkValue))
                {
                    context.ToDocuments[linkValue] = new List<RandomDoc>();
                }
                if (!context.RandomValueToDocs.ContainsKey(value))
                {
                    context.RandomValueToDocs[value] = new List<RandomDoc>();
                }
                context.ToDocuments[linkValue].Add(docs[i]);
                context.RandomValueToDocs[value].Add(docs[i]);
                document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
            }
        }

        RandomIndexWriter w;
        if (from)
        {
            w = fromWriter;
        }
        else
        {
            w = toWriter;
        }
        w.AddDocument(document);
        if (Random().Next(10) == 4)
        {
            w.Commit(); // occasionally commit to create multiple segments
        }
        if (VERBOSE)
        {
            Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
        }
    }

    // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
    // any ScoreMode.
    IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
    IndexSearcher toSearcher = NewSearcher(toWriter.Reader);
    for (int i = 0; i < context.RandomUniqueValues.Length; i++)
    {
        string uniqueRandomValue = context.RandomUniqueValues[i];
        string fromField;
        string toField;
        IDictionary<string, IDictionary<int, JoinScore>> queryVals;
        if (context.RandomFrom[i])
        {
            fromField = "from";
            toField = "to";
            queryVals = context.FromHitsToJoinScore;
        }
        else
        {
            // joining in the opposite direction: roles of the fields swap
            fromField = "to";
            toField = "from";
            queryVals = context.ToHitsToJoinScore;
        }

        // collect join values (and their scores) from the "from" side
        IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>();
        if (multipleValuesPerDocument)
        {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
        }
        else
        {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
        }

        // resolve the collected join values against the "to" side
        IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
        if (multipleValuesPerDocument)
        {
            if (scoreDocsInOrder)
            {
                // walk the to-side terms in sorted order and record, per doc,
                // the score of the first join value that reaches it
                AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                Terms terms = slowCompositeReader.Terms(toField);
                if (terms != null)
                {
                    DocsEnum docsEnum = null;
                    TermsEnum termsEnum = null;
                    SortedSet<BytesRef> joinValues = new SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                    joinValues.AddAll(joinValueToJoinScores.Keys);
                    foreach (BytesRef joinValue in joinValues)
                    {
                        termsEnum = terms.Iterator(termsEnum);
                        if (termsEnum.SeekExact(joinValue))
                        {
                            docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsEnum.FLAG_NONE);
                            JoinScore joinScore = joinValueToJoinScores[joinValue];
                            for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc())
                            {
                                // First encountered join value determines the score.
                                // Something to keep in mind for many-to-many relations.
                                if (!docToJoinScore.ContainsKey(doc))
                                {
                                    docToJoinScore[doc] = joinScore;
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores, docToJoinScore));
            }
        }
        else
        {
            toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper6(this, context, toField, joinValueToJoinScores, docToJoinScore));
        }
        queryVals[uniqueRandomValue] = docToJoinScore;
    }

    fromSearcher.IndexReader.Dispose();
    toSearcher.IndexReader.Dispose();
    return(context);
}
/// <summary>
/// Builds the per-segment scorer for a MultiPhraseQuery: for each position,
/// creates either a union enum over the alternative terms or a plain
/// positions enum for a single term, then hands the postings to an exact or
/// sloppy phrase scorer depending on the configured slop.
/// Returns null when the field, a required term, or any matching doc is
/// absent from this segment.
/// </summary>
/// <exception cref="InvalidOperationException">if the field was indexed without positions</exception>
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
{
    Debug.Assert(OuterInstance.termArrays.Count > 0);
    AtomicReader reader = (context.AtomicReader);
    Bits liveDocs = acceptDocs;

    PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[OuterInstance.termArrays.Count];

    Terms fieldTerms = reader.Terms(OuterInstance.Field);
    if (fieldTerms == null)
    {
        return(null); // field not present in this segment
    }

    // Reuse single TermsEnum below:
    TermsEnum termsEnum = fieldTerms.Iterator(null);

    for (int pos = 0; pos < postingsFreqs.Length; pos++)
    {
        Term[] terms = OuterInstance.termArrays[pos];

        DocsAndPositionsEnum postingsEnum;
        int docFreq;

        if (terms.Length > 1)
        {
            // multiple alternatives at this position: merge them into one enum
            postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, TermContexts, termsEnum);

            // coarse -- this overcounts since a given doc can
            // have more than one term:
            docFreq = 0;
            for (int termIdx = 0; termIdx < terms.Length; termIdx++)
            {
                Term term = terms[termIdx];
                TermState termState = TermContexts[term].Get(context.Ord);
                if (termState == null)
                {
                    // Term not in reader
                    continue;
                }
                termsEnum.SeekExact(term.Bytes(), termState);
                docFreq += termsEnum.DocFreq();
            }

            if (docFreq == 0)
            {
                // None of the terms are in this reader
                return(null);
            }
        }
        else
        {
            // single term at this position
            Term term = terms[0];
            TermState termState = TermContexts[term].Get(context.Ord);
            if (termState == null)
            {
                // Term not in reader
                return(null);
            }
            termsEnum.SeekExact(term.Bytes(), termState);
            postingsEnum = termsEnum.DocsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

            if (postingsEnum == null)
            {
                // term does exist, but has no positions
                Debug.Assert(termsEnum.Docs(liveDocs, null, DocsEnum.FLAG_NONE) != null, "termstate found but no term exists in reader");
                throw new InvalidOperationException("field \"" + term.Field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.Text() + ")");
            }

            docFreq = termsEnum.DocFreq();
        }

        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)OuterInstance.positions[pos], terms);
    }

    // sort by increasing docFreq order
    if (OuterInstance.slop == 0)
    {
        ArrayUtil.TimSort(postingsFreqs);
    }

    if (OuterInstance.slop == 0)
    {
        ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, Similarity.DoSimScorer(Stats, context));
        if (s.NoDocs)
        {
            return(null);
        }
        else
        {
            return(s);
        }
    }
    else
    {
        return(new SloppyPhraseScorer(this, postingsFreqs, OuterInstance.slop, Similarity.DoSimScorer(Stats, context)));
    }
}
/// <summary>
/// Duels a MemoryIndex reader against a regular index reader: for every field
/// of the competitor, asserts that term statistics, norms, and the full
/// postings (docs, freqs, and — when indexed — positions, offsets) agree.
/// </summary>
/// <param name="other">the "expected" reader (wrapped into a single atomic reader)</param>
/// <param name="memIndexReader">the MemoryIndex reader under test</param>
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
{
    AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
    Fields memFields = memIndexReader.Fields;
    foreach (string field in competitor.Fields)
    {
        Terms memTerms = memFields.Terms(field);
        Terms iwTerms = memIndexReader.Terms(field);
        if (iwTerms == null)
        {
            assertNull(memTerms);
        }
        else
        {
            NumericDocValues normValues = competitor.GetNormValues(field);
            NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
            if (normValues != null)
            {
                // mem idx always computes norms on the fly
                assertNotNull(memNormValues);
                assertEquals(normValues.Get(0), memNormValues.Get(0));
            }

            assertNotNull(memTerms);
            assertEquals(iwTerms.DocCount, memTerms.DocCount);
            assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
            assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);

            TermsEnum iwTermsIter = iwTerms.Iterator(null);
            TermsEnum memTermsIter = memTerms.Iterator(null);
            if (iwTerms.HasPositions())
            {
                bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();

                while (iwTermsIter.Next() != null)
                {
                    assertNotNull(memTermsIter.Next());
                    assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                    DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null);
                    DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                    while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                    {
                        assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                        assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                        for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                        {
                            assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                            if (offsets)
                            {
                                assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                            }
                        }
                    }
                }
            }
            else
            {
                while (iwTermsIter.Next() != null)
                {
                    // FIX: memTermsIter must be advanced in lock-step here, just like
                    // in the positions branch above; the original compared a stale term.
                    assertNotNull(memTermsIter.Next());
                    assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                    DocsEnum iwDocsAndPos = iwTermsIter.Docs(null, null);
                    DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                    while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                    {
                        assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                        assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                    }
                }
            }
        }
    }
}
/// <summary>
/// Builds the per-segment scorer for a PhraseQuery: seeks each phrase term
/// via its cached TermState, collects a positions enum per term, then hands
/// the postings to an exact or sloppy phrase scorer depending on slop.
/// Returns null when the field or any required term is absent from this
/// segment, or (exact case) when no document can match.
/// </summary>
/// <exception cref="InvalidOperationException">if the field was indexed without positions</exception>
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
{
    Debug.Assert(OuterInstance.Terms_Renamed.Count > 0);
    AtomicReader reader = context.AtomicReader;
    Bits liveDocs = acceptDocs;
    PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[OuterInstance.Terms_Renamed.Count];

    Terms fieldTerms = reader.Terms(OuterInstance.Field);
    if (fieldTerms == null)
    {
        return(null); // field not present in this segment
    }

    // Reuse single TermsEnum below:
    TermsEnum te = fieldTerms.Iterator(null);

    for (int i = 0; i < OuterInstance.Terms_Renamed.Count; i++)
    {
        Term t = OuterInstance.Terms_Renamed[i];
        TermState state = States[i].Get(context.Ord);
        if (state == null) // term doesnt exist in this segment
        {
            Debug.Assert(TermNotInReader(reader, t), "no termstate found but term exists in reader");
            return(null);
        }
        te.SeekExact(t.Bytes, state);
        DocsAndPositionsEnum postingsEnum = te.DocsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

        // PhraseQuery on a field that did not index
        // positions.
        if (postingsEnum == null)
        {
            Debug.Assert(te.SeekExact(t.Bytes), "termstate found but no term exists in reader");
            // term does exist, but has no positions
            throw new InvalidOperationException("field \"" + t.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.Text() + ")");
        }
        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.DocFreq(), (int)OuterInstance.Positions_Renamed[i], t);
    }

    // sort by increasing docFreq order
    if (OuterInstance.slop == 0)
    {
        ArrayUtil.TimSort(postingsFreqs);
    }

    if (OuterInstance.slop == 0) // optimize exact case
    {
        ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, Similarity.DoSimScorer(Stats, context));
        if (s.NoDocs)
        {
            return(null);
        }
        else
        {
            return(s);
        }
    }
    else
    {
        return(new SloppyPhraseScorer(this, postingsFreqs, OuterInstance.slop, Similarity.DoSimScorer(Stats, context)));
    }
}