/// <summary>
/// Indexes three documents into one text field and checks the ordinals that
/// <see cref="DocTermOrds"/> reports for each document.
/// </summary>
public virtual void TestSimple()
{
    Directory dir = NewDirectory();
    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, config);

    // One Document/Field pair is reused; only the field value changes between adds.
    Document doc = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    doc.Add(field);
    foreach (string text in new[] { "a b c", "d e f", "a f" })
    {
        field.SetStringValue(text);
        w.AddDocument(doc);
    }

    IndexReader r = w.GetReader();
    w.Dispose();

    AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);
    DocTermOrds dto = new DocTermOrds(ar, ar.LiveDocs, "field");
    SortedSetDocValues iter = dto.GetIterator(ar);

    // Expected ordinals per document, in the order NextOrd() must return them.
    long[][] expectedOrds =
    {
        new long[] { 0, 1, 2 },
        new long[] { 3, 4, 5 },
        new long[] { 0, 5 },
    };
    for (int docId = 0; docId < expectedOrds.Length; docId++)
    {
        iter.SetDocument(docId);
        foreach (long expectedOrd in expectedOrds[docId])
        {
            Assert.AreEqual(expectedOrd, iter.NextOrd());
        }
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, iter.NextOrd());
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Returns sorted-set doc values for <paramref name="field"/> across all leaves of the
/// wrapped reader, reusing a cached <see cref="OrdinalMap"/> when one exists.
/// </summary>
/// <param name="field">Name of the doc values field.</param>
/// <returns>
/// The merged view, or <c>null</c> when a map is cached but the field is not of type
/// <see cref="DocValuesType.SORTED_SET"/>; on a cache miss, whatever
/// <c>MultiDocValues.GetSortedSetValues</c> returns.
/// </returns>
public override SortedSetDocValues GetSortedSetDocValues(string field)
{
    EnsureOpen();

    OrdinalMap ordinalMap;
    lock (cachedOrdMaps)
    {
        bool isCached = cachedOrdMaps.TryGetValue(field, out ordinalMap);
        if (!isCached)
        {
            // uncached, or not a multi dv
            SortedSetDocValues uncached = MultiDocValues.GetSortedSetValues(@in, field);
            if (uncached is MultiSortedSetDocValues multi)
            {
                ordinalMap = multi.Mapping;
                // Only remember the map when it is owned by this reader's core cache key.
                if (ordinalMap.owner == CoreCacheKey)
                {
                    cachedOrdMaps[field] = ordinalMap;
                }
            }
            return uncached;
        }
    }

    // cached ordinal map
    if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED_SET)
    {
        return null;
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(ordinalMap != null);
    }

    var leaves = @in.Leaves;
    int leafCount = leaves.Count;
    var perLeafValues = new SortedSetDocValues[leafCount];
    int[] docStarts = new int[leafCount + 1];
    for (int leaf = 0; leaf < leafCount; leaf++)
    {
        AtomicReaderContext context = leaves[leaf];
        perLeafValues[leaf] = context.AtomicReader.GetSortedSetDocValues(field) ?? DocValues.EMPTY_SORTED_SET;
        docStarts[leaf] = context.DocBase;
    }
    docStarts[leafCount] = MaxDoc;

    return new MultiSortedSetDocValues(perLeafValues, docStarts, ordinalMap);
}
/// <summary>
/// Wraps <paramref name="in"/> and records its value count and the given <paramref name="maxDoc"/>.
/// </summary>
/// <param name="in">The doc values instance being wrapped.</param>
/// <param name="maxDoc">Value stored in <c>MaxDoc</c>.</param>
public AssertingSortedSetDocValues(SortedSetDocValues @in, int maxDoc)
{
    this.MaxDoc = maxDoc;
    this.@in = @in;
    this.ValueCount_Renamed = @in.ValueCount;

    // The wrapped instance must never report a negative number of values.
    Debug.Assert(ValueCount_Renamed >= 0);
}
/// <summary>
/// Asserts that <paramref name="actual"/> matches <paramref name="expected"/>: same value count,
/// same per-document ordinal lists, same ordinal-to-term dictionary and same terms enum.
/// </summary>
/// <param name="maxDoc">Number of documents to compare; doc ids <c>0</c> to <c>maxDoc - 1</c> are visited.</param>
/// <param name="expected">Reference doc values.</param>
/// <param name="actual">Doc values under test; may be <c>null</c>.</param>
private void AssertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual)
{
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null)
    {
        Assert.AreEqual(DocValues.EMPTY_SORTED_SET, expected);
        return;
    }

    Assert.AreEqual(expected.ValueCount, actual.ValueCount);

    // compare ord lists
    for (int docId = 0; docId < maxDoc; docId++)
    {
        expected.Document = docId;
        actual.Document = docId;
        long expectedOrd;
        while ((expectedOrd = expected.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
        {
            Assert.AreEqual(expectedOrd, actual.NextOrd());
        }
        // actual must be exhausted at the same point as expected
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, actual.NextOrd());
    }

    // compare ord dictionary
    // Resolve each ordinal to its term with LookupOrd. The previous code called
    // LookupTerm on the empty scratch buffers and ignored the loop ordinal, so the
    // buffers were never filled and the comparison could not fail.
    BytesRef expectedBytes = new BytesRef();
    BytesRef actualBytes = new BytesRef();
    for (long ord = 0; ord < expected.ValueCount; ord++)
    {
        expected.LookupOrd(ord, expectedBytes);
        actual.LookupOrd(ord, actualBytes);
        Assert.AreEqual(expectedBytes, actualBytes);
    }

    // compare termsenum
    AssertEquals(expected.ValueCount, expected.TermsEnum(), actual.TermsEnum());
}
/// <summary>
/// Captures the enclosing filter, the per-document ordinals and the inclusive ordinal bounds.
/// <paramref name="maxDoc"/> and <paramref name="acceptDocs"/> are passed to the base class.
/// </summary>
public FieldCacheDocIdSetAnonymousInnerClassHelper(
    DocTermOrdsRangeFilterAnonymousInnerClassHelper outerInstance,
    int maxDoc,
    Bits acceptDocs,
    SortedSetDocValues docTermOrds,
    long inclusiveLowerPoint,
    long inclusiveUpperPoint)
    : base(maxDoc, acceptDocs)
{
    this.InclusiveLowerPoint = inclusiveLowerPoint;
    this.InclusiveUpperPoint = inclusiveUpperPoint;
    this.DocTermOrds = docTermOrds;
    this.OuterInstance = outerInstance;
}
/// <summary>
/// Initializes a <see cref="TermsEnum"/> backed by the given <paramref name="values"/>.
/// </summary>
/// <param name="values">The sorted-set doc values to iterate over.</param>
public SortedSetDocValuesTermsEnum(SortedSetDocValues values) => this.values = values;
/// <summary>
/// Stores the doc values and document count.
/// </summary>
/// <param name="dv">The sorted-set doc values to store.</param>
/// <param name="maxDoc">Document count to store.</param>
public BitsAnonymousClass2(SortedSetDocValues dv, int maxDoc)
{
    this.maxDoc = maxDoc;
    this.dv = dv;
}
/// <summary>
/// Indexes random documents with short (and therefore frequently repeated) SORTED_SET values,
/// then checks that the multi-segment view agrees with the force-merged single segment.
/// </summary>
public virtual void TestSortedSetWithDups()
{
    AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());

    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int numDocs = AtLeast(500);
    for (int docIndex = 0; docIndex < numDocs; docIndex++)
    {
        Document doc = new Document();
        int valueCount = Random().Next(5);
        for (int v = 0; v < valueCount; v++)
        {
            BytesRef value = new BytesRef(TestUtil.RandomSimpleString(Random(), 2));
            doc.Add(new SortedSetDocValuesField("bytes", value));
        }
        iw.AddDocument(doc);

        // Occasional commits leave the first reader with several segments.
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }

    DirectoryReader ir = iw.Reader;   // reader taken before the force merge
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;  // reader taken after the force merge
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
    SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");

    if (multi != null)
    {
        Assert.AreEqual(single.ValueCount, multi.ValueCount);

        // check values
        BytesRef actual = new BytesRef();
        BytesRef expected = new BytesRef();
        for (long ordinal = 0; ordinal < single.ValueCount; ordinal++)
        {
            single.LookupOrd(ordinal, expected);
            multi.LookupOrd(ordinal, actual);
            Assert.AreEqual(expected, actual);
        }

        // check ord list
        for (int docId = 0; docId < numDocs; docId++)
        {
            single.SetDocument(docId);
            List<long?> expectedList = new List<long?>();
            for (long ord = single.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = single.NextOrd())
            {
                expectedList.Add(ord);
            }

            multi.SetDocument(docId);
            int upto = 0;
            for (long ord = multi.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = multi.NextOrd())
            {
                Assert.AreEqual((long)expectedList[upto], ord);
                upto++;
            }
            Assert.AreEqual(expectedList.Count, upto);
        }
    }
    else
    {
        Assert.IsNull(single);
    }

    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
/// <summary>
/// Captures the enclosing filter and the doc term ordinals.
/// </summary>
/// <param name="outerInstance">The filter instance to store.</param>
/// <param name="docTermOrds">The sorted-set doc values to store.</param>
public TermsAnonymousInnerClassHelper(
    MultiTermQueryDocTermOrdsWrapperFilter outerInstance,
    SortedSetDocValues docTermOrds)
{
    this.DocTermOrds = docTermOrds;
    this.OuterInstance = outerInstance;
}
/// <summary>
/// Merges the doc values of every field in the merge state that has a doc values type,
/// dispatching on that type and substituting the matching <c>DocValues.EMPTY_*</c>
/// instance for readers that have no values for the field.
/// </summary>
/// <param name="segmentWriteState">Write state passed to the codec's doc values consumer.</param>
/// <exception cref="InvalidOperationException">If a field reports an unrecognized doc values type.</exception>
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in MergeState.FieldInfos)
        {
            DocValuesType_e? type = field.DocValuesType;
            if (type == null)
            {
                continue; // field has no doc values
            }

            switch (type.Value)
            {
                case DocValuesType_e.NUMERIC:
                {
                    IList<NumericDocValues> numericValues = new List<NumericDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        // NOTE: bits is fetched but not handed to the consumer in this version
                        // (no docsWithField list is passed to MergeNumericField).
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        numericValues.Add(values);
                    }
                    consumer.MergeNumericField(field, MergeState, numericValues);
                    break;
                }

                case DocValuesType_e.BINARY:
                {
                    IList<BinaryDocValues> binaryValues = new List<BinaryDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        // NOTE: bits is fetched but not handed to the consumer in this version
                        // (no docsWithField list is passed to MergeBinaryField).
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        binaryValues.Add(values);
                    }
                    consumer.MergeBinaryField(field, MergeState, binaryValues);
                    break;
                }

                case DocValuesType_e.SORTED:
                {
                    IList<SortedDocValues> sortedValues = new List<SortedDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        sortedValues.Add(reader.GetSortedDocValues(field.Name) ?? DocValues.EMPTY_SORTED);
                    }
                    consumer.MergeSortedField(field, MergeState, sortedValues);
                    break;
                }

                case DocValuesType_e.SORTED_SET:
                {
                    IList<SortedSetDocValues> sortedSetValues = new List<SortedSetDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        sortedSetValues.Add(reader.GetSortedSetDocValues(field.Name) ?? DocValues.EMPTY_SORTED_SET);
                    }
                    consumer.MergeSortedSetField(field, MergeState, sortedSetValues);
                    break;
                }

                default:
                    throw new InvalidOperationException("type=" + type);
            }
        }
        success = true;
    }
    finally
    {
        // On failure, close without masking the exception already in flight.
        if (success)
        {
            IOUtils.Close(consumer);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(consumer);
        }
    }
}
/// <summary>
/// Initializes a TermsEnum backed by the given <paramref name="values"/>.
/// </summary>
/// <param name="values">The sorted-set doc values to iterate over.</param>
public SortedSetDocValuesTermsEnum(SortedSetDocValues values)
{
    Values = values;
}
/// <summary>
/// Returns the single-valued <see cref="SortedDocValues"/> behind <paramref name="dv"/> when it is a
/// <see cref="SingletonSortedSetDocValues"/>; otherwise returns <c>null</c>.
/// </summary>
/// <param name="dv">The doc values to inspect.</param>
public static SortedDocValues UnwrapSingleton(SortedSetDocValues dv)
{
    SingletonSortedSetDocValues singleton = dv as SingletonSortedSetDocValues;
    return singleton != null ? singleton.SortedDocValues : null;
}
/// <summary>
/// Builds a <see cref="Bits"/> view over <paramref name="dv"/> representing the documents that have a value.
/// </summary>
/// <param name="dv">The sorted-set doc values to wrap.</param>
/// <param name="maxDoc">Document count handed to the wrapper.</param>
public static Bits DocsWithValue(SortedSetDocValues dv, int maxDoc)
{
    Bits docsWithValue = new BitsAnonymousInnerClassHelper2(dv, maxDoc);
    return docsWithValue;
}
/// <summary>
/// Creates a merged view over the per-segment <paramref name="values"/>.
/// </summary>
/// <param name="values">Per-segment doc values; must have one entry per entry of <c>mapping.OrdDeltas</c>.</param>
/// <param name="docStarts">Must contain exactly one more element than <paramref name="values"/>.</param>
/// <param name="mapping">Ordinal map stored alongside the values.</param>
internal MultiSortedSetDocValues(SortedSetDocValues[] values, int[] docStarts, OrdinalMap mapping)
{
    // Shape checks: the three inputs must describe the same number of segments.
    Debug.Assert(values.Length == mapping.OrdDeltas.Length);
    Debug.Assert(docStarts.Length == values.Length + 1);

    this.Mapping = mapping;
    this.DocStarts = docStarts;
    this.Values = values;
}
/// <summary>
/// Returns a <see cref="SortedSetDocValues"/> view of <paramref name="field"/> over all leaves of
/// <paramref name="r"/>. With more than one leaf this builds an <see cref="OrdinalMap"/> from every
/// leaf's terms enum; the original documentation describes this as extremely slow and recommends
/// per-segment access through <see cref="AtomicReader"/> instead.
/// </summary>
/// <param name="r">The reader whose leaves are combined.</param>
/// <param name="field">Name of the doc values field.</param>
/// <returns>
/// <c>null</c> when the reader has no leaves or no leaf has values for the field; the single
/// leaf's own values when there is exactly one leaf; otherwise a merged view.
/// </returns>
public static SortedSetDocValues GetSortedSetValues(IndexReader r, string field)
{
    IList<AtomicReaderContext> leaves = r.Leaves;
    int leafCount = leaves.Count;

    if (leafCount == 0)
    {
        return null;
    }
    if (leafCount == 1)
    {
        return leaves[0].AtomicReader.GetSortedSetDocValues(field);
    }

    bool anyReal = false;
    SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
    int[] docStarts = new int[leafCount + 1];
    for (int leaf = 0; leaf < leafCount; leaf++)
    {
        AtomicReaderContext context = leaves[leaf];
        SortedSetDocValues leafValues = context.AtomicReader.GetSortedSetDocValues(field);
        if (leafValues != null)
        {
            anyReal = true;
        }
        else
        {
            leafValues = DocValues.EMPTY_SORTED_SET;
        }
        perLeaf[leaf] = leafValues;
        docStarts[leaf] = context.DocBase;
    }
    docStarts[leafCount] = r.MaxDoc;

    if (!anyReal)
    {
        return null;
    }

    TermsEnum[] enums = new TermsEnum[perLeaf.Length];
    for (int leaf = 0; leaf < perLeaf.Length; leaf++)
    {
        enums[leaf] = perLeaf[leaf].TermsEnum();
    }
    OrdinalMap mapping = new OrdinalMap(r.CoreCacheKey, enums);
    return new MultiSortedSetDocValues(perLeaf, docStarts, mapping);
}
/// <summary>
/// Builds a <see cref="DocTermOrds"/> for "field" on <paramref name="r"/> (optionally restricted to
/// <paramref name="prefixRef"/>) and checks that the terms it reports for each document match the
/// expected terms looked up through <paramref name="idToOrds"/> and <paramref name="termsArray"/>.
/// </summary>
/// <param name="r">Reader to verify.</param>
/// <param name="idToOrds">For each "id" field value, indexes into <paramref name="termsArray"/> in expected order.</param>
/// <param name="termsArray">Terms referenced by <paramref name="idToOrds"/>.</param>
/// <param name="prefixRef">Term prefix handed to <see cref="DocTermOrds"/>, or <c>null</c>.</param>
private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
{
    DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));
    FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
        Console.WriteLine("TEST: all TERMS:");
        TermsEnum allTerms = MultiFields.GetTerms(r, "field").GetIterator(null);
        for (int termOrd = 0; allTerms.Next() != null; termOrd++)
        {
            Console.WriteLine(" ord=" + termOrd + " term=" + allTerms.Term.Utf8ToString());
        }
    }

    TermsEnum te = dto.GetOrdTermsEnum(r);

    if (dto.NumTerms == 0)
    {
        Terms terms = MultiFields.GetTerms(r, "field");
        if (prefixRef == null)
        {
            // Without a prefix, zero terms means the field must have no terms at all.
            Assert.IsNull(terms);
        }
        else if (terms != null)
        {
            // With a prefix, the field may have terms as long as none starts with the prefix.
            TermsEnum termsEnum = terms.GetIterator(null);
            TermsEnum.SeekStatus result = termsEnum.SeekCeil(prefixRef);
            if (result != TermsEnum.SeekStatus.END)
            {
                Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
            }
        }
        return;
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: TERMS:");
        te.SeekExact(0);
        do
        {
            Console.WriteLine(" ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
        }
        while (te.Next() != null);
    }

    SortedSetDocValues iter = dto.GetIterator(r);
    for (int docID = 0; docID < r.MaxDoc; docID++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
        }

        iter.SetDocument(docID);
        int[] answers = idToOrds[docIDToID.Get(docID)];
        int upto = 0;
        for (long docOrd = iter.NextOrd(); docOrd != SortedSetDocValues.NO_MORE_ORDS; docOrd = iter.NextOrd())
        {
            te.SeekExact(docOrd);
            BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE)
            {
                Console.WriteLine(" exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
            }
            Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + docOrd);
        }
        // Every expected term for this document must have been consumed.
        Assert.AreEqual(answers.Length, upto);
    }
}
/// <summary>
/// Indexes the terms "hello", "world" and "beer" and exercises Next, SeekCeil, SeekExact(term)
/// and SeekExact(ord) on the terms enum obtained from the field cache's doc term ords.
/// </summary>
public virtual void TestSortedTermsEnum()
{
    Directory directory = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random);
    IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwconfig.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

    foreach (string value in new[] { "hello", "world", "beer" })
    {
        Document doc = new Document();
        doc.Add(new StringField("field", value, Field.Store.NO));
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);

    DirectoryReader ireader = iwriter.GetReader();
    iwriter.Dispose();

    AtomicReader ar = GetOnlySegmentReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");
    Assert.AreEqual(3, dv.ValueCount);

    TermsEnum termsEnum = dv.GetTermsEnum();

    // Terms in the order the enum is expected to return them; the index is the ordinal.
    string[] sortedTerms = { "beer", "hello", "world" };

    // next()
    for (long ord = 0; ord < sortedTerms.Length; ord++)
    {
        Assert.AreEqual(sortedTerms[ord], termsEnum.Next().Utf8ToString());
        Assert.AreEqual(ord, termsEnum.Ord);
    }

    // seekCeil()
    Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
    Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
    Assert.AreEqual(1, termsEnum.Ord);
    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
    Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
    Assert.AreEqual(0, termsEnum.Ord);
    Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

    // seekExact()
    for (long ord = 0; ord < sortedTerms.Length; ord++)
    {
        Assert.IsTrue(termsEnum.SeekExact(new BytesRef(sortedTerms[ord])));
        Assert.AreEqual(sortedTerms[ord], termsEnum.Term.Utf8ToString());
        Assert.AreEqual(ord, termsEnum.Ord);
    }
    Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

    // seek(ord)
    for (long ord = 0; ord < sortedTerms.Length; ord++)
    {
        termsEnum.SeekExact(ord);
        Assert.AreEqual(sortedTerms[ord], termsEnum.Term.Utf8ToString());
        Assert.AreEqual(ord, termsEnum.Ord);
    }

    ireader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Captures the enclosing filter, the per-document ordinals and the term set.
/// <paramref name="maxDoc"/> and <paramref name="acceptDocs"/> are passed to the base class.
/// </summary>
public FieldCacheDocIdSetAnonymousInnerClassHelper(
    MultiTermQueryDocTermOrdsWrapperFilter outerInstance,
    int maxDoc,
    Bits acceptDocs,
    SortedSetDocValues docTermOrds,
    LongBitSet termSet)
    : base(maxDoc, acceptDocs)
{
    this.TermSet = termSet;
    this.DocTermOrds = docTermOrds;
    this.OuterInstance = outerInstance;
}
/// <summary>
/// Validates a sorted-set doc values field: per-document ordinals must be strictly increasing and
/// in range, documents must agree with <paramref name="docsWithField"/>, every ordinal must be used,
/// and the dictionary terms must be strictly increasing. When <paramref name="dv"/> is a
/// <see cref="RandomAccessOrds"/>, its <c>OrdAt</c> and <c>Cardinality</c> results are cross-checked too.
/// </summary>
/// <param name="fieldName">Field name, used only in error messages.</param>
/// <param name="reader">Reader supplying <c>MaxDoc</c>.</param>
/// <param name="dv">Doc values under test.</param>
/// <param name="docsWithField">Bits saying which documents are expected to have a value.</param>
/// <exception cref="Exception">On the first inconsistency found.</exception>
private static void CheckSortedSetDocValues(string fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField)
{
    long maxOrd = dv.ValueCount - 1;
    LongBitSet seenOrds = new LongBitSet(dv.ValueCount);
    long maxOrd2 = -1;

    // Non-null only when dv also supports random access to a document's ordinals.
    RandomAccessOrds randomAccess = dv as RandomAccessOrds;

    for (int docId = 0; docId < reader.MaxDoc; docId++)
    {
        dv.Document = docId;

        if (!docsWithField.Get(docId))
        {
            // Document is marked missing: it must expose no ordinals at all.
            long o = dv.NextOrd();
            if (o != SortedSetDocValues.NO_MORE_ORDS)
            {
                throw new Exception("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + docId);
            }
            if (randomAccess != null)
            {
                long ordCount2 = randomAccess.Cardinality();
                if (ordCount2 != 0)
                {
                    throw new Exception("dv for field: " + fieldName + " is marked missing but has cardinality " + ordCount2 + " for doc: " + docId);
                }
            }
            continue;
        }

        long lastOrd = -1;
        int ordCount = 0;
        for (long ord = dv.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.NextOrd())
        {
            if (ord <= lastOrd)
            {
                throw new Exception("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + docId);
            }
            if (ord < 0 || ord > maxOrd)
            {
                throw new Exception("ord out of bounds: " + ord);
            }
            if (randomAccess != null)
            {
                long ord2 = randomAccess.OrdAt(ordCount);
                if (ord != ord2)
                {
                    throw new Exception("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + ord2 + " for doc: " + docId);
                }
            }
            lastOrd = ord;
            maxOrd2 = Math.Max(maxOrd2, ord);
            seenOrds.Set(ord);
            ordCount++;
        }

        if (ordCount == 0)
        {
            throw new Exception("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + docId);
        }
        if (randomAccess != null)
        {
            long ordCount2 = randomAccess.Cardinality();
            if (ordCount != ordCount2)
            {
                throw new Exception("cardinality inconsistent, expected=" + ordCount + ",got=" + ordCount2 + " for doc: " + docId);
            }
        }
    }

    if (maxOrd != maxOrd2)
    {
        throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }
    if (seenOrds.Cardinality() != dv.ValueCount)
    {
        throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
    }

    // Dictionary check: each term must compare strictly greater than the previous one.
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (long ordinal = 0; ordinal <= maxOrd; ordinal++)
    {
        dv.LookupOrd(ordinal, scratch);
        Debug.Assert(scratch.Valid);
        if (lastValue != null && scratch.CompareTo(lastValue) <= 0)
        {
            throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
        }
        lastValue = BytesRef.DeepCopyOf(scratch);
    }
}
/// <summary>
/// Wraps the given sorted-set doc values.
/// </summary>
/// <param name="in">The doc values instance to store.</param>
internal MinValue(SortedSetDocValues @in) => this.@in = @in;
/// <summary>
/// Builds an <see cref="IBits"/> view over <paramref name="dv"/> representing the documents that have a value.
/// </summary>
/// <param name="dv">The sorted-set doc values to wrap.</param>
/// <param name="maxDoc">Document count handed to the wrapper.</param>
public static IBits DocsWithValue(SortedSetDocValues dv, int maxDoc)
    => new BitsAnonymousClass2(dv, maxDoc);
/// <summary>
/// Switches the collector to a new segment: stores a result for the previous segment (if any), loads the
/// group and facet field caches for <paramref name="context"/>, re-maps the already collected
/// grouped facet hits to this segment's ordinals, and computes the facet ordinal range to report.
/// </summary>
/// <param name="context">Context of the segment about to be collected.</param>
/// <exception cref="System.IO.IOException"></exception>
public override void SetNextReader(AtomicReaderContext context)
{
    // A non-null count array means a previous segment was collected; keep its result.
    if (segmentFacetCounts != null)
    {
        segmentResults.AddItem(((TermGroupFacetCollector.MV.SegmentResult)CreateSegmentResult()));
    }

    AtomicReader segmentReader = (AtomicReader)context.Reader();
    groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(segmentReader, groupField);
    facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(segmentReader, facetField);
    facetFieldNumTerms = (int)facetFieldDocTermOrds.GetValueCount();
    facetOrdTermsEnum = facetFieldNumTerms == 0 ? null : facetFieldDocTermOrds.TermsEnum();

    // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
    segmentFacetCounts = new int[facetFieldNumTerms + 1];
    segmentTotalCount = 0;

    segmentGroupedFacetHits.Clear();
    foreach (GroupedFacetHit hit in groupedFacetHits)
    {
        int groupOrd = -1;
        if (hit.groupValue != null)
        {
            groupOrd = groupFieldTermsIndex.LookupTerm(hit.groupValue);
            if (groupOrd < 0)
            {
                continue; // group value does not occur in this segment
            }
        }

        int facetOrd;
        if (hit.facetValue == null)
        {
            // The slot after the last real ordinal stands for "no facet value".
            facetOrd = facetFieldNumTerms;
        }
        else
        {
            if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(hit.facetValue))
            {
                continue; // facet value does not occur in this segment
            }
            facetOrd = (int)facetOrdTermsEnum.Ord();
        }

        // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
        int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
        segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
    }

    if (facetPrefix == null)
    {
        // No prefix: the range covers every ordinal plus the extra "no facet value" slot.
        startFacetOrd = 0;
        endFacetOrd = facetFieldNumTerms + 1;
        return;
    }

    TermsEnum.SeekStatus seekStatus = facetOrdTermsEnum != null
        ? facetOrdTermsEnum.SeekCeil(facetPrefix)
        : TermsEnum.SeekStatus.END;
    if (seekStatus == TermsEnum.SeekStatus.END)
    {
        // Nothing at or after the prefix: empty range.
        startFacetOrd = 0;
        endFacetOrd = 0;
        return;
    }
    startFacetOrd = (int)facetOrdTermsEnum.Ord();

    // Seek to prefix + BIG_TERM to find where the prefixed terms end.
    BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
    facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
    seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
    endFacetOrd = seekStatus != TermsEnum.SeekStatus.END
        ? (int)facetOrdTermsEnum.Ord()
        : facetFieldNumTerms;
}
/// <summary>
/// Builds an <see cref="IBits"/> view over <paramref name="dv"/> representing the documents that have a value.
/// </summary>
/// <param name="dv">The sorted-set doc values to wrap.</param>
/// <param name="maxDoc">Document count handed to the wrapper.</param>
public static IBits DocsWithValue(SortedSetDocValues dv, int maxDoc)
    => new BitsAnonymousInnerClassHelper2(dv, maxDoc);
/// <summary>
/// Stores the doc values and document count.
/// </summary>
/// <param name="dv">The sorted-set doc values to store.</param>
/// <param name="maxDoc">Document count to store.</param>
public BitsAnonymousInnerClassHelper2(SortedSetDocValues dv, int maxDoc)
{
    this.maxDoc = maxDoc;
    this.dv = dv;
}
/// <summary>
/// Merges the doc values of every field in the merge state whose type is not
/// <see cref="DocValuesType.NONE"/>, dispatching on that type. Readers without values for a field
/// contribute the matching <c>DocValues.EMPTY_*</c> instance (and, for numeric and binary fields,
/// a match-no-bits docs-with-field).
/// </summary>
/// <param name="segmentWriteState">Write state passed to the codec's doc values consumer.</param>
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            DocValuesType type = field.DocValuesType;
            switch (type)
            {
                case DocValuesType.NONE:
                    // field has no doc values; nothing to merge
                    break;

                case DocValuesType.NUMERIC:
                {
                    IList<NumericDocValues> numericValues = new JCG.List<NumericDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        numericValues.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, mergeState, numericValues, docsWithField);
                    break;
                }

                case DocValuesType.BINARY:
                {
                    IList<BinaryDocValues> binaryValues = new JCG.List<BinaryDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        binaryValues.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, mergeState, binaryValues, docsWithField);
                    break;
                }

                case DocValuesType.SORTED:
                {
                    IList<SortedDocValues> sortedValues = new JCG.List<SortedDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        sortedValues.Add(reader.GetSortedDocValues(field.Name) ?? DocValues.EMPTY_SORTED);
                    }
                    consumer.MergeSortedField(field, mergeState, sortedValues);
                    break;
                }

                case DocValuesType.SORTED_SET:
                {
                    IList<SortedSetDocValues> sortedSetValues = new JCG.List<SortedSetDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        sortedSetValues.Add(reader.GetSortedSetDocValues(field.Name) ?? DocValues.EMPTY_SORTED_SET);
                    }
                    consumer.MergeSortedSetField(field, mergeState, sortedSetValues);
                    break;
                }

                default:
                    throw AssertionError.Create("type=" + type);
            }
        }
        success = true;
    }
    finally
    {
        // On failure, dispose without masking the exception already in flight.
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
/// <summary>
/// Returns sorted-set doc values for <paramref name="field"/> across all leaves of the
/// wrapped reader, reusing a cached <see cref="OrdinalMap"/> when one exists.
/// </summary>
/// <param name="field">Name of the doc values field.</param>
/// <returns>
/// The merged view, or <c>null</c> when a map is cached but the field is not of type
/// <see cref="DocValuesType.SORTED_SET"/>; on a cache miss, whatever
/// <c>MultiDocValues.GetSortedSetValues</c> returns.
/// </returns>
public override SortedSetDocValues GetSortedSetDocValues(string field)
{
    EnsureOpen();

    OrdinalMap ordinalMap;
    lock (CachedOrdMaps)
    {
        bool isCached = CachedOrdMaps.TryGetValue(field, out ordinalMap);
        if (!isCached)
        {
            // uncached, or not a multi dv
            SortedSetDocValues uncached = MultiDocValues.GetSortedSetValues(@in, field);
            MultiSortedSetDocValues multi = uncached as MultiSortedSetDocValues;
            if (multi != null)
            {
                ordinalMap = multi.Mapping;
                // Only remember the map when it is owned by this reader's core cache key.
                if (ordinalMap.Owner == CoreCacheKey)
                {
                    CachedOrdMaps[field] = ordinalMap;
                }
            }
            return uncached;
        }
    }

    // cached ordinal map
    if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED_SET)
    {
        return null;
    }
    Debug.Assert(ordinalMap != null);

    var leaves = @in.Leaves();
    int leafCount = leaves.Count;
    SortedSetDocValues[] perLeafValues = new SortedSetDocValues[leafCount];
    int[] docStarts = new int[leafCount + 1];
    for (int leaf = 0; leaf < leafCount; leaf++)
    {
        AtomicReaderContext context = leaves[leaf];
        perLeafValues[leaf] = context.AtomicReader.GetSortedSetDocValues(field) ?? DocValues.EMPTY_SORTED_SET;
        docStarts[leaf] = context.DocBase;
    }
    docStarts[leafCount] = MaxDoc();

    return new MultiSortedSetDocValues(perLeafValues, docStarts, ordinalMap);
}
/// <summary>
/// Opens the index in <paramref name="dir"/> and checks its stored fields, term vectors,
/// doc values and a handful of term queries against the values this test expects.
/// </summary>
/// <param name="dir">Directory holding the index to check.</param>
/// <param name="oldName">Name of the index, used only in assertion messages.</param>
public virtual void SearchIndex(Directory dir, string oldName)
{
    const string expectedUtf8 = "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd";

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    TestUtil.CheckIndex(dir);

    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
    // true if this is a 4.2+ index
    bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

    // NOTE: currently we can only do this on trunk!
    Debug.Assert(is40Index);

    Bits liveDocs = MultiFields.GetLiveDocs(reader);

    for (int docId = 0; docId < 35; docId++)
    {
        if (!liveDocs.Get(docId))
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, docId);
            continue;
        }

        Document d = reader.Document(docId);
        IList<IndexableField> fields = d.Fields;
        bool isProxDoc = d.GetField("content3") == null;
        if (isProxDoc)
        {
            int numFields = is40Index ? 7 : 5;
            Assert.AreEqual(numFields, fields.Count);

            IndexableField f = d.GetField("id");
            Assert.AreEqual("" + docId, f.StringValue);

            f = d.GetField("utf8");
            Assert.AreEqual(expectedUtf8, f.StringValue);

            f = d.GetField("autf8");
            Assert.AreEqual(expectedUtf8, f.StringValue);

            f = d.GetField("content2");
            Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

            f = d.GetField("fie\u2C77ld");
            Assert.AreEqual("field with non-ascii name", f.StringValue);
        }

        Fields tfvFields = reader.GetTermVectors(docId);
        Assert.IsNotNull(tfvFields, "i=" + docId);
        Terms tfv = tfvFields.Terms("utf8");
        Assert.IsNotNull(tfv, "docID=" + docId + " index=" + oldName);
    }

    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");

        SortedSetDocValues dvSortedSet = null;
        if (is42Index)
        {
            dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
        }

        for (int docId = 0; docId < 35; docId++)
        {
            int id = Convert.ToInt32(reader.Document(docId).Get("id"));
            Assert.AreEqual(id, dvByte.Get(docId));

            // The binary/sorted fields are expected to hold the id as four big-endian bytes.
            sbyte[] bytes = new sbyte[]
            {
                (sbyte)((int)((uint)id >> 24)),
                (sbyte)((int)((uint)id >> 16)),
                (sbyte)((int)((uint)id >> 8)),
                (sbyte)id
            };
            BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
            BytesRef scratch = new BytesRef();

            dvBytesDerefFixed.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(docId, scratch);
            Assert.AreEqual(expectedRef, scratch);

            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(docId)), 0D);
            Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(docId)), 0F);
            Assert.AreEqual(id, dvInt.Get(docId));
            Assert.AreEqual(id, dvLong.Get(docId));
            Assert.AreEqual(id, dvPacked.Get(docId));
            Assert.AreEqual(id, dvShort.Get(docId));

            if (is42Index)
            {
                // Each document is expected to carry exactly one sorted-set value.
                dvSortedSet.Document = docId;
                long ord = dvSortedSet.NextOrd();
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                dvSortedSet.LookupOrd(ord, scratch);
                Assert.AreEqual(expectedRef, scratch);
            }
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #0
    Document doc = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("didn't get the right document first", "0", doc.Get("id"));

    DoTestHits(hits, 34, searcher.IndexReader);

    if (is40Index)
    {
        foreach (string contentField in new[] { "content5", "content6" })
        {
            hits = searcher.Search(new TermQuery(new Term(contentField, "aaa")), null, 1000).ScoreDocs;
            DoTestHits(hits, 34, searcher.IndexReader);
        }
    }

    string[] utf8Terms = { "\u0000", "lu\uD834\uDD1Ece\uD834\uDD60ne", "ab\ud917\udc17cd" };
    foreach (string utf8Term in utf8Terms)
    {
        hits = searcher.Search(new TermQuery(new Term("utf8", utf8Term)), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);
    }

    reader.Dispose();
}
/// <summary>
/// Builds the scorer from the "dv" sorted-set field of <paramref name="reader"/>: for every clause of the
/// weight's <see cref="BooleanQuery"/> whose term is found in the doc values, records the term's ordinal
/// and creates a similarity scorer stored at that ordinal's index.
/// </summary>
/// <param name="weight">Weight whose query must be a <see cref="BooleanQuery"/> of term queries.</param>
/// <param name="reader">Segment reader providing the doc values and reader context.</param>
/// <param name="searcher">Searcher used for collection and term statistics.</param>
internal SlowMinShouldMatchScorer(BooleanWeight weight, AtomicReader reader, IndexSearcher searcher)
    : base(weight)
{
    this.Dv = reader.GetSortedSetDocValues("dv");
    this.MaxDoc = reader.MaxDoc;

    BooleanQuery booleanQuery = (BooleanQuery)weight.Query;
    this.MinNrShouldMatch = booleanQuery.MinimumNumberShouldMatch;
    this.Sims = new SimScorer[(int)Dv.ValueCount];

    foreach (BooleanClause clause in booleanQuery.Clauses)
    {
        // Neither prohibited nor required clauses are expected here.
        Debug.Assert(!clause.Prohibited);
        Debug.Assert(!clause.Required);

        Term term = ((TermQuery)clause.Query).Term;
        long ord = Dv.LookupTerm(term.Bytes);
        if (ord < 0)
        {
            continue; // term not found in the doc values
        }

        bool added = Ords.Add(ord);
        Debug.Assert(added); // no dups

        TermContext termContext = TermContext.Build(reader.Context, term);
        SimWeight simWeight = weight.Similarity.ComputeWeight(
            1f,
            searcher.CollectionStatistics("field"),
            searcher.TermStatistics(term, termContext));
        var ignored = simWeight.ValueForNormalization; // ignored
        simWeight.Normalize(1F, 1F);
        Sims[(int)ord] = weight.Similarity.DoSimScorer(simWeight, (AtomicReaderContext)reader.Context);
    }
}