/// <summary>
/// Buffers one document's doc-values for <paramref name="field"/>: records the
/// field's declared <see cref="DocValuesType"/> on <paramref name="fieldInfo"/>
/// and routes the value to the matching type-specific handler. Fields with no
/// doc-values type are ignored.
/// </summary>
public override void AddField(int docID, IIndexableField field, FieldInfo fieldInfo)
{
    DocValuesType dvType = field.FieldType.DocValueType;
    if (dvType == DocValuesType.NONE)
    {
        return; // nothing to buffer for this field
    }
    fieldInfo.DocValuesType = dvType;
    switch (dvType)
    {
        case DocValuesType.BINARY:
            AddBinaryField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.SORTED:
            AddSortedField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.SORTED_SET:
            AddSortedSetField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.NUMERIC:
            // Only boxed 64-bit integers are legal NUMERIC doc values.
            if (!(field.GetNumericValue() is long?))
            {
                throw new System.ArgumentException("illegal type " + field.GetNumericValue().GetType() + ": DocValues types must be Long");
            }
            AddNumericField(fieldInfo, docID, (long)field.GetNumericValue());
            break;
        default:
            Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
            break;
    }
}
/// <summary>
/// Adds <paramref name="value"/> to <paramref name="doc"/> as a stored string
/// field and, unless <paramref name="type"/> is NONE, mirrors it into a
/// parallel "&lt;field&gt;_dv" doc-values field of the requested type.
/// </summary>
private void AddField(Document doc, string field, string value, DocValuesType type)
{
    doc.Add(new StringField(field, value, Field.Store.YES));
    if (type == DocValuesType.NONE)
    {
        return;
    }
    string dvField = field + "_dv";
    Field valuesField = null;
    if (type == DocValuesType.NUMERIC)
    {
        valuesField = new NumericDocValuesField(dvField, int.Parse(value, CultureInfo.InvariantCulture));
    }
    else if (type == DocValuesType.BINARY)
    {
        valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
    }
    else if (type == DocValuesType.SORTED)
    {
        valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
    }
    // Matching the original switch: any other type leaves valuesField null.
    doc.Add(valuesField);
}
/// <summary>
/// Encodes a <see cref="DocValuesType"/> as its on-disk byte:
/// 0 = NONE, 1 = NUMERIC, 2 = BINARY, 3 = SORTED, 4 = SORTED_SET.
/// </summary>
/// <exception cref="InvalidOperationException">for an unrecognized type</exception>
private static sbyte DocValuesByte(DocValuesType type)
{
    switch (type)
    {
        case DocValuesType.NONE: return 0;
        case DocValuesType.NUMERIC: return 1;
        case DocValuesType.BINARY: return 2;
        case DocValuesType.SORTED: return 3;
        case DocValuesType.SORTED_SET: return 4;
        default: throw new InvalidOperationException();
    }
}
/// <summary>
/// Encodes a <see cref="DocValuesType"/> as its on-disk byte:
/// 0 = NONE, 1 = NUMERIC, 2 = BINARY, 3 = SORTED, 4 = SORTED_SET.
/// An unrecognized type is a programming error and trips an assertion.
/// </summary>
private static sbyte DocValuesByte(DocValuesType type)
{
    switch (type)
    {
        case DocValuesType.NONE: return 0;
        case DocValuesType.NUMERIC: return 1;
        case DocValuesType.BINARY: return 2;
        case DocValuesType.SORTED: return 3;
        case DocValuesType.SORTED_SET: return 4;
        default: throw AssertionError.Create();
    }
}
private long dvGen = -1; // the DocValues generation of this field

// LUCENENET specific: De-nested the IndexOptions and DocValuesType enums from this class to prevent naming conflicts

/// <summary>
/// Sole Constructor.
/// <para/>
/// @lucene.experimental
/// </summary>
/// <param name="name">field name</param>
/// <param name="indexed">whether the field is indexed; when <c>false</c>, all index-related flags below are forced to their defaults</param>
/// <param name="number">global field number</param>
/// <param name="storeTermVector">whether term vectors are stored (honored only when indexed)</param>
/// <param name="omitNorms">whether norms are omitted (honored only when indexed)</param>
/// <param name="storePayloads">whether payloads are stored (honored only when indexed)</param>
/// <param name="indexOptions">postings detail level (honored only when indexed)</param>
/// <param name="docValues">doc-values type of the field</param>
/// <param name="normsType">doc-values type used for norms; ignored when norms are omitted</param>
/// <param name="attributes">codec attributes; stored by reference, not copied</param>
public FieldInfo(string name, bool indexed, int number, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, IDictionary<string, string> attributes)
{
    this.Name = name;
    this.indexed = indexed;
    this.Number = number;
    this.docValueType = docValues;
    if (indexed)
    {
        this.storeTermVector = storeTermVector;
        this.storePayloads = storePayloads;
        this.omitNorms = omitNorms;
        this.indexOptions = indexOptions;
        // Norms only carry a doc-values type when they are not omitted.
        this.normType = !omitNorms ? normsType : DocValuesType.NONE;
    } // for non-indexed fields, leave defaults
    else
    {
        this.storeTermVector = false;
        this.storePayloads = false;
        this.omitNorms = false;
        this.indexOptions = IndexOptions.NONE;
        this.normType = DocValuesType.NONE;
    }
    this.attributes = attributes;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(CheckConsistency());
    }
}
/// <summary>
/// Buffers one document's doc-values for <paramref name="field"/>: records the
/// field's declared <see cref="DocValuesType"/> on <paramref name="fieldInfo"/>
/// and dispatches the value to the matching type-specific handler. Fields with
/// no doc-values type are ignored.
/// </summary>
public override void AddField(int docID, IIndexableField field, FieldInfo fieldInfo)
{
    DocValuesType dvType = field.IndexableFieldType.DocValueType;
    if (dvType == DocValuesType.NONE)
    {
        return; // nothing to buffer for this field
    }
    fieldInfo.DocValuesType = dvType;
    switch (dvType)
    {
        case DocValuesType.BINARY:
            AddBinaryField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.SORTED:
            AddSortedField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.SORTED_SET:
            AddSortedSetField(fieldInfo, docID, field.GetBinaryValue());
            break;
        case DocValuesType.NUMERIC:
            // Only INT64 numeric fields are legal NUMERIC doc values.
            if (field.NumericType != NumericFieldType.INT64)
            {
                throw new ArgumentException("illegal type " + field.NumericType + ": DocValues types must be " + NumericFieldType.INT64);
            }
            AddNumericField(fieldInfo, docID, field.GetInt64ValueOrDefault());
            break;
        default:
            Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
            break;
    }
}
/// <summary>
/// Records <paramref name="dvType"/> as the doc-values type for the named
/// field, overwriting any previous entry. Synchronized on this instance;
/// asserts first that the (number, name, type) triple is consistent with the
/// existing mappings.
/// </summary>
internal void SetDocValuesType(int number, string name, DocValuesType dvType)
{
    lock (this)
    {
        Debug.Assert(ContainsConsistent(number, name, dvType));
        docValuesType[name] = dvType;
    }
}
/// <summary>
/// Holder for per-test index state: the directory and reader for the built
/// index, the doc-values type under test, the expected group counts keyed by
/// search term (presumably group value -> doc ids — verify against the
/// builder), and the fake content strings. All arguments are stored by
/// reference, not copied.
/// </summary>
internal IndexContext(Directory directory, DirectoryReader indexReader, DocValuesType dvType, IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts, string[] contentStrings)
{
    this.directory = directory;
    this.indexReader = indexReader;
    this.dvType = dvType;
    this.searchTermToGroupCounts = searchTermToGroupCounts;
    this.contentStrings = contentStrings;
}
/// <summary>Write the header for this field. </summary>
/// <remarks>
/// Emits two newline-terminated records to <c>data</c>, in this order:
/// the FIELD marker followed by the field name, then the TYPE marker followed
/// by the doc-values type name. The write order is part of the SimpleText
/// format and must match the reader.
/// </remarks>
private void WriteFieldEntry(FieldInfo field, DocValuesType type)
{
    SimpleTextUtil.Write(data, FIELD);
    SimpleTextUtil.Write(data, field.Name, scratch);
    SimpleTextUtil.WriteNewline(data);
    SimpleTextUtil.Write(data, TYPE);
    SimpleTextUtil.Write(data, type.ToString(), scratch);
    SimpleTextUtil.WriteNewline(data);
}
// used by assert
/// <summary>
/// Consistency check used by assertions: verifies that the given
/// <paramref name="number"/>, <paramref name="name"/> and
/// <paramref name="dvType"/> agree with the mappings recorded so far.
/// Returns <c>false</c> (rather than throwing) when the name or number has
/// not been registered.
/// </summary>
internal bool ContainsConsistent(int? number, string name, DocValuesType dvType)
{
    lock (this)
    {
        numberToName.TryGetValue(number, out string numberToNameStr);
        // LUCENENET: consume the TryGetValue result instead of re-indexing
        // nameToNumber[name]; the out value was previously unused and the
        // indexer would throw KeyNotFoundException for an unregistered name,
        // which an assert helper must not do. A missing name yields null here,
        // so number.Equals(...) correctly evaluates to false.
        nameToNumber.TryGetValue(name, out int? nameToNumberVal);
        this.docValuesType.TryGetValue(name, out DocValuesType docValuesType);
        return name.Equals(numberToNameStr, StringComparison.Ordinal)
            && number.Equals(nameToNumberVal)
            && (dvType == DocValuesType.NONE || docValuesType == DocValuesType.NONE || dvType == docValuesType);
    }
}
/// <summary>
/// 4.0-style docvalues byte: 0 when there is no doc-values type, otherwise the
/// legacy ordinal decoded from <paramref name="legacyTypeAtt"/>.
/// </summary>
public virtual byte DocValuesByte(DocValuesType type, string legacyTypeAtt)
{
    if (type != DocValuesType.NONE)
    {
        // A real doc-values type must be accompanied by its legacy attribute.
        Debug.Assert(legacyTypeAtt != null);
        return (byte)legacyTypeAtt.ToLegacyDocValuesType();
    }
    Debug.Assert(legacyTypeAtt == null);
    return 0;
}
/// <summary>
/// Create a new mutable <see cref="FieldType"/> with all of the properties from <paramref name="ref"/>
/// </summary>
public FieldType(FieldType @ref)
{
    this.indexed = @ref.IsIndexed;
    this.stored = @ref.IsStored;
    this.tokenized = @ref.IsTokenized;
    this.storeTermVectors = @ref.StoreTermVectors;
    this.storeTermVectorOffsets = @ref.StoreTermVectorOffsets;
    this.storeTermVectorPositions = @ref.StoreTermVectorPositions;
    this.storeTermVectorPayloads = @ref.StoreTermVectorPayloads;
    this.omitNorms = @ref.OmitNorms;
    this.indexOptions = @ref.IndexOptions;
    this.docValueType = @ref.DocValueType;
    this.numericType = @ref.NumericType;
    // Do not copy frozen! The copy must stay mutable even when the source
    // FieldType has been frozen.
}
/// <summary>
/// Records <paramref name="dvType"/> as the doc-values type for the named
/// field, overwriting any previous entry.
/// </summary>
/// <remarks>
/// Mutual exclusion uses <c>UninterruptableMonitor</c> rather than
/// <c>lock</c> — presumably the LUCENENET interrupt-safe locking convention;
/// the consistency assert runs only when asserts are enabled.
/// </remarks>
internal void SetDocValuesType(int number, string name, DocValuesType dvType)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(ContainsConsistent(number, name, dvType));
        }
        docValuesType[name] = dvType;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
// should only be called by FieldInfos#addOrUpdate
/// <summary>
/// Merges updated index-time flags for this field into the current state.
/// Boolean flags are "sticky": once the field has been seen as indexed, with
/// term vectors, with payloads, or with norms omitted, that setting persists.
/// Conflicting index options are downgraded to the weaker of the two, and
/// payloads are dropped once positions are no longer stored. Finishes with a
/// consistency assert.
/// </summary>
internal void Update(bool indexed, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions)
{
    //System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms);
    if (this.indexed != indexed)
    {
        this.indexed = true; // once indexed, always index
    }
    if (indexed) // if updated field data is not for indexing, leave the updates out
    {
        if (this.storeTermVector != storeTermVector)
        {
            this.storeTermVector = true; // once vector, always vector
        }
        if (this.storePayloads != storePayloads)
        {
            this.storePayloads = true;
        }
        if (this.omitNorms != omitNorms)
        {
            this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
            this.normType = DocValuesType.NONE;
        }
        if (this.indexOptions != indexOptions)
        {
            if (this.indexOptions == IndexOptions.NONE)
            {
                this.indexOptions = indexOptions;
            }
            else
            {
                // downgrade
                // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                this.indexOptions = IndexOptionsComparer.Default.Compare(this.indexOptions, indexOptions) < 0 ? this.indexOptions : indexOptions;
            }
            // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
            if (IndexOptionsComparer.Default.Compare(this.indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
            {
                // cannot store payloads if we don't store positions:
                this.storePayloads = false;
            }
        }
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(CheckConsistency());
    }
}
/// <summary>
/// Returns the global field number for the given field name. If the name
/// does not exist yet it tries to add it with the given preferred field
/// number assigned if possible otherwise the first unassigned field number
/// is used as the field number.
/// </summary>
/// <exception cref="ArgumentException">if the field already has a different, non-NONE doc-values type</exception>
internal int AddOrGet(string fieldName, int preferredFieldNumber, DocValuesType dvType)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (dvType != DocValuesType.NONE)
        {
            // Record the doc-values type the first time a real one is seen for
            // this field; changing it afterwards is an error.
            if (!docValuesType.TryGetValue(fieldName, out DocValuesType currentDVType) || currentDVType == DocValuesType.NONE) // default value in .NET (value type 0)
            {
                docValuesType[fieldName] = dvType;
            }
            else if (currentDVType != DocValuesType.NONE && currentDVType != dvType)
            {
                throw new ArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\"");
            }
        }
        if (!nameToNumber.TryGetValue(fieldName, out int? fieldNumber) || fieldNumber == null)
        {
            int? preferredBoxed = preferredFieldNumber;
            if (preferredFieldNumber != -1 && !numberToName.ContainsKey(preferredBoxed))
            {
                // cool - we can use this number globally
                fieldNumber = preferredBoxed;
            }
            else
            {
                // find a new FieldNumber
                while (numberToName.ContainsKey(++lowestUnassignedFieldNumber))
                {
                    // might not be up to date - lets do the work once needed
                }
                fieldNumber = lowestUnassignedFieldNumber;
            }
            // Register the bidirectional name <-> number mapping.
            numberToName[fieldNumber] = fieldName;
            nameToNumber[fieldName] = fieldNumber;
        }
        return ((int)fieldNumber);
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Returns <c>true</c> if the <paramref name="fieldName"/> exists in the map
/// and is of the same <paramref name="dvType"/>.
/// </summary>
internal bool Contains(string fieldName, DocValuesType dvType)
{
    lock (this)
    {
        // used by IndexWriter.updateNumericDocValue
        if (nameToNumber.ContainsKey(fieldName))
        {
            // only return true if the field has the same dvType as the requested one
            // LUCENENET NOTE: the candidate could be NONE even if TryGetValue returns false
            docValuesType.TryGetValue(fieldName, out DocValuesType recordedType);
            return dvType == recordedType;
        }
        return false;
    }
}
// used by assert
/// <summary>
/// Consistency check used by assertions: verifies that the given
/// <paramref name="number"/>, <paramref name="name"/> and
/// <paramref name="dvType"/> agree with the mappings recorded so far.
/// Returns <c>false</c> (rather than throwing) for an unregistered name.
/// </summary>
internal bool ContainsConsistent(int number, string name, DocValuesType dvType)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        numberToName.TryGetValue(number, out string numberToNameStr);
        // LUCENENET: consume the TryGetValue result instead of re-indexing
        // nameToNumber[name]; the out value was previously unused and the
        // indexer would throw KeyNotFoundException for an unregistered name.
        // Requiring TryGetValue to succeed also avoids a false match between
        // number == 0 and the default(int) of a missing entry.
        bool hasNumber = nameToNumber.TryGetValue(name, out int nameToNumberVal);
        this.docValuesType.TryGetValue(name, out DocValuesType docValuesType);
        return name.Equals(numberToNameStr, StringComparison.Ordinal)
            && hasNumber && number == nameToNumberVal
            && (dvType == DocValuesType.NONE || docValuesType == DocValuesType.NONE || dvType == docValuesType);
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Returns <c>true</c> if the <paramref name="fieldName"/> exists in the map
/// and is of the same <paramref name="dvType"/>.
/// </summary>
internal bool Contains(string fieldName, DocValuesType dvType)
{
    lock (this)
    {
        // used by IndexWriter.updateNumericDocValue
        if (!nameToNumber.ContainsKey(fieldName))
        {
            return false;
        }
        // only return true if the field has the same dvType as the requested one;
        // an absent entry leaves the candidate at its default value.
        docValuesType.TryGetValue(fieldName, out DocValuesType recordedType);
        return dvType == recordedType;
    }
}
// Returns the FieldInfo that corresponds to the given field and type, or
// null if the field does not exist, or was not indexed with the requested
// DocValuesType.
private FieldInfo GetDVField(string field, DocValuesType type)
{
    FieldInfo fi = FieldInfos.FieldInfo(field);
    bool hasRequestedType =
        fi != null                                  // field exists
        && fi.DocValuesType != DocValuesType.NONE   // field was indexed with doc values
        && fi.DocValuesType == type;                // and with the requested type
    return hasRequestedType ? fi : null;
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: void assertValidType(String key, Object value, IndexIdentifier identifier) throws org.neo4j.internal.kernel.api.exceptions.explicitindex.ExplicitIndexNotFoundKernelException
/// <summary>
/// Verifies that <paramref name="value"/> may be indexed under
/// <paramref name="key"/>: numbers use SORTED_NUMERIC doc values, everything
/// else SORTED_SET, and a key must keep the same doc-values type for the
/// lifetime of the index.
/// </summary>
/// <exception cref="ExplicitIndexNotFoundKernelException">if the index map cannot be loaded (e.g. racing with an index drop)</exception>
/// <exception cref="System.ArgumentException">if the key was previously indexed with a different type</exception>
internal virtual void AssertValidType(string key, object value, IndexIdentifier identifier)
{
    DocValuesType expectedType;
    string expectedTypeName;
    // NOTE(review): `Number`, `putIfAbsent` and `close()` are Java-converter
    // artifacts — presumably backed by helpers elsewhere in this port; verify.
    if (value is Number)
    {
        expectedType = DocValuesType.SORTED_NUMERIC;
        expectedTypeName = "numbers";
    }
    else
    {
        expectedType = DocValuesType.SORTED_SET;
        expectedTypeName = "strings";
    }
    IDictionary<string, DocValuesType> stringDocValuesTypeMap = _indexTypeMap[identifier];
    // If the index searcher has never been loaded, we need to load it now to populate the map.
    int iterations = 0;
    // Iterate a bit in case we race with an index drop or create.
    while (stringDocValuesTypeMap == null && iterations++ < 20)
    {
        // We don't use ensureInstantiated because we want to surface the exception in this case.
        GetIndexSearcher(identifier).close();
        stringDocValuesTypeMap = _indexTypeMap[identifier];
    }
    if (stringDocValuesTypeMap == null)
    {
        // Looks like we are running into some adversarial racing, so let's just give up.
        throw new ExplicitIndexNotFoundKernelException("Index '%s' doesn't exist.", identifier);
    }
    // Atomically record the type on first use; a non-null return is the
    // previously recorded type for this key.
    DocValuesType actualType = stringDocValuesTypeMap.putIfAbsent(key, expectedType);
    if (actualType != null && !actualType.Equals(DocValuesType.NONE) && !actualType.Equals(expectedType))
    {
        throw new System.ArgumentException(string.Format("Cannot index '{0}' for key '{1}', since this key has been used to index {2}. Raw value of the index type is {3}", value, key, expectedTypeName, actualType));
    }
}
/// <returns> the {@code NumericDocValues} for a given field </returns>
/// <exception cref="IllegalArgumentException"> if this field is not indexed with numeric doc values </exception>
/// <remarks>
/// When the field has no numeric doc values, the error message reports the
/// field's actual doc-values type (or null for an unknown field).
/// NOTE(review): `new Exception(e)` and the null-assignable DocValuesType
/// local are Java-converter artifacts — presumably DocValuesType is a
/// reference type in this context; verify before restyling.
/// </remarks>
public NumericDocValues ReadDocValues(string field)
{
    try
    {
        NumericDocValues dv = Context.reader().getNumericDocValues(field);
        if (dv == null)
        {
            FieldInfo fi = Context.reader().FieldInfos.fieldInfo(field);
            DocValuesType actual = null;
            if (fi != null)
            {
                actual = fi.DocValuesType;
            }
            throw new System.InvalidOperationException("The field '" + field + "' is not indexed properly, expected NumericDV, but got '" + actual + "'");
        }
        return (dv);
    }
    catch (IOException e)
    {
        // Rethrow I/O failures wrapped as an unchecked exception (converted Java).
        throw new Exception(e);
    }
}
/// <summary>
/// Merges the doc-values fields of all readers in <c>mergeState</c> into the
/// target segment via the codec's <c>DocValuesConsumer</c>. For each field
/// carrying a doc-values type, per-reader values are collected — substituting
/// the EMPTY_* singletons and match-no bits when a reader lacks the field —
/// and handed to the matching Merge*Field call. On success the consumer is
/// disposed normally; on failure it is disposed while suppressing secondary
/// exceptions so the original error propagates.
/// </summary>
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            DocValuesType type = field.DocValuesType;
            if (type != DocValuesType.NONE)
            {
                if (type == DocValuesType.NUMERIC)
                {
                    // NUMERIC and BINARY also track which docs actually have a value.
                    IList<NumericDocValues> toMerge = new JCG.List<NumericDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.BINARY)
                {
                    IList<BinaryDocValues> toMerge = new JCG.List<BinaryDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.SORTED)
                {
                    IList<SortedDocValues> toMerge = new JCG.List<SortedDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedDocValues values = reader.GetSortedDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedField(field, mergeState, toMerge);
                }
                else if (type == DocValuesType.SORTED_SET)
                {
                    IList<SortedSetDocValues> toMerge = new JCG.List<SortedSetDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED_SET;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedSetField(field, mergeState, toMerge);
                }
                else
                {
                    throw AssertionError.Create("type=" + type);
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
/// <summary>
/// Adds the group value to <paramref name="doc"/> as a stored text field and,
/// when index doc values can be used, mirrors it into a
/// "&lt;groupField&gt;_dv" doc-values field of the requested
/// <paramref name="valueType"/>.
/// </summary>
private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType valueType)
{
    doc.Add(new TextField(groupField, value, Field.Store.YES));
    if (!canUseIDV)
    {
        return;
    }
    Field valuesField = null;
    if (valueType == DocValuesType.BINARY)
    {
        valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
    }
    else if (valueType == DocValuesType.SORTED)
    {
        valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
    }
    else
    {
        fail("unhandled type"); // throws; any other type is a test bug
    }
    doc.Add(valuesField);
}
/// <summary>
/// Randomly picks one of the two equivalent group-heads collector
/// implementations so both code paths get exercised.
/// <paramref name="canUseIDV"/> and <paramref name="valueType"/> are unused
/// here but keep the signature parallel with the other helpers.
/// </summary>
private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, DocValuesType valueType)
{
    AbstractAllGroupHeadsCollector collector;
    if (!Random.nextBoolean())
    {
        collector = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
    }
    else
    {
        ValueSource vs = new BytesRefFieldSource(groupField);
        collector = new FunctionAllGroupHeadsCollector(vs, new Hashtable(), sortWithinGroup);
    }
    if (VERBOSE)
    {
        Console.WriteLine("Selected implementation: " + collector.GetType().Name);
    }
    return collector;
}
/// <summary>
/// Indexes eight documents across three authors (docs 6 and 7 have no author
/// field; the commit after doc 2 forces a second segment), then verifies that
/// AbstractAllGroupHeadsCollector returns the expected group-head doc ids for
/// several queries and within-group sorts (INT32 descending, then STRING in
/// both directions — the STRING sort exercises a different implementation).
/// </summary>
public void TestBasic() { string groupField = "author"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); DocValuesType valueType = vts[Random.nextInt(vts.Length)]; // 0
Document doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "1", Field.Store.NO)); doc.Add(NewStringField("id_2", "1", Field.Store.NO)); w.AddDocument(doc); // 1
doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO)); doc.Add(NewStringField("id_1", "2", Field.Store.NO)); doc.Add(NewStringField("id_2", "2", Field.Store.NO)); w.AddDocument(doc); // 2
doc = new Document(); AddGroupField(doc, groupField, "author1", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO)); doc.Add(NewStringField("id_1", "3", Field.Store.NO)); doc.Add(NewStringField("id_2", "3", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // To ensure a second segment
// 3
doc = new Document(); AddGroupField(doc, groupField, "author2", canUseIDV, valueType); doc.Add(NewTextField("content", "some random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "4", Field.Store.NO)); doc.Add(NewStringField("id_2", "4", Field.Store.NO)); w.AddDocument(doc); // 4
doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV, valueType); doc.Add(NewTextField("content", "some more random text", Field.Store.NO)); doc.Add(NewStringField("id_1", "5", Field.Store.NO)); doc.Add(NewStringField("id_2", "5", Field.Store.NO)); w.AddDocument(doc); // 5
doc = new Document(); AddGroupField(doc, groupField, "author3", canUseIDV, valueType); doc.Add(NewTextField("content", "random blob", Field.Store.NO)); doc.Add(NewStringField("id_1", "6", Field.Store.NO)); doc.Add(NewStringField("id_2", "6", Field.Store.NO)); w.AddDocument(doc); // 6 -- no author field
doc = new Document(); doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO)); doc.Add(NewStringField("id_1", "6", Field.Store.NO)); doc.Add(NewStringField("id_2", "6", Field.Store.NO)); w.AddDocument(doc); // 7 -- no author field
doc = new Document(); doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO)); doc.Add(NewStringField("id_1", "7", Field.Store.NO)); doc.Add(NewStringField("id_2", "7", Field.Store.NO)); w.AddDocument(doc); IndexReader reader = w.GetReader(); IndexSearcher indexSearcher = NewSearcher(reader); w.Dispose(); int maxDoc = reader.MaxDoc; Sort sortWithinGroup = new Sort(new SortField("id_1", SortFieldType.INT32, true)); var allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); // STRING sort type triggers different implementation
Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortFieldType.STRING, true)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortFieldType.STRING, false)); allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType); indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector); // 7 b/c higher doc id wins, even if order of field is in not in reverse.
assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads())); assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc)); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
/// <summary>
/// Randomized end-to-end test: over several iterations, builds an index of
/// random GroupDocs (random group values, three sort fields, fake content;
/// ~1 in 24 docs intentionally omits the group field), then runs 100 random
/// searches and asserts that the group heads reported by
/// AbstractAllGroupHeadsCollector match CreateExpectedGroupHeads. Lucene doc
/// ids are mapped back to our ids through a (deliberately insane) field-cache
/// lookup; doc-values are skipped on pre-4.0 codecs and composite readers.
/// </summary>
public void TestRandom() { int numberOfRuns = TestUtil.NextInt32(Random, 3, 6); for (int iter = 0; iter < numberOfRuns; iter++) { if (VERBOSE) { Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns)); } int numDocs = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER; int numGroups = TestUtil.NextInt32(Random, 1, numDocs); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); } List<BytesRef> groups = new List<BytesRef>(); for (int i = 0; i < numGroups; i++) { string randomValue; do { // B/c of DV based impl we can't see the difference between an empty string and a null value.
// For that reason we don't generate empty string groups.
randomValue = TestUtil.RandomRealisticUnicodeString(Random); } while ("".Equals(randomValue, StringComparison.Ordinal)); groups.Add(new BytesRef(randomValue)); } string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)]; if (VERBOSE) { Console.WriteLine("TEST: create fake content"); } for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++) { StringBuilder sb = new StringBuilder(); sb.append("real").append(Random.nextInt(3)).append(' '); int fakeCount = Random.nextInt(10); for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) { sb.append("fake "); } contentStrings[contentIDX] = sb.toString(); if (VERBOSE) { Console.WriteLine(" content=" + sb.toString()); } } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); bool preFlex = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); bool canUseIDV = !preFlex; DocValuesType valueType = vts[Random.nextInt(vts.Length)]; Document doc = new Document(); Document docNoGroup = new Document(); Field group = NewStringField("group", "", Field.Store.NO); doc.Add(group); Field valuesField = null; if (canUseIDV) { switch (valueType) { case DocValuesType.BINARY: valuesField = new BinaryDocValuesField("group_dv", new BytesRef()); break; case DocValuesType.SORTED: valuesField = new SortedDocValuesField("group_dv", new BytesRef()); break; default: fail("unhandled type"); break; } doc.Add(valuesField); } Field sort1 = NewStringField("sort1", "", Field.Store.NO); doc.Add(sort1); docNoGroup.Add(sort1); Field sort2 = NewStringField("sort2", "", Field.Store.NO); doc.Add(sort2); docNoGroup.Add(sort2); Field sort3 = NewStringField("sort3", "", Field.Store.NO); doc.Add(sort3); docNoGroup.Add(sort3); Field content = NewTextField("content", "", Field.Store.NO); doc.Add(content); docNoGroup.Add(content); Int32Field id = new Int32Field("id", 0, Field.Store.NO); doc.Add(id); docNoGroup.Add(id); GroupDoc[] groupDocs = new GroupDoc[numDocs]; for (int i = 0; i < numDocs; i++) { BytesRef groupValue; if (Random.nextInt(24) == 17) { // So we test the "doc doesn't have the group'd
// field" case:
groupValue = null; } else { groupValue = groups[Random.nextInt(groups.size())]; } GroupDoc groupDoc = new GroupDoc( i, groupValue, groups[Random.nextInt(groups.size())], groups[Random.nextInt(groups.size())], new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)), contentStrings[Random.nextInt(contentStrings.Length)] ); if (VERBOSE) { Console.WriteLine(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString()); } groupDocs[i] = groupDoc; if (groupDoc.group != null) { group.SetStringValue(groupDoc.group.Utf8ToString()); if (canUseIDV) { valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString())); } } sort1.SetStringValue(groupDoc.sort1.Utf8ToString()); sort2.SetStringValue(groupDoc.sort2.Utf8ToString()); sort3.SetStringValue(groupDoc.sort3.Utf8ToString()); content.SetStringValue(groupDoc.content); id.SetInt32Value(groupDoc.id); if (groupDoc.group == null) { w.AddDocument(docNoGroup); } else { w.AddDocument(doc); } } DirectoryReader r = w.GetReader(); w.Dispose(); // NOTE: intentional but temporary field cache insanity!
FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false); int[] fieldIdToDocID = new int[numDocs]; for (int i = 0; i < numDocs; i++) { int fieldId = docIdToFieldId.Get(i); fieldIdToDocID[fieldId] = i; } try { IndexSearcher s = NewSearcher(r); if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType())) { canUseIDV = false; } else { canUseIDV = !preFlex; } for (int contentID = 0; contentID < 3; contentID++) { ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs; foreach (ScoreDoc hit in hits) { GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)]; assertTrue(gd.score == 0.0); gd.score = hit.Score; int docId = gd.id; assertEquals(docId, docIdToFieldId.Get(hit.Doc)); } } foreach (GroupDoc gd in groupDocs) { assertTrue(gd.score != 0.0); } for (int searchIter = 0; searchIter < 100; searchIter++) { if (VERBOSE) { Console.WriteLine("TEST: searchIter=" + searchIter); } string searchTerm = "real" + Random.nextInt(3); bool sortByScoreOnly = Random.nextBoolean(); Sort sortWithinGroup = GetRandomSort(sortByScoreOnly); AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType); s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector); int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID); int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads(); // The actual group heads contains Lucene ids. Need to change them into our id value.
for (int i = 0; i < actualGroupHeads.Length; i++) { actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]); } // Allows us the easily iterate and assert the actual and expected results.
Array.Sort(expectedGroupHeads); Array.Sort(actualGroupHeads); if (VERBOSE) { Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name); Console.WriteLine("Sort within group: " + sortWithinGroup); Console.WriteLine("Num group: " + numGroups); Console.WriteLine("Num doc: " + numDocs); Console.WriteLine("\n=== Expected: \n"); foreach (int expectedDocId in expectedGroupHeads) { GroupDoc expectedGroupDoc = groupDocs[expectedDocId]; string expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString(); Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(), expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId) ); } Console.WriteLine("\n=== Actual: \n"); foreach (int actualDocId in actualGroupHeads) { GroupDoc actualGroupDoc = groupDocs[actualDocId]; string actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString(); Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(), actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId) ); } Console.WriteLine("\n==================================================================================="); } assertArrayEquals(expectedGroupHeads, actualGroupHeads); } } finally { QueryUtils.PurgeFieldCache(r); } r.Dispose(); dir.Dispose(); } }
/// <summary>
/// Encodes a nullable doc-values type as the on-disk byte:
/// 0 = no type (null), 1 = NUMERIC, 2 = BINARY, 3 = SORTED, 4 = SORTED_SET.
/// </summary>
/// <exception cref="InvalidOperationException">for an unrecognized type</exception>
private static sbyte DocValuesByte(DocValuesType? type)
{
    if (type == null)
    {
        return 0;
    }
    switch (type.Value)
    {
        case DocValuesType.NUMERIC: return 1;
        case DocValuesType.BINARY: return 2;
        case DocValuesType.SORTED: return 3;
        case DocValuesType.SORTED_SET: return 4;
        default: throw new InvalidOperationException();
    }
}
/// <summary>
/// Reads the Lucene 4.2 field infos file (<c>.fnm</c>) for the given segment and
/// reconstructs the per-field metadata (index options, doc values type, norms type,
/// attributes) into a <see cref="FieldInfos"/> instance.
/// </summary>
/// <param name="directory">Directory to open the field infos file from.</param>
/// <param name="segmentName">Name of the segment whose field infos are read.</param>
/// <param name="segmentSuffix">Segment suffix (unused by this format; the file name is built without it).</param>
/// <param name="iocontext">I/O context for opening the input.</param>
/// <returns>The decoded <see cref="FieldInfos"/> for the segment.</returns>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);
    // 'success' distinguishes a normal close from a close-on-error in the finally block below.
    bool success = false;
    try
    {
        CodecUtil.CheckHeader(input, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_START, Lucene42FieldInfosFormat.FORMAT_CURRENT);

        int size = input.ReadVInt32(); //read in the size
        FieldInfo[] infos = new FieldInfo[size];

        // NOTE: the read order below (name, number, flags byte, DV byte, attributes)
        // mirrors the on-disk format exactly and must not be reordered.
        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = input.ReadVInt32();
            sbyte bits = (sbyte)input.ReadByte();
            bool isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
            bool storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
            bool storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;

            // Decode index options from the same flags byte; the omit/offset bits are
            // mutually exclusive refinements checked in decreasing order of restriction.
            IndexOptions indexOptions;
            if (!isIndexed)
            {
                indexOptions = IndexOptions.NONE;
            }
            else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_ONLY;
            }
            else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            else
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // DV Types are packed in one byte: low nibble = doc values, high nibble = norms.
            sbyte val = (sbyte)input.ReadByte();
            DocValuesType docValuesType = GetDocValuesType(input, (byte)(val & 0x0F));
            DocValuesType normsType = GetDocValuesType(input, (byte)((val.TripleShift(4)) & 0x0F));
            IDictionary<string, string> attributes = input.ReadStringStringMap();
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, attributes.AsReadOnly());
        }

        CodecUtil.CheckEOF(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return (fieldInfos);
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            // An exception is already in flight; suppress any secondary dispose failure
            // so the original error is not masked.
            IOUtils.DisposeWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Builds a randomized index for the group/count tests: generates random group and
/// count values, indexes <c>numDocs</c> documents (optionally with doc values when the
/// codec supports them), and records the expected per-search-term group-to-counts map
/// so tests can verify collector output against it.
/// </summary>
/// <returns>
/// An <see cref="IndexContext"/> bundling the directory, reader, chosen doc values
/// type, the expected search-term → group → count-values map, and the content terms.
/// </returns>
private IndexContext CreateIndexContext()
{
    // NOTE: the exact sequence of random.nextInt calls below is part of the test's
    // reproducibility contract for a given seed — do not reorder statements.
    Random random = Random;
    DocValuesType[] dvTypes = new DocValuesType[] {
        DocValuesType.BINARY,
        DocValuesType.SORTED
    };
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
    );
    // The 3.x codec has no doc values support; fall back to NONE in that case.
    bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
    string[] groupValues = new string[numDocs / 5];
    string[] countValues = new string[numDocs / 10];
    for (int i = 0; i < groupValues.Length; i++)
    {
        groupValues[i] = GenerateRandomNonEmptyString();
    }
    for (int i = 0; i < countValues.Length; i++)
    {
        countValues[i] = GenerateRandomNonEmptyString();
    }

    List<string> contentStrings = new List<string>();
    // Expected results: content term -> (group value -> distinct count values).
    IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new HashMap<string, IDictionary<string, ISet<string>>>();
    for (int i = 1; i <= numDocs; i++)
    {
        // Occasionally use null group/count values to exercise the null-group paths.
        string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
        string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
        string content = "random" + random.nextInt(numDocs / 20);
        IDictionary<string, ISet<string>> groupToCounts;
        if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
        {
            // Groups sort always DOCID asc...
            // LinkedHashMap preserves insertion (docid) order; it also tolerates the
            // null keys used for the null-group case (a plain Dictionary would not).
            searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap<string, ISet<string>>());
            contentStrings.Add(content);
        }
        ISet<string> countsVals;
        if (!groupToCounts.TryGetValue(groupValue, out countsVals))
        {
            groupToCounts.Add(groupValue, countsVals = new HashSet<string>());
        }
        countsVals.Add(countValue);

        Document doc = new Document();
        doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
        if (groupValue != null)
        {
            AddField(doc, groupField, groupValue, dvType);
        }
        if (countValue != null)
        {
            AddField(doc, countField, countValue, dvType);
        }
        doc.Add(new TextField("content", content, Field.Store.YES));
        w.AddDocument(doc);
    }

    DirectoryReader reader = w.GetReader();
    if (VERBOSE)
    {
        for (int docID = 0; docID < reader.MaxDoc; docID++)
        {
            Document doc = reader.Document(docID);
            // NOTE(review): "author"/"publisher" are never added above, so these print
            // null here — looks copied from a sibling test; harmless, verbose-only.
            Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
        }
    }

    w.Dispose();
    return (new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
}
/// <summary>
/// Returns the <see cref="FieldInfo"/> for <paramref name="name"/>, creating and
/// registering a new one (with a globally assigned field number) when the field has not
/// been seen by this in-RAM segment, or merging the supplied flags into the existing
/// entry when it has.
/// </summary>
/// <param name="name">Field name.</param>
/// <param name="preferredFieldNumber">Preferred global field number, passed through to the global number registry.</param>
/// <param name="isIndexed">Whether the field is indexed.</param>
/// <param name="storeTermVector">Whether term vectors are stored.</param>
/// <param name="omitNorms">Whether norms are omitted.</param>
/// <param name="storePayloads">Whether payloads are stored.</param>
/// <param name="indexOptions">Index options for the field.</param>
/// <param name="docValues">Doc values type, or <see cref="DocValuesType.NONE"/>.</param>
/// <param name="normType">Norms type, or <see cref="DocValuesType.NONE"/>.</param>
/// <returns>The created or updated <see cref="FieldInfo"/>.</returns>
private FieldInfo AddOrUpdateInternal(string name, int preferredFieldNumber, bool isIndexed, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType)
{
    // LUCENENET: Bypass FieldInfo method so we can access the quick boolean check
    if (TryGetFieldInfo(name, out FieldInfo existing) && !(existing is null))
    {
        // Known field: fold the new flags into the existing entry.
        existing.Update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);

        if (docValues != DocValuesType.NONE)
        {
            // Only pay the synchronization cost when the field did not already have a
            // doc values type; setting DocValuesType also performs the consistency check.
            bool mustSyncGlobalMap = !existing.HasDocValues;
            existing.DocValuesType = docValues;
            if (mustSyncGlobalMap)
            {
                // Keep the global docValuesType map aware of this field's type.
                globalFieldNumbers.SetDocValuesType(existing.Number, name, docValues);
            }
        }

        if (!existing.OmitsNorms && normType != DocValuesType.NONE)
        {
            existing.NormType = normType;
        }

        return existing;
    }

    // First sighting of this field in the in-RAM segment: obtain a global number for it.
    // If the field was seen before (in another segment) we get the same name/number pair,
    // otherwise a fresh number is allocated.
    int assignedNumber = globalFieldNumbers.AddOrGet(name, preferredFieldNumber, docValues);
    FieldInfo created = new FieldInfo(name, isIndexed, assignedNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, null);
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(!byName.ContainsKey(created.Name));
        Debugging.Assert(globalFieldNumbers.ContainsConsistent(created.Number, created.Name, created.DocValuesType));
    }
    byName[created.Name] = created;
    return created;
}
/// <summary>
/// Indexes a small fixed set of documents (7 docs across 2 segments, with group field
/// values "1".."3" and count field values "1"/"2", one doc with no group) and verifies
/// that the distinct-values collector reports the expected distinct count values per
/// group for the queries content:random, content:some and content:blob, for a randomly
/// chosen doc values type (or no doc values on the 3.x codec).
/// </summary>
public virtual void TestSimple()
{
    Random random = Random;
    DocValuesType[] dvTypes = new DocValuesType[] {
        DocValuesType.NUMERIC,
        DocValuesType.BINARY,
        DocValuesType.SORTED,
    };
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
    // The 3.x codec has no doc values support; fall back to NONE in that case.
    bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

    // Doc 0: group=1, count=1
    Document doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "random text", Field.Store.NO));
    doc.Add(new StringField("id", "1", Field.Store.NO));
    w.AddDocument(doc);

    // 1
    doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
    doc.Add(new StringField("id", "2", Field.Store.NO));
    w.AddDocument(doc);

    // 2
    doc = new Document();
    AddField(doc, groupField, "1", dvType);
    AddField(doc, countField, "2", dvType);
    doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
    doc.Add(new StringField("id", "3", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit(); // To ensure a second segment

    // 3 (no count field -> null count value for group 2)
    doc = new Document();
    AddField(doc, groupField, "2", dvType);
    doc.Add(new TextField("content", "some random text", Field.Store.NO));
    doc.Add(new StringField("id", "4", Field.Store.NO));
    w.AddDocument(doc);

    // 4
    doc = new Document();
    AddField(doc, groupField, "3", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "some more random text", Field.Store.NO));
    doc.Add(new StringField("id", "5", Field.Store.NO));
    w.AddDocument(doc);

    // 5
    doc = new Document();
    AddField(doc, groupField, "3", dvType);
    AddField(doc, countField, "1", dvType);
    doc.Add(new TextField("content", "random blob", Field.Store.NO));
    doc.Add(new StringField("id", "6", Field.Store.NO));
    w.AddDocument(doc);

    // 6 -- no author field
    // NOTE(review): this doc reuses id "6" (same as the previous doc) — presumably
    // carried over from the upstream Java test; confirm against the original before
    // "fixing", since the id field is unused by the assertions below.
    doc = new Document();
    doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    AddField(doc, countField, "1", dvType);
    doc.Add(new StringField("id", "6", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = NewSearcher(w.GetReader());
    w.Dispose();

    // Comparer used to put group counts into a deterministic order before asserting.
    var cmp = new ComparerAnonymousHelper1(this);

    // === Search for content:random
    IAbstractFirstPassGroupingCollector<IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
    IAbstractDistinctValuesCollector<AbstractDistinctValuesCollector.IGroupCount<IComparable>> distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
    indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
    //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    var gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    // Expect 4 groups: null (doc 6), "1", "2", "3".
    assertEquals(4, gcs.Count);
    CompareNull(gcs[0].GroupValue);
    List<IComparable> countValues = new List<IComparable>(gcs[0].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);
    Compare("1", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    countValues.Sort(nullComparer);
    assertEquals(2, countValues.size());
    Compare("1", countValues[0]);
    Compare("2", countValues[1]);
    Compare("2", gcs[2].GroupValue);
    countValues = new List<IComparable>(gcs[2].UniqueValues);
    assertEquals(1, countValues.size());
    CompareNull(countValues[0]);
    Compare("3", gcs[3].GroupValue);
    countValues = new List<IComparable>(gcs[3].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);

    // === Search for content:some
    firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
    distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
    indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    // Expect 3 groups: "1", "2", "3" (the group-less doc does not match "some").
    assertEquals(3, gcs.Count);
    Compare("1", gcs[0].GroupValue);
    countValues = new List<IComparable>(gcs[0].UniqueValues);
    assertEquals(2, countValues.size());
    countValues.Sort(nullComparer);
    Compare("1", countValues[0]);
    Compare("2", countValues[1]);
    Compare("2", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    assertEquals(1, countValues.size());
    CompareNull(countValues[0]);
    Compare("3", gcs[2].GroupValue);
    countValues = new List<IComparable>(gcs[2].UniqueValues);
    assertEquals(1, countValues.size());
    Compare("1", countValues[0]);

    // === Search for content:blob
    firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
    indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
    distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
    indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
    //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
    gcs = new List<AbstractDistinctValuesCollector.IGroupCount<IComparable>>(distinctValuesCollector.Groups);
    gcs.Sort(cmp);
    // Expect 2 groups: "1" (doc 1) and "3" (doc 5).
    assertEquals(2, gcs.Count);
    Compare("1", gcs[0].GroupValue);
    countValues = new List<IComparable>(gcs[0].UniqueValues);
    // B/c the only one document matched with blob inside the author 1 group
    assertEquals(1, countValues.Count);
    Compare("1", countValues[0]);
    Compare("3", gcs[1].GroupValue);
    countValues = new List<IComparable>(gcs[1].UniqueValues);
    assertEquals(1, countValues.Count);
    Compare("1", countValues[0]);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}