public override BinaryDocValues GetBinary(FieldInfo field)
{
    lock (this)
    {
        if (!binaryInstances.TryGetValue(field.Number, out BinaryDocValues instance))
        {
            var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();
            if (type == LegacyDocValuesType.BYTES_FIXED_STRAIGHT)
                instance = LoadBytesFixedStraight(field);
            else if (type == LegacyDocValuesType.BYTES_VAR_STRAIGHT)
                instance = LoadBytesVarStraight(field);
            else if (type == LegacyDocValuesType.BYTES_FIXED_DEREF)
                instance = LoadBytesFixedDeref(field);
            else if (type == LegacyDocValuesType.BYTES_VAR_DEREF)
                instance = LoadBytesVarDeref(field);
            else
                throw AssertionError.Create();
            binaryInstances[field.Number] = instance;
        }
        return instance;
    }
}
internal virtual Int64Values GetNumeric(NumericEntry entry)
{
    IndexInput data = (IndexInput)this.data.Clone();
    data.Seek(entry.Offset);

    switch (entry.format)
    {
        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            BlockPackedReader reader = new BlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);
            return reader;

        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            long min = entry.minValue;
            long mult = entry.gcd;
            BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);
            return new Int64ValuesAnonymousClass(min, mult, quotientReader);

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            long[] table = entry.table;
            int bitsRequired = PackedInt32s.BitsRequired(table.Length - 1);
            PackedInt32s.Reader ords = PackedInt32s.GetDirectReaderNoHeader(data, PackedInt32s.Format.PACKED, entry.PackedInt32sVersion, (int)entry.Count, bitsRequired);
            return new Int64ValuesAnonymousClass2(table, ords);

        default:
            throw AssertionError.Create();
    }
}
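// Illustration only (not part of the codec): how the GCD_COMPRESSED case above maps a
// stored quotient back to the original value. The Int64ValuesAnonymousClass used above
// performs this same computation; the class and field names here are hypothetical.
internal sealed class GcdDecodingInt64Values : Int64Values
{
    private readonly long min;              // entry.minValue
    private readonly long mult;             // entry.gcd
    private readonly Int64Values quotients; // the BlockPackedReader of quotients

    internal GcdDecodingInt64Values(long min, long mult, Int64Values quotients)
    {
        this.min = min;
        this.mult = mult;
        this.quotients = quotients;
    }

    // value = min + gcd * quotient, the inverse of (value - min) / gcd at write time
    public override long Get(long index) => min + mult * quotients.Get(index);
}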
private static sbyte DocValuesByte(DocValuesType type)
{
    switch (type)
    {
        case DocValuesType.NONE: return 0;
        case DocValuesType.NUMERIC: return 1;
        case DocValuesType.BINARY: return 2;
        case DocValuesType.SORTED: return 3;
        case DocValuesType.SORTED_SET: return 4;
        default: throw AssertionError.Create();
    }
}
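// Hedged sketch of the inverse mapping (the reader side decodes the same byte back to a
// DocValuesType when parsing field infos); the method name and exception message are
// assumptions of this sketch, not the actual reader code.
private static DocValuesType GetDocValuesType(sbyte b)
{
    switch (b)
    {
        case 0: return DocValuesType.NONE;
        case 1: return DocValuesType.NUMERIC;
        case 2: return DocValuesType.BINARY;
        case 3: return DocValuesType.SORTED;
        case 4: return DocValuesType.SORTED_SET;
        default: throw new CorruptIndexException("invalid docvalues byte: " + b);
    }
}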
private void ReadFields(IndexInput meta /*, FieldInfos infos // LUCENENET: Not read */)
{
    int fieldNumber = meta.ReadVInt32();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
        }
        byte type = meta.ReadByte();
        if (type == Lucene45DocValuesFormat.NUMERIC)
        {
            numerics[fieldNumber] = ReadNumericEntry(meta);
        }
        else if (type == Lucene45DocValuesFormat.BINARY)
        {
            BinaryEntry b = ReadBinaryEntry(meta);
            binaries[fieldNumber] = b;
        }
        else if (type == Lucene45DocValuesFormat.SORTED)
        {
            ReadSortedField(fieldNumber, meta /*, infos // LUCENENET: Never read */);
        }
        else if (type == Lucene45DocValuesFormat.SORTED_SET)
        {
            SortedSetEntry ss = ReadSortedSetEntry(meta);
            sortedSets[fieldNumber] = ss;
            if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
            {
                ReadSortedSetFieldWithAddresses(fieldNumber, meta /*, infos // LUCENENET: Never read */);
            }
            else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
            {
                if (meta.ReadVInt32() != fieldNumber)
                {
                    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                }
                if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                {
                    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                }
                ReadSortedField(fieldNumber, meta /*, infos // LUCENENET: Never read */);
            }
            else
            {
                throw AssertionError.Create();
            }
        }
        else
        {
            throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
        }
        fieldNumber = meta.ReadVInt32();
    }
}
protected virtual void Recompose(IList<SrndQuery> queries)
{
    if (queries.Count < 2)
    {
        throw AssertionError.Create("Too few subqueries");
    }
    this.m_queries = new JCG.List<SrndQuery>(queries);
}
public override SortedDocValues GetSorted(FieldInfo field)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (!sortedInstances.TryGetValue(field.Number, out SortedDocValues instance))
        {
            string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");
            IndexInput data = null;
            IndexInput index = null;
            bool success = false;
            try
            {
                data = dir.OpenInput(dataName, state.Context);
                index = dir.OpenInput(indexName, state.Context);
                var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();
                if (type == LegacyDocValuesType.BYTES_FIXED_SORTED)
                    instance = LoadBytesFixedSorted(/* field, // LUCENENET: Never read */ data, index);
                else if (type == LegacyDocValuesType.BYTES_VAR_SORTED)
                    instance = LoadBytesVarSorted(/* field, // LUCENENET: Never read */ data, index);
                else
                    throw AssertionError.Create();
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                success = true;
            }
            finally
            {
                if (success)
                    IOUtils.Dispose(data, index);
                else
                    IOUtils.DisposeWhileHandlingException(data, index);
            }
            sortedInstances[field.Number] = instance;
        }
        return instance;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
{
    lock (this)
    {
        if (!mayMerge.Value && writer.NextMerge() != null)
        {
            throw AssertionError.Create();
        }
        base.Merge(writer, trigger, newMergesFound);
    }
}
private NumericDocValues LoadNumeric(FieldInfo field)
{
    NumericEntry entry = numerics[field.Number];
    data.Seek(entry.offset + entry.missingBytes);
    switch (entry.format)
    {
        case TABLE_COMPRESSED:
            int size = data.ReadVInt32();
            if (size > 256)
            {
                throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
            }
            var decode = new long[size];
            for (int i = 0; i < decode.Length; i++)
            {
                decode[i] = data.ReadInt64();
            }
            int formatID = data.ReadVInt32();
            int bitsPerValue = data.ReadVInt32();
            var ordsReader = PackedInt32s.GetReaderNoHeader(data, PackedInt32s.Format.ById(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
            return new NumericDocValuesAnonymousClass(decode, ordsReader);

        case DELTA_COMPRESSED:
            int blockSize = data.ReadVInt32();
            var reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(reader.RamBytesUsed());
            return reader;

        case UNCOMPRESSED:
            var bytes = new byte[maxDoc];
            data.ReadBytes(bytes, 0, bytes.Length);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
            // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
            return new NumericDocValuesAnonymousClass2((sbyte[])(Array)bytes);

        case GCD_COMPRESSED:
            long min = data.ReadInt64();
            long mult = data.ReadInt64();
            int quotientBlockSize = data.ReadVInt32();
            var quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
            return new NumericDocValuesAnonymousClass3(min, mult, quotientReader);

        default:
            throw AssertionError.Create();
    }
}
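// Side note on the UNCOMPRESSED case above: the (sbyte[])(Array) cast is a zero-copy
// reinterpretation. The CLR allows casting byte[] to sbyte[] through Array/object because
// the element types have the same size, so no copy is made and negative bit patterns
// survive. A minimal standalone demonstration (illustration only):
byte[] raw = { 0xFF, 0x01 };
sbyte[] signed = (sbyte[])(Array)raw; // same backing array, no allocation
// signed[0] == -1 and signed[1] == 1; writing signed[i] also changes raw[i].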
public virtual void AddSpanQuery(Search.Query q)
{
    if (q == SrndQuery.TheEmptyLcnQuery)
    {
        return;
    }
    if (!(q is SpanQuery))
    {
        throw AssertionError.Create("Expected SpanQuery: " + q.ToString(FieldName));
    }
    AddSpanQueryWeighted((SpanQuery)q, q.Boost);
}
public static Search.Query MakeBooleanQuery(IList<Search.Query> queries, Occur occur)
{
    if (queries.Count <= 1)
    {
        throw AssertionError.Create("Too few subqueries: " + queries.Count);
    }
    BooleanQuery bq = new BooleanQuery();
    AddQueriesToBoolean(bq, queries, occur);
    return bq;
}
private NumericDocValues LoadNumeric(NumericEntry entry)
{
    data.Seek(entry.offset + entry.missingBytes);
    switch (entry.byteWidth)
    {
        case 1:
        {
            var values = new byte[entry.count];
            data.ReadBytes(values, 0, entry.count);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
            // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
            return new NumericDocValuesAnonymousClass((sbyte[])(Array)values);
        }
        case 2:
        {
            var values = new short[entry.count];
            for (int i = 0; i < entry.count; i++)
            {
                values[i] = data.ReadInt16();
            }
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
            return new NumericDocValuesAnonymousClass2(values);
        }
        case 4:
        {
            var values = new int[entry.count];
            for (var i = 0; i < entry.count; i++)
            {
                values[i] = data.ReadInt32();
            }
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
            return new NumericDocValuesAnonymousClass3(values);
        }
        case 8:
        {
            var values = new long[entry.count];
            for (int i = 0; i < entry.count; i++)
            {
                values[i] = data.ReadInt64();
            }
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
            return new NumericDocValuesAnonymousClass4(values);
        }
        default:
            throw AssertionError.Create();
    }
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (!mayMerge.Value && writer.NextMerge() != null)
        {
            throw AssertionError.Create();
        }
        base.Merge(writer, trigger, newMergesFound);
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
internal virtual void WriteSequence()
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(SequenceIsConsistent());
    }
    try
    {
        WriteHeader(reverse, clean, dirtyWords.Length);
    }
    catch (Exception cannotHappen) when (cannotHappen.IsIOException())
    {
        throw AssertionError.Create(cannotHappen.Message, cannotHappen);
    }
    @out.WriteBytes(dirtyWords.Bytes, 0, dirtyWords.Length);
    dirtyWords.Length = 0;
    ++numSequences;
}
internal Field GetNumericField(string name, NumericType type)
{
    Field f;
    if (reuseFields)
    {
        numericFields.TryGetValue(name, out f);
    }
    else
    {
        f = null;
    }
    if (f == null)
    {
        switch (type)
        {
            case NumericType.INT32:
                f = new Int32Field(name, 0, Field.Store.NO);
                break;
            case NumericType.INT64:
                f = new Int64Field(name, 0L, Field.Store.NO);
                break;
            case NumericType.SINGLE:
                f = new SingleField(name, 0.0F, Field.Store.NO);
                break;
            case NumericType.DOUBLE:
                f = new DoubleField(name, 0.0, Field.Store.NO);
                break;
            default:
                throw AssertionError.Create("Cannot get here");
        }
        if (reuseFields)
        {
            numericFields[name] = f;
        }
    }
    return f;
}
public override BinaryDocValues GetBinary(FieldInfo field)
{
    BinaryEntry bytes = binaries[field.Number];
    switch (bytes.format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            return GetFixedBinary(/*field, LUCENENET: Never read */ bytes);
        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            return GetVariableBinary(field, bytes);
        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            return GetCompressedBinary(field, bytes);
        default:
            throw AssertionError.Create();
    }
}
protected override SortedDocValues GetSortedDocValues(AtomicReaderContext context, string field)
{
    SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, field);
    if (sortedSet.ValueCount >= int.MaxValue)
    {
        throw UnsupportedOperationException.Create("fields containing more than " + (int.MaxValue - 1) + " unique terms are unsupported");
    }
    SortedDocValues singleton = DocValues.UnwrapSingleton(sortedSet);
    if (singleton != null)
    {
        // it's actually single-valued in practice, but indexed as multi-valued,
        // so just sort on the underlying single-valued dv directly.
        // regardless of selector type, this optimization is safe!
        return singleton;
    }
    else if (outerInstance.selector == Selector.MIN)
    {
        return new MinValue(sortedSet);
    }
    else
    {
        if (!(sortedSet is RandomAccessOrds randomOrds))
        {
            throw UnsupportedOperationException.Create("codec does not support random access ordinals, cannot use selector: " + outerInstance.selector);
        }
        switch (outerInstance.selector)
        {
            case Selector.MAX:
                return new MaxValue(randomOrds);
            case Selector.MIDDLE_MIN:
                return new MiddleMinValue(randomOrds);
            case Selector.MIDDLE_MAX:
                return new MiddleMaxValue(randomOrds);
            case Selector.MIN:
            default:
                throw AssertionError.Create();
        }
    }
}
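// Hypothetical usage of the selector dispatch above (sketch only; the field name and the
// three-argument SortedSetSortField constructor are assumptions): sort on the per-document
// minimum ordinal of a multi-valued "tags" field.
Sort sortByMinTag = new Sort(new SortedSetSortField("tags", false, Selector.MIN));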
/// <summary>
/// Create a random instance.
/// </summary>
public static CompressingCodec RandomInstance(Random random, int chunkSize, bool withSegmentSuffix)
{
    switch (random.Next(4))
    {
        case 0:
            return new FastCompressingCodec(chunkSize, withSegmentSuffix);
        case 1:
            return new FastDecompressionCompressingCodec(chunkSize, withSegmentSuffix);
        case 2:
            return new HighCompressionCompressingCodec(chunkSize, withSegmentSuffix);
        case 3:
            return new DummyCompressingCodec(chunkSize, withSegmentSuffix);
        default:
            throw AssertionError.Create();
    }
}
public override IBits GetDocsWithField(FieldInfo field)
{
    switch (field.DocValuesType)
    {
        case DocValuesType.SORTED_SET:
            return DocValues.DocsWithValue(GetSortedSet(field), maxDoc);
        case DocValuesType.SORTED:
            return DocValues.DocsWithValue(GetSorted(field), maxDoc);
        case DocValuesType.BINARY:
            return GetBinaryDocsWithField(field);
        case DocValuesType.NUMERIC:
            return GetNumericDocsWithField(field);
        default:
            throw AssertionError.Create();
    }
}
private static void ReadField(DataInput @in, StoredFieldVisitor visitor, FieldInfo info, int bits)
{
    switch (bits & CompressingStoredFieldsWriter.TYPE_MASK)
    {
        case CompressingStoredFieldsWriter.BYTE_ARR:
            int length = @in.ReadVInt32();
            var data = new byte[length];
            @in.ReadBytes(data, 0, length);
            visitor.BinaryField(info, data);
            break;
        case CompressingStoredFieldsWriter.STRING:
            length = @in.ReadVInt32();
            data = new byte[length];
            @in.ReadBytes(data, 0, length);
#pragma warning disable 612, 618
            visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data));
#pragma warning restore 612, 618
            break;
        case CompressingStoredFieldsWriter.NUMERIC_INT32:
            visitor.Int32Field(info, @in.ReadInt32());
            break;
        case CompressingStoredFieldsWriter.NUMERIC_SINGLE:
            visitor.SingleField(info, J2N.BitConversion.Int32BitsToSingle(@in.ReadInt32()));
            break;
        case CompressingStoredFieldsWriter.NUMERIC_INT64:
            visitor.Int64Field(info, @in.ReadInt64());
            break;
        case CompressingStoredFieldsWriter.NUMERIC_DOUBLE:
            visitor.DoubleField(info, J2N.BitConversion.Int64BitsToDouble(@in.ReadInt64()));
            break;
        default:
            throw AssertionError.Create("Unknown type flag: " + bits.ToString("x"));
    }
}
public override IBits GetDocsWithField(FieldInfo field)
{
    switch (field.DocValuesType)
    {
        case DocValuesType.SORTED_SET:
            return DocValues.DocsWithValue(GetSortedSet(field), maxDoc);
        case DocValuesType.SORTED:
            return DocValues.DocsWithValue(GetSorted(field), maxDoc);
        case DocValuesType.BINARY:
            BinaryEntry be = binaries[field.Number];
            return GetMissingBits(field.Number, be.missingOffset, be.missingBytes);
        case DocValuesType.NUMERIC:
            NumericEntry ne = numerics[field.Number];
            return GetMissingBits(field.Number, ne.missingOffset, ne.missingBytes);
        default:
            throw AssertionError.Create();
    }
}
public static BulkOperation Of(PackedInt32s.Format format, int bitsPerValue)
{
    if (format == PackedInt32s.Format.PACKED)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(packedBulkOps[bitsPerValue - 1] != null);
        }
        return packedBulkOps[bitsPerValue - 1];
    }
    else if (format == PackedInt32s.Format.PACKED_SINGLE_BLOCK)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(packedSingleBlockBulkOps[bitsPerValue - 1] != null);
        }
        return packedSingleBlockBulkOps[bitsPerValue - 1];
    }
    else
    {
        throw AssertionError.Create();
    }
}
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    SortedSetEntry ss = sortedSets[field.Number];
    if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
    {
        SortedDocValues values = GetSorted(field);
        return DocValues.Singleton(values);
    }
    else if (ss.Format != Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
    {
        throw AssertionError.Create();
    }

    IndexInput data = (IndexInput)this.data.Clone();
    long valueCount = binaries[field.Number].Count;
    // we keep the byte[]s and list of ords on disk, these could be large
    Int64BinaryDocValues binary = (Int64BinaryDocValues)GetBinary(field);
    Int64Values ordinals = GetNumeric(ords[field.Number]);
    // but the addresses to the ord stream are in RAM
    MonotonicBlockPackedReader ordIndex = GetOrdIndexInstance(data, field, ordIndexes[field.Number]);

    return new RandomAccessOrdsAnonymousClass(valueCount, binary, ordinals, ordIndex);
}
private static void SkipField(DataInput @in, int bits)
{
    switch (bits & CompressingStoredFieldsWriter.TYPE_MASK)
    {
        case CompressingStoredFieldsWriter.BYTE_ARR:
        case CompressingStoredFieldsWriter.STRING:
            int length = @in.ReadVInt32();
            @in.SkipBytes(length);
            break;
        case CompressingStoredFieldsWriter.NUMERIC_INT32:
        case CompressingStoredFieldsWriter.NUMERIC_SINGLE:
            @in.ReadInt32();
            break;
        case CompressingStoredFieldsWriter.NUMERIC_INT64:
        case CompressingStoredFieldsWriter.NUMERIC_DOUBLE:
            @in.ReadInt64();
            break;
        default:
            throw AssertionError.Create("Unknown type flag: " + bits.ToString("x"));
    }
}
public override void WriteField(FieldInfo info, IIndexableField field)
{
    int bits /* = 0*/; // LUCENENET: IDE0059: Remove unnecessary value assignment
    BytesRef bytes;
    string @string;
    // LUCENENET specific - To avoid boxing/unboxing, we don't
    // call GetNumericValue(). Instead, we check the field.NumericType and then
    // call the appropriate conversion method.
    if (field.NumericType != NumericFieldType.NONE)
    {
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                bits = NUMERIC_INT32;
                break;
            case NumericFieldType.INT64:
                bits = NUMERIC_INT64;
                break;
            case NumericFieldType.SINGLE:
                bits = NUMERIC_SINGLE;
                break;
            case NumericFieldType.DOUBLE:
                bits = NUMERIC_DOUBLE;
                break;
            default:
                throw new ArgumentException("cannot store numeric type " + field.NumericType);
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.GetBinaryValue();
        if (bytes != null)
        {
            bits = BYTE_ARR;
            @string = null;
        }
        else
        {
            bits = STRING;
            @string = field.GetStringValue();
            if (@string == null)
            {
                throw new ArgumentException("field " + field.Name + " is stored but does not have BinaryValue, StringValue nor NumericValue");
            }
        }
    }

    long infoAndBits = (((long)info.Number) << TYPE_BITS) | (uint)bits;
    bufferedDocs.WriteVInt64(infoAndBits);

    if (bytes != null)
    {
        bufferedDocs.WriteVInt32(bytes.Length);
        bufferedDocs.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        bufferedDocs.WriteString(field.GetStringValue());
    }
    else
    {
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                bufferedDocs.WriteInt32(field.GetInt32Value().Value);
                break;
            case NumericFieldType.INT64:
                bufferedDocs.WriteInt64(field.GetInt64Value().Value);
                break;
            case NumericFieldType.SINGLE:
                bufferedDocs.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                break;
            case NumericFieldType.DOUBLE:
                bufferedDocs.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                break;
            default:
                throw AssertionError.Create("Cannot get here");
        }
    }
}
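// Sketch of the bit layout produced above (illustration; the constant values are
// assumptions consistent with a 3-bit type field): the field number lives in the high
// bits and the type flag in the low TYPE_BITS bits, so the reader can recover both
// from a single VLong.
internal static class FieldBitsSketch
{
    internal const int TYPE_BITS = 3;                   // enough for the six type constants
    internal const int TYPE_MASK = (1 << TYPE_BITS) - 1;

    internal static long Pack(int fieldNumber, int typeFlag)
        => (((long)fieldNumber) << TYPE_BITS) | (uint)typeFlag;

    internal static int FieldNumberOf(long infoAndBits)
        => (int)(infoAndBits >> TYPE_BITS);             // infoAndBits is non-negative here

    internal static int TypeFlagOf(long infoAndBits)
        => (int)(infoAndBits & TYPE_MASK);
}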
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            DocValuesType type = field.DocValuesType;
            if (type != DocValuesType.NONE)
            {
                if (type == DocValuesType.NUMERIC)
                {
                    IList<NumericDocValues> toMerge = new JCG.List<NumericDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.BINARY)
                {
                    IList<BinaryDocValues> toMerge = new JCG.List<BinaryDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.SORTED)
                {
                    IList<SortedDocValues> toMerge = new JCG.List<SortedDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedDocValues values = reader.GetSortedDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedField(field, mergeState, toMerge);
                }
                else if (type == DocValuesType.SORTED_SET)
                {
                    IList<SortedSetDocValues> toMerge = new JCG.List<SortedSetDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED_SET;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedSetField(field, mergeState, toMerge);
                }
                else
                {
                    throw AssertionError.Create("type=" + type);
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    long count = 0;
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    JCG.HashSet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new JCG.HashSet<long>();
        foreach (long? nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
    }
    else
    {
        foreach (var nv in values)
        {
            ++count;
        }
    }

    long delta = maxValue - minValue;

    int format;
    if (uniqueValues != null && (delta < 0L || PackedInt32s.BitsRequired(uniqueValues.Count - 1) < PackedInt32s.BitsRequired(delta)) && count <= int.MaxValue)
    {
        format = TABLE_COMPRESSED;
    }
    else if (gcd != 0 && gcd != 1)
    {
        format = GCD_COMPRESSED;
    }
    else
    {
        format = DELTA_COMPRESSED;
    }
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
    meta.WriteVInt32(format);
    if (missing)
    {
        meta.WriteInt64(data.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        WriteMissingBitset(values);
    }
    else
    {
        meta.WriteInt64(-1L);
    }
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    meta.WriteInt64(data.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    meta.WriteVInt64(count);
    meta.WriteVInt32(BLOCK_SIZE);

    switch (format)
    {
        case GCD_COMPRESSED:
            meta.WriteInt64(minValue);
            meta.WriteInt64(gcd);
            BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                quotientWriter.Add((nv.GetValueOrDefault() - minValue) / gcd);
            }
            quotientWriter.Finish();
            break;

        case DELTA_COMPRESSED:
            BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(nv.GetValueOrDefault());
            }
            writer.Finish();
            break;

        case TABLE_COMPRESSED:
            // LUCENENET NOTE: dimensioning an array and then using .CopyTo() for better efficiency than LINQ .ToArray()
            long[] decode = new long[uniqueValues.Count];
            uniqueValues.CopyTo(decode, 0);
            Dictionary<long, int> encode = new Dictionary<long, int>();
            meta.WriteVInt32(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                meta.WriteInt64(decode[i]);
                encode[decode[i]] = i;
            }
            int bitsRequired = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
            PackedInt32s.Writer ordsWriter = PackedInt32s.GetWriterNoHeader(data, PackedInt32s.Format.PACKED, (int)count, bitsRequired, PackedInt32s.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                ordsWriter.Add(encode[nv.GetValueOrDefault()]);
            }
            ordsWriter.Finish();
            break;

        default:
            throw AssertionError.Create();
    }
}
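// Worked example (illustration only) of the format selection above:
// - values {100, 200, 300}: 3 distinct values, delta = 200. BitsRequired(3 - 1) = 2 bits
//   per ordinal beats BitsRequired(200) = 8 bits per delta, so TABLE_COMPRESSED wins and
//   each document stores a 2-bit ordinal into the {100, 200, 300} table.
// - values 0, 4, 8, ..., 4_000_000 (too many distinct values for a table): gcd = 4, so
//   GCD_COMPRESSED stores the quotients (v - minValue) / 4 instead of the raw values.
// - anything else falls back to DELTA_COMPRESSED, which block-packs the values directly.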
private void DoTest(DocValuesType type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    int nDocs = AtLeast(50);
    Field id = new NumericDocValuesField("id", 0);
    Field f;
    switch (type)
    {
        case DocValuesType.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;
        case DocValuesType.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;
        case DocValuesType.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;
        default:
            throw AssertionError.Create();
    }
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    object[] vals = new object[nDocs];

    RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);
    for (int i = 0; i < nDocs; ++i)
    {
        id.SetInt64Value(i);
        switch (type)
        {
            case DocValuesType.SORTED:
            case DocValuesType.BINARY:
                do
                {
                    vals[i] = TestUtil.RandomSimpleString(Random, 20);
                } while (((string)vals[i]).Length == 0);
                f.SetBytesValue(new BytesRef((string)vals[i]));
                break;
            case DocValuesType.NUMERIC:
                int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
                vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                f.SetInt64Value((long)vals[i]);
                break;
        }
        iw.AddDocument(document);
        if (Random.NextBoolean() && i % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leaf in rd.Leaves)
    {
        FunctionValues ids = new Int64FieldSource("id").GetValues(null, leaf);
        ValueSource vs;
        switch (type)
        {
            case DocValuesType.BINARY:
            case DocValuesType.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;
            case DocValuesType.NUMERIC:
                vs = new Int64FieldSource("dv");
                break;
            default:
                throw AssertionError.Create();
        }
        FunctionValues values = vs.GetValues(null, leaf);
        BytesRef bytes = new BytesRef();
        for (int i = 0; i < leaf.AtomicReader.MaxDoc; ++i)
        {
            assertTrue(values.Exists(i));
            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(i) is string);
            }
            else if (vs is Int64FieldSource)
            {
                assertTrue(values.ObjectVal(i) is J2N.Numerics.Int64);
                assertTrue(values.BytesVal(i, bytes));
            }
            else
            {
                throw AssertionError.Create();
            }

            object expected = vals[ids.Int32Val(i)];
            switch (type)
            {
                case DocValuesType.SORTED:
                    values.OrdVal(i); // no exception
                    assertTrue(values.NumOrd >= 1);
                    goto case DocValuesType.BINARY;
                case DocValuesType.BINARY:
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertTrue(values.BytesVal(i, bytes));
                    assertEquals(new BytesRef((string)expected), bytes);
                    break;
                case DocValuesType.NUMERIC:
                    assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                    break;
            }
        }
    }
    rd.Dispose();
    d.Dispose();
}
public override Fields Get(int doc)
{
    EnsureOpen();

    // seek to the right place
    {
        long startPointer = indexReader.GetStartPointer(doc);
        vectorsStream.Seek(startPointer);
    }

    // decode
    // - docBase: first doc ID of the chunk
    // - chunkDocs: number of docs of the chunk
    int docBase = vectorsStream.ReadVInt32();
    int chunkDocs = vectorsStream.ReadVInt32();
    if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
    {
        throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
    }

    int skip; // number of fields to skip
    int numFields; // number of fields of the document we're looking for
    int totalFields; // total number of fields of the chunk (sum for all docs)
    if (chunkDocs == 1)
    {
        skip = 0;
        numFields = totalFields = vectorsStream.ReadVInt32();
    }
    else
    {
        reader.Reset(vectorsStream, chunkDocs);
        int sum = 0;
        for (int i = docBase; i < doc; ++i)
        {
            sum += (int)reader.Next();
        }
        skip = sum;
        numFields = (int)reader.Next();
        sum += numFields;
        for (int i = doc + 1; i < docBase + chunkDocs; ++i)
        {
            sum += (int)reader.Next();
        }
        totalFields = sum;
    }

    if (numFields == 0)
    {
        // no vectors
        return null;
    }

    // read field numbers that have term vectors
    int[] fieldNums;
    {
        int token = vectorsStream.ReadByte() & 0xFF;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0
        }
        int bitsPerFieldNum = token & 0x1F;
        int totalDistinctFields = token.TripleShift(5);
        if (totalDistinctFields == 0x07)
        {
            totalDistinctFields += vectorsStream.ReadVInt32();
        }
        ++totalDistinctFields;
        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
        fieldNums = new int[totalDistinctFields];
        for (int i = 0; i < totalDistinctFields; ++i)
        {
            fieldNums[i] = (int)it.Next();
        }
    }

    // read field numbers and flags
    int[] fieldNumOffs = new int[numFields];
    PackedInt32s.Reader flags;
    {
        int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
        PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
        switch (vectorsStream.ReadVInt32())
        {
            case 0:
                PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                for (int i = 0; i < totalFields; ++i)
                {
                    int fieldNumOff = (int)allFieldNumOffs.Get(i);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                    }
                    int fgs = (int)fieldFlags.Get(fieldNumOff);
                    f.Set(i, fgs);
                }
                flags = f;
                break;
            case 1:
                flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                break;
            default:
                throw AssertionError.Create();
        }
        for (int i = 0; i < numFields; ++i)
        {
            fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
        }
    }

    // number of terms per field for all fields
    PackedInt32s.Reader numTerms;
    int totalTerms;
    {
        int bitsRequired = vectorsStream.ReadVInt32();
        numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
        int sum = 0;
        for (int i = 0; i < totalFields; ++i)
        {
            sum += (int)numTerms.Get(i);
        }
        totalTerms = sum;
    }

    // term lengths
    int docOff = 0, docLen = 0, totalLen;
    int[] fieldLengths = new int[numFields];
    int[][] prefixLengths = new int[numFields][];
    int[][] suffixLengths = new int[numFields][];
    {
        reader.Reset(vectorsStream, totalTerms);
        // skip
        int toSkip = 0;
        for (int i = 0; i < skip; ++i)
        {
            toSkip += (int)numTerms.Get(i);
        }
        reader.Skip(toSkip);
        // read prefix lengths
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldPrefixLengths = new int[termCount];
            prefixLengths[i] = fieldPrefixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
        }
        reader.Skip(totalTerms - reader.Ord);

        reader.Reset(vectorsStream, totalTerms);
        // skip
        //toSkip = 0; // LUCENENET: IDE0059: Remove unnecessary value assignment
        for (int i = 0; i < skip; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                docOff += (int)reader.Next();
            }
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldSuffixLengths = new int[termCount];
            suffixLengths[i] = fieldSuffixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
            fieldLengths[i] = Sum(suffixLengths[i]);
            docLen += fieldLengths[i];
        }
        totalLen = docOff + docLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                totalLen += (int)reader.Next();
            }
        }
    }

    // term freqs
    int[] termFreqs = new int[totalTerms];
    {
        reader.Reset(vectorsStream, totalTerms);
        for (int i = 0; i < totalTerms;)
        {
            Int64sRef next = reader.Next(totalTerms - i);
            for (int k = 0; k < next.Length; ++k)
            {
                termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
            }
        }
    }

    // total number of positions, offsets and payloads
    int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
    for (int i = 0, termIndex = 0; i < totalFields; ++i)
    {
        int f = (int)flags.Get(i);
        int termCount = (int)numTerms.Get(i);
        for (int j = 0; j < termCount; ++j)
        {
            int freq = termFreqs[termIndex++];
            if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
            {
                totalPositions += freq;
            }
            if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
            {
                totalOffsets += freq;
            }
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                totalPayloads += freq;
            }
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(i != totalFields - 1 || termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
        }
    }

    int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
    int[][] positions, startOffsets, lengths;
    if (totalPositions > 0)
    {
        positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
    }
    else
    {
        positions = new int[numFields][];
    }

    if (totalOffsets > 0)
    {
        // average number of chars per term
        float[] charsPerTerm = new float[fieldNums.Length];
        for (int i = 0; i < charsPerTerm.Length; ++i)
        {
            charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
        }
        startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
        lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

        for (int i = 0; i < numFields; ++i)
        {
            int[] fStartOffsets = startOffsets[i];
            int[] fPositions = positions[i];
            // patch offsets from positions
            if (fStartOffsets != null && fPositions != null)
            {
                float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                for (int j = 0; j < startOffsets[i].Length; ++j)
                {
                    fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                }
            }
            if (fStartOffsets != null)
            {
                int[] fPrefixLengths = prefixLengths[i];
                int[] fSuffixLengths = suffixLengths[i];
                int[] fLengths = lengths[i];
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode start offsets and patch lengths using term lengths
                    int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                    lengths[i][positionIndex[i][j]] += termLength;
                    for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                    {
                        fStartOffsets[k] += fStartOffsets[k - 1];
                        fLengths[k] += termLength;
                    }
                }
            }
        }
    }
    else
    {
        startOffsets = lengths = new int[numFields][];
    }
    if (totalPositions > 0)
    {
        // delta-decode positions
        for (int i = 0; i < numFields; ++i)
        {
            int[] fPositions = positions[i];
            int[] fpositionIndex = positionIndex[i];
            if (fPositions != null)
            {
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode start offsets
                    for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                    {
                        fPositions[k] += fPositions[k - 1];
                    }
                }
            }
        }
    }

    // payload lengths
    int[][] payloadIndex = new int[numFields][];
    int totalPayloadLength = 0;
    int payloadOff = 0;
    int payloadLen = 0;
    if (totalPayloads > 0)
    {
        reader.Reset(vectorsStream, totalPayloads);
        // skip
        int termIndex = 0;
        for (int i = 0; i < skip; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int l = (int)reader.Next();
                        payloadOff += l;
                    }
                }
            }
            termIndex += termCount;
        }
        totalPayloadLength = payloadOff;
        // read doc payload lengths
        for (int i = 0; i < numFields; ++i)
        {
            int f = (int)flags.Get(skip + i);
            int termCount = (int)numTerms.Get(skip + i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                int totalFreq = positionIndex[i][termCount];
                payloadIndex[i] = new int[totalFreq + 1];
                int posIdx = 0;
                payloadIndex[i][posIdx] = payloadLen;
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int payloadLength = (int)reader.Next();
                        payloadLen += payloadLength;
                        payloadIndex[i][posIdx + 1] = payloadLen;
                        ++posIdx;
                    }
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(posIdx == totalFreq);
                }
            }
            termIndex += termCount;
        }
        totalPayloadLength += payloadLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        totalPayloadLength += (int)reader.Next();
                    }
                }
            }
            termIndex += termCount;
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
        }
    }

    // decompress data
    BytesRef suffixBytes = new BytesRef();
    decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
    suffixBytes.Length = docLen;
    BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

    int[] FieldFlags = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        FieldFlags[i] = (int)flags.Get(skip + i);
    }

    int[] fieldNumTerms = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        fieldNumTerms[i] = (int)numTerms.Get(skip + i);
    }

    int[][] fieldTermFreqs = new int[numFields][];
    {
        int termIdx = 0;
        for (int i = 0; i < skip; ++i)
        {
            termIdx += (int)numTerms.Get(i);
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            fieldTermFreqs[i] = new int[termCount];
            for (int j = 0; j < termCount; ++j)
            {
                fieldTermFreqs[i][j] = termFreqs[termIdx++];
            }
        }
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(Sum(fieldLengths) == docLen, "{0} != {1}", Sum(fieldLengths), docLen);
    }

    return new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes);
}
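// Minimal sketch (illustration only) of the in-place delta decoding used above for
// positions and start offsets: within each term's slice of the array, every value after
// the first is stored as a gap from its predecessor.
internal static void DeltaDecodeSlice(int[] values, int start, int end)
{
    for (int k = start + 1; k < end; ++k)
    {
        values[k] += values[k - 1]; // cumulative sum restores absolute values
    }
}
// e.g. a slice {5, 2, 3} decodes in place to {5, 7, 10}.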
public override NumericDocValues GetNumeric(FieldInfo field)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (!numericInstances.TryGetValue(field.Number, out NumericDocValues instance))
        {
            string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            IndexInput input = dir.OpenInput(fileName, state.Context);
            bool success = false;
            try
            {
                var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();
                //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey)))
                //{
                if (type == LegacyDocValuesType.VAR_INTS)
                    instance = LoadVarInt32sField(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FIXED_INTS_8)
                    instance = LoadByteField(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FIXED_INTS_16)
                    instance = LoadInt16Field(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FIXED_INTS_32)
                    instance = LoadInt32Field(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FIXED_INTS_64)
                    instance = LoadInt64Field(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FLOAT_32)
                    instance = LoadSingleField(/* field, // LUCENENET: Never read */ input);
                else if (type == LegacyDocValuesType.FLOAT_64)
                    instance = LoadDoubleField(/* field, // LUCENENET: Never read */ input);
                else
                    throw AssertionError.Create();
                CodecUtil.CheckEOF(input);
                success = true;
            }
            finally
            {
                if (success)
                    IOUtils.Dispose(input);
                else
                    IOUtils.DisposeWhileHandlingException(input);
            }
            numericInstances[field.Number] = instance;
        }
        return instance;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
public override void WriteField(FieldInfo info, IIndexableField field)
{
    fieldsStream.WriteVInt32(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...

    // LUCENENET specific - To avoid boxing/unboxing, we don't
    // call GetNumericValue(). Instead, we check the field.NumericType and then
    // call the appropriate conversion method.
    if (field.NumericType != NumericFieldType.NONE)
    {
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                bits |= FIELD_IS_NUMERIC_INT;
                break;
            case NumericFieldType.INT64:
                bits |= FIELD_IS_NUMERIC_LONG;
                break;
            case NumericFieldType.SINGLE:
                bits |= FIELD_IS_NUMERIC_FLOAT;
                break;
            case NumericFieldType.DOUBLE:
                bits |= FIELD_IS_NUMERIC_DOUBLE;
                break;
            default:
                throw new ArgumentException("cannot store numeric type " + field.NumericType);
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.GetBinaryValue();
        if (bytes != null)
        {
            bits |= FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.GetStringValue();
            if (@string is null)
            {
                throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }
    fieldsStream.WriteByte((byte)bits);

    if (bytes != null)
    {
        fieldsStream.WriteVInt32(bytes.Length);
        fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        fieldsStream.WriteString(field.GetStringValue());
    }
    else
    {
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                fieldsStream.WriteInt32(field.GetInt32Value().Value);
                break;
            case NumericFieldType.INT64:
                fieldsStream.WriteInt64(field.GetInt64Value().Value);
                break;
            case NumericFieldType.SINGLE:
                fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                break;
            case NumericFieldType.DOUBLE:
                fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                break;
            default:
                throw AssertionError.Create("Cannot get here");
        }
    }
}