/// <summary>
/// Writes a binary doc-values field. The bytes of every value are appended to the data
/// stream; the metadata stream receives the field number, the BYTES marker, the start
/// position and byte count of the payload, and the minimum and maximum value length.
/// </summary>
/// <param name="field">Field whose number is written to the metadata and whose name appears in the error message.</param>
/// <param name="values">Values to write. A <c>null</c> entry is counted with length 0 and contributes no bytes. The sequence is enumerated a second time when the lengths differ.</param>
/// <exception cref="System.ArgumentException">A value is longer than <c>Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.</exception>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Entry header: field number, then the BYTES type marker.
    Meta.WriteVInt(field.Number);
    Meta.WriteByte(Lucene42DocValuesProducer.BYTES);

    int shortest = int.MaxValue;
    int longest = int.MinValue;
    long payloadStart = Data.FilePointer;

    // First pass: validate each value, track the length range and append its bytes.
    foreach (BytesRef value in values)
    {
        int length;
        if (value == null)
        {
            length = 0;
        }
        else
        {
            length = value.Length;
        }

        if (length > Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH)
        {
            throw new System.ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
        }

        if (length < shortest)
        {
            shortest = length;
        }
        if (length > longest)
        {
            longest = length;
        }

        if (value != null)
        {
            Data.WriteBytes(value.Bytes, value.Offset, value.Length);
        }
    }

    Meta.WriteLong(payloadStart);
    Meta.WriteLong(Data.FilePointer - payloadStart);
    Meta.WriteVInt(shortest);
    Meta.WriteVInt(longest);

    // Equal min and max means fixed-length values: the addresses are implicit, nothing more to write.
    if (shortest == longest)
    {
        return;
    }

    // Variable-length values: record the running end offset of every value.
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

    MonotonicBlockPackedWriter offsets = new MonotonicBlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
    long runningEnd = 0;
    foreach (BytesRef value in values)
    {
        if (value != null)
        {
            runningEnd += value.Length;
        }
        offsets.Add(runningEnd);
    }
    offsets.Finish();
}
/// <summary>
/// Writes a binary doc-values field: value bytes go to <c>data</c>, while <c>meta</c> gets the
/// field number, the BYTES marker, the payload start and size, and the min/max value length.
/// </summary>
/// <param name="field">Field whose number is written to the metadata and whose name appears in the error message.</param>
/// <param name="values">Values to write. A <c>null</c> entry has length 0 and writes no bytes. The sequence is enumerated again when the lengths are not all equal.</param>
/// <exception cref="ArgumentException">A value is longer than <c>Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.</exception>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Entry header.
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)Lucene42DocValuesProducer.BYTES);

    int smallest = int.MaxValue;
    int largest = int.MinValue;

    // LUCENENET specific: Position is the renamed getFilePointer(), matching FileStream.
    long blobStart = data.Position;

    // Write the byte[] data while collecting the length range.
    foreach (BytesRef item in values)
    {
        int length;
        if (item is null)
        {
            length = 0;
        }
        else
        {
            length = item.Length;
        }

        if (length > Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH)
        {
            throw new ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
        }

        if (length < smallest)
        {
            smallest = length;
        }
        if (length > largest)
        {
            largest = length;
        }

        if (item != null)
        {
            data.WriteBytes(item.Bytes, item.Offset, item.Length);
        }
    }

    meta.WriteInt64(blobStart);
    meta.WriteInt64(data.Position - blobStart);
    meta.WriteVInt32(smallest);
    meta.WriteVInt32(largest);

    // Fixed-length byte[]: the addresses are implicit, so we are done.
    if (smallest == largest)
    {
        return;
    }

    // Otherwise record the cumulative end offset of each value.
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    meta.WriteVInt32(Lucene42DocValuesProducer.BLOCK_SIZE);

    MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(data, Lucene42DocValuesProducer.BLOCK_SIZE);
    long endOffset = 0;
    foreach (BytesRef item in values)
    {
        if (item != null)
        {
            endOffset += item.Length;
        }
        addressWriter.Add(endOffset);
    }
    addressWriter.Finish();
}
/// <summary>
/// Writes a sorted-set doc-values field. If <c>IsSingleValued(docToOrdCount)</c> is true the
/// field is written through <c>AddSortedField</c>; otherwise the terms dictionary, the ord
/// stream and a table of cumulative per-document ord counts are written.
/// </summary>
/// <param name="field">Field being written; its number is emitted in each metadata header.</param>
/// <param name="values">The values, passed on to <c>AddSortedField</c> or <c>AddTermsDict</c>.</param>
/// <param name="docToOrdCount">Ord count per document; every element must have a value.</param>
/// <param name="ords">The stream of ords, written as a numeric field in the multi-valued case.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.SORTED_SET);

    if (IsSingleValued(docToOrdCount))
    {
        // The field is single-valued, so it can be encoded as SORTED.
        Meta.WriteVInt(SORTED_SET_SINGLE_VALUED_SORTED);
        AddSortedField(field, values, GetSortedSetEnumerable(docToOrdCount, ords));
    }
    else
    {
        Meta.WriteVInt(SORTED_SET_WITH_ADDRESSES);

        // ord -> byte[] dictionary
        AddTermsDict(field, values);

        // The stream of ords as a numeric field.
        // NOTE: an iterator that delta-encodes these within a doc could be used instead.
        AddNumericField(field, ords, false);

        // doc -> ord count, stored as an absolute index into the ord stream
        Meta.WriteVInt(field.Number);
        Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
        Meta.WriteVInt(DELTA_COMPRESSED);
        Meta.WriteLong(-1L);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Meta.WriteLong(Data.FilePointer);
        Meta.WriteVLong(MaxDoc);
        Meta.WriteVInt(BLOCK_SIZE);

        MonotonicBlockPackedWriter ordIndex = new MonotonicBlockPackedWriter(Data, BLOCK_SIZE);
        long cumulative = 0;
        foreach (long? ordCount in docToOrdCount)
        {
            cumulative += ordCount.Value;
            ordIndex.Add(cumulative);
        }
        ordIndex.Finish();
    }
}
/// <summary>
/// Writes a sorted-set doc-values field, either as a plain sorted field (when
/// <c>IsSingleValued(docToOrdCount)</c> holds) or as a terms dictionary plus an ord stream
/// plus a table of running ord totals per document.
/// </summary>
/// <param name="field">Field being written; its number is emitted in each metadata header.</param>
/// <param name="values">The values, forwarded to <c>AddTermsDict</c> or <c>AddSortedField</c>.</param>
/// <param name="docToOrdCount">Ord count per document; every element must have a value.</param>
/// <param name="ords">The ord stream, written via <c>AddNumericField</c> in the multi-valued case.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.SORTED_SET);

    bool singleValued = IsSingleValued(docToOrdCount);
    if (!singleValued)
    {
        Meta.WriteVInt(SORTED_SET_WITH_ADDRESSES);

        // Write the ord -> byte[] mapping as a binary field.
        AddTermsDict(field, values);

        // Write the stream of ords as a numeric field.
        // NOTE: we could return an iterator that delta-encodes these within a doc.
        AddNumericField(field, ords, false);

        // Write doc -> ord count as an absolute index into the stream.
        Meta.WriteVInt(field.Number);
        Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
        Meta.WriteVInt(DELTA_COMPRESSED);
        Meta.WriteLong(-1L);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Meta.WriteLong(Data.FilePointer);
        Meta.WriteVLong(MaxDoc);
        Meta.WriteVInt(BLOCK_SIZE);

        MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(Data, BLOCK_SIZE);
        long total = 0;
        foreach (long? perDoc in docToOrdCount)
        {
            total += perDoc.Value;
            addresses.Add(total);
        }
        addresses.Finish();
        return;
    }

    // Single-valued: encode it as SORTED.
    Meta.WriteVInt(SORTED_SET_SINGLE_VALUED_SORTED);
    AddSortedField(field, values, GetSortedSetEnumerable(docToOrdCount, ords));
}
/// <summary>
/// Expert: writes a value dictionary for a sorted/sortedset field.
/// </summary>
/// <remarks>
/// If all values have the same length the dictionary is written through
/// <c>AddBinaryField</c>. Otherwise each value is written as a shared-prefix length, a suffix
/// length and the suffix bytes, and the start address of every <c>ADDRESS_INTERVAL</c>-th
/// value (relative to the first one) is recorded after the values.
/// </remarks>
/// <param name="field">Field the dictionary belongs to.</param>
/// <param name="values">Dictionary values; elements are dereferenced without a null check. Enumerated twice.</param>
protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
{
    // First check whether this is a "fixed-length" terms dict.
    int shortest = int.MaxValue;
    int longest = int.MinValue;
    foreach (BytesRef term in values)
    {
        int length = term.Length;
        if (length < shortest)
        {
            shortest = length;
        }
        if (length > longest)
        {
            longest = length;
        }
    }

    if (shortest == longest)
    {
        // No index needed: direct addressing by multiplication.
        AddBinaryField(field, values);
        return;
    }

    // Header.
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
    Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
    Meta.WriteLong(-1L);

    // Now write the bytes, sharing prefixes within a block.
    long termsStart = Data.FilePointer;

    // Currently the delta from expected has to be stored for every 1/nth term;
    // this could be avoided, but it is not much and uses less RAM overall than the previous approach.
    RAMOutputStream addressBuffer = new RAMOutputStream();
    MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE);
    BytesRef previous = new BytesRef();
    long termCount = 0;

    foreach (BytesRef term in values)
    {
        bool startsBlock = termCount % ADDRESS_INTERVAL == 0;
        if (startsBlock)
        {
            termAddresses.Add(Data.FilePointer - termsStart);
            // Force the first term in a block to be abs-encoded.
            previous.Length = 0;
        }

        // Prefix-code the term against the previous one.
        int shared = StringHelper.BytesDifference(previous, term);
        int suffix = term.Length - shared;
        Data.WriteVInt(shared);
        Data.WriteVInt(suffix);
        Data.WriteBytes(term.Bytes, term.Offset + shared, suffix);

        previous.CopyBytes(term);
        termCount++;
    }

    // Write the addresses of the indexed terms after the term data.
    long indexStart = Data.FilePointer;
    termAddresses.Finish();
    addressBuffer.WriteTo(Data);

    Meta.WriteVInt(shortest);
    Meta.WriteVInt(longest);
    Meta.WriteVLong(termCount);
    Meta.WriteLong(termsStart);
    Meta.WriteVInt(ADDRESS_INTERVAL);
    Meta.WriteLong(indexStart);
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteVInt(BLOCK_SIZE);
}
/// <summary>
/// Writes a binary doc-values field. Value bytes are appended to the data stream; the
/// metadata stream records the field number, the BINARY marker, the fixed/variable format,
/// the position of the missing bitset (or -1), the min/max length, the value count and the
/// payload start.
/// </summary>
/// <param name="field">Field whose number is written to the metadata.</param>
/// <param name="values">Values to write. A <c>null</c> entry has length 0, writes no bytes and causes the missing bitset to be written. May be enumerated more than once.</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);

    long payloadStart = Data.FilePointer;
    int shortest = int.MaxValue;
    int longest = int.MinValue;
    long valueCount = 0;
    bool anyMissing = false;

    // Write the byte[] data.
    foreach (BytesRef value in values)
    {
        int length = 0;
        if (value == null)
        {
            anyMissing = true;
        }
        else
        {
            length = value.Length;
        }

        if (length < shortest)
        {
            shortest = length;
        }
        if (length > longest)
        {
            longest = length;
        }

        if (value != null)
        {
            Data.WriteBytes(value.Bytes, value.Offset, value.Length);
        }
        valueCount++;
    }

    bool fixedLength = shortest == longest;
    if (fixedLength)
    {
        Meta.WriteVInt(BINARY_FIXED_UNCOMPRESSED);
    }
    else
    {
        Meta.WriteVInt(BINARY_VARIABLE_UNCOMPRESSED);
    }

    if (anyMissing)
    {
        Meta.WriteLong(Data.FilePointer);
        WriteMissingBitset(values);
    }
    else
    {
        Meta.WriteLong(-1L);
    }

    Meta.WriteVInt(shortest);
    Meta.WriteVInt(longest);
    Meta.WriteVLong(valueCount);
    Meta.WriteLong(payloadStart);

    // Fixed-length byte[]: the addresses are implicit, so we are done.
    if (fixedLength)
    {
        return;
    }

    // Otherwise record the running end offset of every value.
    Meta.WriteLong(Data.FilePointer);
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteVInt(BLOCK_SIZE);

    MonotonicBlockPackedWriter offsets = new MonotonicBlockPackedWriter(Data, BLOCK_SIZE);
    long runningEnd = 0;
    foreach (BytesRef value in values)
    {
        if (value != null)
        {
            runningEnd += value.Length;
        }
        offsets.Add(runningEnd);
    }
    offsets.Finish();
}
/// <summary>
/// Expert: writes a value dictionary for a sorted/sortedset field.
/// </summary>
/// <remarks>
/// Values of uniform length are handed to <c>AddBinaryField</c>. Values of differing length
/// are prefix-coded against the preceding value, with the prefix reset at every
/// <c>ADDRESS_INTERVAL</c>-th value, whose relative start address is also recorded.
/// </remarks>
/// <param name="field">Field the dictionary belongs to.</param>
/// <param name="values">Dictionary values; elements are dereferenced without a null check. Enumerated twice.</param>
protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Determine the length range to detect a "fixed-length" terms dict.
    int lowLength = int.MaxValue;
    int highLength = int.MinValue;
    foreach (BytesRef entry in values)
    {
        int entryLength = entry.Length;
        if (entryLength < lowLength)
        {
            lowLength = entryLength;
        }
        if (entryLength > highLength)
        {
            highLength = entryLength;
        }
    }

    if (lowLength == highLength)
    {
        // No index needed: direct addressing by multiplication.
        AddBinaryField(field, values);
        return;
    }

    // Header.
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
    Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
    Meta.WriteLong(-1L);

    // Term bytes follow, sharing prefixes within a block.
    long dataStart = Data.FilePointer;

    // The delta from expected is stored for every 1/nth term; this could be avoided,
    // but it is small and needs less RAM overall than the previous approach.
    RAMOutputStream bufferedAddresses = new RAMOutputStream();
    MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(bufferedAddresses, BLOCK_SIZE);
    BytesRef priorTerm = new BytesRef();
    long written = 0;

    foreach (BytesRef entry in values)
    {
        if (written % ADDRESS_INTERVAL == 0)
        {
            addressWriter.Add(Data.FilePointer - dataStart);
            // An empty prior term forces the first term of a block to be abs-encoded.
            priorTerm.Length = 0;
        }

        // Prefix-code.
        int prefixLength = StringHelper.BytesDifference(priorTerm, entry);
        int suffixLength = entry.Length - prefixLength;
        Data.WriteVInt(prefixLength);
        Data.WriteVInt(suffixLength);
        Data.WriteBytes(entry.Bytes, entry.Offset + prefixLength, suffixLength);

        priorTerm.CopyBytes(entry);
        written++;
    }

    // Addresses of the indexed terms go right after the term data.
    long addressesStart = Data.FilePointer;
    addressWriter.Finish();
    bufferedAddresses.WriteTo(Data);

    Meta.WriteVInt(lowLength);
    Meta.WriteVInt(highLength);
    Meta.WriteVLong(written);
    Meta.WriteLong(dataStart);
    Meta.WriteVInt(ADDRESS_INTERVAL);
    Meta.WriteLong(addressesStart);
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteVInt(BLOCK_SIZE);
}
/// <summary>
/// Writes a binary doc-values field: value bytes to the data stream, and to the metadata
/// stream the field number, the BINARY marker, the fixed/variable format code, the missing
/// bitset position (or -1), the min/max length, the number of values and the payload start.
/// </summary>
/// <param name="field">Field whose number is written to the metadata.</param>
/// <param name="values">Values to write. A <c>null</c> entry has length 0, writes no bytes and triggers the missing bitset. May be enumerated more than once.</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);

    int smallest = int.MaxValue;
    int largest = int.MinValue;
    long blobStart = Data.FilePointer;
    long total = 0;
    bool sawNull = false;

    // Write the byte[] data.
    foreach (BytesRef item in values)
    {
        int itemLength;
        if (item == null)
        {
            sawNull = true;
            itemLength = 0;
        }
        else
        {
            itemLength = item.Length;
        }

        if (itemLength < smallest)
        {
            smallest = itemLength;
        }
        if (itemLength > largest)
        {
            largest = itemLength;
        }

        if (item != null)
        {
            Data.WriteBytes(item.Bytes, item.Offset, item.Length);
        }
        total++;
    }

    if (smallest == largest)
    {
        Meta.WriteVInt(BINARY_FIXED_UNCOMPRESSED);
    }
    else
    {
        Meta.WriteVInt(BINARY_VARIABLE_UNCOMPRESSED);
    }

    if (!sawNull)
    {
        Meta.WriteLong(-1L);
    }
    else
    {
        Meta.WriteLong(Data.FilePointer);
        WriteMissingBitset(values);
    }

    Meta.WriteVInt(smallest);
    Meta.WriteVInt(largest);
    Meta.WriteVLong(total);
    Meta.WriteLong(blobStart);

    // With equal min and max the values are fixed-length and the addresses are implicit;
    // only the variable-length case needs the end offsets recorded.
    if (smallest != largest)
    {
        Meta.WriteLong(Data.FilePointer);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Meta.WriteVInt(BLOCK_SIZE);

        MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(Data, BLOCK_SIZE);
        long endOffset = 0;
        foreach (BytesRef item in values)
        {
            if (item != null)
            {
                endOffset += item.Length;
            }
            addressWriter.Add(endOffset);
        }
        addressWriter.Finish();
    }
}
/// <summary>
/// Writes a binary doc-values field. Value bytes are appended to <c>data</c>; <c>meta</c>
/// receives the field number, the BYTES marker, the payload start and size, the missing
/// bitset start and size (or a single -1), and the min/max value length.
/// </summary>
/// <param name="field">Field whose number is written to the metadata and whose name appears in the error message.</param>
/// <param name="values">Values to write. A <c>null</c> entry has length 0, writes no bytes and causes the missing bitset to be written. May be enumerated more than once.</param>
/// <exception cref="ArgumentException">A value is longer than <c>MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.</exception>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte(MemoryDocValuesProducer.BYTES);

    int shortest = int.MaxValue;
    int longest = int.MinValue;
    long payloadStart = data.GetFilePointer();
    bool anyMissing = false;

    // Write the byte[] data.
    foreach (BytesRef value in values)
    {
        int length = 0;
        if (value == null)
        {
            anyMissing = true;
        }
        else
        {
            length = value.Length;
        }

        if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH)
        {
            throw new ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
        }

        if (length < shortest)
        {
            shortest = length;
        }
        if (length > longest)
        {
            longest = length;
        }

        if (value != null)
        {
            data.WriteBytes(value.Bytes, value.Offset, value.Length);
        }
    }

    meta.WriteInt64(payloadStart);
    meta.WriteInt64(data.GetFilePointer() - payloadStart);

    if (!anyMissing)
    {
        meta.WriteInt64(-1L);
    }
    else
    {
        // The bitset is written to data first; its start and size follow in meta.
        long bitsetStart = data.GetFilePointer();
        WriteMissingBitset(values);
        meta.WriteInt64(bitsetStart);
        meta.WriteInt64(data.GetFilePointer() - bitsetStart);
    }

    meta.WriteVInt32(shortest);
    meta.WriteVInt32(longest);

    // Fixed-length byte[]: the addresses are implicit, so we are done.
    if (shortest == longest)
    {
        return;
    }

    // Otherwise record the running end offset of every value.
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    meta.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

    MonotonicBlockPackedWriter offsets = new MonotonicBlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
    long runningEnd = 0;
    foreach (BytesRef value in values)
    {
        if (value != null)
        {
            runningEnd += value.Length;
        }
        offsets.Add(runningEnd);
    }
    offsets.Finish();
}
// Converted from Java. The original signature was
//   @Override public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws java.io.IOException
// 'throws' clauses and 'final' parameters/locals have no .NET equivalent and were dropped.
/// <summary>
/// Writes a binary doc-values field. Value bytes are appended to <c>data</c>; <c>meta</c>
/// receives the field number, the BYTES marker, the payload start and size, the missing
/// bitset start and size (or a single -1), and the min/max value length.
/// </summary>
/// <param name="field">Field whose number is written to the metadata and whose name appears in the error message.</param>
/// <param name="values">Values to write. A <c>null</c> entry has length 0, writes no bytes and causes the missing bitset to be written. May be enumerated more than once.</param>
/// <exception cref="System.ArgumentException">A value is longer than <c>MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.</exception>
public override void addBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    meta.writeVInt(field.number);
    meta.writeByte(BYTES);

    int shortest = int.MaxValue;
    int longest = int.MinValue;
    long payloadStart = data.FilePointer;
    bool anyMissing = false;

    // Write the byte[] data.
    foreach (BytesRef value in values)
    {
        int length = 0;
        if (value == null)
        {
            anyMissing = true;
        }
        else
        {
            length = value.length;
        }

        if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH)
        {
            throw new System.ArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
        }

        if (length < shortest)
        {
            shortest = length;
        }
        if (length > longest)
        {
            longest = length;
        }

        if (value != null)
        {
            data.writeBytes(value.bytes, value.offset, value.length);
        }
    }

    meta.writeLong(payloadStart);
    meta.writeLong(data.FilePointer - payloadStart);

    if (!anyMissing)
    {
        meta.writeLong(-1L);
    }
    else
    {
        // The bitset is written to data first; its start and size follow in meta.
        long bitsetStart = data.FilePointer;
        writeMissingBitset(values);
        meta.writeLong(bitsetStart);
        meta.writeLong(data.FilePointer - bitsetStart);
    }

    meta.writeVInt(shortest);
    meta.writeVInt(longest);

    // Fixed-length byte[]: the addresses are implicit, so we are done.
    if (shortest == longest)
    {
        return;
    }

    // Otherwise record the running end offset of every value.
    meta.writeVInt(PackedInts.VERSION_CURRENT);
    meta.writeVInt(BLOCK_SIZE);

    MonotonicBlockPackedWriter offsets = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
    long runningEnd = 0;
    foreach (BytesRef value in values)
    {
        if (value != null)
        {
            runningEnd += value.length;
        }
        offsets.add(runningEnd);
    }
    offsets.finish();
}