/// <summary>
/// Writes a sorted, variable-length bytes field in the legacy
/// <see cref="LegacyDocValuesType.BYTES_VAR_SORTED"/> layout.
/// <para/>
/// The raw value bytes are appended to <paramref name="data"/>. The total byte
/// length, the packed start address of every value (followed by one sentinel
/// address) and the packed per-document ordinals are appended to <paramref name="index"/>.
/// </summary>
/// <param name="field"> the field being written; it is tagged with the legacy type attribute </param>
/// <param name="data"> output receiving the codec header and the value bytes </param>
/// <param name="index"> output receiving the codec header, the addresses and the ordinals </param>
/// <param name="values"> the values to write; this sequence is enumerated twice </param>
/// <param name="docToOrd"> the ordinal of each document's value </param>
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */
    long dataStart = data.GetFilePointer();
    int valueCount = 0;
    foreach (BytesRef term in values)
    {
        data.WriteBytes(term.Bytes, term.Offset, term.Length);
        valueCount++;
    }

    /* addresses */
    long maxAddress = data.GetFilePointer() - dataStart;
    index.WriteInt64(maxAddress);

    if (Debugging.AssertsEnabled)
    {
        // unsupported by the 4.0 impl
        Debugging.Assert(valueCount != int.MaxValue);
    }

    // one slot per value plus one for the sentinel
    PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
    long address = 0;
    foreach (BytesRef term in values)
    {
        addressWriter.Add(address);
        address += term.Length;
    }

    // write sentinel
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(address == maxAddress);
    }
    addressWriter.Add(address);
    addressWriter.Finish();

    /* ordinals */
    int maxDoc = state.SegmentInfo.DocCount;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount > 0);
    }

    PackedInt32s.Writer ordWriter = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
    foreach (long? ord in docToOrd)
    {
        ordWriter.Add((long)ord);
    }
    ordWriter.Finish();
}
/// <summary>
/// Writes a numeric field. A first pass over <paramref name="values"/> gathers the
/// minimum, maximum, a running GCD of the deltas to the minimum and (while there are
/// at most 256 of them) the set of distinct values. The encoding is then chosen in
/// this order: uncompressed bytes, table-compressed, GCD-compressed, delta-compressed.
/// </summary>
/// <param name="field"> the field whose number is recorded in the metadata </param>
/// <param name="values"> one value per document; enumerated twice, entries are asserted to be non-null </param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)NUMBER);
    meta.WriteInt64(data.GetFilePointer());

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // TODO: more efficient?
    JCG.HashSet<long> uniqueValues = new JCG.HashSet<long>();

    long count = 0;
    foreach (long? nv in values)
    {
        Debug.Assert(nv != null);
        long v = nv.Value;

        if (gcd != 1)
        {
            bool deltaMayOverflow = v < long.MinValue / 2 || v > long.MaxValue / 2;
            if (deltaMayOverflow)
            {
                // in that case v - minValue might overflow and make the GCD computation return
                // wrong results. Since these extreme values are unlikely, we just discard
                // GCD computation for them
                gcd = 1;
            }
            else if (count != 0)
            {
                // minValue needs to be set first
                gcd = MathUtil.Gcd(gcd, v - minValue);
            }
        }

        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);

        // stop tracking distinct values once there are more than 256 of them
        if (uniqueValues != null && uniqueValues.Add(v) && uniqueValues.Count > 256)
        {
            uniqueValues = null;
        }

        ++count;
    }
    Debug.Assert(count == maxDoc);

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
        bool fitsInSignedByte = minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue;

        if (formatAndBits.BitsPerValue == 8 && fitsInSignedByte)
        {
            // uncompressed
            meta.WriteByte((byte)UNCOMPRESSED);
            foreach (long? nv in values)
            {
                data.WriteByte((byte)nv.GetValueOrDefault());
            }
            return;
        }

        // table-compressed
        meta.WriteByte((byte)TABLE_COMPRESSED);
        var decode = new long[uniqueValues.Count];
        uniqueValues.CopyTo(decode);
        var encode = new Dictionary<long, int>();
        data.WriteVInt32(decode.Length);
        for (int ord = 0; ord < decode.Length; ord++)
        {
            long tableValue = decode[ord];
            data.WriteInt64(tableValue);
            encode[tableValue] = ord;
        }

        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteVInt32(formatAndBits.Format.Id);
        data.WriteVInt32(formatAndBits.BitsPerValue);

        PackedInt32s.Writer ordWriter = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
        foreach (long? nv in values)
        {
            ordWriter.Add(encode[nv.GetValueOrDefault()]);
        }
        ordWriter.Finish();
        return;
    }

    if (gcd != 0 && gcd != 1)
    {
        meta.WriteByte((byte)GCD_COMPRESSED);
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteInt64(minValue);
        data.WriteInt64(gcd);
        data.WriteVInt32(BLOCK_SIZE);

        var quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            quotientWriter.Add((nv.GetValueOrDefault() - minValue) / gcd);
        }
        quotientWriter.Finish();
        return;
    }

    // delta-compressed
    meta.WriteByte((byte)DELTA_COMPRESSED);
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    data.WriteVInt32(BLOCK_SIZE);

    var blockWriter = new BlockPackedWriter(data, BLOCK_SIZE);
    foreach (long? nv in values)
    {
        blockWriter.Add(nv.GetValueOrDefault());
    }
    blockWriter.Finish();
}
/// <summary>
/// Writes a numeric field in the legacy <see cref="LegacyDocValuesType.VAR_INTS"/> layout.
/// When <c>maxValue - minValue</c> overflows (the difference is negative) every value is
/// written as a raw 64-bit integer; otherwise the values are written as packed offsets
/// from <paramref name="minValue"/>.
/// </summary>
/// <param name="field"> the field being written; it is tagged with the legacy type attribute </param>
/// <param name="output"> output receiving the codec header and the values </param>
/// <param name="values"> one value per document; <c>null</c> entries are written as <c>0</c> </param>
/// <param name="minValue"> the smallest value, as supplied by the caller </param>
/// <param name="maxValue"> the largest value, as supplied by the caller </param>
[ExceptionToNetNumericConvention] // LUCENENET: Private API, keeping as-is
private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.VAR_INTS.ToString());

    CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

    long range = maxValue - minValue;
    bool rangeOverflowed = range < 0;

    if (!rangeOverflowed)
    {
        // writes packed ints
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
        output.WriteInt64(minValue);
        output.WriteInt64(0 - minValue); // default value (representation of 0)

        PackedInt32s.Writer packed = PackedInt32s.GetWriter(output, state.SegmentInfo.DocCount, PackedInt32s.BitsRequired(range), PackedInt32s.DEFAULT);
        foreach (long? value in values)
        {
            packed.Add(value.GetValueOrDefault() - minValue);
        }
        packed.Finish();
    }
    else
    {
        // writes longs
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
        foreach (long? value in values)
        {
            output.WriteInt64(value.GetValueOrDefault());
        }
    }
}
/// <summary>
/// Go to the chunk containing the provided <paramref name="doc"/> ID.
/// <para/>
/// Reads the chunk header (doc base and doc count), validates it against the current
/// position and the total number of documents, grows the per-document buffers when
/// needed, and then loads the number of stored fields and the length of every
/// document of the chunk.
/// </summary>
/// <param name="doc"> a document ID located after the current chunk (asserted) </param>
/// <exception cref="CorruptIndexException"> if the chunk header is inconsistent or a
/// bits-per-value entry is greater than 31 </exception>
internal void Next(int doc)
{
    Debug.Assert(doc >= this.docBase + this.chunkDocs, doc + " " + this.docBase + " " + this.chunkDocs);
    fieldsStream.Seek(outerInstance.indexReader.GetStartPointer(doc));

    int docBase = fieldsStream.ReadVInt32();
    int chunkDocs = fieldsStream.ReadVInt32();
    if (docBase < this.docBase + this.chunkDocs || docBase + chunkDocs > outerInstance.numDocs)
    {
        throw new CorruptIndexException("Corrupted: current docBase=" + this.docBase + ", current numDocs=" + this.chunkDocs + ", new docBase=" + docBase + ", new numDocs=" + chunkDocs + " (resource=" + fieldsStream + ")");
    }
    this.docBase = docBase;
    this.chunkDocs = chunkDocs;

    // both buffers are always resized together, so checking one length is enough
    if (chunkDocs > numStoredFields.Length)
    {
        int newLength = ArrayUtil.Oversize(chunkDocs, 4);
        numStoredFields = new int[newLength];
        lengths = new int[newLength];
    }

    if (chunkDocs == 1)
    {
        numStoredFields[0] = fieldsStream.ReadVInt32();
        lengths[0] = fieldsStream.ReadVInt32();
    }
    else
    {
        // 0 bits per value means "all documents share the single value that follows"
        int bitsPerStoredFields = fieldsStream.ReadVInt32();
        if (bitsPerStoredFields == 0)
        {
            Arrays.Fill(numStoredFields, 0, chunkDocs, fieldsStream.ReadVInt32());
        }
        else if (bitsPerStoredFields > 31)
        {
            throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
        }
        else
        {
            PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerStoredFields, 1);
            for (int i = 0; i < chunkDocs; ++i)
            {
                numStoredFields[i] = (int)it.Next();
            }
        }

        int bitsPerLength = fieldsStream.ReadVInt32();
        if (bitsPerLength == 0)
        {
            Arrays.Fill(lengths, 0, chunkDocs, fieldsStream.ReadVInt32());
        }
        else if (bitsPerLength > 31)
        {
            // include the resource like every other corruption message in this method
            throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
        }
        else
        {
            PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerLength, 1);
            for (int i = 0; i < chunkDocs; ++i)
            {
                lengths[i] = (int)it.Next();
            }
        }
    }
}
/// <summary>
/// Writes the two packed arrays that close this index: the cumulative terms-dictionary
/// pointers (one entry per index term) followed by the cumulative offsets into the
/// term bytes (one entry per index term plus a trailing end offset). The start
/// position of each array is recorded in <c>packedIndexStart</c> and
/// <c>packedOffsetsStart</c>, and the accumulation buffers are released afterwards.
/// </summary>
/// <param name="termsFilePointer"> value used to size the entries of the pointer array </param>
public override void Finish(long termsFilePointer)
{
    // write primary terms dict offsets
    packedIndexStart = outerInstance.m_output.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    PackedInt32s.Writer pointerWriter = PackedInt32s.GetWriter(outerInstance.m_output, numIndexTerms, PackedInt32s.BitsRequired(termsFilePointer), PackedInt32s.DEFAULT);

    // relative to our indexStart
    long runningPointer = 0;
    for (int term = 0; term < numIndexTerms; term++)
    {
        runningPointer += termsPointerDeltas[term];
        pointerWriter.Add(runningPointer);
    }
    pointerWriter.Finish();

    packedOffsetsStart = outerInstance.m_output.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    // write offsets into the byte[] terms
    PackedInt32s.Writer offsetWriter = PackedInt32s.GetWriter(outerInstance.m_output, 1 + numIndexTerms, PackedInt32s.BitsRequired(totTermLength), PackedInt32s.DEFAULT);
    long runningOffset = 0;
    for (int term = 0; term < numIndexTerms; term++)
    {
        offsetWriter.Add(runningOffset);
        runningOffset += termLengths[term];
    }
    offsetWriter.Add(runningOffset);
    offsetWriter.Finish();

    // our referrer holds onto us, while other fields are
    // being written, so don't tie up this RAM:
    termLengths = null;
    termsPointerDeltas = null;
}
/// <summary>
/// Writes the flags of the pending fields. When every occurrence of a field number
/// carries the same flags, a <c>0</c> marker is written followed by one packed flag
/// per entry of <paramref name="fieldNums"/>; otherwise a <c>1</c> marker is written
/// followed by one packed flag per field instance.
/// </summary>
/// <param name="totalFields"> total number of field instances across the pending documents </param>
/// <param name="fieldNums"> the field numbers in use; searched with a binary search, so it must be sorted </param>
private void FlushFlags(int totalFields, int[] fieldNums)
{
    // check if fields always have the same flags
    bool nonChangingFlags = true;
    int[] fieldFlags = new int[fieldNums.Length];
    Arrays.Fill(fieldFlags, -1); // -1 marks "no flags seen yet for this field number"

    foreach (DocData dd in pendingDocs)
    {
        foreach (FieldData fd in dd.fields)
        {
            int fieldNumOff = Array.BinarySearch(fieldNums, fd.fieldNum);
            Debug.Assert(fieldNumOff >= 0);
            if (fieldFlags[fieldNumOff] == -1)
            {
                fieldFlags[fieldNumOff] = fd.flags;
            }
            else if (fieldFlags[fieldNumOff] != fd.flags)
            {
                // the answer is known: stop scanning right away instead of
                // walking the remaining fields of this document
                nonChangingFlags = false;
                break;
            }
        }

        if (!nonChangingFlags)
        {
            break;
        }
    }

    if (nonChangingFlags)
    {
        // write one flag per field num
        vectorsStream.WriteVInt32(0);
        PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, fieldFlags.Length, FLAGS_BITS, 1);
        foreach (int flags in fieldFlags)
        {
            Debug.Assert(flags >= 0);
            writer.Add(flags);
        }
        Debug.Assert(writer.Ord == fieldFlags.Length - 1);
        writer.Finish();
    }
    else
    {
        // write one flag for every field instance
        vectorsStream.WriteVInt32(1);
        PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, totalFields, FLAGS_BITS, 1);
        foreach (DocData dd in pendingDocs)
        {
            foreach (FieldData fd in dd.fields)
            {
                writer.Add(fd.flags);
            }
        }
        Debug.Assert(writer.Ord == totalFields - 1);
        writer.Finish();
    }
}
/// <summary>
/// Reads the stored fields of document <paramref name="docID"/> and hands each of
/// them to <paramref name="visitor"/>.
/// <para/>
/// The chunk header is read and validated first, then the number of stored fields,
/// the offset and the length of the document inside the chunk are resolved, the
/// document bytes are decompressed, and finally every field is either read, skipped,
/// or the iteration is stopped, depending on what the visitor answers.
/// </summary>
/// <param name="docID"> the document to read </param>
/// <param name="visitor"> decides for each field whether it is read, skipped, or whether reading stops </param>
/// <exception cref="CorruptIndexException"> if the chunk header is inconsistent, a
/// bits-per-value entry is greater than 31, or exactly one of the document length and
/// the stored field count is zero </exception>
public override void VisitDocument(int docID, StoredFieldVisitor visitor)
{
    fieldsStream.Seek(indexReader.GetStartPointer(docID));

    int docBase = fieldsStream.ReadVInt32();
    int chunkDocs = fieldsStream.ReadVInt32();
    if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
    {
        throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
    }

    int numStoredFields, offset, length, totalLength;
    if (chunkDocs == 1)
    {
        numStoredFields = fieldsStream.ReadVInt32();
        offset = 0;
        length = fieldsStream.ReadVInt32();
        totalLength = length;
    }
    else
    {
        // 0 bits per value means "all documents share the single value that follows"
        int bitsPerStoredFields = fieldsStream.ReadVInt32();
        if (bitsPerStoredFields == 0)
        {
            numStoredFields = fieldsStream.ReadVInt32();
        }
        else if (bitsPerStoredFields > 31)
        {
            throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
        }
        else
        {
            long filePointer = fieldsStream.GetFilePointer();
            PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
            numStoredFields = (int)(reader.Get(docID - docBase));
            // move past the whole packed array, whatever position the direct read left us at
            fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
        }

        int bitsPerLength = fieldsStream.ReadVInt32();
        if (bitsPerLength == 0)
        {
            length = fieldsStream.ReadVInt32();
            offset = (docID - docBase) * length;
            totalLength = chunkDocs * length;
        }
        else if (bitsPerLength > 31)
        {
            // validate the value that was just read (the check used to test
            // bitsPerStoredFields, which can never exceed 31 at this point)
            throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
        }
        else
        {
            PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
            int off = 0;
            // lengths of the documents before docID add up to its offset
            for (int i = 0; i < docID - docBase; ++i)
            {
                off += (int)it.Next();
            }
            offset = off;
            length = (int)it.Next();
            off += length;
            // the remaining lengths complete the total length of the chunk
            for (int i = docID - docBase + 1; i < chunkDocs; ++i)
            {
                off += (int)it.Next();
            }
            totalLength = off;
        }
    }

    if ((length == 0) != (numStoredFields == 0))
    {
        throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
    }
    if (numStoredFields == 0)
    {
        // nothing to do
        return;
    }

    DataInput documentInput;
    if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize)
    {
        Debug.Assert(chunkSize > 0);
        Debug.Assert(offset < chunkSize);

        // only the first slice is decompressed here; the helper input is given the
        // offset and length of the document
        decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes);
        documentInput = new DataInputAnonymousInnerClassHelper(this, offset, length);
    }
    else
    {
        BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
        decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes);
        Debug.Assert(bytes.Length == length);
        documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length);
    }

    for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++)
    {
        // the field number sits above the low TYPE_BITS bits, the type bits below
        long infoAndBits = documentInput.ReadVInt64();
        int fieldNumber = (int)((long)((ulong)infoAndBits >> CompressingStoredFieldsWriter.TYPE_BITS));
        FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

        int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK);
        Debug.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits=" + bits.ToString("x"));

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(documentInput, visitor, fieldInfo, bits);
                break;

            case StoredFieldVisitor.Status.NO:
                SkipField(documentInput, bits);
                break;

            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
/// <summary>
/// Writes a numeric field. When <paramref name="optimizeStorage"/> is set, a first pass
/// over <paramref name="values"/> gathers the minimum, maximum, a running GCD of the
/// deltas to the minimum, whether any value is missing and (while there are at most
/// 256 of them) the set of distinct values. A missing-values bitset is written when
/// needed, and the encoding is then chosen in this order: uncompressed bytes,
/// table-compressed, GCD-compressed, delta-compressed.
/// </summary>
/// <param name="field"> the field whose number is recorded in the metadata </param>
/// <param name="values"> one value per document; <c>null</c> entries are treated as <c>0</c> and flagged as missing </param>
/// <param name="optimizeStorage"> whether to analyze the values first; when <c>false</c>
/// no statistics are gathered and the delta-compressed encoding is used </param>
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte(MemoryDocValuesProducer.NUMBER);
    meta.WriteInt64(data.GetFilePointer());

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    ISet<long?> uniqueValues = null;

    if (optimizeStorage)
    {
        uniqueValues = new JCG.HashSet<long?>();

        long count = 0;
        foreach (var nv in values)
        {
            if (nv == null)
            {
                missing = true;
            }
            long v = nv.GetValueOrDefault();

            if (gcd != 1)
            {
                bool deltaMayOverflow = v < long.MinValue / 2 || v > long.MaxValue / 2;
                if (deltaMayOverflow)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0)
                {
                    // minValue needs to be set first
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            // stop tracking distinct values once there are more than 256 of them
            if (uniqueValues != null && uniqueValues.Add(v) && uniqueValues.Count > 256)
            {
                uniqueValues = null;
            }

            ++count;
        }

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(count == maxDoc);
        }
    }

    if (!missing)
    {
        meta.WriteInt64(-1L);
    }
    else
    {
        // record where the bitset starts and how many bytes it takes
        long bitsetStart = data.GetFilePointer();
        WriteMissingBitset(values);
        meta.WriteInt64(bitsetStart);
        meta.WriteInt64(data.GetFilePointer() - bitsetStart);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
        bool fitsInSignedByte = minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue;

        if (formatAndBits.BitsPerValue == 8 && fitsInSignedByte)
        {
            // uncompressed
            meta.WriteByte(MemoryDocValuesProducer.UNCOMPRESSED);
            foreach (var nv in values)
            {
                data.WriteByte((byte)nv.GetValueOrDefault());
            }
            return;
        }

        // table-compressed
        meta.WriteByte(MemoryDocValuesProducer.TABLE_COMPRESSED);
        long?[] decode = new long?[uniqueValues.Count];
        uniqueValues.CopyTo(decode, 0);
        var encode = new Dictionary<long?, int?>();
        data.WriteVInt32(decode.Length);
        for (int ord = 0; ord < decode.Length; ord++)
        {
            data.WriteInt64(decode[ord].Value);
            encode[decode[ord]] = ord;
        }

        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteVInt32(formatAndBits.Format.Id);
        data.WriteVInt32(formatAndBits.BitsPerValue);

        PackedInt32s.Writer ordWriter = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
        foreach (var nv in values)
        {
            int? ord = encode[nv.GetValueOrDefault()];
            ordWriter.Add((long)ord);
        }
        ordWriter.Finish();
        return;
    }

    if (gcd != 0 && gcd != 1)
    {
        meta.WriteByte(MemoryDocValuesProducer.GCD_COMPRESSED);
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteInt64(minValue);
        data.WriteInt64(gcd);
        data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

        var quotientWriter = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
        foreach (var nv in values)
        {
            quotientWriter.Add((nv.GetValueOrDefault() - minValue) / gcd);
        }
        quotientWriter.Finish();
        return;
    }

    // delta-compressed
    meta.WriteByte(MemoryDocValuesProducer.DELTA_COMPRESSED);
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

    var blockWriter = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
    foreach (var nv in values)
    {
        blockWriter.Add(nv.GetValueOrDefault());
    }
    blockWriter.Finish();
}
/// <summary>
/// Writes, for every field instance of the pending documents, the index of its field
/// number inside <paramref name="fieldNums"/> as a packed integer.
/// </summary>
/// <param name="totalFields"> total number of field instances across the pending documents </param>
/// <param name="fieldNums"> the field numbers in use; searched with a binary search, so it must be sorted </param>
private void FlushFields(int totalFields, int[] fieldNums)
{
    // enough bits to store the largest index into fieldNums
    int bitsPerIndex = PackedInt32s.BitsRequired(fieldNums.Length - 1);
    PackedInt32s.Writer indexWriter = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, totalFields, bitsPerIndex, 1);

    foreach (DocData doc in pendingDocs)
    {
        foreach (FieldData fieldData in doc.fields)
        {
            int position = Array.BinarySearch(fieldNums, fieldData.fieldNum);
            Debug.Assert(position >= 0);
            indexWriter.Add(position);
        }
    }

    indexWriter.Finish();
}
/// <summary>
/// Sole constructor. Validates <paramref name="blockSize"/> against
/// <c>MIN_BLOCK_SIZE</c> and <c>MAX_BLOCK_SIZE</c>, binds the writer to
/// <paramref name="out"/> through <c>Reset</c>, and allocates the buffer that
/// holds one block of values.
/// </summary>
/// <param name="out"> the output handed to <c>Reset</c> </param>
/// <param name="blockSize"> the number of values of a single block, must be a multiple of <c>64</c>. </param>
protected AbstractBlockPackedWriter(DataOutput @out, int blockSize) // LUCENENET specific - marked protected instead of public
{
    // reject an invalid block size before touching any state
    PackedInt32s.CheckBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);

    this.Reset(@out);
    this.m_values = new long[blockSize];
}
/// <summary>
/// Sets every value in <c>[fromIndex, toIndex)</c> to <paramref name="val"/>.
/// <para/>
/// Short ranges are delegated to the base implementation. For longer ranges the
/// leading values are set one by one up to an aligned index, the backing blocks are
/// then overwritten directly with a precomputed repeating block pattern, and the
/// trailing values that do not fill a whole block are set one by one.
/// </summary>
/// <param name="fromIndex"> first index to set (inclusive) </param>
/// <param name="toIndex"> last index to set (exclusive) </param>
/// <param name="val"> the value to store; asserted to fit in the configured bits per value </param>
public override void Fill(int fromIndex, int toIndex, long val)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(PackedInt32s.BitsRequired(val) <= BitsPerValue);
        Debugging.Assert(fromIndex <= toIndex);
    }

    // minimum number of values that use an exact number of full blocks
    int valuesPerPattern = 64 / Gcd(64, m_bitsPerValue);
    int span = toIndex - fromIndex;
    if (span <= 3 * valuesPerPattern)
    {
        // there needs be at least 2 * nAlignedValues aligned values for the
        // block approach to be worth trying
        base.Fill(fromIndex, toIndex, val);
        return;
    }

    // fill the first values naively until the next block start
    int misalignment = fromIndex % valuesPerPattern;
    if (misalignment != 0)
    {
        for (int remaining = valuesPerPattern - misalignment; remaining > 0; --remaining)
        {
            Set(fromIndex++, val);
        }
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(fromIndex % valuesPerPattern == 0);
    }

    // compute the long[] blocks for nAlignedValues consecutive values and
    // use them to set as many values as possible without applying any mask
    // or shift
    int patternBlockCount = (valuesPerPattern * m_bitsPerValue) >> 6;
    Packed64 pattern = new Packed64(valuesPerPattern, m_bitsPerValue);
    for (int slot = 0; slot < valuesPerPattern; ++slot)
    {
        pattern.Set(slot, val);
    }
    long[] patternBlocks = pattern.blocks;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(patternBlockCount <= patternBlocks.Length);
    }

    int firstBlock = (int)((ulong)((long)fromIndex * m_bitsPerValue) >> 6);
    int lastBlockExclusive = (int)((ulong)((long)toIndex * m_bitsPerValue) >> 6);
    for (int block = firstBlock; block < lastBlockExclusive; ++block)
    {
        blocks[block] = patternBlocks[block % patternBlockCount];
    }

    // fill the gap
    int firstTrailingIndex = (int)(((long)lastBlockExclusive << 6) / m_bitsPerValue);
    for (int index = firstTrailingIndex; index < toIndex; ++index)
    {
        Set(index, val);
    }
}
/// <summary>
/// Round-trips random blocks through <c>ForUtil</c>: a random number of blocks is
/// written to an in-memory directory, then each block is either skipped or read back
/// and compared with the original values. The final read position must match the
/// position reached while writing.
/// </summary>
public virtual void TestEncodeDecode()
{
    int iterations = RandomInts.RandomInt32Between(Random, 1, 1000);
    float acceptableOverheadRatio = (float)Random.NextDouble();
    int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
    for (int i = 0; i < iterations; ++i)
    {
        int bpv = Random.Next(32);
        if (bpv == 0)
        {
            // a block made of one repeated value
            int value = RandomInts.RandomInt32Between(Random, 0, int.MaxValue);
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
            }
        }
        else
        {
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.RandomInt32Between(Random, 0, (int)PackedInt32s.MaxValue(bpv));
            }
        }
    }

    Directory d = new RAMDirectory();
    try
    {
        long endPointer;

        // encode
        IndexOutput @out = d.CreateOutput("test.bin", IOContext.DEFAULT);
        try
        {
            ForUtil forUtil = new ForUtil(acceptableOverheadRatio, @out);

            for (int i = 0; i < iterations; ++i)
            {
                forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
            }
            endPointer = @out.GetFilePointer();
        }
        finally
        {
            // release the output even when writing fails
            @out.Dispose();
        }

        // decode
        IndexInput @in = d.OpenInput("test.bin", IOContext.READ_ONCE);
        try
        {
            ForUtil forUtil = new ForUtil(@in);
            for (int i = 0; i < iterations; ++i)
            {
                if (Random.NextBoolean())
                {
                    forUtil.SkipBlock(@in);
                    continue;
                }
                int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
                forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
                Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
            }
            assertEquals(endPointer, @in.GetFilePointer());
        }
        finally
        {
            // release the input even when an assertion fails
            @in.Dispose();
        }
    }
    finally
    {
        // the directory was previously never disposed
        d.Dispose();
    }
}
/// <summary>
/// Returns the mask for <paramref name="bitsPerValue"/> bits: all 64 bits set when
/// <paramref name="bitsPerValue"/> is <c>64</c>, otherwise the result of
/// <c>PackedInt32s.MaxValue(bitsPerValue)</c>.
/// </summary>
/// <param name="bitsPerValue"> the number of bits per value </param>
private static long Mask(int bitsPerValue)
{
    // 64 bits is handled here rather than being passed to MaxValue
    if (bitsPerValue == 64)
    {
        return ~0L;
    }

    return PackedInt32s.MaxValue(bitsPerValue);
}
/// <summary>
/// Creates a writer backed by a mutable obtained from <c>PackedInt32s.GetMutable</c>,
/// and caches the mask matching the bits per value that mutable actually uses.
/// </summary>
/// <param name="startBitsPerValue"> the initial number of bits per value, may grow depending on the data </param>
/// <param name="valueCount"> the number of values </param>
/// <param name="acceptableOverheadRatio"> an acceptable overhead ratio </param>
public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio)
{
    this.acceptableOverheadRatio = acceptableOverheadRatio;

    this.current = PackedInt32s.GetMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);

    // the mask follows the mutable's own bits per value, not the requested one
    int actualBitsPerValue = this.current.BitsPerValue;
    this.currentMask = Mask(actualBitsPerValue);
}
/// <summary>
/// Indexes a random number of documents that each carry an <c>id</c> numeric doc value
/// and a <c>dv</c> doc value of the requested <paramref name="type"/>, then reopens the
/// index and checks that the matching value source returns, for every document, the
/// value that was indexed for its id.
/// </summary>
/// <param name="type"> the doc values type to exercise: BINARY, SORTED or NUMERIC; any other type fails </param>
private void DoTest(DocValuesType type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    int nDocs = AtLeast(50);

    Field id = new NumericDocValuesField("id", 0);
    Field f;
    switch (type)
    {
        case DocValuesType.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;

        case DocValuesType.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;

        case DocValuesType.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;

        default:
            throw AssertionError.Create();
    }

    // a single document instance is reused; only the field values change
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    bool bytesBacked = type == DocValuesType.SORTED || type == DocValuesType.BINARY;

    object[] vals = new object[nDocs];
    RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);
    for (int docIndex = 0; docIndex < nDocs; ++docIndex)
    {
        id.SetInt64Value(docIndex);

        if (bytesBacked)
        {
            // retry until the random string is non-empty
            do
            {
                vals[docIndex] = TestUtil.RandomSimpleString(Random, 20);
            } while (((string)vals[docIndex]).Length == 0);
            f.SetBytesValue(new BytesRef((string)vals[docIndex]));
        }
        else if (type == DocValuesType.NUMERIC)
        {
            int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
            vals[docIndex] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
            f.SetInt64Value((long)vals[docIndex]);
        }

        iw.AddDocument(document);
        if (Random.NextBoolean() && docIndex % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);

        ValueSource vs;
        switch (type)
        {
            case DocValuesType.BINARY:
            case DocValuesType.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;

            case DocValuesType.NUMERIC:
                vs = new Int64FieldSource("dv");
                break;

            default:
                throw AssertionError.Create();
        }

        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        int maxDoc = leave.AtomicReader.MaxDoc;
        for (int doc = 0; doc < maxDoc; ++doc)
        {
            assertTrue(values.Exists(doc));

            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(doc) is string);
            }
            else if (vs is Int64FieldSource)
            {
                assertTrue(values.ObjectVal(doc) is long?);
                assertTrue(values.BytesVal(doc, bytes));
            }
            else
            {
                throw AssertionError.Create();
            }

            // the id doc value maps the segment-local document back to its expected value
            object expected = vals[ids.Int32Val(doc)];

            if (type == DocValuesType.SORTED)
            {
                values.OrdVal(doc); // no exception
                assertTrue(values.NumOrd >= 1);
            }

            if (bytesBacked)
            {
                // each accessor is deliberately called twice
                assertEquals(expected, values.ObjectVal(doc));
                assertEquals(expected, values.StrVal(doc));
                assertEquals(expected, values.ObjectVal(doc));
                assertEquals(expected, values.StrVal(doc));
                assertTrue(values.BytesVal(doc, bytes));
                assertEquals(new BytesRef((string)expected), bytes);
            }
            else if (type == DocValuesType.NUMERIC)
            {
                assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(doc));
            }
        }
    }

    rd.Dispose();
    d.Dispose();
}
/// <summary>
/// Sole constructor. Validates <paramref name="blockSize"/> against
/// <c>MIN_BLOCK_SIZE</c> and <c>MAX_BLOCK_SIZE</c>, binds the writer to
/// <paramref name="out"/> through <c>Reset</c>, and allocates the buffer that
/// holds one block of values.
/// </summary>
/// <param name="out"> the output handed to <c>Reset</c> </param>
/// <param name="blockSize"> the number of values of a single block, must be a multiple of <c>64</c>. </param>
public AbstractBlockPackedWriter(DataOutput @out, int blockSize)
{
    // reject an invalid block size before touching any state
    PackedInt32s.CheckBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);

    this.Reset(@out);
    this.m_values = new long[blockSize];
}