// NOTE: 4.0 file format docs are crazy/wrong here...
/// <summary>
/// Writes a field in the legacy <c>BYTES_VAR_STRAIGHT</c> layout: the raw bytes of every
/// non-null value go to <paramref name="data"/>, and a packed table of start offsets
/// (plus one trailing sentinel) goes to <paramref name="index"/>.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="data">Output receiving the codec header and the concatenated value bytes.</param>
/// <param name="index">Output receiving the codec header, the total byte count and the offsets.</param>
/// <param name="values">Values to write; enumerated twice. A null entry contributes zero bytes.</param>
private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

    // First pass: dump the value bytes back to back.
    long dataStart = data.FilePointer;
    foreach (BytesRef value in values)
    {
        if (value == null)
        {
            continue;
        }
        data.WriteBytes(value.Bytes, value.Offset, value.Length);
    }

    // The number of bytes just written is also the largest address the table must hold.
    long maxAddress = data.FilePointer - dataStart;
    index.WriteVLong(maxAddress);

    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

    // Second pass: one start offset per enumerated value; the writer is sized maxDoc + 1
    // to leave room for the sentinel.
    PackedInts.Writer addresses = PackedInts.GetWriter(index, maxDoc + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long offset = 0;
    foreach (BytesRef value in values)
    {
        addresses.Add(offset);
        offset += value == null ? 0 : value.Length;
    }

    // Sentinel: the end offset of the last value.
    Debug.Assert(offset == maxAddress);
    addresses.Add(offset);
    addresses.Finish();
}
/// <summary>
/// Returns the <see cref="DocValuesConsumer"/> that should receive <paramref name="field"/>.
/// The field's format is resolved (from its recorded attribute when it has a doc-values
/// generation, otherwise from <c>OuterInstance.GetDocValuesFormatForField</c>), a consumer is
/// created the first time that format is seen, and the chosen format name and suffix are
/// stored back on the field as attributes.
/// </summary>
/// <param name="field">Field whose doc values are about to be written.</param>
/// <returns>The consumer registered for the field's format.</returns>
/// <exception cref="InvalidOperationException">No format could be resolved for the field.</exception>
internal virtual DocValuesConsumer GetInstance(FieldInfo field)
{
    // The suffix is persisted as a field attribute and fed into the segment suffix, so it
    // is parsed and formatted with the invariant culture, never the thread's current culture.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    DocValuesFormat format = null;
    if (field.DocValuesGen != -1)
    {
        string recordedFormatName = field.GetAttribute(PER_FIELD_FORMAT_KEY);
        // a null attribute means the field never existed in that segment, yet is applied updates
        if (recordedFormatName != null)
        {
            format = DocValuesFormat.ForName(recordedFormatName);
        }
    }
    if (format == null)
    {
        format = OuterInstance.GetDocValuesFormatForField(field.Name);
    }
    if (format == null)
    {
        throw new InvalidOperationException("invalid null DocValuesFormat for field=\"" + field.Name + "\"");
    }

    string formatName = format.Name;
    string previousValue = field.PutAttribute(PER_FIELD_FORMAT_KEY, formatName);
    Debug.Assert(field.DocValuesGen != -1 || previousValue == null, "formatName=" + formatName + " prevValue=" + previousValue);

    int suffix = -1; // -1 means "not determined yet"
    ConsumerAndSuffix consumer;
    Formats.TryGetValue(format, out consumer);
    if (consumer == null)
    {
        // First time we are seeing this format; create a new instance
        if (field.DocValuesGen != -1)
        {
            string suffixAtt = field.GetAttribute(PER_FIELD_SUFFIX_KEY);
            // even when dvGen is != -1, it can still be a new field, that never
            // existed in the segment, and therefore doesn't have the recorded
            // attributes yet.
            if (suffixAtt != null)
            {
                suffix = Convert.ToInt32(suffixAtt, invariant);
            }
        }

        if (suffix == -1)
        {
            // bump the suffix: start at 0, or one past the last suffix used for this format name
            if (!Suffixes.TryGetValue(formatName, out suffix))
            {
                suffix = 0;
            }
            else
            {
                suffix = suffix + 1;
            }
        }
        Suffixes[formatName] = suffix;

        string segmentSuffix = GetFullSegmentSuffix(
            SegmentWriteState.SegmentSuffix,
            GetSuffix(formatName, Convert.ToString(suffix, invariant)));
        consumer = new ConsumerAndSuffix();
        consumer.Consumer = format.FieldsConsumer(new SegmentWriteState(SegmentWriteState, segmentSuffix));
        consumer.Suffix = suffix;
        Formats[format] = consumer;
    }
    else
    {
        // we've already seen this format, so just grab its suffix
        Debug.Assert(Suffixes.ContainsKey(formatName));
        suffix = consumer.Suffix;
    }

    string suffixText = Convert.ToString(suffix, invariant);
    previousValue = field.PutAttribute(PER_FIELD_SUFFIX_KEY, suffixText);
    Debug.Assert(field.DocValuesGen != -1 || previousValue == null, "suffix=" + suffixText + " prevValue=" + previousValue);

    // TODO: we should only provide the "slice" of FIS
    // that this DVF actually sees ...
    return consumer.Consumer;
}
/// <summary>
/// Writes a field in the legacy <c>VAR_INTS</c> layout. When <c>maxValue - minValue</c> is
/// non-negative the values are stored as packed deltas from <paramref name="minValue"/>;
/// when that difference comes out negative every value is written as a full 64-bit long.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="output">Output receiving the header, the mode byte and the values.</param>
/// <param name="values">Values to write; a null entry is written as 0.</param>
/// <param name="minValue">Smallest value, used as the base of the packed encoding.</param>
/// <param name="maxValue">Largest value, used with <paramref name="minValue"/> to size the packed encoding.</param>
private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);

    CodecUtil.WriteHeader(
        output,
        Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME,
        Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

    long range = maxValue - minValue;

    if (range >= 0)
    {
        // writes packed ints
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
        output.WriteLong(minValue);
        output.WriteLong(0 - minValue); // default value (representation of 0)

        PackedInts.Writer packed = PackedInts.GetWriter(
            output,
            State.SegmentInfo.DocCount,
            PackedInts.BitsRequired(range),
            PackedInts.DEFAULT);
        foreach (long? n in values)
        {
            long raw = n ?? 0L;
            packed.Add(raw - minValue);
        }
        packed.Finish();
        return;
    }

    // writes longs
    output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
    foreach (long? n in values)
    {
        output.WriteLong(n ?? 0L);
    }
}
/// <summary>
/// Writes a field in the legacy <c>BYTES_VAR_SORTED</c> layout: the value bytes go to
/// <paramref name="data"/>; the total byte count, a packed table of value start offsets
/// (plus a sentinel) and a packed table of ordinals go to <paramref name="index"/>.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="data">Output receiving the codec header and the concatenated value bytes.</param>
/// <param name="index">Output receiving the codec header, addresses and ordinals.</param>
/// <param name="values">Values, written in enumeration order and enumerated twice; entries must be non-null.</param>
/// <param name="docToOrd">Ordinals to write; a null entry throws when it is unwrapped.</param>
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    // Value bytes, back to back; count them as we go.
    long dataStart = data.FilePointer;
    int valueCount = 0;
    foreach (BytesRef value in values)
    {
        data.WriteBytes(value.Bytes, value.Offset, value.Length);
        valueCount += 1;
    }

    // Address table: start offset of each value, then the end offset as a sentinel.
    long maxAddress = data.FilePointer - dataStart;
    index.WriteLong(maxAddress);

    Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

    PackedInts.Writer addresses = PackedInts.GetWriter(index, valueCount + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long offset = 0;
    foreach (BytesRef value in values)
    {
        addresses.Add(offset);
        offset += value.Length;
    }
    Debug.Assert(offset == maxAddress);
    addresses.Add(offset);
    addresses.Finish();

    // Ordinal table, sized by the segment's document count.
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer ordinals = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long? ord in docToOrd)
    {
        ordinals.Add(ord.Value);
    }
    ordinals.Finish();
}
/// <summary>
/// Writes a field in the legacy <c>BYTES_VAR_DEREF</c> layout: each distinct value is written
/// once to <paramref name="data"/>, prefixed with its length via <c>WriteVShort</c>, and
/// <paramref name="index"/> receives the total byte count followed by one packed address
/// per enumerated value pointing at that value's entry.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="data">Output receiving the codec header and the deduplicated values.</param>
/// <param name="index">Output receiving the codec header, total size and per-value addresses.</param>
/// <param name="values">Values to write; enumerated twice. A null entry is treated as an empty <c>BytesRef</c>.</param>
private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

    // Deduplicate into a sorted set; copies are taken so the set owns its entries.
    SortedSet<BytesRef> uniqueValues = new SortedSet<BytesRef>();
    foreach (BytesRef value in values)
    {
        BytesRef entry;
        if (value == null)
        {
            entry = new BytesRef();
        }
        else
        {
            entry = BytesRef.DeepCopyOf(value);
        }
        uniqueValues.Add(entry);
    }

    // Write each distinct value once and remember where it starts.
    long dataStart = data.FilePointer;
    long lastAddress = 0;
    Dictionary<BytesRef, long> addressOf = new Dictionary<BytesRef, long>();
    foreach (BytesRef entry in uniqueValues)
    {
        lastAddress = data.FilePointer - dataStart;
        addressOf[entry] = lastAddress;
        WriteVShort(data, entry.Length);
        data.WriteBytes(entry.Bytes, entry.Offset, entry.Length);
    }

    // Per-value addresses; the last start address is the largest one to encode.
    long totalBytes = data.FilePointer - dataStart;
    index.WriteLong(totalBytes);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer addresses = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(lastAddress), PackedInts.DEFAULT);
    foreach (BytesRef value in values)
    {
        BytesRef key = value == null ? new BytesRef() : value;
        addresses.Add(addressOf[key]);
    }
    addresses.Finish();
}
/// <summary>
/// Writes a field in the legacy <c>FIXED_INTS_16</c> layout: the codec header, the value
/// size (2) and then every value narrowed to a <c>short</c>.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="output">Output receiving the header, size and values.</param>
/// <param name="values">Values to write; a null entry is written as 0.</param>
private void AddShortsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_16.Name);

    CodecUtil.WriteHeader(
        output,
        Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_CURRENT);

    output.WriteInt(2); // size
    foreach (long? n in values)
    {
        short encoded = 0;
        if (n != null)
        {
            encoded = (short)n.Value;
        }
        output.WriteShort(encoded);
    }
}
/// <summary>
/// Writes a field in the legacy <c>BYTES_FIXED_STRAIGHT</c> layout: the codec header, the
/// declared <paramref name="length"/> and then the bytes of every non-null value.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="output">Output receiving the header, length and value bytes.</param>
/// <param name="values">Values to write; null entries are skipped (nothing is written for them).</param>
/// <param name="length">Length recorded in the output; not checked against the values here.</param>
private void AddFixedStraightBytesField(FieldInfo field, IndexOutput output, IEnumerable<BytesRef> values, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_STRAIGHT.Name);

    CodecUtil.WriteHeader(
        output,
        Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME,
        Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT);

    output.WriteInt(length);
    foreach (BytesRef value in values)
    {
        if (value == null)
        {
            continue;
        }
        output.WriteBytes(value.Bytes, value.Offset, value.Length);
    }
}
/// <summary>
/// Writes a field in the legacy <c>BYTES_FIXED_SORTED</c> layout: <paramref name="data"/>
/// receives the declared <paramref name="length"/> and the value bytes, while
/// <paramref name="index"/> receives the value count and a packed table of ordinals.
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="data">Output receiving the codec header, length and value bytes.</param>
/// <param name="index">Output receiving the codec header, value count and ordinals.</param>
/// <param name="values">Values, written in enumeration order; entries must be non-null.</param>
/// <param name="docToOrd">Ordinals to write; a null entry throws when it is unwrapped.</param>
/// <param name="length">Length recorded in the data output; not checked against the values here.</param>
private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

    // Value bytes, preceded by the fixed length; count the values as they are written.
    data.WriteInt(length);
    int valueCount = 0;
    foreach (BytesRef value in values)
    {
        data.WriteBytes(value.Bytes, value.Offset, value.Length);
        valueCount += 1;
    }

    // Ordinal table, sized by the segment's document count.
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer ordinals = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long? ord in docToOrd)
    {
        ordinals.Add(ord.Value);
    }
    ordinals.Finish();
}
/// <summary>
/// Writes a field in the legacy <c>BYTES_FIXED_DEREF</c> layout: <paramref name="data"/>
/// receives the declared <paramref name="length"/> followed by each distinct value once, in
/// sorted order, and <paramref name="index"/> receives the distinct-value count followed by
/// one packed ordinal per enumerated value (its position in that sorted order).
/// </summary>
/// <param name="field">Field being written; its legacy-type attribute is set here.</param>
/// <param name="data">Output receiving the codec header, length and deduplicated values.</param>
/// <param name="index">Output receiving the codec header, value count and ordinals.</param>
/// <param name="values">Values to write; enumerated twice. A null entry is treated as an empty <c>BytesRef</c>.</param>
/// <param name="length">Length recorded in the data output; not checked against the values here.</param>
private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    data.WriteInt(length);

    // Record each entry's position in sorted order while writing it, so the ordinal pass
    // below is a single lookup per value instead of a scan over the whole dictionary.
    // NOTE(review): relies on BytesRef equality/hashing being content-based and consistent
    // with its ordering - the same assumption AddVarDerefBytesField's valueToAddress makes.
    Dictionary<BytesRef, int> valueToOrd = new Dictionary<BytesRef, int>(dictionary.Count);
    int nextOrd = 0;
    foreach (BytesRef v in dictionary)
    {
        valueToOrd[v] = nextOrd;
        nextOrd++;
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    int valueCount = dictionary.Count;
    Debug.Assert(valueCount > 0);
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);

    // Lookup key standing in for null values; it is never stored, so one instance suffices.
    BytesRef missing = new BytesRef();
    foreach (BytesRef v in values)
    {
        // Every value (or the empty stand-in) was added to the dictionary above, so its
        // ordinal equals the number of dictionary entries that sort before it.
        w.Add(valueToOrd[v == null ? missing : v]);
    }
    w.Finish();
}