/// <summary>
/// Points this writer at a new field. The instance is re-used across
/// fields, so the parent calls this whenever the field changes.
/// Caches the field's index options, derives the offsets/payloads flags
/// from them, and resets <c>LastState</c> to <c>EmptyState</c>.
/// </summary>
/// <param name="fieldInfo">The field that subsequent calls will write.</param>
/// <returns>Always 0.</returns>
public override int SetField(FieldInfo fieldInfo)
{
    this.FieldInfo = fieldInfo;

    // Cache per-field settings derived from the field's index options.
    IndexOptions = fieldInfo.FieldIndexOptions;
    StoreOffsets = IndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    StorePayloads = fieldInfo.HasPayloads();

    // Forget the previously written term state.
    LastState = EmptyState;

    return 0;
}
/// <summary>
/// Returns <see cref="SortedDocValues"/> for this field.
/// The returned instance need not be thread-safe: it will only be
/// used by a single thread.
/// </summary>
/// <param name="field">The field whose sorted doc values are requested.</param>
/// <returns>The <see cref="SortedDocValues"/> for <paramref name="field"/>.</returns>
public abstract SortedDocValues GetSorted(FieldInfo field);
/// <summary>
/// Returns a <see cref="Bits"/> at the size of <c>reader.maxDoc()</c>,
/// with turned on bits for each docid that does have a value for this field.
/// The returned instance need not be thread-safe: it will only be
/// used by a single thread.
/// </summary>
/// <param name="field">The field whose "has value" bits are requested.</param>
/// <returns>The <see cref="Bits"/> for <paramref name="field"/>.</returns>
public abstract Bits GetDocsWithField(FieldInfo field);
/// <summary>
/// Creates the <see cref="BinaryDocValues"/> for a binary entry whose values
/// are located through an address reader obtained from <c>GetAddressInstance</c>.
/// </summary>
/// <param name="field">The field being read.</param>
/// <param name="bytes">The binary entry describing the field's data.</param>
/// <returns>Doc values reading from a private clone of the data input.</returns>
private BinaryDocValues GetVariableBinary(FieldInfo field, BinaryEntry bytes)
{
    // Each returned instance reads from its own clone of the shared input.
    IndexInput clonedData = (IndexInput)this.Data.Clone();
    return new LongBinaryDocValuesAnonymousInnerClassHelper2(
        this,
        bytes,
        clonedData,
        GetAddressInstance(clonedData, field, bytes));
}
/// <summary>
/// Returns <see cref="NumericDocValues"/> for this field.
/// The returned instance need not be thread-safe: it will only be
/// used by a single thread.
/// </summary>
/// <param name="field">The field whose numeric doc values are requested.</param>
/// <returns>The <see cref="NumericDocValues"/> for <paramref name="field"/>.</returns>
public abstract NumericDocValues GetNumeric(FieldInfo field);
/// <summary>
/// Sets the current field for writing, and returns the
/// fixed length of long[] metadata (which is fixed per
/// field), called when the writing switches to another field.
/// </summary>
/// <param name="fieldInfo">The field that subsequent writes belong to.</param>
/// <returns>The fixed length of the long[] metadata for this field.</returns>
// TODO: better name?
public abstract int SetField(FieldInfo fieldInfo);
/// <summary>
/// Creates the <see cref="BinaryDocValues"/> for a binary entry whose
/// addresses come from <c>GetIntervalInstance</c>.
/// </summary>
/// <param name="field">The field being read.</param>
/// <param name="bytes">The binary entry describing the field's data.</param>
/// <returns>A <c>CompressedBinaryDocValues</c> over a private clone of the data input.</returns>
private BinaryDocValues GetCompressedBinary(FieldInfo field, BinaryEntry bytes)
{
    // Each returned instance reads from its own clone of the shared input.
    IndexInput clonedData = (IndexInput)this.Data.Clone();
    return new CompressedBinaryDocValues(
        bytes,
        GetIntervalInstance(clonedData, field, bytes),
        clonedData);
}
/// <summary>
/// Forwards the binary values to the consumer that <c>GetInstance</c>
/// selects for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field being written.</param>
/// <param name="values">The binary values to write.</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    var target = GetInstance(field);
    target.AddBinaryField(field, values);
}
/// <summary>
/// Returns <see cref="SortedSetDocValues"/> for this field.
/// The returned instance need not be thread-safe: it will only be
/// used by a single thread.
/// </summary>
/// <param name="field">The field whose sorted-set doc values are requested.</param>
/// <returns>The <see cref="SortedSetDocValues"/> for <paramref name="field"/>.</returns>
public abstract SortedSetDocValues GetSortedSet(FieldInfo field);
/// <summary>
/// Returns <see cref="BinaryDocValues"/> for this field.
/// The returned instance need not be thread-safe: it will only be
/// used by a single thread.
/// </summary>
/// <param name="field">The field whose binary doc values are requested.</param>
/// <returns>The <see cref="BinaryDocValues"/> for <paramref name="field"/>.</returns>
public abstract BinaryDocValues GetBinary(FieldInfo field);
/// <summary>
/// Must fully consume state, since after this call that
/// TermState may be reused.
/// </summary>
/// <param name="fieldInfo">The field the term belongs to.</param>
/// <param name="state">The term state; must be fully consumed before returning.</param>
/// <param name="skipDocs">NOTE(review): presumably marks documents to skip - confirm against implementations.</param>
/// <param name="reuse">NOTE(review): presumably a previously returned enum that may be recycled - confirm.</param>
/// <param name="flags">NOTE(review): meaning of the flag bits is not visible here - confirm.</param>
/// <returns>A <see cref="DocsAndPositionsEnum"/>.</returns>
public abstract DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, int flags);
/// <summary>
/// Wraps <paramref name="in"/> and remembers the field and the
/// visited-documents bit set it was created with.
/// </summary>
/// <param name="in">The wrapped postings consumer.</param>
/// <param name="fieldInfo">The field this consumer belongs to.</param>
/// <param name="visitedDocs">The bit set stored in <c>VisitedDocs</c>.</param>
internal AssertingPostingsConsumer(PostingsConsumer @in, FieldInfo fieldInfo, OpenBitSet visitedDocs)
{
    this.VisitedDocs = visitedDocs;
    this.fieldInfo = fieldInfo;
    this.@in = @in;
}
/// <summary>
/// Forwards the numeric values to the consumer that <c>GetInstance</c>
/// selects for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field being written.</param>
/// <param name="values">The numeric values to write.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    var target = GetInstance(field);
    target.AddNumericField(field, values);
}
/// <summary>
/// Starts writing the term vector of one field: records the field's file
/// pointer, writes the field number and term count, and writes a flags byte
/// describing whether positions and offsets are stored. When the last field
/// of the document is started, the file-pointer deltas of all fields are
/// written as well.
/// </summary>
/// <param name="info">The field being started; its name must sort after the previous field's name.</param>
/// <param name="numTerms">Number of terms that will be written for this field.</param>
/// <param name="positions">Whether positions are stored.</param>
/// <param name="offsets">Whether offsets are stored.</param>
/// <param name="payloads">Must be <c>false</c>.</param>
/// <exception cref="System.NotSupportedException">If <paramref name="payloads"/> is <c>true</c>.</exception>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    // Compare ordinally: string.CompareTo is culture-sensitive, so the ordering
    // check could pass or fail depending on the current thread's culture.
    Debug.Assert(LastFieldName == null || string.CompareOrdinal(info.Name, LastFieldName) > 0, "fieldName=" + info.Name + " lastFieldName=" + LastFieldName);
    LastFieldName = info.Name;
    if (payloads)
    {
        throw new System.NotSupportedException("3.x codec does not support payloads on vectors!");
    }
    this.Positions = positions;
    this.Offsets = offsets;
    LastTerm.Length = 0;

    // Remember where this field starts so the deltas can be written below.
    Fps[FieldCount++] = Tvf.FilePointer;
    Tvd.WriteVInt(info.Number);
    Tvf.WriteVInt(numTerms);

    sbyte bits = 0x0;
    if (positions)
    {
        bits |= Lucene3xTermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (offsets)
    {
        bits |= Lucene3xTermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    Tvf.WriteByte((byte)bits);

    Debug.Assert(FieldCount <= NumVectorFields);
    if (FieldCount == NumVectorFields)
    {
        // last field of the document
        // this is crazy because the file format is crazy!
        for (int i = 1; i < FieldCount; i++)
        {
            Tvd.WriteVLong(Fps[i] - Fps[i - 1]);
        }
    }
}
/// <summary>
/// Forwards the sorted-set data to the consumer that <c>GetInstance</c>
/// selects for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field being written.</param>
/// <param name="values">The values to write.</param>
/// <param name="docToOrdCount">Passed through unchanged to the selected consumer.</param>
/// <param name="ords">Passed through unchanged to the selected consumer.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    var target = GetInstance(field);
    target.AddSortedSetField(field, values, docToOrdCount, ords);
}
/// <summary>
/// Actually decode metadata for next term.
/// </summary>
/// <param name="longs">The long[] metadata of the term.</param>
/// <param name="in">Input from which further metadata bytes are read.</param>
/// <param name="fieldInfo">The field the term belongs to.</param>
/// <param name="state">The term state to decode into.</param>
/// <param name="absolute">NOTE(review): presumably mirrors the <c>absolute</c> flag used when encoding - confirm.</param>
/// <seealso cref="PostingsWriterBase.EncodeTerm"/>
public abstract void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState state, bool absolute);
/// <summary>
/// Resolves the <see cref="DocValuesConsumer"/> that should receive
/// <paramref name="field"/>. Picks the field's <see cref="DocValuesFormat"/>,
/// records the format name and segment suffix as attributes on the field, and
/// creates (or re-uses) one consumer per format.
/// </summary>
/// <param name="field">The field about to be written; its attributes are updated.</param>
/// <returns>The consumer for the field's format.</returns>
/// <exception cref="InvalidOperationException">If no format can be determined for the field.</exception>
internal virtual DocValuesConsumer GetInstance(FieldInfo field)
{
    DocValuesFormat format = null;
    if (field.DocValuesGen != -1)
    {
        // Prefer the format name previously recorded on the field, if any.
        string formatName = field.GetAttribute(PER_FIELD_FORMAT_KEY);
        // this means the field never existed in that segment, yet is applied updates
        if (formatName != null)
        {
            format = DocValuesFormat.ForName(formatName);
        }
    }
    if (format == null)
    {
        // Nothing recorded: ask the outer instance for the field's format.
        format = OuterInstance.GetDocValuesFormatForField(field.Name);
    }
    if (format == null)
    {
        throw new InvalidOperationException("invalid null DocValuesFormat for field=\"" + field.Name + "\"");
    }

    // Record the chosen format name on the field.
    string formatName_ = format.Name;
    string previousValue = field.PutAttribute(PER_FIELD_FORMAT_KEY, formatName_);
    Debug.Assert(field.DocValuesGen != -1 || previousValue == null, "formatName=" + formatName_ + " prevValue=" + previousValue);

    // -1 means "no suffix determined yet".
    int suffix = -1;
    ConsumerAndSuffix consumer;
    Formats.TryGetValue(format, out consumer);
    if (consumer == null)
    {
        // First time we are seeing this format; create a new instance
        if (field.DocValuesGen != -1)
        {
            string suffixAtt = field.GetAttribute(PER_FIELD_SUFFIX_KEY);
            // even when dvGen is != -1, it can still be a new field, that never
            // existed in the segment, and therefore doesn't have the recorded
            // attributes yet.
            if (suffixAtt != null)
            {
                suffix = Convert.ToInt32(suffixAtt);
            }
        }
        if (suffix == -1)
        {
            // bump the suffix
            if (!Suffixes.TryGetValue(formatName_, out suffix))
            {
                suffix = 0;
            }
            else
            {
                suffix = suffix + 1;
            }
        }
        // Remember the latest suffix used for this format name.
        Suffixes[formatName_] = suffix;
        string segmentSuffix = GetFullSegmentSuffix(SegmentWriteState.SegmentSuffix, GetSuffix(formatName_, Convert.ToString(suffix)));
        consumer = new ConsumerAndSuffix();
        consumer.Consumer = format.FieldsConsumer(new SegmentWriteState(SegmentWriteState, segmentSuffix));
        consumer.Suffix = suffix;
        Formats[format] = consumer;
    }
    else
    {
        // we've already seen this format, so just grab its suffix
        Debug.Assert(Suffixes.ContainsKey(formatName_));
        suffix = consumer.Suffix;
    }

    // Record the suffix on the field alongside the format name.
    previousValue = field.PutAttribute(PER_FIELD_SUFFIX_KEY, Convert.ToString(suffix));
    Debug.Assert(field.DocValuesGen != -1 || previousValue == null, "suffix=" + Convert.ToString(suffix) + " prevValue=" + previousValue);

    // TODO: we should only provide the "slice" of FIS
    // that this DVF actually sees ...
    return consumer.Consumer;
}
/// <summary>
/// Creates the <see cref="BinaryDocValues"/> for a binary entry that needs
/// no address reader.
/// </summary>
/// <param name="field">The field being read (not used by this method).</param>
/// <param name="bytes">The binary entry describing the field's data.</param>
/// <returns>Doc values reading from a private clone of the data input.</returns>
private BinaryDocValues GetFixedBinary(FieldInfo field, BinaryEntry bytes)
{
    // Each returned instance reads from its own clone of the shared input.
    IndexInput clonedData = (IndexInput)this.Data.Clone();
    return new LongBinaryDocValuesAnonymousInnerClassHelper(this, bytes, clonedData);
}
/// <summary>
/// Writes one stored field: the field number, a flags byte describing the
/// value kind (numeric int/long/float/double, binary, or string), and then
/// the value itself.
/// </summary>
/// <param name="info">The field's metadata; its number is written first.</param>
/// <param name="field">The field whose numeric, binary or string value is stored.</param>
/// <exception cref="System.ArgumentException">
/// If the numeric value has an unsupported type, or the field has no
/// numeric, binary or string value at all.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    FieldsStream.WriteVInt(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields?  and apps
    // can customize...

    object number = field.NumericValue;
    if (number != null)
    {
        if (number is sbyte? || number is short? || number is int?)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
        }
        else if (number is long?)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
        }
        else if (number is float?)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
        }
        else if (number is double?)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + number.GetType());
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.BinaryValue();
        if (bytes != null)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.StringValue;
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    FieldsStream.WriteByte((byte)(sbyte)bits);

    if (bytes != null)
    {
        FieldsStream.WriteVInt(bytes.Length);
        FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        // Reuse the value fetched above instead of reading the property again.
        FieldsStream.WriteString(@string);
    }
    else
    {
        if (number is sbyte? || number is short? || number is int?)
        {
            // A boxed sbyte/short cannot be unboxed with (int): that throws
            // InvalidCastException. Convert widens all three types correctly.
            FieldsStream.WriteInt(Convert.ToInt32(number));
        }
        else if (number is long?)
        {
            FieldsStream.WriteLong((long)number);
        }
        else if (number is float?)
        {
            FieldsStream.WriteInt(Number.FloatToIntBits((float)number));
        }
        else if (number is double?)
        {
            FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
        }
        else
        {
            // Unreachable: unsupported numeric types were rejected above.
            Debug.Assert(false);
        }
    }
}
/// <summary>
/// Asserts that a document is open and no field is currently in progress,
/// delegates to the wrapped writer, then records the expected term count
/// and whether positional data will follow.
/// </summary>
/// <param name="info">The field being started.</param>
/// <param name="numTerms">Number of terms expected for this field.</param>
/// <param name="positions">Whether positions are stored.</param>
/// <param name="offsets">Whether offsets are stored.</param>
/// <param name="payloads">Whether payloads are stored.</param>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    // Preconditions on the writer's state machine.
    Debug.Assert(TermCount == 0);
    Debug.Assert(DocStatus == Status.STARTED);
    Debug.Assert(FieldStatus != Status.STARTED);

    @in.StartField(info, numTerms, positions, offsets, payloads);

    // Any of the three flags means per-position data is written.
    bool anyPositionalData = positions || offsets || payloads;
    FieldStatus = Status.STARTED;
    TermCount = numTerms;
    HasPositions = anyPositionalData;
}
/// <summary>
/// Returns the <see cref="BinaryDocValues"/> for <paramref name="field"/>,
/// dispatching on the format recorded in the field's binary entry.
/// </summary>
/// <param name="field">The field whose binary doc values are requested.</param>
/// <returns>The doc values produced by the matching format-specific reader.</returns>
/// <exception cref="InvalidOperationException">If the entry's format is not one of the known binary formats.</exception>
public override BinaryDocValues GetBinary(FieldInfo field)
{
    BinaryEntry bytes = Binaries[field.Number];
    switch (bytes.Format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            return GetFixedBinary(field, bytes);
        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            return GetVariableBinary(field, bytes);
        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            return GetCompressedBinary(field, bytes);
        default:
            // A specific exception type with a message instead of a bare Exception;
            // it still derives from Exception, so existing catch blocks keep working.
            throw new InvalidOperationException("unknown binary doc values format: " + bytes.Format);
    }
}
/// <summary>
/// Returns the bits marking which documents have a value for
/// <paramref name="field"/>. Numeric and binary fields use the entry's
/// missing-offset; sorted and sorted-set fields derive the bits from their
/// doc values.
/// </summary>
/// <param name="field">The field to inspect.</param>
/// <returns>The <see cref="Bits"/> for the field.</returns>
/// <exception cref="InvalidOperationException">If the field's doc values type is none of the four handled types.</exception>
public override Bits GetDocsWithField(FieldInfo field)
{
    switch (field.DocValuesType)
    {
        case FieldInfo.DocValuesType_e.NUMERIC:
            {
                NumericEntry numericEntry = Numerics[field.Number];
                return GetMissingBits(numericEntry.MissingOffset);
            }
        case FieldInfo.DocValuesType_e.BINARY:
            {
                BinaryEntry binaryEntry = Binaries[field.Number];
                return GetMissingBits(binaryEntry.MissingOffset);
            }
        case FieldInfo.DocValuesType_e.SORTED:
            return DocValues.DocsWithValue(GetSorted(field), MaxDoc);
        case FieldInfo.DocValuesType_e.SORTED_SET:
            return DocValues.DocsWithValue(GetSortedSet(field), MaxDoc);
        default:
            throw new InvalidOperationException();
    }
}
/// <summary>
/// Looks up the numeric entry registered under the field's number and
/// returns the doc values built from it.
/// </summary>
/// <param name="field">The field whose numeric doc values are requested.</param>
/// <returns>The doc values for the field's numeric entry.</returns>
public override NumericDocValues GetNumeric(FieldInfo field)
{
    NumericEntry numericEntry = Numerics[field.Number];
    NumericDocValues result = GetNumeric(numericEntry);
    return result;
}
/// <summary>
/// Writes the term's metadata to <paramref name="out"/>: the freq pointer
/// as a delta against <c>LastState</c>, the skip offset when present, and -
/// for fields indexed with positions - the prox pointer delta. When
/// <paramref name="absolute"/> is set, the deltas are taken against
/// <c>EmptyState</c>.
/// </summary>
/// <param name="empty">Not used by this implementation.</param>
/// <param name="out">The output receiving the encoded metadata.</param>
/// <param name="fieldInfo">Not used by this implementation.</param>
/// <param name="_state">The term state to encode; must be a <c>StandardTermState</c>.</param>
/// <param name="absolute">Whether to reset the delta base before encoding.</param>
public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    StandardTermState current = (StandardTermState)_state;

    if (absolute)
    {
        // Restart delta encoding from the empty state.
        LastState = EmptyState;
    }

    var freqDelta = current.FreqStart - LastState.FreqStart;
    @out.WriteVLong(freqDelta);

    // -1 marks "no skip data" for this term.
    if (current.SkipOffset != -1)
    {
        Debug.Assert(current.SkipOffset > 0);
        @out.WriteVLong(current.SkipOffset);
    }

    if (IndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        var proxDelta = current.ProxStart - LastState.ProxStart;
        @out.WriteVLong(proxDelta);
    }

    LastState = current;
}
/// <summary>
/// Builds the <see cref="SortedDocValues"/> for <paramref name="field"/>
/// from its binary values and a packed reader of ordinals positioned at the
/// ord entry's offset.
/// </summary>
/// <param name="field">The field whose sorted doc values are requested.</param>
/// <returns>Sorted doc values backed by the field's binary values and ordinals.</returns>
public override SortedDocValues GetSorted(FieldInfo field)
{
    int numValues = (int)Binaries[field.Number].Count;
    BinaryDocValues values = GetBinary(field);
    NumericEntry ordEntry = Ords[field.Number];

    // Read the ordinals through a private clone of the shared input.
    IndexInput ordInput = (IndexInput)this.Data.Clone();
    ordInput.Seek(ordEntry.Offset);
    BlockPackedReader ordReader = new BlockPackedReader(
        ordInput,
        ordEntry.PackedIntsVersion,
        ordEntry.BlockSize,
        ordEntry.Count,
        true);

    return new SortedDocValuesAnonymousInnerClassHelper(this, numValues, values, ordReader);
}
/// <summary>
/// Asks the wrapped consumer for the field's <see cref="TermsConsumer"/>
/// and wraps it in an <c>AssertingTermsConsumer</c>.
/// </summary>
/// <param name="field">The field being added.</param>
/// <returns>The asserting wrapper around the delegate's consumer.</returns>
public override TermsConsumer AddField(FieldInfo field)
{
    TermsConsumer wrapped = @in.AddField(field);
    Debug.Assert(wrapped != null);
    return new AssertingTermsConsumer(wrapped, field);
}
/// <summary>
/// Returns the <see cref="SortedSetDocValues"/> for <paramref name="field"/>.
/// Single-valued entries are served by wrapping the field's sorted doc
/// values; entries stored with addresses combine the binary values, the
/// ordinal stream and the ord-index reader.
/// </summary>
/// <param name="field">The field whose sorted-set doc values are requested.</param>
/// <returns>The sorted-set doc values for the field.</returns>
/// <exception cref="InvalidOperationException">If the entry's format is neither of the two known sorted-set formats.</exception>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    SortedSetEntry ss = SortedSets[field.Number];
    if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
    {
        SortedDocValues values = GetSorted(field);
        return DocValues.Singleton(values);
    }
    else if (ss.Format != Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
    {
        // A specific exception type with a message instead of a bare Exception;
        // it still derives from Exception, so existing catch blocks keep working.
        throw new InvalidOperationException("unknown sorted set doc values format: " + ss.Format);
    }

    IndexInput data = (IndexInput)this.Data.Clone();
    long valueCount = Binaries[field.Number].Count;
    // we keep the byte[]s and list of ords on disk, these could be large
    LongBinaryDocValues binary = (LongBinaryDocValues)GetBinary(field);
    LongValues ordinals = GetNumeric(Ords[field.Number]);
    // but the addresses to the ord stream are in RAM
    MonotonicBlockPackedReader ordIndex = GetOrdIndexInstance(data, field, OrdIndexes[field.Number]);

    return new RandomAccessOrdsAnonymousInnerClassHelper(this, valueCount, binary, ordinals, ordIndex);
}
/// <summary>
/// Wraps <paramref name="in"/> and remembers the field it consumes terms for.
/// </summary>
/// <param name="in">The wrapped terms consumer.</param>
/// <param name="fieldInfo">The field this consumer belongs to.</param>
internal AssertingTermsConsumer(TermsConsumer @in, FieldInfo fieldInfo)
{
    this.fieldInfo = fieldInfo;
    this.@in = @in;
}
/// <summary>
/// Returns an address instance for prefix-compressed binary values.
/// The reader is created on first request for a field number and cached
/// in <c>AddressInstances</c>; later requests return the cached reader.
/// @lucene.internal
/// </summary>
/// <param name="data">Input to read the addresses from when no reader is cached yet.</param>
/// <param name="field">The field whose number keys the cache.</param>
/// <param name="bytes">The binary entry supplying offset, count, interval and packed-ints settings.</param>
/// <returns>The cached or newly created address reader.</returns>
protected internal virtual MonotonicBlockPackedReader GetIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes)
{
    long interval = bytes.AddressInterval;
    lock (AddressInstances)
    {
        MonotonicBlockPackedReader cached;
        if (AddressInstances.TryGetValue(field.Number, out cached))
        {
            return cached;
        }

        data.Seek(bytes.AddressesOffset);

        // One address per full interval, plus one for a trailing partial interval.
        long size = bytes.Count / interval;
        if (bytes.Count % interval != 0)
        {
            size = 1L + size;
        }

        cached = new MonotonicBlockPackedReader(data, bytes.PackedIntsVersion, bytes.BlockSize, size, false);
        AddressInstances[field.Number] = cached;
        RamBytesUsed_Renamed.AddAndGet(cached.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT);
        return cached;
    }
}
/// <summary>
/// Asserts that a document is open, delegates the write to the wrapped
/// writer, then counts the field off against the number still expected
/// for the document.
/// </summary>
/// <param name="info">The field's metadata.</param>
/// <param name="field">The field value to store.</param>
public override void WriteField(FieldInfo info, IndexableField field)
{
    Debug.Assert(DocStatus == Status.STARTED);

    @in.WriteField(info, field);

    // One fewer field remains to be written for this document.
    Debug.Assert(FieldCount > 0);
    FieldCount -= 1;
}
/// <summary>
/// Returns an address instance for sortedset ordinal lists.
/// The reader is created on first request for a field number and cached
/// in <c>OrdIndexInstances</c>; later requests return the cached reader.
/// @lucene.internal
/// </summary>
/// <param name="data">Input to read the ord index from when no reader is cached yet.</param>
/// <param name="field">The field whose number keys the cache.</param>
/// <param name="entry">The entry supplying offset, count and packed-ints settings.</param>
/// <returns>The cached or newly created ord-index reader.</returns>
protected internal virtual MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry)
{
    lock (OrdIndexInstances)
    {
        MonotonicBlockPackedReader cached;
        if (OrdIndexInstances.TryGetValue(field.Number, out cached))
        {
            return cached;
        }

        data.Seek(entry.Offset);
        cached = new MonotonicBlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, false);
        OrdIndexInstances[field.Number] = cached;
        RamBytesUsed_Renamed.AddAndGet(cached.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT);
        return cached;
    }
}
/// <summary>
/// Encode metadata as long[] and byte[]. <c>absolute</c> controls whether
/// current term is delta encoded according to latest term.
/// Usually elements in <c>longs</c> are file pointers, so each one always
/// increases when a new term is consumed. <c>out</c> is used to write generic
/// bytes, which are not monotonic.
/// <para>
/// NOTE: sometimes long[] might contain "don't care" values that are unused, e.g.
/// the pointer to postings list may not be defined for some terms but is defined
/// for others, if it is designed to inline some postings data in term dictionary.
/// In this case, the postings writer should always use the last value, so that each
/// element in metadata long[] remains monotonic.
/// </para>
/// </summary>
/// <param name="longs">The long[] part of the metadata.</param>
/// <param name="out">Output for the generic, non-monotonic metadata bytes.</param>
/// <param name="fieldInfo">The field the term belongs to.</param>
/// <param name="state">The term state to encode.</param>
/// <param name="absolute">Controls whether the current term is delta encoded according to the latest term.</param>
public abstract void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute);
/// <summary>
/// Reads the field infos file of a segment: validates the format number,
/// then decodes, for every field, its name, number, flag bits, index
/// options and norms type. Fails if the file has trailing bytes.
/// </summary>
/// <param name="directory">The directory to open the file from.</param>
/// <param name="segmentName">The segment whose field infos are read.</param>
/// <param name="segmentSuffix">Not used by this implementation.</param>
/// <param name="iocontext">The I/O context used to open the input.</param>
/// <returns>The decoded <see cref="FieldInfos"/>.</returns>
/// <exception cref="IndexFormatTooOldException">If the format number is above <c>FORMAT_MINIMUM</c>.</exception>
/// <exception cref="IndexFormatTooNewException">If the format number is below the current format and is not the pre-flex RW format.</exception>
/// <exception cref="CorruptIndexException">If OMIT_POSITIONS is set for a format that cannot carry it, or the file was not fully consumed.</exception>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);

    try
    {
        int format = input.ReadVInt();
        bool isPreFlexRW = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW;

        if (format > FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
        }
        if (!isPreFlexRW && format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
        }

        int size = input.ReadVInt(); //read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            // Only the pre-flex RW format stores an explicit field number.
            int fieldNumber = isPreFlexRW ? input.ReadInt() : i;
            byte bits = input.ReadByte();

            bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
            bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;

            FieldInfo.IndexOptions? indexOptions;
            if (!isIndexed)
            {
                indexOptions = null;
            }
            else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            }
            else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) == 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }
            else if (format > PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
            {
                throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
            }
            else
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            }

            // LUCENE-3027: past indices were able to write
            // storePayloads=true when omitTFAP is also true,
            // which is invalid.  We correct that, here:
            bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0
                && indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            bool hasNorms = isIndexed && !omitNorms;
            if (hasNorms && isPreFlexRW)
            {
                // RW can have norms but doesn't write them
                hasNorms = input.ReadByte() != 0;
            }
            DocValuesType_e? normType = hasNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;

            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
        }

        // Every byte of the file must have been consumed.
        if (input.FilePointer != input.Length())
        {
            throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
        }

        return new FieldInfos(infos);
    }
    finally
    {
        input.Dispose();
    }
}