/// <summary>
/// Save this segment's info by writing its fields to <paramref name="output"/>
/// in a fixed order.
/// </summary>
/// <param name="output">The stream that receives the serialized values.</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);

    // The doc-store segment name and its compound flag are only written
    // when the offset is something other than -1.
    output.WriteInt(docStoreOffset);
    if (docStoreOffset != -1)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte)(docStoreIsCompoundFile ? 1 : 0));
    }

    output.WriteByte((byte)(hasSingleNormFile ? 1 : 0));

    // Norm generations: a length-prefixed list, or the NO marker when there is no array.
    if (normGen != null)
    {
        output.WriteInt(normGen.Length);
        foreach (var generation in normGen)
        {
            output.WriteLong(generation);
        }
    }
    else
    {
        output.WriteInt(NO);
    }

    output.WriteByte((byte)isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte)(hasProx ? 1 : 0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>
/// Writes a single stored field to the fields stream: the field number,
/// one flag byte, and then either the binary slice or the string value.
/// </summary>
/// <param name="fi">Supplies the field number written first.</param>
/// <param name="field">Supplies the flags and the value to write.</param>
internal void WriteField(FieldInfo fi, IFieldable field)
{
    fieldsStream.WriteVInt(fi.number);

    byte flags = 0;
    if (field.IsTokenized)
    {
        flags |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary)
    {
        flags |= FieldsWriter.FIELD_IS_BINARY;
    }
    fieldsStream.WriteByte(flags);

    // compression is disabled for the current field
    if (!field.IsBinary)
    {
        fieldsStream.WriteString(field.StringValue);
        return;
    }

    // Binary value: length prefix followed by the selected slice of the buffer.
    byte[] payload = field.GetBinaryValue();
    int length = field.BinaryLength;
    int start = field.BinaryOffset;
    fieldsStream.WriteVInt(length);
    fieldsStream.WriteBytes(payload, start, length);
}
/// <summary>
/// Disposes the delegate fields consumer and then writes every bloom filter
/// that the factory does not report as saturated to the segment's bloom file.
/// Nothing happens when <paramref name="disposing"/> is <c>false</c>.
/// </summary>
/// <param name="disposing"><c>true</c> to run the flush-and-release logic.</param>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _delegateFieldsConsumer.Dispose();

    // Now we are done accumulating values for these fields
    var unsaturated = new List<KeyValuePair<FieldInfo, FuzzySet>>();
    foreach (var pair in _bloomFilters)
    {
        if (outerInstance._bloomFilterFactory.IsSaturated(pair.Value, pair.Key))
        {
            continue;
        }
        unsaturated.Add(pair);
    }

    var bloomFileName = IndexFileNames.SegmentFileName(
        _state.SegmentInfo.Name, _state.SegmentSuffix, BLOOM_EXTENSION);

    IndexOutput bloomOutput = null;
    try
    {
        bloomOutput = _state.Directory.CreateOutput(bloomFileName, _state.Context);
        CodecUtil.WriteHeader(bloomOutput, /*BLOOM_CODEC_NAME*/ outerInstance.Name, VERSION_CURRENT);

        // remember the name of the postings format we will delegate to
        bloomOutput.WriteString(outerInstance._delegatePostingsFormat.Name);

        // First field in the output file is the number of fields+blooms saved
        bloomOutput.WriteInt32(unsaturated.Count);
        foreach (var pair in unsaturated)
        {
            var fieldInfo = pair.Key;
            var filter = pair.Value;
            bloomOutput.WriteInt32(fieldInfo.Number);
            SaveAppropriatelySizedBloomFilter(bloomOutput, filter, fieldInfo);
        }

        CodecUtil.WriteFooter(bloomOutput);
    }
    finally
    {
        IOUtils.Dispose(bloomOutput);
    }

    // We are done with large bitsets so no need to keep them hanging around
    _bloomFilters.Clear();
}
/// <summary>
/// Writes the format marker, the number of fields, and then for each field
/// its name followed by one byte of option flags.
/// </summary>
/// <param name="output">The stream that receives the field infos.</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    for (int index = 0; index < Size(); index++)
    {
        FieldInfo info = FieldInfo(index);

        output.WriteString(info.name);

        // Fold the per-field options into a single flag byte.
        byte flags = 0;
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (info.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (info.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteByte(flags);
    }
}
/// <summary>
/// Save a single segment's info.
/// </summary>
/// <remarks>
/// The file name is registered on <paramref name="si"/> before the file is created.
/// If writing fails, the output is disposed while suppressing further exceptions
/// and the file is deleted through <c>si.Dir</c>. <paramref name="fis"/> is not used here.
/// </remarks>
/// <param name="dir">Directory in which the segment-info file is created.</param>
/// <param name="si">The segment whose info is written.</param>
/// <param name="fis">Field infos (unused by this method).</param>
/// <param name="ioContext">Context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    string siFileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
    si.AddFile(siFileName);

    IndexOutput siOutput = dir.CreateOutput(siFileName, ioContext);
    bool completed = false;
    try
    {
        CodecUtil.WriteHeader(siOutput, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);

        // Write the Lucene version that created this segment, since 3.1
        siOutput.WriteString(si.Version);
        siOutput.WriteInt32(si.DocCount);

        var compoundFlag = si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO;
        siOutput.WriteByte((byte)(sbyte)compoundFlag);

        siOutput.WriteStringStringMap(si.Diagnostics);
        siOutput.WriteStringStringMap(Collections.EmptyMap<string, string>());
        siOutput.WriteStringSet(si.GetFiles());

        completed = true;
    }
    finally
    {
        if (completed)
        {
            siOutput.Dispose();
        }
        else
        {
            // Failure path: release the stream quietly, then remove the partial file.
            IOUtils.DisposeWhileHandlingException(siOutput);
            si.Dir.DeleteFile(siFileName);
        }
    }
}
/// <summary>
/// Disposes the delegate fields consumer, then writes every bloom filter that
/// the factory does not report as saturated to the segment's bloom file and
/// clears the in-memory filters.
/// </summary>
public override void Dispose()
{
    _delegateFieldsConsumer.Dispose();

    // Now we are done accumulating values for these fields
    var unsaturated = _bloomFilters.EntrySet()
        .Where(pair => !_bfpf._bloomFilterFactory.IsSaturated(pair.Value, pair.Key))
        .ToList();

    var bloomFileName = IndexFileNames.SegmentFileName(
        _state.SegmentInfo.Name, _state.SegmentSuffix, BLOOM_EXTENSION);

    IndexOutput bloomOutput = null;
    try
    {
        bloomOutput = _state.Directory.CreateOutput(bloomFileName, _state.Context);
        CodecUtil.WriteHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT);

        // remember the name of the postings format we will delegate to
        bloomOutput.WriteString(_bfpf._delegatePostingsFormat.Name);

        // First field in the output file is the number of fields+blooms saved
        bloomOutput.WriteInt(unsaturated.Count);
        foreach (var pair in unsaturated)
        {
            var fieldInfo = pair.Key;
            var filter = pair.Value;
            bloomOutput.WriteInt(fieldInfo.Number);
            SaveAppropriatelySizedBloomFilter(bloomOutput, filter, fieldInfo);
        }

        CodecUtil.WriteFooter(bloomOutput);
    }
    finally
    {
        IOUtils.Close(bloomOutput);
    }

    // We are done with large bitsets so no need to keep them hanging around
    _bloomFilters.Clear();
}
/// <summary>
/// Writes the field infos of a segment to its field-infos file in <paramref name="directory"/>.
/// </summary>
/// <remarks>
/// Written in order: the <c>FORMAT_PREFLEX_RW</c> marker, the field count, and per field
/// its name, number, flag byte and - only for indexed fields that keep norms - one byte
/// telling whether a norm type is set. <paramref name="segmentSuffix"/> is not used;
/// the file name is built with an empty suffix.
/// </remarks>
/// <param name="directory">Directory in which the file is created.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Unused by this method.</param>
/// <param name="infos">The field infos to write.</param>
/// <param name="context">Context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool completed = false;
    try
    {
        output.WriteVInt32(FORMAT_PREFLEX_RW);
        output.WriteVInt32(infos.Count);

        foreach (FieldInfo fi in infos)
        {
            sbyte flags = 0x0;
            if (fi.HasVectors)
            {
                flags |= STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                flags |= OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                flags |= STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                flags |= IS_INDEXED;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                }

                switch (fi.IndexOptions)
                {
                    case IndexOptions.DOCS_ONLY:
                        flags |= OMIT_TERM_FREQ_AND_POSITIONS;
                        break;
                    case IndexOptions.DOCS_AND_FREQS:
                        flags |= OMIT_POSITIONS;
                        break;
                }
            }

            output.WriteString(fi.Name);

            // We need to write the field number since IW tries to stabilize
            // the field numbers across segments, so the FI ordinal is not
            // necessarily equivalent to the field number.
            output.WriteInt32(fi.Number);
            output.WriteByte((byte)flags);

            if (fi.IsIndexed && !fi.OmitsNorms)
            {
                // to allow null norm types we need to indicate if norms are written
                // only in RW case
                output.WriteByte((byte)(fi.NormType == Index.DocValuesType.NONE ? 0 : 1));
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fi.Attributes is null); // not used or supported
            }
        }

        completed = true;
    }
    finally
    {
        if (!completed)
        {
            IOUtils.DisposeWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Writes one stored field: the field number, a flag byte describing the value
/// kind, and then the value itself (binary, string, or numeric).
/// </summary>
/// <param name="info">Supplies the field number.</param>
/// <param name="field">Supplies the value to store.</param>
/// <exception cref="ArgumentException">
/// The numeric type is not one of the handled cases, or the field has no numeric
/// type and neither a binary nor a string value.
/// </exception>
public override void WriteField(FieldInfo info, IIndexableField field)
{
    fieldsStream.WriteVInt32(info.Number);

    int flags = 0;
    BytesRef binary;
    string text;

    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields?  and apps
    // can customize...

    // LUCENENET specific - To avoid boxing/unboxing, we don't
    // call GetNumericValue(). Instead, we check the field.NumericType and then
    // call the appropriate conversion method.
    if (field.NumericType == NumericFieldType.NONE)
    {
        binary = field.GetBinaryValue();
        if (binary != null)
        {
            flags |= FIELD_IS_BINARY;
            text = null;
        }
        else
        {
            text = field.GetStringValue();
            if (text == null)
            {
                throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }
    else
    {
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                flags |= FIELD_IS_NUMERIC_INT;
                break;
            case NumericFieldType.INT64:
                flags |= FIELD_IS_NUMERIC_LONG;
                break;
            case NumericFieldType.SINGLE:
                flags |= FIELD_IS_NUMERIC_FLOAT;
                break;
            case NumericFieldType.DOUBLE:
                flags |= FIELD_IS_NUMERIC_DOUBLE;
                break;
            default:
                throw new ArgumentException("cannot store numeric type " + field.NumericType);
        }
        text = null;
        binary = null;
    }

    fieldsStream.WriteByte((byte)(sbyte)flags);

    if (binary != null)
    {
        fieldsStream.WriteVInt32(binary.Length);
        fieldsStream.WriteBytes(binary.Bytes, binary.Offset, binary.Length);
        return;
    }

    if (text != null)
    {
        fieldsStream.WriteString(field.GetStringValue());
        return;
    }

    // Numeric value: 32-bit types as an int, 64-bit types as a long;
    // floating-point values are written as their raw bit patterns.
    switch (field.NumericType)
    {
        case NumericFieldType.BYTE:
        case NumericFieldType.INT16:
        case NumericFieldType.INT32:
            fieldsStream.WriteInt32(field.GetInt32Value().Value);
            break;
        case NumericFieldType.INT64:
            fieldsStream.WriteInt64(field.GetInt64Value().Value);
            break;
        case NumericFieldType.SINGLE:
            fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
            break;
        case NumericFieldType.DOUBLE:
            fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
            break;
        default:
            throw new InvalidOperationException("Cannot get here");
    }
}
/// <summary>
/// Writes one stored field: the field number, a flag byte describing the value
/// kind, and then the value itself (binary, string, or numeric).
/// </summary>
/// <param name="info">Supplies the field number.</param>
/// <param name="field">Supplies the value to store.</param>
/// <exception cref="System.ArgumentException">
/// The numeric value has an unhandled type, or the field has no numeric,
/// binary, or string value.
/// </exception>
public override void WriteField(FieldInfo info, IIndexableField field)
{
    fieldsStream.WriteVInt32(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;

    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields?  and apps
    // can customize...

    object number = (object)field.GetNumericValue();
    if (number != null)
    {
        if (number is sbyte || number is short || number is int)
        {
            bits |= FIELD_IS_NUMERIC_INT;
        }
        else if (number is long)
        {
            bits |= FIELD_IS_NUMERIC_LONG;
        }
        else if (number is float)
        {
            bits |= FIELD_IS_NUMERIC_FLOAT;
        }
        else if (number is double)
        {
            bits |= FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + number.GetType());
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.GetBinaryValue();
        if (bytes != null)
        {
            bits |= FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.GetStringValue();
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    fieldsStream.WriteByte((byte)(sbyte)bits);

    if (bytes != null)
    {
        fieldsStream.WriteVInt32(bytes.Length);
        fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        fieldsStream.WriteString(field.GetStringValue());
    }
    else
    {
        if (number is sbyte || number is short || number is int)
        {
            // A boxed value can only be unboxed to its exact type, so a plain
            // (int) cast would throw InvalidCastException for a boxed sbyte or
            // short. Convert.ToInt32 widens all three types without loss.
            fieldsStream.WriteInt32(Convert.ToInt32(number));
        }
        else if (number is long)
        {
            fieldsStream.WriteInt64((long)number);
        }
        else if (number is float)
        {
            fieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number));
        }
        else if (number is double)
        {
            fieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number));
        }
        else
        {
            throw new InvalidOperationException("Cannot get here");
        }
    }
}
/// <summary>
/// Merge files with the extensions added up to now.
/// All files with these extensions are combined sequentially into the
/// compound stream: an entry count, a directory of (data offset, file name)
/// records, and then the data of every file.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The merge has already been performed, or no file has been added to this object.
/// <see cref="InvalidOperationException"/> derives from <see cref="SystemException"/>,
/// so handlers written against the previously thrown base type still catch it.
/// </exception>
public void Dispose()
{
    // Extract into protected method if class ever becomes unsealed

    // TODO: Dispose shouldn't throw exceptions!
    if (merged)
    {
        throw new InvalidOperationException("Merge already performed");
    }

    if (entries.Count == 0)
    {
        throw new InvalidOperationException("No entries to merge have been defined");
    }

    merged = true;

    // open the compound stream
    IndexOutput os = null;
    try
    {
        os = directory.CreateOutput(fileName);

        // Write the number of entries
        os.WriteVInt(entries.Count);

        // Write the directory with all offsets at 0.
        // Remember the positions of directory entries so that we can
        // adjust the offsets later
        long totalSize = 0;
        foreach (FileEntry fe in entries)
        {
            fe.directoryOffset = os.FilePointer;
            os.WriteLong(0); // for now
            os.WriteString(fe.file);
            totalSize += directory.FileLength(fe.file);
        }

        // Pre-allocate size of file as optimization --
        // this can potentially help IO performance as
        // we write the file and also later during
        // searching.  It also uncovers a disk-full
        // situation earlier and hopefully without
        // actually filling disk to 100%:
        long finalLength = totalSize + os.FilePointer;
        os.SetLength(finalLength);

        // Open the files and copy their data into the stream.
        // Remember the locations of each file's data section.
        var buffer = new byte[16384];
        foreach (FileEntry fe in entries)
        {
            fe.dataOffset = os.FilePointer;
            CopyFile(fe, os, buffer);
        }

        // Write the data offsets into the directory of the compound stream
        foreach (FileEntry fe in entries)
        {
            os.Seek(fe.directoryOffset);
            os.WriteLong(fe.dataOffset);
        }

        System.Diagnostics.Debug.Assert(finalLength == os.Length);

        // Close the output stream. Set the os to null before trying to
        // close so that if an exception occurs during the close, the
        // finally clause below will not attempt to close the stream
        // the second time.
        IndexOutput tmp = os;
        os = null;
        tmp.Close();
    }
    finally
    {
        if (os != null)
        {
            try
            {
                os.Close();
            }
            catch (System.IO.IOException)
            {
                // Deliberately ignored: we only get here when the try block is
                // already failing, and that exception is the one to propagate.
            }
        }
    }
}
/// <summary>
/// Writes the field infos of a segment to its field-infos file in <paramref name="directory"/>.
/// </summary>
/// <remarks>
/// Written in order: the codec header, the field count, and per field its name, number,
/// flag byte, one byte packing the norm type (high nibble) with the doc-values type
/// (low nibble), and the attribute map. <paramref name="segmentSuffix"/> is not used;
/// the file name is built with an empty suffix.
/// </remarks>
/// <param name="directory">Directory in which the file is created.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Unused by this method.</param>
/// <param name="infos">The field infos to write.</param>
/// <param name="context">Context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool completed = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt(infos.Size());

        foreach (FieldInfo fi in infos)
        {
            FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions;

            sbyte flags = 0x0;
            if (fi.HasVectors())
            {
                flags |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms())
            {
                flags |= Lucene42FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads())
            {
                flags |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.Indexed)
            {
                flags |= Lucene42FieldInfosFormat.IS_INDEXED;
                Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());

                if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                {
                    flags |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    flags |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                {
                    flags |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
                }
            }

            output.WriteString(fi.Name);
            output.WriteVInt(fi.Number);
            output.WriteByte(flags);

            // pack the DV types in one byte
            sbyte dv = DocValuesByte(fi.DocValuesType);
            sbyte nrm = DocValuesByte(fi.NormType);
            Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            sbyte packed = unchecked((sbyte)(0xff & ((nrm << 4) | dv)));
            output.WriteByte(packed);

            output.WriteStringStringMap(fi.Attributes());
        }

        completed = true;
    }
    finally
    {
        if (!completed)
        {
            IOUtils.CloseWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Writes the field infos of a segment to its field-infos file in <paramref name="directory"/>.
/// </summary>
/// <remarks>
/// Written in order: the codec header, the field count, and per field its name, number,
/// flag byte, one byte packing the norm type (high nibble) with the doc-values type
/// (low nibble), and the attribute map. <paramref name="segmentSuffix"/> is not used;
/// the file name is built with an empty suffix.
/// </remarks>
/// <param name="directory">Directory in which the file is created.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Unused by this method.</param>
/// <param name="infos">The field infos to write.</param>
/// <param name="context">Context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool completed = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);

        foreach (FieldInfo fi in infos)
        {
            IndexOptions indexOptions = fi.IndexOptions;

            sbyte flags = 0x0;
            if (fi.HasVectors)
            {
                flags |= Lucene40FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                flags |= Lucene40FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                flags |= Lucene40FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                flags |= Lucene40FieldInfosFormat.IS_INDEXED;

                // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                }

                switch (indexOptions)
                {
                    case IndexOptions.DOCS_ONLY:
                        flags |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        break;
                    case IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
                        flags |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        break;
                    case IndexOptions.DOCS_AND_FREQS:
                        flags |= Lucene40FieldInfosFormat.OMIT_POSITIONS;
                        break;
                }
            }

            output.WriteString(fi.Name);
            output.WriteVInt32(fi.Number);
            output.WriteByte((byte)flags);

            // pack the DV types in one byte
            byte dv = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
            byte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            }
            byte packed = (byte)(0xff & ((nrm << 4) | (byte)dv));
            output.WriteByte(packed);

            output.WriteStringStringMap(fi.Attributes);
        }

        completed = true;
    }
    finally
    {
        if (!completed)
        {
            IOUtils.DisposeWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Writes one stored field: the field number, a flag byte describing the value
/// kind, and then the value itself (binary, string, or numeric).
/// </summary>
/// <param name="info">Supplies the field number.</param>
/// <param name="field">Supplies the value to store.</param>
/// <exception cref="System.ArgumentException">
/// The numeric value has an unhandled type, or the field has no numeric,
/// binary, or string value.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    FieldsStream.WriteVInt(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;

    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields?  and apps
    // can customize...

    // A type test against a nullable type (e.g. "is int?") succeeds exactly
    // when the box holds the underlying type, so the plain types are tested.
    object number = field.NumericValue;
    if (number != null)
    {
        if (number is sbyte || number is short || number is int)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
        }
        else if (number is long)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
        }
        else if (number is float)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
        }
        else if (number is double)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + number.GetType());
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.BinaryValue();
        if (bytes != null)
        {
            bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.StringValue;
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    FieldsStream.WriteByte((sbyte)bits);

    if (bytes != null)
    {
        FieldsStream.WriteVInt(bytes.Length);
        FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        FieldsStream.WriteString(field.StringValue);
    }
    else
    {
        if (number is sbyte || number is short || number is int)
        {
            // A boxed value can only be unboxed to its exact type, so a plain
            // (int) cast would throw InvalidCastException for a boxed sbyte or
            // short. Convert.ToInt32 widens all three types without loss.
            FieldsStream.WriteInt(Convert.ToInt32(number));
        }
        else if (number is long)
        {
            FieldsStream.WriteLong((long)number);
        }
        else if (number is float)
        {
            FieldsStream.WriteInt(Number.FloatToIntBits((float)number));
        }
        else if (number is double)
        {
            FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
        }
        else
        {
            // Not expected: the flag computation above already rejected any other type.
            Debug.Assert(false);
        }
    }
}