/// <summary>
/// Sole constructor. Opens and validates both the vectors index file and the
/// vectors data file for the given segment, leaving this reader ready to
/// serve term vectors.
/// </summary>
/// <exception cref="CorruptIndexException"> if the index and data files carry different format versions </exception>
public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        // Load the index into memory
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
        {
            indexStream.ReadVLong(); // the end of the data file (unused here; validated via the footer)
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        // Index fully consumed and verified; release it before opening the data file.
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
        vectorsStream = d.OpenInput(vectorsStreamFN, context);
        string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
        int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        if (version != version2)
        {
            // BUGFIX: a version mismatch means the on-disk index is corrupt, so
            // throw CorruptIndexException (as CompressingStoredFieldsReader does
            // for the identical check) rather than a bare System.Exception.
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer);
        packedIntsVersion = vectorsStream.ReadVInt();
        chunkSize = vectorsStream.ReadVInt();
        decompressor = compressionMode.NewDecompressor();
        this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Don't leak file handles if construction fails partway:
            // closing "this" disposes vectorsStream if it was opened.
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
/// <summary>
/// Create a new <seealso cref="CompressingTermVectorsFormat"/>.
/// <p>
/// <code>formatName</code> is the name of the format; it is written into the
/// file headers and verified at read time via
/// <seealso cref="CodecUtil#checkHeader(Lucene.Net.Store.DataInput, String, int, int) codec header checks"/>.
/// <p>
/// <code>compressionMode</code> selects the trade-off between compression
/// ratio and compression/decompression speed. Never create two
/// <seealso cref="CompressingTermVectorsFormat"/>s that share a name but use
/// different <seealso cref="CompressionMode"/>s.
/// <p>
/// <code>chunkSize</code> is the minimum byte size of a chunk of documents:
/// larger chunks tend to compress better, at the cost of more memory while
/// indexing and potentially slower document loading.
/// </summary>
/// <param name="formatName"> the name of the <seealso cref="StoredFieldsFormat"/> </param>
/// <param name="segmentSuffix"> a suffix to append to files created by this format </param>
/// <param name="compressionMode"> the <seealso cref="CompressionMode"/> to use </param>
/// <param name="chunkSize"> the minimum number of bytes of a single chunk of stored documents </param>
/// <seealso cref= CompressionMode </seealso>
public CompressingTermVectorsFormat(string formatName, string segmentSuffix, CompressionMode compressionMode, int chunkSize)
{
    // Validate up front; a non-positive chunk size can never be honored.
    if (chunkSize < 1)
    {
        throw new System.ArgumentException("chunkSize must be >= 1");
    }
    this.FormatName = formatName;
    this.SegmentSuffix = segmentSuffix;
    this.CompressionMode = compressionMode;
    this.ChunkSize = chunkSize;
}
// used by clone
private CompressingTermVectorsReader(CompressingTermVectorsReader reader)
{
    // Plain values and shared metadata are copied directly.
    this.fieldInfos = reader.fieldInfos;
    this.numDocs = reader.numDocs;
    this.version = reader.version;
    this.packedIntsVersion = reader.packedIntsVersion;
    this.chunkSize = reader.chunkSize;
    this.compressionMode = reader.compressionMode;

    // Stateful objects are cloned so this reader gets its own file position
    // and scratch state, independent of the source reader.
    this.vectorsStream = (IndexInput)reader.vectorsStream.Clone();
    this.indexReader = (CompressingStoredFieldsIndexReader)reader.indexReader.Clone();
    this.decompressor = (Decompressor)reader.decompressor.Clone();
    // Fresh block-packed iterator bound to the cloned stream.
    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

    this.closed = false;
}
// used by clone
private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader)
{
    // Plain values and shared metadata are copied directly.
    this.Version_Renamed = reader.Version_Renamed;
    this.FieldInfos = reader.FieldInfos;
    this.MaxPointer = reader.MaxPointer;
    this.ChunkSize_Renamed = reader.ChunkSize_Renamed;
    this.PackedIntsVersion = reader.PackedIntsVersion;
    this.CompressionMode_Renamed = reader.CompressionMode_Renamed;
    this.NumDocs = reader.NumDocs;

    // Stateful objects are cloned so this reader gets its own file position
    // and decompression state; the scratch buffer is freshly allocated with
    // the same capacity as the source reader's.
    this.FieldsStream = (IndexInput)reader.FieldsStream.Clone();
    this.IndexReader = (CompressingStoredFieldsIndexReader)reader.IndexReader.Clone();
    this.Decompressor = (Decompressor)reader.Decompressor.Clone();
    this.Bytes = new BytesRef(reader.Bytes.Bytes.Length);

    this.Closed = false;
}
private int NumBufferedDocs; // docBase + numBufferedDocs == current doc ID

/// <summary>
/// Sole constructor. Creates the fields index and fields data files for the
/// given segment, writes their codec headers, and records the chunk size and
/// packed-ints version in the data file.
/// </summary>
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    Debug.Assert(directory != null);
    this.Directory = directory;
    this.Segment = si.Name;
    this.SegmentSuffix = segmentSuffix;
    this.CompressionMode = compressionMode;
    this.Compressor = compressionMode.NewCompressor();
    this.ChunkSize = chunkSize;
    this.DocBase = 0;
    this.BufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    // Per-doc bookkeeping arrays; grown on demand as docs are buffered.
    this.NumStoredFields = new int[16];
    this.EndOffsets = new int[16];
    this.NumBufferedDocs = 0;
    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION), context);
    try
    {
        FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(FieldsStream, codecNameDat, VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        // Ownership of indexStream transfers to IndexWriter; null it out so the
        // failure path below doesn't double-close it.
        IndexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;
        FieldsStream.WriteVInt(chunkSize);
        FieldsStream.WriteVInt(PackedInts.VERSION_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Clean up partially created outputs so a failed constructor does
            // not leak file handles or leave stray files behind.
            IOUtils.CloseWhileHandlingException(indexStream);
            Abort();
        }
    }
}
/// <summary>
/// Creates a compressing codec with an empty segment suffix.
/// Delegates to the main constructor with <code>segmentSuffix</code> set to "".
/// </summary>
protected CompressingCodec(string name, CompressionMode compressionMode, int chunkSize)
    : this(name, "", compressionMode, chunkSize)
{
}
/// <summary>
/// Creates a compressing codec with a given segment suffix.
/// </summary>
protected CompressingCodec(string name, string segmentSuffix, CompressionMode compressionMode, int chunkSize)
    : base(name, new Lucene46Codec())
{
    // Both formats are configured with the same name, suffix, compression
    // mode and chunk size.
    this.TermVectorsFormat_Renamed = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
    this.StoredFieldsFormat_Renamed = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
}
/// <summary>
/// Create a new <seealso cref="CompressingStoredFieldsFormat"/> with an empty segment
/// suffix. Delegates to the main constructor with <code>segmentSuffix</code> set to "".
/// </summary>
/// <seealso cref= CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int) </seealso>
public CompressingStoredFieldsFormat(string formatName, CompressionMode compressionMode, int chunkSize)
    : this(formatName, "", compressionMode, chunkSize)
{
}
/// <summary>
/// Sole constructor. Opens and validates both the fields index file and the
/// fields data file for the given segment, leaving this reader ready to
/// serve stored documents.
/// </summary>
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.CompressionMode_Renamed = compressionMode;
    string segment = si.Name;
    bool success = false;
    FieldInfos = fn;
    NumDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
        // Load the index into memory
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
        Version_Renamed = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        IndexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
        long maxPointer = -1;
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            // Checksum-era indexes record where the data file's payload ends,
            // followed by a verifiable footer.
            maxPointer = indexStream.ReadVLong();
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        // Index fully consumed and verified; release it before opening the data file.
        indexStream.Dispose();
        indexStream = null;
        // Open the data file and read metadata
        FieldsStream = d.OpenInput(fieldsStreamFN, context);
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            // The data file must be exactly payload + footer long, otherwise
            // it was truncated or otherwise corrupted.
            if (maxPointer + CodecUtil.FooterLength() != FieldsStream.Length())
            {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + FieldsStream.Length());
            }
        }
        else
        {
            // Pre-checksum indexes have no recorded end; use the file length.
            maxPointer = FieldsStream.Length();
        }
        this.MaxPointer = maxPointer;
        string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
        int fieldsVersion = CodecUtil.CheckHeader(FieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        // Index and data files must have been written by the same format version.
        if (Version_Renamed != fieldsVersion)
        {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + Version_Renamed + " != " + fieldsVersion);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
        {
            ChunkSize_Renamed = FieldsStream.ReadVInt();
        }
        else
        {
            // Older versions did not store the chunk size; -1 marks it unknown.
            ChunkSize_Renamed = -1;
        }
        PackedIntsVersion = FieldsStream.ReadVInt();
        Decompressor = compressionMode.NewDecompressor();
        this.Bytes = new BytesRef();
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Don't leak file handles if construction fails partway:
            // closing "this" disposes FieldsStream if it was opened.
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
/// <summary>
/// Sole constructor. Creates the vectors index and vectors data files for the
/// given segment, writes their codec headers, and records the packed-ints
/// version and chunk size in the data file.
/// </summary>
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    Debug.Assert(directory != null);
    this.Directory = directory;
    this.Segment = si.Name;
    this.SegmentSuffix = segmentSuffix;
    this.CompressionMode = compressionMode;
    this.Compressor = compressionMode.NewCompressor();
    this.ChunkSize = chunkSize;
    NumDocs = 0;
    PendingDocs = new LinkedList<DocData>();
    // Scratch buffers; oversized up front to reduce regrowth while buffering.
    TermSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
    PayloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
    LastTerm = new BytesRef(ArrayUtil.Oversize(30, 1));
    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
    try
    {
        VectorsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, VECTORS_EXTENSION), context);
        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(VectorsStream, codecNameDat, VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == VectorsStream.FilePointer);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        // Ownership of indexStream transfers to IndexWriter; null it out so the
        // failure path below doesn't double-close it.
        IndexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;
        VectorsStream.WriteVInt(PackedInts.VERSION_CURRENT);
        VectorsStream.WriteVInt(chunkSize);
        Writer = new BlockPackedWriter(VectorsStream, BLOCK_SIZE);
        // Position/offset/length/payload-length buffers; grown on demand.
        PositionsBuf = new int[1024];
        StartOffsetsBuf = new int[1024];
        LengthsBuf = new int[1024];
        PayloadLengthsBuf = new int[1024];
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Clean up partially created outputs so a failed constructor does
            // not leak file handles or leave stray files behind.
            IOUtils.CloseWhileHandlingException(indexStream);
            Abort();
        }
    }
}