public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context) { Debug.Assert(directory != null); this.Directory = directory; this.Segment = segment; bool success = false; try { FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context); IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context); FieldsStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); IndexStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); success = true; } finally { if (!success) { Abort(); } } }
internal SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IntIndexOutput freqOutput, IntIndexOutput docOutput, IntIndexOutput posOutput, IndexOutput payloadOutput) : base(skipInterval, numberOfSkipLevels, docCount) { _freqOutput = freqOutput; _posOutput = posOutput; _payloadOutput = payloadOutput; _lastSkipDoc = new int[numberOfSkipLevels]; _lastSkipPayloadLength = new int[numberOfSkipLevels]; // TODO: -- also cutover normal IndexOutput to use getIndex()? _lastSkipPayloadPointer = new long[numberOfSkipLevels]; _freqIndex = new IntIndexOutputIndex[numberOfSkipLevels]; _docIndex = new IntIndexOutputIndex[numberOfSkipLevels]; _posIndex = new IntIndexOutputIndex[numberOfSkipLevels]; for (var i = 0; i < numberOfSkipLevels; i++) { if (freqOutput != null) { _freqIndex[i] = freqOutput.Index(); } _docIndex[i] = docOutput.Index(); if (posOutput != null) { _posIndex[i] = posOutput.Index(); } } }
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter, SegmentWriteState state, PostingsWriterBase postingsWriter) { var termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_EXTENSION); _termsIndexWriter = termsIndexWriter; _output = state.Directory.CreateOutput(termsFileName, state.Context); var success = false; try { FieldInfos = state.FieldInfos; WriteHeader(_output); CurrentField = null; PostingsWriter = postingsWriter; postingsWriter.Init(_output); // have consumer write its format/header success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(_output); } } }
public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis, long closeDelayMillis, long seekDelayMillis, long minBytesWritten, IndexOutput @delegate) { Debug.Assert(bytesPerSecond > 0); this.@delegate = @delegate; this.BytesPerSecond = bytesPerSecond; this.FlushDelayMillis = flushDelayMillis; this.CloseDelayMillis = closeDelayMillis; this.SeekDelayMillis = seekDelayMillis; this.MinBytesWritten = minBytesWritten; }
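// Hedged usage sketch for the constructor above: wrap a freshly created output so
// test writes are throttled. MBitsToBytes and DEFAULT_MIN_WRITTEN_BYTES are the
// helper/constant names from the Java test framework and are assumed here; "dir"
// is a placeholder Directory. The rate and delays are illustrative, not defaults.
IndexOutput raw = dir.CreateOutput("throttled.bin", IOContext.DEFAULT);
IndexOutput throttled = new ThrottledIndexOutput(
    ThrottledIndexOutput.MBitsToBytes(10), // ~10 MBit/s write budget
    5, 5, 5,                               // flush/close/seek delay in ms
    ThrottledIndexOutput.DEFAULT_MIN_WRITTEN_BYTES,
    raw);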
/// <summary> /// Sole constructor. </summary> public Lucene40SkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) : base(skipInterval, numberOfSkipLevels, docCount) { this.FreqOutput = freqOutput; this.ProxOutput = proxOutput; LastSkipDoc = new int[numberOfSkipLevels]; LastSkipPayloadLength = new int[numberOfSkipLevels]; LastSkipOffsetLength = new int[numberOfSkipLevels]; LastSkipFreqPointer = new long[numberOfSkipLevels]; LastSkipProxPointer = new long[numberOfSkipLevels]; }
public CachedIndexOutput(ICloudProvider cloudProvider, Directory cacheDirectory, string name) { this.cloudProvider = cloudProvider; this.cacheDirectory = cacheDirectory; this.name = name; this.fileMutex = BlobMutexManager.GrabMutex(this.name); this.fileMutex.WaitOne(); try { // create the local cache file we will operate against... this.indexOutput = this.cacheDirectory.CreateOutput(this.name); } finally { this.fileMutex.ReleaseMutex(); } }
internal RateLimitedIndexOutput(RateLimiter rateLimiter, IndexOutput @delegate) { // TODO should we make buffer size configurable if (@delegate is BufferedIndexOutput) { BufferedDelegate = (BufferedIndexOutput)@delegate; this.@delegate = @delegate; } else { this.@delegate = @delegate; BufferedDelegate = null; } this.RateLimiter = rateLimiter; }
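// Hedged sketch: SimpleRateLimiter is the stock RateLimiter implementation
// (megabytes per second). The constructor above is internal, so outside the
// assembly this wiring normally arrives via the directory's rate limiting
// rather than direct construction; "dir" is a placeholder.
var limiter = new RateLimiter.SimpleRateLimiter(5.0); // ~5 MB/s
var limited = new RateLimitedIndexOutput(limiter, dir.CreateOutput("rate.bin", IOContext.DEFAULT));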
public AzureIndexOutput(AzureDirectory azureDirectory, ICloudBlob blob) { _azureDirectory = azureDirectory; _blob = blob; _name = blob.Uri.Segments[blob.Uri.Segments.Length - 1]; // _name must be assigned before the mutex is requested _fileMutex = BlobMutexManager.GrabMutex(_name); _fileMutex.WaitOne(); try { // create the local cache file we will operate against... _indexOutput = CacheDirectory.CreateOutput(_name, IOContext.DEFAULT); } finally { _fileMutex.ReleaseMutex(); } }
private int LastFieldNumber = -1; // only for assert public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context) { string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION); bool success = false; IndexOutput output = null; try { output = directory.CreateOutput(normsFileName, context); output.WriteBytes(NORMS_HEADER, 0, NORMS_HEADER.Length); @out = output; success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(output); } } }
public FixedGapTermsIndexWriter(SegmentWriteState state) { string indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION); _termIndexInterval = state.TermIndexInterval; Output = state.Directory.CreateOutput(indexFileName, state.Context); bool success = false; try { _fieldInfos = state.FieldInfos; WriteHeader(Output); Output.WriteInt(_termIndexInterval); success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(Output); } } }
internal Lucene42NormsConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension, float acceptableOverheadRatio) { this.AcceptableOverheadRatio = acceptableOverheadRatio; MaxDoc = state.SegmentInfo.DocCount; bool success = false; try { string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension); Data = state.Directory.CreateOutput(dataName, state.Context); CodecUtil.WriteHeader(Data, dataCodec, Lucene42DocValuesProducer.VERSION_CURRENT); string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension); Meta = state.Directory.CreateOutput(metaName, state.Context); CodecUtil.WriteHeader(Meta, metaCodec, Lucene42DocValuesProducer.VERSION_CURRENT); success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(this); } } }
private static void WriteHeader(IndexOutput output) { CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT); }
public StreamOutput(IndexOutput output) { this.Output = output; }
public StreamOutput(IndexOutput output) { Output = output; }
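// Hedged usage sketch, assuming StreamOutput is a System.IO.Stream adapter over
// IndexOutput (the Azure code in this file uses the symmetric StreamInput that
// way): copy a local file into a Lucene Directory. localPath and dir are
// placeholders.
using (FileStream source = File.OpenRead(localPath))
using (var destination = new StreamOutput(dir.CreateOutput("copied.bin", IOContext.DEFAULT)))
{
    source.CopyTo(destination); // disposing the adapter closes the underlying IndexOutput
}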
public TermsWriter(IndexOutput @out, FieldInfo field, bool doPackFST, float acceptableOverheadRatio) { if (!InstanceFieldsInitialized) { InitializeInstanceFields(); InstanceFieldsInitialized = true; } this.@out = @out; this.field = field; this.doPackFST = doPackFST; this.acceptableOverheadRatio = acceptableOverheadRatio; builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPackFST, acceptableOverheadRatio, true, 15); }
/// <summary> /// expert: Creates a new writer </summary> public Lucene45DocValuesConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension) { bool success = false; try { string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension); Data = state.Directory.CreateOutput(dataName, state.Context); CodecUtil.WriteHeader(Data, dataCodec, Lucene45DocValuesFormat.VERSION_CURRENT); string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension); Meta = state.Directory.CreateOutput(metaName, state.Context); CodecUtil.WriteHeader(Meta, metaCodec, Lucene45DocValuesFormat.VERSION_CURRENT); MaxDoc = state.SegmentInfo.DocCount; success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(this); } } }
private static void WriteTrailer(IndexOutput output, long dirStart) { output.WriteLong(dirStart); }
public ThrottledIndexOutput(int bytesPerSecond, long delays, int minBytesWritten, IndexOutput @delegate) : this(bytesPerSecond, delays, delays, delays, minBytesWritten, @delegate) { }
internal virtual FST<T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2); } bool willRewrite = Random.NextBoolean(); Builder<T> builder = new Builder<T>(InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, prune1, prune2, prune1 == 0 && prune2 == 0, allowRandomSuffixSharing ? Random.NextBoolean() : true, allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue, Outputs, null, willRewrite, PackedInts.DEFAULT, true, 15); if (LuceneTestCase.VERBOSE) { if (willRewrite) { Console.WriteLine("TEST: packed FST"); } else { Console.WriteLine("TEST: non-packed FST"); } } foreach (InputOutput<T> pair in Pairs) { if (pair.Output is IEnumerable) { Builder<object> builderObject = builder as Builder<object>; var values = pair.Output as IEnumerable; foreach (object value in values) { builderObject.Add(pair.Input, value); } } else { builder.Add(pair.Input, pair.Output); } } FST<T> fst = builder.Finish(); if (Random.NextBoolean() && fst != null && !willRewrite) { IOContext context = LuceneTestCase.NewIOContext(Random); using (IndexOutput @out = Dir.CreateOutput("fst.bin", context)) { fst.Save(@out); } IndexInput @in = Dir.OpenInput("fst.bin", context); try { fst = new FST<T>(@in, Outputs); } finally { @in.Dispose(); Dir.DeleteFile("fst.bin"); } } if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null) { using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), IOUtils.CHARSET_UTF_8)) { Util.ToDot(fst, w, false, false); } Console.WriteLine("SAVED out.dot"); } if (LuceneTestCase.VERBOSE) { if (fst == null) { Console.WriteLine(" fst has 0 nodes (fully pruned)"); } else { Console.WriteLine(" fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs"); } } if (prune1 == 0 && prune2 == 0) { VerifyUnPruned(InputMode, fst); } else { VerifyPruned(InputMode, fst, prune1, prune2); } return fst; }
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) { string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); IndexOutput output = directory.CreateOutput(fileName, context); bool success = false; try { CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT); output.WriteVInt(infos.Size()); foreach (FieldInfo fi in infos) { FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions; sbyte bits = 0x0; if (fi.HasVectors()) { bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR; } if (fi.OmitsNorms()) { bits |= Lucene46FieldInfosFormat.OMIT_NORMS; } if (fi.HasPayloads()) { bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS; } if (fi.Indexed) { bits |= Lucene46FieldInfosFormat.IS_INDEXED; Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads()); if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) { bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) { bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS; } } output.WriteString(fi.Name); output.WriteVInt(fi.Number); output.WriteByte(bits); // pack the DV types in one byte sbyte dv = DocValuesByte(fi.DocValuesType); sbyte nrm = DocValuesByte(fi.NormType); Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0); sbyte val = unchecked((sbyte)(0xff & ((nrm << 4) | dv))); output.WriteByte(val); output.WriteLong(fi.DocValuesGen); output.WriteStringStringMap(fi.Attributes()); } CodecUtil.WriteFooter(output); success = true; } finally { if (success) { output.Dispose(); } else { IOUtils.CloseWhileHandlingException(output); } } }
public virtual void TestDataInputOutput() { Random random = Random(); for (int iter = 0; iter < 5 * RANDOM_MULTIPLIER; iter++) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } int blockBits = TestUtil.NextInt(random, 1, 20); int blockSize = 1 << blockBits; PagedBytes p = new PagedBytes(blockBits); IndexOutput @out = dir.CreateOutput("foo", IOContext.DEFAULT); int numBytes = TestUtil.NextInt(Random(), 2, 10000000); byte[] answer = new byte[numBytes]; Random().NextBytes(answer); int written = 0; while (written < numBytes) { if (Random().Next(10) == 7) { @out.WriteByte(answer[written++]); } else { int chunk = Math.Min(Random().Next(1000), numBytes - written); @out.WriteBytes(answer, written, chunk); written += chunk; } } @out.Dispose(); IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT); DataInput @in = (DataInput)input.Clone(); p.Copy(input, input.Length()); PagedBytes.Reader reader = p.Freeze(random.NextBoolean()); byte[] verify = new byte[numBytes]; int read = 0; while (read < numBytes) { if (Random().Next(10) == 7) { verify[read++] = @in.ReadByte(); } else { int chunk = Math.Min(Random().Next(1000), numBytes - read); @in.ReadBytes(verify, read, chunk); read += chunk; } } Assert.IsTrue(Arrays.Equals(answer, verify)); BytesRef slice = new BytesRef(); for (int iter2 = 0; iter2 < 100; iter2++) { int pos = random.Next(numBytes - 1); int len = random.Next(Math.Min(blockSize + 1, numBytes - pos)); reader.FillSlice(slice, pos, len); for (int byteUpto = 0; byteUpto < len; byteUpto++) { Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]); } } input.Dispose(); dir.Dispose(); } }
// TODO what Var-Var codecs exist in practice... and what are their block sizes like? // if it's less than 128 we should set that as max and use byte? /// <summary> /// NOTE: maxBlockSize must be the maximum block size /// plus the max non-causal lookahead of your codec. EG Simple9 /// requires lookahead=1 because on seeing the Nth value /// it knows it must now encode the N-1 values before it. /// </summary> protected internal VariableIntBlockIndexOutput(IndexOutput output, int maxBlockSize) { this.output = output; this.output.WriteInt(maxBlockSize); }
private readonly ISet<string> _fieldsSeen = new JCG.HashSet<string>(); // for asserting // LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use. internal SimpleTextDocValuesWriter(SegmentWriteState state, string ext) { data = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context); numDocs = state.SegmentInfo.DocCount; }
/// <summary> /// Writes a codec footer, which records both a checksum /// algorithm ID and a checksum. This footer can /// be parsed and validated with /// <seealso cref="#checkFooter(ChecksumIndexInput) checkFooter()"/>. /// <p> /// CodecFooter --> Magic,AlgorithmID,Checksum /// <ul> /// <li>Magic --> <seealso cref="DataOutput#writeInt Uint32"/>. This /// identifies the start of the footer. It is always {@value #FOOTER_MAGIC}. /// <li>AlgorithmID --> <seealso cref="DataOutput#writeInt Uint32"/>. This /// indicates the checksum algorithm used. Currently this is always 0, /// for zlib-crc32. /// <li>Checksum --> <seealso cref="DataOutput#writeLong Uint64"/>. The /// actual checksum value for all previous bytes in the stream, including /// the bytes from Magic and AlgorithmID. /// </ul> /// </summary> /// <param name="out"> Output stream </param> /// <exception cref="IOException"> If there is an I/O error writing to the underlying medium. </exception> public static void WriteFooter(IndexOutput @out) { @out.WriteInt(FOOTER_MAGIC); @out.WriteInt(0); @out.WriteLong(@out.Checksum); }
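// Hedged write-side sketch: because the checksum covers every preceding byte,
// WriteFooter must be the very last call on the output. "ExampleCodec" and
// "dir" are placeholders, not real format names.
IndexOutput @out = dir.CreateOutput("example.dat", IOContext.DEFAULT);
try
{
    CodecUtil.WriteHeader(@out, "ExampleCodec", 0);
    @out.WriteVInt(42);          // payload
    CodecUtil.WriteFooter(@out); // magic + algorithm ID + checksum
}
finally
{
    @out.Dispose();
}
// Readers validate the footer with CodecUtil.CheckFooter over a ChecksumIndexInput.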
protected override void Dispose(bool disposing) { _fileMutex.WaitOne(); try { string fileName = _name; // make sure it's all written out _indexOutput.Flush(); long originalLength = _indexOutput.Length; _indexOutput.Dispose(); Stream blobStream; // optionally put a compressor around the blob stream if (_azureDirectory.ShouldCompressFile(_name)) { blobStream = CompressStream(fileName, originalLength); } else { blobStream = new StreamInput(CacheDirectory.OpenInput(fileName, IOContext.DEFAULT)); } try { // push the blobStream up to the cloud _blob.UploadFromStream(blobStream); // set the metadata with the original index file properties _blob.Metadata["CachedLength"] = originalLength.ToString(); var filePath = Path.Combine(_azureDirectory.CatalogPath, fileName); var lastModified = File.GetLastWriteTimeUtc(filePath); long fileTimeUtc = lastModified.ToFileTimeUtc(); //_blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString(); _blob.Metadata["CachedLastModified"] = fileTimeUtc.ToString(); _blob.SetMetadata(); Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length)); } finally { blobStream.Dispose(); } #if FULLDEBUG Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name)); #endif // clean up _indexOutput = null; _blobContainer = null; _blob = null; GC.SuppressFinalize(this); } finally { _fileMutex.ReleaseMutex(); } }
protected override void Dispose(bool disposing) { if (disposing) { try { IOUtils.Close(FieldsStream, IndexStream); } finally { FieldsStream = IndexStream = null; } } }
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); IndexOutput output = directory.CreateOutput(fileName, context); bool success = false; try { CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT); output.WriteVInt32(infos.Count); foreach (FieldInfo fi in infos) { IndexOptions indexOptions = fi.IndexOptions; sbyte bits = 0x0; if (fi.HasVectors) { bits |= Lucene40FieldInfosFormat.STORE_TERMVECTOR; } if (fi.OmitsNorms) { bits |= Lucene40FieldInfosFormat.OMIT_NORMS; } if (fi.HasPayloads) { bits |= Lucene40FieldInfosFormat.STORE_PAYLOADS; } if (fi.IsIndexed) { bits |= Lucene40FieldInfosFormat.IS_INDEXED; Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads); if (indexOptions == IndexOptions.DOCS_ONLY) { bits |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { bits |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { bits |= Lucene40FieldInfosFormat.OMIT_POSITIONS; } } output.WriteString(fi.Name); output.WriteVInt32(fi.Number); output.WriteByte((byte)bits); // pack the DV types in one byte sbyte dv = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY)); sbyte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY)); Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0); var val = (byte)(0xff & ((nrm << 4) | (byte)dv)); output.WriteByte(val); output.WriteStringStringMap(fi.Attributes); } success = true; } finally { if (success) { output.Dispose(); } else { IOUtils.DisposeWhileHandlingException(output); } } }
public override void Dispose() { if (blockOut == null) return; IOException ioe = null; try { var blockDirStart = blockOut.FilePointer; // write field summary blockOut.WriteVInt(_fields.Count); foreach (var field in _fields) { blockOut.WriteVInt(field.FieldInfo.Number); blockOut.WriteVLong(field.NumTerms); if (field.FieldInfo.FieldIndexOptions != IndexOptions.DOCS_ONLY) { blockOut.WriteVLong(field.SumTotalTermFreq); } blockOut.WriteVLong(field.SumDocFreq); blockOut.WriteVInt(field.DocCount); blockOut.WriteVInt(field.LongsSize); blockOut.WriteVLong(field.StatsOut.FilePointer); blockOut.WriteVLong(field.MetaLongsOut.FilePointer); blockOut.WriteVLong(field.MetaBytesOut.FilePointer); field.SkipOut.WriteTo(blockOut); field.StatsOut.WriteTo(blockOut); field.MetaLongsOut.WriteTo(blockOut); field.MetaBytesOut.WriteTo(blockOut); field.Dict.Save(indexOut); } WriteTrailer(blockOut, blockDirStart); CodecUtil.WriteFooter(indexOut); CodecUtil.WriteFooter(blockOut); } catch (IOException ioe2) { ioe = ioe2; } finally { IOUtils.CloseWhileHandlingException(ioe, blockOut, indexOut, postingsWriter); blockOut = null; } }
public SlowIndexOutput(SlowRAMDirectory outerInstance, IndexOutput io) { this.outerInstance = outerInstance; this.io = io; this.rand = outerInstance.forkRandom(); }
public SimpleTextFieldsWriter(SegmentWriteState state) { var fileName = SimpleTextPostingsFormat.GetPostingsFileName(state.SegmentInfo.Name, state.SegmentSuffix); _output = state.Directory.CreateOutput(fileName, state.Context); }
private void WriteTrailer(IndexOutput output, long dirStart) { output.WriteInt64(dirStart); }
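// Hedged reader-side counterpart: the trailer is a fixed-width Int64 written
// last, so a reader can seek straight to it. Whether FooterLength() must be
// subtracted depends on whether the format wrote a codec footer after the
// trailer (the Dispose() methods in this file do); names are illustrative.
input.Seek(input.Length() - CodecUtil.FooterLength() - 8); // 8 = sizeof(long)
long dirStart = input.ReadInt64();
input.Seek(dirStart); // now positioned at the start of the field summary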
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer) { // To efficiently store payloads/offsets in the posting lists we do not store the length of // every payload/offset. Instead we omit the length if the previous lengths were the same // // However, in order to support skipping, the length at every skip point must be known. // So we use the same length encoding that we use for the posting lists for the skip data as well: // Case 1: current field does not store payloads/offsets // SkipDatum --> DocSkip, FreqSkip, ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // DocSkip records the document number before every SkipInterval th document in TermFreqs. // Document numbers are represented as differences from the previous value in the sequence. // Case 2: current field stores payloads/offsets // SkipDatum --> DocSkip, PayloadLength?,OffsetLength?,FreqSkip,ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // PayloadLength,OffsetLength--> VInt // In this case DocSkip/2 is the difference between // the current and the previous value. If DocSkip // is odd, then a PayloadLength encoded as VInt follows, // if DocSkip is even, then it is assumed that the // current payload/offset lengths equals the lengths at the previous // skip point int delta = CurDoc - LastSkipDoc[level]; if (CurStorePayloads || CurStoreOffsets) { Debug.Assert(CurStorePayloads || CurPayloadLength == LastSkipPayloadLength[level]); Debug.Assert(CurStoreOffsets || CurOffsetLength == LastSkipOffsetLength[level]); if (CurPayloadLength == LastSkipPayloadLength[level] && CurOffsetLength == LastSkipOffsetLength[level]) { // the current payload/offset lengths equals the lengths at the previous skip point, // so we don't store the lengths again skipBuffer.WriteVInt(delta << 1); } else { // the payload and/or offset length is different from the previous one. We shift the DocSkip, // set the lowest bit and store the current payload and/or offset lengths as VInts. skipBuffer.WriteVInt(delta << 1 | 1); if (CurStorePayloads) { skipBuffer.WriteVInt(CurPayloadLength); LastSkipPayloadLength[level] = CurPayloadLength; } if (CurStoreOffsets) { skipBuffer.WriteVInt(CurOffsetLength); LastSkipOffsetLength[level] = CurOffsetLength; } } } else { // current field does not store payloads or offsets skipBuffer.WriteVInt(delta); } skipBuffer.WriteVInt((int)(CurFreqPointer - LastSkipFreqPointer[level])); skipBuffer.WriteVInt((int)(CurProxPointer - LastSkipProxPointer[level])); LastSkipDoc[level] = CurDoc; LastSkipFreqPointer[level] = CurFreqPointer; LastSkipProxPointer[level] = CurProxPointer; }
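// Hedged decoder sketch mirroring the layout documented above (variable names
// are illustrative, not the reader's actual fields). The low bit of the first
// VInt flags whether fresh payload/offset lengths follow; the file-pointer
// deltas always trail.
int code = skipStream.ReadVInt();
if (storesPayloads || storesOffsets)
{
    doc += code >> 1;   // DocSkip was left-shifted by one
    if ((code & 1) != 0) // low bit set: new lengths follow
    {
        if (storesPayloads) { payloadLength = skipStream.ReadVInt(); }
        if (storesOffsets) { offsetLength = skipStream.ReadVInt(); }
    }
}
else
{
    doc += code; // plain delta, no length bit
}
freqPointer += skipStream.ReadVInt();
proxPointer += skipStream.ReadVInt();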
public FieldsConsumerAnonymousInnerClassHelper(MemoryPostingsFormat outerInstance, IndexOutput @out) { this.outerInstance = outerInstance; this.@out = @out; }
public virtual void Test() { int[] ints = new int[7]; Int32sRef input = new Int32sRef(ints, 0, ints.Length); int seed = Random.Next(); Directory dir = new MMapDirectory(CreateTempDir("2BFST")); for (int doPackIter = 0; doPackIter < 2; doPackIter++) { bool doPack = doPackIter == 1; // Build FST w/ NoOutputs and stop when nodeCount > 2.2B if (!doPack) { Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS"); Outputs<object> outputs = NoOutputs.Singleton; object NO_OUTPUT = outputs.NoOutput; Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); int count = 0; Random r = new Random(seed); int[] ints2 = new int[200]; Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length); while (true) { //System.out.println("add: " + input + " -> " + output); for (int i = 10; i < ints2.Length; i++) { ints2[i] = r.Next(256); } b.Add(input2, NO_OUTPUT); count++; if (count % 100000 == 0) { Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes"); } if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024) { break; } NextInput(r, ints2); } FST<object> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); Arrays.Fill(ints2, 0); r = new Random(seed); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } for (int j = 10; j < ints2.Length; j++) { ints2[j] = r.Next(256); } Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2)); NextInput(r, ints2); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum<object> fstEnum = new Int32sRefFSTEnum<object>(fst); Arrays.Fill(ints2, 0); r = new Random(seed); int upto = 0; while (true) { Int32sRefFSTEnum.InputOutput<object> pair = fstEnum.Next(); if (pair == null) { break; } for (int j = 10; j < ints2.Length; j++) { ints2[j] = r.Next(256); } Assert.AreEqual(input2, pair.Input); Assert.AreEqual(NO_OUTPUT, pair.Output); upto++; NextInput(r, ints2); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST<object>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } // Build FST w/ ByteSequenceOutputs and stop when FST // size = 3GB { Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes"); Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton; Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); var outputBytes = new byte[20]; BytesRef output = new BytesRef(outputBytes); Arrays.Fill(ints, 0); int count = 0; Random r = new Random(seed); while (true) { r.NextBytes(outputBytes); //System.out.println("add: " + input + " -> " + output); b.Add(input, BytesRef.DeepCopyOf(output)); count++; if (count % 1000000 == 0) { Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes"); } if (b.GetFstSizeInBytes() > LIMIT) { break; } NextInput(r, ints); } FST<BytesRef> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); r = new Random(seed); Arrays.Fill(ints, 0); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } r.NextBytes(outputBytes); Assert.AreEqual(output, Util.Get(fst, input)); NextInput(r, ints); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum<BytesRef> fstEnum = new Int32sRefFSTEnum<BytesRef>(fst); Arrays.Fill(ints, 0); r = new Random(seed); int upto = 0; while (true) { Int32sRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.Next(); if (pair == null) { break; } Assert.AreEqual(input, pair.Input); r.NextBytes(outputBytes); Assert.AreEqual(output, pair.Output); upto++; NextInput(r, ints); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST<BytesRef>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } // Build FST w/ PositiveIntOutputs and stop when FST // size = 3GB { Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long"); Outputs<long?> outputs = PositiveInt32Outputs.Singleton; Builder<long?> b = new Builder<long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); long output = 1; Arrays.Fill(ints, 0); int count = 0; Random r = new Random(seed); while (true) { //System.out.println("add: " + input + " -> " + output); b.Add(input, output); output += 1 + r.Next(10); count++; if (count % 1000000 == 0) { Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes"); } if (b.GetFstSizeInBytes() > LIMIT) { break; } NextInput(r, ints); } FST<long?> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); Arrays.Fill(ints, 0); output = 1; r = new Random(seed); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } // forward lookup: Assert.AreEqual(output, (long)Util.Get(fst, input)); // reverse lookup: Assert.AreEqual(input, Util.GetByOutput(fst, output)); output += 1 + r.Next(10); NextInput(r, ints); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum<long?> fstEnum = new Int32sRefFSTEnum<long?>(fst); Arrays.Fill(ints, 0); r = new Random(seed); int upto = 0; output = 1; while (true) { Int32sRefFSTEnum.InputOutput<long?> pair = fstEnum.Next(); if (pair == null) { break; } Assert.AreEqual(input, pair.Input); Assert.AreEqual(output, pair.Output.Value); output += 1 + r.Next(10); upto++; NextInput(r, ints); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST<long?>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } } dir.Dispose(); }
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values) { // examine the values to determine best type to use ISet<BytesRef> uniqueValues = new JCG.HashSet<BytesRef>(); int minLength = int.MaxValue; int maxLength = int.MinValue; foreach (var value in values) { BytesRef b = value; if (b == null) { b = new BytesRef(); // 4.0 doesn't distinguish } if (b.Length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH) { throw new ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH); } minLength = Math.Min(minLength, b.Length); maxLength = Math.Max(maxLength, b.Length); if (uniqueValues != null) { if (uniqueValues.Add(BytesRef.DeepCopyOf(b))) { if (uniqueValues.Count > 256) { uniqueValues = null; } } } } int maxDoc = state.SegmentInfo.DocCount; bool @fixed = minLength == maxLength; bool dedup = uniqueValues != null && uniqueValues.Count * 2 < maxDoc; if (dedup) { // we will deduplicate and deref values bool success = false; IndexOutput data = null; IndexOutput index = null; string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx"); try { data = dir.CreateOutput(dataName, state.Context); index = dir.CreateOutput(indexName, state.Context); if (@fixed) { AddFixedDerefBytesField(field, data, index, values, minLength); } else { AddVarDerefBytesField(field, data, index, values); } success = true; } finally { if (success) { IOUtils.Dispose(data, index); } else { IOUtils.DisposeWhileHandlingException(data, index); } } } else { // we don't deduplicate, just write values straight if (@fixed) { // fixed byte[] string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); IndexOutput data = dir.CreateOutput(fileName, state.Context); bool success = false; try { AddFixedStraightBytesField(field, data, values, minLength); success = true; } finally { if (success) { IOUtils.Dispose(data); } else { IOUtils.DisposeWhileHandlingException(data); } } } else { // variable byte[] bool success = false; IndexOutput data = null; IndexOutput index = null; string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx"); try { data = dir.CreateOutput(dataName, state.Context); index = dir.CreateOutput(indexName, state.Context); AddVarStraightBytesField(field, data, index, values); success = true; } finally { if (success) { IOUtils.Dispose(data, index); } else { IOUtils.DisposeWhileHandlingException(data, index); } } } } }
public override void Dispose() { if (Output == null) return; try { long dirStart = Output.FilePointer; int fieldCount = _fields.Count; int nonNullFieldCount = 0; for (int i = 0; i < fieldCount; i++) { FstFieldWriter field = _fields[i]; if (field.Fst != null) { nonNullFieldCount++; } } Output.WriteVInt(nonNullFieldCount); for (int i = 0; i < fieldCount; i++) { FstFieldWriter field = _fields[i]; if (field.Fst != null) { Output.WriteVInt(field.FieldInfo.Number); Output.WriteVLong(field.IndexStart); } } WriteTrailer(dirStart); CodecUtil.WriteFooter(Output); } finally { Output.Dispose(); Output = null; } }
public virtual ThrottledIndexOutput NewFromDelegate(IndexOutput output) { return(new ThrottledIndexOutput(BytesPerSecond, FlushDelayMillis, CloseDelayMillis, SeekDelayMillis, MinBytesWritten, output)); }
// TODO: it'd be nice to let the FST builder prune based // on term count of each node (the prune1/prune2 that it // accepts), and build the index based on that. This // should result in a more compact terms index, more like // a prefix trie than the other selectors, because it // only stores enough leading bytes to get down to N // terms that may complete that prefix. It becomes // "deeper" when terms are dense, and "shallow" when they // are less dense. // // However, it's not easy to make that work with this // API, because that pruning doesn't immediately know on // seeing each term whether that term will be a seek point // or not. It requires some non-causality in the API, ie // only on seeing some number of future terms will the // builder decide which past terms are seek points. // Somehow the API'd need to be able to return an "I don't // know" value, eg like a Future, which only later on is // flipped (frozen) to true or false. // // We could solve this with a 2-pass approach, where the // first pass would build an FSA (no outputs) solely to // determine which prefixes are the 'leaves' in the // pruning. The 2nd pass would then look at this prefix // trie to mark the seek points and build the FST mapping // to the true output. // // But, one downside to this approach is that it'd result // in uneven index term selection. EG with prune1=10, the // resulting index terms could be as frequent as every 10 // terms or as rare as every <maxArcCount> * 10 (eg 2560), // in the extremes. public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) { string indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION); Output = state.Directory.CreateOutput(indexFileName, state.Context); bool success = false; try { _policy = policy; WriteHeader(Output); success = true; } finally { if (!success) IOUtils.CloseWhileHandlingException(Output); } }
/// <summary> /// Creates a postings writer with the specified PackedInts overhead ratio </summary> // TODO: does this ctor even make sense? public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) : base() { DocOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.Context); IndexOutput posOut = null; IndexOutput payOut = null; bool success = false; try { CodecUtil.WriteHeader(DocOut, DOC_CODEC, VERSION_CURRENT); ForUtil = new ForUtil(acceptableOverheadRatio, DocOut); if (state.FieldInfos.HasProx()) { PosDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE]; posOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.Context); CodecUtil.WriteHeader(posOut, POS_CODEC, VERSION_CURRENT); if (state.FieldInfos.HasPayloads()) { PayloadBytes = new sbyte[128]; PayloadLengthBuffer = new int[ForUtil.MAX_DATA_SIZE]; } else { PayloadBytes = null; PayloadLengthBuffer = null; } if (state.FieldInfos.HasOffsets()) { OffsetStartDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE]; OffsetLengthBuffer = new int[ForUtil.MAX_DATA_SIZE]; } else { OffsetStartDeltaBuffer = null; OffsetLengthBuffer = null; } if (state.FieldInfos.HasPayloads() || state.FieldInfos.HasOffsets()) { payOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.Context); CodecUtil.WriteHeader(payOut, PAY_CODEC, VERSION_CURRENT); } } else { PosDeltaBuffer = null; PayloadLengthBuffer = null; OffsetStartDeltaBuffer = null; OffsetLengthBuffer = null; PayloadBytes = null; } this.PayOut = payOut; this.PosOut = posOut; success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(DocOut, posOut, payOut); } } DocDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE]; FreqBuffer = new int[ForUtil.MAX_DATA_SIZE]; // TODO: should we try skipping every 2/4 blocks...? SkipWriter = new Lucene41SkipWriter(MaxSkipLevels, Lucene41PostingsFormat.BLOCK_SIZE, state.SegmentInfo.DocCount, DocOut, posOut, payOut); Encoded = new sbyte[ForUtil.MAX_ENCODED_SIZE]; }
protected override void Dispose(bool disposing) { if (disposing) { // TODO: add a finish() at least to PushBase? DV too...? bool success = false; try { if (DocOut != null) { CodecUtil.WriteFooter(DocOut); } if (PosOut != null) { CodecUtil.WriteFooter(PosOut); } if (PayOut != null) { CodecUtil.WriteFooter(PayOut); } success = true; } finally { if (success) { IOUtils.Close(DocOut, PosOut, PayOut); } else { IOUtils.CloseWhileHandlingException(DocOut, PosOut, PayOut); } DocOut = PosOut = PayOut = null; } } }
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd) { // examine the values to determine best type to use int minLength = int.MaxValue; int maxLength = int.MinValue; foreach (BytesRef b in values) { minLength = Math.Min(minLength, b.Length); maxLength = Math.Max(maxLength, b.Length); } // but don't use fixed if there are missing values (we are simulating how lucene40 wrote dv...) bool anyMissing = false; foreach (long n in docToOrd) { if (n == -1) { anyMissing = true; break; } } bool success = false; IndexOutput data = null; IndexOutput index = null; string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx"); try { data = dir.CreateOutput(dataName, state.Context); index = dir.CreateOutput(indexName, state.Context); if (minLength == maxLength && !anyMissing) { // fixed byte[] AddFixedSortedBytesField(field, data, index, values, docToOrd, minLength); } else { // var byte[] // three cases for simulating the old writer: // 1. no missing // 2. missing (and empty string in use): remap ord=-1 -> ord=0 // 3. missing (and empty string not in use): remap all ords +1, insert empty string into values if (!anyMissing) { AddVarSortedBytesField(field, data, index, values, docToOrd); } else if (minLength == 0) { AddVarSortedBytesField(field, data, index, values, MissingOrdRemapper.MapMissingToOrd0(docToOrd)); } else { AddVarSortedBytesField(field, data, index, MissingOrdRemapper.InsertEmptyValue(values), MissingOrdRemapper.MapAllOrds(docToOrd)); } } success = true; } finally { if (success) { IOUtils.Dispose(data, index); } else { IOUtils.DisposeWhileHandlingException(data, index); } } }
public ThrottledIndexOutput(int bytesPerSecond, long delayInMillis, IndexOutput @delegate) : this(bytesPerSecond, delayInMillis, delayInMillis, delayInMillis, DEFAULT_MIN_WRITTEN_BYTES, @delegate) { }
protected override void Dispose(bool disposing) { _fileMutex.WaitOne(); try { string fileName = _name; // make sure it's all written out _indexOutput.Flush(); long originalLength = _indexOutput.Length; _indexOutput.Dispose(); Stream blobStream = new StreamInput(CacheDirectory.OpenInput(fileName, IOContext.DEFAULT)); try { // push the blobStream up to the cloud _blob.UploadFromStream(blobStream); // set the metadata with the original index file properties _blob.Metadata["CachedLength"] = originalLength.ToString(CultureInfo.InvariantCulture); _blob.SetMetadata(); Debug.WriteLine("PUT {1} bytes to {0} in cloud", _name, blobStream.Length); } finally { blobStream.Dispose(); } // clean up _indexOutput = null; _blob = null; GC.SuppressFinalize(this); } finally { _fileMutex.ReleaseMutex(); } }
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd) { field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString()); CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); /* values */ long startPos = data.GetFilePointer(); int valueCount = 0; foreach (BytesRef v in values) { data.WriteBytes(v.Bytes, v.Offset, v.Length); valueCount++; } /* addresses */ long maxAddress = data.GetFilePointer() - startPos; index.WriteInt64(maxAddress); if (Debugging.AssertsEnabled) { Debugging.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl } PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT); long currentPosition = 0; foreach (BytesRef v in values) { w.Add(currentPosition); currentPosition += v.Length; } // write sentinel if (Debugging.AssertsEnabled) { Debugging.Assert(currentPosition == maxAddress); } w.Add(currentPosition); w.Finish(); /* ordinals */ int maxDoc = state.SegmentInfo.DocCount; if (Debugging.AssertsEnabled) { Debugging.Assert(valueCount > 0); } PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT); foreach (long n in docToOrd) { ords.Add(n); } ords.Finish(); }
public override void Init(IndexOutput termsOut) { CodecUtil.WriteHeader(termsOut, TERMS_CODEC, VERSION_CURRENT); termsOut.WriteVInt(Lucene41PostingsFormat.BLOCK_SIZE); }
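// Hedged reader-side counterpart (the Java Lucene41PostingsReader.init does
// essentially this; VERSION_START is assumed to exist alongside VERSION_CURRENT):
CodecUtil.CheckHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);
int indexBlockSize = termsIn.ReadVInt();
if (indexBlockSize != Lucene41PostingsFormat.BLOCK_SIZE)
{
    throw new InvalidOperationException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + Lucene41PostingsFormat.BLOCK_SIZE + ")");
}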
private void WriteHeader(IndexOutput output) { CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT); }
public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) { var termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION); var termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_BLOCK_EXTENSION); this.postingsWriter = postingsWriter; fieldInfos = state.FieldInfos; var success = false; try { indexOut = state.Directory.CreateOutput(termsIndexFileName, state.Context); blockOut = state.Directory.CreateOutput(termsBlockFileName, state.Context); WriteHeader(indexOut); WriteHeader(blockOut); this.postingsWriter.Init(blockOut); success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(indexOut, blockOut); } } }
/// <summary> /// Write as a bit set. </summary> private void WriteBits(IndexOutput output) { output.WriteInt32(Length); // write size output.WriteInt32(Count()); // write count output.WriteBytes(bits, bits.Length); }
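// Hedged read-side mirror of WriteBits: size and count come back first, then
// the raw bytes. Storing the count up front spares readers a full popcount;
// this assumes the writer's array was sized exactly to the bit length.
size = input.ReadInt32();                                // logical length in bits
count = input.ReadInt32();                               // number of set bits
int numBytes = (size >> 3) + ((size & 7) != 0 ? 1 : 0);  // size/8, rounded up
bits = new byte[numBytes];
input.ReadBytes(bits, 0, bits.Length);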
private static void WriteHeader(IndexOutput @out) { CodecUtil.WriteHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT); }
/// <exception cref="IOException"></exception> private void DoUpdate() { SessionToken session = null; Dictionary<string, Directory> sourceDirectory = new Dictionary<string, Directory>(); Dictionary<string, IList<string>> copiedFiles = new Dictionary<string, IList<string>>(); bool notify = false; try { string version = handler.CurrentVersion; session = replicator.CheckForUpdate(version); WriteToInfoStream(string.Format("doUpdate(): handlerVersion={0} session={1}", version, session)); if (session == null) { return; } IDictionary<string, IList<RevisionFile>> requiredFiles = RequiredFiles(session.SourceFiles); WriteToInfoStream(string.Format("doUpdate(): requiredFiles={0}", requiredFiles)); foreach (KeyValuePair<string, IList<RevisionFile>> pair in requiredFiles) { string source = pair.Key; Directory directory = factory.GetDirectory(session.Id, source); sourceDirectory.Add(source, directory); List<string> cpFiles = new List<string>(); copiedFiles.Add(source, cpFiles); foreach (RevisionFile file in pair.Value) { if (disposed) { // if we're closed, abort file copy WriteToInfoStream("doUpdate(): detected client was disposed; abort file copy"); return; } Stream input = null; IndexOutput output = null; try { input = replicator.ObtainFile(session.Id, source, file.FileName); output = directory.CreateOutput(file.FileName, IOContext.DEFAULT); CopyBytes(output, input); cpFiles.Add(file.FileName); // TODO add some validation, on size / checksum } finally { IOUtils.Dispose(input, output); } } // only notify if all required files were successfully obtained. notify = true; } } finally { if (session != null) { try { replicator.Release(session.Id); } finally { if (!notify) { // cleanup after ourselves IOUtils.Dispose(sourceDirectory.Values); factory.CleanupSession(session.Id); } } } } // notify outside the try-finally above, so the session is released sooner. // the handler may take time to finish acting on the copied files, but the // session itself is no longer needed. try { if (notify && !disposed) { // no use to notify if we are closed already handler.RevisionReady(session.Version, session.SourceFiles, new ReadOnlyDictionary<string, IList<string>>(copiedFiles), sourceDirectory); } } finally { IOUtils.Dispose(sourceDirectory.Values); //TODO: Resharper Message, Expression is always true -> Verify and if so then we can remove the null check. if (session != null) { factory.CleanupSession(session.Id); } } }
protected override void WriteSkipData(int level, IndexOutput skipBuffer) { // To efficiently store payloads in the posting lists we do not store the length of // every payload. Instead we omit the length for a payload if the previous payload had // the same length. // However, in order to support skipping, the payload length at every skip point must be known. // So we use the same length encoding that we use for the posting lists for the skip data as well: // Case 1: current field does not store payloads // SkipDatum --> DocSkip, FreqSkip, ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // DocSkip records the document number before every SkipInterval th document in TermFreqs. // Document numbers are represented as differences from the previous value in the sequence. // Case 2: current field stores payloads // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // PayloadLength --> VInt // In this case DocSkip/2 is the difference between // the current and the previous value. If DocSkip // is odd, then a PayloadLength encoded as VInt follows, // if DocSkip is even, then it is assumed that the // current payload length equals the length at the previous // skip point Debug.Assert(_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !_curStorePayloads); if (_curStorePayloads) { int delta = _curDoc - _lastSkipDoc[level]; if (_curPayloadLength == _lastSkipPayloadLength[level]) { // the current payload length equals the length at the previous skip point, // so we don't store the length again skipBuffer.WriteVInt(delta << 1); } else { // the payload length is different from the previous one. We shift the DocSkip, // set the lowest bit and store the current payload length as VInt. skipBuffer.WriteVInt(delta << 1 | 1); skipBuffer.WriteVInt(_curPayloadLength); _lastSkipPayloadLength[level] = _curPayloadLength; } } else { // current field does not store payloads skipBuffer.WriteVInt(_curDoc - _lastSkipDoc[level]); } if (_indexOptions != FieldInfo.IndexOptions.DOCS_ONLY) { _freqIndex[level].Mark(); _freqIndex[level].Write(skipBuffer, false); } _docIndex[level].Mark(); _docIndex[level].Write(skipBuffer, false); if (_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { _posIndex[level].Mark(); _posIndex[level].Write(skipBuffer, false); if (_curStorePayloads) { skipBuffer.WriteVInt((int)(_curPayloadPointer - _lastSkipPayloadPointer[level])); } } _lastSkipDoc[level] = _curDoc; _lastSkipPayloadPointer[level] = _curPayloadPointer; }
internal void Persist() { UninterruptableMonitor.Enter(this); try { string fileName = SNAPSHOTS_PREFIX + nextWriteGen; IndexOutput @out = dir.CreateOutput(fileName, IOContext.DEFAULT); bool success = false; try { CodecUtil.WriteHeader(@out, CODEC_NAME, VERSION_CURRENT); @out.WriteVInt32(m_refCounts.Count); foreach (KeyValuePair<long, int> ent in m_refCounts) { @out.WriteVInt64(ent.Key); @out.WriteVInt32(ent.Value); } success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(@out); try { dir.DeleteFile(fileName); } catch (Exception e) when (e.IsException()) { // Suppress so we keep throwing original exception } } else { IOUtils.Dispose(@out); } } dir.Sync(/*Collections.singletonList(*/ new[] { fileName } /*)*/); if (nextWriteGen > 0) { string lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen - 1); try { dir.DeleteFile(lastSaveFile); } catch (Exception ioe) when (ioe.IsIOException()) { // OK: likely it didn't exist } } nextWriteGen++; } finally { UninterruptableMonitor.Exit(this); } }
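// Hedged sketch of the matching load path: open the newest snapshots_N file,
// verify the header, and rebuild the refcount map the loop above serialized.
// VERSION_START is assumed to exist alongside VERSION_CURRENT.
IndexInput @in = dir.OpenInput(fileName, IOContext.DEFAULT);
try
{
    CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
    int count = @in.ReadVInt32();
    for (int i = 0; i < count; i++)
    {
        long gen = @in.ReadVInt64();
        int refCount = @in.ReadVInt32();
        m_refCounts[gen] = refCount;
    }
}
finally
{
    @in.Dispose();
}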
/* private class IterableAnonymousInnerClassHelper : IEnumerable<int> { private readonly Lucene45DocValuesConsumer OuterInstance; private IEnumerable<int> DocToOrdCount; private IEnumerable<long> Ords; public IterableAnonymousInnerClassHelper(IEnumerable<int> docToOrdCount, IEnumerable<long> ords) { //this.OuterInstance = outerInstance; this.DocToOrdCount = docToOrdCount; this.Ords = ords; } public virtual IEnumerator<BytesRef> GetEnumerator() { */ /*IEnumerator<Number> docToOrdCountIt = DocToOrdCount.GetEnumerator(); IEnumerator<Number> ordsIt = Ords.GetEnumerator(); return new IteratorAnonymousInnerClassHelper(this, docToOrdCountIt, ordsIt);*/ /* return new SortedSetIterator(DocToOrdCount.GetEnumerator(), Ords.GetEnumerator()); } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } private class SortedSetIterator : IEnumerator<BytesRef> { internal byte[] buffer = new byte[10]; //Initial size, will grow if needed internal ByteArrayDataOutput output = new ByteArrayDataOutput(); internal BytesRef bytesRef = new BytesRef(); internal IEnumerator<int> counts; internal IEnumerator<long> ords; internal SortedSetIterator(IEnumerator<int> counts, IEnumerator<long> ords) { this.counts = counts; this.ords = ords; } public BytesRef Current { get { return bytesRef; } } public void Dispose() { counts.Dispose(); ords.Dispose(); } object System.Collections.IEnumerator.Current { get { return bytesRef; } } public bool MoveNext() { if (!counts.MoveNext()) return false; int count = counts.Current; int maxSize = count * 9;//worst case if (maxSize > buffer.Length) buffer = ArrayUtil.Grow(buffer, maxSize); try { EncodeValues(count); } catch (System.IO.IOException) { throw; } bytesRef.Bytes = (sbyte[])(Array)buffer; bytesRef.Offset = 0; bytesRef.Length = output.Position; return true; } private void EncodeValues(int count) { output.Reset(buffer); long lastOrd = 0; for (int i = 0; i < count; i++) { ords.MoveNext(); long ord = ords.Current; output.WriteVLong(ord - lastOrd); lastOrd = ord; } } public void Reset() { throw new NotImplementedException(); } }*/ /*private class IteratorAnonymousInnerClassHelper : IEnumerator<Number> { private readonly IterableAnonymousInnerClassHelper OuterInstance; private IEnumerator<Number> DocToOrdCountIt; private IEnumerator<Number> OrdsIt; public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance, IEnumerator<Number> docToOrdCountIt, IEnumerator<Number> ordsIt) { this.OuterInstance = outerInstance; this.DocToOrdCountIt = docToOrdCountIt; this.OrdsIt = ordsIt; } public virtual bool HasNext() { return DocToOrdCountIt.HasNext(); } public virtual Number Next() { Number ordCount = DocToOrdCountIt.next(); if ((long)ordCount == 0) { return MISSING_ORD; } else { Debug.Assert((long)ordCount == 1); return OrdsIt.next(); } } public virtual void Remove() { throw new System.NotSupportedException(); } }*/ //} protected override void Dispose(bool disposing) { if (disposing) { bool success = false; try { if (Meta != null) { Meta.WriteVInt(-1); // write EOF marker CodecUtil.WriteFooter(Meta); // write checksum } if (Data != null) { CodecUtil.WriteFooter(Data); // write checksum } success = true; } finally { if (success) { IOUtils.Close(Data, Meta); } else { IOUtils.CloseWhileHandlingException(Data, Meta); } Meta = Data = null; } } }
/// <summary> /// Subclasses must implement the actual skip data encoding in this method. /// </summary> /// <param name="level"> The level skip data shall be writing for. </param> /// <param name="skipBuffer"> The skip buffer to write to. </param> protected abstract void WriteSkipData(int level, IndexOutput skipBuffer);
protected override void Dispose(bool disposing) { _fileMutex.WaitOne(); try { string fileName = _name; // make sure it's all written out _indexOutput.Flush(); long originalLength = _indexOutput.Length; _indexOutput.Dispose(); Stream blobStream; #if COMPRESSBLOBS // optionally put a compressor around the blob stream if (_azureDirectory.ShouldCompressFile(_name)) { // unfortunately, deflate stream doesn't allow seek, and we need a seekable stream // to pass to the blob storage stuff, so we compress into a memory stream MemoryStream compressedStream = new MemoryStream(); try { IndexInput indexInput = CacheDirectory.OpenInput(fileName); using (DeflateStream compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true)) { // compress to compressedOutputStream byte[] bytes = new byte[indexInput.Length()]; indexInput.ReadBytes(bytes, 0, bytes.Length); compressor.Write(bytes, 0, bytes.Length); } indexInput.Dispose(); // seek back to beginning of compressed stream compressedStream.Seek(0, SeekOrigin.Begin); Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}", originalLength, compressedStream.Length, ((float)compressedStream.Length / (float)originalLength) * 100, _name)); } catch { // release the compressed stream resources if an error occurs compressedStream.Dispose(); throw; } blobStream = compressedStream; } else #endif { blobStream = new StreamInput(CacheDirectory.OpenInput(fileName)); } try { // push the blobStream up to the cloud _blob.UploadFromStream(blobStream); // set the metadata with the original index file properties _blob.Metadata["CachedLength"] = originalLength.ToString(); _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString(); _blob.SetMetadata(); Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length)); } finally { blobStream.Dispose(); } #if FULLDEBUG Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name)); #endif // clean up _indexOutput = null; _blobContainer = null; _blob = null; GC.SuppressFinalize(this); } finally { _fileMutex.ReleaseMutex(); } }
private void WriteHeader(IndexOutput @out) { CodecUtil.WriteHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT); }