public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
{
    //Positive test of FieldInfos
    Assert.IsTrue(TestDoc != null);
    FieldInfos.Builder builder = new FieldInfos.Builder();
    foreach (IIndexableField field in TestDoc)
    {
        builder.AddOrUpdate(field.Name, field.IndexableFieldType);
    }
    FieldInfos fieldInfos = builder.Finish();
    //Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Count == DocHelper.All.Count); //this is all b/c we are using the no-arg constructor
    IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random));
    Assert.IsTrue(output != null);
    //Use a RAMOutputStream
    FieldInfosWriter writer = Codec.Default.FieldInfosFormat.FieldInfosWriter;
    writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
    output.Dispose();
    return fieldInfos;
}
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;
    fieldsStream = d.CreateOutput(segment + ".fdt");
    indexStream = d.CreateOutput(segment + ".fdx");
    doClose = true;
}
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    int delta = CurDoc - LastSkipDoc[level];
    // if (DEBUG) {
    //   System.out.println("writeSkipData level=" + level + " lastDoc=" + curDoc + " delta=" + delta + " curDocPointer=" + curDocPointer);
    // }
    skipBuffer.WriteVInt(delta);
    LastSkipDoc[level] = CurDoc;

    skipBuffer.WriteVInt((int)(CurDocPointer - LastSkipDocPointer[level]));
    LastSkipDocPointer[level] = CurDocPointer;

    if (FieldHasPositions)
    {
        // if (DEBUG) {
        //   System.out.println("  curPosPointer=" + curPosPointer + " curPosBufferUpto=" + curPosBufferUpto);
        // }
        skipBuffer.WriteVInt((int)(CurPosPointer - LastSkipPosPointer[level]));
        LastSkipPosPointer[level] = CurPosPointer;
        skipBuffer.WriteVInt(CurPosBufferUpto);

        if (FieldHasPayloads)
        {
            skipBuffer.WriteVInt(CurPayloadByteUpto);
        }

        if (FieldHasOffsets || FieldHasPayloads)
        {
            skipBuffer.WriteVInt((int)(CurPayPointer - LastSkipPayPointer[level]));
            LastSkipPayPointer[level] = CurPayPointer;
        }
    }
}
internal void ReWrite(SegmentInfo si)
{
    // NOTE: norms are re-written in regular directory, not cfs
    System.String oldFileName = si.GetNormFileName(this.number);
    if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
    {
        // Mark this file for deletion. Note that we don't
        // actually try to delete it until the new segments
        // file is successfully written:
        Enclosing_Instance.deleter.AddPendingFile(oldFileName);
    }

    si.AdvanceNormGen(this.number);
    IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
    try
    {
        out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
    }
    finally
    {
        out_Renamed.Close();
    }
    this.dirty = false;
}
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
{
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
    doClose = false;
}
private void MergeNorms()
{
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
            IndexOutput output = directory.CreateOutput(segment + ".f" + i);
            try
            {
                for (int j = 0; j < readers.Count; j++)
                {
                    IndexReader reader = (IndexReader)readers[j];
                    int maxDoc = reader.MaxDoc();
                    byte[] input = new byte[maxDoc];
                    reader.Norms(fi.name, input, 0);
                    for (int k = 0; k < maxDoc; k++)
                    {
                        if (!reader.IsDeleted(k))
                        {
                            output.WriteByte(input[k]);
                        }
                    }
                }
            }
            finally
            {
                output.Close();
            }
        }
    }
}
public void ReWrite()
{
    // NOTE: norms are re-written in regular directory, not cfs
    IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
    try
    {
        out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
    }
    finally
    {
        out_Renamed.Close();
    }
    System.String fileName;
    if (Enclosing_Instance.cfsReader == null)
    {
        fileName = Enclosing_Instance.segment + ".f" + number;
    }
    else
    {
        // use a different file name if we have compound format
        fileName = Enclosing_Instance.segment + ".s" + number;
    }
    Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
    this.dirty = false;
}
public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context)
{
    Debug.Assert(directory != null);
    this.Directory = directory;
    this.Segment = segment;

    bool success = false;
    try
    {
        FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
        IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);

        FieldsStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
        IndexStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            Abort();
        }
    }
}
internal override void closeDocStore(DocumentsWriter.FlushState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors enabled
            fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            if (4 + state.numDocsInStore * 16 != state.directory.FileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
            {
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + " length in bytes of " + state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            }

            string tvxFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            string tvfFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
            string tvdFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

            state.flushedFiles[tvxFile] = tvxFile;
            state.flushedFiles[tvfFile] = tvfFile;
            state.flushedFiles[tvdFile] = tvdFile;

            docWriter.RemoveOpenFile(tvxFile);
            docWriter.RemoveOpenFile(tvfFile);
            docWriter.RemoveOpenFile(tvdFile);

            lastDocID = 0;
        }
    }
}
public void Write(Directory directory) { IndexOutput output = directory.CreateOutput("segments.new"); try { output.WriteInt(FORMAT); // write FORMAT output.WriteLong(++version); // every write changes the index output.WriteInt(counter); // write counter output.WriteInt(Count); // write infos for (int i = 0; i < Count; i++) { SegmentInfo si = Info(i); output.WriteString(si.name); output.WriteInt(si.docCount); } } finally { output.Close(); } // install new segment info directory.RenameFile("segments.new", IndexFileNames.SEGMENTS); }
/// <summary> Save this segment's info.</summary>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);
    output.WriteInt(docStoreOffset);
    if (docStoreOffset != -1)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte)(docStoreIsCompoundFile ? 1 : 0));
    }

    output.WriteByte((byte)(hasSingleNormFile ? 1 : 0));

    if (normGen == null)
    {
        output.WriteInt(NO);
    }
    else
    {
        output.WriteInt(normGen.Length);
        for (int j = 0; j < normGen.Length; j++)
        {
            output.WriteLong(normGen[j]);
        }
    }

    output.WriteByte((byte)isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte)(hasProx ? 1 : 0));
}
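// Hedged sketch (added for illustration; not part of the original sources): the
// layout written above is purely positional, so a reader must consume the same
// fields in the same order and widths. Names mirror the writer's fields.
internal static void ReadSegmentInfoSketch(IndexInput input)
{
    string name = input.ReadString();
    int docCount = input.ReadInt();
    long delGen = input.ReadLong();
    int docStoreOffset = input.ReadInt();
    if (docStoreOffset != -1)
    {
        string docStoreSegment = input.ReadString();
        bool docStoreIsCompoundFile = input.ReadByte() == 1;
    }
    bool hasSingleNormFile = input.ReadByte() == 1;
    // ... the remaining fields (normGen array, isCompoundFile, delCount, hasProx)
    // follow in exactly the write order shown above.
}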
private void MergeTerms()
{
    try
    {
        freqOutput = directory.CreateOutput(segment + ".frq");
        proxOutput = directory.CreateOutput(segment + ".prx");
        termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
        skipInterval = termInfosWriter.skipInterval;
        maxSkipLevels = termInfosWriter.maxSkipLevels;
        skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
        queue = new SegmentMergeQueue(readers.Count);

        MergeTermInfos();
    }
    finally
    {
        if (freqOutput != null)
        {
            freqOutput.Close();
        }
        if (proxOutput != null)
        {
            proxOutput.Close();
        }
        if (termInfosWriter != null)
        {
            termInfosWriter.Close();
        }
        if (queue != null)
        {
            queue.Close();
        }
    }
}
public void Write(IndexOutput output)
{
    output.WriteVInt(Size());
    for (int i = 0; i < Size(); i++)
    {
        FieldInfo fi = FieldInfo(i);
        byte bits = (byte)0x0;
        if (fi.isIndexed)
        {
            bits |= IS_INDEXED;
        }
        if (fi.storeTermVector)
        {
            bits |= STORE_TERMVECTOR;
        }
        if (fi.storePositionWithTermVector)
        {
            bits |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (fi.storeOffsetWithTermVector)
        {
            bits |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (fi.omitNorms)
        {
            bits |= OMIT_NORMS;
        }
        output.WriteString(fi.name);
        output.WriteByte(bits);
    }
}
private void MergeNorms()
{
    byte[] normBuffer = null;
    IndexOutput output = null;
    try
    {
        for (int i = 0; i < fieldInfos.Size(); i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.isIndexed && !fi.omitNorms)
            {
                if (output == null)
                {
                    output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                    output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                }
                for (int j = 0; j < readers.Count; j++)
                {
                    IndexReader reader = (IndexReader)readers[j];
                    int maxDoc = reader.MaxDoc();
                    if (normBuffer == null || normBuffer.Length < maxDoc)
                    {
                        // the buffer is too small for the current segment
                        normBuffer = new byte[maxDoc];
                    }
                    reader.Norms(fi.name, normBuffer, 0);
                    if (!reader.HasDeletions())
                    {
                        // optimized case for segments without deleted docs
                        output.WriteBytes(normBuffer, maxDoc);
                    }
                    else
                    {
                        // this segment has deleted docs, so we have to
                        // check for every doc if it is deleted or not
                        for (int k = 0; k < maxDoc; k++)
                        {
                            if (!reader.IsDeleted(k))
                            {
                                output.WriteByte(normBuffer[k]);
                            }
                        }
                    }
                    if (checkAbort != null)
                    {
                        checkAbort.Work(maxDoc);
                    }
                }
            }
        }
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
internal void InitTermVectorsWriter()
{
    lock (this)
    {
        if (tvx == null)
        {
            System.String docStoreSegment = docWriter.GetDocStoreSegment();

            if (docStoreSegment == null)
            {
                return;
            }

            System.Diagnostics.Debug.Assert(docStoreSegment != null);

            // If we hit an exception while init'ing the term
            // vector output files, we must abort this segment
            // because those files will be in an unknown
            // state:
            tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
            tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);

            tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);

            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
private void Demo_FSIndexInputBug(Directory fsdir, System.String file)
{
    // Setup the test file - we need more than 1024 bytes
    IndexOutput os = fsdir.CreateOutput(file, null);
    for (int i = 0; i < 2000; i++)
    {
        os.WriteByte((byte)i);
    }
    os.Close();

    IndexInput in_Renamed = fsdir.OpenInput(file, null);

    // This read primes the buffer in IndexInput
    byte b = in_Renamed.ReadByte(null);

    // Close the file
    in_Renamed.Close();

    // ERROR: this call should fail, but succeeds because the buffer
    // is still filled
    b = in_Renamed.ReadByte(null);

    // ERROR: this call should fail, but succeeds for some reason as well
    in_Renamed.Seek(1099, null);

    // OK: this call correctly fails. We are now past the 1024 internal
    // buffer, so an actual IO is attempted, which fails
    Assert.Throws<NullReferenceException>(() => in_Renamed.ReadByte(null), "expected readByte() to throw exception");
}
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors enabled
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            string fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            if (4 + ((long)state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
            {
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
            }

            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
public void Dispose()
{
    // Move to protected method if class becomes unsealed
    if (doClose)
    {
        try
        {
            if (fieldsStream != null)
            {
                try
                {
                    fieldsStream.Close();
                }
                finally
                {
                    fieldsStream = null;
                }
            }
        }
        catch (System.IO.IOException)
        {
            try
            {
                if (indexStream != null)
                {
                    try
                    {
                        indexStream.Close();
                    }
                    finally
                    {
                        indexStream = null;
                    }
                }
            }
            catch (System.IO.IOException)
            {
                // Ignore so we throw only first IOException hit
            }
            throw;
        }
        finally
        {
            if (indexStream != null)
            {
                try
                {
                    indexStream.Close();
                }
                finally
                {
                    indexStream = null;
                }
            }
        }
    }
}
internal void Close()
{
    if (doClose)
    {
        try
        {
            if (fieldsStream != null)
            {
                try
                {
                    fieldsStream.Close();
                }
                finally
                {
                    fieldsStream = null;
                }
            }
        }
        catch (System.IO.IOException)
        {
            try
            {
                if (indexStream != null)
                {
                    try
                    {
                        indexStream.Close();
                    }
                    finally
                    {
                        indexStream = null;
                    }
                }
            }
            catch (System.IO.IOException)
            {
                // Ignore so we throw only first IOException hit
            }
            throw; // rethrow without resetting the original stack trace
        }
        finally
        {
            if (indexStream != null)
            {
                try
                {
                    indexStream.Close();
                }
                finally
                {
                    indexStream = null;
                }
            }
        }
    }
}
public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis, long closeDelayMillis, long seekDelayMillis, long minBytesWritten, IndexOutput @delegate)
{
    Debug.Assert(bytesPerSecond > 0);
    this.@delegate = @delegate;
    this.BytesPerSecond = bytesPerSecond;
    this.FlushDelayMillis = flushDelayMillis;
    this.CloseDelayMillis = closeDelayMillis;
    this.SeekDelayMillis = seekDelayMillis;
    this.MinBytesWritten = minBytesWritten;
}
internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
    : base(skipInterval, numberOfSkipLevels, docCount)
{
    this.freqOutput = freqOutput;
    this.proxOutput = proxOutput;
    lastSkipDoc = new int[numberOfSkipLevels];
    lastSkipPayloadLength = new int[numberOfSkipLevels];
    lastSkipFreqPointer = new long[numberOfSkipLevels];
    lastSkipProxPointer = new long[numberOfSkipLevels];
}
public void Write(Directory directory)
{
    System.String segmentFileName = GetNextSegmentFileName();

    // Always advance the generation on write:
    if (generation == -1)
    {
        generation = 1;
    }
    else
    {
        generation++;
    }

    IndexOutput output = directory.CreateOutput(segmentFileName);
    try
    {
        output.WriteInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
        output.WriteLong(++version); // every write changes the index
        output.WriteInt(counter); // write counter
        output.WriteInt(Count); // write infos
        for (int i = 0; i < Count; i++)
        {
            Info(i).Write(output);
        }
    }
    finally
    {
        output.Close();
    }

    try
    {
        output = directory.CreateOutput(IndexFileNames.SEGMENTS_GEN);
        try
        {
            output.WriteInt(FORMAT_LOCKLESS);
            output.WriteLong(generation);
            output.WriteLong(generation);
        }
        finally
        {
            output.Close();
        }
    }
    catch (System.IO.IOException)
    {
        // It's OK if we fail to write this file, since it's
        // used only as one of the retry fallbacks.
    }

    lastGeneration = generation;
}
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    // To efficiently store payloads in the posting lists we do not store the length of
    // every payload. Instead we omit the length for a payload if the previous payload had
    // the same length.
    // However, in order to support skipping, the payload length at every skip point must be known.
    // So we use the same length encoding that we use for the posting lists for the skip data as well:
    // Case 1: current field does not store payloads
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           DocSkip records the document number before every SkipInterval th document in TermFreqs.
    //           Document numbers are represented as differences from the previous value in the sequence.
    // Case 2: current field stores payloads
    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength             --> VInt
    //         In this case DocSkip/2 is the difference between
    //         the current and the previous value. If DocSkip
    //         is odd, then a PayloadLength encoded as VInt follows,
    //         if DocSkip is even, then it is assumed that the
    //         current payload length equals the length at the previous
    //         skip point
    if (curStorePayloads)
    {
        int delta = curDoc - lastSkipDoc[level];
        if (curPayloadLength == lastSkipPayloadLength[level])
        {
            // the current payload length equals the length at the previous skip point,
            // so we don't store the length again
            skipBuffer.WriteVInt(delta * 2);
        }
        else
        {
            // the payload length is different from the previous one. We shift the DocSkip,
            // set the lowest bit and store the current payload length as VInt.
            skipBuffer.WriteVInt(delta * 2 + 1);
            skipBuffer.WriteVInt(curPayloadLength);
            lastSkipPayloadLength[level] = curPayloadLength;
        }
    }
    else
    {
        // current field does not store payloads
        skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]);
    }
    skipBuffer.WriteVInt((int)(curFreqPointer - lastSkipFreqPointer[level]));
    skipBuffer.WriteVInt((int)(curProxPointer - lastSkipProxPointer[level]));

    lastSkipDoc[level] = curDoc;
    //System.out.println("write doc at level " + level + ": " + curDoc);
    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
}
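// Worked example (added for illustration; not part of the original source) of the
// DocSkip parity trick documented above: an even value means "payload length
// unchanged since the previous skip point", an odd value means "a VInt payload
// length follows".
private static void DemoDocSkipEncoding()
{
    int delta = 7;              // doc-number delta at this skip point
    int payloadLength = 5;      // current payload length
    int lastPayloadLength = 3;  // length at the previous skip point

    int docSkip = (payloadLength == lastPayloadLength)
        ? delta * 2       // even: length is implied by the previous skip point
        : delta * 2 + 1;  // odd: the writer also emits payloadLength as a VInt

    // Reader side inverts the encoding:
    int decodedDelta = docSkip / 2;            // shift out the flag bit -> 7
    bool lengthFollows = (docSkip & 1) != 0;   // odd => read a VInt length next
    System.Console.WriteLine(decodedDelta + " " + lengthFollows);
}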
public PreFlexRWSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
    : base(skipInterval, numberOfSkipLevels, docCount)
{
    this.FreqOutput = freqOutput;
    this.ProxOutput = proxOutput;
    LastSkipDoc = new int[numberOfSkipLevels];
    LastSkipPayloadLength = new int[numberOfSkipLevels];
    LastSkipFreqPointer = new long[numberOfSkipLevels];
    LastSkipProxPointer = new long[numberOfSkipLevels];
}
/// <summary>Creates a file of the specified size with random data. </summary>
private void CreateRandomFile(Directory dir, System.String name, int size)
{
    IndexOutput os = dir.CreateOutput(name);
    // Create the Random once, outside the loop: constructing a new time-seeded
    // Random on every iteration can produce long runs of identical bytes.
    System.Random random = new System.Random();
    for (int i = 0; i < size; i++)
    {
        byte b = (byte)(random.NextDouble() * 256);
        os.WriteByte(b);
    }
    os.Close();
}
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;
    output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
    output.WriteInt(FORMAT); // write format
    output.WriteLong(0); // leave space for size
    output.WriteInt(indexInterval); // write indexInterval
    output.WriteInt(skipInterval); // write skipInterval
}
protected override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    // To efficiently store payloads in the posting lists we do not store the length of
    // every payload. Instead we omit the length for a payload if the previous payload had
    // the same length.
    // However, in order to support skipping, the payload length at every skip point must be known.
    // So we use the same length encoding that we use for the posting lists for the skip data as well:
    // Case 1: current field does not store payloads
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           DocSkip records the document number before every SkipInterval th document in TermFreqs.
    //           Document numbers are represented as differences from the previous value in the sequence.
    // Case 2: current field stores payloads
    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength             --> VInt
    //         In this case DocSkip/2 is the difference between
    //         the current and the previous value. If DocSkip
    //         is odd, then a PayloadLength encoded as VInt follows,
    //         if DocSkip is even, then it is assumed that the
    //         current payload length equals the length at the previous
    //         skip point
    if (CurStorePayloads)
    {
        int delta = CurDoc - LastSkipDoc[level];
        if (CurPayloadLength == LastSkipPayloadLength[level])
        {
            // the current payload length equals the length at the previous skip point,
            // so we don't store the length again
            skipBuffer.WriteVInt32(delta * 2);
        }
        else
        {
            // the payload length is different from the previous one. We shift the DocSkip,
            // set the lowest bit and store the current payload length as VInt.
            skipBuffer.WriteVInt32(delta * 2 + 1);
            skipBuffer.WriteVInt32(CurPayloadLength);
            LastSkipPayloadLength[level] = CurPayloadLength;
        }
    }
    else
    {
        // current field does not store payloads
        skipBuffer.WriteVInt32(CurDoc - LastSkipDoc[level]);
    }
    skipBuffer.WriteVInt32((int)(CurFreqPointer - LastSkipFreqPointer[level]));
    skipBuffer.WriteVInt32((int)(CurProxPointer - LastSkipProxPointer[level]));

    LastSkipDoc[level] = CurDoc;
    LastSkipFreqPointer[level] = CurFreqPointer;
    LastSkipProxPointer[level] = CurProxPointer;
}
/// <summary>Creates a file of the specified size with sequential data. The first
/// byte is written as the start byte provided. All subsequent bytes are
/// computed as start + offset where offset is the number of the byte.
/// </summary>
private void CreateSequenceFile(Directory dir, System.String name, byte start, int size)
{
    IndexOutput os = dir.CreateOutput(name);
    for (int i = 0; i < size; i++)
    {
        os.WriteByte(start);
        start++;
    }
    os.Close();
}
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos) { // Open files for TermVector storage tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT); tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT); tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT); this.fieldInfos = fieldInfos; }
public void Write(Directory d, System.String name)
{
    IndexOutput output = d.CreateOutput(name);
    try
    {
        Write(output);
    }
    finally
    {
        output.Close();
    }
}
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;
    output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
    output.WriteInt(FORMAT_CURRENT); // write format
    output.WriteLong(0); // leave space for size
    output.WriteInt(indexInterval); // write indexInterval
    output.WriteInt(skipInterval); // write skipInterval
    output.WriteInt(maxSkipLevels); // write maxSkipLevels
    System.Diagnostics.Debug.Assert(InitUTF16Results());
}
public virtual void Test()
{
    //Positive test of FieldInfos
    Assert.IsTrue(testDoc != null);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    //Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor
    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    IndexOutput output = dir.CreateOutput(name, null);
    Assert.IsTrue(output != null);
    //Use a RAMOutputStream
    try
    {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length > 0);
        FieldInfos readIn = new FieldInfos(dir, name, null);
        Assert.IsTrue(fieldInfos.Size() == readIn.Size());

        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == false);

        info = readIn.FieldInfo("textField2");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == true);
        Assert.IsTrue(info.omitNorms_ForNUnit == false);

        info = readIn.FieldInfo("textField3");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == true);

        info = readIn.FieldInfo("omitNorms");
        Assert.IsTrue(info != null);
        Assert.IsTrue(info.storeTermVector_ForNUnit == false);
        Assert.IsTrue(info.omitNorms_ForNUnit == true);

        dir.Close();
    }
    catch (System.IO.IOException)
    {
        Assert.IsTrue(false);
    }
}
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    // Open files for TermVector storage
    tvx = directory.CreateOutput(segment + TVX_EXTENSION);
    tvx.WriteInt(FORMAT_VERSION);
    tvd = directory.CreateOutput(segment + TVD_EXTENSION);
    tvd.WriteInt(FORMAT_VERSION);
    tvf = directory.CreateOutput(segment + TVF_EXTENSION);
    tvf.WriteInt(FORMAT_VERSION);

    this.fieldInfos = fieldInfos;
    fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
    terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
}
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent)
    : base()
{
    this.parent = parent;
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
    out_Renamed = parent.parent.dir.CreateOutput(fileName);
    totalNumDocs = parent.parent.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = parent.parent.termsOut.skipInterval;
    skipListWriter = parent.parent.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    posWriter = new FormatPostingsPositionsWriter(state, this);
}
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
    if (parent.parent.parent.fieldInfos.HasProx())
    {
        // At least one field does not omit TF, so create the prox file
        System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
        state.flushedFiles.Add(fileName);
        out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
        parent.skipListWriter.SetProxOutput(out_Renamed);
    }
    else
    {
        // Every field omits TF so we will write no prox file
        out_Renamed = null;
    }
}
public PreFlexRWFieldsWriter(SegmentWriteState state)
{
    TermsOut = new TermInfosWriter(state.Directory, state.SegmentInfo.Name, state.FieldInfos, state.TermIndexInterval);

    bool success = false;
    try
    {
        string freqFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION);
        FreqOut = state.Directory.CreateOutput(freqFile, state.Context);
        TotalNumDocs = state.SegmentInfo.DocCount;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(TermsOut);
        }
    }

    success = false;
    try
    {
        if (state.FieldInfos.HasProx())
        {
            string proxFile = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION);
            ProxOut = state.Directory.CreateOutput(proxFile, state.Context);
        }
        else
        {
            ProxOut = null;
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(TermsOut, FreqOut);
        }
    }

    SkipListWriter = new PreFlexRWSkipListWriter(TermsOut.SkipInterval, TermsOut.MaxSkipLevels, TotalNumDocs, FreqOut, ProxOut);
    //System.out.println("\nw start seg=" + segment);
}
public Lucene41SkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut)
    : base(blockSize, 8, maxSkipLevels, docCount)
{
    this.DocOut = docOut;
    this.PosOut = posOut;
    this.PayOut = payOut;

    LastSkipDoc = new int[maxSkipLevels];
    LastSkipDocPointer = new long[maxSkipLevels];
    if (posOut != null)
    {
        LastSkipPosPointer = new long[maxSkipLevels];
        if (payOut != null)
        {
            LastSkipPayPointer = new long[maxSkipLevels];
        }
        LastPayloadByteUpto = new int[maxSkipLevels];
    }
}
public long WriteTo(IndexOutput @out)
{
    long size = 0;
    while (true)
    {
        if (limit + bufferOffset == endIndex)
        {
            System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
            @out.WriteBytes(buffer, upto, limit - upto);
            size += limit - upto;
            break;
        }
        else
        {
            @out.WriteBytes(buffer, upto, limit - upto);
            size += limit - upto;
            NextSlice();
        }
    }
    return size;
}
internal Lucene42NormsConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension, float acceptableOverheadRatio)
{
    this.AcceptableOverheadRatio = acceptableOverheadRatio;
    MaxDoc = state.SegmentInfo.DocCount;
    bool success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        Data = state.Directory.CreateOutput(dataName, state.Context);
        CodecUtil.WriteHeader(Data, dataCodec, Lucene42DocValuesProducer.VERSION_CURRENT);

        string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
        Meta = state.Directory.CreateOutput(metaName, state.Context);
        CodecUtil.WriteHeader(Meta, metaCodec, Lucene42DocValuesProducer.VERSION_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this);
        }
    }
}
public override void Abort()
{
    if (tvx != null)
    {
        try
        {
            tvx.Close();
        }
        catch (System.Exception)
        {
            // best-effort close during abort; ignore failures
        }
        tvx = null;
    }
    if (tvd != null)
    {
        try
        {
            tvd.Close();
        }
        catch (System.Exception)
        {
            // best-effort close during abort; ignore failures
        }
        tvd = null;
    }
    if (tvf != null)
    {
        try
        {
            tvf.Close();
        }
        catch (System.Exception)
        {
            // best-effort close during abort; ignore failures
        }
        tvf = null;
    }
    lastDocID = 0;
}
private int LastFieldNumber = -1; // only for assert

#endregion Fields

#region Constructors

public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context)
{
    string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION);
    bool success = false;
    IndexOutput output = null;
    try
    {
        output = directory.CreateOutput(normsFileName, context);
        // output.WriteBytes(NORMS_HEADER, 0, NORMS_HEADER.Length);
        foreach (var @sbyte in NORMS_HEADER)
        {
            output.WriteByte((byte)@sbyte);
        }
        @out = output;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
public PreFlexRWTermVectorsWriter(Directory directory, string segment, IOContext context)
{
    this.Directory = directory;
    this.Segment = segment;
    bool success = false;
    try
    {
        // Open files for TermVector storage
        Tvx = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), context);
        Tvx.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);
        Tvd = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
        Tvd.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);
        Tvf = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
        Tvf.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            Abort();
        }
    }
}
/// <summary>
/// Called once after startup, before any terms have been
/// added. Implementations typically write a header to
/// the provided <c>termsOut</c>.
/// </summary>
public abstract void Init(IndexOutput termsOut);
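// A minimal sketch (added; codec name and version are placeholders) of the Init
// contract above — a concrete implementation, Lucene40PostingsWriter-style,
// appears further below. The header lets the matching reader validate the codec
// name and version before decoding any terms.
public override void Init(IndexOutput termsOut)
{
    CodecUtil.WriteHeader(termsOut, "SketchTermsDict", 0); // placeholder codec/version
}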
/// <summary> Writes the buffered skip lists to the given output.
///
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to
/// </param>
/// <returns> the pointer where the skip list starts
/// </returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long skipPointer = output.FilePointer;
    if (skipBuffer == null || skipBuffer.Length == 0)
        return skipPointer;

    for (int level = numberOfSkipLevels - 1; level > 0; level--)
    {
        long length = skipBuffer[level].FilePointer;
        if (length > 0)
        {
            output.WriteVLong(length);
            skipBuffer[level].WriteTo(output);
        }
    }
    skipBuffer[0].WriteTo(output);

    return skipPointer;
}
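// Hedged usage sketch (added; freqOutput/freqStartPointer are illustrative names):
// callers buffer skip entries while writing a term's postings, then append the
// skip data and keep the returned pointer so the term dictionary can record
// where the skip list starts.
private static long FlushSkipSketch(DefaultSkipListWriter skipListWriter, IndexOutput freqOutput, long freqStartPointer)
{
    long skipPointer = skipListWriter.WriteSkip(freqOutput); // start of the skip data
    return skipPointer - freqStartPointer; // offset relative to the term's postings start (assumption)
}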
/// <summary> Subclasses must implement the actual skip data encoding in this method.
///
/// </summary>
/// <param name="level">the level the skip data shall be written for
/// </param>
/// <param name="skipBuffer">the skip buffer to write to
/// </param>
protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer);
/// <summary>Write as a d-gaps list </summary>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1); // mark using d-gaps
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    int last = 0;
    int n = Count();
    int m = bits.Length;
    for (int i = 0; i < m && n > 0; i++)
    {
        if (bits[i] != 0)
        {
            output.WriteVInt(i - last);
            output.WriteByte(bits[i]);
            last = i;
            n -= BYTE_COUNTS[bits[i] & 0xFF];
        }
    }
}
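// Illustrative companion (added; not part of the original source): the d-gaps
// layout above delta-codes the indices of non-zero bitmap bytes, so a reader
// rebuilds absolute indices by accumulating the gaps. A tiny round trip:
private static void DemoDgapRoundTrip()
{
    int[] nonZeroByteIndices = { 3, 10, 11, 40 }; // sample sparse bitmap
    int last = 0;
    foreach (int i in nonZeroByteIndices)
    {
        int gap = i - last; // the VInt WriteDgaps emits for this byte
        last += gap;        // reader side: accumulate to recover the index
        System.Console.WriteLine("gap=" + gap + " index=" + last);
    }
}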
/// <summary>
/// Close all streams. </summary>
protected override void Dispose(bool disposing)
{
    // make an effort to close all streams we can but remember and re-throw
    // the first exception encountered in this process
    IOUtils.Close(Tvx, Tvd, Tvf);
    Tvx = Tvd = Tvf = null;
}
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());
    for (int i = 0; i < Size(); i++)
    {
        FieldInfo fi = FieldInfo(i);
        byte bits = (byte)0x0;
        if (fi.isIndexed)
            bits |= IS_INDEXED;
        if (fi.storeTermVector)
            bits |= STORE_TERMVECTOR;
        if (fi.storePositionWithTermVector)
            bits |= STORE_POSITIONS_WITH_TERMVECTOR;
        if (fi.storeOffsetWithTermVector)
            bits |= STORE_OFFSET_WITH_TERMVECTOR;
        if (fi.omitNorms)
            bits |= OMIT_NORMS;
        if (fi.storePayloads)
            bits |= STORE_PAYLOADS;
        if (fi.omitTermFreqAndPositions)
            bits |= OMIT_TERM_FREQ_AND_POSITIONS;

        output.WriteString(fi.name);
        output.WriteByte(bits);
    }
}
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors enabled
            Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            if (4 + ((long)state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));

            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
public override void Init(IndexOutput termsOut)
{
    CodecUtil.WriteHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
    termsOut.WriteInt(SkipInterval); // write skipInterval
    termsOut.WriteInt(MaxSkipLevels); // write maxSkipLevels
    termsOut.WriteInt(SkipMinimum); // write skipMinimum
}
/// <summary>Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput is_Renamed = null;
    try
    {
        long startPtr = os.GetFilePointer();

        is_Renamed = directory.OpenInput(source.file);
        long length = is_Renamed.Length();
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            int len = (int)System.Math.Min(chunk, remainder);
            is_Renamed.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
                // Roughly every 2 MB we will check if it's time to abort
                checkAbort.Work(80);
        }

        // Verify that remainder is 0
        if (remainder != 0)
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");

        // Verify that the output length diff is equal to original file
        long endPtr = os.GetFilePointer();
        long diff = endPtr - startPtr;
        if (diff != length)
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
    }
    finally
    {
        if (is_Renamed != null)
            is_Renamed.Close();
    }
}
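// Hedged usage sketch (added; method name and buffer size are illustrative):
// CopyFile is typically driven by a loop over all entries destined for the
// compound file, sharing one buffer across files exactly as the doc comment
// above suggests.
private void CopyAllFilesSketch(System.Collections.Generic.List<FileEntry> entries, IndexOutput os)
{
    byte[] buffer = new byte[16384]; // one shared buffer, allocated once
    foreach (FileEntry entry in entries)
    {
        CopyFile(entry, os, buffer);
    }
}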
/// <summary>Write as a bit set </summary>
private void WriteBits(IndexOutput output)
{
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    output.WriteBytes(bits, bits.Length);
}