public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context) { Debug.Assert(directory != null); this.Directory = directory; this.Segment = segment; bool success = false; try { FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context); IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context); FieldsStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); IndexStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); success = true; } finally { if (!success) { Abort(); } } }
/// <summary>
/// Save a single segment's info. </summary>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
    si.AddFile(fileName);

    IndexOutput output = dir.CreateOutput(fileName, ioContext);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
        // Write the Lucene version that created this segment, since 3.1
        output.WriteString(si.Version);
        output.WriteInt(si.DocCount);

        output.WriteByte((sbyte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
        output.WriteStringStringMap(si.Diagnostics);
        output.WriteStringStringMap(CollectionsHelper.EmptyMap<string, string>());
        output.WriteStringSet(si.Files);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(output);
            si.Dir.DeleteFile(fileName);
        }
        else
        {
            output.Dispose();
        }
    }
}
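// On-disk layout of the 4.0 .si file as written above (and read back by Lucene40SegmentInfoReader.Read
// further down): codec header, version string, docCount (int32), isCompoundFile (byte), diagnostics map,
// a deprecated attributes map (always written empty here), and the segment's file-name set.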
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context) { _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); var success = false; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); try { _version = ReadHeader(_input); _indexDivisor = indexDivisor; if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); SeekDir(_input, _dirOffset); // Read directory var numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")"); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var indexStart = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(indexStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) { _indexLoaded = true; } } } }
public override IndexInput OpenInput(string name, IOContext context) { if (sleepMillis != -1) { return new SlowIndexInput(this, base.OpenInput(name, context)); } return base.OpenInput(name, context); }
public override IndexInput OpenInput(string fileName, IOContext context) { IndexInput @in = base.OpenInput(fileName, context); if (fileName.EndsWith(".frq")) { @in = new CountingStream(OuterInstance, @in); } return @in; }
public override IndexOutput CreateOutput(string name, IOContext context) { if (sleepMillis != -1) { return new SlowIndexOutput(this, base.CreateOutput(name, context)); } return base.CreateOutput(name, context); }
public UniversalMachine(IOContext ioContext) { ioContext_ = ioContext; arrays_ = new uint[INIT_ARRAYS][]; for (int i = 1; i < INIT_ARRAYS; ++i) { arraysFree_.Enqueue(i); } }
public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, string segmentSuffix) { var success = false; try { var docFileName = IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION); _docIn = intFactory.OpenInput(dir, docFileName, context); _skipIn = dir.OpenInput( IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context); if (fieldInfos.HasFreq()) { _freqIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context); } else { _freqIn = null; } if (fieldInfos.HasProx()) { _posIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context); _payloadIn = dir.OpenInput( IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context); } else { _posIn = null; _payloadIn = null; } success = true; } finally { if (!success) { Dispose(); } } }
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    // validate incoming readers
    if (validate)
    {
        foreach (AtomicReader reader in readers)
        {
            reader.CheckIntegrity();
        }
    }

    MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
    Directory = dir;
    this.TermIndexInterval = termIndexInterval;
    this.Codec = segmentInfo.Codec;
    this.Context = context;
    this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
    MergeState.SegmentInfo.DocCount = SetDocMaps();
}
/// <summary>
/// Sole constructor. </summary>
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
{
    string segment = si.Name;
    bool success = false;
    FieldInfos = fn;
    try
    {
        FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        IndexStream = d.OpenInput(indexStreamFN, context);

        CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
        CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer);
        Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer);
        long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX;
        this.Size_Renamed = (int)(indexSize >> 3);
        // Verify two sources of "maxDoc" agree:
        if (this.Size_Renamed != si.DocCount)
        {
            throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount);
        }
        NumTotalDocs = (int)(indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            } // ensure we throw our original exception
            catch (Exception)
            {
            }
        }
    }
}
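// The .fdx stored-fields index holds one 8-byte pointer per document after its header, so
// (indexSize >> 3) recovers the document count, which the constructor above cross-checks against
// SegmentInfo.DocCount before accepting the segment.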
private int LastFieldNumber = -1; // only for assert

public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context)
{
    string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION);
    bool success = false;
    IndexOutput output = null;
    try
    {
        output = directory.CreateOutput(normsFileName, context);
        output.WriteBytes(NORMS_HEADER, 0, NORMS_HEADER.Length);
        @out = output;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
public override SegmentInfo Read(Directory dir, string segment, IOContext context) { string fileName = IndexFileNames.SegmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION); IndexInput input = dir.OpenInput(fileName, context); bool success = false; try { CodecUtil.CheckHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_START, Lucene40SegmentInfoFormat.VERSION_CURRENT); string version = input.ReadString(); int docCount = input.ReadInt(); if (docCount < 0) { throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")"); } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; IDictionary<string, string> diagnostics = input.ReadStringStringMap(); input.ReadStringStringMap(); // read deprecated attributes ISet<string> files = input.ReadStringSet(); CodecUtil.CheckEOF(input); SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); si.Files = files; success = true; return si; } finally { if (!success) { IOUtils.CloseWhileHandlingException(input); } else { input.Dispose(); } } }
// private String segment;

/// <summary>
/// Sole constructor. </summary>
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix)
{
    bool success = false;
    IndexInput freqIn = null;
    IndexInput proxIn = null;
    try
    {
        freqIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext);
        CodecUtil.CheckHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
        // TODO: hasProx should (somehow!) become codec private,
        // but it's tricky because 1) FIS.hasProx is global (it
        // could be all fields that have prox are written by a
        // different codec), 2) the field may have had prox in
        // the past but all docs w/ that field were deleted.
        // Really we'd need to init prxOut lazily on write, and
        // then somewhere record that we actually wrote it so we
        // know whether to open on read:
        if (fieldInfos.HasProx())
        {
            proxIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext);
            CodecUtil.CheckHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
        }
        else
        {
            proxIn = null;
        }
        this.FreqIn = freqIn;
        this.ProxIn = proxIn;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(freqIn, proxIn);
        }
    }
}
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
{
    bool success = false;
    try
    {
        _input = directory.OpenInput(IndexFileNames.SegmentFileName(si.Name, "", SimpleTextTermVectorsWriter.VECTORS_EXTENSION), context);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // ensure we throw our original exception
            }
        }
    }
    ReadIndex(si.DocCount);
}
/// <summary> /// NOTE: this method creates a compound file for all files returned by /// info.files(). While, generally, this may include separate norms and /// deletion files, this SegmentInfo must not reference such files when this /// method is called, because they are not allowed within a compound file. /// </summary> public static ICollection<string> CreateCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, SegmentInfo info, IOContext context) { string fileName = Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "create compound file " + fileName); } Debug.Assert(Lucene3xSegmentInfoFormat.GetDocStoreOffset(info) == -1); // Now merge all added files ICollection<string> files = info.Files; CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); IOException prior = null; try { foreach (string file in files) { directory.Copy(cfsDir, file, file, context); checkAbort.Work(directory.FileLength(file)); } } catch (System.IO.IOException ex) { prior = ex; } finally { bool success = false; try { IOUtils.CloseWhileHandlingException(prior, cfsDir); success = true; } finally { if (!success) { try { directory.DeleteFile(fileName); } catch (Exception) { } try { directory.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } catch (Exception) { } } } } // Replace all previous files with the CFS/CFE files: HashSet<string> siFiles = new HashSet<string>(); siFiles.Add(fileName); siFiles.Add(Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); info.Files = siFiles; return files; }
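// Illustrative only, not part of the original source: a tiny helper showing how the compound-file
// names used in CreateCompoundFile above are derived from the segment name (the two extensions
// resolve to "cfs" for the data file and "cfe" for the entries table).
private static void PrintCompoundFileNames(SegmentInfo info)
{
    string cfsName = Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION);         // e.g. "_3.cfs"
    string cfeName = Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); // e.g. "_3.cfe"
    Console.WriteLine(cfsName + " / " + cfeName);
}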
/// <summary> /// Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.Info_Renamed.Info.Name; IList<SegmentCommitInfo> sourceSegments = merge.Segments; IOContext context = new IOContext(merge.MergeInfo); MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merging " + SegString(merge.Segments)); } merge.Readers = new List<SegmentReader>(); // this is try/finally to make sure merger's readers are // closed: bool success = false; try { int segUpto = 0; while (segUpto < sourceSegments.Count) { SegmentCommitInfo info = sourceSegments[segUpto]; // Hold onto the "live" reader; we will use this to // commit merged deletes ReadersAndUpdates rld = readerPool.Get(info, true); // Carefully pull the most recent live docs and reader SegmentReader reader; Bits liveDocs; int delCount; lock (this) { // Must sync to ensure BufferedDeletesStream cannot change liveDocs, // pendingDeleteCount and field updates while we pull a copy: reader = rld.GetReaderForMerge(context); liveDocs = rld.ReadOnlyLiveDocs; delCount = rld.PendingDeleteCount + info.DelCount; Debug.Assert(reader != null); Debug.Assert(rld.VerifyDocCounts()); if (infoStream.IsEnabled("IW")) { if (rld.PendingDeleteCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount); } else if (info.DelCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount); } else { infoStream.Message("IW", "seg=" + SegString(info) + " no deletes"); } } } // Deletes might have happened after we pulled the merge reader and // before we got a read-only copy of the segment's actual live docs // (taking pending deletes into account). In that case we need to // make a new reader with updated live docs and del count. if (reader.NumDeletedDocs() != delCount) { // fix the reader's live docs and del count Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount); bool released = false; try { rld.Release(reader); released = true; } finally { if (!released) { newReader.DecRef(); } } reader = newReader; } merge.Readers.Add(reader); Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount); segUpto++; } // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); merge.CheckAborted(directory); // this is where all the work happens: MergeState mergeState; bool success3 = false; try { if (!merger.ShouldMerge()) { // would result in a 0 document segment: nothing to merge! 
mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort); } else { mergeState = merger.Merge(); } success3 = true; } finally { if (!success3) { lock (this) { Deleter.Refresh(merge.Info_Renamed.Info.Name); } } } Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info); merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles); // Record which codec was used to write the segment if (infoStream.IsEnabled("IW")) { if (merge.Info_Renamed.Info.DocCount == 0) { infoStream.Message("IW", "merge away fully deleted segments"); } else { infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs")); } } // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); bool useCompoundFile; lock (this) // Guard segmentInfos { useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed); } if (useCompoundFile) { success = false; ICollection<string> filesToRemove = merge.Info_Renamed.Files(); try { filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.Aborted) { // this can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception creating compound file during merge"); } lock (this) { Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // So that, if we hit exc in deleteNewFiles (next) // or in commitMerge (later), we close the // per-segment readers in the finally clause below: success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD Deleter.DeleteNewFiles(filesToRemove); if (merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "abort merge after building CFS"); } Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); return 0; } } merge.Info_Renamed.Info.UseCompoundFile = true; } else { // So that, if we hit exc in commitMerge (later), // we close the per-segment readers in the finally // clause below: success = false; } // Have codec write SegmentInfo. 
Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: bool success2 = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context); success2 = true; } finally { if (!success2) { lock (this) { Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // TODO: ideally we would freeze merge.info here!! // because any changes after writing the .si will be // lost... if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0)); } IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer; if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0) { ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true); SegmentReader sr = rld.GetReader(IOContext.READ); try { mergedSegmentWarmer.Warm(sr); } finally { lock (this) { rld.Release(sr); readerPool.Release(rld); } } } // Force READ context because we merge deletes onto // this reader: if (!CommitMerge(merge, mergeState)) { // commitMerge will return false if this merge was // aborted return 0; } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } return merge.Info_Renamed.Info.DocCount; }
/// <summary> /// Copies the segment files as-is into the IndexWriter's directory. </summary> private SegmentCommitInfo CopySegmentAsIs(SegmentCommitInfo info, string segName, IDictionary<string, string> dsNames, ISet<string> dsFilesCopied, IOContext context, ISet<string> copiedFiles) { // Determine if the doc store of this segment needs to be copied. It's // only relevant for segments that share doc store with others, // because the DS might have been copied already, in which case we // just want to update the DS name of this SegmentInfo. string dsName = Lucene3xSegmentInfoFormat.GetDocStoreSegment(info.Info); Debug.Assert(dsName != null); string newDsName; if (dsNames.ContainsKey(dsName)) { newDsName = dsNames[dsName]; } else { dsNames[dsName] = segName; newDsName = segName; } // note: we don't really need this fis (its copied), but we load it up // so we don't pass a null value to the si writer FieldInfos fis = SegmentReader.ReadFieldInfos(info); ISet<string> docStoreFiles3xOnly = Lucene3xCodec.GetDocStoreFiles(info.Info); IDictionary<string, string> attributes; // copy the attributes map, we might modify it below. // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something). if (info.Info.Attributes() == null) { attributes = new Dictionary<string, string>(); } else { attributes = new Dictionary<string, string>(info.Info.Attributes()); } if (docStoreFiles3xOnly != null) { // only violate the codec this way if it's preflex & // shares doc stores // change docStoreSegment to newDsName attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = newDsName; } //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); // Same SI as before but we change directory, name and docStoreSegment: SegmentInfo newInfo = new SegmentInfo(directory, info.Info.Version, segName, info.Info.DocCount, info.Info.UseCompoundFile, info.Info.Codec, info.Info.Diagnostics, attributes); SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.DelCount, info.DelGen, info.FieldInfosGen); HashSet<string> segFiles = new HashSet<string>(); // Build up new segment's file names. 
Must do this // before writing SegmentInfo: foreach (string file in info.Files()) { string newFileName; if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file)) { newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } else { newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } segFiles.Add(newFileName); } newInfo.Files = segFiles; // We must rewrite the SI file because it references // segment name (its own name, if its 3.x, and doc // store segment name): TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); Codec currentCodec = newInfo.Codec; try { currentCodec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, newInfo, fis, context); } catch (System.NotSupportedException uoe) { if (currentCodec is Lucene3xCodec) { // OK: 3x codec cannot write a new SI file; // SegmentInfos will write this on commit } else { throw uoe; } } ICollection<string> siFiles = trackingDir.CreatedFiles; bool success = false; try { // Copy the segment's files foreach (string file in info.Files()) { string newFileName; if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file)) { newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); if (dsFilesCopied.Contains(newFileName)) { continue; } dsFilesCopied.Add(newFileName); } else { newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } if (siFiles.Contains(newFileName)) { // We already rewrote this above continue; } Debug.Assert(!SlowFileExists(directory, newFileName), "file \"" + newFileName + "\" already exists; siFiles=" + siFiles); Debug.Assert(!copiedFiles.Contains(file), "file \"" + file + "\" is being copied more than once"); copiedFiles.Add(file); info.Info.Dir.Copy(directory, file, newFileName, context); } success = true; } finally { if (!success) { foreach (string file in newInfo.Files) { try { directory.DeleteFile(file); } catch (Exception) { } } } } return newInfoPerCommit; }
public override IndexOutput CreateOutput(string name, IOContext cxt) { IndexOutput indexOutput = @in.CreateOutput(name, cxt); if (null != CrashAfterCreateOutput_Renamed && name.Equals(CrashAfterCreateOutput_Renamed)) { // CRASH! indexOutput.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: now crash"); Console.WriteLine(new Exception().StackTrace); } throw new CrashingException("crashAfterCreateOutput " + CrashAfterCreateOutput_Renamed); } return indexOutput; }
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context, String segmentSuffix) { _postingsReader = postingsReader; _input = dir.OpenInput( IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); // Have PostingsReader init itself postingsReader.Init(_input); // Read per-field details SeekDir(_input, _dirOffset); int numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}", numFields, _input)); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var numTerms = _input.ReadVLong(); Debug.Assert(numTerms >= 0); var termsStartPointer = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : _input.ReadVLong(); var sumDocFreq = _input.ReadVLong(); var docCount = _input.ReadVInt(); var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0; if (docCount < 0 || docCount > info.DocCount) { // #docs with field must be <= #docs throw new CorruptIndexException( String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount, _input)); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount, _input)); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}", sumTotalTermFreq, sumDocFreq, _input)); } try { _fields.Add(fieldInfo.Name, new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { _input.Dispose(); } } _indexReader = indexReader; }
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) { string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); IndexOutput output = directory.CreateOutput(fileName, context); bool success = false; try { CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT); output.WriteVInt(infos.Size()); foreach (FieldInfo fi in infos) { FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions; sbyte bits = 0x0; if (fi.HasVectors()) { bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR; } if (fi.OmitsNorms()) { bits |= Lucene46FieldInfosFormat.OMIT_NORMS; } if (fi.HasPayloads()) { bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS; } if (fi.Indexed) { bits |= Lucene46FieldInfosFormat.IS_INDEXED; Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads()); if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) { bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) { bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS; } } output.WriteString(fi.Name); output.WriteVInt(fi.Number); output.WriteByte((byte)bits); // pack the DV types in one byte var dv = DocValuesByte(fi.DocValuesType); var nrm = DocValuesByte(fi.NormType); Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0); var val = unchecked((sbyte)(0xff & ((nrm << 4) | dv))); output.WriteByte((byte)val); output.WriteLong(fi.DocValuesGen); output.WriteStringStringMap(fi.Attributes()); } CodecUtil.WriteFooter(output); success = true; } finally { if (success) { output.Dispose(); } else { IOUtils.CloseWhileHandlingException(output); } } }
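// Added note, mirroring the packing above rather than quoting the original reader: because the
// doc-values and norms types share one byte, the reverse mapping is simply
//   var dv  = (sbyte)(val & 0x0F);          // low nibble: doc-values type
//   var nrm = (sbyte)((val >> 4) & 0x0F);   // high nibble: norms type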
/* private class IterableAnonymousInnerClassHelper : IEnumerable<Number> { private readonly ReadersAndUpdates OuterInstance; private Lucene.Net.Index.SegmentReader Reader; private string Field; private Lucene.Net.Index.NumericDocValuesFieldUpdates FieldUpdates; public IterableAnonymousInnerClassHelper(ReadersAndUpdates outerInstance, Lucene.Net.Index.SegmentReader reader, string field, Lucene.Net.Index.NumericDocValuesFieldUpdates fieldUpdates) { this.OuterInstance = outerInstance; this.Reader = reader; this.Field = field; this.FieldUpdates = fieldUpdates; currentValues = reader.GetNumericDocValues(field); docsWithField = reader.GetDocsWithField(field); maxDoc = reader.MaxDoc; updatesIter = fieldUpdates.Iterator(); } internal readonly NumericDocValues currentValues; internal readonly Bits docsWithField; internal readonly int maxDoc; internal readonly NumericDocValuesFieldUpdates.Iterator updatesIter; public virtual IEnumerator<Number> GetEnumerator() { updatesIter.Reset(); return new IteratorAnonymousInnerClassHelper(this); } private class IteratorAnonymousInnerClassHelper : IEnumerator<Number> { private readonly IterableAnonymousInnerClassHelper OuterInstance; public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance) { this.OuterInstance = outerInstance; curDoc = -1; updateDoc = updatesIter.NextDoc(); } internal int curDoc; internal int updateDoc; public virtual bool HasNext() { return curDoc < maxDoc - 1; } public virtual Number Next() { if (++curDoc >= maxDoc) { throw new NoSuchElementException("no more documents to return values for"); } if (curDoc == updateDoc) // this document has an updated value { long? value = updatesIter.value(); // either null (unset value) or updated value updateDoc = updatesIter.nextDoc(); // prepare for next round return value; } else { // no update for this document Debug.Assert(curDoc < updateDoc); if (currentValues != null && docsWithField.Get(curDoc)) { // only read the current value if the document had a value before return currentValues.Get(curDoc); } else { return null; } } } public virtual void Remove() { throw new System.NotSupportedException("this iterator does not support removing elements"); } } }*/ /* private class IterableAnonymousInnerClassHelper2 : IEnumerable<BytesRef> { private readonly ReadersAndUpdates OuterInstance; private Lucene.Net.Index.SegmentReader Reader; private string Field; private Lucene.Net.Index.BinaryDocValuesFieldUpdates DvFieldUpdates; public IterableAnonymousInnerClassHelper2(ReadersAndUpdates outerInstance, Lucene.Net.Index.SegmentReader reader, string field, Lucene.Net.Index.BinaryDocValuesFieldUpdates dvFieldUpdates) { this.OuterInstance = outerInstance; this.Reader = reader; this.Field = field; this.DvFieldUpdates = dvFieldUpdates; currentValues = reader.GetBinaryDocValues(field); docsWithField = reader.GetDocsWithField(field); maxDoc = reader.MaxDoc; updatesIter = dvFieldUpdates.Iterator(); } internal readonly BinaryDocValues currentValues; internal readonly Bits docsWithField; internal readonly int maxDoc; internal readonly BinaryDocValuesFieldUpdates.Iterator updatesIter; public virtual IEnumerator<BytesRef> GetEnumerator() { updatesIter.Reset(); return new IteratorAnonymousInnerClassHelper2(this); } private class IteratorAnonymousInnerClassHelper2 : IEnumerator<BytesRef> { private readonly IterableAnonymousInnerClassHelper2 OuterInstance; public IteratorAnonymousInnerClassHelper2(IterableAnonymousInnerClassHelper2 outerInstance) { this.OuterInstance = outerInstance; curDoc = -1; 
updateDoc = updatesIter.nextDoc(); scratch = new BytesRef(); } internal int curDoc; internal int updateDoc; internal BytesRef scratch; public virtual bool HasNext() { return curDoc < maxDoc - 1; } public virtual BytesRef Next() { if (++curDoc >= maxDoc) { throw new NoSuchElementException("no more documents to return values for"); } if (curDoc == updateDoc) // this document has an updated value { BytesRef value = updatesIter.value(); // either null (unset value) or updated value updateDoc = updatesIter.nextDoc(); // prepare for next round return value; } else { // no update for this document Debug.Assert(curDoc < updateDoc); if (currentValues != null && docsWithField.get(curDoc)) { // only read the current value if the document had a value before currentValues.get(curDoc, scratch); return scratch; } else { return null; } } } public virtual void Remove() { throw new System.NotSupportedException("this iterator does not support removing elements"); } } }*/ /// <summary> /// Returns a reader for merge. this method applies field updates if there are /// any and marks that this segment is currently merging. /// </summary> internal virtual SegmentReader GetReaderForMerge(IOContext context) { lock (this) { //Debug.Assert(Thread.holdsLock(Writer)); // must execute these two statements as atomic operation, otherwise we // could lose updates if e.g. another thread calls writeFieldUpdates in // between, or the updates are applied to the obtained reader, but then // re-applied in IW.commitMergedDeletes (unnecessary work and potential // bugs). IsMerging = true; return GetReader(context); } }
public static void Main(string[] args)
{
    string filename = null;
    bool extract = false;
    string dirImpl = null;

    int j = 0;
    while (j < args.Length)
    {
        string arg = args[j];
        if ("-extract".Equals(arg))
        {
            extract = true;
        }
        else if ("-dir-impl".Equals(arg))
        {
            if (j == args.Length - 1)
            {
                Console.WriteLine("ERROR: missing value for -dir-impl option");
                Environment.Exit(1);
            }
            j++;
            dirImpl = args[j];
        }
        else if (filename == null)
        {
            filename = arg;
        }
        j++;
    }

    if (filename == null)
    {
        Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
        return;
    }

    Directory dir = null;
    CompoundFileDirectory cfr = null;
    IOContext context = IOContext.READ;

    try
    {
        FileInfo file = new FileInfo(filename);
        string dirname = file.DirectoryName;
        filename = file.Name;
        if (dirImpl == null)
        {
            dir = FSDirectory.Open(new DirectoryInfo(dirname));
        }
        else
        {
            dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
        }

        cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

        string[] files = cfr.ListAll();
        ArrayUtil.TimSort(files); // sort the array of filenames so that the output is more readable

        for (int i = 0; i < files.Length; ++i)
        {
            long len = cfr.FileLength(files[i]);

            if (extract)
            {
                Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                IndexInput ii = cfr.OpenInput(files[i], context);

                using (FileStream f = new FileStream(files[i], FileMode.Create, FileAccess.Write))
                {
                    // read and write with a small buffer, which is more effective than reading byte by byte
                    byte[] buffer = new byte[1024];
                    int chunk = buffer.Length;
                    while (len > 0)
                    {
                        int bufLen = (int)Math.Min(chunk, len);
                        ii.ReadBytes(buffer, 0, bufLen);
                        f.Write(buffer, 0, bufLen);
                        len -= bufLen;
                    }
                }
                ii.Dispose();
            }
            else
            {
                Console.WriteLine(files[i] + ": " + len + " bytes");
            }
        }
    }
    catch (IOException ioe)
    {
        Console.WriteLine(ioe.ToString());
        Console.Write(ioe.StackTrace);
    }
    finally
    {
        try
        {
            if (dir != null)
            {
                dir.Dispose();
            }
            if (cfr != null)
            {
                cfr.Dispose();
            }
        }
        catch (IOException ioe)
        {
            Console.WriteLine(ioe.ToString());
            Console.Write(ioe.StackTrace);
        }
    }
}
/// <summary> /// Sole constructor. </summary> public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode) { this.compressionMode = compressionMode; string segment = si.Name; bool success = false; fieldInfos = fn; numDocs = si.DocCount; ChecksumIndexInput indexStream = null; try { string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION); // Load the index into memory indexStream = d.OpenChecksumInput(indexStreamFN, context); string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX; version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer()); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); long maxPointer = -1; if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { maxPointer = indexStream.ReadVInt64(); CodecUtil.CheckFooter(indexStream); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(indexStream); #pragma warning restore 612, 618 } indexStream.Dispose(); indexStream = null; // Open the data file and read metadata fieldsStream = d.OpenInput(fieldsStreamFN, context); if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length) { throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length); } } else { maxPointer = fieldsStream.Length; } this.maxPointer = maxPointer; string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT; int fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); if (version != fieldsVersion) { throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion); } Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer()); if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS) { chunkSize = fieldsStream.ReadVInt32(); } else { chunkSize = -1; } packedIntsVersion = fieldsStream.ReadVInt32(); decompressor = compressionMode.NewDecompressor(); this.bytes = new BytesRef(); success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(this, indexStream); } } }
/// <summary> /// TODO: javadoc </summary> public static IOContext NewIOContext(Random random, IOContext oldContext) { int randomNumDocs = random.Next(4192); int size = random.Next(512) * randomNumDocs; if (oldContext.FlushInfo != null) { // Always return at least the estimatedSegmentSize of // the incoming IOContext: return new IOContext(new FlushInfo(randomNumDocs, (long)Math.Max(oldContext.FlushInfo.EstimatedSegmentSize, size))); } else if (oldContext.MergeInfo != null) { // Always return at least the estimatedMergeBytes of // the incoming IOContext: return new IOContext(new MergeInfo(randomNumDocs, Math.Max(oldContext.MergeInfo.EstimatedMergeBytes, size), random.NextBoolean(), TestUtil.NextInt(random, 1, 100))); } else { // Make a totally random IOContext: IOContext context; switch (random.Next(5)) { case 0: context = IOContext.DEFAULT; break; case 1: context = IOContext.READ; break; case 2: context = IOContext.READONCE; break; case 3: context = new IOContext(new MergeInfo(randomNumDocs, size, true, -1)); break; case 4: context = new IOContext(new FlushInfo(randomNumDocs, size)); break; default: context = IOContext.DEFAULT; break; } return context; } }
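// Illustrative only (not from the original source): a minimal sketch showing that the randomized
// context returned by NewIOContext can be handed to any Directory call that accepts an IOContext.
private static void DemoRandomContext(Random random, Directory dir)
{
    IOContext ctx = NewIOContext(random, IOContext.DEFAULT);
    using (IndexOutput output = dir.CreateOutput("_demo.tmp", ctx))
    {
        output.WriteInt(42);
    }
    dir.DeleteFile("_demo.tmp");
}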
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); Debug.Assert(fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); Debug.Assert(normsProducer != null); } else { normsProducer = null; } // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException, // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could // this be needed because we are using unchecked?? #if !NETSTANDARD try { #endif fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); #if !NETSTANDARD } #pragma warning disable 168 catch (System.AccessViolationException ave) #pragma warning restore 168 { } #endif if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
/// <summary> /// Sole constructor. </summary> public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) { string segment = si.Name; int size = si.DocCount; bool success = false; try { string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION); Tvx = d.OpenInput(idxName, context); int tvxVersion = CodecUtil.CheckHeader(Tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT); string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); Tvd = d.OpenInput(fn, context); int tvdVersion = CodecUtil.CheckHeader(Tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT); fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); Tvf = d.OpenInput(fn, context); int tvfVersion = CodecUtil.CheckHeader(Tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT); Debug.Assert(HEADER_LENGTH_INDEX == Tvx.FilePointer); Debug.Assert(HEADER_LENGTH_DOCS == Tvd.FilePointer); Debug.Assert(HEADER_LENGTH_FIELDS == Tvf.FilePointer); Debug.Assert(tvxVersion == tvdVersion); Debug.Assert(tvxVersion == tvfVersion); NumTotalDocs = (int)(Tvx.Length() - HEADER_LENGTH_INDEX >> 4); this.Size_Renamed = NumTotalDocs; Debug.Assert(size == 0 || NumTotalDocs == size); this.FieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // ensure we throw our original exception catch (Exception t) { } } } }
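// The .tvx term-vectors index stores, per document, an 8-byte pointer into .tvd and an 8-byte
// pointer into .tvf (16 bytes total), which is why NumTotalDocs is derived above as
// (Tvx.Length() - HEADER_LENGTH_INDEX) >> 4.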
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context) { var scratch = new BytesRef(); string segFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context); bool success = false; try { SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION)); string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch); SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT)); int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch), CultureInfo.InvariantCulture); SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND)); bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch), CultureInfo.InvariantCulture); SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG)); int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch), CultureInfo.InvariantCulture); IDictionary<string, string> diagnostics = new Dictionary<string, string>(); for (int i = 0; i < numDiag; i++) { SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY)); string key = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch); SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE)); string value = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch); diagnostics[key] = value; } SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES)); int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch), CultureInfo.InvariantCulture); var files = new HashSet<string>(); for (int i = 0; i < numFiles; i++) { SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE)); string fileName = ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch); files.Add(fileName); } SimpleTextUtil.CheckFooter(input); var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics) {Files = files}; success = true; return info; } finally { if (!success) { IOUtils.CloseWhileHandlingException(input); } else { input.Dispose(); } } }
public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) { return(new AssertingTermVectorsWriter(@in.VectorsWriter(directory, segmentInfo, context))); }
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) { string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si); int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si); int size = si.DocCount; bool success = false; try { if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION); tvx = d.OpenInput(idxName, context); format = CheckValidFormat(tvx); string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); tvd = d.OpenInput(fn, context); int tvdFormat = CheckValidFormat(tvd); fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); tvf = d.OpenInput(fn, context); int tvfFormat = CheckValidFormat(tvf); Debug.Assert(format == tvdFormat); Debug.Assert(format == tvfFormat); numTotalDocs = (int)(tvx.Length >> 4); if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = numTotalDocs; Debug.Assert(size == 0 || numTotalDocs == size); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset); } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // keep our original exception catch (Exception) { } } } }
/// <summary> /// Adds all segments from an array of indexes into this index. /// /// <p>this may be used to parallelize batch indexing. A large document /// collection can be broken into sub-collections. Each sub-collection can be /// indexed in parallel, on a different thread, process or machine. The /// complete index can then be created by merging sub-collection indexes /// with this method. /// /// <p> /// <b>NOTE:</b> this method acquires the write lock in /// each directory, to ensure that no {@code IndexWriter} /// is currently open or tries to open while this is /// running. /// /// <p>this method is transactional in how Exceptions are /// handled: it does not commit a new segments_N file until /// all indexes are added. this means if an Exception /// occurs (for example disk full), then either no indexes /// will have been added or they all will have been. /// /// <p>Note that this requires temporary free space in the /// <seealso cref="Directory"/> up to 2X the sum of all input indexes /// (including the starting index). If readers/searchers /// are open against the starting index, then temporary /// free space required will be higher by the size of the /// starting index (see <seealso cref="#forceMerge(int)"/> for details). /// /// <p> /// <b>NOTE:</b> this method only copies the segments of the incoming indexes /// and does not merge them. Therefore deleted documents are not removed and /// the new segments are not merged with the existing ones. /// /// <p>this requires this index not be among those to be added. /// /// <p> /// <b>NOTE</b>: if this method hits an OutOfMemoryError /// you should immediately close the writer. See <a /// href="#OOME">above</a> for details. /// </summary> /// <exception cref="CorruptIndexException"> if the index is corrupt </exception> /// <exception cref="IOException"> if there is a low-level IO error </exception> /// <exception cref="LockObtainFailedException"> if we were unable to /// acquire the write lock in at least one directory </exception> public virtual void AddIndexes(params Directory[] dirs) { EnsureOpen(); NoDupDirs(dirs); IEnumerable<Lock> locks = AcquireWriteLocks(dirs); bool successTop = false; try { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "flush at addIndexes(Directory...)"); } Flush(false, true); IList<SegmentCommitInfo> infos = new List<SegmentCommitInfo>(); bool success = false; try { foreach (Directory dir in dirs) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "addIndexes: process directory " + dir); } SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.Read(dir); HashSet<string> dsFilesCopied = new HashSet<string>(); IDictionary<string, string> dsNames = new Dictionary<string, string>(); HashSet<string> copiedFiles = new HashSet<string>(); foreach (SegmentCommitInfo info in sis.Segments) { Debug.Assert(!infos.Contains(info), "dup info dir=" + info.Info.Dir + " name=" + info.Info.Name); string newSegName = NewSegmentName(); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "addIndexes: process segment origName=" + info.Info.Name + " newName=" + newSegName + " info=" + info); } IOContext context = new IOContext(new MergeInfo(info.Info.DocCount, info.SizeInBytes(), true, -1)); foreach (FieldInfo fi in SegmentReader.ReadFieldInfos(info)) { GlobalFieldNumberMap.AddOrGet(fi.Name, fi.Number, fi.DocValuesType); } infos.Add(CopySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles)); } } success = true; } finally { if (!success) { foreach (SegmentCommitInfo 
sipc in infos) { foreach (string file in sipc.Files()) { try { directory.DeleteFile(file); } catch (Exception) { } } } } } lock (this) { success = false; try { EnsureOpen(); success = true; } finally { if (!success) { foreach (SegmentCommitInfo sipc in infos) { foreach (string file in sipc.Files()) { try { directory.DeleteFile(file); } catch (Exception) { } } } } } segmentInfos.AddAll(infos); Checkpoint(); } successTop = true; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(Directory...)"); } finally { if (locks != null) { foreach (var lk in locks) { lk.Release(); } } if (successTop) { IOUtils.Close(locks); } else { IOUtils.CloseWhileHandlingException(locks); } } }
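// Illustrative usage (added; the config and directory variables are assumptions, not from the
// original): AddIndexes(params Directory[]) copies the source segments as-is, so sub-indexes built
// in parallel can simply be folded into one destination writer.
private static void MergeSubIndexes(Directory destDir, IndexWriterConfig conf, params Directory[] parts)
{
    using (IndexWriter writer = new IndexWriter(destDir, conf))
    {
        writer.AddIndexes(parts); // copies segments; does not merge them or drop deleted docs
        writer.Commit();
    }
}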
public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) { string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si); int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si); int size = si.DocCount; bool success = false; FieldInfos = fn; try { if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { StoreCFSReader = null; } FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context); string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION); IndexStream = d.OpenInput(indexStreamFN, context); Format = IndexStream.ReadInt(); if (Format < FORMAT_MINIMUM) { throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT); } if (Format > FORMAT_CURRENT) { throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT); } long indexSize = IndexStream.Length() - FORMAT_SIZE; if (docStoreOffset != -1) { // We read only a slice out of this shared fields file this.DocStoreOffset = docStoreOffset; this.Size = size; // Verify the file is long enough to hold all of our // docs Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset); } else { this.DocStoreOffset = 0; this.Size = (int)(indexSize >> 3); // Verify two sources of "maxDoc" agree: if (this.Size != si.DocCount) { throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount); } } NumTotalDocs = (int)(indexSize >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // keep our original exception catch (Exception t) { } } } }
/// <summary> /// Merges the provided indexes into this index. /// /// <p> /// The provided IndexReaders are not closed. /// /// <p> /// See <seealso cref="#addIndexes"/> for details on transactional semantics, temporary /// free space required in the Directory, and non-CFS segments on an Exception. /// /// <p> /// <b>NOTE</b>: if this method hits an OutOfMemoryError you should immediately /// close the writer. See <a href="#OOME">above</a> for details. /// /// <p> /// <b>NOTE:</b> empty segments are dropped by this method and not added to this /// index. /// /// <p> /// <b>NOTE:</b> this method merges all given <seealso cref="IndexReader"/>s in one /// merge. If you intend to merge a large number of readers, it may be better /// to call this method multiple times, each time with a small set of readers. /// In principle, if you use a merge policy with a {@code mergeFactor} or /// {@code maxMergeAtOnce} parameter, you should pass that many readers in one /// call. Also, if the given readers are <seealso cref="DirectoryReader"/>s, they can be /// opened with {@code termIndexInterval=-1} to save RAM, since during merge /// the in-memory structure is not used. See /// <seealso cref="DirectoryReader#open(Directory, int)"/>. /// /// <p> /// <b>NOTE</b>: if you call <seealso cref="#close(boolean)"/> with <tt>false</tt>, which /// aborts all running merges, then any thread still running this method might /// hit a <seealso cref="MergePolicy.MergeAbortedException"/>. /// </summary> /// <exception cref="CorruptIndexException"> /// if the index is corrupt </exception> /// <exception cref="IOException"> /// if there is a low-level IO error </exception> public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); int numDocs = 0; try { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "flush at addIndexes(IndexReader...)"); } Flush(false, true); string mergedName = NewSegmentName(); IList<AtomicReader> mergeReaders = new List<AtomicReader>(); foreach (IndexReader indexReader in readers) { numDocs += indexReader.NumDocs(); foreach (AtomicReaderContext ctx in indexReader.Leaves()) { mergeReaders.Add(ctx.AtomicReader); } } IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); // TODO: somehow we should fix this merge so it's // abortable so that IW.close(false) is able to stop it TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); if (!merger.ShouldMerge()) { return; } MergeState mergeState; bool success = false; try { mergeState = merger.Merge(); // merge 'em success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L); info.Files = new HashSet<string>(trackingDir.CreatedFiles); trackingDir.CreatedFiles.Clear(); SetDiagnostics(info, SOURCE_ADDINDEXES_READERS); bool useCompoundFile; lock (this) // Guard segmentInfos { if (StopMerges) { Deleter.DeleteNewFiles(infoPerCommit.Files()); return; } EnsureOpen(); useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit); } // Now create the compound file if needed if (useCompoundFile) { ICollection<string> filesToDelete = infoPerCommit.Files(); 
try { CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); } finally { // delete new non cfs files directly: they were never // registered with IFD lock (this) { Deleter.DeleteNewFiles(filesToDelete); } } info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: success = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context); success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } info.AddFiles(trackingDir.CreatedFiles); // Register the new segment lock (this) { if (StopMerges) { Deleter.DeleteNewFiles(info.Files); return; } EnsureOpen(); segmentInfos.Add(infoPerCommit); Checkpoint(); } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(IndexReader...)"); } }
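// Illustrative usage (added; the writer and source directory are assumptions, not from the
// original): the reader-based overload merges the given readers into a single new segment, so an
// existing index can be folded in via a DirectoryReader.
private static void AddIndexViaReader(IndexWriter writer, Directory srcDir)
{
    using (DirectoryReader reader = DirectoryReader.Open(srcDir))
    {
        writer.AddIndexes(reader); // merged into one new segment; an empty result segment is dropped
    }
    writer.Commit();
}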
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, IComparer<BytesRef> termComp, String segmentSuffix, IOContext context) { _termComp = termComp; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); indexInterval = _input.ReadInt(); if (indexInterval < 1) { throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input)); } _indexDivisor = indexDivisor; if (indexDivisor < 0) { _totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand _totalIndexInterval = indexInterval*indexDivisor; } Debug.Assert(_totalIndexInterval > 0); SeekDir(_input, _dirOffset); // Read directory int numFields = _input.ReadVInt(); if (numFields < 0) throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input)); for (int i = 0; i < numFields; i++) { int field = _input.ReadVInt(); int numIndexTerms = _input.ReadVInt(); if (numIndexTerms < 0) throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input)); long termsStart = _input.ReadVLong(); long indexStart = _input.ReadVLong(); long packedIndexStart = _input.ReadVLong(); long packedOffsetsStart = _input.ReadVLong(); if (packedIndexStart < indexStart) throw new CorruptIndexException( String.Format( "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}", packedIndexStart, indexStart, numIndexTerms, _input)); FieldInfo fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(_input); } if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) _indexLoaded = true; _termBytesReader = _termBytes.Freeze(true); } } }
public override IndexInput OpenInput(string name, IOContext context) { return new FaultyIndexInput(FsDir.OpenInput(name, context)); }
public override IndexOutput CreateOutput(string name, IOContext context) { return FsDir.CreateOutput(name, context); }
public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) { return(new AssertingTermVectorsReader(@in.VectorsReader(directory, segmentInfo, fieldInfos, context))); }