public override SegmentInfo Read(Directory directory, string segmentName, IOContext context) { // NOTE: this is NOT how 3.x is really written... string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); bool success = false; IndexInput input = directory.OpenInput(fileName, context); try { SegmentInfo si = ReadUpgradedSegmentInfo(segmentName, directory, input); success = true; return(si); } finally { if (!success) { IOUtils.DisposeWhileHandlingException(input); } else { input.Dispose(); } } }
private static void AddIfExists(Directory dir, ISet <string> files, string fileName) { if (dir.FileExists(fileName)) { files.Add(fileName); } }
public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) { infos.Version = input.ReadInt64(); // read version infos.Counter = input.ReadInt32(); // read counter Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader(); for (int i = input.ReadInt32(); i > 0; i--) // read segmentInfos { SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input); SegmentInfo si = siPerCommit.Info; if (si.Version == null) { // Could be a 3.0 - try to open the doc stores - if it fails, it's a // 2.x segment, and an IndexFormatTooOldException will be thrown, // which is what we want. Directory dir = directory; if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1) { if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READ_ONCE, false); } } else if (si.UseCompoundFile) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READ_ONCE, false); } try { Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si)); } finally { // If we opened the directory, close it if (dir != directory) { dir.Dispose(); } } // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next // time the segment is read, its version won't be null and we won't // need to open FieldsReader every time for each such segment. si.Version = "3.0"; } else if (si.Version.Equals("2.x", StringComparison.Ordinal)) { // If it's a 3x index touched by 3.1+ code, then segments record their // version, whether they are 2.x ones or not. We detect that and throw // appropriate exception. throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version); } infos.Add(siPerCommit); } infos.UserData = input.ReadStringStringMap(); }
internal TermInfosReader(Directory dir, string seg, FieldInfos fis, IOContext context, int indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != -1) { throw new ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); } try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), context), fieldInfos, false); size = origEnum.size; if (indexDivisor != -1) { // Load terms index totalIndexInterval = origEnum.indexInterval * indexDivisor; string indexFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(indexFileName, context), fieldInfos, true); try { index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.FileLength(indexFileName), totalIndexInterval); indexLength = index.Length; } finally { indexEnum.Dispose(); } } else { // Do not load terms index: totalIndexInterval = -1; index = null; indexLength = -1; } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Dispose(); } } }
/// <summary> /// Create a <see cref="SegmentReadState"/>. </summary> public SegmentReadState(Directory dir, SegmentInfo info, FieldInfos fieldInfos, IOContext context, int termsIndexDivisor, string segmentSuffix) { this.Directory = dir; this.SegmentInfo = info; this.FieldInfos = fieldInfos; this.Context = context; this.TermsIndexDivisor = termsIndexDivisor; this.SegmentSuffix = segmentSuffix; }
/// <summary> /// Constructor which takes segment suffix. /// </summary> /// <seealso cref="SegmentWriteState(InfoStream, Directory, SegmentInfo, FieldInfos, int, /// BufferedUpdates, IOContext)"/> public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, int termIndexInterval, BufferedUpdates segUpdates, IOContext context, string segmentSuffix) { this.InfoStream = infoStream; this.SegUpdates = segUpdates; this.Directory = directory; this.SegmentInfo = segmentInfo; this.FieldInfos = fieldInfos; this.TermIndexInterval = termIndexInterval; this.SegmentSuffix = segmentSuffix; this.Context = context; }
public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) { string fileName = IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(segmentInfo), "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION); // Unfortunately, for 3.x indices, each segment's // FieldInfos can lie about hasVectors (claim it's true // when really it's false).... so we have to carefully // check if the files really exist before trying to open // them (4.x has fixed this): bool exists; if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(segmentInfo) != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(segmentInfo)) { string cfxFileName = IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(segmentInfo), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION); if (segmentInfo.Dir.FileExists(cfxFileName)) { Directory cfsDir = new CompoundFileDirectory(segmentInfo.Dir, cfxFileName, context, false); try { exists = cfsDir.FileExists(fileName); } finally { cfsDir.Dispose(); } } else { exists = false; } } else { exists = directory.FileExists(fileName); } if (!exists) { // 3x's FieldInfos sometimes lies and claims a segment // has vectors when it doesn't: return(null); } else { return(new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context)); } }
private SegmentInfo ReadUpgradedSegmentInfo(string name, Directory dir, IndexInput input) { CodecUtil.CheckHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT); string version = input.ReadString(); int docCount = input.ReadInt32(); IDictionary <string, string> attributes = input.ReadStringStringMap(); bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; IDictionary <string, string> diagnostics = input.ReadStringStringMap(); ISet <string> files = input.ReadStringSet(); SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, attributes.AsReadOnly()); info.SetFiles(files); return(info); }
/// <summary> /// Save a single segment's info. </summary> public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) { string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene46SegmentInfoFormat.SI_EXTENSION); si.AddFile(fileName); IndexOutput output = dir.CreateOutput(fileName, ioContext); bool success = false; try { CodecUtil.WriteHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT); // Write the Lucene version that created this segment, since 3.1 output.WriteString(si.Version); output.WriteInt32(si.DocCount); output.WriteByte((byte)(sbyte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO)); output.WriteStringStringMap(si.Diagnostics); output.WriteStringSet(si.GetFiles()); CodecUtil.WriteFooter(output); success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(output); si.Dir.DeleteFile(fileName); } else { output.Dispose(); } } }
public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) { throw new System.NotSupportedException("this codec can only be used for reading"); }
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) { string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si); int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si); int size = si.DocCount; bool success = false; try { if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION); tvx = d.OpenInput(idxName, context); format = CheckValidFormat(tvx); string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); tvd = d.OpenInput(fn, context); int tvdFormat = CheckValidFormat(tvd); fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); tvf = d.OpenInput(fn, context); int tvfFormat = CheckValidFormat(tvf); Debug.Assert(format == tvdFormat); Debug.Assert(format == tvfFormat); numTotalDocs = (int)(tvx.Length >> 4); if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = numTotalDocs; Debug.Assert(size == 0 || numTotalDocs == size); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset); } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // keep our original exception catch (Exception) { } } } }
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) { using (_lock.Write()) { _writer = writer; _directory = writer.Directory; if (Verbose) { Message("now merge"); Message(" index: " + writer.SegString()); } // First, quickly run through the newly proposed merges // and add any orthogonal merges (ie a merge not // involving segments already pending to be merged) to // the queue. If we are way behind on merging, many of // these newly proposed merges will likely already be // registered. // Iterate, pulling from the IndexWriter's queue of // pending merges, until it's empty: while (true) { long startStallTime = 0; while (writer.HasPendingMerges() && MergeThreadCount >= MaxMergeCount) { // this means merging has fallen too far behind: we // have already created maxMergeCount threads, and // now there's at least one more merge pending. // Note that only maxThreadCount of // those created merge threads will actually be // running; the rest will be paused (see // updateMergeThreads). We stall this producer // thread to prevent creation of new segments, // until merging has caught up: startStallTime = Environment.TickCount; if (Verbose) { Message(" too many merges; stalling..."); } _manualResetEvent.Reset(); _manualResetEvent.Wait(); } if (Verbose) { if (startStallTime != 0) { Message(" stalled for " + (Environment.TickCount - startStallTime) + " msec"); } } MergePolicy.OneMerge merge = writer.NextMerge(); if (merge is null) { if (Verbose) { Message(" no more merges pending; now return"); } return; } bool success = false; try { if (Verbose) { Message(" consider merge " + writer.SegString(merge.Segments)); } // OK to spawn a new merge thread to handle this // merge: var merger = CreateTask(writer, merge); merger.MergeThreadCompleted += OnMergeThreadCompleted; _mergeThreads.Add(merger); if (Verbose) { Message(" launch new thread [" + merger.Name + "]"); } merger.Start(_taskScheduler); // Must call this after starting the thread else // the new thread is removed from mergeThreads // (since it's not alive yet): UpdateMergeThreads(); success = true; } finally { if (!success) { writer.MergeFinish(merge); } } } } }
public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) { throw UnsupportedOperationException.Create("this codec can only be used for reading"); }
/// <summary> /// Returns a <see cref="StoredFieldsReader"/> to load stored /// fields. /// </summary> public abstract StoredFieldsReader FieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context);
/// <summary> /// Returns a <see cref="StoredFieldsWriter"/> to write stored /// fields. /// </summary> public abstract StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context);
public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context) { throw new NotSupportedException("this codec can only be used for reading"); }
/// <summary> /// Writes the provided <see cref="FieldInfos"/> to the /// directory. /// </summary> public abstract void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context);
public override sealed TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) { return(new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix, fieldInfos, context, formatName, compressionMode)); }
/// <summary> /// Write <see cref="SegmentInfo"/> data. </summary> /// <exception cref="System.IO.IOException"> If an I/O error occurs. </exception> public abstract void Write(Directory dir, SegmentInfo info, FieldInfos fis, IOContext ioContext);
public override StoredFieldsReader FieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) { return(new Lucene40StoredFieldsReader(directory, si, fn, context)); }
/// <summary> /// Returns a <see cref="TermVectorsReader"/> to read term /// vectors. /// </summary> public abstract TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context);
/// <summary> /// Reads from legacy 3.x segments_N. </summary> private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input) { // check that it is a format we can understand if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } string version; if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { version = input.ReadString(); } else { version = null; } string name = input.ReadString(); int docCount = input.ReadInt32(); long delGen = input.ReadInt64(); int docStoreOffset = input.ReadInt32(); IDictionary <string, string> attributes = new Dictionary <string, string>(); // parse the docstore stuff and shove it into attributes string docStoreSegment; bool docStoreIsCompoundFile; if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES; attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset, CultureInfo.InvariantCulture); attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment; attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile, CultureInfo.InvariantCulture); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } // pre-4.0 indexes write a byte if there is a single norms file byte b = input.ReadByte(); //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format); int numNormGen = input.ReadInt32(); IDictionary <int, long> normGen; if (numNormGen == SegmentInfo.NO) { normGen = null; } else { normGen = new Dictionary <int, long>(); for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadInt64(); } } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; int delCount = input.ReadInt32(); Debug.Assert(delCount <= docCount); bool hasProx = input.ReadByte() == 1; IDictionary <string, string> diagnostics = input.ReadStringStringMap(); if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { // NOTE: unused int hasVectors = input.ReadByte(); } // Replicate logic from 3.x's SegmentInfo.files(): ISet <string> files = new JCG.HashSet <string>(); if (isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); } if (docStoreOffset != -1) { if (docStoreIsCompoundFile) { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); } else { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } } else if (!isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } // parse the normgen stuff and shove it into attributes if (normGen != null) { attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen, CultureInfo.InvariantCulture); foreach (KeyValuePair <int, long> ent in normGen) { long gen = ent.Value; if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen)); attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen, CultureInfo.InvariantCulture); } else if (gen == SegmentInfo.NO) { // No separate norm } else { // We should have already hit indexformat too old exception Debug.Assert(false); } } } SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, attributes.AsReadOnly()); info.SetFiles(files); SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1); return(infoPerCommit); }
/// <summary> /// Returns a <see cref="TermVectorsWriter"/> to write term /// vectors. /// </summary> public abstract TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context);
/// <summary> /// Create a <see cref="SegmentReadState"/>. </summary> public SegmentReadState(Directory dir, SegmentInfo info, FieldInfos fieldInfos, IOContext context, int termsIndexDivisor) : this(dir, info, fieldInfos, context, termsIndexDivisor, "") { }
public override sealed TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) { return(new CompressingTermVectorsWriter(directory, segmentInfo, segmentSuffix, context, formatName, compressionMode, chunkSize)); }
/// <summary> /// Sole constructor. </summary> public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, int termIndexInterval, BufferedUpdates segUpdates, IOContext context) : this(infoStream, directory, segmentInfo, fieldInfos, termIndexInterval, segUpdates, context, "") { }
public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context) { return(new Lucene40StoredFieldsWriter(directory, si.Name, context)); }
public override StoredFieldsReader FieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) { return(new CompressingStoredFieldsReader(directory, si, segmentSuffix, fn, context, formatName, compressionMode)); }
public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context) { return(new CompressingStoredFieldsWriter(directory, si, segmentSuffix, context, formatName, compressionMode, chunkSize)); }
public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) { return(new Lucene40TermVectorsWriter(directory, segmentInfo.Name, context)); }