/// <summary>
/// Releases the static state shared by the tests in this class: disposes the
/// directory (when one was created) and clears the static references.
/// </summary>
public static void AfterClass()
{
    try
    {
        // Dir may still be null if setup never got as far as creating it;
        // guarding here avoids a NullReferenceException during teardown.
        if (Dir != null)
        {
            Dir.Dispose();
        }
    }
    finally
    {
        // Always drop the static references, even when Dispose() throws,
        // so that this class does not keep the objects reachable afterwards.
        Dir = null;
        FieldInfos = null;
        TestDoc = null;
    }
}
/// <summary>
/// Populates the static state shared by the tests in this class: creates the test
/// document, records each of its fields in the shared <c>FieldInfos</c> builder, and
/// writes the document into a newly created directory.
/// </summary>
public static void BeforeClass()
{
    // Build the reference document and register every field it contains.
    TestDoc = new Document();
    FieldInfos = new FieldInfos.Builder();
    DocHelper.SetupDoc(TestDoc);
    foreach (IndexableField docField in TestDoc)
    {
        FieldInfos.AddOrUpdate(docField.Name(), docField.FieldType());
    }

    Dir = NewDirectory();

    // Writer configuration: mock analyzer plus a log merge policy whose NoCFSRatio is 0.0.
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy());
    config.MergePolicy.NoCFSRatio = 0.0;

    // Index the single test document and close the writer again.
    IndexWriter indexWriter = new IndexWriter(Dir, config);
    indexWriter.AddDocument(TestDoc);
    indexWriter.Dispose();

    FaultyIndexInput.DoFail = false;
}
/// <summary>
/// Creates a merger over <paramref name="readers"/>.
/// <para>
/// Note: just like in the codec APIs, <paramref name="dir"/> is NOT the same as
/// <c>segmentInfo.dir</c>!
/// </para>
/// </summary>
/// <param name="readers">readers to merge; each one is integrity-checked when <paramref name="validate"/> is true.</param>
/// <param name="segmentInfo">segment info handed to the merge state; also supplies the codec.</param>
/// <param name="infoStream">info stream handed to the merge state.</param>
/// <param name="dir">directory stored on this merger.</param>
/// <param name="termIndexInterval">term index interval stored on this merger.</param>
/// <param name="checkAbort">abort checker handed to the merge state.</param>
/// <param name="fieldNumbers">field numbers used to seed the field-infos builder.</param>
/// <param name="context">I/O context stored on this merger.</param>
/// <param name="validate">when true, <c>CheckIntegrity()</c> is called on every incoming reader first.</param>
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    // Validate incoming readers before any state is built.
    if (validate)
    {
        foreach (AtomicReader incoming in readers)
        {
            incoming.CheckIntegrity();
        }
    }

    MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
    Directory = dir;
    TermIndexInterval = termIndexInterval;
    Codec = segmentInfo.Codec;
    Context = context;
    FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);

    // Must run after MergeState is assigned: the result is stored on its SegmentInfo.
    MergeState.SegmentInfo.DocCount = SetDocMaps();
}
/// <summary>
/// Builds a <see cref="FieldInfos"/> from the fields of the shared test document,
/// checks its size against <c>DocHelper.All</c>, and writes it to
/// <paramref name="dir"/> using the default codec's field-infos writer.
/// </summary>
/// <param name="dir">directory to write into.</param>
/// <param name="filename">name used both for the created output and as the name passed to the field-infos writer.</param>
/// <returns>the <see cref="FieldInfos"/> that was written.</returns>
public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
{
    // Positive test of FieldInfos
    Assert.IsNotNull(TestDoc);

    FieldInfos.Builder builder = new FieldInfos.Builder();
    foreach (IndexableField field in TestDoc)
    {
        builder.AddOrUpdate(field.Name(), field.FieldType());
    }
    FieldInfos fieldInfos = builder.Finish();

    // Since the complement is stored as well in the fields map;
    // this is all b/c we are using the no-arg constructor.
    Assert.AreEqual(DocHelper.All.Count, fieldInfos.Size());

    IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random()));
    try
    {
        Assert.IsNotNull(output);

        FieldInfosWriter writer = Codec.Default.FieldInfosFormat().FieldInfosWriter;
        writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
    }
    finally
    {
        // Release the output even when an assertion or the write fails, so a
        // failing test does not additionally leave an open file behind.
        if (output != null)
        {
            output.Dispose();
        }
    }

    return fieldInfos;
}
/// <summary>
/// Writes field updates (new _X_N updates files) to the directory.
/// <para>
/// The whole operation runs while holding the lock on this instance. On failure the
/// files created so far are deleted and only the next-write generation is advanced;
/// on success the field-infos generation is advanced, the per-generation file map is
/// rebuilt, the writer is checkpointed and an already open reader is replaced.
/// </para>
/// </summary>
/// <param name="dir">directory that receives the new files.</param>
/// <param name="dvUpdates">numeric and binary doc-values updates to write; asserted to contain something.</param>
public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
{
    lock (this)
    {
        Debug.Assert(dvUpdates.Any());

        // Track created files so we can delete them on exception;
        // this saves all codecs from having to do it.
        TrackingDirectoryWrapper trackedDir = new TrackingDirectoryWrapper(dir);

        FieldInfos updatedInfos = null;
        bool written = false;
        try
        {
            Codec segmentCodec = Info.Info.Codec;

            // The shared reader could be null, e.g. for a just merged segment (from
            // IndexWriter.commitMergedDeletes); open a private one in that case.
            SegmentReader segReader;
            if (this.Reader == null)
            {
                segReader = new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE);
            }
            else
            {
                segReader = this.Reader;
            }

            try
            {
                // Clone the FieldInfos so their dvGen can be updated separately from
                // the reader's infos and written to a new fieldInfos_gen file.
                FieldInfos.Builder infosBuilder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);

                // Adding the reader's FieldInfos wholesale is not an option because that
                // does not clone the attributes nor dvGen, so copy field by field.
                foreach (FieldInfo sourceInfo in segReader.FieldInfos)
                {
                    FieldInfo copy = infosBuilder.Add(sourceInfo);

                    // Copy the stuff FieldInfos.Builder doesn't copy.
                    if (sourceInfo.Attributes() != null)
                    {
                        foreach (KeyValuePair<string, string> attribute in sourceInfo.Attributes())
                        {
                            copy.PutAttribute(attribute.Key, attribute.Value);
                        }
                    }
                    copy.DocValuesGen = sourceInfo.DocValuesGen;
                }

                // Create new fields or update existing ones to have NumericDV type.
                foreach (string numericField in dvUpdates.NumericDVUpdates.Keys)
                {
                    infosBuilder.AddOrUpdate(numericField, NumericDocValuesField.TYPE);
                }

                // Create new fields or update existing ones to have BinaryDV type.
                foreach (string binaryField in dvUpdates.BinaryDVUpdates.Keys)
                {
                    infosBuilder.AddOrUpdate(binaryField, BinaryDocValuesField.fType);
                }

                updatedInfos = infosBuilder.Finish();
                long nextGen = Info.NextFieldInfosGen;

                // NOTE(review): the suffix is the generation as an invariant-culture decimal
                // string. A leftover comment in the original hinted at a Character.MAX_RADIX
                // conversion upstream; whatever reads these files must use the same format,
                // so confirm before changing.
                string suffix = nextGen.ToString(CultureInfo.InvariantCulture);

                SegmentWriteState writeState = new SegmentWriteState(null, trackedDir, Info.Info, updatedInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, suffix);
                DocValuesConsumer dvConsumer = segmentCodec.DocValuesFormat().FieldsConsumer(writeState);
                bool consumerOk = false;
                try
                {
                    // Write the numeric updates to a new gen'd docvalues file.
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> numeric in dvUpdates.NumericDVUpdates)
                    {
                        FieldInfo target = updatedInfos.FieldInfo(numeric.Key);
                        Debug.Assert(target != null);

                        target.DocValuesGen = nextGen;
                        dvConsumer.AddNumericField(target, GetLongEnumerable(segReader, numeric.Key, numeric.Value));
                    }

                    // Write the binary updates to a new gen'd docvalues file.
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> binary in dvUpdates.BinaryDVUpdates)
                    {
                        FieldInfo target = updatedInfos.FieldInfo(binary.Key);
                        Debug.Assert(target != null);

                        target.DocValuesGen = nextGen;
                        dvConsumer.AddBinaryField(target, GetBytesRefEnumerable(segReader, binary.Key, binary.Value));
                    }

                    segmentCodec.FieldInfosFormat().FieldInfosWriter.Write(trackedDir, Info.Info.Name, suffix, updatedInfos, IOContext.DEFAULT);
                    consumerOk = true;
                }
                finally
                {
                    if (!consumerOk)
                    {
                        // Already failing: close without letting a close error replace the original one.
                        IOUtils.CloseWhileHandlingException(dvConsumer);
                    }
                    else
                    {
                        dvConsumer.Dispose();
                    }
                }
            }
            finally
            {
                // Only dispose the reader when it is the private one opened above.
                if (segReader != this.Reader)
                {
                    segReader.Dispose();
                }
            }

            written = true;
        }
        finally
        {
            if (!written)
            {
                // Advance only the next-write generation so that a 2nd
                // attempt to write will write to a new file.
                Info.AdvanceNextWriteFieldInfosGen();

                // Delete any partially created file(s).
                foreach (string createdFile in trackedDir.CreatedFiles)
                {
                    try
                    {
                        dir.DeleteFile(createdFile);
                    }
                    catch (Exception)
                    {
                        // Ignore so we throw only the first exc
                    }
                }
            }
        }

        Info.AdvanceFieldInfosGen();

        // Copy all the updates to MergingDVUpdates, so they can later be applied to the merged segment.
        if (IsMerging)
        {
            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> numeric in dvUpdates.NumericDVUpdates)
            {
                DocValuesFieldUpdates pending;
                if (MergingDVUpdates.TryGetValue(numeric.Key, out pending))
                {
                    pending.Merge(numeric.Value);
                }
                else
                {
                    MergingDVUpdates[numeric.Key] = numeric.Value;
                }
            }

            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> binary in dvUpdates.BinaryDVUpdates)
            {
                DocValuesFieldUpdates pending;
                if (MergingDVUpdates.TryGetValue(binary.Key, out pending))
                {
                    pending.Merge(binary.Value);
                }
                else
                {
                    MergingDVUpdates[binary.Key] = binary.Value;
                }
            }
        }

        // Create a new map, keeping only the gens that are in use.
        IDictionary<long, ISet<string>> previousGenFiles = Info.UpdatesFiles;
        IDictionary<long, ISet<string>> inUseGenFiles = new Dictionary<long, ISet<string>>();
        long currentGen = Info.FieldInfosGen;
        foreach (FieldInfo fieldInfo in updatedInfos)
        {
            long dvGen = fieldInfo.DocValuesGen;
            if (dvGen == -1 || inUseGenFiles.ContainsKey(dvGen))
            {
                continue;
            }

            if (dvGen == currentGen)
            {
                // The generation written just now: its files are the ones tracked above.
                inUseGenFiles[currentGen] = trackedDir.CreatedFiles;
            }
            else
            {
                inUseGenFiles[dvGen] = previousGenFiles[dvGen];
            }
        }
        Info.GenUpdatesFiles = inUseGenFiles;

        // Wrote new files, should checkpoint().
        Writer.Checkpoint();

        // If there is a reader open, reopen it to reflect the updates.
        if (Reader == null)
        {
            return;
        }

        SegmentReader replacement = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
        bool swapped = false;
        try
        {
            Reader.DecRef();
            Reader = replacement;
            swapped = true;
        }
        finally
        {
            if (!swapped)
            {
                replacement.DecRef();
            }
        }
    }
}
/// <summary>
/// Creates a per-thread documents writer for the segment named
/// <paramref name="segmentName"/>.
/// </summary>
/// <param name="segmentName">name given to the new <c>SegmentInfo</c>.</param>
/// <param name="directory">original directory; also wrapped in a <c>TrackingDirectoryWrapper</c>.</param>
/// <param name="indexWriterConfig">supplies the codec, the similarity and the indexing chain.</param>
/// <param name="infoStream">stream used for the optional "DWPT" message.</param>
/// <param name="deleteQueue">delete queue from which a new slice is taken.</param>
/// <param name="fieldInfos">field-infos builder stored on this instance.</param>
public DocumentsWriterPerThread(string segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue, FieldInfos.Builder fieldInfos)
{
    // Plain captures of the constructor arguments.
    DirectoryOrig = directory;
    Directory = new TrackingDirectoryWrapper(directory);
    FieldInfos = fieldInfos;
    IndexWriterConfig = indexWriterConfig;
    InfoStream = infoStream;
    Codec = indexWriterConfig.Codec;

    docState = new DocState(this, infoStream);
    docState.Similarity = indexWriterConfig.Similarity;

    // Memory accounting: both allocators report into the same counter.
    bytesUsed = Counter.NewCounter();
    ByteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
    PendingUpdates = new BufferedUpdates();
    intBlockAllocator = new IntBlockAllocator(bytesUsed);

    DeleteQueue = deleteQueue;
    Debug.Assert(numDocsInRAM == 0, "num docs " + numDocsInRAM);
    PendingUpdates.Clear();
    DeleteSlice = deleteQueue.NewSlice();

    SegmentInfo_Renamed = new SegmentInfo(DirectoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, false, Codec, null);
    Debug.Assert(numDocsInRAM == 0);

    if (INFO_VERBOSE && infoStream.IsEnabled("DWPT"))
    {
        infoStream.Message("DWPT", Thread.CurrentThread.Name + " init seg=" + segmentName + " delQueue=" + deleteQueue);
    }

    // This should be the last call in the ctor: the chain receives a reference to
    // this instance, so everything above has to be in place before it runs.
    Consumer = indexWriterConfig.IndexingChain.GetChain(this);
}
/// <summary>
/// Call this to get the (merged) <see cref="FieldInfos"/> for a composite reader.
/// <para>
/// NOTE: the returned field numbers will likely not correspond to the actual field
/// numbers in the underlying readers, and codec metadata (the per-field attributes
/// of <see cref="FieldInfo"/>) will be unavailable.
/// </para>
/// </summary>
/// <param name="reader">reader whose leaves contribute their field infos.</param>
/// <returns>a new <see cref="FieldInfos"/> combining the field infos of every leaf.</returns>
public static FieldInfos GetMergedFieldInfos(IndexReader reader)
{
    FieldInfos.Builder merged = new FieldInfos.Builder();

    foreach (AtomicReaderContext leaf in reader.Leaves())
    {
        merged.Add(leaf.AtomicReader.FieldInfos);
    }

    return merged.Finish();
}
/// <summary>
/// Gives <paramref name="state"/> a new <c>DocumentsWriterPerThread</c> when it is
/// active and does not have one yet; otherwise does nothing.
/// </summary>
/// <param name="state">thread state to initialize.</param>
private void EnsureInitialized(ThreadState state)
{
    // Nothing to do for inactive states or states that already own a writer.
    if (!state.Active || state.Dwpt != null)
    {
        return;
    }

    // The builder is created before the segment name is requested, as in the original.
    FieldInfos.Builder fieldInfosBuilder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
    state.Dwpt = new DocumentsWriterPerThread(Writer.NewSegmentName(), Directory, LIWConfig, InfoStream, DeleteQueue, fieldInfosBuilder);
}
/// <summary>
/// Expert: create a ParallelAtomicReader based on the provided
/// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a
/// document is loaded, only <paramref name="storedFieldsReaders"/> will be used.
/// </summary>
/// <param name="closeSubReaders">stored on this instance; when false, every sub-reader's reference count is incremented.</param>
/// <param name="readers">main readers; for each field the first reader that has it wins.</param>
/// <param name="storedFieldsReaders">readers used for stored fields.</param>
/// <exception cref="System.ArgumentException">
/// if stored-fields readers are given without any main reader, or if the readers do
/// not all have the same <c>MaxDoc</c>.
/// </exception>
public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }

    CloseSubReaders = closeSubReaders;

    if (readers.Length == 0 && storedFieldsReaders.Length > 0)
    {
        throw new System.ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
    }

    // Work on private copies of the caller's arrays.
    ParallelReaders = (AtomicReader[])readers.Clone();
    StoredFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();

    // Document statistics are taken from the first main reader, if there is one.
    if (ParallelReaders.Length == 0)
    {
        numDocs = 0;
        maxDoc = 0;
        hasDeletions = false;
    }
    else
    {
        AtomicReader leadReader = ParallelReaders[0];
        maxDoc = leadReader.MaxDoc;
        numDocs = leadReader.NumDocs;
        hasDeletions = leadReader.HasDeletions;
    }

    CollectionsHelper.AddAll(CompleteReaderSet, ParallelReaders);
    CollectionsHelper.AddAll(CompleteReaderSet, StoredFieldsReaders);

    // Check compatibility: every reader must agree on maxDoc.
    foreach (AtomicReader candidate in CompleteReaderSet)
    {
        if (candidate.MaxDoc != maxDoc)
        {
            throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + candidate.MaxDoc);
        }
    }

    // TODO: make this read-only in a cleaner way?
    FieldInfos.Builder infosBuilder = new FieldInfos.Builder();

    // Build FieldInfos and the field-to-reader maps.
    foreach (AtomicReader parallelReader in ParallelReaders)
    {
        foreach (FieldInfo fieldInfo in parallelReader.FieldInfos)
        {
            // NOTE: first reader having a given field "wins".
            if (FieldToReader.ContainsKey(fieldInfo.Name))
            {
                continue;
            }

            infosBuilder.Add(fieldInfo);
            FieldToReader[fieldInfo.Name] = parallelReader;
            if (fieldInfo.HasVectors())
            {
                TvFieldToReader[fieldInfo.Name] = parallelReader;
            }
        }
    }
    FieldInfos_Renamed = infosBuilder.Finish();

    // Build the Fields instance.
    foreach (AtomicReader parallelReader in ParallelReaders)
    {
        Fields sourceFields = parallelReader.Fields;
        if (sourceFields == null)
        {
            continue;
        }

        foreach (string fieldName in sourceFields)
        {
            // Only add if the reader responsible for that field name is the current one.
            if (FieldToReader[fieldName].Equals(parallelReader))
            {
                Fields_Renamed.AddField(fieldName, sourceFields.Terms(fieldName));
            }
        }
    }

    // Do this last so that exceptions thrown above don't affect refcounts.
    foreach (AtomicReader subReader in CompleteReaderSet)
    {
        if (!closeSubReaders)
        {
            subReader.IncRef();
        }
        subReader.RegisterParentReader(this);
    }
}
/// <summary>
/// Abstract hook with no implementation here; concrete subclasses must override it.
/// NOTE(review): judging by the name it processes the current document — confirm
/// against the implementers, which are not visible in this excerpt.
/// </summary>
/// <param name="fieldInfos">
/// The <c>FieldInfos.Builder</c> passed to the implementation. Presumably it is used
/// to register the fields encountered in the document — verify against callers.
/// </param>
public abstract void ProcessDocument(FieldInfos.Builder fieldInfos);