/// <summary>
/// Runs the base set-up and then installs this fixture's <c>Codec</c> as
/// <c>Codec.Default</c>. The default that was active beforehand is kept in
/// <c>SavedCodec</c>.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // Pin the default codec so that test cases added to this class later
    // do not depend on whichever default happened to be active.
    SavedCodec = Codec.Default;
    Codec.Default = Codec;
}
/// <summary>
/// Creates a merger for <paramref name="readers"/>.
/// <para>
/// Note: just like in the codec APIs, <paramref name="dir"/> is NOT the same as
/// the directory of <paramref name="segmentInfo"/>.
/// </para>
/// </summary>
/// <param name="readers">the readers to merge.</param>
/// <param name="segmentInfo">segment info handed to the merge state; its codec becomes this merger's codec.</param>
/// <param name="infoStream">info stream handed to the merge state.</param>
/// <param name="dir">directory recorded on this merger.</param>
/// <param name="termIndexInterval">term index interval recorded on this merger.</param>
/// <param name="checkAbort">abort checker handed to the merge state.</param>
/// <param name="fieldNumbers">field numbers used to create the field infos builder.</param>
/// <param name="context">IO context recorded on this merger.</param>
/// <param name="validate">when <c>true</c>, <c>CheckIntegrity()</c> is called on every reader first.</param>
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    if (validate)
    {
        // Validate the incoming readers before any merge state is built.
        foreach (AtomicReader incoming in readers)
        {
            incoming.CheckIntegrity();
        }
    }

    this.MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
    this.Directory = dir;
    this.TermIndexInterval = termIndexInterval;
    this.Codec = segmentInfo.Codec;
    this.Context = context;
    this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);

    // The value returned by SetDocMaps() is stored as the doc count of the merge state's segment info.
    this.MergeState.SegmentInfo.DocCount = SetDocMaps();
}
/// <summary>
/// Check codec restrictions: the codec name, the format names of a
/// <c>RandomCodec</c>, the codec's postings format name and
/// <c>LuceneTestCase.TEST_POSTINGSFORMAT</c> are each passed to
/// <c>ShouldAvoidCodec</c>.
/// </summary>
/// <param name="codec">the codec to check.</param>
/// <exception cref="AssumptionViolatedException"> if the class does not work with a given codec. </exception>
private void CheckCodecRestrictions(Codec codec)
{
    LuceneTestCase.AssumeFalse("Class not allowed to use codec: " + codec.Name + ".", ShouldAvoidCodec(codec.Name));

    // The individual format names are only inspected when there is something to avoid.
    RandomCodec randomCodec = codec as RandomCodec;
    if (randomCodec != null && AvoidCodecs.Count > 0)
    {
        foreach (string formatName in randomCodec.FormatNames)
        {
            LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + formatName + ".", ShouldAvoidCodec(formatName));
        }
    }

    PostingsFormat postingsFormat = codec.PostingsFormat();
    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + postingsFormat.Name + ".", ShouldAvoidCodec(postingsFormat.Name));

    LuceneTestCase.AssumeFalse("Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".", ShouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
/// <summary>
/// Writes one document with a codec picked at random from the old read-write
/// codecs, then issues a numeric doc-values update against it and expects
/// disposing the writer to throw <see cref="System.NotSupportedException"/>.
/// <para>
/// The test toggles the static <c>OLD_FORMAT_IMPERSONATION_IS_ACTIVE</c> flag.
/// Every mutation of the flag happens inside one try/finally so the previous
/// value is restored even if an earlier step (writer creation, indexing, the
/// update call) throws.
/// </para>
/// </summary>
public virtual void TestUpdateOldSegments()
{
    Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec(), new Lucene45RWCodec() };
    Directory dir = NewDirectory();

    bool oldValue = OLD_FORMAT_IMPERSONATION_IS_ACTIVE;
    try
    {
        // create a segment with an old Codec
        IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        conf.SetCodec(oldCodecs[Random().Next(oldCodecs.Length)]);
        OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
        IndexWriter writer = new IndexWriter(dir, conf);
        Document doc = new Document();
        doc.Add(new StringField("id", "doc", Store.NO));
        doc.Add(new NumericDocValuesField("f", 5));
        writer.AddDocument(doc);
        writer.Dispose();

        // reopen with a fresh config and buffer an update against the old segment
        conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        writer = new IndexWriter(dir, conf);
        writer.UpdateNumericDocValue(new Term("id", "doc"), "f", 4L);

        // with impersonation switched off, applying the update on Dispose() must fail
        OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;
        try
        {
            writer.Dispose();
            Assert.Fail("should not have succeeded to update a segment written with an old Codec");
        }
        catch (System.NotSupportedException)
        {
            writer.Rollback();
        }
    }
    finally
    {
        // always restore the shared static flag so other tests are not affected
        OLD_FORMAT_IMPERSONATION_IS_ACTIVE = oldValue;
    }

    dir.Dispose();
}
/// <summary>
/// Set the <see cref="Codec"/>.
/// <para>
/// Only takes effect when IndexWriter is first created.
/// </para>
/// </summary>
/// <param name="codec">the codec to use; must not be <c>null</c>.</param>
/// <returns>this config instance, so that calls can be chained.</returns>
/// <exception cref="System.ArgumentNullException">
/// if <paramref name="codec"/> is <c>null</c>. This type derives from
/// <see cref="System.ArgumentException"/>, so handlers that catch the base
/// type keep working.
/// </exception>
public IndexWriterConfig SetCodec(Codec codec)
{
    if (codec == null)
    {
        // Most specific standard exception for a null argument; carries the parameter name.
        throw new System.ArgumentNullException("codec", "codec must not be null");
    }

    this.codec = codec;
    return this;
}
/// <summary>
/// Initialises a per-thread documents writer for the segment named
/// <paramref name="segmentName"/>.
/// </summary>
/// <param name="segmentName">name given to the new <c>SegmentInfo</c>.</param>
/// <param name="directory">original directory; also wrapped in a <c>TrackingDirectoryWrapper</c>.</param>
/// <param name="indexWriterConfig">live config supplying the codec, similarity and indexing chain.</param>
/// <param name="infoStream">stream used for the optional "DWPT" init message.</param>
/// <param name="deleteQueue">delete queue; a new slice is taken from it.</param>
/// <param name="fieldInfos">field infos builder recorded on this instance.</param>
public DocumentsWriterPerThread(string segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue, FieldInfos.Builder fieldInfos)
{
    // Plain wiring of the constructor arguments.
    this.DirectoryOrig = directory;
    this.Directory = new TrackingDirectoryWrapper(directory);
    this.FieldInfos = fieldInfos;
    this.IndexWriterConfig = indexWriterConfig;
    this.InfoStream = infoStream;
    this.Codec = indexWriterConfig.Codec;

    this.docState = new DocState(this, infoStream);
    this.docState.Similarity = indexWriterConfig.Similarity;

    // Both allocators are created over the same bytesUsed counter.
    bytesUsed = Counter.NewCounter();
    ByteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
    PendingUpdates = new BufferedUpdates();
    intBlockAllocator = new IntBlockAllocator(bytesUsed);

    this.DeleteQueue = deleteQueue;
    Debug.Assert(numDocsInRAM == 0, "num docs " + numDocsInRAM);
    PendingUpdates.Clear();
    DeleteSlice = deleteQueue.NewSlice();

    SegmentInfo_Renamed = new SegmentInfo(DirectoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, false, Codec, null);
    Debug.Assert(numDocsInRAM == 0);

    if (INFO_VERBOSE && infoStream.IsEnabled("DWPT"))
    {
        string initMessage = Thread.CurrentThread.Name + " init seg=" + segmentName + " delQueue=" + deleteQueue;
        infoStream.Message("DWPT", initMessage);
    }

    // Keep this as the last call in the constructor: the chain is handed a
    // reference to this instance, so everything above should be set up by now.
    Consumer = indexWriterConfig.IndexingChain.GetChain(this);
}
/// <summary>
/// Constructs a new IndexWriter per the settings given in <paramref name="conf"/>.
/// The settings are copied into a <c>LiveIndexWriterConfig</c> held by this writer.
/// <para>
/// <b>NOTE:</b> after this writer is created, the given configuration instance
/// cannot be passed to another writer. If you intend to do so, you should
/// clone it beforehand.
/// </para>
/// </summary>
/// <param name="d">
/// the index directory. The index is either created or appended
/// according to <c>conf.OpenMode</c>. </param>
/// <param name="conf">
/// the configuration settings according to which IndexWriter should
/// be initialized. </param>
/// <exception cref="LockObtainFailedException">
/// if the write lock cannot be obtained within the configured timeout </exception>
/// <exception cref="ArgumentException">
/// if the configured index commit belongs to a different directory than
/// <paramref name="d"/> </exception>
/// <exception cref="IOException">
/// if the directory cannot be read/written to, or if it does not
/// exist and <c>conf.OpenMode</c> is <c>OpenMode_e.APPEND</c>,
/// or if there is any other low-level IO error </exception>
public IndexWriter(Directory d, IndexWriterConfig conf)
{
    readerPool = new ReaderPool(this);
    conf.SetIndexWriter(this); // prevent reuse by other instances
    Config_Renamed = new LiveIndexWriterConfig(conf);
    directory = d;
    analyzer = Config_Renamed.Analyzer;
    infoStream = Config_Renamed.InfoStream;
    mergePolicy = Config_Renamed.MergePolicy;
    mergePolicy.IndexWriter = this;
    mergeScheduler = Config_Renamed.MergeScheduler;
    Codec = Config_Renamed.Codec;

    BufferedUpdatesStream = new BufferedUpdatesStream(infoStream);
    PoolReaders = Config_Renamed.ReaderPooling;

    WriteLock = directory.MakeLock(WRITE_LOCK_NAME);

    // obtain write lock
    if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout))
    {
        throw new LockObtainFailedException("Index locked for write: " + WriteLock);
    }

    bool success = false;
    try
    {
        OpenMode_e? mode = Config_Renamed.OpenMode;
        bool create;
        switch (mode)
        {
            case OpenMode_e.CREATE:
                create = true;
                break;

            case OpenMode_e.APPEND:
                create = false;
                break;

            default:
                // CREATE_OR_APPEND - create only if an index does not exist
                create = !DirectoryReader.IndexExists(directory);
                break;
        }

        // If index is too old, reading the segments will throw
        // IndexFormatTooOldException.
        segmentInfos = new SegmentInfos();

        bool initialIndexExists = true;

        if (create)
        {
            // Try to read first. This is to allow create against an index
            // that's currently open for searching. In this case we write the
            // next segments_N file with no segments:
            try
            {
                segmentInfos.Read(directory);
                segmentInfos.Clear();
            }
            catch (IOException)
            {
                // Likely this means it's a fresh directory
                initialIndexExists = false;
            }

            // Record that we have a change (zero out all segments) pending:
            Changed();
        }
        else
        {
            segmentInfos.Read(directory);

            IndexCommit commit = Config_Renamed.IndexCommit;
            if (commit != null)
            {
                // Swap out all segments, but keep metadata in SegmentInfos,
                // like version & generation, to preserve write-once. This is
                // important if readers are open against the future commit
                // points.
                if (commit.Directory != directory)
                {
                    throw new ArgumentException(string.Format("IndexCommit's directory doesn't match my directory (mine: {0}, commit's: {1})", directory, commit.Directory));
                }

                SegmentInfos oldInfos = new SegmentInfos();
                oldInfos.Read(directory, commit.SegmentsFileName);
                segmentInfos.Replace(oldInfos);
                Changed();

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\"");
                }
            }
        }

        RollbackSegments = segmentInfos.CreateBackupSegmentInfos();

        // start with previous field numbers, but new FieldInfos
        GlobalFieldNumberMap = FieldNumberMap;

        Config_Renamed.FlushPolicy.Init(Config_Renamed);
        DocWriter = new DocumentsWriter(this, Config_Renamed, directory);
        eventQueue = DocWriter.EventQueue();

        // Default deleter (for backwards compatibility) is
        // KeepOnlyLastCommitDeleter:
        lock (this)
        {
            Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists);
        }

        if (Deleter.StartingCommitDeleted)
        {
            // Deletion policy deleted the "head" commit point. We have to
            // mark ourself as changed so that if we are closed w/o any
            // further changes we write a new segments_N file.
            Changed();
        }

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", "init: create=" + create);
            MessageState();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            // Initialisation failed part-way: give the write lock back.
            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "init: hit exception on init; releasing write lock");
            }

            WriteLock.Release();
            IOUtils.CloseWhileHandlingException(WriteLock);
            WriteLock = null;
        }
    }
}
/// <summary>
/// Builds two indexes from the same seed, one written with the codec
/// registered as "Lucene41" and one with a <c>RandomCodec</c>, and opens a
/// reader on each.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // Left side: the codec looked up as "Lucene41"; right side: a RandomCodec.
    // When more codecs are available we should probably pick 2 from the
    // available codecs instead.
    LeftCodec = Codec.ForName("Lucene41");
    RightCodec = new RandomCodec(Random());

    LeftDir = NewDirectory();
    RightDir = NewDirectory();

    long seed = Random().Next();
    int seedAsInt = (int)seed;

    // must use same seed because of random payloads, etc
    int maxTermLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seedAsInt));
    leftAnalyzer.MaxTokenLength = maxTermLength;

    MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seedAsInt));
    rightAnalyzer.MaxTokenLength = maxTermLength;

    // but these can be different
    // TODO: this turns this into a really big test of Multi*, is that what we want?
    IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
    leftConfig.SetCodec(LeftCodec);
    // preserve docids
    leftConfig.SetMergePolicy(NewLogMergePolicy());

    IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer);
    rightConfig.SetCodec(RightCodec);
    // preserve docids
    rightConfig.SetMergePolicy(NewLogMergePolicy());

    // must use same seed because of random docvalues fields, etc
    RandomIndexWriter leftWriter = new RandomIndexWriter(new Random(seedAsInt), LeftDir, leftConfig);
    RandomIndexWriter rightWriter = new RandomIndexWriter(new Random(seedAsInt), RightDir, rightConfig);

    int numdocs = AtLeast(100);
    CreateRandomIndex(numdocs, leftWriter, seed);
    CreateRandomIndex(numdocs, rightWriter, seed);

    LeftReader = MaybeWrapReader(leftWriter.Reader);
    leftWriter.Dispose();

    RightReader = MaybeWrapReader(rightWriter.Reader);
    rightWriter.Dispose();

    // check that our readers are valid
    TestUtil.CheckReader(LeftReader);
    TestUtil.CheckReader(RightReader);

    Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString();
}
/// <summary>
/// Creates a new config that handles the live <see cref="IndexWriter"/>
/// settings by copying every setting from <paramref name="config"/>.
/// </summary>
/// <param name="config">the config whose settings are copied.</param>
internal LiveIndexWriterConfig(IndexWriterConfig config)
{
    // Buffering, warming and term-index settings.
    maxBufferedDeleteTerms = config.MaxBufferedDeleteTerms;
    maxBufferedDocs = config.MaxBufferedDocs;
    mergedSegmentWarmer = config.MergedSegmentWarmer;
    RamBufferSizeMB = config.RAMBufferSizeMB;
    readerTermsIndexDivisor = config.ReaderTermsIndexDivisor;
    termIndexInterval = config.TermIndexInterval;

    // Version, analysis, commit handling and open mode.
    MatchVersion = config.MatchVersion;
    analyzer = config.Analyzer;
    delPolicy = config.DelPolicy;
    Commit = config.IndexCommit;
    openMode = config.OpenMode;
    similarity = config.Similarity;

    // Merging, locking, indexing chain and codec.
    mergeScheduler = config.MergeScheduler;
    writeLockTimeout = config.WriteLockTimeout;
    indexingChain = config.IndexingChain;
    codec = config.Codec;
    infoStream = config.InfoStream;
    mergePolicy = config.MergePolicy;

    // Threading, pooling, flushing and the remaining settings.
    indexerThreadPool = config.IndexerThreadPool;
    readerPooling = config.ReaderPooling;
    flushPolicy = config.FlushPolicy;
    PerThreadHardLimitMB = config.RAMPerThreadHardLimitMB;
    useCompoundFile = config.UseCompoundFile;
    checkIntegrityAtMerge = config.CheckIntegrityAtMerge;
}
private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here

#endregion Fields

#region Constructors

/// <summary>
/// Creates a config populated with the <c>IndexWriterConfig</c> defaults.
/// Used by <c>IndexWriterConfig</c>.
/// </summary>
/// <param name="analyzer">the analyzer recorded on this config.</param>
/// <param name="matchVersion">the version recorded as <c>MatchVersion</c>.</param>
/// <exception cref="System.NullReferenceException">if <c>Codec.Default</c> is <c>null</c>.</exception>
internal LiveIndexWriterConfig(Analyzer analyzer, LuceneVersion matchVersion)
{
    this.analyzer = analyzer;
    this.MatchVersion = matchVersion;

    // Buffering and reader defaults.
    RamBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
    maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
    maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
    readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
    mergedSegmentWarmer = null;
    termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here

    // Commit handling and open mode.
    delPolicy = new KeepOnlyLastCommitDeletionPolicy();
    Commit = null;
    useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
    openMode = OpenMode_e.CREATE_OR_APPEND;

    // Search, merge scheduling, locking and indexing chain.
    similarity = IndexSearcher.DefaultSimilarity;
    mergeScheduler = new ConcurrentMergeScheduler();
    writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
    indexingChain = DocumentsWriterPerThread.defaultIndexingChain;

    // The default codec must be available at construction time.
    // NOTE(review): throwing NullReferenceException directly is discouraged in .NET;
    // kept as-is because callers may depend on the exception type - confirm before changing.
    codec = Codec.Default;
    if (codec == null)
    {
        throw new System.NullReferenceException();
    }

    // Diagnostics, merge/flush policies and threading.
    infoStream = Util.InfoStream.Default;
    mergePolicy = new TieredMergePolicy();
    flushPolicy = new FlushByRamOrCountsPolicy();
    readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
    indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
    PerThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}