private int[] docMap; // use getDocMap()

internal SegmentMergeInfo(int b, TermEnum te, IndexReader r) {
    base_Renamed = b;
    reader = r;
    termEnum = te;
    term = te.Term();
}
private void Initialize(IndexReader[] subReaders, bool closeSubReaders) {
    this.subReaders = new IndexReader[subReaders.Length];
    subReaders.CopyTo(this.subReaders, 0);
    starts = new int[subReaders.Length + 1]; // build starts array
    decrefOnClose = new bool[subReaders.Length];
    for (int i = 0; i < subReaders.Length; i++) {
        starts[i] = maxDoc;
        maxDoc += subReaders[i].MaxDoc(); // compute maxDocs

        if (!closeSubReaders) {
            subReaders[i].IncRef();
            decrefOnClose[i] = true;
        } else {
            decrefOnClose[i] = false;
        }

        if (subReaders[i].HasDeletions())
            hasDeletions = true;
    }
    starts[subReaders.Length] = maxDoc;
}
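// Illustrative sketch (not part of the original source) of how a starts array like the
// one built above is typically consumed: map a composite doc ID back to the sub-reader
// that owns it. Method and parameter names are assumptions for the example only.
private static int SubReaderIndex(int docId, int[] starts) {
    // starts holds one entry per sub-reader plus a trailing maxDoc sentinel;
    // return the largest i with starts[i] <= docId.
    int lo = 0, hi = starts.Length - 2;
    while (lo < hi) {
        int mid = (lo + hi + 1) / 2;
        if (starts[mid] <= docId)
            lo = mid;
        else
            hi = mid - 1;
    }
    return lo; // the local doc ID inside that sub-reader is docId - starts[lo]
}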
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) {
    int maxDoc = reader.MaxDoc();
    if (matchingVectorsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        int docCount = 0;
        while (docCount < maxDoc) {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    } else {
        for (int docNum = 0; docNum < maxDoc; docNum++) {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
/// <summary> <p/>Construct a MultiReader aggregating the named set of (sub)readers.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the subreaders. <p/>
/// <p/>Note that all subreaders are closed if this MultiReader is closed.<p/>
/// </summary>
/// <param name="subReaders">set of (sub)readers
/// </param>
/// <throws> IOException </throws>
public MultiReader(IndexReader[] subReaders) {
    Initialize(subReaders, true);
}
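// Illustrative usage sketch (not part of the original source): aggregate two already-open
// readers into one logical index view. The parameter names below are assumptions for the
// example only.
private static void MultiReaderUsageExample(Lucene.Net.Store.Directory dir1, Lucene.Net.Store.Directory dir2) {
    IndexReader r1 = IndexReader.Open(dir1);
    IndexReader r2 = IndexReader.Open(dir2);
    // With this constructor the MultiReader takes ownership of the subreaders.
    IndexReader combined = new MultiReader(new IndexReader[] { r1, r2 });
    try {
        int totalDocs = combined.MaxDoc(); // sum of the subreaders' MaxDoc()
    } finally {
        combined.Close(); // also closes r1 and r2
    }
}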
/// <summary> If doClone is true then we clone each of the subreaders</summary>
/// <param name="doClone">
/// </param>
/// <returns> New IndexReader, or same one (this) if
/// reopen/clone is not necessary
/// </returns>
/// <throws> CorruptIndexException </throws>
/// <throws> IOException </throws>
protected internal virtual IndexReader DoReopen(bool doClone) {
    EnsureOpen();

    bool reopened = false;
    IndexReader[] newSubReaders = new IndexReader[subReaders.Length];

    bool success = false;
    try {
        for (int i = 0; i < subReaders.Length; i++) {
            if (doClone)
                newSubReaders[i] = (IndexReader) subReaders[i].Clone();
            else
                newSubReaders[i] = subReaders[i].Reopen();
            // if at least one of the subreaders was updated we remember that
            // and return a new MultiReader
            if (newSubReaders[i] != subReaders[i]) {
                reopened = true;
            }
        }
        success = true;
    } finally {
        if (!success && reopened) {
            for (int i = 0; i < newSubReaders.Length; i++) {
                if (newSubReaders[i] != subReaders[i]) {
                    try {
                        newSubReaders[i].Close();
                    } catch (System.IO.IOException ignore) {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    if (reopened) {
        bool[] newDecrefOnClose = new bool[subReaders.Length];
        for (int i = 0; i < subReaders.Length; i++) {
            if (newSubReaders[i] == subReaders[i]) {
                newSubReaders[i].IncRef();
                newDecrefOnClose[i] = true;
            }
        }
        MultiReader mr = new MultiReader(newSubReaders);
        mr.decrefOnClose = newDecrefOnClose;
        mr.SetDisableFakeNorms(GetDisableFakeNorms());
        return mr;
    } else {
        return this;
    }
}
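// Illustrative usage sketch (not part of the original source): the standard reopen pattern
// for any IndexReader, including MultiReader. Reopen() may return the same instance when
// nothing changed, so only close the old reader when a new one actually came back.
private static IndexReader ReopenIfChanged(IndexReader reader) {
    IndexReader newReader = reader.Reopen();
    if (newReader != reader) {
        reader.Close(); // the new reader does not close the old one for us
    }
    return newReader;
}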
/// <summary>Add an IndexReader.</summary>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader) {
    EnsureOpen();
    Add(reader, false);
}
/// <summary>
/// Creates a new instance of the provider class for the given IndexReader.
/// </summary>
public CustomScoreProvider(IndexReader reader) {
    this.reader = reader;
}
/// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)">
/// </seealso>
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldOption) {
    EnsureOpen();

    System.Collections.Generic.IDictionary<string, string> fieldSet = new System.Collections.Generic.Dictionary<string, string>();
    for (int i = 0; i < core.fieldInfos.Size(); i++) {
        FieldInfo fi = core.fieldInfos.FieldInfo(i);
        if (fieldOption == IndexReader.FieldOption.ALL)
            fieldSet[fi.name] = fi.name;
        else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
            fieldSet[fi.name] = fi.name;
        else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS)
            fieldSet[fi.name] = fi.name;
        else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS)
            fieldSet[fi.name] = fi.name;
        else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
            fieldSet[fi.name] = fi.name;
        else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
            fieldSet[fi.name] = fi.name;
        else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
            fieldSet[fi.name] = fi.name;
        else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
            fieldSet[fi.name] = fi.name;
        else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
            fieldSet[fi.name] = fi.name;
        else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
            fieldSet[fi.name] = fi.name;
        else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
            fieldSet[fi.name] = fi.name;
    }
    return fieldSet.Keys;
}
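// Illustrative usage sketch (not part of the original source): list the indexed field
// names of an already-open reader. Other FieldOption values (UNINDEXED, TERMVECTOR, ...)
// follow the same pattern.
private static void PrintIndexedFields(IndexReader reader) {
    System.Collections.Generic.ICollection<string> names = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
    foreach (string name in names) {
        System.Console.WriteLine(name);
    }
}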
// Apply buffered delete terms, queries and docIDs to the
// provided reader
private bool ApplyDeletes(IndexReader reader, int docIDStart) {
    lock (this) {
        int docEnd = docIDStart + reader.MaxDoc();
        bool any = false;

        System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

        // Delete by term
        //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator();
        System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
        TermDocs docs = reader.TermDocs();
        try {
            while (iter.MoveNext()) {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
                Term term = (Term) entry.Key;
                // LUCENE-2086: we should be iterating a TreeMap,
                // here, so terms better be in order:
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
                docs.Seek(term);
                int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
                while (docs.Next()) {
                    int docID = docs.Doc();
                    if (docIDStart + docID >= limit)
                        break;
                    reader.DeleteDocument(docID);
                    any = true;
                }
            }
        } finally {
            docs.Close();
        }

        // Delete by docID
        iter = deletesFlushed.docIDs.GetEnumerator();
        while (iter.MoveNext()) {
            int docID = ((System.Int32) iter.Current);
            if (docID >= docIDStart && docID < docEnd) {
                reader.DeleteDocument(docID - docIDStart);
                any = true;
            }
        }

        // Delete by query
        IndexSearcher searcher = new IndexSearcher(reader);
        iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
        while (iter.MoveNext()) {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
            Query query = (Query) entry.Key;
            int limit = ((System.Int32) entry.Value);
            Weight weight = query.Weight(searcher);
            Scorer scorer = weight.Scorer(reader, true, false);
            if (scorer != null) {
                while (true) {
                    int doc = scorer.NextDoc();
                    if (((long) docIDStart) + doc >= limit)
                        break;
                    reader.DeleteDocument(doc);
                    any = true;
                }
            }
        }
        searcher.Close();
        return any;
    }
}
public abstract void Warm(IndexReader reader);
/// <summary>Merges the provided indexes into this index.
/// <p/>After this completes, the index is optimized. <p/>
/// <p/>The provided IndexReaders are not closed.<p/>
///
/// <p/><b>NOTE:</b> while this is running, any attempts to
/// add or delete documents (with another thread) will be
/// paused until this method completes.
///
/// <p/>See {@link #AddIndexesNoOptimize(Directory[])} for
/// details on transactional semantics, temporary free
/// space required in the Directory, and non-CFS segments
/// on an Exception.<p/>
///
/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
/// you should immediately close the writer. See <a
/// href="#OOME">above</a> for details.<p/>
///
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void AddIndexes(IndexReader[] readers) {
    EnsureOpen();

    // Do not allow add docs or deletes while we are running:
    docWriter.PauseAllThreads();

    // We must pre-acquire a read lock here (and upgrade to
    // write lock in startTransaction below) so that no
    // other addIndexes is allowed to start up after we have
    // flushed & optimized but before we then start our
    // transaction. This is because the merging below
    // requires that only one segment is present in the
    // index:
    AcquireRead();

    try {
        SegmentInfo info = null;
        System.String mergedName = null;
        SegmentMerger merger = null;

        bool success = false;
        try {
            Flush(true, false, true);
            Optimize(); // start with zero or 1 seg
            success = true;
        } finally {
            // Take care to release the read lock if we hit an
            // exception before starting the transaction
            if (!success)
                ReleaseRead();
        }

        // true means we already have a read lock; if this
        // call hits an exception it will release the write
        // lock:
        StartTransaction(true);

        try {
            mergedName = NewSegmentName();
            merger = new SegmentMerger(this, mergedName, null);

            SegmentReader sReader = null;
            lock (this) {
                if (segmentInfos.Count == 1) {
                    // add existing index, if any
                    sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, -1);
                }
            }

            success = false;
            try {
                if (sReader != null)
                    merger.Add(sReader);

                for (int i = 0; i < readers.Length; i++)
                    // add new indexes
                    merger.Add(readers[i]);

                int docCount = merger.Merge(); // merge 'em

                lock (this) {
                    segmentInfos.Clear(); // pop old infos & add new
                    info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false, merger.HasProx());
                    SetDiagnostics(info, "addIndexes(IndexReader[])");
                    segmentInfos.Add(info);
                }

                // Notify DocumentsWriter that the flushed count just increased
                docWriter.UpdateFlushedDocCount(docCount);

                success = true;
            } finally {
                if (sReader != null) {
                    readerPool.Release(sReader);
                }
            }
        } finally {
            if (!success) {
                if (infoStream != null)
                    Message("hit exception in addIndexes during merge");
                RollbackTransaction();
            } else {
                CommitTransaction();
            }
        }

        if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) {
            System.Collections.Generic.IList<string> files = null;

            lock (this) {
                // Must incRef our files so that if another thread
                // is running merge/optimize, it doesn't delete our
                // segment's files before we have a chance to
                // finish making the compound file.
                if (segmentInfos.Contains(info)) {
                    files = info.Files();
                    deleter.IncRef(files);
                }
            }

            if (files != null) {
                success = false;

                StartTransaction(false);

                try {
                    merger.CreateCompoundFile(mergedName + ".cfs");
                    lock (this) {
                        info.SetUseCompoundFile(true);
                    }
                    success = true;
                } finally {
                    lock (this) {
                        deleter.DecRef(files);
                    }

                    if (!success) {
                        if (infoStream != null)
                            Message("hit exception building compound file in addIndexes during merge");
                        RollbackTransaction();
                    } else {
                        CommitTransaction();
                    }
                }
            }
        }
    } catch (System.OutOfMemoryException oom) {
        HandleOOM(oom, "addIndexes(IndexReader[])");
    } finally {
        if (docWriter != null) {
            docWriter.ResumeAllThreads();
        }
    }
}
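// Illustrative usage sketch (not part of the original source): merge two existing indexes
// into a third one via AddIndexes(IndexReader[]). The directories and the analyzer are
// assumed to already exist; since the provided readers are not closed by the writer, we
// close them ourselves.
private static void MergeIndexesExample(Lucene.Net.Store.Directory target, Lucene.Net.Store.Directory src1, Lucene.Net.Store.Directory src2, Analyzer analyzer) {
    IndexWriter writer = new IndexWriter(target, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader r1 = IndexReader.Open(src1);
    IndexReader r2 = IndexReader.Open(src2);
    try {
        writer.AddIndexes(new IndexReader[] { r1, r2 }); // the target index is optimized afterwards
    } finally {
        r1.Close();
        r2.Close();
        writer.Close();
    }
}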
/// <summary> Close this index, writing all pending changes to disk.
///
/// </summary>
/// <throws> IllegalStateException if the index has already been closed </throws>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Close() {
    lock (directory) {
        if (!open)
            throw new System.SystemException("Index is closed already");
        if (indexWriter != null) {
            indexWriter.Close();
            indexWriter = null;
        } else if (indexReader != null) {
            indexReader.Close();
            indexReader = null;
        }
        open = false;
        if (closeDir) {
            directory.Close();
        }
        closeDir = false;
    }
}
/// <summary> Make sure all changes are written to disk.</summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> LockObtainFailedException if another writer
/// has this index open (<code>write.lock</code> could not
/// be obtained)
/// </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Flush() {
    lock (directory) {
        AssureOpen();
        if (indexWriter != null) {
            indexWriter.Close();
            indexWriter = null;
            CreateIndexWriter();
        } else {
            indexReader.Close();
            indexReader = null;
            CreateIndexReader();
        }
    }
}
/// <summary> Close the IndexWriter and open an IndexReader.</summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
protected internal virtual void CreateIndexReader() {
    if (indexReader == null) {
        if (indexWriter != null) {
            indexWriter.Close();
            indexWriter = null;
        }
        indexReader = IndexReader.Open(directory);
    }
}
/// <summary> Close the IndexReader and open an IndexWriter.</summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> LockObtainFailedException if another writer
/// has this index open (<code>write.lock</code> could not
/// be obtained)
/// </throws>
/// <throws> IOException if there is a low-level IO error </throws>
protected internal virtual void CreateIndexWriter() {
    if (indexWriter == null) {
        if (indexReader != null) {
            indexReader.Close();
            indexReader = null;
        }
        indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength));
        // IndexModifier cannot use ConcurrentMergeScheduler
        // because it synchronizes on the directory which can
        // cause deadlock
        indexWriter.SetMergeScheduler(new SerialMergeScheduler());
        indexWriter.SetInfoStream(infoStream);
        indexWriter.SetUseCompoundFile(useCompoundFile);
        if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
            indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
        indexWriter.SetMergeFactor(mergeFactor);
    }
}
protected internal override TermDocs TermDocs(IndexReader reader) {
    return (TermDocs) reader.TermPositions();
}
/// <summary> Creates a new <code>MultipleTermPositions</code> instance.
///
/// </summary>
/// <exception cref="IOException">
/// </exception>
public MultipleTermPositions(IndexReader indexReader, Term[] terms) {
    System.Collections.IList termPositions = new System.Collections.ArrayList();
    for (int i = 0; i < terms.Length; i++)
        termPositions.Add(indexReader.TermPositions(terms[i]));
    _termPositionsQueue = new TermPositionsQueue(termPositions);
    _posList = new IntQueue();
}
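// Illustrative usage sketch (not part of the original source): treat several terms in the
// same field as if they were one term and walk their merged positions. The field and term
// values below are assumptions for the example only.
private static void EnumerateMergedPositions(IndexReader reader) {
    Term[] terms = new Term[] { new Term("body", "lucene"), new Term("body", "search") };
    TermPositions tp = new MultipleTermPositions(reader, terms);
    try {
        while (tp.Next()) {
            int doc = tp.Doc();
            for (int i = 0; i < tp.Freq(); i++) {
                int position = tp.NextPosition();
                // doc/position now describe one occurrence of either term
            }
        }
    } finally {
        tp.Close();
    }
}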
/// <summary> <p/>Construct a FilterIndexReader based on the specified base reader.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the base reader.<p/>
/// <p/>Note that the base reader is closed if this FilterIndexReader is closed.<p/>
/// </summary>
/// <param name="in">specified base reader.
/// </param>
public FilterIndexReader(IndexReader in_Renamed) : base() {
    this.in_Renamed = in_Renamed;
}
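// Illustrative sketch (not part of the original source): FilterIndexReader is meant to be
// subclassed, overriding only the methods whose behavior should change and delegating the
// rest to the wrapped reader. The class and field names here are assumptions; note this
// only filters the field-name listing, not term enumeration or stored fields.
public class HideFieldIndexReader : FilterIndexReader {
    private readonly string hiddenField;

    public HideFieldIndexReader(IndexReader in_Renamed, string hiddenField) : base(in_Renamed) {
        this.hiddenField = hiddenField;
    }

    // Report every field of the wrapped reader except the hidden one.
    public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames) {
        System.Collections.Generic.List<string> result = new System.Collections.Generic.List<string>();
        foreach (string name in base.GetFieldNames(fieldNames)) {
            if (name != hiddenField)
                result.Add(name);
        }
        return result;
    }
}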
public /*internal*/ static SegmentReader GetOnlySegmentReader(IndexReader reader) {
    if (reader is SegmentReader)
        return (SegmentReader) reader;

    if (reader is DirectoryReader) {
        IndexReader[] subReaders = reader.GetSequentialSubReaders();
        if (subReaders.Length != 1) {
            throw new System.ArgumentException(reader + " has " + subReaders.Length + " segments instead of exactly one");
        }
        return (SegmentReader) subReaders[0];
    }

    throw new System.ArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
}
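// Illustrative usage sketch (not part of the original source), assuming the helper above is
// the static defined on SegmentReader as in this codebase. It is only safe when the index
// is known to consist of exactly one segment (e.g. right after Optimize()); otherwise it
// throws ArgumentException. The caller still owns, and must close, the opened reader.
private static SegmentReader OpenSingleSegment(Lucene.Net.Store.Directory dir) {
    IndexReader reader = IndexReader.Open(dir);
    return SegmentReader.GetOnlySegmentReader(reader);
}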
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames) {
    EnsureOpen();
    return in_Renamed.GetFieldNames(fieldNames);
}
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames) {
    EnsureOpen();
    System.Collections.Generic.List<string> fieldSet = new System.Collections.Generic.List<string>();
    for (int i = 0; i < readers.Count; i++) {
        IndexReader reader = ((IndexReader) readers[i]);
        System.Collections.Generic.ICollection<string> names = reader.GetFieldNames(fieldNames);
        fieldSet.AddRange(names);
    }
    return fieldSet;
}
internal static System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders) {
    // maintain a unique set of field names
    System.Collections.Generic.Dictionary<string, string> fieldSet = new System.Collections.Generic.Dictionary<string, string>();
    for (int i = 0; i < subReaders.Length; i++) {
        IndexReader reader = subReaders[i];
        System.Collections.Generic.ICollection<string> names = reader.GetFieldNames(fieldNames);
        SupportClass.CollectionsHelper.AddAllIfNotContains(fieldSet, names);
    }
    return fieldSet.Keys;
}
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
///
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number
/// of documents
/// </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields) {
    EnsureOpen();
    if (readers.Count == 0) {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext()) {
        // update fieldToReader map
        System.String field = (System.String) i.Current;
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);

    if (incRefReaders) {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
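// Illustrative usage sketch (not part of the original source): combine two indexes that were
// built with the same documents in the same order but with different fields, so they behave
// like one index. Stored fields are only fetched from the first reader here.
private static IndexReader OpenParallel(Lucene.Net.Store.Directory coreFields, Lucene.Net.Store.Directory auxFields) {
    ParallelReader parallel = new ParallelReader();
    parallel.Add(IndexReader.Open(coreFields));       // stored fields come from this reader
    parallel.Add(IndexReader.Open(auxFields), true);  // ignoreStoredFields: search-only fields
    return parallel;
}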
internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments

public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t) {
    this.topReader = topReader;
    queue = new SegmentMergeQueue(readers.Length);
    matchingSegments = new SegmentMergeInfo[readers.Length + 1];
    for (int i = 0; i < readers.Length; i++) {
        IndexReader reader = readers[i];
        TermEnum termEnum;

        if (t != null) {
            termEnum = reader.Terms(t);
        } else
            termEnum = reader.Terms();

        SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
        smi.ord = i;
        if (t == null ? smi.Next() : termEnum.Term() != null)
            queue.Put(smi); // initialize queue
        else
            smi.Close();
    }

    if (t != null && queue.Size() > 0) {
        Next();
    }
}
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames) {
    EnsureOpen();
    return DirectoryReader.GetFieldNames(fieldNames, this.subReaders);
}
internal SegmentMergeInfo smi; // current segment merge info... can be null

public MultiTermDocs(IndexReader topReader, IndexReader[] r, int[] s) {
    this.topReader = topReader;
    readers = r;
    starts = s;

    readerTermDocs = new TermDocs[r.Length];
}
/// <summary> <p/>Construct a MultiReader aggregating the named set of (sub)readers.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the subreaders. <p/>
/// </summary>
/// <param name="closeSubReaders">indicates whether the subreaders should be closed
/// when this MultiReader is closed
/// </param>
/// <param name="subReaders">set of (sub)readers
/// </param>
/// <throws> IOException </throws>
public MultiReader(IndexReader[] subReaders, bool closeSubReaders) {
    Initialize(subReaders, closeSubReaders);
}
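// Illustrative usage sketch (not part of the original source): with closeSubReaders set to
// false the MultiReader only takes references, so the caller keeps ownership of the
// subreaders and must close them itself after closing the MultiReader.
private static void MultiReaderSharedSubReadersExample(IndexReader r1, IndexReader r2) {
    MultiReader combined = new MultiReader(new IndexReader[] { r1, r2 }, false);
    try {
        // use "combined" for searching while r1 and r2 stay usable on their own
    } finally {
        combined.Close(); // does not close r1 and r2; they remain open here
    }
}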
protected internal virtual TermDocs TermDocs(IndexReader reader) {
    return term == null ? reader.TermDocs(null) : reader.TermDocs();
}
public MultiTermPositions(IndexReader topReader, IndexReader[] r, int[] s) : base(topReader, r, s) {
}
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) {
    int maxDoc = reader.MaxDoc();
    if (matchingVectorsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc; ) {
            if (reader.IsDeleted(docNum)) {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                    break;
                if (reader.IsDeleted(docNum)) {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            checkAbort.Work(300 * numDocs);
        }
    } else {
        for (int docNum = 0; docNum < maxDoc; docNum++) {
            if (reader.IsDeleted(docNum)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}