/// <summary>
/// Adds an IndexReader to this reader, optionally excluding it from the set
/// of readers that stored fields are returned from. Ignoring stored fields can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <param name="reader">The reader to add; its MaxDoc() and NumDocs() must match
/// those of the first reader that was added.</param>
/// <param name="ignoreStoredFields">When true, the reader is not added to the
/// stored-field readers.</param>
/// <exception cref="System.ArgumentException">If not all indexes contain the same
/// number of documents, or not all indexes have the same value of
/// <see cref="IndexReader.MaxDoc"/>.</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error.</exception>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();

    if (readers.Count == 0)
    {
        // The first reader defines the values every later reader is checked against.
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    // check compatibility
    if (reader.MaxDoc() != maxDoc)
    {
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;

    // update fieldToReader map; foreach also disposes the enumerator, which the
    // previous hand-driven IEnumerator loop never did.
    foreach (string field in fields)
    {
        // Only fields that have no reader yet are mapped, so an earlier reader
        // keeps the field. NOTE(review): relies on the fieldToReader indexer
        // returning null for an unknown key - confirm against its declared type.
        if (fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    // Record, per reader, whether a reference was taken above.
    decrefOnClose.Add(incRefReaders);
}
// Apply buffered delete terms, queries and docIDs to the
// provided reader.
//
// reader:     the reader whose documents are deleted.
// docIDStart: offset added to the reader's local doc IDs before they are
//             compared with the buffered limits / buffered doc IDs.
// Returns true if at least one document was deleted from the reader.
private bool ApplyDeletes(IndexReader reader, int docIDStart)
{
    lock (this)
    {
        int docEnd = docIDStart + reader.MaxDoc();
        bool any = false;

        // NOTE(review): presumably resets the term-ordering check before the
        // loop below - confirm against CheckDeleteTerm.
        System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

        // Delete by term
        System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
        TermDocs docs = reader.TermDocs();
        try
        {
            while (iter.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
                Term term = (Term) entry.Key;
                // LUCENE-2086: we should be iterating a TreeMap,
                // here, so terms better be in order:
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
                docs.Seek(term);
                int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
                while (docs.Next())
                {
                    int docID = docs.Doc();
                    // Stop at the first doc whose offset ID reaches the
                    // limit buffered for this term.
                    if (docIDStart + docID >= limit)
                        break;
                    reader.DeleteDocument(docID);
                    any = true;
                }
            }
        }
        finally
        {
            docs.Close();
        }

        // Delete by docID
        iter = deletesFlushed.docIDs.GetEnumerator();
        while (iter.MoveNext())
        {
            int docID = ((System.Int32) iter.Current);
            // Only IDs inside [docIDStart, docEnd) are applied to this reader;
            // they are converted back to reader-local IDs.
            if (docID >= docIDStart && docID < docEnd)
            {
                reader.DeleteDocument(docID - docIDStart);
                any = true;
            }
        }

        // Delete by query
        IndexSearcher searcher = new IndexSearcher(reader);
        try
        {
            iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
            while (iter.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
                Query query = (Query) entry.Key;
                int limit = ((System.Int32) entry.Value);
                Weight weight = query.Weight(searcher);
                Scorer scorer = weight.Scorer(reader, true, false);
                if (scorer != null)
                {
                    while (true)
                    {
                        int doc = scorer.NextDoc();
                        // Widened to long so the sum cannot overflow int.
                        // NOTE(review): this is also the only loop exit, so it
                        // presumably relies on NextDoc() returning a very large
                        // sentinel when exhausted - confirm.
                        if (((long) docIDStart) + doc >= limit)
                            break;
                        reader.DeleteDocument(doc);
                        any = true;
                    }
                }
            }
        }
        finally
        {
            // Close the searcher even when a query, scorer or delete throws.
            searcher.Close();
        }
        return any;
    }
}
// Copies the term vectors of every document in reader (doc 0 up to
// reader.MaxDoc()) to termVectorsWriter. When matchingVectorsReader is
// non-null the vectors are copied in raw chunks of at most
// MAX_RAW_MERGE_DOCS documents; otherwise each document is copied one by one.
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int totalDocs = reader.MaxDoc();

    if (matchingVectorsReader == null)
    {
        // No matching reader: copy document by document.
        int docNum = 0;
        while (docNum < totalDocs)
        {
            // NOTE: it's very important to first assign to a local and only then
            // pass it to termVectorsWriter.AddAllDocVectors; see LUCENE-1282
            TermFreqVector[] docVectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(docVectors);
            checkAbort.Work(300);
            docNum++;
        }
        return;
    }

    // We can bulk-copy because the fieldInfos are "congruent"
    for (int copied = 0; copied < totalDocs; )
    {
        int remaining = totalDocs - copied;
        int chunk = remaining < MAX_RAW_MERGE_DOCS ? remaining : MAX_RAW_MERGE_DOCS;
        matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, copied, chunk);
        termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, chunk);
        copied += chunk;
        checkAbort.Work(300 * chunk);
    }
}
// Copies the term vectors of every non-deleted document in reader to
// termVectorsWriter. When matchingVectorsReader is non-null, runs of
// consecutive live documents (at most MAX_RAW_MERGE_DOCS long) are copied
// in raw form; otherwise live documents are copied one at a time.
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int totalDocs = reader.MaxDoc();

    if (matchingVectorsReader == null)
    {
        // No matching reader: copy each live document individually.
        for (int docNum = 0; docNum < totalDocs; docNum++)
        {
            if (!reader.IsDeleted(docNum))
            {
                // NOTE: it's very important to first assign to a local and only then
                // pass it to termVectorsWriter.AddAllDocVectors; see LUCENE-1282
                TermFreqVector[] docVectors = reader.GetTermFreqVectors(docNum);
                termVectorsWriter.AddAllDocVectors(docVectors);
                checkAbort.Work(300);
            }
        }
        return;
    }

    // We can bulk-copy because the fieldInfos are "congruent"
    int cursor = 0;
    while (cursor < totalDocs)
    {
        if (reader.IsDeleted(cursor))
        {
            // skip deleted docs
            cursor++;
            continue;
        }

        // We can optimize this case (doing a bulk byte copy) since the field
        // numbers are identical. Extend the run until the end of the reader,
        // a deleted doc, or the chunk-size cap - whichever comes first.
        int runStart = cursor;
        int runLength = 0;
        while (true)
        {
            cursor++;
            runLength++;
            if (cursor >= totalDocs)
            {
                break;
            }
            if (reader.IsDeleted(cursor))
            {
                // Step past the deleted doc so the outer loop need not re-test it.
                cursor++;
                break;
            }
            if (runLength >= MAX_RAW_MERGE_DOCS)
            {
                break;
            }
        }

        matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, runStart, runLength);
        termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, runLength);
        checkAbort.Work(300 * runLength);
    }
}
// Copies the stored fields of every document in reader (doc 0 up to
// reader.MaxDoc()) to fieldsWriter and returns the number of documents
// copied. When matchingFieldsReader is non-null the documents are copied in
// raw chunks of at most MAX_RAW_MERGE_DOCS; otherwise each document is
// loaded through fieldSelectorMerge and added individually.
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
{
    int totalDocs = reader.MaxDoc();
    int copied = 0;

    if (matchingFieldsReader == null)
    {
        // No matching reader: copy document by document.
        while (copied < totalDocs)
        {
            // NOTE: it's very important to first assign to a local and only then
            // pass it to fieldsWriter.AddDocument; see LUCENE-1282
            Document storedDoc = reader.Document(copied, fieldSelectorMerge);
            fieldsWriter.AddDocument(storedDoc);
            checkAbort.Work(300);
            copied++;
        }
        return copied;
    }

    // We can bulk-copy because the fieldInfos are "congruent"
    while (copied < totalDocs)
    {
        int remaining = totalDocs - copied;
        int chunk = remaining < MAX_RAW_MERGE_DOCS ? remaining : MAX_RAW_MERGE_DOCS;
        IndexInput rawStream = matchingFieldsReader.RawDocs(rawDocLengths, copied, chunk);
        fieldsWriter.AddRawDocuments(rawStream, rawDocLengths, chunk);
        copied += chunk;
        checkAbort.Work(300 * chunk);
    }
    return copied;
}
// Copies the stored fields of every non-deleted document in reader to
// fieldsWriter and returns the number of documents copied. When
// matchingFieldsReader is non-null, runs of consecutive live documents (at
// most MAX_RAW_MERGE_DOCS long) are copied in raw form; otherwise each live
// document is loaded through fieldSelectorMerge and added individually.
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
{
    int copied = 0;
    int totalDocs = reader.MaxDoc();

    if (matchingFieldsReader == null)
    {
        // No matching reader: copy each live document individually.
        for (int docNum = 0; docNum < totalDocs; docNum++)
        {
            if (!reader.IsDeleted(docNum))
            {
                // NOTE: it's very important to first assign to a local and only then
                // pass it to fieldsWriter.AddDocument; see LUCENE-1282
                Document storedDoc = reader.Document(docNum, fieldSelectorMerge);
                fieldsWriter.AddDocument(storedDoc);
                copied++;
                checkAbort.Work(300);
            }
        }
        return copied;
    }

    // We can bulk-copy because the fieldInfos are "congruent"
    int cursor = 0;
    while (cursor < totalDocs)
    {
        if (reader.IsDeleted(cursor))
        {
            // skip deleted docs
            cursor++;
            continue;
        }

        // We can optimize this case (doing a bulk byte copy) since the field
        // numbers are identical. Extend the run until the end of the reader,
        // a deleted doc, or the chunk-size cap - whichever comes first.
        int runStart = cursor;
        int runLength = 0;
        while (true)
        {
            cursor++;
            runLength++;
            if (cursor >= totalDocs)
            {
                break;
            }
            if (reader.IsDeleted(cursor))
            {
                // Step past the deleted doc so the outer loop need not re-test it.
                cursor++;
                break;
            }
            if (runLength >= MAX_RAW_MERGE_DOCS)
            {
                break;
            }
        }

        IndexInput rawStream = matchingFieldsReader.RawDocs(rawDocLengths, runStart, runLength);
        fieldsWriter.AddRawDocuments(rawStream, rawDocLengths, runLength);
        copied += runLength;
        checkAbort.Work(300 * runLength);
    }
    return copied;
}