/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws>IllegalArgumentException if not all indexes contain the same number
/// of documents</throws>
/// <throws>IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}</throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

        // update fieldToReader map
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders

    readers.Add(reader);
}
public virtual void TestDocCount() { Directory dir = new RAMDirectory(); IndexWriter writer = null; IndexReader reader = null; int i; try { writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (i = 0; i < 100; i++) { AddDoc(writer); } Assert.AreEqual(100, writer.DocCount()); writer.Close(); // delete 40 documents reader = IndexReader.Open(dir); for (i = 0; i < 40; i++) { reader.Delete(i); } reader.Close(); // test doc count before segments are merged/index is optimized writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); Assert.AreEqual(100, writer.DocCount()); writer.Close(); reader = IndexReader.Open(dir); Assert.AreEqual(100, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.Optimize(); Assert.AreEqual(60, writer.DocCount()); writer.Close(); // check that the index reader gives the same numbers. reader = IndexReader.Open(dir); Assert.AreEqual(60, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); } }
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws>IllegalArgumentException if not all indexes contain the same number
/// of documents</throws>
/// <throws>IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}</throws>
/// <throws>IOException if there is a low-level IO error</throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    IEnumerator<string> i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        string field = i.Current;
        if (!fieldToReader.ContainsKey(field))
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
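// Usage sketch, not taken from the sources above: wiring two aligned indexes into a
// ParallelReader. Assumes Lucene.Net 2.9-style APIs (Lucene.Net.Index, Lucene.Net.Search,
// Lucene.Net.Store); "coreDir" and "auxDir" are hypothetical Directory instances whose
// indexes hold the same documents in the same order, so MaxDoc()/NumDocs() agree.
public static IndexSearcher OpenParallelSearcher(Directory coreDir, Directory auxDir)
{
    ParallelReader parallel = new ParallelReader();
    parallel.Add(IndexReader.Open(coreDir));       // stored fields come from this reader
    parallel.Add(IndexReader.Open(auxDir), true);  // ignoreStoredFields: postings/norms only
    return new IndexSearcher(parallel);
}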
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) { int maxDoc = reader.MaxDoc(); if (matchingVectorsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" int docCount = 0; while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len); termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); docCount += len; checkAbort.Work(300 * len); } } else { for (int docNum = 0; docNum < maxDoc; docNum++) { // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); termVectorsWriter.AddAllDocVectors(vectors); checkAbort.Work(300); } } }
// maps around deleted docs
internal int[] GetDocMap()
{
    if (docMap == null)
    {
        // build array which maps document numbers around deletions
        if (reader.HasDeletions())
        {
            int maxDoc = reader.MaxDoc();
            docMap = new int[maxDoc];
            int j = 0;
            for (int i = 0; i < maxDoc; i++)
            {
                if (reader.IsDeleted(i))
                {
                    docMap[i] = -1;
                }
                else
                {
                    docMap[i] = j++;
                }
            }
        }
    }
    return docMap;
}
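// Illustration only (not part of the merge code above): the same remapping expressed over a
// plain array of deletion flags. With maxDoc = 5 and docs 1 and 3 deleted, the map becomes
// [0, -1, 1, -1, 2] -- deleted slots get -1, surviving docs get consecutive new numbers.
public static int[] BuildDocMap(bool[] deleted)
{
    int[] map = new int[deleted.Length];
    int next = 0;
    for (int i = 0; i < deleted.Length; i++)
    {
        map[i] = deleted[i] ? -1 : next++;
    }
    return map;
}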
private void MergeNorms() { for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { IndexOutput output = directory.CreateOutput(segment + ".f" + i); try { for (int j = 0; j < readers.Count; j++) { IndexReader reader = (IndexReader)readers[j]; int maxDoc = reader.MaxDoc(); byte[] input = new byte[maxDoc]; reader.Norms(fi.name, input, 0); for (int k = 0; k < maxDoc; k++) { if (!reader.IsDeleted(k)) { output.WriteByte(input[k]); } } } } finally { output.Close(); } } } }
public virtual void TestAddIndexes2() { bool optimize = false; Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); // create a 2nd index Directory dir2 = new MockRAMDirectory(); IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer2.SetInfoStream(infoStream); CreateIndexNoClose(!optimize, "index2", writer2); writer2.Close(); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); IndexReader r1 = writer.GetReader(); Assert.AreEqual(500, r1.MaxDoc()); r1.Close(); writer.Close(); dir1.Close(); }
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary> /// <throws> IOException </throws> private void MergeVectors() { TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { for (int r = 0; r < readers.Count; r++) { IndexReader reader = (IndexReader)readers[r]; int maxDoc = reader.MaxDoc(); for (int docNum = 0; docNum < maxDoc; docNum++) { // skip deleted docs if (reader.IsDeleted(docNum)) { continue; } termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum)); if (checkAbort != null) { checkAbort.Work(300); } } } } finally { termVectorsWriter.Close(); } }
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int maxDoc = reader.MaxDoc(); int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(docCount, fieldSelectorMerge); fieldsWriter.AddDocument(doc); checkAbort.Work(300); } } return(docCount); }
private void MergeNorms() { byte[] normBuffer = null; IndexOutput output = null; try { for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { if (output == null) { output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION); output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length); } for (int j = 0; j < readers.Count; j++) { IndexReader reader = (IndexReader)readers[j]; int maxDoc = reader.MaxDoc(); if (normBuffer == null || normBuffer.Length < maxDoc) { // the buffer is too small for the current segment normBuffer = new byte[maxDoc]; } reader.Norms(fi.name, normBuffer, 0); if (!reader.HasDeletions()) { //optimized case for segments without deleted docs output.WriteBytes(normBuffer, maxDoc); } else { // this segment has deleted docs, so we have to // check for every doc if it is deleted or not for (int k = 0; k < maxDoc; k++) { if (!reader.IsDeleted(k)) { output.WriteByte(normBuffer[k]); } } } if (checkAbort != null) { checkAbort.Work(maxDoc); } } } } } finally { if (output != null) { output.Close(); } } }
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int docCount = 0; int maxDoc = reader.MaxDoc(); if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc;) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (reader.IsDeleted(j)) { j++; break; } }while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); docCount++; checkAbort.Work(300); } } return(docCount); }
public override void SetNextReader(IndexReader reader, int docBase) { currentReaderValues = new int[reader.MaxDoc()]; for (int i = 0; i < currentReaderValues.Length; i++) { currentReaderValues[i] = random.Next(); } }
private void VerifyNumDocs(Directory dir, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    Assert.AreEqual(numDocs, reader.MaxDoc());
    Assert.AreEqual(numDocs, reader.NumDocs());
    reader.Close();
}
public OpenBitSetDISI TermToBitSet(string term, IndexReader indexReader)
{
    var facetQuery = new TermQuery(new Term(this.Field, term));
    var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));
    var bitSet = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());
    return bitSet;
}
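// Hypothetical caller (the class exposing TermToBitSet is not shown above): intersecting two
// per-term bitsets counts documents that carry both facet values. And() and Cardinality() are
// inherited from OpenBitSet.
OpenBitSetDISI red = facetHelper.TermToBitSet("red", indexReader);
OpenBitSetDISI small = facetHelper.TermToBitSet("small", indexReader);
red.And(small);                            // in-place intersection
long redAndSmallCount = red.Cardinality(); // docs matching both terms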
/// <summary> Tests creating a segment, then check to insure the segment can be seen via /// IW.getReader /// </summary> public virtual void DoTestIndexWriterReopenSegment(bool optimize) { Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); IndexReader r1 = writer.GetReader(); Assert.AreEqual(0, r1.MaxDoc()); CreateIndexNoClose(false, "index1", writer); writer.Flush(!optimize, true, true); IndexReader iwr1 = writer.GetReader(); Assert.AreEqual(100, iwr1.MaxDoc()); IndexReader r2 = writer.GetReader(); Assert.AreEqual(r2.MaxDoc(), 100); // add 100 documents for (int x = 10000; x < 10000 + 100; x++) { Document d = CreateDocument(x, "index1", 5); writer.AddDocument(d); } writer.Flush(false, true, true); // verify the reader was reopened internally IndexReader iwr2 = writer.GetReader(); Assert.IsTrue(iwr2 != r1); Assert.AreEqual(200, iwr2.MaxDoc()); // should have flushed out a segment IndexReader r3 = writer.GetReader(); Assert.IsTrue(r2 != r3); Assert.AreEqual(200, r3.MaxDoc()); // dec ref the readers rather than close them because // closing flushes changes to the writer r1.Close(); iwr1.Close(); r2.Close(); r3.Close(); iwr2.Close(); writer.Close(); // test whether the changes made it to the directory writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); IndexReader w2r1 = writer.GetReader(); // insure the deletes were actually flushed to the directory Assert.AreEqual(200, w2r1.MaxDoc()); w2r1.Close(); writer.Close(); dir1.Close(); }
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) { int maxDoc = reader.MaxDoc(); if (matchingVectorsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int docNum = 0; docNum < maxDoc;) { if (reader.IsDeleted(docNum)) { // skip deleted docs ++docNum; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { docNum++; numDocs++; if (docNum >= maxDoc) { break; } if (reader.IsDeleted(docNum)) { docNum++; break; } }while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.Work(300 * numDocs); } } else { for (int docNum = 0; docNum < maxDoc; docNum++) { if (reader.IsDeleted(docNum)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); termVectorsWriter.AddAllDocVectors(vectors); checkAbort.Work(300); } } }
public override DocIdSet GetDocIdSet(IndexReader reader) { double[] latIndex = FieldCache_Fields.DEFAULT.GetDoubles(reader, _latField); double[] lngIndex = FieldCache_Fields.DEFAULT.GetDoubles(reader, _lngField); int docBase = NextDocBase; NextDocBase += reader.MaxDoc(); return new LatLongFilteredDocIdSet(StartingFilter.GetDocIdSet(reader), latIndex, lngIndex, DistanceLookupCache, _lat, _lng, Distance, docBase, Distances); }
public override BitArray Bits(IndexReader reader) { BitArray bitArray = new BitArray(reader.MaxDoc()); TermDocs termDocs = reader.TermDocs(new Term("score", "5")); while (termDocs.Next()) { bitArray.Set(termDocs.Doc(), true); } return bitArray; }
public override BitArray Bits(IndexReader reader) { if (done) { throw new NotSupportedException("Called twice"); } BitArray bitArray = new BitArray(reader.MaxDoc()); done = true; return bitArray; }
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws>IllegalArgumentException if not all indexes contain the same number
/// of documents</throws>
/// <throws>IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}</throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    System.Collections.ICollection fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry)i.Current;

        // update fieldToReader map
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);
}
public virtual void TestAddIndexes() { bool optimize = false; Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); // create the index CreateIndexNoClose(!optimize, "index1", writer); writer.Flush(false, true, true); // create a 2nd index Directory dir2 = new MockRAMDirectory(); IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer2.SetInfoStream(infoStream); CreateIndexNoClose(!optimize, "index2", writer2); writer2.Close(); IndexReader r0 = writer.GetReader(); Assert.IsTrue(r0.IsCurrent()); writer.AddIndexesNoOptimize(new Directory[] { dir2 }); Assert.IsFalse(r0.IsCurrent()); r0.Close(); IndexReader r1 = writer.GetReader(); Assert.IsTrue(r1.IsCurrent()); writer.Commit(); Assert.IsTrue(r1.IsCurrent()); Assert.AreEqual(200, r1.MaxDoc()); int index2df = r1.DocFreq(new Term("indexname", "index2")); Assert.AreEqual(100, index2df); // verify the docs are from different indexes Document doc5 = r1.Document(5); Assert.AreEqual("index1", doc5.Get("indexname")); Document doc150 = r1.Document(150); Assert.AreEqual("index2", doc150.Get("indexname")); r1.Close(); writer.Close(); dir1.Close(); }
public static void CheckNorms(IndexReader reader) { // test omit norms for (int i = 0; i < DocHelper.fields.Length; i++) { Fieldable f = DocHelper.fields[i]; if (f.IsIndexed()) { Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms()); Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name())); if (!reader.HasNorms(f.Name())) { // test for fake norms of 1.0 or null depending on the flag byte[] norms = reader.Norms(f.Name()); byte norm1 = DefaultSimilarity.EncodeNorm(1.0f); if (reader.GetDisableFakeNorms()) { Assert.IsNull(norms); } else { Assert.AreEqual(norms.Length, reader.MaxDoc()); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], norm1); } } norms = new byte[reader.MaxDoc()]; reader.Norms(f.Name(), norms, 0); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], norm1); } } } } }
public DeleteThreads(TestIndexWriterReader enclosingInstance, IndexWriter mainWriter) { InitBlock(enclosingInstance); this.mainWriter = mainWriter; IndexReader reader = mainWriter.GetReader(); int maxDoc = reader.MaxDoc(); random = Enclosing_Instance.NewRandom(); int iter = random.Next(maxDoc); for (int x = 0; x < iter; x++) { int doc = random.Next(iter); System.String id = reader.Document(doc).Get("id"); toDeleteTerms.Add(new Term("id", id)); } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
private void ModifyNormsForF1(Directory dir) { IndexReader ir = IndexReader.Open(dir); int n = ir.MaxDoc(); for (int i = 0; i < n; i += 3) { // modify for every third doc int k = (i * 3) % modifiedNorms.Count; float origNorm = (float)((System.Single)modifiedNorms[i]); float newNorm = (float)((System.Single)modifiedNorms[k]); //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm); //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms[i] = (float)newNorm; modifiedNorms[k] = (float)origNorm; ir.SetNorm(i, "f" + 1, newNorm); ir.SetNorm(k, "f" + 1, origNorm); } ir.Close(); }
private bool VerifyIndex(Directory directory, int startAt) { bool fail = false; IndexReader reader = IndexReader.Open(directory); int max = reader.MaxDoc(); for (int i = 0; i < max; i++) { Document temp = reader.Document(i); //System.out.println("doc "+i+"="+temp.getField("count").stringValue()); //compare the index doc number to the value that it should be if (!temp.GetField("count").StringValue().Equals((i + startAt) + "")) { fail = true; System.Console.Out.WriteLine("Document " + (i + startAt) + " is returning document " + temp.GetField("count").StringValue()); } } reader.Close(); return(fail); }
private void CheckExpecteds(System.Collections.BitArray expecteds) { IndexReader r = IndexReader.Open(dir); //Perhaps not the most efficient approach but meets our needs here. for (int i = 0; i < r.MaxDoc(); i++) { if (!r.IsDeleted(i)) { System.String sval = r.Document(i).Get(FIELD_RECORD_ID); if (sval != null) { int val = System.Int32.Parse(sval); Assert.IsTrue(expecteds.Get(val), "Did not expect document #" + val); expecteds.Set(val, false); } } } r.Close(); Assert.AreEqual(0, SupportClass.BitSetSupport.Cardinality(expecteds), "Should have 0 docs remaining "); }
private List<FacetMatch> FindMatchesInQuery(Facet facet, Filter query, Filter filter, IndexReader indexReader) { var matches = facet.Values.Select(value => { var bitsQuery = new OpenBitSetDISI(query.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc()); bitsQuery.And(value.Item2); if (filter != null) { //TODO: Remove this hard coded value (1000) var bitsFilter = new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), 1000); bitsQuery.And(bitsFilter); } var count = bitsQuery.Cardinality(); return new FacetMatch() { Count = count, Value = value.Item1, Id = facet.Id }; }).ToList(); return matches; }
/// <summary>
/// Get the DocIdSet.
/// </summary>
/// <param name="reader">Applicable reader.</param>
/// <returns>The set.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet result = new OpenBitSet(reader.MaxDoc());
    TermDocs td = reader.TermDocs();
    try
    {
        foreach (Term t in this.terms)
        {
            td.Seek(t);
            while (td.Next())
            {
                result.Set(td.Doc());
            }
        }
    }
    finally
    {
        td.Close();
    }
    return result;
}
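// Usage sketch (hypothetical): "termsFilter" stands for an instance of the filter class above,
// whose name is not shown. Assumes the Lucene.Net 2.9-style IndexSearcher.Search(Query, Filter, int).
TermQuery query = new TermQuery(new Term("type", "book"));
TopDocs top = searcher.Search(query, termsFilter, 10); // hits restricted to docs matching the listed terms
int hitCount = top.TotalHits;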
private OpenBitSet CorrectBits(IndexReader reader) { OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid Term startTerm = new Term(fieldName); TermEnum te = reader.Terms(startTerm); if (te != null) { Term currTerm = te.Term(); while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned { int lastDoc = -1; //set non duplicates TermDocs td = reader.TermDocs(currTerm); if (td.Next()) { if (keepMode == KM_USE_FIRST_OCCURRENCE) { bits.Set(td.Doc()); } else { do { lastDoc = td.Doc(); } while (td.Next()); bits.Set(lastDoc); } } if (!te.Next()) { break; } currTerm = te.Term(); } } return bits; }
private void ModifyNormsForF1(IndexReader ir) { int n = ir.MaxDoc(); // System.out.println("modifyNormsForF1 maxDoc: "+n); for (int i = 0; i < n; i += 3) { // modify for every third doc int k = (i * 3) % modifiedNorms.Count; float origNorm = (float)((System.Single)modifiedNorms[i]); float newNorm = (float)((System.Single)modifiedNorms[k]); // System.out.println("Modifying: for "+i+" from "+origNorm+" to // "+newNorm); // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms[i] = (float)newNorm; modifiedNorms[k] = (float)origNorm; ir.SetNorm(i, "f" + 1, newNorm); ir.SetNorm(k, "f" + 1, origNorm); // System.out.println("setNorm i: "+i); // break; } // ir.close(); }
public override DocIdSet GetDocIdSet(IndexReader reader) { var bits = new OpenBitSet(reader.MaxDoc()); TermDocs termDocs = reader.TermDocs(); List<double> area = _shape.Area; int sz = area.Count; // iterate through each boxid for (int i = 0; i < sz; i++) { double boxId = area[i]; termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId))); // iterate through all documents // which have this boxId while (termDocs.Next()) { bits.FastSet(termDocs.Doc()); } } return bits; }
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField) { Assert.AreEqual(r1.NumDocs(), r2.NumDocs()); bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc()); int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping TermDocs termDocs1 = r1.TermDocs(); TermDocs termDocs2 = r2.TermDocs(); // create mapping from id2 space to id2 based on idField idField = StringHelper.Intern(idField); TermEnum termEnum = r1.Terms(new Term(idField, "")); do { Term term = termEnum.Term(); if (term == null || (System.Object) term.Field() != (System.Object) idField) break; termDocs1.Seek(termEnum); if (!termDocs1.Next()) { // This doc is deleted and wasn't replaced termDocs2.Seek(termEnum); Assert.IsFalse(termDocs2.Next()); continue; } int id1 = termDocs1.Doc(); Assert.IsFalse(termDocs1.Next()); termDocs2.Seek(termEnum); Assert.IsTrue(termDocs2.Next()); int id2 = termDocs2.Doc(); Assert.IsFalse(termDocs2.Next()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (System.Exception t) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); System.Console.Out.WriteLine(" d1=" + r1.Document(id1)); System.Console.Out.WriteLine(" d2=" + r2.Document(id2)); throw t; } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2)); } catch (System.Exception e) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1); System.Console.Out.WriteLine(" d1=" + tv1); if (tv1 != null) for (int i = 0; i < tv1.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv1[i]); } TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2); System.Console.Out.WriteLine(" d2=" + tv2); if (tv2 != null) for (int i = 0; i < tv2.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv2[i]); } throw e; } } while (termEnum.Next()); termEnum.Close(); // Verify postings TermEnum termEnum1 = r1.Terms(new Term("", "")); TermEnum termEnum2 = r2.Terms(new Term("", "")); // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs()]; long[] info2 = new long[r2.NumDocs()]; for (; ; ) { Term term1, term2; // iterate until we get some docs int len1; for (; ; ) { len1 = 0; term1 = termEnum1.Term(); if (term1 == null) break; termDocs1.Seek(termEnum1); while (termDocs1.Next()) { int d1 = termDocs1.Doc(); int f1 = termDocs1.Freq(); info1[len1] = (((long) d1) << 32) | f1; len1++; } if (len1 > 0) break; if (!termEnum1.Next()) break; } // iterate until we get some docs int len2; for (; ; ) { len2 = 0; term2 = termEnum2.Term(); if (term2 == null) break; termDocs2.Seek(termEnum2); while (termDocs2.Next()) { int d2 = termDocs2.Doc(); int f2 = termDocs2.Freq(); info2[len2] = (((long) r2r1[d2]) << 32) | f2; len2++; } if (len2 > 0) break; if (!termEnum2.Next()) break; } if (!hasDeletes) Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq()); Assert.AreEqual(len1, len2); if (len1 == 0) break; // no more terms Assert.AreEqual(term1, term2); // sort info2 to get it into ascending docid System.Array.Sort(info2, 0, len2 - 0); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i]); } termEnum1.Next(); termEnum2.Next(); } }
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int docCount = 0; int maxDoc = reader.MaxDoc(); if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc; ) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) break; if (reader.IsDeleted(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); docCount++; checkAbort.Work(300); } } return docCount; }
public SimpleFacetedSearch(IndexReader reader, string[] groupByFields) { this._Reader = reader; List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>(); //STEP 1 //f1 = A, B //f2 = I, J //f3 = 1, 2, 3 int maxFacets = 1; List<List<string>> inputToCP = new List<List<string>>(); foreach (string field in groupByFields) { FieldValuesBitSets f = new FieldValuesBitSets(reader, field); maxFacets *= f.FieldValueBitSetPair.Count; if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS); fieldValuesBitSets.Add(f); inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList()); } //STEP 2 // comb1: A I 1 // comb2: A I 2 etc. var cp = inputToCP.CartesianProduct(); //SETP 3 //create a single BitSet for each combination //BitSet1: A AND I AND 1 //BitSet2: A AND I AND 2 etc. //and remove impossible comb's (for ex, B J 3) from list. Parallel.ForEach(cp, combinations => { OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc()); bitSet.Set(0, bitSet.Size()); List<string> comb = combinations.ToList(); for (int j = 0; j < comb.Count; j++) { bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]); } //STEP 3 if (bitSet.Cardinality() > 0) { lock(_Groups) _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet)); } }); //Now _Groups has 7 rows (as <List<string>, BitSet> pairs) }
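// Standalone illustration of steps 2-3 above for just two fields (the real code handles any
// number of fields via CartesianProduct): AND the per-value bitsets of every value combination
// and keep the non-empty ones. "field1Values" and "field2Values" are hypothetical
// Dictionary<string, OpenBitSet> maps from field value to its matching-documents bitset.
foreach (string v1 in field1Values.Keys)
{
    foreach (string v2 in field2Values.Keys)
    {
        OpenBitSet combined = (OpenBitSet)field1Values[v1].Clone();
        combined.And(field2Values[v2]);
        if (combined.Cardinality() > 0)
        {
            // (v1, v2) is a reachable facet group with combined.Cardinality() documents
        }
    }
}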
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator(); System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator(); TermDocs docs = reader.TermDocs(); try { while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Term term = (Term) entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = ((BufferedDeletes.Num) entry.Value).GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID iter = deletesFlushed.docIDs.GetEnumerator(); while (iter.MoveNext()) { int docID = ((System.Int32) iter.Current); if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Query query = (Query) entry.Key; int limit = ((System.Int32) entry.Value); Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) { FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; if (mergeDocStores) { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; bool same = true; FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos(); for (int j = 0; same && j < segmentFieldInfos.Size(); j++) { same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j)); } if (same) { matchingSegmentReaders[i] = segmentReader; } } } // Used for bulk-reading raw bytes for stored fields int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; SegmentReader matchingSegmentReader = 
matchingSegmentReaders[i]; FieldsReader matchingFieldsReader; if (matchingSegmentReader != null) { matchingFieldsReader = matchingSegmentReader.GetFieldsReader(); } else { matchingFieldsReader = null; } int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc;) { if (!reader.IsDeleted(j)) { // skip deleted docs if (matchingSegmentReader != null) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int start = j; int numDocs = 0; do { j++; numDocs++; }while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; if (checkAbort != null) { checkAbort.Work(300 * numDocs); } } else { fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); j++; docCount++; if (checkAbort != null) { checkAbort.Work(300); } } } else { j++; } } } } finally { fieldsWriter.Close(); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { for (int i = 0; i < readers.Count; i++) { docCount += ((IndexReader)readers[i]).NumDocs(); } } return(docCount); }
public static void CheckNorms(IndexReader reader) { // test omit norms for (int i = 0; i < DocHelper.fields.Length; i++) { Fieldable f = DocHelper.fields[i]; if (f.IsIndexed()) { Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms()); Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name())); if (!reader.HasNorms(f.Name())) { // test for fake norms of 1.0 or null depending on the flag byte[] norms = reader.Norms(f.Name()); byte norm1 = DefaultSimilarity.EncodeNorm(1.0f); if (reader.GetDisableFakeNorms()) Assert.IsNull(norms); else { Assert.AreEqual(norms.Length, reader.MaxDoc()); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], norm1); } } norms = new byte[reader.MaxDoc()]; reader.Norms(f.Name(), norms, 0); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], norm1); } } } } }
public virtual void TestRandom()
{
    int numThreads = 1 + Random().Next(8);
    int numDocumentsToIndex = 50 + AtLeast(70);
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    iwc.SetFlushPolicy(flushPolicy);

    int numDWPT = 1 + Random().Next(8);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.SetIndexerThreadPool(threadPool);

    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
    DocumentsWriter docsWriter = writer.DocsWriter;
    Assert.IsNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
    Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
        threads[x].Start();
    }
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x].Join();
    }

    Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
    Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
    Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
    if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms())
    {
        long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
        Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
        if (flushPolicy.HasMarkedPending)
        {
            Assert.IsTrue(maxRAMBytes <= flushControl.PeakActiveBytes, "max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes);
        }
    }
    AssertActiveBytesAfter(flushControl);
    writer.Commit();
    Assert.AreEqual(0, flushControl.ActiveBytes());

    IndexReader r = DirectoryReader.Open(dir);
    Assert.AreEqual(numDocumentsToIndex, r.NumDocs());
    Assert.AreEqual(numDocumentsToIndex, r.MaxDoc());
    if (!flushPolicy.FlushOnRAM())
    {
        Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled(), "never stall if we don't flush on RAM");
        Assert.IsFalse(docsWriter.FlushControl.StallControl.HasBlocked(), "never block if we don't flush on RAM");
    }
    r.Dispose();
    writer.Dispose();
    dir.Dispose();
}
private OpenBitSet FastBits(IndexReader reader) { OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); bits.Set(0, reader.MaxDoc()); //assume all are valid Term startTerm = new Term(fieldName); TermEnum te = reader.Terms(startTerm); if (te != null) { Term currTerm = te.Term(); while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned { if (te.DocFreq() > 1) { int lastDoc = -1; //unset potential duplicates TermDocs td = reader.TermDocs(currTerm); td.Next(); if (keepMode == KM_USE_FIRST_OCCURRENCE) { td.Next(); } do { lastDoc = td.Doc(); bits.Clear(lastDoc); } while (td.Next()); if (keepMode == KM_USE_LAST_OCCURRENCE) { //restore the last bit bits.Set(lastDoc); } } if (!te.Next()) { break; } currTerm = te.Term(); } } return bits; }
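// Usage sketch (an assumption, not shown in the snippets above): CorrectBits/FastBits appear to
// belong to the contrib DuplicateFilter, which in the Java/Lucene.Net contrib takes the key field
// name in its constructor and exposes SetKeepMode with the KM_* constants referenced above.
DuplicateFilter dedup = new DuplicateFilter("id");
dedup.SetKeepMode(DuplicateFilter.KM_USE_LAST_OCCURRENCE); // keep only the newest doc per id
TopDocs top = searcher.Search(query, dedup, 10);           // at most one hit per distinct id value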
public override int MaxDoc() { return(in_Renamed.MaxDoc()); }
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField) { Assert.AreEqual(r1.NumDocs(), r2.NumDocs()); bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc()); int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping TermDocs termDocs1 = r1.TermDocs(); TermDocs termDocs2 = r2.TermDocs(); // create mapping from id2 space to id2 based on idField idField = StringHelper.Intern(idField); TermEnum termEnum = r1.Terms(new Term(idField, "")); do { Term term = termEnum.Term(); if (term == null || (System.Object)term.Field() != (System.Object)idField) { break; } termDocs1.Seek(termEnum); if (!termDocs1.Next()) { // This doc is deleted and wasn't replaced termDocs2.Seek(termEnum); Assert.IsFalse(termDocs2.Next()); continue; } int id1 = termDocs1.Doc(); Assert.IsFalse(termDocs1.Next()); termDocs2.Seek(termEnum); Assert.IsTrue(termDocs2.Next()); int id2 = termDocs2.Doc(); Assert.IsFalse(termDocs2.Next()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (System.Exception t) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); System.Console.Out.WriteLine(" d1=" + r1.Document(id1)); System.Console.Out.WriteLine(" d2=" + r2.Document(id2)); throw t; } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2)); } catch (System.Exception e) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1); System.Console.Out.WriteLine(" d1=" + tv1); if (tv1 != null) { for (int i = 0; i < tv1.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv1[i]); } } TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2); System.Console.Out.WriteLine(" d2=" + tv2); if (tv2 != null) { for (int i = 0; i < tv2.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv2[i]); } } throw e; } }while (termEnum.Next()); termEnum.Close(); // Verify postings TermEnum termEnum1 = r1.Terms(new Term("", "")); TermEnum termEnum2 = r2.Terms(new Term("", "")); // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs()]; long[] info2 = new long[r2.NumDocs()]; for (; ;) { Term term1, term2; // iterate until we get some docs int len1; for (; ;) { len1 = 0; term1 = termEnum1.Term(); if (term1 == null) { break; } termDocs1.Seek(termEnum1); while (termDocs1.Next()) { int d1 = termDocs1.Doc(); int f1 = termDocs1.Freq(); info1[len1] = (((long)d1) << 32) | f1; len1++; } if (len1 > 0) { break; } if (!termEnum1.Next()) { break; } } // iterate until we get some docs int len2; for (; ;) { len2 = 0; term2 = termEnum2.Term(); if (term2 == null) { break; } termDocs2.Seek(termEnum2); while (termDocs2.Next()) { int d2 = termDocs2.Doc(); int f2 = termDocs2.Freq(); info2[len2] = (((long)r2r1[d2]) << 32) | f2; len2++; } if (len2 > 0) { break; } if (!termEnum2.Next()) { break; } } if (!hasDeletes) { Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq()); } Assert.AreEqual(len1, len2); if (len1 == 0) { break; // no more terms } Assert.AreEqual(term1, term2); // sort info2 to get it into ascending docid System.Array.Sort(info2, 0, len2 - 0); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i]); } termEnum1.Next(); termEnum2.Next(); } }
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term TermDocs docs = reader.TermDocs(); try { foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms) { Term term = entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = entry.Value.GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID foreach(int docID in deletesFlushed.docIDs) { if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries)) { Query query = entry.Key; int limit = entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
//////////////////////////////////////////////////////////////// static private void ScoreHits (Dictionary<int, Hit> hits_by_id, IndexReader reader, ICollection term_list) { LNS.Similarity similarity; similarity = LNS.Similarity.GetDefault (); TermDocs term_docs = reader.TermDocs (); Hit hit; foreach (Term term in term_list) { double idf; idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ()); int hit_count; hit_count = hits_by_id.Count; term_docs.Seek (term); while (term_docs.Next () && hit_count > 0) { int id; id = term_docs.Doc (); if (hits_by_id.TryGetValue (id, out hit)) { double tf; tf = similarity.Tf (term_docs.Freq ()); hit.Score += tf * idf; --hit_count; } } } term_docs.Close (); }
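// What the loop above accumulates, made explicit. These are DefaultSimilarity's stock formulas
// in Lucene 2.x/3.x (tf = sqrt(freq), idf = ln(maxDoc / (docFreq + 1)) + 1); shown here only to
// clarify the scoring, not taken from the snippet's codebase.
static double TermContribution(int freqInDoc, int docFreq, int maxDoc)
{
    double idf = System.Math.Log(maxDoc / (double)(docFreq + 1)) + 1.0;
    double tf = System.Math.Sqrt(freqInDoc);
    return tf * idf; // added to hit.Score for each query term that occurs in the doc
}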
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int maxDoc = reader.MaxDoc(); int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(docCount, fieldSelectorMerge); fieldsWriter.AddDocument(doc); checkAbort.Work(300); } } return docCount; }
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; // Delete by term IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Term term = (Term)entry.Key; TermDocs docs = reader.TermDocs(term); if (docs != null) { int limit = ((BufferedDeletes.Num)entry.Value).GetNum(); try { while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } finally { docs.Close(); } } } // Delete by docID IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator(); while (iter2.MoveNext()) { int docID = (int)iter2.Current; if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = deletesFlushed.queries.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Query query = (Query)entry.Key; int limit = (int)entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader); while (scorer.Next()) { int docID = scorer.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } searcher.Close(); return any; } }
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) { int maxDoc = reader.MaxDoc(); if (matchingVectorsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int docNum = 0; docNum < maxDoc; ) { if (reader.IsDeleted(docNum)) { // skip deleted docs ++docNum; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { docNum++; numDocs++; if (docNum >= maxDoc) break; if (reader.IsDeleted(docNum)) { docNum++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.Work(300 * numDocs); } } else { for (int docNum = 0; docNum < maxDoc; docNum++) { if (reader.IsDeleted(docNum)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); termVectorsWriter.AddAllDocVectors(vectors); checkAbort.Work(300); } } }
private void ModifyNormsForF1(IndexReader ir) { int n = ir.MaxDoc(); // System.out.println("modifyNormsForF1 maxDoc: "+n); for (int i = 0; i < n; i += 3) { // modify for every third doc int k = (i * 3) % modifiedNorms.Count; float origNorm = (float) ((System.Single) modifiedNorms[i]); float newNorm = (float) ((System.Single) modifiedNorms[k]); // System.out.println("Modifying: for "+i+" from "+origNorm+" to // "+newNorm); // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms[i] = (float) newNorm; modifiedNorms[k] = (float) origNorm; ir.SetNorm(i, "f" + 1, newNorm); ir.SetNorm(k, "f" + 1, origNorm); // System.out.println("setNorm i: "+i); // break; } // ir.close(); }
public override int MaxDoc() { // Don't call ensureOpen() here (it could affect performance) return(in_Renamed.MaxDoc()); }
private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index)
{
    OpenBitSetDISI result;
    // First AND operation takes place against a completely false
    // bitset and will always return zero results.
    if (logic == Logic.AND)
    {
        result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());
        ++index[0];
    }
    else if (logic == Logic.ANDNOT)
    {
        result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());
        result.Flip(0, reader.MaxDoc()); // NOTE: may set bits for deleted docs.
        ++index[0];
    }
    else
    {
        result = new OpenBitSetDISI(reader.MaxDoc());
    }
    return result;
}
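// Worked example of the three starting states above, assuming maxDoc = 4 and a first chained
// filter matching docs {0, 2}:
//   AND    -> start directly from {0, 2} (ANDing into an initially empty set would always yield {})
//   ANDNOT -> start from the flipped set {1, 3} (which, as the NOTE says, may include deleted docs)
//   other  -> start from the empty set and let each chained filter contribute
OpenBitSetDISI bits = new OpenBitSetDISI(4);
bits.Set(0);
bits.Set(2);
bits.Flip(0, 4); // bits now holds {1, 3}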
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws>IllegalArgumentException if not all indexes contain the same number
/// of documents</throws>
/// <throws>IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}</throws>
/// <throws>IOException if there is a low-level IO error</throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        System.String field = (System.String) i.Current;
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders

    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
public static void AssertIndexEquals(IndexReader index1, IndexReader index2) { Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs."); Assert.AreEqual(index1.MaxDoc(), index2.MaxDoc(), "IndexReaders have different values for maxDoc."); Assert.AreEqual(index1.HasDeletions(), index2.HasDeletions(), "Only one IndexReader has deletions."); Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized."); // check field names System.Collections.ICollection fields1 = index1.GetFieldNames(FieldOption.ALL); System.Collections.ICollection fields2 = index2.GetFieldNames(FieldOption.ALL); Assert.AreEqual(fields1.Count, fields2.Count, "IndexReaders have different numbers of fields."); System.Collections.IEnumerator it1 = fields1.GetEnumerator(); System.Collections.IEnumerator it2 = fields2.GetEnumerator(); while (it1.MoveNext()) { Assert.IsTrue(it2.MoveNext()); Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names."); } // check norms it1 = fields1.GetEnumerator(); while (it1.MoveNext()) { System.String curField = (System.String) it1.Current; byte[] norms1 = index1.Norms(curField); byte[] norms2 = index2.Norms(curField); Assert.AreEqual(norms1.Length, norms2.Length); for (int i = 0; i < norms1.Length; i++) { Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'."); } } // check deletions for (int i = 0; i < index1.MaxDoc(); i++) { Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index."); } // check stored fields for (int i = 0; i < index1.MaxDoc(); i++) { if (!index1.IsDeleted(i)) { Document doc1 = index1.Document(i); Document doc2 = index2.Document(i); fields1 = doc1.GetFields(); fields2 = doc2.GetFields(); Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + "."); it1 = fields1.GetEnumerator(); it2 = fields2.GetEnumerator(); while (it1.MoveNext()) { Assert.IsTrue(it2.MoveNext()); Field curField1 = (Field) it1.Current; Field curField2 = (Field) it2.Current; Assert.AreEqual(curField1.Name(), curField2.Name(), "Different fields names for doc " + i + "."); Assert.AreEqual(curField1.StringValue(), curField2.StringValue(), "Different field values for doc " + i + "."); } } } // check dictionary and posting lists TermEnum enum1 = index1.Terms(); TermEnum enum2 = index2.Terms(); TermPositions tp1 = index1.TermPositions(); TermPositions tp2 = index2.TermPositions(); while (enum1.Next()) { Assert.IsTrue(enum2.Next()); Assert.AreEqual(enum1.Term(), enum2.Term(), "Different term in dictionary."); tp1.Seek(enum1.Term()); tp2.Seek(enum1.Term()); while (tp1.Next()) { Assert.IsTrue(tp2.Next()); Assert.AreEqual(tp1.Doc(), tp2.Doc(), "Different doc id in postinglist of term " + enum1.Term() + "."); Assert.AreEqual(tp1.Freq(), tp2.Freq(), "Different term frequence in postinglist of term " + enum1.Term() + "."); for (int i = 0; i < tp1.Freq(); i++) { Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term() + "."); } } } }
public static void CheckNorms(IndexReader reader) { // test omit norms for (int i = 0; i < DocHelper.fields.Length; i++) { Lucene.Net.Documents.Fieldable f = DocHelper.fields[i]; if (f.IsIndexed()) { Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms()); Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name())); if (!reader.HasNorms(f.Name())) { // test for fake norms of 1.0 byte[] norms = reader.Norms(f.Name()); Assert.AreEqual(norms.Length, reader.MaxDoc()); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], DefaultSimilarity.EncodeNorm(1.0f)); } norms = new byte[reader.MaxDoc()]; reader.Norms(f.Name(), norms, 0); for (int j = 0; j < reader.MaxDoc(); j++) { Assert.AreEqual(norms[j], DefaultSimilarity.EncodeNorm(1.0f)); } } } } }
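For context, a field only falls into the fake-norms branch above when norms were omitted at indexing time. A small hedged sketch of creating such a field (the field name and text are made up):

// Omitting norms prevents a norm byte from being written for this field.
Field body = new Field("body", "some text", Field.Store.NO, Field.Index.TOKENIZED);
body.SetOmitNorms(true);
// After indexing, reader.HasNorms("body") is expected to be false, and
// reader.Norms("body") to contain DefaultSimilarity.EncodeNorm(1.0f) for every document.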
/// <summary> /// Returns a single <seealso cref="Bits"/> instance for this /// reader, merging live documents on the /// fly. This method will return null if the reader /// has no deletions. /// /// <p><b>NOTE</b>: This is a very slow way to access live docs. /// For example, each Bits access will require a binary search. /// It's better to get the sub-readers and iterate through them /// yourself. /// </summary> public static Bits GetLiveDocs(IndexReader reader) { if (reader.HasDeletions()) { IList<AtomicReaderContext> leaves = reader.Leaves(); int size = leaves.Count; Debug.Assert(size > 0, "A reader with deletions must have at least one leaf"); if (size == 1) { return leaves[0].AtomicReader.LiveDocs; } Bits[] liveDocs = new Bits[size]; int[] starts = new int[size + 1]; for (int i = 0; i < size; i++) { // record all liveDocs, even if they are null AtomicReaderContext ctx = leaves[i]; liveDocs[i] = ctx.AtomicReader.LiveDocs; starts[i] = ctx.DocBase; } starts[size] = reader.MaxDoc(); return new MultiBits(liveDocs, starts, true); } else { return null; } }
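A hedged consumption sketch for the method above (the containing class is presumably MultiFields, as in the Java original, and Bits is assumed to expose a Get(int) accessor): a null result means there are no deletions, so every document id below MaxDoc() is live.

Bits liveDocs = MultiFields.GetLiveDocs(reader);   // class name assumed; see note above
for (int docId = 0; docId < reader.MaxDoc(); docId++)
{
    if (liveDocs != null && !liveDocs.Get(docId))
    {
        continue;   // document was deleted
    }
    // docId refers to a live document here
}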
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary> /// <throws> IOException </throws> private void MergeVectors() { TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { for (int r = 0; r < readers.Count; r++) { SegmentReader matchingSegmentReader = matchingSegmentReaders[r]; TermVectorsReader matchingVectorsReader; bool hasMatchingReader; if (matchingSegmentReader != null) { matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig; // If the TV* files are an older format then they // cannot read raw docs: if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs()) { matchingVectorsReader = null; hasMatchingReader = false; } else { hasMatchingReader = matchingVectorsReader != null; } } else { hasMatchingReader = false; matchingVectorsReader = null; } IndexReader reader = (IndexReader)readers[r]; bool hasDeletions = reader.HasDeletions(); int maxDoc = reader.MaxDoc(); for (int docNum = 0; docNum < maxDoc;) { // skip deleted docs if (!hasDeletions || !reader.IsDeleted(docNum)) { if (hasMatchingReader) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int start = docNum; int numDocs = 0; do { docNum++; numDocs++; if (docNum >= maxDoc) { break; } if (hasDeletions && matchingSegmentReader.IsDeleted(docNum)) { docNum++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); if (checkAbort != null) { checkAbort.Work(300 * numDocs); } } else { // NOTE: it's very important to first assign // to vectors then pass it to // termVectorsWriter.addAllDocVectors; see // LUCENE-1282 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); termVectorsWriter.AddAllDocVectors(vectors); docNum++; if (checkAbort != null) { checkAbort.Work(300); } } } else { docNum++; } } } } finally { termVectorsWriter.Close(); } long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); // {{dougsale-2.4.0} // this shouldn't be a problem for us - if it is, // then it's not a JRE bug //if (4 + mergedDocs * 16 != tvxSize) // // This is most likely a bug in Sun JRE 1.6.0_04/_05; // // we detect that the bug has struck, here, and // // throw an exception to prevent the corruption from // // entering the index. See LUCENE-1282 for // // details. // throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption"); }
protected internal double[] ComputeDistances(IndexReader reader) { double[] retArray = null; var termDocs = reader.TermDocs(); var termEnum = reader.Terms(new Term(Constants.SpatialShapeFieldName)); try { do { Term term = termEnum.Term(); if (term == null) break; Debug.Assert(Constants.SpatialShapeFieldName.Equals(term.Field())); Shape termval; try { termval = SpatialIndex.RavenSpatialContext.ReadShape(term.Text()); // read shape } catch (InvalidShapeException) { continue; } var pt = termval as Point; if (pt == null) continue; var distance = SpatialIndex.RavenSpatialContext.GetDistCalc().Distance(pt, originPt); if (retArray == null) // late init retArray = new double[reader.MaxDoc()]; termDocs.Seek(termEnum); while (termDocs.Next()) { retArray[termDocs.Doc()] = distance; } } while (termEnum.Next()); } finally { termDocs.Close(); termEnum.Close(); } return retArray ?? new double[reader.MaxDoc()]; }
/// <summary> /// Get the id set for the filter. /// </summary> /// <param name="reader">The reader.</param> /// <returns>The filter set to use.</returns> public override DocIdSet GetDocIdSet(IndexReader reader) { OpenBitSetDISI res = null; if (shouldFilters != null) { for (int i = 0; i < shouldFilters.Count; i++) { if (res == null) { res = new OpenBitSetDISI(GetDISI(shouldFilters, i, reader), reader.MaxDoc()); } else { DocIdSet dis = ((Filter)shouldFilters[i]).GetDocIdSet(reader); if (dis is OpenBitSet) { // optimized case for OpenBitSets res.Or((OpenBitSet)dis); } else { res.InPlaceOr(GetDISI(shouldFilters, i, reader)); } } } } if (notFilters != null) { for (int i = 0; i < notFilters.Count; i++) { if (res == null) { res = new OpenBitSetDISI(GetDISI(notFilters, i, reader), reader.MaxDoc()); res.Flip(0, reader.MaxDoc()); // NOTE: may set bits on deleted docs } else { DocIdSet dis = ((Filter)notFilters[i]).GetDocIdSet(reader); if (dis is OpenBitSet) { // optimized case for OpenBitSets res.AndNot((OpenBitSet)dis); } else { res.InPlaceNot(GetDISI(notFilters, i, reader)); } } } } if (mustFilters != null) { for (int i = 0; i < mustFilters.Count; i++) { if (res == null) { res = new OpenBitSetDISI(GetDISI(mustFilters, i, reader), reader.MaxDoc()); } else { DocIdSet dis = ((Filter)mustFilters[i]).GetDocIdSet(reader); if (dis is OpenBitSet) { // optimized case for OpenBitSets res.And((OpenBitSet)dis); } else { res.InPlaceAnd(GetDISI(mustFilters, i, reader)); } } } } if (res != null) return FinalResult(res, reader.MaxDoc()); else { //TODO: 2.- change return DocIdSet.EMPTY_DOCIDSET; return null; } }
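For a caller, the effect is the same as materializing each clause as a bit set sized to MaxDoc() and combining the sets. A minimal hand-rolled sketch of that idea, with hypothetical Filter variables (mustA, mustB, notFilter), follows.

// Combine clauses directly with OpenBitSetDISI, mirroring what GetDocIdSet does internally.
OpenBitSetDISI bits = new OpenBitSetDISI(mustA.GetDocIdSet(reader).Iterator(), reader.MaxDoc());
bits.InPlaceAnd(mustB.GetDocIdSet(reader).Iterator());      // intersect with a second required clause
bits.InPlaceNot(notFilter.GetDocIdSet(reader).Iterator());  // subtract the excluded clause
DocIdSetIterator it = bits.Iterator();
while (it.Next())
{
    int matchingDoc = it.Doc();   // document id satisfying mustA AND mustB AND NOT notFilter
}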
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = (IndexReader)readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount); if (smi.Next()) { queue.Add(smi); } // initialize queue else { smi.Close(); } } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; System.String currentField = null; FormatPostingsTermsConsumer termsConsumer = null; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = (SegmentMergeInfo)queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = (SegmentMergeInfo)queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = (SegmentMergeInfo)queue.Pop(); top = (SegmentMergeInfo)queue.Top(); } if ((System.Object)currentField != (System.Object)term.field) { currentField = term.field; if (termsConsumer != null) { termsConsumer.Finish(); } FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); termsConsumer = consumer.AddField(fieldInfo); omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; } int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) { queue.Add(smi); } // restore queue else { smi.Close(); // done with a segment } } } }