/// <summary>
/// Copies term vectors from a reader that has no deletions. When a matching
/// Lucene40TermVectorsReader is available, documents are bulk-copied as raw
/// bytes in chunks of up to MAX_RAW_MERGE_DOCS; otherwise each document's
/// vectors are re-added one at a time.
/// </summary>
private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            mergeState.CheckAbort.Work(300 * len);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            mergeState.CheckAbort.Work(300);
        }
    }
    return maxDoc;
}
/// <summary>
/// Default merge implementation: iterates every live document in every
/// reader, re-adds its term vectors to this writer, and returns the number
/// of documents merged.
/// </summary>
public virtual int Merge(MergeState mergeState)
{
    int docCount = 0;
    for (int i = 0; i < mergeState.Readers.Count; i++)
    {
        AtomicReader reader = mergeState.Readers[i];
        int maxDoc = reader.MaxDoc;
        IBits liveDocs = reader.LiveDocs;
        for (int docID = 0; docID < maxDoc; docID++)
        {
            if (liveDocs != null && !liveDocs.Get(docID))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docID);
            AddAllDocVectors(vectors, mergeState);
            docCount++;
            mergeState.CheckAbort.Work(300);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
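// For context, a minimal sketch of how a merge driver might obtain this
// writer and hand it the MergeState. The variable names `codec`, `directory`,
// `segmentInfo`, `context`, and `mergeState` are illustrative assumptions,
// not taken from the source; the codec calls follow the Lucene.NET 4.8 API.
TermVectorsWriter termVectorsWriter =
    codec.TermVectorsFormat.VectorsWriter(directory, segmentInfo, context);
try
{
    // Merge() re-adds every live document's vectors and calls Finish() itself.
    int numMerged = termVectorsWriter.Merge(mergeState);
}
finally
{
    termVectorsWriter.Dispose();
}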
public virtual void TestChangeGaps()
{
    // LUCENE-5324: check that it is possible to change the wrapper's gaps
    int positionGap = Random.Next(1000);
    int offsetGap = Random.Next(1000);
    Analyzer @delegate = new MockAnalyzer(Random);
    Analyzer a = new AnalyzerWrapperAnonymousClass2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, NewDirectory());
    Document doc = new Document();
    FieldType ft = new FieldType();
    ft.IsIndexed = true;
    ft.IndexOptions = IndexOptions.DOCS_ONLY;
    ft.IsTokenized = true;
    ft.StoreTermVectors = true;
    ft.StoreTermVectorPositions = true;
    ft.StoreTermVectorOffsets = true;
    doc.Add(new Field("f", "a", ft));
    doc.Add(new Field("f", "a", ft));
    writer.AddDocument(doc, a);

    AtomicReader reader = GetOnlySegmentReader(writer.GetReader());
    Fields fields = reader.GetTermVectors(0);
    Terms terms = fields.GetTerms("f");
    TermsEnum te = terms.GetEnumerator();
    Assert.IsTrue(te.MoveNext());
    Assert.AreEqual(new BytesRef("a"), te.Term);
    DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);
    Assert.AreEqual(0, dpe.NextDoc());
    Assert.AreEqual(2, dpe.Freq);
    Assert.AreEqual(0, dpe.NextPosition());
    Assert.AreEqual(0, dpe.StartOffset);
    int endOffset = dpe.EndOffset;
    Assert.AreEqual(1 + positionGap, dpe.NextPosition());
    Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset);
    Assert.IsFalse(te.MoveNext());

    reader.Dispose();
    writer.Dispose();
    writer.IndexWriter.Directory.Dispose();
}
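// The test above instantiates AnalyzerWrapperAnonymousClass2, which is defined
// elsewhere in the test class. A minimal sketch of what such a wrapper looks
// like, assuming Lucene.NET's AnalyzerWrapper API; the class name
// GapChangingAnalyzerWrapper is hypothetical.
private sealed class GapChangingAnalyzerWrapper : AnalyzerWrapper
{
    private readonly Analyzer inner;
    private readonly int positionGap;
    private readonly int offsetGap;

    public GapChangingAnalyzerWrapper(ReuseStrategy strategy, int positionGap, int offsetGap, Analyzer inner)
        : base(strategy)
    {
        this.inner = inner;
        this.positionGap = positionGap;
        this.offsetGap = offsetGap;
    }

    // Delegate all analysis to the wrapped analyzer...
    protected override Analyzer GetWrappedAnalyzer(string fieldName) => inner;

    // ...but report the caller-supplied gaps between instances of a
    // multi-valued field, which is what the assertions above verify.
    public override int GetPositionIncrementGap(string fieldName) => positionGap;

    public override int GetOffsetGap(string fieldName) => offsetGap;
}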
public virtual void TestChangeGaps()
{
    // LUCENE-5324: check that it is possible to change the wrapper's gaps
    int positionGap = Random().Next(1000);
    int offsetGap = Random().Next(1000);
    Analyzer @delegate = new MockAnalyzer(Random());
    Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
    Document doc = new Document();
    FieldType ft = new FieldType();
    ft.Indexed = true;
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
    ft.Tokenized = true;
    ft.StoreTermVectors = true;
    ft.StoreTermVectorPositions = true;
    ft.StoreTermVectorOffsets = true;
    doc.Add(new Field("f", "a", ft));
    doc.Add(new Field("f", "a", ft));
    writer.AddDocument(doc, a);

    AtomicReader reader = GetOnlySegmentReader(writer.Reader);
    Fields fields = reader.GetTermVectors(0);
    Terms terms = fields.Terms("f");
    TermsEnum te = terms.Iterator(null);
    Assert.AreEqual(new BytesRef("a"), te.Next());
    DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);
    Assert.AreEqual(0, dpe.NextDoc());
    Assert.AreEqual(2, dpe.Freq());
    Assert.AreEqual(0, dpe.NextPosition());
    Assert.AreEqual(0, dpe.StartOffset());
    int endOffset = dpe.EndOffset();
    Assert.AreEqual(1 + positionGap, dpe.NextPosition());
    Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
    Assert.AreEqual(null, te.Next());

    reader.Dispose();
    writer.Dispose();
    writer.w.Directory.Dispose();
}
/// <summary>
/// Copies term vectors from a reader that has deletions, skipping deleted
/// documents. When a matching Lucene40TermVectorsReader is available, runs of
/// consecutive live documents (up to MAX_RAW_MERGE_DOCS) are bulk-copied as
/// raw bytes; otherwise each live document's vectors are re-added one at a
/// time.
/// </summary>
private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    IBits liveDocs = reader.LiveDocs;
    int totalNumDocs = 0;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc; )
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(docNum))
                {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            totalNumDocs += numDocs;
            mergeState.CheckAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            totalNumDocs++;
            mergeState.CheckAbort.Work(300);
        }
    }
    return totalNumDocs;
}
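// Taken together, the two copy helpers are selected per segment depending on
// whether it has deletions. A sketch of the dispatching Merge override,
// modeled on the corresponding upstream Lucene 4.x code; treat it as an
// outline rather than the verbatim Lucene.NET source.
public override int Merge(MergeState mergeState)
{
    // Reusable per-chunk length buffers for the raw bulk-copy path.
    int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
    int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];

    int numDocs = 0;
    for (int i = 0; i < mergeState.Readers.Count; i++)
    {
        AtomicReader reader = mergeState.Readers[i];

        // Bulk byte copy is only possible when the source segment's vectors
        // were written by this same format ("congruent" field infos).
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[i];
        Lucene40TermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            matchingVectorsReader = matchingSegmentReader.TermVectorsReader as Lucene40TermVectorsReader;
        }

        if (reader.LiveDocs != null)
        {
            numDocs += CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
        }
        else
        {
            numDocs += CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
        }
    }
    Finish(mergeState.FieldInfos, numDocs);
    return numDocs;
}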