/// <summary>
/// Initializes a new instance of the <see cref="MergeStateDataObject"/> class.
/// </summary>
/// <param name="path">The path to a file from which to retrieve data.</param>
internal MergeStateDataObject(string path)
{
    try
    {
        using (var file = new FileStream(path, FileMode.Open))
        {
            var serializer = new XmlSerializer(typeof(MergeState));
            var newState = serializer.Deserialize(file) as MergeState;
            if (newState != null)
            {
                state = newState;
            }
        }

        didLoad = true;
    }
    catch (IOException e)
    {
        MessageBox.Show(
            string.Format(Resources.MainWindowStrings.ErrorOnLoad, e.Message),
            Resources.MainWindowStrings.ErrorOnLoadCaption,
            MessageBoxButton.OK,
            MessageBoxImage.Error);
    }
}
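// For context only: a minimal sketch of the complementary save path, assuming the
// same XmlSerializer round-trip used by the constructor above. The Save method name
// and the literal error strings are illustrative assumptions, not part of the original class.
internal void Save(string path)
{
    try
    {
        using (var file = new FileStream(path, FileMode.Create))
        {
            var serializer = new XmlSerializer(typeof(MergeState));
            serializer.Serialize(file, state);
        }
    }
    catch (IOException e)
    {
        MessageBox.Show(
            string.Format("Could not save the merge state: {0}", e.Message),
            "Save error",
            MessageBoxButton.OK,
            MessageBoxImage.Error);
    }
}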
/// <summary> /// Default merge impl </summary> public virtual void Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, TermsEnum termsEnum) { BytesRef term; Debug.Assert(termsEnum != null); long sumTotalTermFreq = 0; long sumDocFreq = 0; long sumDFsinceLastAbortCheck = 0; FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount); if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) { if (DocsEnum == null) { DocsEnum = new MappingMultiDocsEnum(); } DocsEnum.MergeState = mergeState; MultiDocsEnum docsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, Index.DocsEnum.FLAG_NONE); if (docsEnumIn != null) { DocsEnum.Reset(docsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.DocFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) { if (DocsAndFreqsEnum == null) { DocsAndFreqsEnum = new MappingMultiDocsEnum(); } DocsAndFreqsEnum.MergeState = mergeState; MultiDocsEnum docsAndFreqsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn); Debug.Assert(docsAndFreqsEnumIn != null); DocsAndFreqsEnum.Reset(docsAndFreqsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsAndFreqsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { if (PostingsEnum == null) { PostingsEnum = new MappingMultiDocsAndPositionsEnum(); } PostingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum postingsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS); Debug.Assert(postingsEnumIn != null); PostingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else { Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); if (PostingsEnum == null) { PostingsEnum = new MappingMultiDocsAndPositionsEnum(); } PostingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum 
postingsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn); Debug.Assert(postingsEnumIn != null); PostingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } Finish(indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality()); }
/// <summary> /// Merges in the term vectors from the readers in /// <code>mergeState</code>. The default implementation skips /// over deleted documents, and uses <seealso cref="#startDocument(int)"/>, /// <seealso cref="#startField(FieldInfo, int, boolean, boolean, boolean)"/>, /// <seealso cref="#startTerm(BytesRef, int)"/>, <seealso cref="#addPosition(int, int, int, BytesRef)"/>, /// and <seealso cref="#finish(FieldInfos, int)"/>, /// returning the number of documents that were written. /// Implementations can override this method for more sophisticated /// merging (bulk-byte copying, etc). /// </summary> public virtual int Merge(MergeState mergeState) { int docCount = 0; for (int i = 0; i < mergeState.Readers.Count; i++) { AtomicReader reader = mergeState.Readers[i]; int maxDoc = reader.MaxDoc(); Bits liveDocs = reader.LiveDocs; for (int docID = 0; docID < maxDoc; docID++) { if (liveDocs != null && !liveDocs.Get(docID)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Fields vectors = reader.GetTermVectors(docID); AddAllDocVectors(vectors, mergeState); docCount++; mergeState.checkAbort.Work(300); } } Finish(mergeState.FieldInfos, docCount); return docCount; }
public override MergePolicy.DocMap GetDocMap(MergeState mergeState)
{
    if (unsortedReaders == null)
    {
        throw new InvalidOperationException();
    }
    if (docMap == null)
    {
        return base.GetDocMap(mergeState);
    }
    Debug.Assert(mergeState.DocMaps.Length == 1); // we returned a singleton reader
    MonotonicAppendingLongBuffer deletes = GetDeletes(unsortedReaders);
    return new DocMapAnonymousInnerClassHelper(this, mergeState, deletes);
}
public virtual void Merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum) { BytesRef term; if (Debugging.AssertsEnabled) { Debugging.Assert(termsEnum != null); } long sumTotalTermFreq = 0; long sumDocFreq = 0; long sumDFsinceLastAbortCheck = 0; FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount); if (indexOptions == IndexOptions.DOCS_ONLY) { if (docsEnum is null) { docsEnum = new MappingMultiDocsEnum(); } docsEnum.MergeState = mergeState; MultiDocsEnum docsEnumIn = null; while (termsEnum.MoveNext()) { term = termsEnum.Term; // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, DocsFlags.NONE); if (docsEnumIn != null) { docsEnum.Reset(docsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, docsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.DocFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { if (docsAndFreqsEnum is null) { docsAndFreqsEnum = new MappingMultiDocsEnum(); } docsAndFreqsEnum.MergeState = mergeState; MultiDocsEnum docsAndFreqsEnumIn = null; while (termsEnum.MoveNext()) { term = termsEnum.Term; // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn); if (Debugging.AssertsEnabled) { Debugging.Assert(docsAndFreqsEnumIn != null); } docsAndFreqsEnum.Reset(docsAndFreqsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, docsAndFreqsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { if (postingsEnum is null) { postingsEnum = new MappingMultiDocsAndPositionsEnum(); } postingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum postingsEnumIn = null; while (termsEnum.MoveNext()) { term = termsEnum.Term; // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsFlags.PAYLOADS); if (Debugging.AssertsEnabled) { Debugging.Assert(postingsEnumIn != null); } postingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else { if (Debugging.AssertsEnabled) { Debugging.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); } if (postingsEnum is null) { postingsEnum = new MappingMultiDocsAndPositionsEnum(); } 
postingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum postingsEnumIn = null; while (termsEnum.MoveNext()) { term = termsEnum.Term; // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn); if (Debugging.AssertsEnabled) { Debugging.Assert(postingsEnumIn != null); } postingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } Finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality); }
/* private class IterableAnonymousInnerClassHelper3 : IEnumerable<BytesRef> { private readonly DocValuesConsumer OuterInstance; private SortedDocValues[] Dvs; private OrdinalMap Map; public IterableAnonymousInnerClassHelper3(DocValuesConsumer outerInstance, SortedDocValues[] dvs, OrdinalMap map) { this.OuterInstance = outerInstance; this.Dvs = dvs; this.Map = map; } // ord -> value public virtual IEnumerator<BytesRef> GetEnumerator() { return new IteratorAnonymousInnerClassHelper3(this); } private class IteratorAnonymousInnerClassHelper3 : IEnumerator<BytesRef> { private readonly IterableAnonymousInnerClassHelper3 OuterInstance; public IteratorAnonymousInnerClassHelper3(IterableAnonymousInnerClassHelper3 outerInstance) { this.OuterInstance = outerInstance; scratch = new BytesRef(); } internal readonly BytesRef scratch; internal int currentOrd; public virtual bool HasNext() { return currentOrd < OuterInstance.Map.ValueCount; } public virtual BytesRef Next() { if (!HasNext()) { throw new Exception(); } int segmentNumber = OuterInstance.Map.GetFirstSegmentNumber(currentOrd); int segmentOrd = (int)OuterInstance.Map.GetFirstSegmentOrd(currentOrd); OuterInstance.Dvs[segmentNumber].LookupOrd(segmentOrd, scratch); currentOrd++; return scratch; } public virtual void Remove() { throw new System.NotSupportedException(); } } } private class IterableAnonymousInnerClassHelper4 : IEnumerable<Number> { private readonly DocValuesConsumer OuterInstance; private AtomicReader[] Readers; private SortedDocValues[] Dvs; private OrdinalMap Map; public IterableAnonymousInnerClassHelper4(DocValuesConsumer outerInstance, AtomicReader[] readers, SortedDocValues[] dvs, OrdinalMap map) { this.OuterInstance = outerInstance; this.Readers = readers; this.Dvs = dvs; this.Map = map; } public virtual IEnumerator<Number> GetEnumerator() { return new IteratorAnonymousInnerClassHelper4(this); } private class IteratorAnonymousInnerClassHelper4 : IEnumerator<Number> { private readonly IterableAnonymousInnerClassHelper4 OuterInstance; public IteratorAnonymousInnerClassHelper4(IterableAnonymousInnerClassHelper4 outerInstance) { this.OuterInstance = outerInstance; readerUpto = -1; } internal int readerUpto; internal int docIDUpto; internal int nextValue; internal AtomicReader currentReader; internal Bits currentLiveDocs; internal bool nextIsSet; public virtual bool HasNext() { return nextIsSet || SetNext(); } public virtual void Remove() { throw new System.NotSupportedException(); } public virtual Number Next() { if (!HasNext()) { throw new NoSuchElementException(); } Debug.Assert(nextIsSet); nextIsSet = false; // TODO make a mutable number return nextValue; } private bool SetNext() { while (true) { if (readerUpto == OuterInstance.Readers.Length) { return false; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < OuterInstance.Readers.Length) { currentReader = OuterInstance.Readers[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { nextIsSet = true; int segOrd = OuterInstance.Dvs[readerUpto].GetOrd(docIDUpto); nextValue = segOrd == -1 ? - 1 : (int) OuterInstance.Map.GetGlobalOrd(readerUpto, segOrd); docIDUpto++; return true; } docIDUpto++; } } } }*/ /// <summary> /// Merges the sortedset docvalues from <code>toMerge</code>. 
/// <p> /// The default implementation calls <seealso cref="#addSortedSetField"/>, passing /// an Iterable that merges ordinals and values and filters deleted documents . /// </summary> public virtual void MergeSortedSetField(FieldInfo fieldInfo, MergeState mergeState, IList<SortedSetDocValues> toMerge) { var readers = mergeState.Readers.ToArray(); var dvs = toMerge.ToArray(); // step 1: iterate thru each sub and mark terms still in use var liveTerms = new TermsEnum[dvs.Length]; for (int sub = 0; sub < liveTerms.Length; sub++) { var reader = readers[sub]; var dv = dvs[sub]; var liveDocs = reader.LiveDocs; if (liveDocs == null) { liveTerms[sub] = dv.TermsEnum(); } else { var bitset = new LongBitSet(dv.ValueCount); for (int i = 0; i < reader.MaxDoc; i++) { if (liveDocs.Get(i)) { dv.Document = i; long ord; while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { bitset.Set(ord); } } } liveTerms[sub] = new BitsFilteredTermsEnum(dv.TermsEnum(), bitset); } } // step 2: create ordinal map (this conceptually does the "merging") var map = new OrdinalMap(this, liveTerms); // step 3: add field AddSortedSetField(fieldInfo, GetMergeSortedSetValuesEnumerable(map, dvs), // doc -> ord count GetMergeSortedSetDocToOrdCountEnumerable(readers, dvs), // ords GetMergeSortedSetOrdsEnumerable(readers, dvs, map) ); }
private IEnumerable<BytesRef> GetMergeBinaryFieldEnumerable(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<Bits> docsWithField) { int readerUpto = -1; int docIDUpto = 0; AtomicReader currentReader = null; BinaryDocValues currentValues = null; Bits currentLiveDocs = null; Bits currentDocsWithField = null; while (true) { if (readerUpto == toMerge.Count) { yield break; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < toMerge.Count) { currentReader = mergeState.Readers[readerUpto]; currentValues = toMerge[readerUpto]; currentDocsWithField = docsWithField[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { var nextValue = new BytesRef(); if (currentDocsWithField.Get(docIDUpto)) { currentValues.Get(docIDUpto, nextValue); } else { nextValue = null; } docIDUpto++; yield return nextValue; continue; } docIDUpto++; } }
public override void Replay(MergeState merge)
{
    if (!merge.IsCommon(this.sv1)) return;
    if (!merge.IsCommon(this.sv2)) return;

    if (merge.G1.IsEqual(this.sv1, this.sv2))
    {
        if (merge.Result.IsEqual(this.sv1, this.sv2))
        {
            // already present
            return;
        }
        if (merge.G2.IsEqual(this.sv1, this.sv2))
        {
            // add equality
            merge.Result.AssumeEqual(this.sv1, this.sv2);
        }
        else
        {
            // Changed vs G1 (since not present in output)
            merge.changed = true;
        }
    }
}
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingTermVectorsReader matchingVectorsReader = null; if (matchingSegmentReader != null) { TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader; // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader compressingTermVectorsReader) { matchingVectorsReader = compressingTermVectorsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT) { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); } } else { CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index; IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream; vectorsStreamOrig.Seek(0); ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone()); for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;) { // We make sure to move the checksum input in any case, otherwise the final // integrity check might need to read the whole file a second time long startPointer = index.GetStartPointer(i); if (startPointer > vectorsStream.GetFilePointer()) { vectorsStream.Seek(startPointer); } if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk { int docBase = vectorsStream.ReadVInt32(); int chunkDocs = vectorsStream.ReadVInt32(); if (Debugging.AssertsEnabled) { Debugging.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc); } if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs) { long chunkEnd = index.GetStartPointer(docBase + chunkDocs); long chunkLength = chunkEnd - vectorsStream.GetFilePointer(); indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer()); this.vectorsStream.WriteVInt32(docCount); this.vectorsStream.WriteVInt32(chunkDocs); this.vectorsStream.CopyBytes(vectorsStream, chunkLength); docCount += chunkDocs; this.numDocs += chunkDocs; mergeState.CheckAbort.Work(300 * chunkDocs); i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc); } else { for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); } } } else { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); i = NextLiveDoc(i + 1, liveDocs, maxDoc); } } vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength()); CodecUtil.CheckFooter(vectorsStream); } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
public DocMapAnonymousInnerClassHelper(SortingOneMerge outerInstance, MergeState mergeState, MonotonicAppendingInt64Buffer deletes)
{
    this.outerInstance = outerInstance;
    this.mergeState = mergeState;
    this.deletes = deletes;
}
private int CopyFieldsNoDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths) { int maxDoc = reader.MaxDoc(); int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); AddRawDocuments(stream, rawDocLengths, len); docCount += len; mergeState.checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // fieldsWriter.addDocument; see LUCENE-1282 Document doc = reader.Document(docCount); AddDocument(doc, mergeState.FieldInfos); mergeState.checkAbort.Work(300); } } return docCount; }
private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths) { int docCount = 0; int maxDoc = reader.MaxDoc(); Bits liveDocs = reader.LiveDocs; Debug.Assert(liveDocs != null); if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc; ) { if (!liveDocs.Get(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (!liveDocs.Get(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; mergeState.checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (!liveDocs.Get(j)) { // skip deleted docs continue; } // TODO: this could be more efficient using // FieldVisitor instead of loading/writing entire // doc; ie we just have to renumber the field number // on the fly? // NOTE: it's very important to first assign to doc then pass it to // fieldsWriter.addDocument; see LUCENE-1282 Document doc = reader.Document(j); AddDocument(doc, mergeState.FieldInfos); docCount++; mergeState.checkAbort.Work(300); } } return docCount; }
public override int Merge(MergeState mergeState) { int docCount = 0; // Used for bulk-reading raw bytes for stored fields int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; Lucene40StoredFieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; // we can only bulk-copy if the matching reader is also a Lucene40FieldsReader if (fieldsReader != null && fieldsReader is Lucene40StoredFieldsReader) { matchingFieldsReader = (Lucene40StoredFieldsReader)fieldsReader; } } if (reader.LiveDocs != null) { docCount += CopyFieldsWithDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths); } else { docCount += CopyFieldsNoDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths); } } Finish(mergeState.FieldInfos, docCount); return docCount; }
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingTermVectorsReader matchingVectorsReader = null; if (matchingSegmentReader != null) { TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader; // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader) { matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader; } } int maxDoc = reader.MaxDoc; Bits liveDocs = reader.LiveDocs; if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != CompressionMode || matchingVectorsReader.ChunkSize != ChunkSize || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT) { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.checkAbort.Work(300); } } else { CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index; IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream; vectorsStreamOrig.Seek(0); ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone()); for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; ) { // We make sure to move the checksum input in any case, otherwise the final // integrity check might need to read the whole file a second time long startPointer = index.GetStartPointer(i); if (startPointer > vectorsStream.FilePointer) { vectorsStream.Seek(startPointer); } if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk { int docBase = vectorsStream.ReadVInt(); int chunkDocs = vectorsStream.ReadVInt(); Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc); if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs) { long chunkEnd = index.GetStartPointer(docBase + chunkDocs); long chunkLength = chunkEnd - vectorsStream.FilePointer; IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer); this.VectorsStream.WriteVInt(docCount); this.VectorsStream.WriteVInt(chunkDocs); this.VectorsStream.CopyBytes(vectorsStream, chunkLength); docCount += chunkDocs; this.NumDocs += chunkDocs; mergeState.checkAbort.Work(300 * chunkDocs); i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc); } else { for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.checkAbort.Work(300); } } } else { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.checkAbort.Work(300); i = NextLiveDoc(i + 1, liveDocs, maxDoc); } } vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength()); CodecUtil.CheckFooter(vectorsStream); } } Finish(mergeState.FieldInfos, docCount); return docCount; }
/// <summary>
/// Applies the corresponding parameter to the RTCManifest.
/// </summary>
/// <param name="item">The file entry in the manifest.</param>
/// <param name="parameter">The parameter value.</param>
public static void ApplyParameter(this ManifestFileItem item, Models.MergeItemValue parameter)
{
    if (parameter == null)
    {
        m_loger.Log("A null Parameter reference was passed while applying a parameter to one of the manifest files"
            , MessageType.Warning
            , new LogParameter("Manifest file name", LogMessageBuilder.GetStringLogVal(item.FileName)));
    }
    else
    {
        switch (parameter.ParameterType)
        {
            case ManifestFileParameters.Name:
                var name = parameter.GetValue() as string;
                if (name != null)
                {
                    item.FileName = name;
                }
                else
                {
                    m_loger.Log("Failed to change the file name in the manifest - the parameter value is undefined", MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.CRC:
                var crc = parameter.GetValue() as string;
                if (crc != null)
                {
                    item.CRC = crc;
                }
                else
                {
                    m_loger.Log("Failed to change the checksum in the manifest - the parameter value is undefined", MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.Folder:
                var folder = parameter.GetValue() as string;
                if (folder != null)
                {
                    MergeState state = parameter.MergeState;
                    //parameter.Dispatcher.Invoke(() => { state = parameter.MergeState; });
                    switch (state)
                    {
                        case MergeState.Added:
                            item.AddFolder(folder);
                            break;
                        case MergeState.Changed:
                            m_loger.Log("No logic is defined for applying a value to changed folders. The change was skipped", MessageType.Warning
                                , new LogParameter("Name of the manifest file being edited", LogMessageBuilder.GetStringLogVal(item.FileName)));
                            return;
                        case MergeState.Removed:
                            if (item.ContainsFolder(folder))
                            {
                                item.RemoveFolder(folder);
                            }
                            else
                            {
                                m_loger.Log("The folder marked for removal was not found in the manifest", MessageType.Warning
                                    , new LogParameter("Name of the manifest file being edited", LogMessageBuilder.GetStringLogVal(item.FileName))
                                    , new LogParameter("Name of the folder being looked up", LogMessageBuilder.GetStringLogVal(folder)));
                            }
                            break;
                        default:
                            return;
                    }
                }
                else
                {
                    m_loger.Log("Failed to change the folder in the manifest - the parameter value is undefined", MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.RegAsm:
                try
                {
                    item.REGASM.StringAction = parameter.GetValue() as string;
                }
                catch (Exception ex)
                {
                    m_loger.Log("Failed to change the RegAsm registration flag in the manifest - the parameter value is undefined", ex, MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.RegGAC:
                try
                {
                    item.GAC.StringAction = parameter.GetValue() as string;
                }
                catch (Exception ex)
                {
                    m_loger.Log("Failed to change the RegGAC registration flag in the manifest - the parameter value is undefined", ex, MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.RegSrv32:
                try
                {
                    item.REGSRV32.StringAction = parameter.GetValue() as string;
                }
                catch (Exception ex)
                {
                    m_loger.Log("Failed to change the RegSvr32 registration flag in the manifest - the parameter value is undefined", ex, MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.Type:
                var type = parameter.GetValue() as string;
                if (type != null)
                {
                    item.Type = type;
                }
                else
                {
                    m_loger.Log("Failed to change the file type in the manifest - the parameter value is undefined", MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.Version:
                var version = parameter.GetValue() as string;
                if (version != null)
                {
                    item.Version = version;
                }
                else
                {
                    m_loger.Log("Failed to change the file version in the manifest - the parameter value is undefined", MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.Delete:
                try
                {
                    bool flag = (bool)parameter.GetValue();
                    item.Delete = flag;
                }
                catch (Exception ex)
                {
                    m_loger.Log("Failed to change the file deletion flag in the manifest - the parameter value is undefined", ex, MessageType.Warning
                        , new LogParameter("Value type", parameter.GetValue() == null ? "null" : parameter.GetValue().GetType().FullName));
                }
                break;
            case ManifestFileParameters.PublicKeyToken:
                var pkt = parameter.GetValue() as string;
                item.PublicKeyToken = pkt == null ? string.Empty : pkt;
                break;
            case ManifestFileParameters.AddinName:
                var addinName = parameter.GetValue() as string;
                item.AddinName = addinName == null ? string.Empty : addinName;
                break;
            default:
                m_loger.Log("Failed to apply the parameter to the manifest file - the parameter type is not recognized", MessageType.Warning
                    , new LogParameter("Type", parameter.ParameterType.ToString()));
                break;
        }
    }
}
/// <summary>
/// Replays this update against the given merge state.
/// </summary>
public abstract void Replay(MergeState merge);
public override void Replay(MergeState merge)
{
    if (!merge.IsCommon(from)) return;

    SymbolicValue v1target = merge.G1.LookupWithoutManifesting(from, function);
    SymbolicValue v2target = merge.G2.LookupWithoutManifesting(from, function);
    if (v1target == null)
    {
        // no longer in G1
        return;
    }
    if (v2target == null)
    {
        // no longer in G2
        merge.changed = true; // no longer in result.
        return;
    }
    SymbolicValue rtarget = merge.AddJointEdge(v1target, v2target, function, from);
    if (rtarget != null && rtarget.UniqueId > merge.lastCommonVariable)
    {
        merge.JoinSymbolicValue(v1target, v2target, rtarget);
    }
}
/// <summary>
/// Merges the numeric docvalues from <code>toMerge</code>.
/// <p>
/// The default implementation calls <seealso cref="#addNumericField"/>, passing
/// an Iterable that merges and filters deleted documents on the fly.
/// </summary>
public virtual void MergeNumericField(FieldInfo fieldInfo, MergeState mergeState, IList<NumericDocValues> toMerge, IList<Bits> docsWithField)
{
    AddNumericField(fieldInfo, GetMergeNumericFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
}
private IEnumerable <BytesRef> GetMergeBinaryFieldEnumerable(FieldInfo fieldInfo, MergeState mergeState, IList <BinaryDocValues> toMerge) { int readerUpto = -1; int docIDUpto = 0; AtomicReader currentReader = null; BinaryDocValues currentValues = null; Bits currentLiveDocs = null; BytesRef nextValue = new BytesRef(); while (true) { if (readerUpto == toMerge.Count) { yield break; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < toMerge.Count) { currentReader = mergeState.Readers[readerUpto]; currentValues = toMerge[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { currentValues.Get(docIDUpto, nextValue); docIDUpto++; yield return(nextValue); continue; } docIDUpto++; } }
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingStoredFieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader compressingStoredFieldsReader) { matchingFieldsReader = compressingStoredFieldsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingFieldsReader == null || matchingFieldsReader.Version != VERSION_CURRENT || matchingFieldsReader.CompressionMode != compressionMode || matchingFieldsReader.ChunkSize != chunkSize) // the way data is decompressed depends on the chunk size - means reader version is not the same as the writer version { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Document doc = reader.Document(i); AddDocument(doc, mergeState.FieldInfos); ++docCount; mergeState.CheckAbort.Work(300); } } else { int docID = NextLiveDoc(0, liveDocs, maxDoc); if (docID < maxDoc) { // not all docs were deleted CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID); int[] startOffsets = Arrays.Empty <int>(); do { // go to the next chunk that contains docID it.Next(docID); // transform lengths into offsets if (startOffsets.Length < it.chunkDocs) { startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)]; } for (int i = 1; i < it.chunkDocs; ++i) { startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; } if (numBufferedDocs == 0 && startOffsets[it.chunkDocs - 1] < chunkSize && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk - chunk is large enough - chunk is small enough - starting a new chunk { if (Debugging.AssertsEnabled) { Debugging.Assert(docID == it.docBase); } // no need to decompress, just copy data indexWriter.WriteIndex(it.chunkDocs, fieldsStream.GetFilePointer()); WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); it.CopyCompressedData(fieldsStream); this.docBase += it.chunkDocs; docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); docCount += it.chunkDocs; mergeState.CheckAbort.Work(300 * it.chunkDocs); } else { // decompress it.Decompress(); if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length) { throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.Length); } // copy non-deleted docs for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) { int diff = docID - it.docBase; StartDocument(it.numStoredFields[diff]); bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]); FinishDocument(); ++docCount; mergeState.CheckAbort.Work(300); } } } while (docID < maxDoc); it.CheckIntegrity(); } } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
/// <summary>
/// Merges the sorted docvalues from <code>toMerge</code>.
/// <p>
/// The default implementation calls <seealso cref="#addSortedField"/>, passing
/// an Iterable that merges ordinals and values and filters deleted documents.</p>
/// </summary>
public virtual void MergeSortedField(FieldInfo fieldInfo, MergeState mergeState, IList<SortedDocValues> toMerge)
{
    AtomicReader[] readers = mergeState.Readers.ToArray();
    SortedDocValues[] dvs = toMerge.ToArray();

    // step 1: iterate thru each sub and mark terms still in use
    var liveTerms = new TermsEnum[dvs.Length];
    for (int sub = 0; sub < liveTerms.Length; sub++)
    {
        AtomicReader reader = readers[sub];
        SortedDocValues dv = dvs[sub];
        Bits liveDocs = reader.LiveDocs;
        if (liveDocs == null)
        {
            liveTerms[sub] = dv.TermsEnum();
        }
        else
        {
            var bitset = new LongBitSet(dv.ValueCount);
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                if (liveDocs.Get(i))
                {
                    int ord = dv.GetOrd(i);
                    if (ord >= 0)
                    {
                        bitset.Set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.TermsEnum(), bitset);
        }
    }

    // step 2: create ordinal map (this conceptually does the "merging")
    var map = new OrdinalMap(this, liveTerms);

    // step 3: add field
    AddSortedField(fieldInfo,
        GetMergeSortValuesEnumerable(map, dvs),
        // doc -> ord
        GetMergeSortedFieldDocToOrdEnumerable(readers, dvs, map));
}
/// <summary> /// Default merge impl: append documents, mapping around /// deletes /// </summary> public virtual TermStats Merge(MergeState mergeState, IndexOptions indexOptions, DocsEnum postings, FixedBitSet visitedDocs) { int df = 0; long totTF = 0; if (indexOptions == IndexOptions.DOCS_ONLY) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); this.StartDoc(doc, -1); this.FinishDoc(); df++; } totTF = -1; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postings.Freq; this.StartDoc(doc, freq); this.FinishDoc(); df++; totTF += freq; } } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq; this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.GetPayload(); this.AddPosition(position, payload, -1, -1); } this.FinishDoc(); df++; } } else { Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq; this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.GetPayload(); this.AddPosition(position, payload, postingsEnum.StartOffset, postingsEnum.EndOffset); } this.FinishDoc(); df++; } } return(new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF)); }
public override MergePolicy.DocMap GetDocMap(MergeState mergeState)
{
    if (unsortedReaders == null)
    {
        throw new InvalidOperationException("Invalid state");
    }
    if (docMap == null)
    {
        return base.GetDocMap(mergeState);
    }
    Debug.Assert(mergeState.DocMaps.Length == 1); // we returned a singleton reader
    MonotonicAppendingLongBuffer deletes = GetDeletes(unsortedReaders);
    return new DocMapAnonymousInnerClassHelper(this, mergeState, deletes);
}
/// <summary>
/// Merges the binary docvalues from <paramref name="toMerge"/>.
/// <para>
/// The default implementation calls <see cref="AddBinaryField(FieldInfo, IEnumerable{BytesRef})"/>, passing
/// an <see cref="IEnumerable{T}"/> that merges and filters deleted documents on the fly.</para>
/// </summary>
public virtual void MergeBinaryField(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<IBits> docsWithField)
{
    AddBinaryField(fieldInfo, GetMergeBinaryFieldEnumerable(/*fieldInfo, // LUCENENET: Never read */ mergeState, toMerge, docsWithField));
}
/// <summary> /// Safe (but, slowish) default method to write every /// vector field in the document. /// </summary> protected void AddAllDocVectors(Fields vectors, MergeState mergeState) { if (vectors == null) { StartDocument(0); FinishDocument(); return; } int numFields = vectors.Count; if (numFields == -1) { // count manually! TODO: Maybe enforce that Fields.size() returns something valid? numFields = 0; //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();) foreach (string it in vectors) { numFields++; } } StartDocument(numFields); string lastFieldName = null; TermsEnum termsEnum = null; DocsAndPositionsEnum docsAndPositionsEnum = null; int fieldCount = 0; foreach (string fieldName in vectors) { fieldCount++; FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName); Debug.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName); lastFieldName = fieldName; Terms terms = vectors.GetTerms(fieldName); if (terms == null) { // FieldsEnum shouldn't lie... continue; } bool hasPositions = terms.HasPositions; bool hasOffsets = terms.HasOffsets; bool hasPayloads = terms.HasPayloads; Debug.Assert(!hasPayloads || hasPositions); int numTerms = (int)terms.Count; if (numTerms == -1) { // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function numTerms = 0; termsEnum = terms.GetIterator(termsEnum); while (termsEnum.Next() != null) { numTerms++; } } StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads); termsEnum = terms.GetIterator(termsEnum); int termCount = 0; while (termsEnum.Next() != null) { termCount++; int freq = (int)termsEnum.TotalTermFreq; StartTerm(termsEnum.Term, freq); if (hasPositions || hasOffsets) { docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum); Debug.Assert(docsAndPositionsEnum != null); int docID = docsAndPositionsEnum.NextDoc(); Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS); Debug.Assert(docsAndPositionsEnum.Freq == freq); for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = docsAndPositionsEnum.NextPosition(); int startOffset = docsAndPositionsEnum.StartOffset; int endOffset = docsAndPositionsEnum.EndOffset; BytesRef payload = docsAndPositionsEnum.GetPayload(); Debug.Assert(!hasPositions || pos >= 0); AddPosition(pos, startOffset, endOffset, payload); } } FinishTerm(); } Debug.Assert(termCount == numTerms); FinishField(); } Debug.Assert(fieldCount == numFields); FinishDocument(); }
private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2) { int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; int totalNumDocs = 0; if (matchingVectorsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int docNum = 0; docNum < maxDoc;) { if (!liveDocs.Get(docNum)) { // skip deleted docs ++docNum; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { docNum++; numDocs++; if (docNum >= maxDoc) { break; } if (!liveDocs.Get(docNum)) { docNum++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs); AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); totalNumDocs += numDocs; mergeState.CheckAbort.Work(300 * numDocs); } } else { for (int docNum = 0; docNum < maxDoc; docNum++) { if (!liveDocs.Get(docNum)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Fields vectors = reader.GetTermVectors(docNum); AddAllDocVectors(vectors, mergeState); totalNumDocs++; mergeState.CheckAbort.Work(300); } } return(totalNumDocs); }
public DocMapAnonymousInnerClassHelper(SortingOneMerge outerInstance, MergeState mergeState, MonotonicAppendingLongBuffer deletes)
{
    this.outerInstance = outerInstance;
    this.mergeState = mergeState;
    this.deletes = deletes;
}
private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths) { int docCount = 0; int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (Debugging.AssertsEnabled) { Debugging.Assert(liveDocs != null); } if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc;) { if (!liveDocs.Get(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (!liveDocs.Get(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; mergeState.CheckAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (!liveDocs.Get(j)) { // skip deleted docs continue; } // TODO: this could be more efficient using // FieldVisitor instead of loading/writing entire // doc; ie we just have to renumber the field number // on the fly? // NOTE: it's very important to first assign to doc then pass it to // fieldsWriter.addDocument; see LUCENE-1282 Document doc = reader.Document(j); AddDocument(doc, mergeState.FieldInfos); docCount++; mergeState.CheckAbort.Work(300); } } return(docCount); }
/// <summary> /// Safe (but, slowish) default method to write every /// vector field in the document. /// </summary> protected internal void AddAllDocVectors(Fields vectors, MergeState mergeState) { if (vectors == null) { StartDocument(0); FinishDocument(); return; } int numFields = vectors.Size(); if (numFields == -1) { // count manually! TODO: Maybe enforce that Fields.size() returns something valid? numFields = 0; //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();) foreach (string it in vectors) { numFields++; } } StartDocument(numFields); string lastFieldName = null; TermsEnum termsEnum = null; DocsAndPositionsEnum docsAndPositionsEnum = null; int fieldCount = 0; foreach (string fieldName in vectors) { fieldCount++; FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName); Debug.Assert(lastFieldName == null || fieldName.CompareTo(lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName); lastFieldName = fieldName; Terms terms = vectors.Terms(fieldName); if (terms == null) { // FieldsEnum shouldn't lie... continue; } bool hasPositions = terms.HasPositions(); bool hasOffsets = terms.HasOffsets(); bool hasPayloads = terms.HasPayloads(); Debug.Assert(!hasPayloads || hasPositions); int numTerms = (int)terms.Size(); if (numTerms == -1) { // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function numTerms = 0; termsEnum = terms.Iterator(termsEnum); while (termsEnum.Next() != null) { numTerms++; } } StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads); termsEnum = terms.Iterator(termsEnum); int termCount = 0; while (termsEnum.Next() != null) { termCount++; int freq = (int)termsEnum.TotalTermFreq(); StartTerm(termsEnum.Term(), freq); if (hasPositions || hasOffsets) { docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum); Debug.Assert(docsAndPositionsEnum != null); int docID = docsAndPositionsEnum.NextDoc(); Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS); Debug.Assert(docsAndPositionsEnum.Freq() == freq); for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = docsAndPositionsEnum.NextPosition(); int startOffset = docsAndPositionsEnum.StartOffset(); int endOffset = docsAndPositionsEnum.EndOffset(); BytesRef payload = docsAndPositionsEnum.Payload; Debug.Assert(!hasPositions || pos >= 0); AddPosition(pos, startOffset, endOffset, payload); } } FinishTerm(); } Debug.Assert(termCount == numTerms); FinishField(); } Debug.Assert(fieldCount == numFields); FinishDocument(); }
private IEnumerable <long?> GetMergeNumericFieldEnumerable(FieldInfo fieldinfo, MergeState mergeState, IList <NumericDocValues> toMerge, IList <IBits> docsWithField) { int readerUpto = -1; int docIDUpto = 0; AtomicReader currentReader = null; NumericDocValues currentValues = null; IBits currentLiveDocs = null; IBits currentDocsWithField = null; while (true) { if (readerUpto == toMerge.Count) { yield break; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < toMerge.Count) { currentReader = mergeState.Readers[readerUpto]; currentValues = toMerge[readerUpto]; currentDocsWithField = docsWithField[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { long?nextValue; if (currentDocsWithField.Get(docIDUpto)) { nextValue = currentValues.Get(docIDUpto); } else { nextValue = null; } docIDUpto++; yield return(nextValue); continue; } docIDUpto++; } }
/// <summary>
/// Merges the binary docvalues from <paramref name="toMerge"/>.
/// <para>
/// The default implementation calls <see cref="AddBinaryField(FieldInfo, IEnumerable{BytesRef})"/>, passing
/// an <see cref="IEnumerable{T}"/> that merges and filters deleted documents on the fly.</para>
/// </summary>
public virtual void MergeBinaryField(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<IBits> docsWithField)
{
    AddBinaryField(fieldInfo, GetMergeBinaryFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
}
private IEnumerable <BytesRef> GetMergeBinaryFieldEnumerable(FieldInfo fieldInfo, MergeState mergeState, IList <BinaryDocValues> toMerge, IList <IBits> docsWithField) { int readerUpto = -1; int docIDUpto = 0; var nextValue = new BytesRef(); BytesRef nextPointer; // points to null if missing, or nextValue AtomicReader currentReader = null; BinaryDocValues currentValues = null; IBits currentLiveDocs = null; IBits currentDocsWithField = null; while (true) { if (readerUpto == toMerge.Count) { yield break; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < toMerge.Count) { currentReader = mergeState.Readers[readerUpto]; currentValues = toMerge[readerUpto]; currentDocsWithField = docsWithField[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { if (currentDocsWithField.Get(docIDUpto)) { currentValues.Get(docIDUpto, nextValue); nextPointer = nextValue; } else { nextPointer = null; } docIDUpto++; yield return(nextPointer); continue; } docIDUpto++; } }
public IEGraph Join(IEGraph g2, CfgBlock joinPoint, out IMergeInfo mergeInfo)
{
    EGraph eg1 = this;
    EGraph eg2 = (EGraph)g2;

    int updateSize;
    EGraph common = ComputeCommonTail(eg1, eg2, out updateSize);
    EGraph result;
    bool doReplay = true;

    if (common == null)
    {
        doReplay = false;
        result = new EGraph(eg1.elementLattice);
        result.Block = joinPoint;
    }
    else
    {
        result = new EGraph(common, joinPoint);
    }

    if (Analyzer.Debug)
    {
        Console.WriteLine("Last common symbol: {0}", common.idCounter);
    }
    if (Analyzer.Statistics)
    {
        Console.WriteLine("G1:{0} G2:{1} Tail:{2} UpdateSize:{3}", eg1.historySize, eg2.historySize, result.historySize, updateSize);
    }

    MergeState ms = new MergeState(result, eg1, eg2);

    // Heuristic for using Replay vs. full update
    doReplay &= (common != eg1.root);
    doReplay &= (eg1.historySize > 3);
    doReplay &= (eg2.historySize > 3);

    if (doReplay)
    {
        ms.Replay(common);
    }
    else
    {
        ms.AddMapping(eg1.constRoot, eg2.constRoot, result.constRoot);
        ms.JoinSymbolicValue(eg1.constRoot, eg2.constRoot, result.constRoot);
    }
    mergeInfo = ms;
    return result;
}
/// <summary>
/// Merges the numeric docvalues from <code>toMerge</code>.
/// <p>
/// The default implementation calls <seealso cref="#addNumericField"/>, passing
/// an Iterable that merges and filters deleted documents on the fly.</p>
/// </summary>
public virtual void MergeNumericField(FieldInfo fieldInfo, MergeState mergeState, IList<NumericDocValues> toMerge, IList<Bits> docsWithField)
{
    AddNumericField(fieldInfo, GetMergeNumericFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
}
public override void Replay(MergeState merge)
{
    if (!merge.IsCommon(this.sv)) return;

    AbstractValue av1 = merge.G1[this.sv];
    AbstractValue av2 = merge.G2[this.sv];
    AbstractValue old = merge.Result[this.sv];
    AbstractValue join = merge.Result.elementLattice.Join(av1, av2);
    if (join != av1 && merge.Result.elementLattice.LowerThanOrEqual(av1, join))
    {
        merge.changed = true;
    }
    if (join != old)
    {
        merge.Result[this.sv] = join;
    }
}
/// <summary>
/// Merges the binary docvalues from <code>toMerge</code>.
/// <p>
/// The default implementation calls <seealso cref="#addBinaryField"/>, passing
/// an Iterable that merges and filters deleted documents on the fly.
/// </summary>
public void MergeBinaryField(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<Bits> docsWithField)
{
    AddBinaryField(fieldInfo, GetMergeBinaryFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
}
public override void Replay(MergeState merge)
{
    if (!merge.IsCommon(this.from)) return;

    SymbolicValue v1target = merge.G1.LookupWithoutManifesting(this.from, this.function);
    SymbolicValue v2target = merge.G2.LookupWithoutManifesting(this.from, this.function);
    if (v1target != null && v2target != null)
    {
        // outdated
        return;
    }
    if (v1target != null)
    {
        merge.changed = true;
    }
    SymbolicValue rtarget = merge.Result.LookupWithoutManifesting(this.from, this.function);
    if (rtarget == null)
    {
        // redundant
        return;
    }
    merge.Result.Eliminate(this.function, this.from);
}
private IEnumerable<long?> GetMergeNumericFieldEnumerable(FieldInfo fieldinfo, MergeState mergeState, IList<NumericDocValues> toMerge)
{
    int readerUpto = -1;
    int docIDUpto = 0;
    AtomicReader currentReader = null;
    NumericDocValues currentValues = null;
    Bits currentLiveDocs = null;

    while (true)
    {
        if (readerUpto == toMerge.Count)
        {
            yield break;
        }

        if (currentReader == null || docIDUpto == currentReader.MaxDoc)
        {
            readerUpto++;
            if (readerUpto < toMerge.Count)
            {
                currentReader = mergeState.Readers[readerUpto];
                currentValues = toMerge[readerUpto];
                currentLiveDocs = currentReader.LiveDocs;
            }
            docIDUpto = 0;
            continue;
        }

        if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
        {
            yield return currentValues.Get(docIDUpto++);
            continue;
        }

        docIDUpto++;
    }
}
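// Illustration only: the iterator above follows a general "walk each segment, skip
// dead slots, yield live values" shape. This self-contained sketch shows the same
// pattern with plain arrays standing in for AtomicReader/NumericDocValues/Bits;
// all names here are assumptions for illustration, not Lucene.NET API.
internal static class MergedValuesSketch
{
    // segments[i] holds one segment's values; live[i] marks which slots survive
    // (a null mask means "all live", mirroring LiveDocs == null in the real code).
    public static IEnumerable<long?> MergedValues(long?[][] segments, bool[][] live)
    {
        for (int seg = 0; seg < segments.Length; seg++)
        {
            for (int doc = 0; doc < segments[seg].Length; doc++)
            {
                // deleted slots are skipped, so the consumer never sees them
                if (live[seg] == null || live[seg][doc])
                {
                    yield return segments[seg][doc]; // values are produced lazily, one doc at a time
                }
            }
        }
    }
}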