Example #1
        /// <summary>
        /// Merges in the stored fields from the readers in
        /// <paramref name="mergeState"/>. The default implementation skips
        /// over deleted documents and uses <see cref="StartDocument(int)"/>,
        /// <see cref="WriteField(FieldInfo, IndexableField)"/>, and <see cref="Finish(FieldInfos, int)"/>,
        /// returning the number of documents that were written.
        /// Implementations can override this method for more sophisticated
        /// merging (for example, bulk byte copying).
        /// </summary>
        public virtual int Merge(MergeState mergeState)
        {
            int docCount = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                int maxDoc = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;
                for (int i = 0; i < maxDoc; i++)
                {
                    if (liveDocs != null && !liveDocs.Get(i))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // TODO: this could be more efficient using
                    // FieldVisitor instead of loading/writing entire
                    // doc; ie we just have to renumber the field number
                    // on the fly?
                    // NOTE: it's very important to first assign to doc then pass it to
                    // fieldsWriter.addDocument; see LUCENE-1282
                    Document doc = reader.Document(i);
                    AddDocument(doc, mergeState.FieldInfos);
                    docCount++;
                    mergeState.checkAbort.Work(300);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }
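// To make the deleted-document handling above concrete, here is a minimal
// stand-alone sketch of the same filtering loop. The bool[] stands in for
// the Bits live-docs interface and the deletion pattern is made up; it only
// illustrates how surviving documents are implicitly renumbered as they are
// copied into the merged segment.
using System;

public static class LiveDocsDemo
{
    public static void Main()
    {
        // true = live, false = deleted; a hypothetical segment with maxDoc = 6
        bool[] liveDocs = { true, false, true, true, false, true };
        int docCount = 0;

        for (int i = 0; i < liveDocs.Length; i++)
        {
            if (!liveDocs[i])
            {
                continue; // skip deleted docs, exactly as Merge does above
            }
            Console.WriteLine($"copy old doc {i} as merged doc {docCount}");
            docCount++;
        }
        Console.WriteLine($"docCount = {docCount}"); // prints 4
    }
}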
Example #2
 /// <summary>
 /// Called during merging to merge all <see cref="Fields"/> from
 /// sub-readers. This must recurse to merge all postings
 /// (terms, docs, positions, etc.). A <see cref="PostingsFormat"/>
 /// can override this default implementation to do its own merging.
 /// </summary>
 public virtual void Merge(MergeState mergeState, Fields fields)
 {
     foreach (string field in fields)
     {
         FieldInfo info = mergeState.FieldInfos.FieldInfo(field);
         Debug.Assert(info != null, "FieldInfo for field is null: " + field);
         Terms terms = fields.Terms(field);
         if (terms != null)
         {
             TermsConsumer termsConsumer = AddField(info);
             termsConsumer.Merge(mergeState, info.FieldIndexOptions, terms.Iterator(null));
         }
     }
 }
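// A minimal sketch of the per-field dispatch pattern above. A Dictionary
// stands in for Fields and a Console line stands in for the TermsConsumer
// returned by AddField; all names and data here are hypothetical.
using System;
using System.Collections.Generic;

public static class PerFieldMergeDemo
{
    public static void Main()
    {
        var fields = new Dictionary<string, string[]>
        {
            ["title"] = new[] { "lucene", "merge" },
            ["body"] = new[] { "postings", "terms" },
            ["empty"] = null, // a field without terms is skipped, like terms == null above
        };

        foreach (KeyValuePair<string, string[]> entry in fields)
        {
            if (entry.Value == null)
            {
                continue;
            }
            // stands in for AddField(info) + termsConsumer.Merge(...)
            Console.WriteLine($"merging field '{entry.Key}' ({entry.Value.Length} terms)");
        }
    }
}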
        private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int maxDoc = reader.MaxDoc;
            Bits liveDocs = reader.LiveDocs;
            Debug.Assert(liveDocs != null); // only called when the reader has deletions
            int totalNumDocs = 0;
            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc; )
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (!liveDocs.Get(docNum))
                        {
                            docNum++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    totalNumDocs += numDocs;
                    mergeState.checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docNum);
                    AddAllDocVectors(vectors, mergeState);
                    totalNumDocs++;
                    mergeState.checkAbort.Work(300);
                }
            }
            return totalNumDocs;
        }
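// The do/while in the bulk path above batches runs of consecutive live
// documents so each run can be copied with a single raw-bytes call. A
// stand-alone sketch of just that grouping logic, with a small cap of 4
// in place of MAX_RAW_MERGE_DOCS and a made-up deletion pattern:
using System;

public static class RunGroupingDemo
{
    private const int MAX_RAW_MERGE_DOCS = 4; // small cap for illustration only

    public static void Main()
    {
        // true = live, false = deleted (hypothetical pattern)
        bool[] liveDocs = { true, true, false, true, true, true, true, true, false, true };

        for (int docNum = 0; docNum < liveDocs.Length; )
        {
            if (!liveDocs[docNum])
            {
                ++docNum; // skip deleted docs
                continue;
            }
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= liveDocs.Length)
                {
                    break;
                }
                if (!liveDocs[docNum])
                {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            // stands in for RawDocs(...) + AddRawDocuments(...)
            Console.WriteLine($"bulk-copy docs [{start}..{start + numDocs - 1}] ({numDocs} docs)");
        }
        // prints the runs [0..1], [3..6], [7..7], and [9..9]
    }
}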
 private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
 {
     int maxDoc = reader.MaxDoc;
     if (matchingVectorsReader != null)
     {
         // We can bulk-copy because the fieldInfos are "congruent"
         int docCount = 0;
         while (docCount < maxDoc)
         {
             int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
             matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
             AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
             docCount += len;
             mergeState.checkAbort.Work(300 * len);
         }
     }
     else
     {
         for (int docNum = 0; docNum < maxDoc; docNum++)
         {
             // NOTE: it's very important to first assign to vectors then pass it to
             // termVectorsWriter.addAllDocVectors; see LUCENE-1282
             Fields vectors = reader.GetTermVectors(docNum);
             AddAllDocVectors(vectors, mergeState);
             mergeState.checkAbort.Work(300);
         }
     }
     return maxDoc;
 }
        public override int Merge(MergeState mergeState)
        {
            // Used for bulk-reading raw bytes for term vectors
            int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
            int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];

            int numDocs = 0;
            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader = mergeState.Readers[i];

                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[i];
                Lucene40TermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;

                    if (vectorsReader != null && vectorsReader is Lucene40TermVectorsReader)
                    {
                        matchingVectorsReader = (Lucene40TermVectorsReader)vectorsReader;
                    }
                }
                if (reader.LiveDocs != null)
                {
                    numDocs += CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
                }
                else
                {
                    numDocs += CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
                }
            }
            Finish(mergeState.FieldInfos, numDocs);
            return numDocs;
        }
        private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths)
        {
            int docCount = 0;
            int maxDoc = reader.MaxDoc;
            Bits liveDocs = reader.LiveDocs;
            Debug.Assert(liveDocs != null);
            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc; )
                {
                    if (!liveDocs.Get(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (!liveDocs.Get(j))
                        {
                            j++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    mergeState.checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (!liveDocs.Get(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // TODO: this could be more efficient using
                    // FieldVisitor instead of loading/writing entire
                    // doc; ie we just have to renumber the field number
                    // on the fly?
                    // NOTE: it's very important to first assign to doc then pass it to
                    // fieldsWriter.addDocument; see LUCENE-1282
                    Document doc = reader.Document(j);
                    AddDocument(doc, mergeState.FieldInfos);
                    docCount++;
                    mergeState.checkAbort.Work(300);
                }
            }
            return docCount;
        }
 private int CopyFieldsNoDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths)
 {
     int maxDoc = reader.MaxDoc;
     int docCount = 0;
     if (matchingFieldsReader != null)
     {
         // We can bulk-copy because the fieldInfos are "congruent"
         while (docCount < maxDoc)
         {
             int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
             IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
             AddRawDocuments(stream, rawDocLengths, len);
             docCount += len;
             mergeState.checkAbort.Work(300 * len);
         }
     }
     else
     {
         for (; docCount < maxDoc; docCount++)
         {
             // NOTE: it's very important to first assign to doc then pass it to
             // fieldsWriter.addDocument; see LUCENE-1282
             Document doc = reader.Document(docCount);
             AddDocument(doc, mergeState.FieldInfos);
             mergeState.checkAbort.Work(300);
         }
     }
     return docCount;
 }
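// With no deletions the bulk path just walks the segment in fixed-size
// chunks, as in the while loop above. A stand-alone sketch of the chunk
// arithmetic; 4192 is assumed here for MAX_RAW_MERGE_DOCS and the segment
// size is made up.
using System;

public static class ChunkingDemo
{
    public static void Main()
    {
        const int MAX_RAW_MERGE_DOCS = 4192; // assumed value for illustration
        const int maxDoc = 10000;            // hypothetical segment size

        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            Console.WriteLine($"copy raw docs [{docCount}..{docCount + len - 1}] ({len} docs)");
            docCount += len;
        }
        // prints chunks of 4192, 4192, and 1616 documents
    }
}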
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            // Used for bulk-reading raw bytes for stored fields
            int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
            int idx = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                Lucene40StoredFieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
                    // we can only bulk-copy if the matching reader is also a Lucene40FieldsReader
                    if (fieldsReader != null && fieldsReader is Lucene40StoredFieldsReader)
                    {
                        matchingFieldsReader = (Lucene40StoredFieldsReader)fieldsReader;
                    }
                }

                if (reader.LiveDocs != null)
                {
                    docCount += CopyFieldsWithDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths);
                }
                else
                {
                    docCount += CopyFieldsNoDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }
Example #9
        /// <summary>
        /// Safe (but slow) default method that writes every
        /// vector field in the document.
        /// </summary>
        protected internal void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Size;
            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.Size returns something valid?
                numFields = 0;
                foreach (string fieldName in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum termsEnum = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;
            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                Debug.Assert(lastFieldName == null || fieldName.CompareTo(lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName);
                lastFieldName = fieldName;

                Terms terms = vectors.Terms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions();
                bool hasOffsets = terms.HasOffsets();
                bool hasPayloads = terms.HasPayloads();
                Debug.Assert(!hasPayloads || hasPositions);

                int numTerms = (int)terms.Size();
                if (numTerms == -1)
                {
                    // count manually, since Terms.Size() is not a mandatory statistic
                    numTerms = 0;
                    termsEnum = terms.Iterator(termsEnum);
                    while (termsEnum.Next() != null)
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.Iterator(termsEnum);

                int termCount = 0;
                while (termsEnum.Next() != null)
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq();

                    StartTerm(termsEnum.Term(), freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        Debug.Assert(docsAndPositionsEnum != null);

                        int docID = docsAndPositionsEnum.NextDoc();
                        Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Debug.Assert(docsAndPositionsEnum.Freq() == freq);

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset();
                            int endOffset = docsAndPositionsEnum.EndOffset();

                            BytesRef payload = docsAndPositionsEnum.Payload;

                            Debug.Assert(!hasPositions || pos >= 0);
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                Debug.Assert(termCount == numTerms);
                FinishField();
            }
            Debug.Assert(fieldCount == numFields);
            FinishDocument();
        }
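// AddAllDocVectors drives the writer through a fixed callback sequence per
// document: StartDocument, then for each field StartField, then per term
// StartTerm / AddPosition / FinishTerm, then FinishField, and finally
// FinishDocument. The recorder below is a hypothetical stand-in (not the
// real TermVectorsWriter API) that prints that sequence for one made-up
// two-field document.
using System;

public class RecordingVectorsWriter
{
    public void StartDocument(int numFields) => Console.WriteLine($"StartDocument({numFields})");
    public void StartField(string name, int numTerms) => Console.WriteLine($"  StartField({name}, {numTerms})");
    public void StartTerm(string term, int freq) => Console.WriteLine($"    StartTerm({term}, {freq})");
    public void AddPosition(int pos) => Console.WriteLine($"      AddPosition({pos})");
    public void FinishTerm() => Console.WriteLine("    FinishTerm()");
    public void FinishField() => Console.WriteLine("  FinishField()");
    public void FinishDocument() => Console.WriteLine("FinishDocument()");

    public static void Main()
    {
        var w = new RecordingVectorsWriter();
        w.StartDocument(2);           // two vectored fields in this document
        w.StartField("title", 1);
        w.StartTerm("lucene", 1); w.AddPosition(0); w.FinishTerm();
        w.FinishField();
        w.StartField("body", 2);
        w.StartTerm("merge", 1); w.AddPosition(3); w.FinishTerm();
        w.StartTerm("terms", 1); w.AddPosition(7); w.FinishTerm();
        w.FinishField();
        w.FinishDocument();
    }
}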
Example #10
        /// <summary>
        /// Merges in the term vectors from the readers in
        /// <paramref name="mergeState"/>. The default implementation skips
        /// over deleted documents, and uses <see cref="StartDocument(int)"/>,
        /// <see cref="StartField(FieldInfo, int, bool, bool, bool)"/>,
        /// <see cref="StartTerm(BytesRef, int)"/>, <see cref="AddPosition(int, int, int, BytesRef)"/>,
        /// and <see cref="Finish(FieldInfos, int)"/>,
        /// returning the number of documents that were written.
        /// Implementations can override this method for more sophisticated
        /// merging (for example, bulk byte copying).
        /// </summary>
        public virtual int Merge(MergeState mergeState)
        {
            int docCount = 0;
            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader = mergeState.Readers[i];
                int maxDoc = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;

                for (int docID = 0; docID < maxDoc; docID++)
                {
                    if (liveDocs != null && !liveDocs.Get(docID))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docID);
                    AddAllDocVectors(vectors, mergeState);
                    docCount++;
                    mergeState.checkAbort.Work(300);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }