Exemplo n.º 1
0
 /// <summary> Test the term index.</summary>
 private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
 {
     var status = new Status.TermIndexStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: terms, freq, prox...");
         }
         
         TermEnum termEnum = reader.Terms();
         TermPositions termPositions = reader.TermPositions();
         
         // Used only to count up # deleted docs for this term
         var myTermDocs = new MySegmentTermDocs(reader);
         
         int maxDoc = reader.MaxDoc;
         
         while (termEnum.Next())
         {
             status.termCount++;
             Term term = termEnum.Term;
             int docFreq = termEnum.DocFreq();
             termPositions.Seek(term);
             int lastDoc = - 1;
             int freq0 = 0;
             status.totFreq += docFreq;
             while (termPositions.Next())
             {
                 freq0++;
                 int doc = termPositions.Doc;
                 int freq = termPositions.Freq;
                 if (doc <= lastDoc)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                 }
                 if (doc >= maxDoc)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                 }
                 
                 lastDoc = doc;
                 if (freq <= 0)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                 }
                 
                 int lastPos = - 1;
                 status.totPos += freq;
                 for (int j = 0; j < freq; j++)
                 {
                     int pos = termPositions.NextPosition();
                     if (pos < - 1)
                     {
                         throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                     }
                     if (pos < lastPos)
                     {
                         throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                     }
                     lastPos = pos;
                 }
             }
             
             // Now count how many deleted docs occurred in
             // this term:
             int delCount;
             if (reader.HasDeletions)
             {
                 myTermDocs.Seek(term);
                 while (myTermDocs.Next())
                 {
                 }
                 delCount = myTermDocs.delCount;
             }
             else
             {
                 delCount = 0;
             }
             
             if (freq0 + delCount != docFreq)
             {
                 throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
             }
         }
         
         Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
Exemplo n.º 2
0
        /// <summary> Test the term index.</summary>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
        {
            Status.TermIndexStatus status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                TermEnum      termEnum      = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc();

                while (termEnum.Next())
                {
                    status.termCount++;
                    Term term    = termEnum.Term();
                    int  docFreq = termEnum.DocFreq();
                    termPositions.Seek(term);
                    int lastDoc = -1;
                    int freq0   = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                    {
                        freq0++;
                        int doc  = termPositions.Doc();
                        int freq = termPositions.Freq();
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions())
                    {
                        myTermDocs.Seek(term);
                        while (myTermDocs.Next())
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return(status);
        }
Exemplo n.º 3
0
        /// <summary>
        /// checks Fields api is consistent with itself.
        /// searcher is optional, to verify with queries. Can be null.
        /// </summary>
        private static Status.TermIndexStatus CheckFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, bool doPrint, bool isVectors, TextWriter infoStream, bool verbose)
        {
            // TODO: we should probably return our own stats thing...?!

            Status.TermIndexStatus status = new Status.TermIndexStatus();
            int computedFieldCount = 0;

            if (fields == null)
            {
                Msg(infoStream, "OK [no fields/terms]");
                return status;
            }

            DocsEnum docs = null;
            DocsEnum docsAndFreqs = null;
            DocsAndPositionsEnum postings = null;

            string lastField = null;
            foreach (string field in fields)
            {
                // MultiFieldsEnum relies upon this order...
                if (lastField != null && field.CompareTo(lastField) <= 0)
                {
                    throw new Exception("fields out of order: lastField=" + lastField + " field=" + field);
                }
                lastField = field;

                // check that the field is in fieldinfos, and is indexed.
                // TODO: add a separate test to check this for different reader impls
                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                if (fieldInfo == null)
                {
                    throw new Exception("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field);
                }
                if (!fieldInfo.Indexed)
                {
                    throw new Exception("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field);
                }

                // TODO: really the codec should not return a field
                // from FieldsEnum if it has no Terms... but we do
                // this today:
                // assert fields.terms(field) != null;
                computedFieldCount++;

                Terms terms = fields.Terms(field);
                if (terms == null)
                {
                    continue;
                }

                bool hasFreqs = terms.HasFreqs();
                bool hasPositions = terms.HasPositions();
                bool hasPayloads = terms.HasPayloads();
                bool hasOffsets = terms.HasOffsets();

                // term vectors cannot omit TF:
                bool expectedHasFreqs = (isVectors || fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS);

                if (hasFreqs != expectedHasFreqs)
                {
                    throw new Exception("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
                }

                if (hasFreqs == false)
                {
                    if (terms.SumTotalTermFreq != -1)
                    {
                        throw new Exception("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.SumTotalTermFreq + " (should be -1)");
                    }
                }

                if (!isVectors)
                {
                    bool expectedHasPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    if (hasPositions != expectedHasPositions)
                    {
                        throw new Exception("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions);
                    }

                    bool expectedHasPayloads = fieldInfo.HasPayloads();
                    if (hasPayloads != expectedHasPayloads)
                    {
                        throw new Exception("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads);
                    }

                    bool expectedHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    if (hasOffsets != expectedHasOffsets)
                    {
                        throw new Exception("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets);
                    }
                }

                TermsEnum termsEnum = terms.Iterator(null);

                bool hasOrd = true;
                long termCountStart = status.DelTermCount + status.TermCount;

                BytesRef lastTerm = null;

                IComparer<BytesRef> termComp = terms.Comparator;

                long sumTotalTermFreq = 0;
                long sumDocFreq = 0;
                FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
                while (true)
                {
                    BytesRef term = termsEnum.Next();
                    if (term == null)
                    {
                        break;
                    }

                    Debug.Assert(term.Valid);

                    // make sure terms arrive in order according to
                    // the comp
                    if (lastTerm == null)
                    {
                        lastTerm = BytesRef.DeepCopyOf(term);
                    }
                    else
                    {
                        if (termComp.Compare(lastTerm, term) >= 0)
                        {
                            throw new Exception("terms out of order: lastTerm=" + lastTerm + " term=" + term);
                        }
                        lastTerm.CopyBytes(term);
                    }

                    int docFreq = termsEnum.DocFreq();
                    if (docFreq <= 0)
                    {
                        throw new Exception("docfreq: " + docFreq + " is out of bounds");
                    }
                    sumDocFreq += docFreq;

                    docs = termsEnum.Docs(liveDocs, docs);
                    postings = termsEnum.DocsAndPositions(liveDocs, postings);

                    if (hasFreqs == false)
                    {
                        if (termsEnum.TotalTermFreq() != -1)
                        {
                            throw new Exception("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.TotalTermFreq() + " (should be -1)");
                        }
                    }

                    if (hasOrd)
                    {
                        long ord = -1;
                        try
                        {
                            ord = termsEnum.Ord();
                        }
                        catch (System.NotSupportedException uoe)
                        {
                            hasOrd = false;
                        }

                        if (hasOrd)
                        {
                            long ordExpected = status.DelTermCount + status.TermCount - termCountStart;
                            if (ord != ordExpected)
                            {
                                throw new Exception("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
                            }
                        }
                    }

                    DocsEnum docs2;
                    if (postings != null)
                    {
                        docs2 = postings;
                    }
                    else
                    {
                        docs2 = docs;
                    }

                    int lastDoc = -1;
                    int docCount = 0;
                    long totalTermFreq = 0;
                    while (true)
                    {
                        int doc = docs2.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        status.TotFreq++;
                        visitedDocs.Set(doc);
                        int freq = -1;
                        if (hasFreqs)
                        {
                            freq = docs2.Freq();
                            if (freq <= 0)
                            {
                                throw new Exception("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                            }
                            status.TotPos += freq;
                            totalTermFreq += freq;
                        }
                        else
                        {
                            // When a field didn't index freq, it must
                            // consistently "lie" and pretend that freq was
                            // 1:
                            if (docs2.Freq() != 1)
                            {
                                throw new Exception("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
                            }
                        }
                        docCount++;

                        if (doc <= lastDoc)
                        {
                            throw new Exception("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new Exception("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;

                        int lastPos = -1;
                        int lastOffset = 0;
                        if (hasPositions)
                        {
                            for (int j = 0; j < freq; j++)
                            {
                                int pos = postings.NextPosition();

                                if (pos < 0)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                                }
                                if (pos < lastPos)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                                }
                                lastPos = pos;
                                BytesRef payload = postings.Payload;
                                if (payload != null)
                                {
                                    Debug.Assert(payload.Valid);
                                }
                                if (payload != null && payload.Length < 1)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.Length);
                                }
                                if (hasOffsets)
                                {
                                    int startOffset = postings.StartOffset();
                                    int endOffset = postings.EndOffset();
                                    // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
                                    // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
                                    if (!isVectors)
                                    {
                                        if (startOffset < 0)
                                        {
                                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
                                        }
                                        if (startOffset < lastOffset)
                                        {
                                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
                                        }
                                        if (endOffset < 0)
                                        {
                                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
                                        }
                                        if (endOffset < startOffset)
                                        {
                                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
                                        }
                                    }
                                    lastOffset = startOffset;
                                }
                            }
                        }
                    }

                    if (docCount != 0)
                    {
                        status.TermCount++;
                    }
                    else
                    {
                        status.DelTermCount++;
                    }

                    long totalTermFreq2 = termsEnum.TotalTermFreq();
                    bool hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;

                    // Re-count if there are deleted docs:
                    if (liveDocs != null)
                    {
                        if (hasFreqs)
                        {
                            DocsEnum docsNoDel = termsEnum.Docs(null, docsAndFreqs);
                            docCount = 0;
                            totalTermFreq = 0;
                            while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                visitedDocs.Set(docsNoDel.DocID());
                                docCount++;
                                totalTermFreq += docsNoDel.Freq();
                            }
                        }
                        else
                        {
                            DocsEnum docsNoDel = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE);
                            docCount = 0;
                            totalTermFreq = -1;
                            while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                visitedDocs.Set(docsNoDel.DocID());
                                docCount++;
                            }
                        }
                    }

                    if (docCount != docFreq)
                    {
                        throw new Exception("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
                    }
                    if (hasTotalTermFreq)
                    {
                        if (totalTermFreq2 <= 0)
                        {
                            throw new Exception("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
                        }
                        sumTotalTermFreq += totalTermFreq;
                        if (totalTermFreq != totalTermFreq2)
                        {
                            throw new Exception("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
                        }
                    }

                    // Test skipping
                    if (hasPositions)
                    {
                        for (int idx = 0; idx < 7; idx++)
                        {
                            int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8);
                            postings = termsEnum.DocsAndPositions(liveDocs, postings);
                            int docID = postings.Advance(skipDocID);
                            if (docID == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                break;
                            }
                            else
                            {
                                if (docID < skipDocID)
                                {
                                    throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                                }
                                int freq = postings.Freq();
                                if (freq <= 0)
                                {
                                    throw new Exception("termFreq " + freq + " is out of bounds");
                                }
                                int lastPosition = -1;
                                int lastOffset = 0;
                                for (int posUpto = 0; posUpto < freq; posUpto++)
                                {
                                    int pos = postings.NextPosition();

                                    if (pos < 0)
                                    {
                                        throw new Exception("position " + pos + " is out of bounds");
                                    }
                                    if (pos < lastPosition)
                                    {
                                        throw new Exception("position " + pos + " is < lastPosition " + lastPosition);
                                    }
                                    lastPosition = pos;
                                    if (hasOffsets)
                                    {
                                        int startOffset = postings.StartOffset();
                                        int endOffset = postings.EndOffset();
                                        // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
                                        // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
                                        if (!isVectors)
                                        {
                                            if (startOffset < 0)
                                            {
                                                throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
                                            }
                                            if (startOffset < lastOffset)
                                            {
                                                throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
                                            }
                                            if (endOffset < 0)
                                            {
                                                throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
                                            }
                                            if (endOffset < startOffset)
                                            {
                                                throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
                                            }
                                        }
                                        lastOffset = startOffset;
                                    }
                                }

                                int nextDocID = postings.NextDoc();
                                if (nextDocID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (nextDocID <= docID)
                                {
                                    throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                                }
                            }
                        }
                    }
                    else
                    {
                        for (int idx = 0; idx < 7; idx++)
                        {
                            int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8);
                            docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE);
                            int docID = docs.Advance(skipDocID);
                            if (docID == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                break;
                            }
                            else
                            {
                                if (docID < skipDocID)
                                {
                                    throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                                }
                                int nextDocID = docs.NextDoc();
                                if (nextDocID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (nextDocID <= docID)
                                {
                                    throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                                }
                            }
                        }
                    }
                }

                Terms fieldTerms = fields.Terms(field);
                if (fieldTerms == null)
                {
                    // Unusual: the FieldsEnum returned a field but
                    // the Terms for that field is null; this should
                    // only happen if it's a ghost field (field with
                    // no terms, eg there used to be terms but all
                    // docs got deleted and then merged away):
                }
                else
                {
                    if (fieldTerms is BlockTreeTermsReader.FieldReader)
                    {
                        BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader)fieldTerms).ComputeStats();
                        Debug.Assert(stats != null);
                        if (status.BlockTreeStats == null)
                        {
                            status.BlockTreeStats = new Dictionary<string, BlockTreeTermsReader.Stats>();
                        }
                        status.BlockTreeStats[field] = stats;
                    }

                    if (sumTotalTermFreq != 0)
                    {
                        long v = fields.Terms(field).SumTotalTermFreq;
                        if (v != -1 && sumTotalTermFreq != v)
                        {
                            throw new Exception("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
                        }
                    }

                    if (sumDocFreq != 0)
                    {
                        long v = fields.Terms(field).SumDocFreq;
                        if (v != -1 && sumDocFreq != v)
                        {
                            throw new Exception("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
                        }
                    }

                    if (fieldTerms != null)
                    {
                        int v = fieldTerms.DocCount;
                        if (v != -1 && visitedDocs.Cardinality() != v)
                        {
                            throw new Exception("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.Cardinality());
                        }
                    }

                    // Test seek to last term:
                    if (lastTerm != null)
                    {
                        if (termsEnum.SeekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND)
                        {
                            throw new Exception("seek to last term " + lastTerm + " failed");
                        }

                        int expectedDocFreq = termsEnum.DocFreq();
                        DocsEnum d = termsEnum.Docs(null, null, DocsEnum.FLAG_NONE);
                        int docFreq = 0;
                        while (d.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            docFreq++;
                        }
                        if (docFreq != expectedDocFreq)
                        {
                            throw new Exception("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
                        }
                    }

                    // check unique term count
                    long termCount = -1;

                    if ((status.DelTermCount + status.TermCount) - termCountStart > 0)
                    {
                        termCount = fields.Terms(field).Size();

                        if (termCount != -1 && termCount != status.DelTermCount + status.TermCount - termCountStart)
                        {
                            throw new Exception("termCount mismatch " + (status.DelTermCount + termCount) + " vs " + (status.TermCount - termCountStart));
                        }
                    }

                    // Test seeking by ord
                    if (hasOrd && status.TermCount - termCountStart > 0)
                    {
                        int seekCount = (int)Math.Min(10000L, termCount);
                        if (seekCount > 0)
                        {
                            BytesRef[] seekTerms = new BytesRef[seekCount];

                            // Seek by ord
                            for (int i = seekCount - 1; i >= 0; i--)
                            {
                                long ord = i * (termCount / seekCount);
                                termsEnum.SeekExact(ord);
                                seekTerms[i] = BytesRef.DeepCopyOf(termsEnum.Term());
                            }

                            // Seek by term
                            long totDocCount = 0;
                            for (int i = seekCount - 1; i >= 0; i--)
                            {
                                if (termsEnum.SeekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND)
                                {
                                    throw new Exception("seek to existing term " + seekTerms[i] + " failed");
                                }

                                docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE);
                                if (docs == null)
                                {
                                    throw new Exception("null DocsEnum from to existing term " + seekTerms[i]);
                                }

                                while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    totDocCount++;
                                }
                            }

                            long totDocCountNoDeletes = 0;
                            long totDocFreq = 0;
                            for (int i = 0; i < seekCount; i++)
                            {
                                if (!termsEnum.SeekExact(seekTerms[i]))
                                {
                                    throw new Exception("seek to existing term " + seekTerms[i] + " failed");
                                }

                                totDocFreq += termsEnum.DocFreq();
                                docs = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE);
                                if (docs == null)
                                {
                                    throw new Exception("null DocsEnum from to existing term " + seekTerms[i]);
                                }

                                while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    totDocCountNoDeletes++;
                                }
                            }

                            if (totDocCount > totDocCountNoDeletes)
                            {
                                throw new Exception("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes);
                            }

                            if (totDocCountNoDeletes != totDocFreq)
                            {
                                throw new Exception("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes);
                            }
                        }
                    }
                }
            }

            int fieldCount = fields.Size;

            if (fieldCount != -1)
            {
                if (fieldCount < 0)
                {
                    throw new Exception("invalid fieldCount: " + fieldCount);
                }
                if (fieldCount != computedFieldCount)
                {
                    throw new Exception("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount);
                }
            }

            // for most implementations, this is boring (just the sum across all fields)
            // but codecs that don't work per-field like preflex actually implement this,
            // but don't implement it on Terms, so the check isn't redundant.
            long uniqueTermCountAllFields = fields.UniqueTermCount;

            if (uniqueTermCountAllFields != -1 && status.TermCount + status.DelTermCount != uniqueTermCountAllFields)
            {
                throw new Exception("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.TermCount + status.DelTermCount));
            }

            if (doPrint)
            {
                Msg(infoStream, "OK [" + status.TermCount + " terms; " + status.TotFreq + " terms/docs pairs; " + status.TotPos + " tokens]");
            }

            if (verbose && status.BlockTreeStats != null && infoStream != null && status.TermCount > 0)
            {
                foreach (KeyValuePair<string, BlockTreeTermsReader.Stats> ent in status.BlockTreeStats)
                {
                    infoStream.WriteLine("      field \"" + ent.Key + "\":");
                    infoStream.WriteLine("      " + ent.Value.ToString().Replace("\n", "\n      "));
                }
            }

            return status;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Test the term index.
        /// @lucene.experimental
        /// </summary>
        public static Status.TermIndexStatus TestPostings(AtomicReader reader, TextWriter infoStream, bool verbose)
        {
            // TODO: we should go and verify term vectors match, if
            // crossCheckTermVectors is on...

            Status.TermIndexStatus status;
            int maxDoc = reader.MaxDoc;
            Bits liveDocs = reader.LiveDocs;

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                Fields fields = reader.Fields;
                FieldInfos fieldInfos = reader.FieldInfos;
                status = CheckFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose);
                if (liveDocs != null)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write("    test (ignoring deletes): terms, freq, prox...");
                    }
                    CheckFields(fields, null, maxDoc, fieldInfos, true, false, infoStream, verbose);
                }
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR: " + e);
                status = new Status.TermIndexStatus();
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Test the term index.
        /// @lucene.experimental
        /// </summary>
        public static Status.TermIndexStatus TestPostings(AtomicReader reader, StreamWriter infoStream, bool verbose)
        {
            // TODO: we should go and verify term vectors match, if
            // crossCheckTermVectors is on...

            Status.TermIndexStatus status;
            int maxDoc = reader.MaxDoc;
            Bits liveDocs = reader.LiveDocs;

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                Fields fields = reader.Fields;
                FieldInfos fieldInfos = reader.FieldInfos;
                status = CheckFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose);
                if (liveDocs != null)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write("    test (ignoring deletes): terms, freq, prox...");
                    }
                    CheckFields(fields, null, maxDoc, fieldInfos, true, false, infoStream, verbose);
                }
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR: " + e);
                status = new Status.TermIndexStatus();
                status.Error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }