Example #1
0
        /// <summary> Test term vectors for a segment.</summary>
        private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
        {
            Status.TermVectorStatus status = new Status.TermVectorStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: term vectors........");
                }

                for (int j = 0; j < info.docCount; ++j)
                {
                    if (!reader.IsDeleted(j))
                    {
                        status.docCount++;
                        TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                        if (tfv != null)
                        {
                            status.totVectors += tfv.Length;
                        }
                    }
                }

                Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float)status.totVectors) / status.docCount) }));
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return(status);
        }
Example #2
0
 /// <summary> Test term vectors for a segment.</summary>
 private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
 {
     var status = new Status.TermVectorStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: term vectors........");
         }
         
         for (int j = 0; j < info.docCount; ++j)
         {
             if (!reader.IsDeleted(j))
             {
                 status.docCount++;
                 ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                 if (tfv != null)
                 {
                     status.totVectors += tfv.Length;
                 }
             }
         }
         
         Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
Example #3
0
        /// <summary>
        /// Test term vectors.
        /// @lucene.experimental
        /// </summary>
        public static Status.TermVectorStatus TestTermVectors(AtomicReader reader, TextWriter infoStream, bool verbose, bool crossCheckTermVectors)
        {
            Status.TermVectorStatus status = new Status.TermVectorStatus();
            FieldInfos fieldInfos = reader.FieldInfos;
            Bits onlyDocIsDeleted = new FixedBitSet(1);

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: term vectors........");
                }

                DocsEnum docs = null;
                DocsAndPositionsEnum postings = null;

                // Only used if crossCheckTermVectors is true:
                DocsEnum postingsDocs = null;
                DocsAndPositionsEnum postingsPostings = null;

                Bits liveDocs = reader.LiveDocs;

                Fields postingsFields;
                // TODO: testTermsIndex
                if (crossCheckTermVectors)
                {
                    postingsFields = reader.Fields;
                }
                else
                {
                    postingsFields = null;
                }

                TermsEnum termsEnum = null;
                TermsEnum postingsTermsEnum = null;

                for (int j = 0; j < reader.MaxDoc; ++j)
                {
                    // Intentionally pull/visit (but don't count in
                    // stats) deleted documents to make sure they too
                    // are not corrupt:
                    Fields tfv = reader.GetTermVectors(j);

                    // TODO: can we make a IS(FIR) that searches just
                    // this term vector... to pass for searcher?

                    if (tfv != null)
                    {
                        // First run with no deletions:
                        CheckFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose);

                        // Again, with the one doc deleted:
                        CheckFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);

                        // Only agg stats if the doc is live:
                        bool doStats = liveDocs == null || liveDocs.Get(j);
                        if (doStats)
                        {
                            status.DocCount++;
                        }

                        foreach (string field in tfv)
                        {
                            if (doStats)
                            {
                                status.TotVectors++;
                            }

                            // Make sure FieldInfo thinks this field is vector'd:
                            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                            if (!fieldInfo.HasVectors())
                            {
                                throw new Exception("docID=" + j + " has term vectors for field=" + field + " but FieldInfo has storeTermVector=false");
                            }

                            if (crossCheckTermVectors)
                            {
                                Terms terms = tfv.Terms(field);
                                termsEnum = terms.Iterator(termsEnum);
                                bool postingsHasFreq = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
                                bool postingsHasPayload = fieldInfo.HasPayloads();
                                bool vectorsHasPayload = terms.HasPayloads();

                                Terms postingsTerms = postingsFields.Terms(field);
                                if (postingsTerms == null)
                                {
                                    throw new Exception("vector field=" + field + " does not exist in postings; doc=" + j);
                                }
                                postingsTermsEnum = postingsTerms.Iterator(postingsTermsEnum);

                                bool hasProx = terms.HasOffsets() || terms.HasPositions();
                                BytesRef term = null;
                                while ((term = termsEnum.Next()) != null)
                                {
                                    if (hasProx)
                                    {
                                        postings = termsEnum.DocsAndPositions(null, postings);
                                        Debug.Assert(postings != null);
                                        docs = null;
                                    }
                                    else
                                    {
                                        docs = termsEnum.Docs(null, docs);
                                        Debug.Assert(docs != null);
                                        postings = null;
                                    }

                                    DocsEnum docs2;
                                    if (hasProx)
                                    {
                                        Debug.Assert(postings != null);
                                        docs2 = postings;
                                    }
                                    else
                                    {
                                        Debug.Assert(docs != null);
                                        docs2 = docs;
                                    }

                                    DocsEnum postingsDocs2;
                                    if (!postingsTermsEnum.SeekExact(term))
                                    {
                                        throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                                    }
                                    postingsPostings = postingsTermsEnum.DocsAndPositions(null, postingsPostings);
                                    if (postingsPostings == null)
                                    {
                                        // Term vectors were indexed w/ pos but postings were not
                                        postingsDocs = postingsTermsEnum.Docs(null, postingsDocs);
                                        if (postingsDocs == null)
                                        {
                                            throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                                        }
                                    }

                                    if (postingsPostings != null)
                                    {
                                        postingsDocs2 = postingsPostings;
                                    }
                                    else
                                    {
                                        postingsDocs2 = postingsDocs;
                                    }

                                    int advanceDoc = postingsDocs2.Advance(j);
                                    if (advanceDoc != j)
                                    {
                                        throw new Exception("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
                                    }

                                    int doc = docs2.NextDoc();

                                    if (doc != 0)
                                    {
                                        throw new Exception("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
                                    }

                                    if (postingsHasFreq)
                                    {
                                        int tf = docs2.Freq();
                                        if (postingsHasFreq && postingsDocs2.Freq() != tf)
                                        {
                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.Freq());
                                        }

                                        if (hasProx)
                                        {
                                            for (int i = 0; i < tf; i++)
                                            {
                                                int pos = postings.NextPosition();
                                                if (postingsPostings != null)
                                                {
                                                    int postingsPos = postingsPostings.NextPosition();
                                                    if (terms.HasPositions() && pos != postingsPos)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
                                                    }
                                                }

                                                // Call the methods to at least make
                                                // sure they don't throw exc:
                                                int startOffset = postings.StartOffset();
                                                int endOffset = postings.EndOffset();
                                                // TODO: these are too anal...?
                                                /*
                                                  if (endOffset < startOffset) {
                                                  throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
                                                  }
                                                  if (startOffset < lastStartOffset) {
                                                  throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
                                                  }
                                                  lastStartOffset = startOffset;
                                                */

                                                if (postingsPostings != null)
                                                {
                                                    int postingsStartOffset = postingsPostings.StartOffset();

                                                    int postingsEndOffset = postingsPostings.EndOffset();
                                                    if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
                                                    }
                                                    if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
                                                    }
                                                }

                                                BytesRef payload = postings.Payload;

                                                if (payload != null)
                                                {
                                                    Debug.Assert(vectorsHasPayload);
                                                }

                                                if (postingsHasPayload && vectorsHasPayload)
                                                {
                                                    Debug.Assert(postingsPostings != null);

                                                    if (payload == null)
                                                    {
                                                        // we have payloads, but not at this position.
                                                        // postings has payloads too, it should not have one at this position
                                                        if (postingsPostings.Payload != null)
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.Payload);
                                                        }
                                                    }
                                                    else
                                                    {
                                                        // we have payloads, and one at this position
                                                        // postings should also have one at this position, with the same bytes.
                                                        if (postingsPostings.Payload == null)
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
                                                        }
                                                        BytesRef postingsPayload = postingsPostings.Payload;
                                                        if (!payload.Equals(postingsPayload))
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                float vectorAvg = status.DocCount == 0 ? 0 : status.TotVectors / (float)status.DocCount;
                Msg(infoStream, "OK [" + status.TotVectors + " total vector count; avg " + vectorAvg.ToString(CultureInfo.InvariantCulture.NumberFormat) + " term/freq vector fields per doc]");
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }