public override void Warm(AtomicReader reader)
{
    long startTime = Environment.TickCount;
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.IsIndexed)
        {
            reader.GetTerms(info.Name);
            indexedCount++;

            if (info.HasNorms)
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }

        if (info.HasDocValues)
        {
            switch (info.DocValuesType)
            {
                case DocValuesType.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;

                case DocValuesType.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;

                case DocValuesType.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;

                case DocValuesType.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;

                default:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(false); // unknown dv type
                    }
                    break;
            }
            docValuesCount++;
        }
    }

    reader.Document(0);
    reader.GetTermVectors(0);

    if (infoStream.IsEnabled("SMSW"))
    {
        infoStream.Message("SMSW",
            "Finished warming segment: " + reader +
            ", indexed=" + indexedCount +
            ", docValues=" + docValuesCount +
            ", norms=" + normsCount +
            ", time=" + (Environment.TickCount - startTime));
    }
}
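// Usage sketch (hedged): one plausible way to register a warmer like the one
// above so newly merged segments are pre-loaded before searches see them.
// Assumes Lucene.NET 4.8-style APIs; the index path and analyzer choice are
// placeholders, not taken from the code above.
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var config = new IndexWriterConfig(LuceneVersion.LUCENE_48,
    new StandardAnalyzer(LuceneVersion.LUCENE_48))
{
    // SimpleMergedSegmentWarmer performs the per-field warming shown above.
    MergedSegmentWarmer = new SimpleMergedSegmentWarmer(InfoStream.Default)
};
using (var writer = new IndexWriter(FSDirectory.Open("path/to/index"), config))
{
    // Merges triggered by adds/commits will now call Warm() on each newly
    // merged segment before it becomes visible to reopened readers.
    writer.Commit();
}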
/// <summary>
/// Test term vectors.
/// @lucene.experimental
/// </summary>
public static Status.TermVectorStatus TestTermVectors(AtomicReader reader, TextWriter infoStream, bool verbose, bool crossCheckTermVectors)
{
    Status.TermVectorStatus status = new Status.TermVectorStatus();
    FieldInfos fieldInfos = reader.FieldInfos;
    Bits onlyDocIsDeleted = new FixedBitSet(1);
    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: term vectors........");
        }

        DocsEnum docs = null;
        DocsAndPositionsEnum postings = null;

        // Only used if crossCheckTermVectors is true:
        DocsEnum postingsDocs = null;
        DocsAndPositionsEnum postingsPostings = null;

        Bits liveDocs = reader.LiveDocs;

        Fields postingsFields;
        // TODO: testTermsIndex
        if (crossCheckTermVectors)
        {
            postingsFields = reader.Fields;
        }
        else
        {
            postingsFields = null;
        }

        TermsEnum termsEnum = null;
        TermsEnum postingsTermsEnum = null;

        for (int j = 0; j < reader.MaxDoc; ++j)
        {
            // Intentionally pull/visit (but don't count in
            // stats) deleted documents to make sure they too
            // are not corrupt:
            Fields tfv = reader.GetTermVectors(j);

            // TODO: can we make a IS(FIR) that searches just
            // this term vector... to pass for searcher?

            if (tfv != null)
            {
                // First run with no deletions:
                CheckFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose);

                // Again, with the one doc deleted:
                CheckFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);

                // Only agg stats if the doc is live:
                bool doStats = liveDocs == null || liveDocs.Get(j);
                if (doStats)
                {
                    status.DocCount++;
                }

                foreach (string field in tfv)
                {
                    if (doStats)
                    {
                        status.TotVectors++;
                    }

                    // Make sure FieldInfo thinks this field is vector'd:
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                    if (!fieldInfo.HasVectors())
                    {
                        throw new Exception("docID=" + j + " has term vectors for field=" + field + " but FieldInfo has storeTermVector=false");
                    }

                    if (crossCheckTermVectors)
                    {
                        Terms terms = tfv.Terms(field);
                        termsEnum = terms.Iterator(termsEnum);
                        bool postingsHasFreq = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        bool postingsHasPayload = fieldInfo.HasPayloads();
                        bool vectorsHasPayload = terms.HasPayloads();

                        Terms postingsTerms = postingsFields.Terms(field);
                        if (postingsTerms == null)
                        {
                            throw new Exception("vector field=" + field + " does not exist in postings; doc=" + j);
                        }
                        postingsTermsEnum = postingsTerms.Iterator(postingsTermsEnum);

                        bool hasProx = terms.HasOffsets() || terms.HasPositions();
                        BytesRef term = null;
                        while ((term = termsEnum.Next()) != null)
                        {
                            if (hasProx)
                            {
                                postings = termsEnum.DocsAndPositions(null, postings);
                                Debug.Assert(postings != null);
                                docs = null;
                            }
                            else
                            {
                                docs = termsEnum.Docs(null, docs);
                                Debug.Assert(docs != null);
                                postings = null;
                            }

                            DocsEnum docs2;
                            if (hasProx)
                            {
                                Debug.Assert(postings != null);
                                docs2 = postings;
                            }
                            else
                            {
                                Debug.Assert(docs != null);
                                docs2 = docs;
                            }

                            DocsEnum postingsDocs2;
                            if (!postingsTermsEnum.SeekExact(term))
                            {
                                throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                            }
                            postingsPostings = postingsTermsEnum.DocsAndPositions(null, postingsPostings);
                            if (postingsPostings == null)
                            {
                                // Term vectors were indexed w/ pos but postings were not
                                postingsDocs = postingsTermsEnum.Docs(null, postingsDocs);
                                if (postingsDocs == null)
                                {
                                    throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                                }
                            }

                            if (postingsPostings != null)
                            {
                                postingsDocs2 = postingsPostings;
                            }
                            else
                            {
                                postingsDocs2 = postingsDocs;
                            }

                            int advanceDoc = postingsDocs2.Advance(j);
                            if (advanceDoc != j)
                            {
                                throw new Exception("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
                            }

                            int doc = docs2.NextDoc();
                            if (doc != 0)
                            {
                                throw new Exception("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
                            }

                            if (postingsHasFreq)
                            {
                                int tf = docs2.Freq();
                                if (postingsHasFreq && postingsDocs2.Freq() != tf)
                                {
                                    throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.Freq());
                                }

                                if (hasProx)
                                {
                                    for (int i = 0; i < tf; i++)
                                    {
                                        int pos = postings.NextPosition();
                                        if (postingsPostings != null)
                                        {
                                            int postingsPos = postingsPostings.NextPosition();
                                            if (terms.HasPositions() && pos != postingsPos)
                                            {
                                                throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
                                            }
                                        }

                                        // Call the methods to at least make
                                        // sure they don't throw exc:
                                        int startOffset = postings.StartOffset();
                                        int endOffset = postings.EndOffset();
                                        // TODO: these are too anal...?
                                        /*
                                        if (endOffset < startOffset) {
                                            throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
                                        }
                                        if (startOffset < lastStartOffset) {
                                            throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
                                        }
                                        lastStartOffset = startOffset;
                                        */

                                        if (postingsPostings != null)
                                        {
                                            int postingsStartOffset = postingsPostings.StartOffset();
                                            int postingsEndOffset = postingsPostings.EndOffset();
                                            if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset)
                                            {
                                                throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
                                            }
                                            if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset)
                                            {
                                                throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
                                            }
                                        }

                                        BytesRef payload = postings.Payload;

                                        if (payload != null)
                                        {
                                            Debug.Assert(vectorsHasPayload);
                                        }

                                        if (postingsHasPayload && vectorsHasPayload)
                                        {
                                            Debug.Assert(postingsPostings != null);

                                            if (payload == null)
                                            {
                                                // We have payloads, but not at this position.
                                                // Postings has payloads too; it should not have one at this position:
                                                if (postingsPostings.Payload != null)
                                                {
                                                    throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.Payload);
                                                }
                                            }
                                            else
                                            {
                                                // We have payloads, and one at this position.
                                                // Postings should also have one at this position, with the same bytes:
                                                if (postingsPostings.Payload == null)
                                                {
                                                    throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
                                                }
                                                BytesRef postingsPayload = postingsPostings.Payload;
                                                if (!payload.Equals(postingsPayload))
                                                {
                                                    throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        float vectorAvg = status.DocCount == 0 ? 0 : status.TotVectors / (float)status.DocCount;
        Msg(infoStream, "OK [" + status.TotVectors + " total vector count; avg " + vectorAvg.ToString(CultureInfo.InvariantCulture.NumberFormat) + " term/freq vector fields per doc]");
    }
    catch (Exception e)
    {
        Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
        status.Error = e;
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace; we
            // need ToString() for that.
            infoStream.WriteLine(e.ToString());
            //infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
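// Usage sketch (hedged): TestTermVectors is normally driven through CheckIndex
// rather than called directly. A minimal invocation, assuming Lucene.NET
// 4.8-style APIs and a placeholder index path, might look like this:
using Lucene.Net.Index;
using Lucene.Net.Store;

using (var dir = FSDirectory.Open("path/to/index"))
{
    var checker = new CheckIndex(dir)
    {
        CrossCheckTermVectors = true, // also cross-check vectors against postings, as above
        InfoStream = Console.Out
    };
    CheckIndex.Status result = checker.DoCheckIndex();
    Console.WriteLine(result.Clean ? "Index is clean" : "Index has problems");
}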
public override void Warm(AtomicReader reader)
{
    // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.IsIndexed)
        {
            reader.GetTerms(info.Name);
            indexedCount++;

            if (info.HasNorms)
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }

        if (info.HasDocValues)
        {
            switch (info.DocValuesType)
            {
                case DocValuesType.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;

                case DocValuesType.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;

                case DocValuesType.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;

                case DocValuesType.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;

                default:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(false); // unknown dv type
                    }
                    break;
            }
            docValuesCount++;
        }
    }

    reader.Document(0);
    reader.GetTermVectors(0);

    if (infoStream.IsEnabled("SMSW"))
    {
        // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        infoStream.Message("SMSW",
            "Finished warming segment: " + reader +
            ", indexed=" + indexedCount +
            ", docValues=" + docValuesCount +
            ", norms=" + normsCount +
            ", time=" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startTime));
    }
}
public override Fields GetTermVectors(int docID)
{
    EnsureOpen();
    return m_input.GetTermVectors(docID);
}
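// Usage sketch (hedged): a small helper that walks the Fields returned by
// GetTermVectors(docID) for one document. It follows the enumerator
// conventions used in TestTermVectors above (Terms(field), Iterator(...),
// Next()); the method name DumpTermVectors is hypothetical.
private static void DumpTermVectors(AtomicReader reader, int docID)
{
    Fields tvFields = reader.GetTermVectors(docID); // null if the doc stored no vectors
    if (tvFields == null)
    {
        return;
    }
    foreach (string field in tvFields)
    {
        Terms terms = tvFields.Terms(field);
        TermsEnum termsEnum = terms.Iterator(null);
        BytesRef term;
        while ((term = termsEnum.Next()) != null)
        {
            // For a single-document term vector, TotalTermFreq() is the
            // term's frequency within this document.
            Console.WriteLine(field + ": " + term.Utf8ToString() + " freq=" + termsEnum.TotalTermFreq());
        }
    }
}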
public override void Warm(AtomicReader reader)
{
    // Elapsed-time base in milliseconds. (Environment.TickCount is a monotonic
    // millisecond counter; DateTime.Now.Millisecond only exposes the 0-999
    // sub-second component and cannot measure a duration.)
    long startTime = Environment.TickCount;
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.Indexed)
        {
            reader.Terms(info.Name);
            indexedCount++;

            if (info.HasNorms())
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }

        if (info.HasDocValues())
        {
            switch (info.DocValuesType)
            {
                case DocValuesType_e.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;

                case DocValuesType_e.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;

                case DocValuesType_e.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;

                case DocValuesType_e.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;

                default:
                    Debug.Assert(false); // unknown dv type
                    break;
            }
            docValuesCount++;
        }
    }

    reader.Document(0);
    reader.GetTermVectors(0);

    if (InfoStream.IsEnabled("SMSW"))
    {
        InfoStream.Message("SMSW",
            "Finished warming segment: " + reader +
            ", indexed=" + indexedCount +
            ", docValues=" + docValuesCount +
            ", norms=" + normsCount +
            ", time=" + (Environment.TickCount - startTime));
    }
}
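// Timing sketch (hedged): in standalone .NET code, System.Diagnostics.Stopwatch
// is the idiomatic monotonic timer. It avoids the sub-second-component pitfall
// of DateTime.Now.Millisecond noted above as well as Environment.TickCount's
// ~24.9-day wrap-around. WarmTimer and TimeMs are hypothetical names.
using System;
using System.Diagnostics;

public static class WarmTimer
{
    public static long TimeMs(Action warmAction)
    {
        Stopwatch sw = Stopwatch.StartNew();
        warmAction(); // e.g. () => warmer.Warm(reader)
        return sw.ElapsedMilliseconds;
    }
}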