public virtual void TestDocValuesUnstored()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwconfig.SetMergePolicy(NewLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, iwconfig);
    for (int i = 0; i < 50; i++)
    {
        Document doc = new Document();
        doc.Add(new NumericDocValuesField("dv", i));
        doc.Add(new TextField("docId", "" + i, Field.Store.YES));
        writer.AddDocument(doc);
    }
    DirectoryReader r = writer.GetReader();
    AtomicReader slow = SlowCompositeReaderWrapper.Wrap(r);
    FieldInfos fi = slow.FieldInfos;
    FieldInfo dvInfo = fi.FieldInfo("dv");
    Assert.IsTrue(dvInfo.HasDocValues);
    NumericDocValues dv = slow.GetNumericDocValues("dv");
    for (int i = 0; i < 50; i++)
    {
        Assert.AreEqual(i, dv.Get(i));
        Document d = slow.Document(i);
        // cannot use d.Get("dv") due to another bug!
        Assert.IsNull(d.GetField("dv"));
        Assert.AreEqual(Convert.ToString(i), d.Get("docId"));
    }
    slow.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public override void Warm(AtomicReader reader)
{
    if (VERBOSE)
    {
        Console.WriteLine("TEST: now warm merged reader=" + reader);
    }
    OuterInstance.Warmed[(SegmentCoreReaders)reader.CoreCacheKey] = true;
    int maxDoc = reader.MaxDoc;
    Bits liveDocs = reader.LiveDocs;
    int sum = 0;
    int inc = Math.Max(1, maxDoc / 50);
    for (int docID = 0; docID < maxDoc; docID += inc)
    {
        if (liveDocs == null || liveDocs.Get(docID))
        {
            Document doc = reader.Document(docID);
            sum += doc.Fields.Count;
        }
    }
    IndexSearcher searcher = NewSearcher(reader);
    sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: warm visited " + sum + " fields");
    }
}
public override void Warm(AtomicReader reader)
{
    long startTime = Environment.TickCount;
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.IsIndexed)
        {
            reader.GetTerms(info.Name);
            indexedCount++;
            if (info.HasNorms)
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }
        if (info.HasDocValues)
        {
            switch (info.DocValuesType)
            {
                case DocValuesType.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;
                case DocValuesType.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;
                case DocValuesType.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;
                case DocValuesType.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;
                default:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(false); // unknown dv type
                    }
                    break;
            }
            docValuesCount++;
        }
    }
    reader.Document(0);
    reader.GetTermVectors(0);
    if (infoStream.IsEnabled("SMSW"))
    {
        infoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (Environment.TickCount - startTime));
    }
}
private static void PrintDocs(DirectoryReader r)
{
    foreach (AtomicReaderContext ctx in r.Leaves)
    {
        // TODO: improve this
        AtomicReader sub = (AtomicReader)ctx.Reader;
        IBits liveDocs = sub.LiveDocs;
        Console.WriteLine(" " + ((SegmentReader)sub).SegmentInfo);
        for (int docID = 0; docID < sub.MaxDoc; docID++)
        {
            Document doc = sub.Document(docID);
            if (liveDocs is null || liveDocs.Get(docID))
            {
                Console.WriteLine(" docID=" + docID + " id:" + doc.Get("id"));
            }
        }
    }
}
public virtual void TestFloatNorms()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);
    LineFileDocs docs = new LineFileDocs(Random);
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        float nextFloat = Random.NextSingle();
        // Cast to a double to get more precision output to the string.
        Field f = new TextField(floatTestField, ((double)nextFloat).ToString(CultureInfo.InvariantCulture), Field.Store.YES);
        f.Boost = nextFloat;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(floatTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues norms = open.GetNormValues(floatTestField);
    Assert.IsNotNull(norms);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        float expected = Convert.ToSingle(document.Get(floatTestField), CultureInfo.InvariantCulture);
        Assert.AreEqual(expected, J2N.BitConversion.Int32BitsToSingle((int)norms.Get(i)), 0.0f);
    }
    open.Dispose();
    dir.Dispose();
    docs.Dispose();
}
public virtual void TestMaxByteNorms()
{
    Directory dir = NewFSDirectory(CreateTempDir("TestNorms.testMaxByteNorms"));
    BuildIndex(dir);
    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues normValues = open.GetNormValues(ByteTestField);
    Assert.IsNotNull(normValues);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        int expected = Convert.ToInt32(document.Get(ByteTestField));
        Assert.AreEqual(expected, normValues.Get(i) & 0xff);
    }
    open.Dispose();
    dir.Dispose();
}
public override void Warm(AtomicReader reader)
{
    if (Verbose)
    {
        Console.WriteLine("TEST: now warm merged reader=" + reader);
    }
#if !FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
    UninterruptableMonitor.Enter(outerInstance.warmedLock);
    try
    {
#endif
        outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true);
#if !FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
    }
    finally
    {
        UninterruptableMonitor.Exit(outerInstance.warmedLock);
    }
#endif
    int maxDoc = reader.MaxDoc;
    IBits liveDocs = reader.LiveDocs;
    int sum = 0;
    int inc = Math.Max(1, maxDoc / 50);
    for (int docID = 0; docID < maxDoc; docID += inc)
    {
        if (liveDocs is null || liveDocs.Get(docID))
        {
            Document doc = reader.Document(docID);
            sum += doc.Fields.Count;
        }
    }
    IndexSearcher searcher = NewSearcher(reader);
    sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;
    if (Verbose)
    {
        Console.WriteLine("TEST: warm visited " + sum + " fields");
    }
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestNumericField()
{
    Directory dir = NewDirectory();
    var w = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone);
    var numDocs = AtLeast(500);
    var answers = new object[numDocs];
    NumericType[] typeAnswers = new NumericType[numDocs];
    for (int id = 0; id < numDocs; id++)
    {
        Document doc = new Document();
        Field nf;
        Field sf;
        object answer;
        NumericType typeAnswer;
        if (Random().NextBoolean())
        {
            // float/double
            if (Random().NextBoolean())
            {
                float f = Random().NextFloat();
                answer = Convert.ToSingle(f, CultureInfo.InvariantCulture);
                nf = new SingleField("nf", f, Field.Store.NO);
                sf = new StoredField("nf", f);
                typeAnswer = NumericType.SINGLE;
            }
            else
            {
                double d = Random().NextDouble();
                answer = Convert.ToDouble(d, CultureInfo.InvariantCulture);
                nf = new DoubleField("nf", d, Field.Store.NO);
                sf = new StoredField("nf", d);
                typeAnswer = NumericType.DOUBLE;
            }
        }
        else
        {
            // int/long
            if (Random().NextBoolean())
            {
                int i = Random().Next();
                answer = Convert.ToInt32(i, CultureInfo.InvariantCulture);
                nf = new Int32Field("nf", i, Field.Store.NO);
                sf = new StoredField("nf", i);
                typeAnswer = NumericType.INT32;
            }
            else
            {
                long l = Random().NextLong();
                answer = Convert.ToInt64(l, CultureInfo.InvariantCulture);
                nf = new Int64Field("nf", l, Field.Store.NO);
                sf = new StoredField("nf", l);
                typeAnswer = NumericType.INT64;
            }
        }
        doc.Add(nf);
        doc.Add(sf);
        answers[id] = answer;
        typeAnswers[id] = typeAnswer;
        FieldType ft = new FieldType(Int32Field.TYPE_STORED);
        ft.NumericPrecisionStep = int.MaxValue;
        doc.Add(new Int32Field("id", id, ft));
        w.AddDocument(doc);
    }
    DirectoryReader r = w.Reader;
    w.Dispose();
    Assert.AreEqual(numDocs, r.NumDocs);
    foreach (AtomicReaderContext ctx in r.Leaves)
    {
        AtomicReader sub = (AtomicReader)ctx.Reader;
        FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
        for (int docID = 0; docID < sub.NumDocs; docID++)
        {
            Document doc = sub.Document(docID);
            Field f = (Field)doc.GetField("nf");
            Assert.IsTrue(f is StoredField, "got f=" + f);
            Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
        }
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Test stored fields.
/// @lucene.experimental
/// </summary>
public static Status.StoredFieldStatus TestStoredFields(AtomicReader reader, TextWriter infoStream)
{
    Status.StoredFieldStatus status = new Status.StoredFieldStatus();
    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }
        // Scan stored fields for all documents
        Bits liveDocs = reader.LiveDocs;
        for (int j = 0; j < reader.MaxDoc; ++j)
        {
            // Intentionally pull even deleted documents to
            // make sure they too are not corrupt:
            Document doc = reader.Document(j);
            if (liveDocs == null || liveDocs.Get(j))
            {
                status.DocCount++;
                status.TotFields += doc.Fields.Count;
            }
        }
        // Validate docCount
        if (status.DocCount != reader.NumDocs)
        {
            throw new Exception("docCount=" + status.DocCount + " but saw " + reader.NumDocs + " undeleted docs");
        }
        Msg(infoStream, "OK [" + status.TotFields + " total field count; avg " + (((float)status.TotFields) / status.DocCount).ToString(CultureInfo.InvariantCulture.NumberFormat) + " fields per doc]");
    }
    catch (Exception e)
    {
        Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
        status.Error = e;
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace, we
            // need ToString() for that.
            infoStream.WriteLine(e.ToString());
            //infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
public override void Warm(AtomicReader reader)
{
    long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.IsIndexed)
        {
            reader.GetTerms(info.Name);
            indexedCount++;
            if (info.HasNorms)
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }
        if (info.HasDocValues)
        {
            switch (info.DocValuesType)
            {
                case DocValuesType.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;
                case DocValuesType.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;
                case DocValuesType.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;
                case DocValuesType.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;
                default:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(false); // unknown dv type
                    }
                    break;
            }
            docValuesCount++;
        }
    }
    reader.Document(0);
    reader.GetTermVectors(0);
    if (infoStream.IsEnabled("SMSW"))
    {
        infoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startTime)); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    }
}
public override void Warm(AtomicReader reader)
{
    if (VERBOSE)
    {
        Console.WriteLine("TEST: now warm merged reader=" + reader);
    }
    OuterInstance.Warmed[(SegmentCoreReaders)reader.CoreCacheKey] = true;
    int maxDoc = reader.MaxDoc;
    Bits liveDocs = reader.LiveDocs;
    int sum = 0;
    int inc = Math.Max(1, maxDoc / 50);
    for (int docID = 0; docID < maxDoc; docID += inc)
    {
        if (liveDocs == null || liveDocs.Get(docID))
        {
            Document doc = reader.Document(docID);
            sum += doc.Fields.Count;
        }
    }
    IndexSearcher searcher = OuterInstance.NewSearcher(reader);
    sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: warm visited " + sum + " fields");
    }
}
public override void Document(int docID, StoredFieldVisitor visitor)
{
    EnsureOpen();
    m_input.Document(docID, visitor);
}
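For context, a minimal usage sketch of the visitor overload delegated above. DocumentStoredFieldVisitor is the stock Lucene.NET visitor; the reader, docID, and field name below are illustrative assumptions, not taken from the original source:

// Hypothetical caller: load only the "id" stored field instead of
// materializing the whole document.
var visitor = new DocumentStoredFieldVisitor("id"); // visit just "id"
reader.Document(docID, visitor);                    // dispatches through the override above
Document partial = visitor.Document;                // contains only the requested field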
public override void Warm(AtomicReader reader)
{
    // Environment.TickCount gives elapsed milliseconds; DateTime.Now.Millisecond
    // only returns the 0-999 millisecond component and cannot measure durations.
    long startTime = Environment.TickCount;
    int indexedCount = 0;
    int docValuesCount = 0;
    int normsCount = 0;
    foreach (FieldInfo info in reader.FieldInfos)
    {
        if (info.Indexed)
        {
            reader.Terms(info.Name);
            indexedCount++;
            if (info.HasNorms())
            {
                reader.GetNormValues(info.Name);
                normsCount++;
            }
        }
        if (info.HasDocValues())
        {
            switch (info.DocValuesType)
            {
                case DocValuesType_e.NUMERIC:
                    reader.GetNumericDocValues(info.Name);
                    break;
                case DocValuesType_e.BINARY:
                    reader.GetBinaryDocValues(info.Name);
                    break;
                case DocValuesType_e.SORTED:
                    reader.GetSortedDocValues(info.Name);
                    break;
                case DocValuesType_e.SORTED_SET:
                    reader.GetSortedSetDocValues(info.Name);
                    break;
                default:
                    Debug.Assert(false); // unknown dv type
                    break;
            }
            docValuesCount++;
        }
    }
    reader.Document(0);
    reader.GetTermVectors(0);
    if (InfoStream.IsEnabled("SMSW"))
    {
        InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (Environment.TickCount - startTime));
    }
}
/// <summary>
/// Split a given index into 3 indexes for training, test and cross validation tasks respectively.
/// </summary>
/// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
/// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
/// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
/// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross validation index</param>
/// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
/// <param name="fieldNames">names of fields that need to be put in the new indexes, or <c>null</c> if all should be used</param>
/// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
{
    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));
    try
    {
        int size = originalIndex.MaxDoc;
        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), int.MaxValue);
        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = true;
        ft.StoreTermVectorPositions = true;
        int b = 0;
        // iterate over existing documents
        foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
        {
            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.Length > 0)
            {
                foreach (string fieldName in fieldNames)
                {
                    doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).StringValue, ft));
                }
            }
            else
            {
                foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields)
                {
                    if (storableField.ReaderValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.ReaderValue, ft));
                    }
                    else if (storableField.BinaryValue() != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.BinaryValue(), ft));
                    }
                    else if (storableField.StringValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.StringValue, ft));
                    }
                    else if (storableField.NumericValue != null)
                    {
                        doc.Add(new Field(storableField.Name, storableField.NumericValue.ToString(), ft));
                    }
                }
            }
            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
            {
                testWriter.AddDocument(doc);
            }
            else if (cvWriter.MaxDoc < size * _crossValidationRatio)
            {
                cvWriter.AddDocument(doc);
            }
            else
            {
                trainingWriter.AddDocument(doc);
            }
            b++;
        }
    }
    catch (Exception e)
    {
        throw new IOException("Exception in DatasetSplitter", e);
    }
    finally
    {
        testWriter.Commit();
        cvWriter.Commit();
        trainingWriter.Commit();
        // close IWs
        testWriter.Dispose();
        cvWriter.Dispose();
        trainingWriter.Dispose();
    }
}
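A hedged usage sketch for Split. The constructor ratios, directory paths, originalReader, and field names below are assumptions for illustration, not taken from the original source:

// Hypothetical caller: 10% test, 10% cross-validation, remainder training.
var splitter = new DatasetSplitter(0.1, 0.1);
using (Directory train = FSDirectory.Open(new DirectoryInfo("train")))
using (Directory test = FSDirectory.Open(new DirectoryInfo("test")))
using (Directory cv = FSDirectory.Open(new DirectoryInfo("cv")))
{
    // originalReader is an AtomicReader over the source index (assumed);
    // "body" and "title" are illustrative field names.
    splitter.Split(originalReader, train, test, cv,
        new StandardAnalyzer(Util.Version.LUCENE_CURRENT), "body", "title");
}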
public virtual void TestNumericField()
{
    using Directory dir = NewDirectory();
    DirectoryReader r = null;
    try
    {
        var numDocs = AtLeast(500);
        var answers = new Number[numDocs];
        using (var w = new RandomIndexWriter(Random, dir))
        {
            NumericType[] typeAnswers = new NumericType[numDocs];
            for (int id = 0; id < numDocs; id++)
            {
                Document doc = new Document();
                Field nf;
                Field sf;
                Number answer;
                NumericType typeAnswer;
                if (Random.NextBoolean())
                {
                    // float/double
                    if (Random.NextBoolean())
                    {
                        float f = Random.NextSingle();
                        answer = Single.GetInstance(f);
                        nf = new SingleField("nf", f, Field.Store.NO);
                        sf = new StoredField("nf", f);
                        typeAnswer = NumericType.SINGLE;
                    }
                    else
                    {
                        double d = Random.NextDouble();
                        answer = Double.GetInstance(d);
                        nf = new DoubleField("nf", d, Field.Store.NO);
                        sf = new StoredField("nf", d);
                        typeAnswer = NumericType.DOUBLE;
                    }
                }
                else
                {
                    // int/long
                    if (Random.NextBoolean())
                    {
                        int i = Random.Next();
                        answer = Int32.GetInstance(i);
                        nf = new Int32Field("nf", i, Field.Store.NO);
                        sf = new StoredField("nf", i);
                        typeAnswer = NumericType.INT32;
                    }
                    else
                    {
                        long l = Random.NextInt64();
                        answer = Int64.GetInstance(l);
                        nf = new Int64Field("nf", l, Field.Store.NO);
                        sf = new StoredField("nf", l);
                        typeAnswer = NumericType.INT64;
                    }
                }
                doc.Add(nf);
                doc.Add(sf);
                answers[id] = answer;
                typeAnswers[id] = typeAnswer;
                FieldType ft = new FieldType(Int32Field.TYPE_STORED);
                ft.NumericPrecisionStep = int.MaxValue;
                doc.Add(new Int32Field("id", id, ft));
                w.AddDocument(doc);
            }
            r = w.GetReader();
        } // w.Dispose();
        Assert.AreEqual(numDocs, r.NumDocs);
        foreach (AtomicReaderContext ctx in r.Leaves)
        {
            AtomicReader sub = ctx.AtomicReader;
            FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
            for (int docID = 0; docID < sub.NumDocs; docID++)
            {
                Document doc = sub.Document(docID);
                Field f = (Field)doc.GetField("nf");
                Assert.IsTrue(f is StoredField, "got f=" + f);
#pragma warning disable 612, 618
                Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
#pragma warning restore 612, 618
            }
        }
    }
    finally
    {
        r?.Dispose();
    }
}