示例#1
0
        public virtual void TestDocValuesUnstored()
        {
            Directory         dir      = NewDirectory();
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            IndexWriter writer = new IndexWriter(dir, iwconfig);

            for (int i = 0; i < 50; i++)
            {
                Document doc = new Document();
                doc.Add(new NumericDocValuesField("dv", i));
                doc.Add(new TextField("docId", "" + i, Field.Store.YES));
                writer.AddDocument(doc);
            }
            DirectoryReader r      = writer.GetReader();
            AtomicReader    slow   = SlowCompositeReaderWrapper.Wrap(r);
            FieldInfos      fi     = slow.FieldInfos;
            FieldInfo       dvInfo = fi.FieldInfo("dv");

            Assert.IsTrue(dvInfo.HasDocValues);
            NumericDocValues dv = slow.GetNumericDocValues("dv");

            for (int i = 0; i < 50; i++)
            {
                Assert.AreEqual(i, dv.Get(i));
                Document d = slow.Document(i);
                // cannot use d.Get("dv") due to another bug!
                Assert.IsNull(d.GetField("dv"));
                Assert.AreEqual(Convert.ToString(i), d.Get("docId"));
            }
            slow.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
示例#2
0
            public override void Warm(AtomicReader reader)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: now warm merged reader=" + reader);
                }
                OuterInstance.Warmed[(SegmentCoreReaders)reader.CoreCacheKey] = true;
                int  maxDoc   = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;
                int  sum      = 0;
                int  inc      = Math.Max(1, maxDoc / 50);

                for (int docID = 0; docID < maxDoc; docID += inc)
                {
                    if (liveDocs == null || liveDocs.Get(docID))
                    {
                        Document doc = reader.Document(docID);
                        sum += doc.Fields.Count;
                    }
                }

                IndexSearcher searcher = NewSearcher(reader);

                sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: warm visited " + sum + " fields");
                }
            }
        public override void Warm(AtomicReader reader)
        {
            long startTime      = Environment.TickCount;
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.IsIndexed)
                {
                    reader.GetTerms(info.Name);
                    indexedCount++;

                    if (info.HasNorms)
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues)
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(false);                               // unknown dv type
                        }
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (infoStream.IsEnabled("SMSW"))
            {
                infoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (Environment.TickCount - startTime));
            }
        }
示例#4
0
 private static void PrintDocs(DirectoryReader r)
 {
     foreach (AtomicReaderContext ctx in r.Leaves)
     {
         // TODO: improve this
         AtomicReader sub      = (AtomicReader)ctx.Reader;
         IBits        liveDocs = sub.LiveDocs;
         Console.WriteLine("  " + ((SegmentReader)sub).SegmentInfo);
         for (int docID = 0; docID < sub.MaxDoc; docID++)
         {
             Document doc = sub.Document(docID);
             if (liveDocs is null || liveDocs.Get(docID))
             {
                 Console.WriteLine("    docID=" + docID + " id:" + doc.Get("id"));
             }
示例#5
0
        public virtual void TestFloatNorms()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config   = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity        provider = new MySimProvider(this);

            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);
            LineFileDocs      docs   = new LineFileDocs(Random);
            int num = AtLeast(100);

            for (int i = 0; i < num; i++)
            {
                Document doc       = docs.NextDoc();
                float    nextFloat = Random.nextFloat();
                // Cast to a double to get more precision output to the string.
                Field f = new TextField(floatTestField, "" + ((double)nextFloat).ToString(CultureInfo.InvariantCulture), Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(floatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader     open  = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(floatTestField);

            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                float    expected = Convert.ToSingle(document.Get(floatTestField), CultureInfo.InvariantCulture);
                Assert.AreEqual(expected, J2N.BitConversion.Int32BitsToSingle((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
示例#6
0
        public virtual void TestMaxByteNorms()
        {
            Directory dir = NewFSDirectory(CreateTempDir("TestNorms.testMaxByteNorms"));

            BuildIndex(dir);
            AtomicReader     open       = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues normValues = open.GetNormValues(ByteTestField);

            Assert.IsNotNull(normValues);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                int      expected = Convert.ToInt32(document.Get(ByteTestField));
                Assert.AreEqual(expected, normValues.Get(i) & 0xff);
            }
            open.Dispose();
            dir.Dispose();
        }
示例#7
0
            public override void Warm(AtomicReader reader)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: now warm merged reader=" + reader);
                }
#if !FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
                UninterruptableMonitor.Enter(outerInstance.warmedLock);
                try
                {
#endif
                outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true);
#if !FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
            }
            finally
            {
                UninterruptableMonitor.Exit(outerInstance.warmedLock);
            }
#endif
                int maxDoc     = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;
                int sum        = 0;
                int inc        = Math.Max(1, maxDoc / 50);
                for (int docID = 0; docID < maxDoc; docID += inc)
                {
                    if (liveDocs is null || liveDocs.Get(docID))
                    {
                        Document doc = reader.Document(docID);
                        sum += doc.Fields.Count;
                    }
                }
                IndexSearcher searcher = NewSearcher(reader);

                sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;

                if (Verbose)
                {
                    Console.WriteLine("TEST: warm visited " + sum + " fields");
                }
            }
示例#8
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestNumericField()
        {
            Directory dir     = NewDirectory();
            var       w       = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone);
            var       numDocs = AtLeast(500);
            var       answers = new object[numDocs];

            NumericType[] typeAnswers = new NumericType[numDocs];
            for (int id = 0; id < numDocs; id++)
            {
                Document    doc = new Document();
                Field       nf;
                Field       sf;
                object      answer;
                NumericType typeAnswer;
                if (Random().NextBoolean())
                {
                    // float/double
                    if (Random().NextBoolean())
                    {
                        float f = Random().NextFloat();
                        answer     = Convert.ToSingle(f, CultureInfo.InvariantCulture);
                        nf         = new SingleField("nf", f, Field.Store.NO);
                        sf         = new StoredField("nf", f);
                        typeAnswer = NumericType.SINGLE;
                    }
                    else
                    {
                        double d = Random().NextDouble();
                        answer     = Convert.ToDouble(d, CultureInfo.InvariantCulture);
                        nf         = new DoubleField("nf", d, Field.Store.NO);
                        sf         = new StoredField("nf", d);
                        typeAnswer = NumericType.DOUBLE;
                    }
                }
                else
                {
                    // int/long
                    if (Random().NextBoolean())
                    {
                        int i = Random().Next();
                        answer     = Convert.ToInt32(i, CultureInfo.InvariantCulture);
                        nf         = new Int32Field("nf", i, Field.Store.NO);
                        sf         = new StoredField("nf", i);
                        typeAnswer = NumericType.INT32;
                    }
                    else
                    {
                        long l = Random().NextLong();
                        answer     = Convert.ToInt64(l, CultureInfo.InvariantCulture);
                        nf         = new Int64Field("nf", l, Field.Store.NO);
                        sf         = new StoredField("nf", l);
                        typeAnswer = NumericType.INT64;
                    }
                }
                doc.Add(nf);
                doc.Add(sf);
                answers[id]     = answer;
                typeAnswers[id] = typeAnswer;
                FieldType ft = new FieldType(Int32Field.TYPE_STORED);
                ft.NumericPrecisionStep = int.MaxValue;
                doc.Add(new Int32Field("id", id, ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            Assert.AreEqual(numDocs, r.NumDocs);

            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                AtomicReader      sub = (AtomicReader)ctx.Reader;
                FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                for (int docID = 0; docID < sub.NumDocs; docID++)
                {
                    Document doc = sub.Document(docID);
                    Field    f   = (Field)doc.GetField("nf");
                    Assert.IsTrue(f is StoredField, "got f=" + f);
                    Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
                }
            }
            r.Dispose();
            dir.Dispose();
        }
示例#9
0
        /// <summary>
        /// Test stored fields.
        /// @lucene.experimental
        /// </summary>
        public static Status.StoredFieldStatus TestStoredFields(AtomicReader reader, TextWriter infoStream)
        {
            Status.StoredFieldStatus status = new Status.StoredFieldStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: stored fields.......");
                }

                // Scan stored fields for all documents
                Bits liveDocs = reader.LiveDocs;
                for (int j = 0; j < reader.MaxDoc; ++j)
                {
                    // Intentionally pull even deleted documents to
                    // make sure they too are not corrupt:
                    Document doc = reader.Document(j);
                    if (liveDocs == null || liveDocs.Get(j))
                    {
                        status.DocCount++;
                        status.TotFields += doc.Fields.Count;
                    }
                }

                // Validate docCount
                if (status.DocCount != reader.NumDocs)
                {
                    throw new Exception("docCount=" + status.DocCount + " but saw " + status.DocCount + " undeleted docs");
                }

                Msg(infoStream, "OK [" + status.TotFields + " total field count; avg " + ((((float)status.TotFields) / status.DocCount)).ToString(CultureInfo.InvariantCulture.NumberFormat) + " fields per doc]");
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
示例#10
0
        public override void Warm(AtomicReader reader)
        {
            long startTime      = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.IsIndexed)
                {
                    reader.GetTerms(info.Name);
                    indexedCount++;

                    if (info.HasNorms)
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues)
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(false);                               // unknown dv type
                        }
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (infoStream.IsEnabled("SMSW"))
            {
                infoStream.Message("SMSW",
                                   "Finished warming segment: " + reader +
                                   ", indexed=" + indexedCount +
                                   ", docValues=" + docValuesCount +
                                   ", norms=" + normsCount +
                                   ", time=" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startTime)); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
        }
            public override void Warm(AtomicReader reader)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: now warm merged reader=" + reader);
                }
                OuterInstance.Warmed[(SegmentCoreReaders)reader.CoreCacheKey] = true;
                int maxDoc = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;
                int sum = 0;
                int inc = Math.Max(1, maxDoc / 50);
                for (int docID = 0; docID < maxDoc; docID += inc)
                {
                    if (liveDocs == null || liveDocs.Get(docID))
                    {
                        Document doc = reader.Document(docID);
                        sum += doc.Fields.Count;
                    }
                }

                IndexSearcher searcher = OuterInstance.NewSearcher(reader);
                sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: warm visited " + sum + " fields");
                }
            }
示例#12
0
 public override void Document(int docID, StoredFieldVisitor visitor)
 {
     EnsureOpen();
     m_input.Document(docID, visitor);
 }
        public override void Warm(AtomicReader reader)
        {
            long startTime = DateTime.Now.Millisecond;
            int indexedCount = 0;
            int docValuesCount = 0;
            int normsCount = 0;
            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.Indexed)
                {
                    reader.Terms(info.Name);
                    indexedCount++;

                    if (info.HasNorms())
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues())
                {
                    switch (info.DocValuesType)
                    {
                        case DocValuesType_e.NUMERIC:
                            reader.GetNumericDocValues(info.Name);
                            break;

                        case DocValuesType_e.BINARY:
                            reader.GetBinaryDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED:
                            reader.GetSortedDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED_SET:
                            reader.GetSortedSetDocValues(info.Name);
                            break;

                        default:
                            Debug.Assert(false); // unknown dv type
                            break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (InfoStream.IsEnabled("SMSW"))
            {
                InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (DateTime.Now.Millisecond - startTime));
            }
        }
示例#14
0
        /**
         * Split a given index into 3 indexes for training, test and cross validation tasks respectively
         *
         * @param originalIndex        an {@link AtomicReader} on the source index
         * @param trainingIndex        a {@link Directory} used to write the training index
         * @param testIndex            a {@link Directory} used to write the test index
         * @param crossValidationIndex a {@link Directory} used to write the cross validation index
         * @param analyzer             {@link Analyzer} used to create the new docs
         * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
         * @throws IOException if any writing operation fails on any of the indexes
         */
        public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
        {
            // create IWs for train / test / cv IDXs
            IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));
            IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));
            IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.Version.LUCENE_CURRENT, analyzer));

            try
            {
                int size = originalIndex.MaxDoc;

                IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
                TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), Int32.MaxValue);

                // set the type to be indexed, stored, with term vectors
                FieldType ft = new FieldType(TextField.TYPE_STORED);
                ft.StoreTermVectors = true;
                ft.StoreTermVectorOffsets = true;
                ft.StoreTermVectorPositions = true;

                int b = 0;

                // iterate over existing documents
                foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                {
                    // create a new document for indexing
                    Document doc = new Document();
                    if (fieldNames != null && fieldNames.Length > 0)
                    {
                        foreach (String fieldName in fieldNames)
                        {
                            doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft));
                        }
                    }
                    else
                    {
                        foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields)
                        {
                            if (storableField.ReaderValue != null)
                            {
                                doc.Add(new Field(storableField.Name(), storableField.ReaderValue, ft));
                            }
                            else if (storableField.BinaryValue() != null)
                            {
                                doc.Add(new Field(storableField.Name(), storableField.BinaryValue(), ft));
                            }
                            else if (storableField.StringValue != null)
                            {
                                doc.Add(new Field(storableField.Name(), storableField.StringValue, ft));
                            }
                            else if (storableField.NumericValue != null)
                            {
                                doc.Add(new Field(storableField.Name(), storableField.NumericValue.ToString(), ft));
                            }
                        }
                    }

                    // add it to one of the IDXs
                    if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
                    {
                        testWriter.AddDocument(doc);
                    }
                    else if (cvWriter.MaxDoc < size * _crossValidationRatio)
                    {
                        cvWriter.AddDocument(doc);
                    }
                    else
                    {
                        trainingWriter.AddDocument(doc);
                    }
                    b++;
                }
            }
            catch (Exception e)
            {
                throw new IOException("Exceptio in DatasetSplitter", e);
            }
            finally
            {
                testWriter.Commit();
                cvWriter.Commit();
                trainingWriter.Commit();
                // close IWs
                testWriter.Dispose();
                cvWriter.Dispose();
                trainingWriter.Dispose();
            }
        }
示例#15
0
        public virtual void TestNumericField()
        {
            using Directory dir = NewDirectory();
            DirectoryReader r = null;

            try
            {
                var numDocs = AtLeast(500);
                var answers = new Number[numDocs];
                using (var w = new RandomIndexWriter(Random, dir))
                {
                    NumericType[] typeAnswers = new NumericType[numDocs];
                    for (int id = 0; id < numDocs; id++)
                    {
                        Document    doc = new Document();
                        Field       nf;
                        Field       sf;
                        Number      answer;
                        NumericType typeAnswer;
                        if (Random.NextBoolean())
                        {
                            // float/double
                            if (Random.NextBoolean())
                            {
                                float f = Random.NextSingle();
                                answer     = Single.GetInstance(f);
                                nf         = new SingleField("nf", f, Field.Store.NO);
                                sf         = new StoredField("nf", f);
                                typeAnswer = NumericType.SINGLE;
                            }
                            else
                            {
                                double d = Random.NextDouble();
                                answer     = Double.GetInstance(d);
                                nf         = new DoubleField("nf", d, Field.Store.NO);
                                sf         = new StoredField("nf", d);
                                typeAnswer = NumericType.DOUBLE;
                            }
                        }
                        else
                        {
                            // int/long
                            if (Random.NextBoolean())
                            {
                                int i = Random.Next();
                                answer     = Int32.GetInstance(i);
                                nf         = new Int32Field("nf", i, Field.Store.NO);
                                sf         = new StoredField("nf", i);
                                typeAnswer = NumericType.INT32;
                            }
                            else
                            {
                                long l = Random.NextInt64();
                                answer     = Int64.GetInstance(l);
                                nf         = new Int64Field("nf", l, Field.Store.NO);
                                sf         = new StoredField("nf", l);
                                typeAnswer = NumericType.INT64;
                            }
                        }
                        doc.Add(nf);
                        doc.Add(sf);
                        answers[id]     = answer;
                        typeAnswers[id] = typeAnswer;
                        FieldType ft = new FieldType(Int32Field.TYPE_STORED);
                        ft.NumericPrecisionStep = int.MaxValue;
                        doc.Add(new Int32Field("id", id, ft));
                        w.AddDocument(doc);
                    }
                    r = w.GetReader();
                } // w.Dispose();

                Assert.AreEqual(numDocs, r.NumDocs);

                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    AtomicReader      sub = ctx.AtomicReader;
                    FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                    for (int docID = 0; docID < sub.NumDocs; docID++)
                    {
                        Document doc = sub.Document(docID);
                        Field    f   = doc.GetField <Field>("nf");
                        Assert.IsTrue(f is StoredField, "got f=" + f);
#pragma warning disable 612, 618
                        Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
#pragma warning restore 612, 618
                    }
                }
            }
            finally
            {
                r?.Dispose();
            }
        }