Document() public method

public Document ( int n, Lucene.Net.Documents.FieldSelector fieldSelector ) : Lucene.Net.Documents.Document
n int
fieldSelector Lucene.Net.Documents.FieldSelector
return Lucene.Net.Documents.Document
Beispiel #1
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields")));

            dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.IsIndexed = false;
            ft.IsStored  = true;
            ft.Freeze();
            int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20);
            var value       = new byte[valueLength];

            for (int i = 0; i < valueLength; ++i)
            {
                // random so that even compressing codecs can't compress it
                value[i] = (byte)Random().Next(256);
            }
            Field f = new Field("fld", value, ft);

            doc.Add(f);

            int numDocs = (int)((1L << 32) / valueLength + 100);

            for (int i = 0; i < numDocs; ++i)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % (numDocs / 10) == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            if (VERBOSE)
            {
                bool found = false;
                foreach (string file in dir.ListAll())
                {
                    if (file.EndsWith(".fdt", StringComparison.Ordinal))
                    {
                        long fileLength = dir.FileLength(file);
                        if (fileLength >= 1L << 32)
                        {
                            found = true;
                        }
                        Console.WriteLine("File length of " + file + " : " + fileLength);
                    }
                }
                if (!found)
                {
                    Console.WriteLine("No .fdt file larger than 4GB, test bug?");
                }
            }

            DirectoryReader rd = DirectoryReader.Open(dir);
            Document        sd = rd.Document(numDocs - 1);

            Assert.IsNotNull(sd);
            Assert.AreEqual(1, sd.Fields.Count);
            BytesRef valueRef = sd.GetBinaryValue("fld");

            Assert.IsNotNull(valueRef);
            Assert.AreEqual(new BytesRef(value), valueRef);
            rd.Dispose();

            dir.Dispose();
        }
Beispiel #2
0
        // [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestBigDocuments()
        {
            // "big" as "much bigger than the chunk size"
            // for this test we force a FS dir
            // we can't just use newFSDirectory, because this test doesn't really index anything.
            // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
            Directory         dir    = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            Document emptyDoc = new Document(); // emptyDoc
            Document bigDoc1  = new Document(); // lot of small fields
            Document bigDoc2  = new Document(); // 1 very big field

            Field idField = new StringField("id", "", Field.Store.NO);

            emptyDoc.Add(idField);
            bigDoc1.Add(idField);
            bigDoc2.Add(idField);

            FieldType onlyStored = new FieldType(StringField.TYPE_STORED);

            onlyStored.IsIndexed = false;

            Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
            int   numFields  = RandomInts.NextIntBetween(Random(), 500000, 1000000);

            for (int i = 0; i < numFields; ++i)
            {
                bigDoc1.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);

            bigDoc2.Add(bigField);

            int numDocs = AtLeast(5);

            Document[] docs = new Document[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
            }
            for (int i = 0; i < numDocs; ++i)
            {
                idField.SetStringValue("" + i);
                iw.AddDocument(docs[i]);
                if (Random().Next(numDocs) == 0)
                {
                    iw.Commit();
                }
            }
            iw.Commit();
            iw.ForceMerge(1); // look at what happens when big docs are merged
            DirectoryReader rd       = DirectoryReader.Open(dir);
            IndexSearcher   searcher = new IndexSearcher(rd);

            for (int i = 0; i < numDocs; ++i)
            {
                Query   query   = new TermQuery(new Term("id", "" + i));
                TopDocs topDocs = searcher.Search(query, 1);
                Assert.AreEqual(1, topDocs.TotalHits, "" + i);
                Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
                Assert.IsNotNull(doc);
                IIndexableField[] fieldValues = doc.GetFields("fld");
                Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
                if (fieldValues.Length > 0)
                {
                    Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
                }
            }
            rd.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Beispiel #3
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception t)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception e)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw e;
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetIterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetIterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Beispiel #4
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestWriteReadMerge()
        {
            // get another codec, other than the default: so we are merging segments across different codecs
            Codec otherCodec;

            if ("SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                otherCodec = new Lucene46Codec();
            }
            else
            {
                otherCodec = new SimpleTextCodec();
            }
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());

            int docCount = AtLeast(200);
            var data     = new byte[docCount][][];

            for (int i = 0; i < docCount; ++i)
            {
                int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5);
                data[i] = new byte[fieldCount][];
                for (int j = 0; j < fieldCount; ++j)
                {
                    int length = Rarely() ? Random().Next(1000) : Random().Next(10);
                    int max    = Rarely() ? 256 : 2;
                    data[i][j] = RandomByteArray(length, max);
                }
            }

            FieldType type = new FieldType(StringField.TYPE_STORED);

            type.IsIndexed = false;
            type.Freeze();
            Int32Field id = new Int32Field("id", 0, Field.Store.YES);

            for (int i = 0; i < data.Length; ++i)
            {
                Document doc = new Document();
                doc.Add(id);
                id.SetInt32Value(i);
                for (int j = 0; j < data[i].Length; ++j)
                {
                    Field f = new Field("bytes" + j, data[i][j], type);
                    doc.Add(f);
                }
                iw.w.AddDocument(doc);
                if (Random().NextBoolean() && (i % (data.Length / 10) == 0))
                {
                    iw.w.Dispose();
                    // test merging against a non-compressing codec
                    if (iwConf.Codec == otherCodec)
                    {
                        iwConf.SetCodec(Codec.Default);
                    }
                    else
                    {
                        iwConf.SetCodec(otherCodec);
                    }
                    iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());
                }
            }

            for (int i = 0; i < 10; ++i)
            {
                int min = Random().Next(data.Length);
                int max = min + Random().Next(20);
                iw.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", min, max, true, false));
            }

            iw.ForceMerge(2); // force merges with deletions

            iw.Commit();

            DirectoryReader ir = DirectoryReader.Open(dir);

            Assert.IsTrue(ir.NumDocs > 0);
            int numDocs = 0;

            for (int i = 0; i < ir.MaxDoc; ++i)
            {
                Document doc = ir.Document(i);
                if (doc == null)
                {
                    continue;
                }
                ++numDocs;
                int docId = (int)doc.GetField("id").GetNumericValue();
                Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count);
                for (int j = 0; j < data[docId].Length; ++j)
                {
                    var      arr     = data[docId][j];
                    BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j);
                    var      arr2    = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length);
                    Assert.AreEqual(arr, arr2);
                }
            }
            Assert.IsTrue(ir.NumDocs <= numDocs);
            ir.Dispose();

            iw.DeleteAll();
            iw.Commit();
            iw.ForceMerge(1);

            iw.Dispose();
            dir.Dispose();
        }
        private void DoTestReopenWithCommit(Random random, Directory dir, bool withReopen)
        {
            IndexWriter iwriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.CREATE).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy()));

            iwriter.Commit();
            DirectoryReader reader = DirectoryReader.Open(dir);

            try
            {
                int       M          = 3;
                FieldType customType = new FieldType(TextField.TYPE_STORED);
                customType.IsTokenized = false;
                FieldType customType2 = new FieldType(TextField.TYPE_STORED);
                customType2.IsTokenized = false;
                customType2.OmitNorms   = true;
                FieldType customType3 = new FieldType();
                customType3.IsStored = true;
                for (int i = 0; i < 4; i++)
                {
                    for (int j = 0; j < M; j++)
                    {
                        Document doc = new Document();
                        doc.Add(NewField("id", i + "_" + j, customType));
                        doc.Add(NewField("id2", i + "_" + j, customType2));
                        doc.Add(NewField("id3", i + "_" + j, customType3));
                        iwriter.AddDocument(doc);
                        if (i > 0)
                        {
                            int      k = i - 1;
                            int      n = j + k * M;
                            Document prevItereationDoc = reader.Document(n);
                            Assert.IsNotNull(prevItereationDoc);
                            string id = prevItereationDoc.Get("id");
                            Assert.AreEqual(k + "_" + j, id);
                        }
                    }
                    iwriter.Commit();
                    if (withReopen)
                    {
                        // reopen
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader);
                        if (r2 != null)
                        {
                            reader.Dispose();
                            reader = r2;
                        }
                    }
                    else
                    {
                        // recreate
                        reader.Dispose();
                        reader = DirectoryReader.Open(dir);
                    }
                }
            }
            finally
            {
                iwriter.Dispose();
                reader.Dispose();
            }
        }
Beispiel #6
0
        public virtual void TestStressAdvance_Mem()
        {
            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory          dir   = NewDirectory();
                RandomIndexWriter  w     = new RandomIndexWriter(Random(), dir);
                HashSet <int>      aDocs = new HashSet <int>();
                Documents.Document doc   = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field idField = NewStringField("id", "", Field.Store.YES);
                doc.Add(idField);
                int num = AtLeast(4097);
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: numDocs=" + num);
                }
                for (int id = 0; id < num; id++)
                {
                    if (Random().Next(4) == 3)
                    {
                        f.StringValue = "a";
                        aDocs.Add(id);
                    }
                    else
                    {
                        f.StringValue = "b";
                    }
                    idField.StringValue = "" + id;
                    w.AddDocument(doc);
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: doc upto " + id);
                    }
                }

                w.ForceMerge(1);

                IList <int> aDocIDs = new List <int>();
                IList <int> bDocIDs = new List <int>();

                DirectoryReader r         = w.Reader;
                int[]           idToDocID = new int[r.MaxDoc];
                for (int docID = 0; docID < idToDocID.Length; docID++)
                {
                    int id = Convert.ToInt32(r.Document(docID).Get("id"));
                    if (aDocs.Contains(id))
                    {
                        aDocIDs.Add(docID);
                    }
                    else
                    {
                        bDocIDs.Add(docID);
                    }
                }
                TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null);

                DocsEnum de = null;
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                    }
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, aDocIDs);

                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, bDocIDs);
                }

                w.Dispose();
                r.Dispose();
                dir.Dispose();
            }
        }
        // TODO: maybe this can reuse the logic of test dueling codecs?
        public static void AssertIndexEquals(DirectoryReader index1, DirectoryReader index2)
        {
            Assert.AreEqual(index1.NumDocs, index2.NumDocs, "IndexReaders have different values for numDocs.");
            Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
            Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
            Assert.AreEqual(index1.Leaves.Count == 1, index2.Leaves.Count == 1, "Single segment test differs.");

            // check field names
            FieldInfos fieldInfos1 = MultiFields.GetMergedFieldInfos(index1);
            FieldInfos fieldInfos2 = MultiFields.GetMergedFieldInfos(index2);
            Assert.AreEqual(fieldInfos1.Size(), fieldInfos2.Size(), "IndexReaders have different numbers of fields.");
            int numFields = fieldInfos1.Size();
            for (int fieldID = 0; fieldID < numFields; fieldID++)
            {
                FieldInfo fieldInfo1 = fieldInfos1.FieldInfo(fieldID);
                FieldInfo fieldInfo2 = fieldInfos2.FieldInfo(fieldID);
                Assert.AreEqual(fieldInfo1.Name, fieldInfo2.Name, "Different field names.");
            }

            // check norms
            foreach (FieldInfo fieldInfo in fieldInfos1)
            {
                string curField = fieldInfo.Name;
                NumericDocValues norms1 = MultiDocValues.GetNormValues(index1, curField);
                NumericDocValues norms2 = MultiDocValues.GetNormValues(index2, curField);
                if (norms1 != null && norms2 != null)
                {
                    // todo: generalize this (like TestDuelingCodecs assert)
                    for (int i = 0; i < index1.MaxDoc; i++)
                    {
                        Assert.AreEqual(norms1.Get(i), norms2.Get(i), "Norm different for doc " + i + " and field '" + curField + "'.");
                    }
                }
                else
                {
                    Assert.IsNull(norms1);
                    Assert.IsNull(norms2);
                }
            }

            // check deletions
            Bits liveDocs1 = MultiFields.GetLiveDocs(index1);
            Bits liveDocs2 = MultiFields.GetLiveDocs(index2);
            for (int i = 0; i < index1.MaxDoc; i++)
            {
                Assert.AreEqual(liveDocs1 == null || !liveDocs1.Get(i), liveDocs2 == null || !liveDocs2.Get(i), "Doc " + i + " only deleted in one index.");
            }

            // check stored fields
            for (int i = 0; i < index1.MaxDoc; i++)
            {
                if (liveDocs1 == null || liveDocs1.Get(i))
                {
                    Document doc1 = index1.Document(i);
                    Document doc2 = index2.Document(i);
                    IList<IndexableField> field1 = doc1.Fields;
                    IList<IndexableField> field2 = doc2.Fields;
                    Assert.AreEqual(field1.Count, field2.Count, "Different numbers of fields for doc " + i + ".");
                    IEnumerator<IndexableField> itField1 = field1.GetEnumerator();
                    IEnumerator<IndexableField> itField2 = field2.GetEnumerator();
                    while (itField1.MoveNext())
                    {
                        Field curField1 = (Field)itField1.Current;
                        itField2.MoveNext();
                        Field curField2 = (Field)itField2.Current;
                        Assert.AreEqual(curField1.Name(), curField2.Name(), "Different fields names for doc " + i + ".");
                        Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
                    }
                }
            }

            // check dictionary and posting lists
            Fields fields1 = MultiFields.GetFields(index1);
            Fields fields2 = MultiFields.GetFields(index2);
            IEnumerator<string> fenum2 = fields2.GetEnumerator();
            Bits liveDocs = MultiFields.GetLiveDocs(index1);
            foreach (string field1 in fields1)
            {
                fenum2.MoveNext();
                Assert.AreEqual(field1, fenum2.Current, "Different fields");
                Terms terms1 = fields1.Terms(field1);
                if (terms1 == null)
                {
                    Assert.IsNull(fields2.Terms(field1));
                    continue;
                }
                TermsEnum enum1 = terms1.Iterator(null);

                Terms terms2 = fields2.Terms(field1);
                Assert.IsNotNull(terms2);
                TermsEnum enum2 = terms2.Iterator(null);

                while (enum1.Next() != null)
                {
                    Assert.AreEqual(enum1.Term(), enum2.Next(), "Different terms");
                    DocsAndPositionsEnum tp1 = enum1.DocsAndPositions(liveDocs, null);
                    DocsAndPositionsEnum tp2 = enum2.DocsAndPositions(liveDocs, null);

                    while (tp1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.IsTrue(tp2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(tp1.DocID(), tp2.DocID(), "Different doc id in postinglist of term " + enum1.Term() + ".");
                        Assert.AreEqual(tp1.Freq(), tp2.Freq(), "Different term frequence in postinglist of term " + enum1.Term() + ".");
                        for (int i = 0; i < tp1.Freq(); i++)
                        {
                            Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term() + ".");
                        }
                    }
                }
            }
            Assert.IsFalse(fenum2.MoveNext());
        }
Beispiel #8
0
        public virtual void TestReadSkip()
        {
            using Directory dir = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwConf.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));
            using RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConf);
            FieldType ft = new FieldType();

            ft.IsStored = true;
            ft.Freeze();

            string @string = TestUtil.RandomSimpleString(Random, 50);
            var    bytes   = @string.GetBytes(Encoding.UTF8);
            long   l       = Random.NextBoolean() ? Random.Next(42) : Random.NextInt64();
            int    i       = Random.NextBoolean() ? Random.Next(42) : Random.Next();
            float  f       = Random.NextSingle();
            double d       = Random.NextDouble();

            Field[] fields = new Field[]
            {
                new Field("bytes", bytes, ft),
                new Field("string", @string, ft),
                new Int64Field("long", l, Field.Store.YES),
                new Int32Field("int", i, Field.Store.YES),
                new SingleField("float", f, Field.Store.YES),
                new DoubleField("double", d, Field.Store.YES)
            };

            for (int k = 0; k < 100; ++k)
            {
                Document doc = new Document();
                foreach (Field fld in fields)
                {
                    doc.Add(fld);
                }
                iw.IndexWriter.AddDocument(doc);
            }
            iw.Commit();

            using DirectoryReader reader = DirectoryReader.Open(dir);
            int docID = Random.Next(100);

            foreach (Field fld in fields)
            {
                string   fldName = fld.Name;
                Document sDoc    = reader.Document(docID, new JCG.HashSet <string> {
                    fldName
                });
                IIndexableField sField = sDoc.GetField(fldName);
                if (typeof(Field) == fld.GetType())
                {
                    Assert.AreEqual(fld.GetBinaryValue(), sField.GetBinaryValue());
                    Assert.AreEqual(fld.GetStringValue(), sField.GetStringValue());
                }
                else
                {
#pragma warning disable 612, 618
                    Assert.AreEqual(fld.GetNumericValue(), sField.GetNumericValue());
#pragma warning restore 612, 618
                }
            }
        }