Пример #1
0
        public virtual void TestNumericFieldAsString()
        {
            Documents.Document doc = new Documents.Document();
            doc.Add(new Int32Field("int", 5, Field.Store.YES));
            Assert.AreEqual("5", doc.Get("int"));
            Assert.IsNull(doc.Get("somethingElse"));
            doc.Add(new Int32Field("int", 4, Field.Store.YES));
            Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int"));

            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            Documents.Document sdoc = ir.Document(0);
            Assert.AreEqual("5", sdoc.Get("int"));
            Assert.IsNull(sdoc.Get("somethingElse"));
            Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int"));
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #2
0
        public virtual void TestBinaryFieldInIndex()
        {
            FieldType ft = new FieldType();

            ft.IsStored = true;
            IIndexableField binaryFldStored = new StoredField("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(BinaryValStored));
            IIndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

            Documents.Document doc = new Documents.Document();

            doc.Add(binaryFldStored);

            doc.Add(stringFldStored);

            /// <summary>
            /// test for field count </summary>
            Assert.AreEqual(2, doc.Fields.Count);

            /// <summary>
            /// add the doc to a ram index </summary>
            Directory         dir    = NewDirectory();
            Random            r      = Random;
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                r, dir);

            writer.AddDocument(doc);

            /// <summary>
            /// open a reader and fetch the document </summary>
            IndexReader reader = writer.GetReader();

            Documents.Document docFromReader = reader.Document(0);
            Assert.IsTrue(docFromReader != null);

            /// <summary>
            /// fetch the binary stored field and compare it's content with the original one </summary>
            BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");

            Assert.IsNotNull(bytes);

            string binaryFldStoredTest = Encoding.UTF8.GetString(bytes.Bytes).Substring(bytes.Offset, bytes.Length);

            //new string(bytes.Bytes, bytes.Offset, bytes.Length, IOUtils.CHARSET_UTF_8);
            Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored, StringComparison.Ordinal));

            /// <summary>
            /// fetch the string field and compare it's content with the original one </summary>
            string stringFldStoredTest = docFromReader.Get("stringStored");

            Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored, StringComparison.Ordinal));

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Пример #3
0
        public virtual void TestBinaryField()
        {
            Documents.Document doc = new Documents.Document();

            FieldType ft = new FieldType();

            ft.Stored = true;
            IndexableField stringFld  = new Field("string", BinaryVal, ft);
            IndexableField binaryFld  = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
            IndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

            doc.Add(stringFld);
            doc.Add(binaryFld);

            Assert.AreEqual(2, doc.Fields.Count);

            Assert.IsTrue(binaryFld.BinaryValue != null);
            Assert.IsTrue(binaryFld.FieldType.Stored);
            Assert.IsFalse(binaryFld.FieldType.Indexed);

            string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();

            Assert.IsTrue(binaryTest.Equals(BinaryVal));

            string stringTest = doc.Get("string");

            Assert.IsTrue(binaryTest.Equals(stringTest));

            doc.Add(binaryFld2);

            Assert.AreEqual(3, doc.Fields.Count);

            BytesRef[] binaryTests = doc.GetBinaryValues("binary");

            Assert.AreEqual(2, binaryTests.Length);

            binaryTest = binaryTests[0].Utf8ToString();
            string binaryTest2 = binaryTests[1].Utf8ToString();

            Assert.IsFalse(binaryTest.Equals(binaryTest2));

            Assert.IsTrue(binaryTest.Equals(BinaryVal));
            Assert.IsTrue(binaryTest2.Equals(BinaryVal2));

            doc.RemoveField("string");
            Assert.AreEqual(2, doc.Fields.Count);

            doc.RemoveFields("binary");
            Assert.AreEqual(0, doc.Fields.Count);
        }
Пример #4
0
        public virtual void TestNumericFieldAsString()
        {
            Documents.Document doc = new Documents.Document();
            doc.Add(new IntField("int", 5, Field.Store.YES));
            Assert.AreEqual("5", doc.Get("int"));
            Assert.IsNull(doc.Get("somethingElse"));
            doc.Add(new IntField("int", 4, Field.Store.YES));
            Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int"));

            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;

            Documents.Document sdoc = ir.Document(0);
            Assert.AreEqual("5", sdoc.Get("int"));
            Assert.IsNull(sdoc.Get("somethingElse"));
            Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int"));
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #5
0
        public virtual void TestBinaryField()
        {
            Documents.Document doc = new Documents.Document();

            FieldType ft = new FieldType();
            ft.Stored = true;
            IndexableField stringFld = new Field("string", BinaryVal, ft);
            IndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
            IndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

            doc.Add(stringFld);
            doc.Add(binaryFld);

            Assert.AreEqual(2, doc.Fields.Count);

            Assert.IsTrue(binaryFld.BinaryValue != null);
            Assert.IsTrue(binaryFld.FieldType.Stored);
            Assert.IsFalse(binaryFld.FieldType.Indexed);

            string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();
            Assert.IsTrue(binaryTest.Equals(BinaryVal));

            string stringTest = doc.Get("string");
            Assert.IsTrue(binaryTest.Equals(stringTest));

            doc.Add(binaryFld2);

            Assert.AreEqual(3, doc.Fields.Count);

            BytesRef[] binaryTests = doc.GetBinaryValues("binary");

            Assert.AreEqual(2, binaryTests.Length);

            binaryTest = binaryTests[0].Utf8ToString();
            string binaryTest2 = binaryTests[1].Utf8ToString();

            Assert.IsFalse(binaryTest.Equals(binaryTest2));

            Assert.IsTrue(binaryTest.Equals(BinaryVal));
            Assert.IsTrue(binaryTest2.Equals(BinaryVal2));

            doc.RemoveField("string");
            Assert.AreEqual(2, doc.Fields.Count);

            doc.RemoveFields("binary");
            Assert.AreEqual(0, doc.Fields.Count);
        }
Пример #6
0
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");

            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);

            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.Terms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Size());
                TermsEnum tvsEnum = tvs.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv"))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestRandomStoredFields()
        {
            Directory dir = NewDirectory();
            Random rand = Random();
            RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
            //w.w.setNoCFSRatio(0.0);
            int docCount = AtLeast(200);
            int fieldCount = TestUtil.NextInt(rand, 1, 5);

            IList<int?> fieldIDs = new List<int?>();

            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.Tokenized = false;
            Field idField = NewField("id", "", customType);

            for (int i = 0; i < fieldCount; i++)
            {
                fieldIDs.Add(i);
            }

            IDictionary<string, Document> docs = new Dictionary<string, Document>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: build index docCount=" + docCount);
            }

            FieldType customType2 = new FieldType();
            customType2.Stored = true;
            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(idField);
                string id = "" + i;
                idField.StringValue = id;
                docs[id] = doc;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: add doc id=" + id);
                }

                foreach (int field in fieldIDs)
                {
                    string s;
                    if (rand.Next(4) != 3)
                    {
                        s = TestUtil.RandomUnicodeString(rand, 1000);
                        doc.Add(NewField("f" + field, s, customType2));
                    }
                    else
                    {
                        s = null;
                    }
                }
                w.AddDocument(doc);
                if (rand.Next(50) == 17)
                {
                    // mixup binding of field name -> Number every so often
                    fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
                }
                if (rand.Next(5) == 3 && i > 0)
                {
                    string delID = "" + rand.Next(i);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: delete doc id=" + delID);
                    }
                    w.DeleteDocuments(new Term("id", delID));
                    docs.Remove(delID);
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
            }
            if (docs.Count > 0)
            {
                string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);

                for (int x = 0; x < 2; x++)
                {
                    IndexReader r = w.Reader;
                    IndexSearcher s = NewSearcher(r);

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
                    }

                    int num = AtLeast(1000);
                    for (int iter = 0; iter < num; iter++)
                    {
                        string testID = idsList[rand.Next(idsList.Length)];
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: test id=" + testID);
                        }
                        TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                        Assert.AreEqual(1, hits.TotalHits);
                        Document doc = r.Document(hits.ScoreDocs[0].Doc);
                        Document docExp = docs[testID];
                        for (int i = 0; i < fieldCount; i++)
                        {
                            Assert.AreEqual("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.Get("f" + i), doc.Get("f" + i));
                        }
                    }
                    r.Dispose();
                    w.ForceMerge(1);
                }
            }
            w.Dispose();
            dir.Dispose();
        }
Пример #8
0
        public virtual void TestNumericFieldAsString()
        {
            Documents.Document doc = new Documents.Document();
            doc.Add(new IntField("int", 5, Field.Store.YES));
            Assert.AreEqual("5", doc.Get("int"));
            Assert.IsNull(doc.Get("somethingElse"));
            doc.Add(new IntField("int", 4, Field.Store.YES));
            Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int"));

            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;
            Documents.Document sdoc = ir.Document(0);
            Assert.AreEqual("5", sdoc.Get("int"));
            Assert.IsNull(sdoc.Get("somethingElse"));
            Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int"));
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #9
0
        public virtual void TestTransitionAPI()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");
            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms tvs = tvFields.Terms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Size());
                TermsEnum tvsEnum = tvs.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv"))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
Пример #10
0
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
            w.AddDocument(doc);
            IndexReader r = w.GetReader();
            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");
            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.GetTerms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Count);
                TermsEnum tvsEnum = tvs.GetEnumerator();
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Term);
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv", StringComparison.Ordinal))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Term);
                Assert.IsFalse(tvsEnum.MoveNext());
            }

            r.Dispose();
            dir.Dispose();
        }
        public void TestHugeBinaryValues()
        {
            Analyzer analyzer = new MockAnalyzer(Random());
            // FSDirectory because SimpleText will consume gobbs of
            // space when storing big binary values:
            Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
            bool doFixed = Random().NextBoolean();
            int numDocs;
            int fixedLength = 0;
            if (doFixed)
            {
                // Sometimes make all values fixed length since some
                // codecs have different code paths for this:
                numDocs = TestUtil.NextInt(Random(), 10, 20);
                fixedLength = TestUtil.NextInt(Random(), 65537, 256 * 1024);
            }
            else
            {
                numDocs = TestUtil.NextInt(Random(), 100, 200);
            }
            IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            var docBytes = new List<byte[]>();
            long totalBytes = 0;
            for (int docID = 0; docID < numDocs; docID++)
            {
                // we don't use RandomIndexWriter because it might add
                // more docvalues than we expect !!!!

                // Must be > 64KB in size to ensure more than 2 pages in
                // PagedBytes would be needed:
                int numBytes;
                if (doFixed)
                {
                    numBytes = fixedLength;
                }
                else if (docID == 0 || Random().Next(5) == 3)
                {
                    numBytes = TestUtil.NextInt(Random(), 65537, 3 * 1024 * 1024);
                }
                else
                {
                    numBytes = TestUtil.NextInt(Random(), 1, 1024 * 1024);
                }
                totalBytes += numBytes;
                if (totalBytes > 5 * 1024 * 1024)
                {
                    break;
                }
                var bytes = new byte[numBytes];
                Random().NextBytes(bytes);
                docBytes.Add(bytes);
                Document doc = new Document();
                BytesRef b = new BytesRef(bytes);
                b.Length = bytes.Length;
                doc.Add(new BinaryDocValuesField("field", b));
                doc.Add(new StringField("id", "" + docID, Field.Store.YES));
                try
                {
                    w.AddDocument(doc);
                }
                catch (System.ArgumentException iae)
                {
                    if (iae.Message.IndexOf("is too large") == -1)
                    {
                        throw iae;
                    }
                    else
                    {
                        // OK: some codecs can't handle binary DV > 32K
                        Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));
                        w.Rollback();
                        d.Dispose();
                        return;
                    }
                }
            }

            DirectoryReader r;
            try
            {
                r = w.Reader;
            }
            catch (System.ArgumentException iae)
            {
                if (iae.Message.IndexOf("is too large") == -1)
                {
                    throw iae;
                }
                else
                {
                    Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));

                    // OK: some codecs can't handle binary DV > 32K
                    w.Rollback();
                    d.Dispose();
                    return;
                }
            }
            w.Dispose();

            AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);

            BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
            for (int docID = 0; docID < docBytes.Count; docID++)
            {
                Document doc = ar.Document(docID);
                BytesRef bytes = new BytesRef();
                s.Get(docID, bytes);
                var expected = docBytes[Convert.ToInt32(doc.Get("id"))];
                Assert.AreEqual(expected.Length, bytes.Length);
                Assert.AreEqual(new BytesRef(expected), bytes);
            }

            Assert.IsTrue(CodecAcceptsHugeBinaryValues("field"));

            ar.Dispose();
            d.Dispose();
        }
        public void TestHugeBinaryValueLimit()
        {
            // We only test DVFormats that have a limit
            AssumeFalse("test requires codec with limits on max binary field length", CodecAcceptsHugeBinaryValues("field"));
            Analyzer analyzer = new MockAnalyzer(Random());
            // FSDirectory because SimpleText will consume gobbs of
            // space when storing big binary values:
            Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
            bool doFixed = Random().NextBoolean();
            int numDocs;
            int fixedLength = 0;
            if (doFixed)
            {
                // Sometimes make all values fixed length since some
                // codecs have different code paths for this:
                numDocs = TestUtil.NextInt(Random(), 10, 20);
                fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
            }
            else
            {
                numDocs = TestUtil.NextInt(Random(), 100, 200);
            }
            IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            var docBytes = new List<byte[]>();
            long totalBytes = 0;
            for (int docID = 0; docID < numDocs; docID++)
            {
                // we don't use RandomIndexWriter because it might add
                // more docvalues than we expect !!!!

                // Must be > 64KB in size to ensure more than 2 pages in
                // PagedBytes would be needed:
                int numBytes;
                if (doFixed)
                {
                    numBytes = fixedLength;
                }
                else if (docID == 0 || Random().Next(5) == 3)
                {
                    numBytes = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
                }
                else
                {
                    numBytes = TestUtil.NextInt(Random(), 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
                }
                totalBytes += numBytes;
                if (totalBytes > 5 * 1024 * 1024)
                {
                    break;
                }
                var bytes = new byte[numBytes];
                Random().NextBytes(bytes);
                docBytes.Add(bytes);
                Document doc = new Document();
                BytesRef b = new BytesRef(bytes);
                b.Length = bytes.Length;
                doc.Add(new BinaryDocValuesField("field", b));
                doc.Add(new StringField("id", "" + docID, Field.Store.YES));
                w.AddDocument(doc);
            }

            DirectoryReader r = w.Reader;
            w.Dispose();

            AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);

            BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
            for (int docID = 0; docID < docBytes.Count; docID++)
            {
                Document doc = ar.Document(docID);
                BytesRef bytes = new BytesRef();
                s.Get(docID, bytes);
                var expected = docBytes[Convert.ToInt32(doc.Get("id"))];
                Assert.AreEqual(expected.Length, bytes.Length);
                Assert.AreEqual(new BytesRef(expected), bytes);
            }

            ar.Dispose();
            d.Dispose();
        }