public virtual void TestNumericFieldAsString()
{
    Documents.Document doc = new Documents.Document();
    doc.Add(new Int32Field("int", 5, Field.Store.YES));
    Assert.AreEqual("5", doc.Get("int"));
    Assert.IsNull(doc.Get("somethingElse"));
    doc.Add(new Int32Field("int", 4, Field.Store.YES));
    Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int"));

    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    Documents.Document sdoc = ir.Document(0);
    Assert.AreEqual("5", sdoc.Get("int"));
    Assert.IsNull(sdoc.Get("somethingElse"));
    Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int"));
    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}

public virtual void TestBinaryFieldInIndex()
{
    FieldType ft = new FieldType();
    ft.IsStored = true;
    IIndexableField binaryFldStored = new StoredField("binaryStored", Encoding.UTF8.GetBytes(BinaryValStored));
    IIndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

    Documents.Document doc = new Documents.Document();

    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    // test for field count
    Assert.AreEqual(2, doc.Fields.Count);

    // add the doc to a ram index
    Directory dir = NewDirectory();
    Random r = Random;
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        r, dir);
    writer.AddDocument(doc);

    // open a reader and fetch the document
    IndexReader reader = writer.GetReader();
    Documents.Document docFromReader = reader.Document(0);
    Assert.IsTrue(docFromReader != null);

    // fetch the binary stored field and compare its content with the original one;
    // decode only the stored slice: decoding the whole array and then taking a char
    // substring by byte offsets would break on multi-byte UTF-8 content
    BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");
    Assert.IsNotNull(bytes);
    string binaryFldStoredTest = Encoding.UTF8.GetString(bytes.Bytes, bytes.Offset, bytes.Length);
    Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored, StringComparison.Ordinal));

    // fetch the string field and compare its content with the original one
    string stringFldStoredTest = docFromReader.Get("stringStored");
    Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored, StringComparison.Ordinal));

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}

public virtual void TestBinaryField()
{
    Documents.Document doc = new Documents.Document();

    FieldType ft = new FieldType();
    ft.IsStored = true;
    IIndexableField stringFld = new Field("string", BinaryVal, ft);
    IIndexableField binaryFld = new StoredField("binary", Encoding.UTF8.GetBytes(BinaryVal));
    IIndexableField binaryFld2 = new StoredField("binary", Encoding.UTF8.GetBytes(BinaryVal2));

    doc.Add(stringFld);
    doc.Add(binaryFld);

    Assert.AreEqual(2, doc.Fields.Count);

    Assert.IsTrue(binaryFld.GetBinaryValue() != null);
    Assert.IsTrue(binaryFld.IndexableFieldType.IsStored);
    Assert.IsFalse(binaryFld.IndexableFieldType.IsIndexed);

    string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();
    Assert.IsTrue(binaryTest.Equals(BinaryVal, StringComparison.Ordinal));

    string stringTest = doc.Get("string");
    Assert.IsTrue(binaryTest.Equals(stringTest, StringComparison.Ordinal));

    doc.Add(binaryFld2);

    Assert.AreEqual(3, doc.Fields.Count);

    BytesRef[] binaryTests = doc.GetBinaryValues("binary");

    Assert.AreEqual(2, binaryTests.Length);

    binaryTest = binaryTests[0].Utf8ToString();
    string binaryTest2 = binaryTests[1].Utf8ToString();

    Assert.IsFalse(binaryTest.Equals(binaryTest2, StringComparison.Ordinal));

    Assert.IsTrue(binaryTest.Equals(BinaryVal, StringComparison.Ordinal));
    Assert.IsTrue(binaryTest2.Equals(BinaryVal2, StringComparison.Ordinal));

    doc.RemoveField("string");
    Assert.AreEqual(2, doc.Fields.Count);

    doc.RemoveFields("binary");
    Assert.AreEqual(0, doc.Fields.Count);
}

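// The BinaryValStored, BinaryVal, and BinaryVal2 constants used by the two tests above are
// not defined in this excerpt; they are assumed to be class-level string fields. A minimal
// sketch with placeholder values (the exact strings in the original test suite may differ):
private static readonly string BinaryValStored = "this text will be stored as a byte array in the index";
private static readonly string BinaryVal = "this text will be stored and fetched as a byte array";
private static readonly string BinaryVal2 = "this text will also be stored and fetched as a byte array";
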
public virtual void TestRandomStoredFields()
{
    Directory dir = NewDirectory();
    Random rand = Random;
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(rand)).SetMaxBufferedDocs(TestUtil.NextInt32(rand, 5, 20)));
    int docCount = AtLeast(200);
    int fieldCount = TestUtil.NextInt32(rand, 1, 5);

    IList<int> fieldIDs = new List<int>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.IsTokenized = false;
    Field idField = NewField("id", "", customType);

    for (int i = 0; i < fieldCount; i++)
    {
        fieldIDs.Add(i);
    }

    IDictionary<string, Document> docs = new Dictionary<string, Document>();

    if (Verbose)
    {
        Console.WriteLine("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.IsStored = true;
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        doc.Add(idField);
        string id = "" + i;
        idField.SetStringValue(id);
        docs[id] = doc;
        if (Verbose)
        {
            Console.WriteLine("TEST: add doc id=" + id);
        }

        foreach (int field in fieldIDs)
        {
            string s;
            if (rand.Next(4) != 3)
            {
                s = TestUtil.RandomUnicodeString(rand, 1000);
                doc.Add(NewField("f" + field, s, customType2));
            }
            else
            {
                s = null;
            }
        }
        w.AddDocument(doc);
        if (rand.Next(50) == 17)
        {
            // mix up the binding of field name -> number every so often
            fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
        }
        if (rand.Next(5) == 3 && i > 0)
        {
            string delID = "" + rand.Next(i);
            if (Verbose)
            {
                Console.WriteLine("TEST: delete doc id=" + delID);
            }
            w.DeleteDocuments(new Term("id", delID));
            docs.Remove(delID);
        }
    }

    if (Verbose)
    {
        Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
    }
    if (docs.Count > 0)
    {
        string[] idsList = docs.Keys.ToArray();

        for (int x = 0; x < 2; x++)
        {
            IndexReader r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            if (Verbose)
            {
                Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
            }

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string testID = idsList[rand.Next(idsList.Length)];
                if (Verbose)
                {
                    Console.WriteLine("TEST: test id=" + testID);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                Document doc = r.Document(hits.ScoreDocs[0].Doc);
                Document docExp = docs[testID];
                for (int i = 0; i < fieldCount; i++)
                {
                    Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + i + " is wrong");
                }
            }
            r.Dispose();
            w.ForceMerge(1);
        }
    }
    w.Dispose();
    dir.Dispose();
}

public virtual void TestTransitionAPI()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
    doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
    doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
    doc.Add(new Field("binary", new byte[10]));
    doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
    doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
    doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    w.Dispose();

    doc = r.Document(0);
    // 4 stored fields
    Assert.AreEqual(4, doc.Fields.Count);
    Assert.AreEqual("abc", doc.Get("stored"));
    Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
    Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
    BytesRef br = doc.GetBinaryValue("binary");
    Assert.IsNotNull(br);
    Assert.AreEqual(10, br.Length);

    IndexSearcher s = new IndexSearcher(r);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
    Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

    foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
    {
        Fields tvFields = r.GetTermVectors(0);
        Terms tvs = tvFields.GetTerms(field);
        Assert.IsNotNull(tvs);
        Assert.AreEqual(2, tvs.Count);
        TermsEnum tvsEnum = tvs.GetEnumerator();
        Assert.IsTrue(tvsEnum.MoveNext());
        Assert.AreEqual(new BytesRef("abc"), tvsEnum.Term);
        DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
        if (field.Equals("tv", StringComparison.Ordinal))
        {
            Assert.IsNull(dpEnum);
        }
        else
        {
            Assert.IsNotNull(dpEnum);
        }
        Assert.IsTrue(tvsEnum.MoveNext());
        Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Term);
        Assert.IsFalse(tvsEnum.MoveNext());
    }

    r.Dispose();
    dir.Dispose();
}

public void TestHugeBinaryValues()
{
    Analyzer analyzer = new MockAnalyzer(Random);

    // FSDirectory because SimpleText will consume gobs of
    // space when storing big binary values:
    Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
    bool doFixed = Random.NextBoolean();
    int numDocs;
    int fixedLength = 0;
    if (doFixed)
    {
        // Sometimes make all values fixed length since some
        // codecs have different code paths for this:
        numDocs = TestUtil.NextInt32(Random, 10, 20);
        fixedLength = TestUtil.NextInt32(Random, 65537, 256 * 1024);
    }
    else
    {
        numDocs = TestUtil.NextInt32(Random, 100, 200);
    }
    IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    var docBytes = new List<byte[]>();
    long totalBytes = 0;
    for (int docID = 0; docID < numDocs; docID++)
    {
        // we don't use RandomIndexWriter because it might add
        // more docvalues than we expect !!!!

        // Must be > 64KB in size to ensure more than 2 pages in
        // PagedBytes would be needed:
        int numBytes;
        if (doFixed)
        {
            numBytes = fixedLength;
        }
        else if (docID == 0 || Random.Next(5) == 3)
        {
            numBytes = TestUtil.NextInt32(Random, 65537, 3 * 1024 * 1024);
        }
        else
        {
            numBytes = TestUtil.NextInt32(Random, 1, 1024 * 1024);
        }
        totalBytes += numBytes;
        if (totalBytes > 5 * 1024 * 1024)
        {
            break;
        }
        var bytes = new byte[numBytes];
        Random.NextBytes(bytes);
        docBytes.Add(bytes);
        Document doc = new Document();
        BytesRef b = new BytesRef(bytes);
        b.Length = bytes.Length;
        doc.Add(new BinaryDocValuesField("field", b));
        doc.Add(new StringField("id", "" + docID, Field.Store.YES));
        try
        {
            w.AddDocument(doc);
        }
        catch (ArgumentException iae)
        {
            if (iae.Message.IndexOf("is too large", StringComparison.Ordinal) == -1)
            {
                throw; // rethrow, preserving the original stack trace
            }
            else
            {
                // OK: some codecs can't handle binary DV > 32K
                Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));
                w.Rollback();
                d.Dispose();
                return;
            }
        }
    }

    DirectoryReader r;
    try
    {
        r = DirectoryReader.Open(w, true);
    }
    catch (ArgumentException iae)
    {
        if (iae.Message.IndexOf("is too large", StringComparison.Ordinal) == -1)
        {
            throw; // rethrow, preserving the original stack trace
        }
        else
        {
            // OK: some codecs can't handle binary DV > 32K
            Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));
            w.Rollback();
            d.Dispose();
            return;
        }
    }
    w.Dispose();

    AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);

    BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
    for (int docID = 0; docID < docBytes.Count; docID++)
    {
        Document doc = ar.Document(docID);
        BytesRef bytes = new BytesRef();
        s.Get(docID, bytes);
        var expected = docBytes[Convert.ToInt32(doc.Get("id"))];
        Assert.AreEqual(expected.Length, bytes.Length);
        Assert.AreEqual(new BytesRef(expected), bytes);
    }

    Assert.IsTrue(CodecAcceptsHugeBinaryValues("field"));

    ar.Dispose();
    d.Dispose();
}

public void TestHugeBinaryValueLimit()
{
    // We only test DVFormats that have a limit
    AssumeFalse("test requires codec with limits on max binary field length", CodecAcceptsHugeBinaryValues("field"));
    Analyzer analyzer = new MockAnalyzer(Random);

    // FSDirectory because SimpleText will consume gobs of
    // space when storing big binary values:
    Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
    bool doFixed = Random.NextBoolean();
    int numDocs;
    int fixedLength = 0;
    if (doFixed)
    {
        // Sometimes make all values fixed length since some
        // codecs have different code paths for this:
        numDocs = TestUtil.NextInt32(Random, 10, 20);
        fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
    }
    else
    {
        numDocs = TestUtil.NextInt32(Random, 100, 200);
    }
    IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    var docBytes = new List<byte[]>();
    long totalBytes = 0;
    for (int docID = 0; docID < numDocs; docID++)
    {
        // we don't use RandomIndexWriter because it might add
        // more docvalues than we expect !!!!

        // Must be > 64KB in size to ensure more than 2 pages in
        // PagedBytes would be needed:
        int numBytes;
        if (doFixed)
        {
            numBytes = fixedLength;
        }
        else if (docID == 0 || Random.Next(5) == 3)
        {
            numBytes = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
        }
        else
        {
            numBytes = TestUtil.NextInt32(Random, 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
        }
        totalBytes += numBytes;
        if (totalBytes > 5 * 1024 * 1024)
        {
            break;
        }
        var bytes = new byte[numBytes];
        Random.NextBytes(bytes);
        docBytes.Add(bytes);
        Document doc = new Document();
        BytesRef b = new BytesRef(bytes);
        b.Length = bytes.Length;
        doc.Add(new BinaryDocValuesField("field", b));
        doc.Add(new StringField("id", "" + docID, Field.Store.YES));
        w.AddDocument(doc);
    }

    DirectoryReader r = DirectoryReader.Open(w, true);
    w.Dispose();

    AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);

    BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
    for (int docID = 0; docID < docBytes.Count; docID++)
    {
        Document doc = ar.Document(docID);
        BytesRef bytes = new BytesRef();
        s.Get(docID, bytes);
        var expected = docBytes[Convert.ToInt32(doc.Get("id"))];
        Assert.AreEqual(expected.Length, bytes.Length);
        Assert.AreEqual(new BytesRef(expected), bytes);
    }

    ar.Dispose();
    d.Dispose();
}
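
// CodecAcceptsHugeBinaryValues is referenced by the two tests above but not defined in this
// excerpt. A minimal sketch, assuming it reports whether the doc-values format backing the
// field can store binary values larger than Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
// the set of format names below is an assumption, not confirmed by this file:
protected virtual bool CodecAcceptsHugeBinaryValues(string field)
{
    string name = TestUtil.GetDocValuesFormat(field);
    return !(name.Equals("Lucene40", StringComparison.Ordinal)
             || name.Equals("Lucene42", StringComparison.Ordinal)
             || name.Equals("Memory", StringComparison.Ordinal)
             || name.Equals("Direct", StringComparison.Ordinal));
}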