/// <summary>
/// Builds <paramref name="ndocs"/> dictionary source documents, keyed by their
/// suggestion term. Each document carries the term field, a payload, three
/// weight doc-values fields and at least 3 distinct context values.
/// </summary>
private IDictionary<string, Document> GenerateIndexDocuments(int ndocs)
{
    IDictionary<string, Document> docs = new HashMap<string, Document>();
    for (int i = 0; i < ndocs; i++)
    {
        Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
        Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
        Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
        Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
        Document doc = new Document();
        doc.Add(field);
        doc.Add(payload);
        doc.Add(weight1);
        doc.Add(weight2);
        doc.Add(weight3);
        // BUGFIX: add a distinct StoredField instance per context value. The
        // previous code mutated a single Field instance and re-added it, so
        // every occurrence in the document ended up holding the LAST context
        // value. Also hoist AtLeast(3) out of the loop condition: re-evaluating
        // it on every iteration produced a nondeterministic bound.
        int numContexts = AtLeast(3);
        for (int j = 0; j < numContexts; j++)
        {
            doc.Add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j)));
        }
        docs.Put(field.StringValue, doc);
    }
    return docs;
}
/// <summary>
/// Indexes one binary stored field and one string stored field carrying the
/// same value, then verifies both round-trip through a reader unchanged.
/// </summary>
public virtual void TestBinaryFieldInIndex()
{
    FieldType ft = new FieldType();
    ft.Stored = true;
    IndexableField binaryFldStored = new StoredField("binaryStored", Encoding.UTF8.GetBytes(BinaryValStored));
    IndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

    Documents.Document doc = new Documents.Document();
    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    // test for field count
    Assert.AreEqual(2, doc.Fields.Count);

    // add the doc to a ram index
    Directory dir = NewDirectory();
    Random r = Random();
    RandomIndexWriter writer = new RandomIndexWriter(r, dir);
    writer.AddDocument(doc);

    // open a reader and fetch the document
    IndexReader reader = writer.Reader;
    Documents.Document docFromReader = reader.Document(0);
    Assert.IsTrue(docFromReader != null);

    // fetch the binary stored field and compare its content with the original one
    BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");
    Assert.IsNotNull(bytes);
    // BUGFIX: decode only the valid [Offset, Offset + Length) slice of the
    // backing array. The previous code decoded the WHOLE array and then called
    // Substring() with BYTE offsets on the decoded CHAR string, which is wrong
    // whenever the stored value (or surrounding buffer) contains multi-byte
    // UTF-8 sequences.
    string binaryFldStoredTest = Encoding.UTF8.GetString(bytes.Bytes, bytes.Offset, bytes.Length);
    Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored));

    // fetch the string field and compare its content with the original one
    string stringFldStoredTest = docFromReader.Get("stringStored");
    Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored));

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exercises adding, reading and removing binary stored fields on a Document,
/// including two values stored under the same field name.
/// </summary>
public virtual void TestBinaryField()
{
    Documents.Document document = new Documents.Document();

    FieldType storedOnly = new FieldType();
    storedOnly.Stored = true;

    IndexableField textField = new Field("string", BinaryVal, storedOnly);
    IndexableField firstBinary = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
    IndexableField secondBinary = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

    document.Add(textField);
    document.Add(firstBinary);
    Assert.AreEqual(2, document.Fields.Count);

    // A binary stored field carries bytes, is stored, and is not indexed.
    Assert.IsTrue(firstBinary.BinaryValue != null);
    Assert.IsTrue(firstBinary.FieldType.Stored);
    Assert.IsFalse(firstBinary.FieldType.Indexed);

    // The single binary value must round-trip and match the string field.
    string decodedBinary = document.GetBinaryValue("binary").Utf8ToString();
    Assert.IsTrue(decodedBinary.Equals(BinaryVal));
    string decodedString = document.Get("string");
    Assert.IsTrue(decodedBinary.Equals(decodedString));

    // A second value under the same name is returned alongside the first,
    // in insertion order.
    document.Add(secondBinary);
    Assert.AreEqual(3, document.Fields.Count);
    BytesRef[] bothValues = document.GetBinaryValues("binary");
    Assert.AreEqual(2, bothValues.Length);
    decodedBinary = bothValues[0].Utf8ToString();
    string decodedBinary2 = bothValues[1].Utf8ToString();
    Assert.IsFalse(decodedBinary.Equals(decodedBinary2));
    Assert.IsTrue(decodedBinary.Equals(BinaryVal));
    Assert.IsTrue(decodedBinary2.Equals(BinaryVal2));

    // RemoveField drops one "string" field (3 -> 2); RemoveFields drops
    // both "binary" fields (2 -> 0).
    document.RemoveField("string");
    Assert.AreEqual(2, document.Fields.Count);
    document.RemoveFields("binary");
    Assert.AreEqual(0, document.Fields.Count);
}
/// <summary>
/// Verifies that a StoredField created from a long rejects every setter
/// except the long-value one, and that the accepted update sticks.
/// </summary>
public virtual void TestStoredFieldLong()
{
    Field stored = new StoredField("foo", 1L);
    // Each TrySet* helper asserts that the corresponding setter is rejected
    // for this field type.
    TrySetBoost(stored);
    TrySetByteValue(stored);
    TrySetBytesValue(stored);
    TrySetBytesRefValue(stored);
    TrySetDoubleValue(stored);
    TrySetIntValue(stored);
    TrySetFloatValue(stored);
    stored.LongValue = 5; // the one legal mutation for a long-valued field
    TrySetReaderValue(stored);
    TrySetShortValue(stored);
    TrySetStringValue(stored);
    TrySetTokenStreamValue(stored);
    Assert.AreEqual(5L, (long)stored.NumericValue);
}
/// <summary>
/// Verifies that a StoredField created from a float rejects every setter
/// except the float-value one, and that the accepted update sticks.
/// </summary>
public virtual void TestStoredFieldFloat()
{
    Field stored = new StoredField("foo", 1F);
    // Each TrySet* helper asserts that the corresponding setter is rejected
    // for this field type.
    TrySetBoost(stored);
    TrySetByteValue(stored);
    TrySetBytesValue(stored);
    TrySetBytesRefValue(stored);
    TrySetDoubleValue(stored);
    TrySetIntValue(stored);
    stored.FloatValue = 5f; // the one legal mutation for a float-valued field
    TrySetLongValue(stored);
    TrySetReaderValue(stored);
    TrySetShortValue(stored);
    TrySetStringValue(stored);
    TrySetTokenStreamValue(stored);
    Assert.AreEqual(5f, (float)stored.NumericValue, 0.0f);
}
/// <summary>
/// Verifies that a StoredField created from a double rejects every setter
/// except the double-value one, and that the accepted update sticks.
/// </summary>
public virtual void TestStoredFieldDouble()
{
    Field stored = new StoredField("foo", 1D);
    // Each TrySet* helper asserts that the corresponding setter is rejected
    // for this field type.
    TrySetBoost(stored);
    TrySetByteValue(stored);
    TrySetBytesValue(stored);
    TrySetBytesRefValue(stored);
    stored.DoubleValue = 5D; // the one legal mutation for a double-valued field
    TrySetIntValue(stored);
    TrySetFloatValue(stored);
    TrySetLongValue(stored);
    TrySetReaderValue(stored);
    TrySetShortValue(stored);
    TrySetStringValue(stored);
    TrySetTokenStreamValue(stored);
    Assert.AreEqual(5D, (double)stored.NumericValue, 0.0D);
}
/// <summary>
/// Verifies that a StoredField created from a string rejects every setter
/// except the string-value one, and that the accepted update sticks.
/// </summary>
public virtual void TestStoredFieldString()
{
    Field stored = new StoredField("foo", "bar");
    // Each TrySet* helper asserts that the corresponding setter is rejected
    // for this field type.
    TrySetBoost(stored);
    TrySetByteValue(stored);
    TrySetBytesValue(stored);
    TrySetBytesRefValue(stored);
    TrySetDoubleValue(stored);
    TrySetIntValue(stored);
    TrySetFloatValue(stored);
    TrySetLongValue(stored);
    TrySetReaderValue(stored);
    TrySetShortValue(stored);
    stored.StringValue = "baz"; // the one legal mutation for a string-valued field
    TrySetTokenStreamValue(stored);
    Assert.AreEqual("baz", stored.StringValue);
}
public void TestNumericField()
{
    // Indexes a numeric field of a random type (float/double/int/long) paired
    // with a StoredField carrying the same value, then verifies the stored
    // "nf" field round-trips as a StoredField with the exact original value.
    Directory dir = NewDirectory();
    var w = new RandomIndexWriter(Random(), dir);
    var numDocs = AtLeast(500);
    // Expected value and numeric type per document id.
    var answers = new object[numDocs];
    FieldType.NumericType[] typeAnswers = new FieldType.NumericType[numDocs];
    for (int id = 0; id < numDocs; id++)
    {
        Document doc = new Document();
        Field nf; // indexed (not stored) numeric field
        Field sf; // stored companion with the same value
        object answer;
        FieldType.NumericType typeAnswer;
        if (Random().NextBoolean())
        {
            // float/double
            if (Random().NextBoolean())
            {
                float f = Random().NextFloat();
                answer = Convert.ToSingle(f);
                nf = new FloatField("nf", f, Field.Store.NO);
                sf = new StoredField("nf", f);
                typeAnswer = FieldType.NumericType.FLOAT;
            }
            else
            {
                double d = Random().NextDouble();
                answer = Convert.ToDouble(d);
                nf = new DoubleField("nf", d, Field.Store.NO);
                sf = new StoredField("nf", d);
                typeAnswer = FieldType.NumericType.DOUBLE;
            }
        }
        else
        {
            // int/long
            if (Random().NextBoolean())
            {
                int i = Random().Next();
                answer = Convert.ToInt32(i);
                nf = new IntField("nf", i, Field.Store.NO);
                sf = new StoredField("nf", i);
                typeAnswer = FieldType.NumericType.INT;
            }
            else
            {
                long l = Random().NextLong();
                answer = Convert.ToInt64(l);
                nf = new LongField("nf", l, Field.Store.NO);
                sf = new StoredField("nf", l);
                typeAnswer = FieldType.NumericType.LONG;
            }
        }
        doc.Add(nf);
        doc.Add(sf);
        answers[id] = answer;
        typeAnswers[id] = typeAnswer;
        // Index the id with the precision step maxed out so trie-encoding
        // produces a single term per value.
        FieldType ft = new FieldType(IntField.TYPE_STORED);
        ft.NumericPrecisionStep = int.MaxValue;
        doc.Add(new IntField("id", id, ft));
        w.AddDocument(doc);
    }
    DirectoryReader r = w.Reader;
    w.Dispose();
    Assert.AreEqual(numDocs, r.NumDocs);
    foreach (AtomicReaderContext ctx in r.Leaves)
    {
        AtomicReader sub = (AtomicReader)ctx.Reader;
        // Use the field cache to map each segment-local docID back to its id.
        FieldCache.Ints ids = FieldCache.DEFAULT.GetInts(sub, "id", false);
        for (int docID = 0; docID < sub.NumDocs; docID++)
        {
            Document doc = sub.Document(docID);
            Field f = (Field)doc.GetField("nf");
            // Stored numeric values always materialize as StoredField.
            Assert.IsTrue(f is StoredField, "got f=" + f);
            Assert.AreEqual(answers[ids.Get(docID)], f.NumericValue);
        }
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Stores a 17-byte window (offset 10) of a 50-byte array in a binary stored
/// field and verifies that only the window survives the round-trip.
/// </summary>
public void TestBinaryFieldOffsetLength()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var b = new byte[50];
    for (int i = 0; i < 50; i++)
    {
        b[i] = (byte)(i + 77);
    }

    Document doc = new Document();
    Field f = new StoredField("binary", b, 10, 17);
    var bx = f.BinaryValue().Bytes;
    Assert.IsTrue(bx != null);
    // Before indexing, the field still references the full backing array...
    Assert.AreEqual(50, bx.Length);
    // ...but only the [10, 10+17) window is marked for storage.
    Assert.AreEqual(10, f.BinaryValue().Offset);
    Assert.AreEqual(17, f.BinaryValue().Length);
    doc.Add(f);
    w.AddDocument(doc);
    w.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    Document doc2 = ir.Document(0);
    IndexableField f2 = doc2.GetField("binary");
    b = f2.BinaryValue().Bytes;
    Assert.IsTrue(b != null);
    // BUGFIX: the original assertion was Assert.AreEqual(17, b.Length, 17),
    // which bound to the (double, double, delta) overload with a tolerance of
    // 17, making it nearly vacuous. Assert the exact length instead.
    Assert.AreEqual(17, b.Length);
    // First stored byte is the original b[10], i.e. 10 + 77.
    Assert.AreEqual(87, b[0]);
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds <paramref name="ndocs"/> dictionary source documents and returns a
/// pair of (terms of invalid documents, map of term -> document). A document
/// is "invalid" when it is missing a field that the dictionary requires
/// (the term field itself, or the payload when <paramref name="requiresPayload"/>).
/// </summary>
private KeyValuePair<List<string>, IDictionary<string, Document>> GenerateIndexDocuments(int ndocs, bool requiresPayload, bool requiresContexts)
{
    IDictionary<string, Document> docs = new HashMap<string, Document>();
    List<string> invalidDocTerms = new List<string>();
    for (int i = 0; i < ndocs; i++)
    {
        Document doc = new Document();
        bool invalidDoc = false;
        Field field = null;
        // usually have valid term field in document
        if (Usually())
        {
            field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
            doc.Add(field);
        }
        else
        {
            // no term field at all -> the document cannot be suggested from
            invalidDoc = true;
        }
        // even if payload is not required usually have it
        if (requiresPayload || Usually())
        {
            // usually have valid payload field in document
            if (Usually())
            {
                Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
                doc.Add(payload);
            }
            else if (requiresPayload)
            {
                // missing payload only invalidates the doc when it is required
                invalidDoc = true;
            }
        }
        if (requiresContexts || Usually())
        {
            if (Usually())
            {
                for (int j = 0; j < AtLeast(2); j++)
                {
                    doc.Add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_" + j)));
                }
            }
            // we should allow entries without context
        }
        // usually have valid weight field in document
        if (Usually())
        {
            // rarely store the weight as a stored field instead of doc values
            Field weight = (Rarely()) ? (Field)new StoredField(WEIGHT_FIELD_NAME, 100d + i) : (Field)new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
            doc.Add(weight);
        }
        string term = null;
        if (invalidDoc)
        {
            // invalid docs without a term field still need a unique map key
            term = (field != null) ? field.StringValue : "invalid_" + i;
            invalidDocTerms.Add(term);
        }
        else
        {
            term = field.StringValue;
        }
        docs.Put(term, doc);
    }
    return new KeyValuePair<List<string>, IDictionary<string, Document>>(invalidDocTerms, docs);
}
public virtual void TestCompressionTools()
{
    // Round-trips a compressed byte[] payload and a compressed string through
    // stored fields and verifies that decompression restores the originals.
    IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.Compress(BinaryValCompressed.GetBytes(Encoding.UTF8)));
    IndexableField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.CompressString(BinaryValCompressed));

    var doc = new Documents.Document { binaryFldCompressed, stringFldCompressed };

    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir))
    {
        writer.AddDocument(doc);
        using (IndexReader reader = writer.Reader)
        {
            Documents.Document docFromReader = reader.Document(0);
            Assert.IsTrue(docFromReader != null);

            // decompress the binary field and compare with the original value
            string binaryFldCompressedTest = Encoding.UTF8.GetString(
                CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed")));
            //new string(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed")), IOUtils.CHARSET_UTF_8);
            Assert.IsTrue(binaryFldCompressedTest.Equals(BinaryValCompressed));

            // the string variant must round-trip through DecompressString too
            Assert.IsTrue(
                CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed"))
                    .Equals(BinaryValCompressed));
        }
    }
}
/// <summary>
/// Tests dv against stored fields with threads (all types + missing)
/// </summary>
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestThreads2()
{
    // Requires codec support for docsWithField and SORTED_SET doc values.
    AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
    AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
    // Reusable field instances; values are overwritten per document below.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);

    // index some docs
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        idField.StringValue = Convert.ToString(i);
        int length = TestUtil.NextInt(Random(), 0, 8);
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        // Stored field and doc-values fields carry the same random bytes so
        // the reader threads can compare them.
        storedBinField.BytesValue = new BytesRef(buffer);
        dvBinField.BytesValue = new BytesRef(buffer);
        dvSortedField.BytesValue = new BytesRef(buffer);
        long numericValue = Random().NextLong();
        storedNumericField.StringValue = Convert.ToString(numericValue);
        dvNumericField.LongValue = numericValue;
        Document doc = new Document();
        doc.Add(idField);
        // Randomly omit field groups so "missing" values get exercised too.
        if (Random().Next(4) > 0)
        {
            doc.Add(storedBinField);
            doc.Add(dvBinField);
            doc.Add(dvSortedField);
        }
        if (Random().Next(4) > 0)
        {
            doc.Add(storedNumericField);
            doc.Add(dvNumericField);
        }
        // 0-2 sorted-set values; SortedSet dedups and orders them like the
        // SORTED_SET doc values will.
        int numSortedSetFields = Random().Next(3);
        SortedSet<string> values = new SortedSet<string>();
        for (int j = 0; j < numSortedSetFields; j++)
        {
            values.Add(TestUtil.RandomSimpleString(Random()));
        }
        foreach (string v in values)
        {
            doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
            doc.Add(new StoredField("storedSortedSet", v));
        }
        writer.AddDocument(doc);
        // Occasionally commit so multiple segments are produced.
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int i = 0; i < numDeletions; i++)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare: several reader threads verify dv values against stored fields,
    // all released at once via the countdown gate.
    DirectoryReader ir = DirectoryReader.Open(dir);
    int numThreads = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[numThreads];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThreadAnonymousInnerClassHelper2(this, ir, startingGun);
        threads[i].Start();
    }
    startingGun.Signal();
    foreach (ThreadClass t in threads)
    {
        t.Join();
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes random byte payloads (length in [minLength, maxLength]) as both a
/// stored field and a SORTED doc-values field, then verifies that for every
/// document the doc value equals the stored value.
/// </summary>
private void DoTestSortedVsStoredFields(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
    // One Document instance is reused; the field values are overwritten per doc.
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedField = new StoredField("stored", new byte[0]);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    doc.Add(idField);
    doc.Add(storedField);
    doc.Add(dvField);

    // index some docs
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        idField.StringValue = Convert.ToString(i);
        int length;
        if (minLength == maxLength)
        {
            length = minLength; // fixed length
        }
        else
        {
            length = TestUtil.NextInt(Random(), minLength, maxLength);
        }
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        // Stored field and doc-values field carry identical bytes.
        storedField.BytesValue = new BytesRef(buffer);
        dvField.BytesValue = new BytesRef(buffer);
        writer.AddDocument(doc);
        // Occasionally commit so multiple segments are produced.
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int i = 0; i < numDeletions; i++)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare: for every doc in every segment, dv bytes == stored bytes.
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        BinaryDocValues docValues = r.GetSortedDocValues("dv");
        for (int i = 0; i < r.MaxDoc; i++)
        {
            BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
            BytesRef scratch = new BytesRef();
            docValues.Get(i, scratch);
            Assert.AreEqual(binaryValue, scratch);
        }
    }
    ir.Dispose();
    dir.Dispose();
}
public void TestThreads()
{
    // Indexes documents whose stored fields mirror their doc-values fields,
    // then spawns reader threads that verify the two stay consistent.
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
    // One Document instance is reused; the field values are overwritten per doc.
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);
    doc.Add(idField);
    doc.Add(storedBinField);
    doc.Add(dvBinField);
    doc.Add(dvSortedField);
    doc.Add(storedNumericField);
    doc.Add(dvNumericField);

    // index some docs
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        idField.StringValue = Convert.ToString(i);
        int length = TestUtil.NextInt(Random(), 0, 8);
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        // Stored fields and doc-values fields carry identical values so the
        // reader threads can compare them.
        storedBinField.BytesValue = new BytesRef(buffer);
        dvBinField.BytesValue = new BytesRef(buffer);
        dvSortedField.BytesValue = new BytesRef(buffer);
        long numericValue = Random().NextLong();
        storedNumericField.StringValue = Convert.ToString(numericValue);
        dvNumericField.LongValue = numericValue;
        writer.AddDocument(doc);
        // Occasionally commit so multiple segments are produced.
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int i = 0; i < numDeletions; i++)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare: reader threads verify dv values against stored fields, all
    // released at once via the countdown gate.
    DirectoryReader ir = DirectoryReader.Open(dir);
    int numThreads = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[numThreads];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThreadAnonymousInnerClassHelper(this, ir, startingGun);
        threads[i].Start();
    }
    startingGun.Signal();
    foreach (ThreadClass t in threads)
    {
        t.Join();
    }
    ir.Dispose();
    dir.Dispose();
}