private IDictionary<string, Document> GenerateIndexDocuments(int ndocs)
{
    IDictionary<string, Document> docs = new HashMap<string, Document>();
    for (int i = 0; i < ndocs; i++)
    {
        Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
        Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
        Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
        Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);

        Document doc = new Document();
        doc.Add(field);
        doc.Add(payload);
        doc.Add(weight1);
        doc.Add(weight2);
        doc.Add(weight3);

        // Add a variable number of context values. Each context gets its own
        // StoredField instance: mutating and re-adding a single shared Field,
        // as this method previously did, leaves every occurrence pointing at
        // the last BytesRef assigned, so all contexts would collapse to one value.
        doc.Add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0")));
        for (int j = 1; j < AtLeast(3); j++)
        {
            doc.Add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j)));
        }
        docs.Put(field.StringValue, doc);
    }
    return docs;
}
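// Hedged usage sketch, not part of the original file: the map returned by
// GenerateIndexDocuments is presumably indexed and the resulting reader handed
// to the dictionary/suggester under test. The helper name and the "dir" field
// are assumptions borrowed from the surrounding fixtures.
private IndexReader IndexGeneratedDocuments(int ndocs)
{
    IDictionary<string, Document> docs = GenerateIndexDocuments(ndocs);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    IndexReader reader = writer.Reader;
    writer.Dispose();
    return reader;
}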
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConfig.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConfig);

    // One reusable Document: the field values are overwritten for each row below.
    Document document = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    document.Add(idField);
    Field byteField = new StringField("byte", "", Field.Store.NO);
    document.Add(byteField);
    Field doubleField = new StringField("double", "", Field.Store.NO);
    document.Add(doubleField);
    Field floatField = new StringField("float", "", Field.Store.NO);
    document.Add(floatField);
    Field intField = new StringField("int", "", Field.Store.NO);
    document.Add(intField);
    Field longField = new StringField("long", "", Field.Store.NO);
    document.Add(longField);
    Field shortField = new StringField("short", "", Field.Store.NO);
    document.Add(shortField);
    Field stringField = new StringField("string", "", Field.Store.NO);
    document.Add(stringField);
    Field textField = new TextField("text", "", Field.Store.NO);
    document.Add(textField);

    foreach (string[] doc in documents)
    {
        idField.StringValue = doc[0];
        byteField.StringValue = doc[1];
        doubleField.StringValue = doc[2];
        floatField.StringValue = doc[3];
        intField.StringValue = doc[4];
        longField.StringValue = doc[5];
        shortField.StringValue = doc[6];
        stringField.StringValue = doc[7];
        textField.StringValue = doc[8];
        iw.AddDocument(document);
    }

    reader = iw.Reader;
    searcher = NewSearcher(reader);
    iw.Dispose();
}
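// The "documents" fixture consumed above is defined elsewhere in this class.
// The row below is a hypothetical example only, illustrating the expected
// 9-column layout: { id, byte, double, float, int, long, short, string, text }.
//
//   new[] { "0", "1", "1.0", "1.0", "1", "1", "1", "a", "a b c" }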
public virtual void TestFloatNorms()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    LineFileDocs docs = new LineFileDocs(Random());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        float nextFloat = (float)Random().NextDouble();
        // Cast to a double to get more precision output to the string.
        Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
        f.Boost = nextFloat;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(FloatTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();

    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues norms = open.GetNormValues(FloatTestField);
    Assert.IsNotNull(norms);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        float expected = Convert.ToSingle(document.Get(FloatTestField));
        Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
    }
    open.Dispose();
    dir.Dispose();
    docs.Dispose();
}
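// MySimProvider is referenced above but defined elsewhere; presumably it is a
// PerFieldSimilarityWrapper that returns something like the sketch below for
// FloatTestField. The idea: ComputeNorm stores the raw IEEE-754 bits of the
// field boost, which the verify loop decodes with Number.IntBitsToFloat.
// This is a hedged sketch; member names follow the Lucene.NET 4.8 Similarity
// API as best understood, and the scoring members are unused by the test.
public class FloatEncodingBoostSimilarity : Similarity
{
    public override long ComputeNorm(FieldInvertState state)
    {
        // Store the boost verbatim as its 32-bit float representation.
        return Number.FloatToIntBits(state.Boost);
    }

    // Scoring is never exercised by the norms round-trip test.
    public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
    {
        throw new NotSupportedException();
    }

    public override SimScorer GetSimScorer(SimWeight weight, AtomicReaderContext context)
    {
        throw new NotSupportedException();
    }
}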
public void BeforeClass()
{
    Random random = Random();
    Directory = NewDirectory();
    Stopword = "" + RandomChar();
    CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));
    Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
    RandomIndexWriter iw = new RandomIndexWriter(random, Directory, Analyzer, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field field = new TextField("field", "", Field.Store.NO);
    doc.Add(id);
    doc.Add(field);

    // index some docs
    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; i++)
    {
        id.StringValue = Convert.ToString(i);
        field.StringValue = RandomFieldContents();
        iw.AddDocument(doc);
    }

    // delete some docs
    int numDeletes = numDocs / 20;
    for (int i = 0; i < numDeletes; i++)
    {
        Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs)));
        if (random.NextBoolean())
        {
            iw.DeleteDocuments(toDelete);
        }
        else
        {
            iw.DeleteDocuments(new TermQuery(toDelete));
        }
    }

    Reader = iw.Reader;
    S1 = NewSearcher(Reader);
    S2 = NewSearcher(Reader);
    iw.Dispose();
}
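// RandomChar and RandomFieldContents are helpers defined elsewhere in this
// base class. Hedged sketches of plausible implementations follow: a tiny
// alphabet keeps term collisions (and hence interesting matches) frequent.
// The exact alphabet bounds and word counts are assumptions, not the
// original values.
internal static char RandomChar()
{
    return (char)TestUtil.NextInt(Random(), 'a', 'e');
}

internal static string RandomFieldContents()
{
    StringBuilder sb = new StringBuilder();
    int numWords = Random().Next(10);
    for (int i = 0; i < numWords; i++)
    {
        sb.Append(RandomChar()).Append(' ');
    }
    return sb.ToString();
}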
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
public virtual void BuildIndex(Directory dir)
{
    Random random = Random();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        int boost = Random().Next(255);
        Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
        f.Boost = boost;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(ByteTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    docs.Dispose();
}
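// Hedged companion sketch (the name VerifyIndex is hypothetical): BuildIndex
// stores each document's boost both as stored-field text and, via
// MySimProvider, as its norm, so a read-back pass can compare the two,
// mirroring the verify loop in TestFloatNorms above. The & 0xFF mask assumes
// the provider encodes the boost in the low byte of the norm.
public virtual void VerifyIndex(Directory dir)
{
    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues norms = open.GetNormValues(ByteTestField);
    Assert.IsNotNull(norms);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        int expected = Convert.ToInt32(document.Get(ByteTestField));
        Assert.AreEqual(expected, (int)(norms.Get(i) & 0xFF));
    }
    open.Dispose();
}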
public virtual void TestTextFieldReader()
{
    Field field = new TextField("foo", new StringReader("bar"));
    field.Boost = 5f;
    // A reader-valued TextField must reject every other value type;
    // each TrySet* helper asserts that the corresponding setter throws.
    TrySetByteValue(field);
    TrySetBytesValue(field);
    TrySetBytesRefValue(field);
    TrySetDoubleValue(field);
    TrySetIntValue(field);
    TrySetFloatValue(field);
    TrySetLongValue(field);
    field.ReaderValue = new StringReader("foobar"); // legal: replacing the reader
    TrySetShortValue(field);
    TrySetStringValue(field);
    field.TokenStream = new CannedTokenStream(new Token("foo", 0, 3)); // also legal

    Assert.IsNotNull(field.ReaderValue);
    Assert.AreEqual(5f, field.Boost, 0f);
}
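// The TrySet* helpers are defined elsewhere in this test class. A minimal
// sketch of the presumed pattern, shown here for the string case: attempt a
// setter that is illegal for the field under test and fail if nothing throws.
// The exception type and the literal value are assumptions.
private void TrySetStringValue(Field f)
{
    try
    {
        f.StringValue = "baz";
        Assert.Fail("expected an exception: string values are not legal here");
    }
    catch (ArgumentException)
    {
        // expected: a reader-valued TextField rejects string values
    }
}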
/// <summary>
/// Returns Pair(list of invalid document terms, Map of document term -> document)
/// </summary>
private KeyValuePair<List<string>, IDictionary<string, Document>> GenerateIndexDocuments(int ndocs, bool requiresPayload, bool requiresContexts)
{
    IDictionary<string, Document> docs = new HashMap<string, Document>();
    List<string> invalidDocTerms = new List<string>();
    for (int i = 0; i < ndocs; i++)
    {
        Document doc = new Document();
        bool invalidDoc = false;
        Field field = null;

        // usually have valid term field in document
        if (Usually())
        {
            field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
            doc.Add(field);
        }
        else
        {
            invalidDoc = true;
        }

        // even if payload is not required usually have it
        if (requiresPayload || Usually())
        {
            // usually have valid payload field in document
            if (Usually())
            {
                Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
                doc.Add(payload);
            }
            else if (requiresPayload)
            {
                invalidDoc = true;
            }
        }

        if (requiresContexts || Usually())
        {
            if (Usually())
            {
                for (int j = 0; j < AtLeast(2); j++)
                {
                    doc.Add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_" + j)));
                }
            }
            // we should allow entries without context
        }

        // usually have valid weight field in document
        if (Usually())
        {
            Field weight = Rarely()
                ? (Field)new StoredField(WEIGHT_FIELD_NAME, 100d + i)
                : (Field)new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
            doc.Add(weight);
        }

        string term;
        if (invalidDoc)
        {
            term = (field != null) ? field.StringValue : "invalid_" + i;
            invalidDocTerms.Add(term);
        }
        else
        {
            term = field.StringValue;
        }
        docs.Put(term, doc);
    }
    return new KeyValuePair<List<string>, IDictionary<string, Document>>(invalidDocTerms, docs);
}
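// Hedged usage sketch (the method name is hypothetical): a caller indexes the
// generated documents and keeps the invalid-term list so it can later assert
// that the dictionary under test skips exactly those entries.
private List<string> IndexAndCollectInvalidTerms(RandomIndexWriter writer, int ndocs)
{
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(ndocs, true, false);
    foreach (Document doc in res.Value.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    return res.Key;
}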
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    // analyzer is unused (null): each field supplies its own TokenStream
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    // Check the attribute interface, not the concrete class: token streams
    // register IPayloadAttribute, so HasAttribute<PayloadAttribute>() would
    // always be false and the assertions below would pass vacuously.
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);
    writer.AddDocument(doc);

    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    writer.AddDocument(doc);

    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    writer.AddDocument(doc);

    DirectoryReader reader = writer.Reader;
    AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader);
    DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    // As in TestMixupDocs, test against IPayloadAttribute (the registered
    // interface) rather than the concrete PayloadAttribute class.
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);

    Field field2 = new TextField("field", "", Field.Store.NO);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.TokenStream = ts;
    doc.Add(field2);

    Field field3 = new TextField("field", "", Field.Store.NO);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.TokenStream = ts;
    doc.Add(field3);
    writer.AddDocument(doc);

    DirectoryReader reader = writer.Reader;
    SegmentReader sr = GetOnlySegmentReader(reader);
    DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
private static Document Doc(int id, PositionsTokenStream positions)
{
    Document doc = new Document();
    doc.Add(new StringField(ID_FIELD, id.ToString(), Field.Store.YES));
    doc.Add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Field.Store.NO));
    positions.SetId(id);
    if (DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
    {
        // codec doesn't support offsets: just index positions for the field
        doc.Add(new Field(DOC_POSITIONS_FIELD, positions, TextField.TYPE_NOT_STORED));
    }
    else
    {
        doc.Add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
    }
    doc.Add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));
    TextField norms = new TextField(NORMS_FIELD, id.ToString(), Field.Store.NO);
    norms.Boost = Number.IntBitsToFloat(id);
    doc.Add(norms);
    doc.Add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(id.ToString())));
    doc.Add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(id.ToString())));
    if (DefaultCodecSupportsSortedSet())
    {
        doc.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(id.ToString())));
        doc.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef((id + 1).ToString())));
    }
    doc.Add(new Field(TERM_VECTORS_FIELD, id.ToString(), TERM_VECTORS_TYPE));
    return doc;
}
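// Hedged usage sketch: Doc is presumably called while populating the index
// whose segments the sorting tests inspect; ids are shuffled so that sorting
// actually has work to do. Every name here except Doc itself is an assumption.
private static void AddShuffledDocs(RandomIndexWriter writer, int numDocs, PositionsTokenStream positions)
{
    int[] ids = new int[numDocs];
    for (int i = 0; i < numDocs; i++)
    {
        ids[i] = i;
    }
    // Fisher-Yates shuffle so documents arrive out of id order
    for (int i = ids.Length - 1; i > 0; i--)
    {
        int j = Random().Next(i + 1);
        int tmp = ids[i];
        ids[i] = ids[j];
        ids[j] = tmp;
    }
    foreach (int id in ids)
    {
        writer.AddDocument(Doc(id, positions));
    }
}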