private void CreateRandomIndexes(int maxSegments)
{
    dir = NewDirectory();
    numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
    ISet<string> randomTerms = new HashSet<string>();
    while (randomTerms.Count < numTerms)
    {
        randomTerms.Add(TestUtil.RandomSimpleString(Random()));
    }
    terms = new List<string>(randomTerms);
    // Share one seed between the analyzer and the writer so their randomness stays in sync.
    int seed = Random().Next();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
    iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
    iw = new RandomIndexWriter(new Random(seed), dir, iwc);
    for (int i = 0; i < numDocs; ++i)
    {
        Document doc = RandomDocument();
        iw.AddDocument(doc);
        // Commit at the halfway point and occasionally elsewhere, so the index
        // has several segments for the sorting merge policy to work on.
        if (i == numDocs / 2 || (i != numDocs - 1 && Random().Next(8) == 0))
        {
            iw.Commit();
        }
        // Sprinkle in some deletions.
        if (Random().Next(15) == 0)
        {
            string term = RandomInts.RandomFrom(Random(), terms);
            iw.DeleteDocuments(new Term("s", term));
        }
    }
    reader = iw.Reader;
}
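// A hedged usage sketch, not part of the original class: because the merged
// segments are sorted by SortingMergePolicy, a search over this index can be
// terminated early per segment with EarlyTerminatingSortingCollector. The
// method name, query, collector flags, and hit count below are illustrative
// assumptions.
private void SearchWithEarlyTermination()
{
    IndexSearcher searcher = NewSearcher(reader);
    int numHits = TestUtil.NextInt(Random(), 1, numDocs / 10);
    // fillFields = true, no score/maxScore tracking, docs scored in order.
    TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, true, false, false, true);
    searcher.Search(new MatchAllDocsQuery(), new EarlyTerminatingSortingCollector(collector, sort, numHits));
}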
public void TestWithContexts()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), true, true);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.EntryIterator;
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        // Remove each yielded term's document; whatever remains at the end
        // must belong to the invalid documents the iterator skipped.
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        assertTrue(f.Equals(new BytesRef(doc.Get(FIELD_NAME))));
        IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
        assertTrue(inputIterator.Payload.Equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
        // The iterator's contexts must match the document's context fields.
        ISet<BytesRef> oriCtxs = new HashSet<BytesRef>();
        IEnumerable<BytesRef> contextSet = inputIterator.Contexts;
        foreach (IndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
        {
            oriCtxs.Add(ctxf.BinaryValue);
        }
        assertEquals(oriCtxs.Count, contextSet.Count());
    }
    // Every document left over must be one of the known-invalid ones.
    foreach (string invalidTerm in invalidDocTerms)
    {
        var invalid = docs[invalidTerm];
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Makes a bunch of single-char tokens (so there can be at most 26 unique terms)
/// and records the number of unique terms in <c>expected</c>, to be checked
/// against the norm later.
/// </summary>
private string AddValue()
{
    StringBuilder sb = new StringBuilder();
    HashSet<string> terms = new HashSet<string>();
    int num = TestUtil.NextInt(Random(), 0, 255);
    for (int i = 0; i < num; i++)
    {
        sb.Append(' ');
        char term = (char)TestUtil.NextInt(Random(), 'a', 'z');
        sb.Append(term);
        terms.Add(term.ToString());
    }
    expected.Add(terms.Count);
    return sb.ToString();
}
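// A hedged sketch, not part of the original class, of the norm check the
// summary above refers to: assuming the test's Similarity encodes the
// unique-term count directly into the norm, the stored norm of each document
// should equal the matching entry in `expected`. The field name "field" is
// an assumption for illustration.
private void AssertNormsMatchExpected(IndexReader reader)
{
    NumericDocValues norms = MultiDocValues.GetNormValues(reader, "field");
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        assertEquals((long)expected[i], norms.Get(i));
    }
}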
private void CreateRandomIndexes()
{
    dir1 = NewDirectory();
    dir2 = NewDirectory();
    int numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
    ISet<string> randomTerms = new HashSet<string>();
    while (randomTerms.Count < numTerms)
    {
        randomTerms.Add(TestUtil.RandomSimpleString(Random()));
    }
    terms = new List<string>(randomTerms);
    // Drive both writers from the same seed so dir1 and dir2 receive
    // identical documents, deletions, and commits.
    long seed = Random().NextLong();
    IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
    RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
    RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
    for (int i = 0; i < numDocs; ++i)
    {
        if (Random().Next(5) == 0 && i != numDocs - 1)
        {
            string term = RandomInts.RandomFrom(Random(), terms);
            iw1.DeleteDocuments(new Term("s", term));
            iw2.DeleteDocuments(new Term("s", term));
        }
        Document doc = RandomDocument();
        iw1.AddDocument(doc);
        iw2.AddDocument(doc);
        if (Random().Next(8) == 0)
        {
            iw1.Commit();
            iw2.Commit();
        }
    }
    // Make sure we have something to merge.
    iw1.Commit();
    iw2.Commit();
    Document doc2 = RandomDocument();
    // NOTE: don't use RandomIndexWriter.AddDocument directly, since it sometimes
    // commits, which may trigger a merge, in which case ForceMerge may not do
    // anything. With field updates this is a problem, since the updates can go
    // into the single segment in the index, and therefore the index won't be
    // sorted. That would break the test's later assumption that the index is
    // sorted by SortingMergePolicy.
    iw1.w.AddDocument(doc2);
    iw2.w.AddDocument(doc2);
    if (DefaultCodecSupportsFieldUpdates())
    {
        // Update the numeric doc values of docs belonging to one term
        // (covers many documents).
        long value = Random().NextLong();
        string term = RandomInts.RandomFrom(Random(), terms);
        iw1.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
        iw2.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
    }
    iw1.ForceMerge(1);
    iw2.ForceMerge(1);
    iw1.Dispose();
    iw2.Dispose();
    reader = DirectoryReader.Open(dir1);
    sortedReader = DirectoryReader.Open(dir2);
}
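// A hedged sketch, not part of the original class, of the assertion these two
// readers are built for: sorting dir1's index on the fly should match dir2's
// index, which SortingMergePolicy sorted physically at merge time.
// SortingAtomicReader.Wrap, SlowCompositeReaderWrapper.Wrap, and
// AssertReaderEquals are assumed to be available as in the Java test framework.
private void AssertSortedEquivalence()
{
    AtomicReader onTheFly = SortingAtomicReader.Wrap(SlowCompositeReaderWrapper.Wrap(reader), sort);
    AtomicReader mergedSorted = SlowCompositeReaderWrapper.Wrap(sortedReader);
    AssertReaderEquals("on-the-fly sort vs merge-time sort", onTheFly, mergedSorted);
}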