/// <summary>
/// Indexes the generated documents, then walks the entry iterator of a
/// <c>DocumentDictionary</c> built with term, weight and payload fields and
/// checks every entry against the document it was generated from. After the
/// walk, the only documents left in the map must be the ones whose terms were
/// reported in the generator's key list (named "invalid" by this test).
/// </summary>
public void TestBasic()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), true, false);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    IInputIterator inputIterator = dictionary.EntryIterator;
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();

        // Single lookup; an unknown term must fail the assertion instead of
        // surfacing later as a NullReferenceException.
        Document doc;
        assertTrue(docs.TryGetValue(field, out doc));
        assertNotNull(doc);
        docs.Remove(field);

        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
        assertTrue(inputIterator.Payload.Equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
    }

    // Every invalid term must still be in the map, i.e. the iterator never
    // returned it. The indexer would throw KeyNotFoundException on a missing
    // key, so use TryGetValue and let the assertion report the failure.
    foreach (string invalidTerm in invalidDocTerms)
    {
        Document invalid;
        assertTrue(docs.TryGetValue(invalidTerm, out invalid));
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Opens a reader on an index to which this test added no documents and
/// checks that the <c>DocumentDictionary</c> entry iterator returns no entry,
/// reports a weight of 0 and has a null payload.
/// </summary>
public void TestEmptyReader()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());

    // Make sure the index is created?
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, config);
    indexWriter.Commit();
    indexWriter.Dispose();

    IndexReader reader = DirectoryReader.Open(directory);
    IDictionary emptyDictionary = new DocumentDictionary(reader, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    IInputIterator entries = emptyDictionary.GetEntryIterator();

    assertNull(entries.Next());
    assertEquals(entries.Weight, 0);
    assertNull(entries.Payload);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes the generated documents, deletes a random subset of them by term,
/// and checks that a <c>DocumentDictionary</c> built with only term and weight
/// fields yields exactly the documents that were not deleted, each with the
/// expected weight and a null payload. After the walk, the only documents
/// left in the map must be the ones whose terms were reported in the
/// generator's key list (named "invalid" by this test).
/// </summary>
public void TestWithDeletions()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), false, false);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    Random rand = Random();
    List<string> termsToDel = new List<string>();
    foreach (Document doc in docs.Values)
    {
        IIndexableField f2 = doc.GetField(FIELD_NAME);
        if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
        {
            termsToDel.Add(doc.Get(FIELD_NAME));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();

    // Delete straight from the term list; the intermediate Term[] array was
    // only ever iterated once.
    foreach (string term in termsToDel)
    {
        writer.DeleteDocuments(new Term(FIELD_NAME, term));
    }
    writer.Commit();
    writer.Dispose();

    // Drop the deleted documents from the expectation map. TryGetValue keeps a
    // missing key an assertion failure rather than a KeyNotFoundException.
    foreach (string termToDel in termsToDel)
    {
        Document toDel;
        assertTrue(docs.TryGetValue(termToDel, out toDel));
        assertTrue(toDel != null);
        docs.Remove(termToDel);
    }

    IndexReader ir = DirectoryReader.Open(dir);
    assertEquals(ir.NumDocs, docs.Count);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();

        // Single lookup; an unknown term must fail the assertion instead of
        // surfacing later as a NullReferenceException.
        Document doc;
        assertTrue(docs.TryGetValue(field, out doc));
        assertNotNull(doc);
        docs.Remove(field);

        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
        assertEquals(inputIterator.Payload, null);
    }

    // Every invalid term must still be in the map, i.e. the iterator never
    // returned it.
    foreach (string invalidTerm in invalidDocTerms)
    {
        Document invalid;
        assertTrue(docs.TryGetValue(invalidTerm, out invalid));
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Opens a reader on an index to which this test added no documents and
/// checks that the <c>DocumentDictionary</c> entry iterator returns no entry,
/// reports a weight of 0 and has a null payload.
/// </summary>
public void TestEmptyReader()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergePolicy(NewLogMergePolicy());

    // Make sure the index is created?
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, config);
    indexWriter.Commit();
    indexWriter.Dispose();

    IndexReader reader = DirectoryReader.Open(directory);
    IDictionary emptyDictionary = new DocumentDictionary(reader, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    IInputIterator entries = emptyDictionary.EntryIterator;

    assertNull(entries.Next());
    assertEquals(entries.Weight, 0);
    assertNull(entries.Payload);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes the generated documents, deletes a random subset of them by term,
/// and checks that a <c>DocumentDictionary</c> built with only term and weight
/// fields yields exactly the documents that were not deleted, each with the
/// expected weight and a null payload. After the walk, the only documents
/// left in the map must be the ones whose terms were reported in the
/// generator's key list (named "invalid" by this test).
/// </summary>
public void TestWithDeletions()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), false, false);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    Random rand = Random();
    List<string> termsToDel = new List<string>();
    foreach (Document doc in docs.Values)
    {
        IndexableField f2 = doc.GetField(FIELD_NAME);
        if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.StringValue))
        {
            termsToDel.Add(doc.Get(FIELD_NAME));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();

    // Delete straight from the term list; the intermediate Term[] array was
    // only ever iterated once.
    foreach (string term in termsToDel)
    {
        writer.DeleteDocuments(new Term(FIELD_NAME, term));
    }
    writer.Commit();
    writer.Dispose();

    // Drop the deleted documents from the expectation map. TryGetValue keeps a
    // missing key an assertion failure rather than a KeyNotFoundException.
    foreach (string termToDel in termsToDel)
    {
        Document toDel;
        assertTrue(docs.TryGetValue(termToDel, out toDel));
        assertTrue(toDel != null);
        docs.Remove(termToDel);
    }

    IndexReader ir = DirectoryReader.Open(dir);
    assertEquals(ir.NumDocs, docs.Count);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.EntryIterator;
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();

        // Single lookup; an unknown term must fail the assertion instead of
        // surfacing later as a NullReferenceException.
        Document doc;
        assertTrue(docs.TryGetValue(field, out doc));
        assertNotNull(doc);
        docs.Remove(field);

        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
        assertEquals(inputIterator.Payload, null);
    }

    // Every invalid term must still be in the map, i.e. the iterator never
    // returned it.
    foreach (string invalidTerm in invalidDocTerms)
    {
        Document invalid;
        assertTrue(docs.TryGetValue(invalidTerm, out invalid));
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Creates an iterator over the term, weight and payload fields of the Lucene
/// index read by <paramref name="outerInstance"/>. Passing <c>false</c> for
/// <paramref name="hasPayloads"/> implies an iterator over only term and weight.
/// </summary>
/// <param name="outerInstance">Enclosing dictionary; supplies the reader and the field names.</param>
/// <param name="hasPayloads">Stored on the iterator unchanged.</param>
/// <param name="hasContexts">Stored on the iterator unchanged.</param>
public DocumentInputIterator(DocumentDictionary outerInstance, bool hasPayloads, bool hasContexts)
{
    this.outerInstance = outerInstance;
    this.hasPayloads = hasPayloads;
    this.hasContexts = hasContexts;

    var reader = outerInstance.reader;
    docCount = reader.MaxDoc() - 1;

    // Numeric weights are only looked up when a weight field was configured.
    if (outerInstance.weightField != null)
    {
        weightValues = MultiDocValues.GetNumericValues(reader, outerInstance.weightField);
    }
    else
    {
        weightValues = null;
    }

    // Live docs are only requested when the reader has at least one leaf.
    if (reader.Leaves().Count > 0)
    {
        liveDocs = MultiFields.GetLiveDocs(reader);
    }
    else
    {
        liveDocs = null;
    }

    string[] candidateFields =
    {
        outerInstance.field,
        outerInstance.weightField,
        outerInstance.payloadField,
        outerInstance.contextsField
    };
    relevantFields = GetRelevantFields(candidateFields);
}