public void TestWithValueSource()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, 10);
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
/*
 * LUCENE-3528 - NRTManager hangs in certain situations
 */
public virtual void TestThreadStarvationNoDeleteNRTReader()
{
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    conf.SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
    Directory d = NewDirectory();
    CountDownLatch latch = new CountDownLatch(1);
    CountDownLatch signal = new CountDownLatch(1);

    LatchedIndexWriter _writer = new LatchedIndexWriter(d, conf, latch, signal);
    TrackingIndexWriter writer = new TrackingIndexWriter(_writer);
    SearcherManager manager = new SearcherManager(_writer, false, null);
    Document doc = new Document();
    doc.Add(NewTextField("test", "test", Field.Store.YES));
    writer.AddDocument(doc);
    manager.MaybeRefresh();
    ThreadClass t = new ThreadAnonymousInnerClassHelper(this, latch, signal, writer, manager);
    t.Start();
    _writer.WaitAfterUpdate = true; // wait in addDocument to let some reopens go through

    // once this returns, the doc is already reflected in the last reopen
    long lastGen = writer.UpdateDocument(new Term("foo", "bar"), doc);

    Assert.IsFalse(manager.SearcherCurrent); // false since there is a delete in the queue

    IndexSearcher searcher = manager.Acquire();
    try
    {
        Assert.AreEqual(2, searcher.IndexReader.NumDocs());
    }
    finally
    {
        manager.Release(searcher);
    }

    ControlledRealTimeReopenThread<IndexSearcher> thread = new ControlledRealTimeReopenThread<IndexSearcher>(writer, manager, 0.01, 0.01);
    thread.Start(); // start reopening
    if (VERBOSE)
    {
        Console.WriteLine("waiting now for generation " + lastGen);
    }

    AtomicBoolean finished = new AtomicBoolean(false);
    ThreadClass waiter = new ThreadAnonymousInnerClassHelper2(this, lastGen, thread, finished);
    waiter.Start();
    manager.MaybeRefresh();
    waiter.Join(1000);
    if (!finished.Get())
    {
        waiter.Interrupt();
        Assert.Fail("thread deadlocked on waitForGeneration");
    }
    thread.Dispose();
    thread.Join();
    IOUtils.Close(manager, _writer, d);
}
public virtual void TestLongFieldCache()
{
    Directory dir = NewDirectory();
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    cfg.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, cfg);
    Document doc = new Document();
    Int64Field field = new Int64Field("f", 0L, Field.Store.YES);
    doc.Add(field);
    long[] values = new long[TestUtil.NextInt32(Random, 1, 10)];
    for (int i = 0; i < values.Length; ++i)
    {
        long v;
        switch (Random.Next(10))
        {
            case 0:
                v = long.MinValue;
                break;
            case 1:
                v = 0;
                break;
            case 2:
                v = long.MaxValue;
                break;
            default:
                v = TestUtil.NextInt64(Random, -10, 10);
                break;
        }
        values[i] = v;
        if (v == 0 && Random.NextBoolean())
        {
            // missing
            iw.AddDocument(new Document());
        }
        else
        {
            field.SetInt64Value(v);
            iw.AddDocument(doc);
        }
    }
    iw.ForceMerge(1);
    DirectoryReader reader = iw.GetReader();
    Int64s longs = FieldCache.DEFAULT.GetInt64s(GetOnlySegmentReader(reader), "f", false);
    for (int i = 0; i < values.Length; ++i)
    {
        Assert.AreEqual(values[i], longs.Get(i));
    }
    reader.Dispose();
    iw.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwConfig.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);

    Document document = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    document.Add(idField);
    Field byteField = new StringField("byte", "", Field.Store.NO);
    document.Add(byteField);
    Field doubleField = new StringField("double", "", Field.Store.NO);
    document.Add(doubleField);
    Field floatField = new StringField("float", "", Field.Store.NO);
    document.Add(floatField);
    Field intField = new StringField("int", "", Field.Store.NO);
    document.Add(intField);
    Field longField = new StringField("long", "", Field.Store.NO);
    document.Add(longField);
    Field shortField = new StringField("short", "", Field.Store.NO);
    document.Add(shortField);
    Field stringField = new StringField("string", "", Field.Store.NO);
    document.Add(stringField);
    Field textField = new TextField("text", "", Field.Store.NO);
    document.Add(textField);

    foreach (string[] doc in documents)
    {
        idField.SetStringValue(doc[0]);
        byteField.SetStringValue(doc[1]);
        doubleField.SetStringValue(doc[2]);
        floatField.SetStringValue(doc[3]);
        intField.SetStringValue(doc[4]);
        longField.SetStringValue(doc[5]);
        shortField.SetStringValue(doc[6]);
        stringField.SetStringValue(doc[7]);
        textField.SetStringValue(doc[8]);
        iw.AddDocument(document);
    }

    reader = iw.GetReader();
    searcher = NewSearcher(reader);
    iw.Dispose();
}
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
{
    LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
    logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
    conf.SetMergePolicy(logDocMergePolicy);
    IndexWriter writer = new IndexWriter(dir, conf);
    return writer;
}
public void TestWhichMTQMatched()
{
    Directory dir = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("Test a one sentence document.");
    iw.AddDocument(doc);

    IndexReader ir = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
    BooleanQuery query = new BooleanQuery();
    query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
    query.Add(new WildcardQuery(new Term("body", "one")), Occur.SHOULD);
    query.Add(new WildcardQuery(new Term("body", "se*")), Occur.SHOULD);
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(1, snippets.Length);
    // Default formatter just bolds each hit:
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

    // Now use our own formatter, that also stuffs the
    // matching term's text into the result:
    highlighter = new PostingsHighlighterAnalyzerAndFormatterHelper(analyzer, new PassageFormatterHelper());
    assertEquals(1, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(1, snippets.Length);
    // Custom formatter bolds each hit and appends the matching term's query:
    assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);

    ir.Dispose();
    dir.Dispose();
}
public void TestWildcardInBoolean()
{
    Directory dir = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("This is a test.");
    iw.AddDocument(doc);
    body.SetStringValue("Test a one sentence document.");
    iw.AddDocument(doc);

    IndexReader ir = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
    BooleanQuery query = new BooleanQuery();
    query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // must not
    query = new BooleanQuery();
    query.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    query.Add(new WildcardQuery(new Term("bogus", "te*")), Occur.MUST_NOT);
    topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);

    ir.Dispose();
    dir.Dispose();
}
public void TestWithContexts()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), true, true);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
        ISet<BytesRef> oriCtxs = new HashSet<BytesRef>();
        IEnumerable<BytesRef> contextSet = inputIterator.Contexts;
        foreach (IIndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
        {
            oriCtxs.add(ctxf.GetBinaryValue());
        }
        assertEquals(oriCtxs.size(), contextSet.Count());
    }
    foreach (string invalidTerm in invalidDocTerms)
    {
        var invalid = docs[invalidTerm];
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public void TestWithContext()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    ValueSource[] toAdd = new ValueSource[]
    {
        new Int64FieldSource(WEIGHT_FIELD_NAME_1),
        new Int64FieldSource(WEIGHT_FIELD_NAME_2),
        new Int64FieldSource(WEIGHT_FIELD_NAME_3)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    IInputEnumerator inputIterator = dictionary.GetEntryEnumerator();
    while (inputIterator.MoveNext())
    {
        string field = inputIterator.Current.Utf8ToString();
        Document doc = docs[field];
        docs.Remove(field);
        long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
        long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
        long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
        assertTrue(inputIterator.Current.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, (w1 + w2 + w3));
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));

        // LUCENENET NOTE: This test was once failing because we used SCG.HashSet<T> whose
        // Equals() implementation does not check for set equality. As a result, SortedInputEnumerator
        // had been modified to reverse the results to get the test to pass. However, using JCG.HashSet<T>
        // ensures that set equality (that is, equality that doesn't care about the order of items)
        // is respected. SortedInputEnumerator has also had the specific sorting removed.
        ISet<BytesRef> originalCtxs = new JCG.HashSet<BytesRef>();
        foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
        {
            originalCtxs.add(ctxf.GetBinaryValue());
        }
        assertEquals(originalCtxs, inputIterator.Contexts);
    }
    assertTrue(docs.Count == 0);
    ir.Dispose();
    dir.Dispose();
}
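The set-equality point in the LUCENENET NOTE above is easy to demonstrate in isolation. The following is a minimal standalone sketch, assuming the J2N package that Lucene.NET's `JCG` alias refers to; the `SetEqualityDemo` class and its contents are illustrative, not part of the original test:

```csharp
using System;
using SCG = System.Collections.Generic;
using JCG = J2N.Collections.Generic; // assumed: the alias used in Lucene.NET's tests

public static class SetEqualityDemo // hypothetical demo class
{
    public static void Main()
    {
        // J2N's HashSet<T> overrides Equals() with structural, order-insensitive
        // set equality (Java semantics), which is what assertEquals relies on above.
        var a = new JCG.HashSet<string> { "foo", "bar" };
        var b = new JCG.HashSet<string> { "bar", "foo" };
        Console.WriteLine(a.Equals(b));    // True

        // System.Collections.Generic.HashSet<T> inherits Object.Equals(), i.e.
        // reference equality; structural comparison must be requested explicitly.
        var c = new SCG.HashSet<string> { "foo", "bar" };
        var d = new SCG.HashSet<string> { "bar", "foo" };
        Console.WriteLine(c.Equals(d));    // False
        Console.WriteLine(c.SetEquals(d)); // True
    }
}
```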
public void TestSpanNot()
{
    Directory dir = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("This is a test.");
    iw.AddDocument(doc);
    body.SetStringValue("Test a one sentence document.");
    iw.AddDocument(doc);

    IndexReader ir = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    // In the Java original this is an anonymous PostingsHighlighter subclass
    // whose getIndexAnalyzer(String) override returns the analyzer above.
    PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
    SpanQuery include = new SpanMultiTermQueryWrapper<WildcardQuery>(new WildcardQuery(new Term("body", "te*")));
    SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus"));
    Query query = new SpanNotQuery(include, exclude);
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestDeletePartiallyWrittenFilesIfAbort()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
    // disable CFS because this test checks file names
    iwConf.SetMergePolicy(NewLogMergePolicy(false));
    iwConf.SetUseCompoundFile(false);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    Document validDoc = new Document();
    validDoc.Add(new IntField("id", 0, Field.Store.YES));
    iw.AddDocument(validDoc);
    iw.Commit();

    // make sure that #writeField will fail, triggering an abort
    Document invalidDoc = new Document();
    FieldType fieldType = new FieldType();
    fieldType.Stored = true;
    invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

    Assert.Throws<ArgumentException>(() =>
    {
        try
        {
            iw.AddDocument(invalidDoc);
            iw.Commit();
        }
        finally
        {
            int counter = 0;
            foreach (string fileName in dir.ListAll())
            {
                if (fileName.EndsWith(".fdt") || fileName.EndsWith(".fdx"))
                {
                    counter++;
                }
            }
            // Only one .fdt and one .fdx file must have been found
            Assert.AreEqual(2, counter);
            iw.Dispose();
            dir.Dispose();
        }
    });
}
/// <summary>
/// Override this to customize index settings, e.g. which
/// codec to use.
/// </summary>
protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
{
    IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
    iwc.SetCodec(new Lucene46Codec());
    iwc.SetOpenMode(openMode);
    // This way all merged segments will be sorted at
    // merge time, allowing for per-segment early termination
    // when those segments are searched:
    iwc.SetMergePolicy(new SortingMergePolicy(iwc.MergePolicy, SORT));
    return iwc;
}
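For orientation, here is a minimal sketch of how this hook might be consumed from within the same class. The helper name, match version, and index path are assumptions for illustration; only `GetIndexWriterConfig`, `SORT`, and the standard Lucene.NET types come from the surrounding code:

```csharp
// Sketch only (assumed helper, not in the original source): builds a writer
// whose merged segments come out pre-sorted by SORT via the merge policy
// configured in GetIndexWriterConfig above.
private IndexWriter CreateSortedWriter(Analyzer indexAnalyzer)
{
    IndexWriterConfig iwc = GetIndexWriterConfig(
        LuceneVersion.LUCENE_48,                        // assumed match version
        indexAnalyzer,
        IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
    // "suggest-index" is an assumed path. Because segments are sorted at merge
    // time, a search that visits each segment in SORT order can terminate early.
    return new IndexWriter(
        FSDirectory.Open(new System.IO.DirectoryInfo("suggest-index")), iwc);
}
```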
public void TestCustomK1()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("This has only foo foo. " +
        "On the other hand this sentence contains both foo and bar. " +
        "This has only bar bar bar bar bar bar bar bar bar bar bar bar.");
    iw.AddDocument(doc);

    IndexReader ir = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    // In the Java original this is an anonymous PostingsHighlighter(10000) subclass
    // whose getScorer(String) override returns new PassageScorer(0, 0.75f, 87).
    PostingsHighlighter highlighter = new CustomK1PostingsHighlighter();
    BooleanQuery query = new BooleanQuery();
    query.Add(new TermQuery(new Term("body", "foo")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("body", "bar")), Occur.SHOULD);
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs, 1);
    assertEquals(1, snippets.Length);
    assertTrue(snippets[0].StartsWith("On the other hand", StringComparison.Ordinal));

    ir.Dispose();
    dir.Dispose();
}
public override void BeforeClass()
{
    base.BeforeClass();

    // NOTE: turn off compound file, this test will open some index files directly.
    OldFormatImpersonationIsActive = true;
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false);

    TermIndexInterval = config.TermIndexInterval;
    IndexDivisor = TestUtil.NextInt32(Random, 1, 10);
    NUMBER_OF_DOCUMENTS = AtLeast(100);
    NUMBER_OF_FIELDS = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS));

    Directory = NewDirectory();
    config.SetCodec(new PreFlexRWCodec());
    LogMergePolicy mp = NewLogMergePolicy();
    // NOTE: turn off compound file, this test will open some index files directly.
    mp.NoCFSRatio = 0.0;
    config.SetMergePolicy(mp);

    Populate(Directory, config);

    DirectoryReader r0 = IndexReader.Open(Directory);
    SegmentReader r = LuceneTestCase.GetOnlySegmentReader(r0);
    string segment = r.SegmentName;
    r.Dispose();

    FieldInfosReader infosReader = (new PreFlexRWCodec()).FieldInfosFormat.FieldInfosReader;
    FieldInfos fieldInfos = infosReader.Read(Directory, segment, "", IOContext.READ_ONCE);
    string segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
    long tiiFileLength = Directory.FileLength(segmentFileName);
    IndexInput input = Directory.OpenInput(segmentFileName, NewIOContext(Random));
    TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random)), fieldInfos, false);
    int totalIndexInterval = TermEnum.indexInterval * IndexDivisor;

    SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
    Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval);
    indexEnum.Dispose();
    input.Dispose();

    Reader = IndexReader.Open(Directory);
    SampleTerms = Sample(Random, Reader, 1000);
}
public void TestWithoutPayload()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), false, false);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        var field = f.Utf8ToString();
        Document doc = docs[field];
        docs.Remove(field);
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? weightField.GetInt64ValueOrDefault() : 0);
        assertEquals(inputIterator.Payload, null);
    }
    foreach (string invalidTerm in invalidDocTerms)
    {
        var invalid = docs[invalidTerm];
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwConfig.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);
    Document document = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    document.Add(idField);
    iw.AddDocument(document);
    ir = iw.GetReader();
    @is = NewSearcher(ir);
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Analyzer = new MockAnalyzer(Random());
    Dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);
    config.SetMergePolicy(NewLogMergePolicy()); // we will use docids to validate
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, config);
    writer.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
    writer.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
    writer.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
public void TestWithContext()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    ValueSource[] toAdd = new ValueSource[]
    {
        new Int64FieldSource(WEIGHT_FIELD_NAME_1),
        new Int64FieldSource(WEIGHT_FIELD_NAME_2),
        new Int64FieldSource(WEIGHT_FIELD_NAME_3)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
        long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
        long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, (w1 + w2 + w3));
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
        ISet<BytesRef> originalCtxs = new HashSet<BytesRef>();
        foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
        {
            originalCtxs.add(ctxf.GetBinaryValue());
        }
        assertEquals(originalCtxs, inputIterator.Contexts);
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public void TestCustomB()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("This is a test. This test is a better test but the sentence is excruiatingly long, " +
        "you have no idea how painful it was for me to type this long sentence into my IDE.");
    iw.AddDocument(doc);

    IndexReader ir = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    // In the Java original this is an anonymous PostingsHighlighter(10000) subclass
    // whose getScorer(String) override returns new PassageScorer(1.2f, 0, 87).
    PostingsHighlighter highlighter = new CustomBPostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs, 1);
    assertEquals(1, snippets.Length);
    assertTrue(snippets[0].StartsWith("This <b>test</b> is a better <b>test</b>", StringComparison.Ordinal));

    ir.Dispose();
    dir.Dispose();
}
public void TestBasic()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    ValueSource[] toAdd = new ValueSource[]
    {
        new LongFieldSource(WEIGHT_FIELD_NAME_1),
        new LongFieldSource(WEIGHT_FIELD_NAME_2),
        new LongFieldSource(WEIGHT_FIELD_NAME_3)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
    IInputIterator inputIterator = dictionary.EntryIterator;
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        long w1 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_1).NumericValue);
        long w2 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_2).NumericValue);
        long w3 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_3).NumericValue);
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, (w1 + w2 + w3));
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public void TestEmptyReader()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    // Make sure the index is created?
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();

    assertNull(inputIterator.Next());
    assertEquals(inputIterator.Weight, 0);
    assertNull(inputIterator.Payload);

    ir.Dispose();
    dir.Dispose();
}
public void TestWithoutPayload()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    foreach (Document doc in docs.Values)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    ValueSource[] toAdd = new ValueSource[]
    {
        new Int64FieldSource(WEIGHT_FIELD_NAME_1),
        new Int64FieldSource(WEIGHT_FIELD_NAME_2),
        new Int64FieldSource(WEIGHT_FIELD_NAME_3)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd));
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs[field];
        docs.Remove(field);
        long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
        long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
        long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, (w1 + w2 + w3));
        assertEquals(inputIterator.Payload, null);
    }
    assertTrue(docs.Count == 0);
    ir.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    var iw = new RandomIndexWriter(Random, dir, iwc);

    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);

    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);

    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);

    iw.ForceMerge(1);
    reader = iw.GetReader();
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwConfig.SetMergePolicy(NewLogMergePolicy());
    iwConfig.SetSimilarity(sim);
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);
    Document doc = new Document();
    doc.Add(new TextField("text", "this is a test test test", Field.Store.NO));
    iw.AddDocument(doc);
    doc = new Document();
    doc.Add(new TextField("text", "second test", Field.Store.NO));
    iw.AddDocument(doc);
    reader = iw.GetReader();
    searcher = NewSearcher(reader);
    iw.Dispose();
}
public void TestWithDeletions()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), false, false);
    IDictionary<string, Document> docs = res.Value;
    List<string> invalidDocTerms = res.Key;
    Random rand = Random();
    List<string> termsToDel = new List<string>();
    foreach (Document doc in docs.Values)
    {
        IIndexableField f2 = doc.GetField(FIELD_NAME);
        if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
        {
            termsToDel.Add(doc.Get(FIELD_NAME));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();

    Term[] delTerms = new Term[termsToDel.size()];
    for (int i = 0; i < termsToDel.size(); i++)
    {
        delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
    }
    foreach (Term delTerm in delTerms)
    {
        writer.DeleteDocuments(delTerm);
    }
    writer.Commit();
    writer.Dispose();

    foreach (string termToDel in termsToDel)
    {
        var toDel = docs[termToDel];
        assertTrue(toDel != null);
        docs.Remove(termToDel);
    }

    IndexReader ir = DirectoryReader.Open(dir);
    assertEquals(ir.NumDocs, docs.size());
    IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        var field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
        assertEquals(inputIterator.Payload, null);
    }
    foreach (string invalidTerm in invalidDocTerms)
    {
        var invalid = docs[invalidTerm];
        docs.Remove(invalidTerm);
        assertNotNull(invalid);
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public void TestWithDeletions()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
    Random rand = Random();
    List<string> termsToDel = new List<string>();
    foreach (Document doc in docs.Values)
    {
        if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
        {
            termsToDel.Add(doc.Get(FIELD_NAME));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();

    Term[] delTerms = new Term[termsToDel.size()];
    for (int i = 0; i < termsToDel.size(); i++)
    {
        delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
    }
    foreach (Term delTerm in delTerms)
    {
        writer.DeleteDocuments(delTerm);
    }
    writer.Commit();
    writer.Dispose();

    foreach (string termToDel in termsToDel)
    {
        var toDel = docs[termToDel];
        docs.Remove(termToDel);
        assertTrue(null != toDel);
    }

    IndexReader ir = DirectoryReader.Open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
    assertEquals(ir.NumDocs, docs.size());
    ValueSource[] toAdd = new ValueSource[]
    {
        new Int64FieldSource(WEIGHT_FIELD_NAME_1),
        new Int64FieldSource(WEIGHT_FIELD_NAME_2)
    };
    IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
    IInputIterator inputIterator = dictionary.GetEntryIterator();
    BytesRef f;
    while ((f = inputIterator.Next()) != null)
    {
        string field = f.Utf8ToString();
        Document doc = docs.ContainsKey(field) ? docs[field] : null;
        docs.Remove(field);
        long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
        long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
        assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
        assertEquals(inputIterator.Weight, w2 + w1);
        assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
    }
    assertTrue(!docs.Any());
    ir.Dispose();
    dir.Dispose();
}
public void Test()
{
    RandomIndexWriter writer;
    DirectoryReader indexReader;
    int numParents = AtLeast(200);
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
    {
        Document parentDoc = new Document();
        NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
        parentDoc.Add(parentVal);
        StringField parent = new StringField("parent", "true", Field.Store.YES);
        parentDoc.Add(parent);
        for (int i = 0; i < numParents; ++i)
        {
            List<Document> documents = new List<Document>();
            int numChildren = Random().nextInt(10);
            for (int j = 0; j < numChildren; ++j)
            {
                Document childDoc = new Document();
                childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                documents.Add(childDoc);
            }
            parentVal.SetInt64Value(Random().nextInt(50));
            documents.Add(parentDoc);
            writer.AddDocuments(documents);
        }
        writer.ForceMerge(1);
        indexReader = writer.Reader;
    }

    AtomicReader reader = GetOnlySegmentReader(indexReader);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
    NumericDocValues parentValues = reader.GetNumericDocValues("parent_val");
    NumericDocValues childValues = reader.GetNumericDocValues("child_val");
    Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
    Sort childSort = new Sort(new SortField("child_val", SortFieldType.INT64));
    Sort sort = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
    Sorter sorter = new Sorter(sort);
    Sorter.DocMap docMap = sorter.Sort(reader);
    assertEquals(reader.MaxDoc, docMap.Count);

    int[] children = new int[1];
    int numChildren2 = 0;
    int previousParent = -1;
    for (int i = 0; i < docMap.Count; ++i)
    {
        int oldID = docMap.NewToOld(i);
        if (parentBits.Get(oldID))
        {
            // check that we have the right children
            for (int j = 0; j < numChildren2; ++j)
            {
                assertEquals(oldID, parentBits.NextSetBit(children[j]));
            }
            // check that children are sorted
            for (int j = 1; j < numChildren2; ++j)
            {
                int doc1 = children[j - 1];
                int doc2 = children[j];
                if (childValues.Get(doc1) == childValues.Get(doc2))
                {
                    assertTrue(doc1 < doc2); // sort is stable
                }
                else
                {
                    assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                }
            }
            // check that parents are sorted
            if (previousParent != -1)
            {
                if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                {
                    assertTrue(previousParent < oldID);
                }
                else
                {
                    assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                }
            }
            // reset
            previousParent = oldID;
            numChildren2 = 0;
        }
        else
        {
            children = ArrayUtil.Grow(children, numChildren2 + 1);
            children[numChildren2++] = oldID;
        }
    }
    indexReader.Dispose();
    writer.w.Directory.Dispose();
}
public void TestRanges()
{
    Directory dir = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    body.SetStringValue("This is a test.");
    iw.AddDocument(doc);
    body.SetStringValue("Test a one sentence document.");
    iw.AddDocument(doc);

    IndexReader ir = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    ICUPostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
    Query query = TermRangeQuery.NewStringRange("body", "ta", "tf", true, true);
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // null start
    query = TermRangeQuery.NewStringRange("body", null, "tf", true, true);
    topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);

    // null end
    query = TermRangeQuery.NewStringRange("body", "ta", null, true, true);
    topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // exact start inclusive
    query = TermRangeQuery.NewStringRange("body", "test", "tf", true, true);
    topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // exact end inclusive
    query = TermRangeQuery.NewStringRange("body", "ta", "test", true, true);
    topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // exact start exclusive
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    bq.Add(TermRangeQuery.NewStringRange("body", "test", "tf", false, true), Occur.SHOULD);
    topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", bq, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);

    // exact end exclusive
    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    bq.Add(TermRangeQuery.NewStringRange("body", "ta", "test", true, false), Occur.SHOULD);
    topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", bq, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);

    // wrong field
    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    bq.Add(TermRangeQuery.NewStringRange("bogus", "ta", "tf", true, true), Occur.SHOULD);
    topDocs = searcher.Search(bq, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);
    snippets = highlighter.Highlight("body", bq, searcher, topDocs);
    assertEquals(2, snippets.Length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    // Don't allow tiny maxBufferedDocs; it can make this
    // test too slow:
    iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

    // MockRandom/AlcoholicMergePolicy are too slow:
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.FloorSegmentMB = .001;
    iwc.SetMergePolicy(tmp);

    IndexWriter w = new IndexWriter(dir, iwc);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);
    var mgr = new SearcherTaxonomyManager(w, true, null, tw);
    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

    reopener.Name = "reopener";
    reopener.Start();

    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = mgr.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector sfc = new FacetsCollector();
                pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
            }
            finally
            {
                mgr.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
        reopener.Join();
    }
    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }
    IOUtils.Close(mgr, tw, w, taxoDir, dir);
}
public void TestSearchAfterWhenSortingByFunctionValues()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy()); // depends on docid order
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);

    Document doc = new Document();
    Field field = new StringField("value", "", Field.Store.YES);
    doc.Add(field);

    // Save docs unsorted (decreasing value n, n-1, ...)
    const int NUM_VALS = 5;
    for (int val = NUM_VALS; val > 0; val--)
    {
        field.SetStringValue(Convert.ToString(val));
        writer.AddDocument(doc);
    }

    // Open index
    IndexReader reader = writer.GetReader();
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    // Get ValueSource from FieldCache
    Int32FieldSource src = new Int32FieldSource("value");
    // ...and make it a sort criterion
    SortField sf = src.GetSortField(false).Rewrite(searcher);
    Sort orderBy = new Sort(sf);

    // Get hits sorted by our FunctionValues (ascending values)
    Query q = new MatchAllDocsQuery();
    TopDocs hits = searcher.Search(q, reader.MaxDoc, orderBy);
    assertEquals(NUM_VALS, hits.ScoreDocs.Length);
    // Verify that sorting works in general
    int i = 0;
    foreach (ScoreDoc hit in hits.ScoreDocs)
    {
        int valueFromDoc = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
        assertEquals(++i, valueFromDoc);
    }

    // Now get hits after hit #2 using IS.searchAfter()
    int afterIdx = 1;
    FieldDoc afterHit = (FieldDoc)hits.ScoreDocs[afterIdx];
    hits = searcher.SearchAfter(afterHit, q, reader.MaxDoc, orderBy);

    // Expected # of hits: NUM_VALS - 2
    assertEquals(NUM_VALS - (afterIdx + 1), hits.ScoreDocs.Length);

    // Verify that hits are actually "after"
    int afterValue = (int)(double?)afterHit.Fields[0];
    foreach (ScoreDoc hit in hits.ScoreDocs)
    {
        int val = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
        assertTrue(afterValue <= val);
        assertFalse(hit.Doc == afterHit.Doc);
    }
    reader.Dispose();
    dir.Dispose();
}