public virtual void TestByte()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);

    // Three docs whose numeric order (-1, 4, 23) differs from their
    // lexicographic order ("-1", "23", "4"), so a BYTE sort is observable.
    Document document = new Document();
    document.Add(new NumericDocValuesField("value", 23));
    document.Add(NewStringField("value", "23", Field.Store.YES));
    iw.AddDocument(document);

    document = new Document();
    document.Add(new NumericDocValuesField("value", -1));
    document.Add(NewStringField("value", "-1", Field.Store.YES));
    iw.AddDocument(document);

    document = new Document();
    document.Add(new NumericDocValuesField("value", 4));
    document.Add(NewStringField("value", "4", Field.Store.YES));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.BYTE));

    TopDocs topDocs = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(3, topDocs.TotalHits);

    // Hits must come back in numeric order, not string order.
    Assert.AreEqual("-1", searcher.Doc(topDocs.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("4", searcher.Doc(topDocs.ScoreDocs[1].Doc).Get("value"));
    Assert.AreEqual("23", searcher.Doc(topDocs.ScoreDocs[2].Doc).Get("value"));
    AssertNoFieldCaches();

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestConstantScoreQueryAndFilter()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);

    // One document with field:a and one with field:b.
    Document document = new Document();
    document.Add(NewStringField("field", "a", Field.Store.NO));
    iw.AddDocument(document);
    document = new Document();
    document.Add(NewStringField("field", "b", Field.Store.NO));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
    Query query = new ConstantScoreQuery(filterB);
    IndexSearcher searcher = NewSearcher(reader);
    // query field:b combined with filter field:b -> one hit
    Assert.AreEqual(1, searcher.Search(query, filterB, 1).TotalHits);

    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
    query = new ConstantScoreQuery(filterA);
    // query field:a combined with filter field:b -> disjoint, no hits
    Assert.AreEqual(0, searcher.Search(query, filterB, 1).TotalHits);

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestString()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);

    // Index "foo" then "bar"; a STRING sort must reorder them.
    Document document = new Document();
    document.Add(new SortedDocValuesField("value", new BytesRef("foo")));
    document.Add(NewStringField("value", "foo", Field.Store.YES));
    iw.AddDocument(document);

    document = new Document();
    document.Add(new SortedDocValuesField("value", new BytesRef("bar")));
    document.Add(NewStringField("value", "bar", Field.Store.YES));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

    TopDocs topDocs = searcher.Search(new MatchAllDocsQuery(), 10, sort);
    Assert.AreEqual(2, topDocs.TotalHits);

    // 'bar' sorts before 'foo'.
    Assert.AreEqual("bar", searcher.Doc(topDocs.ScoreDocs[0].Doc).Get("value"));
    Assert.AreEqual("foo", searcher.Doc(topDocs.ScoreDocs[1].Doc).Get("value"));
    AssertNoFieldCaches();

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestPrefixQuery_Mem()
{
    Directory directory = NewDirectory();
    string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };

    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);
    foreach (string category in categories)
    {
        Document document = new Document();
        document.Add(NewStringField("category", category, Field.Store.YES));
        iw.AddDocument(document);
    }
    IndexReader reader = iw.Reader;

    IndexSearcher searcher = NewSearcher(reader);

    // Prefix matches the parent category and everything below it.
    PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

    query = new PrefixQuery(new Term("category", "/Computers/Mac"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

    // An empty prefix rewrites to a match-everything enum, not a PrefixTermsEnum.
    query = new PrefixQuery(new Term("category", ""));
    Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
    Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "everything");

    iw.Dispose();
    reader.Dispose();
    directory.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    // Open and immediately close a writer so the directory contains an
    // empty-but-valid index that DirectoryReader can open.
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);
    iw.Dispose();
    Reader = DirectoryReader.Open(Directory);
}
public virtual void TestGetScores()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    iw.Commit();
    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Weight fakeWeight = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
    Scorer scorer = new SimpleScorer(fakeWeight);
    ScoreCachingCollector collector = new ScoreCachingCollector(Scores.Length);
    collector.Scorer = scorer;

    // Drive the scorer manually so its doc() advances and the collector
    // caches a score per document.
    int docId;
    while ((docId = scorer.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        collector.Collect(docId);
    }

    // The cached scores must match the canned Scores array exactly.
    for (int i = 0; i < Scores.Length; i++)
    {
        Assert.AreEqual(Scores[i], collector.Mscores[i], 0f);
    }

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestGetScores()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    iw.Commit();
    IndexReader reader = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Weight fakeWeight = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
    Scorer scorer = new SimpleScorer(fakeWeight);
    ScoreCachingCollector collector = new ScoreCachingCollector(Scores.Length);
    collector.SetScorer(scorer);

    // Drive the scorer manually so its doc() advances and the collector
    // caches a score per document.
    int docId;
    while ((docId = scorer.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        collector.Collect(docId);
    }

    // The cached scores must match the canned Scores array exactly.
    for (int i = 0; i < Scores.Length; i++)
    {
        Assert.AreEqual(Scores[i], collector.Mscores[i], 0f);
    }

    reader.Dispose();
    directory.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);

    // The same Document/Field instance is reused for every AddDocument call;
    // mutating StringValue between calls changes what gets indexed next.
    Document document = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    document.Add(field);

    for (int i = 0; i < 5137; ++i)
    {
        iw.AddDocument(document);
    }

    // A single "tangfulin" doc buried in the middle...
    field.StringValue = "tangfulin";
    iw.AddDocument(document);

    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        iw.AddDocument(document);
    }

    // ...and another at the very end.
    field.StringValue = "tangfulin";
    iw.AddDocument(document);

    Reader = iw.Reader;
    Searcher = NewSearcher(Reader);
    iw.Dispose();
}
public virtual void TestStartPositions()
{
    Directory directory = NewDirectory();

    // mimic StopAnalyzer: drop "the", "a", and "of"
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory, analyzer);
    Document first = new Document();
    first.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
    iw.AddDocument(first);
    Document second = new Document();
    second.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
    iw.AddDocument(second);

    IndexReader reader = iw.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // user queries on "starts-with quick"
    SpanQuery startsWithQuick = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
    Assert.AreEqual(1, searcher.Search(startsWithQuick, 10).TotalHits);

    // user queries on "starts-with the quick": include within the first
    // two positions, but exclude the doc that starts with "quick" itself
    SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
    startsWithQuick = new SpanNotQuery(include, startsWithQuick);
    Assert.AreEqual(1, searcher.Search(startsWithQuick, 10).TotalHits);

    iw.Dispose();
    reader.Dispose();
    directory.Dispose();
}
public virtual void Test2()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false));

    // Terms chosen so exactly 8 of them are within edit distance 2 of "WEBER"
    // (given a required common prefix length of 1).
    string[] terms =
    {
        "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
        "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE", "WEBREE",
        "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE"
    };
    foreach (string term in terms)
    {
        AddDoc(term, iw);
    }

    IndexReader reader = iw.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    iw.Dispose();

    FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
    //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(8, hits.Length);

    reader.Dispose();
    directory.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();

    // sometimes use an empty string as field name
    fieldName = Random.NextBoolean() ? "field" : "";

    RandomIndexWriter iw = new RandomIndexWriter(
        Random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
            .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));

    // Reuse one Document/Field instance; SetStringValue changes what the
    // next AddDocument call indexes.
    Document document = new Document();
    Field field = NewStringField(fieldName, "", Field.Store.NO);
    document.Add(field);

    List<string> indexedTerms = new List<string>();
    int docCount = AtLeast(200);
    for (int i = 0; i < docCount; i++)
    {
        string randomTerm = TestUtil.RandomUnicodeString(Random);
        field.SetStringValue(randomTerm);
        indexedTerms.Add(randomTerm);
        iw.AddDocument(document);
    }

    if (Verbose)
    {
        // utf16 order
        indexedTerms.Sort();
        Console.WriteLine("UTF16 order:");
        foreach (string term in indexedTerms)
        {
            Console.WriteLine(" " + UnicodeUtil.ToHexString(term));
        }
    }

    reader = iw.GetReader();
    searcher1 = NewSearcher(reader);
    searcher2 = NewSearcher(reader);
    iw.Dispose();
}
public virtual void TestGetScores()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);
    iw.Commit();
    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Weight fakeWeight = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
    Scorer scorer = new SimpleScorer(fakeWeight);
    ScoreCachingCollector collector = new ScoreCachingCollector(Scores.Length);
    collector.Scorer = scorer;

    // Drive the scorer manually so its doc() advances and the collector
    // caches a score per document.
    int docId;
    while ((docId = scorer.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        collector.Collect(docId);
    }

    // The cached scores must match the canned Scores array exactly.
    for (int i = 0; i < Scores.Length; i++)
    {
        Assert.AreEqual(Scores[i], collector.Mscores[i], 0f);
    }

    reader.Dispose();
    directory.Dispose();
}
public void BeforeClass()
{
    Directory = NewDirectory();
    // Open and immediately close a writer so the directory contains an
    // empty-but-valid index that DirectoryReader can open.
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
    iw.Dispose();
    Reader = DirectoryReader.Open(Directory);
}
public virtual void TestAfter()
{
    // create an index holding a single doc whose datefield is in the future
    Directory indexStore = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), indexStore);

    long now = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;

    Document document = new Document();
    // add time that is in the future
    document.Add(NewStringField("datefield", DateTools.TimeToString(now + 888888, DateTools.Resolution.MILLISECOND), Field.Store.YES));
    document.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    // filter that should preserve matches
    // DateFilter df1 = DateFilter.After("datefield", now);
    TermRangeFilter keepFilter = TermRangeFilter.NewStringRange(
        "datefield",
        DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND),
        DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND),
        true, false);

    // filter that should discard matches
    // DateFilter df2 = DateFilter.After("datefield", now + 999999);
    TermRangeFilter dropFilter = TermRangeFilter.NewStringRange(
        "datefield",
        DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND),
        DateTools.TimeToString(now + 999999999, DateTools.Resolution.MILLISECOND),
        false, true);

    // search something that doesn't exist with DateFilter
    Query missingQuery = new TermQuery(new Term("body", "NoMatchForthis"));
    // search for something that does exists
    Query matchingQuery = new TermQuery(new Term("body", "sunny"));

    ScoreDoc[] result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(missingQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(matchingQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);

    // run queries with DateFilter
    result = searcher.Search(missingQuery, keepFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(missingQuery, dropFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(matchingQuery, keepFilter, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);
    result = searcher.Search(matchingQuery, dropFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    reader.Dispose();
    indexStore.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();

    // Single doc providing words, numbers, and '?' characters for the tests.
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);
    Document document = new Document();
    document.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO));
    iw.AddDocument(document);

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(NewTextField("field", "value", Field.Store.NO));
    writer.AddDocument(doc);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);

    IndexSearcher searcher = NewSearcher(reader);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    // BUGFIX: wrap the BooleanQuery that was just built; previously this
    // wrapped termQuery again, so the complex-query path was never exercised.
    qwf = new QueryWrapperFilter(booleanQuery);

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(1, hits.TotalHits);

    // test a query with no hits
    termQuery = new TermQuery(new Term("field", "not_exist"));
    qwf = new QueryWrapperFilter(termQuery);
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(0, hits.TotalHits);
    hits = searcher.Search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
    Assert.AreEqual(0, hits.TotalHits);

    reader.Dispose();
    dir.Dispose();
}
public virtual void TestNegativeScores()
{
    // The Top*Collectors previously filtered out documents with <= scores. this
    // behavior has changed. this test checks that if PositiveOnlyScoresFilter
    // wraps one of these collectors, documents with <= 0 scores are indeed
    // filtered.

    // Count how many canned scores are strictly positive.
    int numPositiveScores = 0;
    foreach (float score in Scores)
    {
        if (score > 0)
        {
            ++numPositiveScores;
        }
    }

    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    iw.Commit();
    IndexReader reader = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Weight fakeWeight = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
    Scorer scorer = new SimpleScorer(fakeWeight);
    TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.Create(Scores.Length, true);
    ICollector collector = new PositiveScoresOnlyCollector(tdc);
    collector.SetScorer(scorer);
    while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        collector.Collect(0);
    }

    TopDocs topDocs = tdc.GetTopDocs();
    ScoreDoc[] scoreDocs = topDocs.ScoreDocs;
    Assert.AreEqual(numPositiveScores, topDocs.TotalHits);
    foreach (ScoreDoc sd in scoreDocs)
    {
        Assert.IsTrue(sd.Score > 0, "only positive scores should return: " + sd.Score);
    }

    reader.Dispose();
    directory.Dispose();
}
// Builds a tiny single-segment index and captures its only atomic leaf
// reader into the SubR field for the tests to use. Writes to the fields
// d (directory), r (composite reader) and SubR (leaf reader).
internal virtual void MakeIndex()
{
    // we use RAMDirectory here, because we dont want to stay on open files on Windows:
    d = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    // NOTE(review): lowercase 'newField' is unusual for this codebase
    // (elsewhere 'NewField') — presumably a local helper; confirm it compiles.
    doc.Add(newField("ints", "1", StringField.TYPE_NOT_STORED));
    w.AddDocument(doc);
    // Force a single segment so Leaves() has exactly one entry below.
    w.ForceMerge(1);
    r = w.Reader;
    w.Dispose();
    SubR = (AtomicReader)(r.Leaves()[0]).Reader();
}
public override void BeforeClass()
{
    base.BeforeClass();
    Directory = NewDirectory();
    // Open and immediately close a writer so the directory contains an
    // empty-but-valid index that DirectoryReader can open.
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, Directory);
    iw.Dispose();
    Reader = DirectoryReader.Open(Directory);
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();

    // Populate the index with at least 100 empty documents.
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
    int docCount = AtLeast(100);
    for (int i = 0; i < docCount; i++)
    {
        iw.AddDocument(new Document());
    }

    Ir = iw.Reader;
    iw.Dispose();
    @is = NewSearcher(Ir);
}
public override void SetUp()
{
    base.SetUp();
    Analyzer = new MockAnalyzer(Random());
    Dir = NewDirectory();

    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);
    // we will use docids to validate
    config.SetMergePolicy(NewLogMergePolicy());

    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, config);
    iw.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
    iw.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
    iw.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);

    // 100 docs: "field" holds the doc number, "field2" alternates
    // "True"/"False" depending on parity.
    for (int i = 0; i < 100; i++)
    {
        Document document = new Document();
        document.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO));
        document.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO));
        iw.AddDocument(document);
    }

    Reader = iw.Reader;
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();

    // populate an index with 30 documents, this should be enough for the test.
    // The documents have no content - the test uses MatchAllDocsQuery().
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);
    for (int i = 0; i < 30; i++)
    {
        iw.AddDocument(new Document());
    }
    Reader = iw.Reader;
    iw.Dispose();
}
public virtual void TestOutOfOrderCollection()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    for (int i = 0; i < 10; i++)
    {
        iw.AddDocument(new Document());
    }

    bool[] inOrder = new bool[] { false, true };
    string[] actualTSDCClass = new string[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" };

    BooleanQuery bq = new BooleanQuery();
    // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
    // which delegates to BS if there are no mandatory clauses.
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
    // the clause instead of BQ.
    bq.MinimumNumberShouldMatch = 1;

    IndexReader reader = iw.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    for (int i = 0; i < inOrder.Length; i++)
    {
        // The factory must hand back the in-order or out-of-order variant
        // matching the flag we pass in.
        TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.Create(3, inOrder[i]);
        Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName);

        searcher.Search(new MatchAllDocsQuery(), tdc);

        ScoreDoc[] scoreDocs = tdc.GetTopDocs().ScoreDocs;
        Assert.AreEqual(3, scoreDocs.Length);
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            Assert.AreEqual(j, scoreDocs[j].Doc, "expected doc Id " + j + " found " + scoreDocs[j].Doc);
        }
    }

    iw.Dispose();
    reader.Dispose();
    directory.Dispose();
}
public override void SetUp()
{
    base.SetUp();

    // create test index with stopword-aware analysis and a fixed similarity
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        MDirectory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
            .SetOpenMode(OpenMode.APPEND)
            .SetMergePolicy(NewLogMergePolicy())
            .SetSimilarity(new DefaultSimilarity()));
    AddDocument(iw, "A", "Should we, could we, would we?");
    AddDocument(iw, "B", "It should. Should it?");
    AddDocument(iw, "C", "It shouldn't.");
    AddDocument(iw, "D", "Should we, should we, should we.");
    Reader2 = iw.Reader;
    iw.Dispose();

    // re-open the searcher since we added more docs
    Searcher2 = NewSearcher(Reader2);
    Searcher2.Similarity = new DefaultSimilarity();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // Each doc carries one or more (gender, first, last) triples as
    // repeated multi-valued fields.
    iw.AddDocument(Doc(new Field[]
    {
        GetField("id", "0"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones")
    }));
    iw.AddDocument(Doc(new Field[]
    {
        GetField("id", "1"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones")
    }));
    iw.AddDocument(Doc(new Field[]
    {
        GetField("id", "2"),
        GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"),
        GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones")
    }));
    iw.AddDocument(Doc(new Field[]
    {
        GetField("id", "3"),
        GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"),
        GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas")
    }));
    iw.AddDocument(Doc(new Field[]
    {
        GetField("id", "4"),
        GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"),
        GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"),
        GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones")
    }));

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestSimilarity_Mem()
{
    Directory store = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        store,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(new SimpleSimilarity()));

    // Two docs: "a c" and "a b c".
    Document docOne = new Document();
    docOne.Add(NewTextField("field", "a c", Field.Store.YES));
    Document docTwo = new Document();
    docTwo.Add(NewTextField("field", "a b c", Field.Store.YES));
    iw.AddDocument(docOne);
    iw.AddDocument(docTwo);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("field", "a");
    Term b = new Term("field", "b");
    Term c = new Term("field", "c");

    // Single term query.
    searcher.Search(new TermQuery(b), new CollectorAnonymousInnerClassHelper(this));

    // Boolean SHOULD query over two terms.
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(a), BooleanClause.Occur.SHOULD);
    bq.Add(new TermQuery(b), BooleanClause.Occur.SHOULD);
    //System.out.println(bq.toString("field"));
    searcher.Search(bq, new CollectorAnonymousInnerClassHelper2(this));

    // Exact phrase "a c", then the same phrase with slop 2.
    PhraseQuery pq = new PhraseQuery();
    pq.Add(a);
    pq.Add(c);
    //System.out.println(pq.toString("field"));
    searcher.Search(pq, new CollectorAnonymousInnerClassHelper3(this));

    pq.Slop = 2;
    //System.out.println(pq.toString("field"));
    searcher.Search(pq, new CollectorAnonymousInnerClassHelper4(this));

    reader.Dispose();
    store.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);

    // The same Document/Field instance is reused; mutating StringValue
    // between AddDocument calls changes what gets indexed next.
    Document document = new Document();
    Field field = NewTextField("field", "", Field.Store.NO);
    document.Add(field);

    field.StringValue = "quick brown fox";
    iw.AddDocument(document);
    field.StringValue = "jumps over lazy broun dog";
    iw.AddDocument(document);
    field.StringValue = "jumps over extremely very lazy broxn dog";
    iw.AddDocument(document);

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestMultiValuedNRQ()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

    // Zero-padded decimal so string order equals numeric order.
    const string format = "D11";

    // Each doc gets 1-10 random values, indexed both as a padded string
    // ("asc") and as a trie-encoded int ("trie").
    int numDocs = AtLeast(500);
    for (int l = 0; l < numDocs; l++)
    {
        Document document = new Document();
        for (int m = 0, c = Random().Next(10); m <= c; m++)
        {
            int value = Random().Next(int.MaxValue);
            document.Add(NewStringField("asc", value.ToString(format), Field.Store.NO));
            document.Add(new IntField("trie", value, Field.Store.NO));
        }
        iw.AddDocument(document);
    }

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    int numQueries = AtLeast(50);
    for (int i = 0; i < numQueries; i++)
    {
        int lower = Random().Next(int.MaxValue);
        int upper = Random().Next(int.MaxValue);
        if (lower > upper)
        {
            int tmp = lower;
            lower = upper;
            upper = tmp;
        }

        // The term-range query over padded strings and the numeric range
        // query over trie ints must agree on the hit count.
        TermRangeQuery cq = TermRangeQuery.NewStringRange("asc", lower.ToString(format), upper.ToString(format), true, true);
        NumericRangeQuery<int> tq = NumericRangeQuery.NewIntRange("trie", lower, upper, true, true);
        TopDocs trTopDocs = searcher.Search(cq, 1);
        TopDocs nrTopDocs = searcher.Search(tq, 1);
        Assert.AreEqual(trTopDocs.TotalHits, nrTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
    }

    reader.Dispose();
    directory.Dispose();
}
public override void SetUp()
{
    base.SetUp();

    // populate an index with 30 documents, this should be enough for the test.
    // The documents have no content - the test uses MatchAllDocsQuery().
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, Dir);
    for (int i = 0; i < 30; i++)
    {
        iw.AddDocument(new Document());
    }
    Reader = iw.GetReader();
    iw.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory);

    // One reusable doc; only the middle field's value changes per document.
    Document document = new Document();
    Field titleField = NewTextField("title", "some title", Field.Store.NO);
    Field field = NewTextField(FN, "", Field.Store.NO);
    Field footerField = NewTextField("footer", "a footer", Field.Store.NO);
    document.Add(titleField);
    document.Add(field);
    document.Add(footerField);

    // Values chosen to expose UTF-16 vs UTF-8/UTF-32 ordering differences:
    // the supplementary characters (surrogate pairs) sort before \uFB94/\uFB95
    // in UTF-8/UTF-32 but after them in UTF-16.
    string[] values =
    {
        "\uD866\uDF05abcdef",
        "\uD866\uDF06ghijkl",
        // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!!
        "\uFB94mnopqr",
        "\uFB95stuvwx", // this one too.
        "a\uFFFCbc",
        "a\uFFFDbc",
        "a\uFFFEbc",
        "a\uFB94bc",
        "bacadaba",
        "\uFFFD",
        "\uFFFD\uD866\uDF05",
        "\uFFFD\uFFFD"
    };
    foreach (string value in values)
    {
        field.StringValue = value;
        iw.AddDocument(document);
    }

    Reader = iw.Reader;
    Searcher = NewSearcher(Reader);
    iw.Dispose();
}
public virtual void TestNegativeScores()
{
    // The Top*Collectors previously filtered out documents with <= scores. this
    // behavior has changed. this test checks that if PositiveOnlyScoresFilter
    // wraps one of these collectors, documents with <= 0 scores are indeed
    // filtered.

    // Count how many canned scores are strictly positive.
    int numPositiveScores = 0;
    foreach (float score in Scores)
    {
        if (score > 0)
        {
            ++numPositiveScores;
        }
    }

    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);
    iw.Commit();
    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Weight fakeWeight = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
    Scorer scorer = new SimpleScorer(fakeWeight);
    TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.Create(Scores.Length, true);
    Collector collector = new PositiveScoresOnlyCollector(tdc);
    collector.Scorer = scorer;
    while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        collector.Collect(0);
    }

    TopDocs topDocs = tdc.TopDocs();
    ScoreDoc[] scoreDocs = topDocs.ScoreDocs;
    Assert.AreEqual(numPositiveScores, topDocs.TotalHits);
    foreach (ScoreDoc sd in scoreDocs)
    {
        Assert.IsTrue(sd.Score > 0, "only positive scores should return: " + sd.Score);
    }

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestDocBoost_Mem()
{
    Directory store = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        store,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    // Identical field content; only the boost differs between the two docs.
    Field f1 = NewTextField("field", "word", Field.Store.YES);
    Field f2 = NewTextField("field", "word", Field.Store.YES);
    f2.Boost = 2.0f;

    Documents.Document d1 = new Documents.Document();
    Documents.Document d2 = new Documents.Document();

    d1.Add(f1); // boost = 1
    d2.Add(f2); // boost = 2

    iw.AddDocument(d1);
    iw.AddDocument(d2);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    float[] scores = new float[4];

    IndexSearcher searcher = NewSearcher(reader);
    searcher.Search(new TermQuery(new Term("field", "word")), new CollectorAnonymousInnerClassHelper(this, scores));

    // Scores must strictly increase with the boost.
    float lastScore = 0.0f;
    for (int i = 0; i < 2; i++)
    {
        if (VERBOSE)
        {
            Console.WriteLine(searcher.Explain(new TermQuery(new Term("field", "word")), i));
        }
        Assert.IsTrue(scores[i] > lastScore, "score: " + scores[i] + " should be > lastScore: " + lastScore);
        lastScore = scores[i];
    }

    reader.Dispose();
    store.Dispose();
}
public virtual void TestBasics()
{
    Directory indexStore = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), indexStore);

    // Five docs, each with two values in the multi-valued "string" field.
    for (int i = 0; i < 5; i++)
    {
        Document document = new Document();
        document.Add(new StringField("string", "a" + i, Field.Store.NO));
        document.Add(new StringField("string", "b" + i, Field.Store.NO));
        iw.AddDocument(document);
    }

    IndexReader reader = iw.Reader;
    iw.Dispose();

    // The collector counts documents, not field values, so it sees 5.
    IndexSearcher searcher = NewSearcher(reader);
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.Search(new MatchAllDocsQuery(), null, collector);
    Assert.AreEqual(5, collector.TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
        Random(),
        Dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

    // One reusable stored text field with norms omitted; the value is the
    // zero-padded doc number ("000".."999").
    Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.OmitNorms = true;
    Field field = NewField("field", "", customType);
    document.Add(field);

    for (int i = 0; i < 1000; i++)
    {
        field.StringValue = i.ToString("D3");
        iw.AddDocument(document);
    }

    Reader = iw.Reader;
    iw.Dispose();
    Searcher = NewSearcher(Reader);
}
public virtual void TestOutOfOrderCollection()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);
    for (int i = 0; i < 10; i++)
    {
        iw.AddDocument(new Document());
    }

    bool[] inOrder = new bool[] { false, true };
    string[] actualTSDCClass = new string[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" };

    BooleanQuery bq = new BooleanQuery();
    // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
    // which delegates to BS if there are no mandatory clauses.
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
    // the clause instead of BQ.
    bq.MinimumNumberShouldMatch = 1;

    IndexReader reader = iw.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    for (int i = 0; i < inOrder.Length; i++)
    {
        // The factory must hand back the in-order or out-of-order variant
        // matching the flag we pass in.
        TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.Create(3, inOrder[i]);
        Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName);

        searcher.Search(new MatchAllDocsQuery(), tdc);

        ScoreDoc[] scoreDocs = tdc.TopDocs().ScoreDocs;
        Assert.AreEqual(3, scoreDocs.Length);
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            Assert.AreEqual(j, scoreDocs[j].Doc, "expected doc Id " + j + " found " + scoreDocs[j].Doc);
        }
    }

    iw.Dispose();
    reader.Dispose();
    directory.Dispose();
}
public virtual void TestMissingTerms()
{
    string fieldName = "field1";
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);

    // terms are units of 10: 0, 10, 20, ..., 990
    for (int i = 0; i < 100; i++)
    {
        Document document = new Document();
        int term = i * 10;
        document.Add(NewStringField(fieldName, "" + term, Field.Store.YES));
        iw.AddDocument(document);
    }

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    int numDocs = reader.NumDocs;
    ScoreDoc[] results;
    MatchAllDocsQuery matchAll = new MatchAllDocsQuery();

    // "5" was never indexed, so the filter matches nothing.
    List<string> terms = new List<string>();
    terms.Add("5");
    results = searcher.Search(matchAll, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(0, results.Length, "Must match nothing");

    // "10" exists exactly once.
    terms = new List<string>();
    terms.Add("10");
    results = searcher.Search(matchAll, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(1, results.Length, "Must match 1");

    // "10" and "20" both exist -> two hits.
    terms = new List<string>();
    terms.Add("10");
    terms.Add("20");
    results = searcher.Search(matchAll, new FieldCacheTermsFilter(fieldName, terms.ToArray()), numDocs).ScoreDocs;
    Assert.AreEqual(2, results.Length, "Must match 2");

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestEmbeddedBooleanScorer()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), directory);

    Document document = new Document();
    document.Add(NewTextField("field", "doctors are people who prescribe medicines of which they know little, to cure diseases of which they know less, in human beings of whom they know nothing", Field.Store.NO));
    iw.AddDocument(document);

    IndexReader reader = iw.Reader;
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // Inner SHOULD-only BooleanQuery...
    BooleanQuery inner = new BooleanQuery();
    inner.Add(new TermQuery(new Term("field", "little")), BooleanClause.Occur.SHOULD);
    inner.Add(new TermQuery(new Term("field", "diseases")), BooleanClause.Occur.SHOULD);

    // ...embedded next to a clause whose scorer insists on bulk scoring.
    BooleanQuery outer = new BooleanQuery();
    outer.Add(inner, BooleanClause.Occur.SHOULD);
    outer.Add(new CrazyMustUseBulkScorerQuery(), BooleanClause.Occur.SHOULD);

    Assert.AreEqual(1, searcher.Search(outer, 10).TotalHits);

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestNoOrds()
{
    Directory directory = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, directory);

    Document document = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.StoreTermVectors = true;
    document.Add(new Field("foo", "this is a test", fieldType));
    iw.AddDocument(document);

    AtomicReader reader = GetOnlySegmentReader(iw.GetReader());
    Terms terms = reader.GetTermVector(0, "foo");
    Assert.IsNotNull(terms);
    TermsEnum termsEnum = terms.GetEnumerator();
    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));

    // Term-vector enums carry no ordinal support: both Ord and SeekExact(ord)
    // must throw UnsupportedOperationException.
    try
    {
        var _ = termsEnum.Ord;
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    try
    {
        termsEnum.SeekExact(0);
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    reader.Dispose();
    iw.Dispose();
    directory.Dispose();
}