/// <summary> Builds a 1000-document in-memory index analyzed with PayloadAnalyzer and
/// assigns the <c>searcher</c> field to an IndexSearcher over it. Every document carries
/// the English spelling of its number in three fields: PayloadHelper.NO_PAYLOAD_FIELD,
/// "field", and "multiField" (the spelling twice, separated by a space).
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new PayloadAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetSimilarity(similarity);

    for (int docNum = 0; docNum < 1000; docNum++)
    {
        Document document = new Document();
        document.Add(new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("field", English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("multiField", English.IntToEnglish(docNum) + " " + English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(document);
    }

    // Merge and close the writer before a searcher is opened on the directory.
    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(ramDir);
    searcher.SetSimilarity(similarity);
}
/// <summary> Runs a PayloadTermQuery for "seventy" on PayloadHelper.MULTI_FIELD with
/// MaxPayloadFunction and span scoring disabled (third constructor argument is false),
/// then checks the hit count, the per-document scores, the explanations, and the number
/// of span matches.
/// </summary>
public virtual void TestIgnoreSpanScorer()
{
    PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction(), false);

    // NOTE(review): theSearcher is configured with FullSimilarity, but the query below is
    // executed through the shared 'searcher' field. That is left as-is because the score
    // expectations were written against it; confirm the intent with the test's author.
    IndexSearcher theSearcher = new IndexSearcher(directory, true);
    try
    {
        theSearcher.SetSimilarity(new FullSimilarity());

        TopDocs hits = searcher.Search(query, null, 100);
        Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
        Assert.IsTrue(hits.TotalHits == 100, "hits Size: " + hits.TotalHits + " is not: " + 100);

        Assert.IsTrue(hits.GetMaxScore() == 4.0, hits.GetMaxScore() + " does not equal: " + 4.0);

        // Exactly 10 hits are expected to score 4 (doc ids divisible by 10, i.e. 70 + i*100
        // for i in [0-9]); every other hit is expected to score 2.
        int numTens = 0;
        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            ScoreDoc doc = hits.ScoreDocs[i];
            if (doc.doc % 10 == 0)
            {
                numTens++;
                Assert.IsTrue(doc.score == 4.0, doc.score + " does not equal: " + 4.0);
            }
            else
            {
                Assert.IsTrue(doc.score == 2, doc.score + " does not equal: " + 2);
            }
        }
        Assert.IsTrue(numTens == 10, numTens + " does not equal: " + 10);

        CheckHits.CheckExplanations(query, "field", searcher, true);

        Lucene.Net.Search.Spans.Spans spans = query.GetSpans(searcher.GetIndexReader());
        Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
        Assert.IsTrue(spans is TermSpans, "spans is not an instanceof " + typeof(TermSpans));

        // 100 hits times 2 matches per hit: the span enumeration should yield 200 matches.
        int count = 0;
        while (spans.Next())
        {
            count++;
        }
        // Previously the count was computed and then discarded; assert it.
        Assert.IsTrue(count == 200, count + " does not equal: " + 200);
    }
    finally
    {
        // Release the reader opened by theSearcher even when an assertion fails.
        theSearcher.Close();
    }
}
/// <summary> Creates a fresh in-memory index holding a single document whose
/// PayloadHelper.FIELD text is "one two three one four three" (analyzed with
/// PayloadAnalyzer) and opens a searcher on it.
/// </summary>
/// <returns> An IndexSearcher over the one-document index, using the <c>similarity</c> field.
/// </returns>
public virtual IndexSearcher GetSpanNotSearcher()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new PayloadAnalyzer(this), true);
    indexWriter.SetSimilarity(similarity);

    Document onlyDoc = new Document();
    Field text = new Field(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES, Field.Index.ANALYZED);
    onlyDoc.Add(text);
    indexWriter.AddDocument(onlyDoc);
    indexWriter.Close();

    IndexSearcher spanNotSearcher = new IndexSearcher(ramDir);
    spanNotSearcher.SetSimilarity(similarity);
    return spanNotSearcher;
}
/// <summary> Builds a 1000-document in-memory index analyzed with PayloadAnalyzer and
/// assigns the <c>searcher</c> field to a searcher opened with a second argument of true.
/// Document i stores the English spelling of i in "field" and the spellings of i and
/// i + 1, joined by a space character, in "field2".
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new PayloadAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetSimilarity(similarity);

    for (int docNum = 0; docNum < 1000; docNum++)
    {
        Document document = new Document();

        Field single = new Field("field", English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED);
        document.Add(single);

        // This number followed by its successor, e.g. the spellings of 7 and 8.
        System.String pairText = English.IntToEnglish(docNum) + ' ' + English.IntToEnglish(docNum + 1);
        Field pair = new Field("field2", pairText, Field.Store.YES, Field.Index.ANALYZED);
        document.Add(pair);

        indexWriter.AddDocument(document);
    }

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(ramDir, true);
    searcher.SetSimilarity(similarity);
}
/// <summary> Creates a RAMDirectory index of <paramref name="numDocs"/> documents analyzed
/// with PayloadAnalyzer. Each document stores English.IntToEnglish(i) in three fields:
/// FIELD, MULTI_FIELD (the text twice, separated by a space) and NO_PAYLOAD_FIELD.
/// Optimize() is not called on the writer.
/// </summary>
/// <param name="similarity">The Similarity applied to both the writer and the returned searcher
/// </param>
/// <param name="numDocs">The number of documents to add
/// </param>
/// <returns> An IndexSearcher over the newly built index
/// </returns>
/// <throws> IOException </throws>
public virtual IndexSearcher SetUp(Similarity similarity, int numDocs)
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new PayloadAnalyzer(this), true);
    indexWriter.SetSimilarity(similarity);

    for (int docNum = 0; docNum < numDocs; docNum++)
    {
        Document document = new Document();

        Field single = new Field(FIELD, English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED);
        document.Add(single);

        Field doubled = new Field(MULTI_FIELD, English.IntToEnglish(docNum) + " " + English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED);
        document.Add(doubled);

        Field noPayload = new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED);
        document.Add(noPayload);

        indexWriter.AddDocument(document);
    }
    indexWriter.Close();

    IndexSearcher result = new IndexSearcher(ramDir);
    result.SetSimilarity(similarity);
    return result;
}
/// <summary> Runs a PayloadTermQuery for "seventy" on PayloadHelper.MULTI_FIELD with
/// MaxPayloadFunction and span scoring disabled (third constructor argument is false),
/// then checks the hit count, the per-document scores, the explanations, and the number
/// of span matches.
/// </summary>
public virtual void TestIgnoreSpanScorer()
{
    PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction(), false);

    // NOTE(review): theSearcher is configured with FullSimilarity, but the query below is
    // executed through the shared 'searcher' field. That is left as-is because the score
    // expectations were written against it; confirm the intent with the test's author.
    IndexSearcher theSearcher = new IndexSearcher(directory, true);
    try
    {
        theSearcher.SetSimilarity(new FullSimilarity());

        TopDocs hits = searcher.Search(query, null, 100);
        Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
        Assert.IsTrue(hits.totalHits == 100, "hits Size: " + hits.totalHits + " is not: " + 100);

        Assert.IsTrue(hits.GetMaxScore() == 4.0, hits.GetMaxScore() + " does not equal: " + 4.0);

        // Exactly 10 hits are expected to score 4 (doc ids divisible by 10, i.e. 70 + i*100
        // for i in [0-9]); every other hit is expected to score 2.
        int numTens = 0;
        for (int i = 0; i < hits.scoreDocs.Length; i++)
        {
            ScoreDoc doc = hits.scoreDocs[i];
            if (doc.doc % 10 == 0)
            {
                numTens++;
                Assert.IsTrue(doc.score == 4.0, doc.score + " does not equal: " + 4.0);
            }
            else
            {
                Assert.IsTrue(doc.score == 2, doc.score + " does not equal: " + 2);
            }
        }
        Assert.IsTrue(numTens == 10, numTens + " does not equal: " + 10);

        CheckHits.CheckExplanations(query, "field", searcher, true);

        Lucene.Net.Search.Spans.Spans spans = query.GetSpans(searcher.GetIndexReader());
        Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
        Assert.IsTrue(spans is TermSpans, "spans is not an instanceof " + typeof(TermSpans));

        // 100 hits times 2 matches per hit: the span enumeration should yield 200 matches.
        int count = 0;
        while (spans.Next())
        {
            count++;
        }
        // Previously the count was computed and then discarded; assert it.
        Assert.IsTrue(count == 200, count + " does not equal: " + 200);
    }
    finally
    {
        // Release the reader opened by theSearcher even when an assertion fails.
        theSearcher.Close();
    }
}
/// <summary> Indexes one document into a new RAMDirectory, with PayloadHelper.FIELD set to
/// "one two three one four three" and analyzed by PayloadAnalyzer, and returns a searcher
/// over that index.
/// </summary>
/// <returns> An IndexSearcher configured with the <c>similarity</c> field.
/// </returns>
public virtual IndexSearcher GetSpanNotSearcher()
{
    RAMDirectory ramDir = new RAMDirectory();
    PayloadAnalyzer payloadAnalyzer = new PayloadAnalyzer(this);

    IndexWriter indexWriter = new IndexWriter(ramDir, payloadAnalyzer, true);
    indexWriter.SetSimilarity(similarity);

    Document singleDoc = new Document();
    singleDoc.Add(new Field(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(singleDoc);
    indexWriter.Close();

    IndexSearcher result = new IndexSearcher(ramDir);
    result.SetSimilarity(similarity);
    return result;
}
/// <summary> Indexes 30 documents with two fields, "noTf" (term frequencies and positions
/// omitted) and "tf", checks the index, then runs four term queries and one boolean query
/// through counting collectors. Document i contains "term " repeated i + 1 times; odd
/// documents add " notf" to the "noTf" field and even documents add " tf" to the "tf"
/// field. The boolean query is expected to collect 15 documents.
/// </summary>
public virtual void TestBasic()
{
    Directory ramDir = new MockRAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer();
    IndexWriter indexWriter = new IndexWriter(ramDir, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetMergeFactor(2);
    indexWriter.SetMaxBufferedDocs(2);
    indexWriter.SetSimilarity(new SimpleSimilarity());

    System.String term = "term";
    System.Text.StringBuilder repeated = new System.Text.StringBuilder(265);
    for (int docNum = 0; docNum < 30; docNum++)
    {
        bool isEven = docNum % 2 == 0;

        // The shared builder grows by one "term " per document.
        repeated.Append(term).Append(" ");
        System.String content = repeated.ToString();

        Document doc = new Document();

        Field noTf = new Field("noTf", content + (isEven ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.SetOmitTermFreqAndPositions(true);
        doc.Add(noTf);

        Field tf = new Field("tf", content + (isEven ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        doc.Add(tf);

        indexWriter.AddDocument(doc);
    }

    indexWriter.Optimize();
    // flush
    indexWriter.Close();
    _TestUtil.CheckIndex(ramDir);

    // Verify the index.
    Searcher indexSearcher = new IndexSearcher(ramDir);
    indexSearcher.SetSimilarity(new SimpleSimilarity());

    TermQuery noTfTermQuery = new TermQuery(new Term("noTf", term));
    TermQuery tfTermQuery = new TermQuery(new Term("tf", term));
    TermQuery noTfMarkerQuery = new TermQuery(new Term("noTf", "notf"));
    TermQuery tfMarkerQuery = new TermQuery(new Term("tf", "tf"));

    // The searches run in the original order; the final assertion reads the count
    // through the static CountingHitCollector.GetCount().
    indexSearcher.Search(noTfTermQuery, new AnonymousClassCountingHitCollector(this));
    indexSearcher.Search(tfTermQuery, new AnonymousClassCountingHitCollector1(this));
    indexSearcher.Search(noTfMarkerQuery, new AnonymousClassCountingHitCollector2(this));
    indexSearcher.Search(tfMarkerQuery, new AnonymousClassCountingHitCollector3(this));

    BooleanQuery both = new BooleanQuery();
    both.Add(noTfTermQuery, Occur.MUST);
    both.Add(tfMarkerQuery, Occur.MUST);
    indexSearcher.Search(both, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    indexSearcher.Close();
    ramDir.Close();
}
/// <summary> Builds an in-memory (RAMDirectory) index containing <paramref name="numDocs"/>
/// documents, analyzed with PayloadAnalyzer. Document i stores English.IntToEnglish(i) in
/// FIELD and NO_PAYLOAD_FIELD, and the same text twice (space separated) in MULTI_FIELD.
/// The writer is closed without calling Optimize().
/// </summary>
/// <param name="similarity">The Similarity set on the writer and on the returned searcher
/// </param>
/// <param name="numDocs">How many documents to index
/// </param>
/// <returns> An IndexSearcher opened on the new index
/// </returns>
/// <throws> IOException </throws>
public virtual IndexSearcher SetUp(Similarity similarity, int numDocs)
{
    RAMDirectory ramDir = new RAMDirectory();
    PayloadAnalyzer payloadAnalyzer = new PayloadAnalyzer(this);
    IndexWriter indexWriter = new IndexWriter(ramDir, payloadAnalyzer, true);
    indexWriter.SetSimilarity(similarity);

    int docNum = 0;
    while (docNum < numDocs)
    {
        Document document = new Document();
        document.Add(new Field(FIELD, English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field(MULTI_FIELD, English.IntToEnglish(docNum) + " " + English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(docNum), Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(document);
        docNum++;
    }
    indexWriter.Close();

    IndexSearcher built = new IndexSearcher(ramDir);
    built.SetSimilarity(similarity);
    return built;
}