private static Open ( Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor ) : IndexReader

directory | Directory | the index directory to open
deletionPolicy | IndexDeletionPolicy | a custom deletion policy; only used when the reader is opened read/write
commit | IndexCommit | the specific commit point to open, or null for the most recent commit
readOnly | bool | true to open a read-only reader that disallows deletions and norm updates
termInfosIndexDivisor | int | subsamples the indexed terms loaded into memory; 1 loads all, larger values reduce memory use at some seek cost
return | IndexReader | a reader over the requested commit
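A minimal usage sketch of the public overload that delegates to the private method above (the `dir` variable is hypothetical; the trailing null seen throughout the examples below is the IState argument this Lucene.Net port threads through its calls):

// Open a read-only reader over the most recent commit.
IndexReader reader = IndexReader.Open(dir, true, null);
try
{
    int numDocs = reader.NumDocs(); // count of non-deleted documents
}
finally
{
    reader.Close(); // a reader holds index files open until it is closed
}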
public virtual void TestFarsi()
{
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc, null);
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)farsiIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(0, numHits, "The index Term should not be included.");

    numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(1, numHits, "The index Term should be included.");
    search.Close();
}
public virtual void Test()
{
    IndexReader reader = null;
    try
    {
        reader = IndexReader.Open((Directory)directory, true, null);
        for (int i = 1; i <= numThreads; i++)
        {
            TestTermPositionVectors(reader, i);
        }
    }
    catch (System.IO.IOException ioe)
    {
        Assert.Fail(ioe.Message);
    }
    finally
    {
        if (reader != null)
        {
            try
            {
                /* close the opened reader */
                reader.Close();
            }
            catch (System.IO.IOException ioe)
            {
                System.Console.Error.WriteLine(ioe.StackTrace);
            }
        }
    }
}
public virtual void TestNPESpanQuery()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()), IndexWriter.MaxFieldLength.LIMITED, null);

    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

    // Commit
    writer.Close();

    // Get searcher
    IndexReader reader = IndexReader.Open(dir, true, null);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));

    // This throws exception (it shouldn't)
    Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10, null).TotalHits);

    reader.Close();
    dir.Close();
}
/// <summary> Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
/// is a MultiReader containing the Reader of the original IndexSearcher,
/// as well as several "empty" IndexReaders -- some of which will have
/// deleted documents in them. This new IndexSearcher should
/// behave exactly the same as the original IndexSearcher.
/// </summary>
/// <param name="s">the searcher to wrap</param>
/// <param name="edge">if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub</param>
public static IndexSearcher WrapUnderlyingReader(IndexSearcher s, int edge)
{
    IndexReader r = s.IndexReader;

    // we can't put deleted docs before the nested reader, because
    // it will throw off the docIds
    IndexReader[] readers = new IndexReader[]
    {
        edge < 0 ? r : IndexReader.Open((Directory)MakeEmptyIndex(0), true, null),
        IndexReader.Open((Directory)MakeEmptyIndex(0), true, null),
        new MultiReader(new IndexReader[]
        {
            IndexReader.Open((Directory)MakeEmptyIndex(edge < 0 ? 4 : 0), true, null),
            IndexReader.Open((Directory)MakeEmptyIndex(0), true, null),
            0 == edge ? r : IndexReader.Open((Directory)MakeEmptyIndex(0), true, null)
        }),
        IndexReader.Open((Directory)MakeEmptyIndex(0 < edge ? 0 : 7), true, null),
        IndexReader.Open((Directory)MakeEmptyIndex(0), true, null),
        new MultiReader(new IndexReader[]
        {
            IndexReader.Open((Directory)MakeEmptyIndex(0 < edge ? 0 : 5), true, null),
            IndexReader.Open((Directory)MakeEmptyIndex(0), true, null),
            0 < edge ? r : IndexReader.Open((Directory)MakeEmptyIndex(0), true, null)
        })
    };
    IndexSearcher out_Renamed = new IndexSearcher(new MultiReader(readers));
    out_Renamed.Similarity = s.Similarity;
    return out_Renamed;
}
public virtual void TestBooleanOrderUnAffected()
{
    // NOTE: uses index built in *this* setUp
    IndexReader reader = IndexReader.Open(small, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    // first do a regular TermRangeQuery which uses term expansion so
    // docs with more terms in range get higher scores
    Query rq = new TermRangeQuery("data", "1", "4", T, T);

    ScoreDoc[] expected = search.Search(rq, null, 1000, null).ScoreDocs;
    int numHits = expected.Length;

    // now do a BooleanQuery which also contains a
    // ConstantScoreRangeQuery and make sure the order is the same
    BooleanQuery q = new BooleanQuery();
    q.Add(rq, Occur.MUST); // T, F);
    q.Add(Csrq("data", "1", "6", T, T), Occur.MUST); // T, F);

    ScoreDoc[] actual = search.Search(q, null, 1000, null).ScoreDocs;

    AssertEquals("wrong number of hits", numHits, actual.Length);
    for (int i = 0; i < numHits; i++)
    {
        AssertEquals("mismatch in docid for hit#" + i, expected[i].Doc, actual[i].Doc);
    }
}
public virtual void TestDanish()
{
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0; docnum < words.Length; ++docnum)
    {
        Document doc = new Document();
        doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)danishIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da-DK").CompareInfo;

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should be included.", 1, result.Length);

    result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should not be included.", 0, result.Length);
    search.Close();
}
public virtual void TestEqualScores()
{
    // NOTE: uses index built in *this* setUp
    IndexReader reader = IndexReader.Open(small, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    ScoreDoc[] result;

    // some hits match more terms than others, score should be the same
    result = search.Search(Csrq("data", "1", "6", T, T), null, 1000, null).ScoreDocs;
    int numHits = result.Length;
    AssertEquals("wrong number of results", 6, numHits);
    float score = result[0].Score;
    for (int i = 1; i < numHits; i++)
    {
        AssertEquals("score for " + i + " was not the same", score, result[i].Score);
    }

    result = search.Search(Csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000, null).ScoreDocs;
    numHits = result.Length;
    AssertEquals("wrong number of results", 6, numHits);
    for (int i = 0; i < numHits; i++)
    {
        AssertEquals("score for " + i + " was not the same", score, result[i].Score);
    }
}
public virtual void TestRAMDirectory_Renamed()
{
    Directory dir = FSDirectory.Open(indexDir);
    MockRAMDirectory ramDir = new MockRAMDirectory(dir);

    // close the underlying directory
    dir.Close();

    // check size
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

    // open reader to test document count
    IndexReader reader = IndexReader.Open((Directory)ramDir, true, null);
    Assert.AreEqual(docsToAdd, reader.NumDocs());

    // open a searcher to check that all docs are there
    IndexSearcher searcher = new IndexSearcher(reader);

    // search for all documents
    for (int i = 0; i < docsToAdd; i++)
    {
        Document doc = searcher.Doc(i, null);
        Assert.IsTrue(doc.GetField("content") != null);
    }

    // cleanup
    reader.Close();
    searcher.Close();
}
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
{
    RAMDirectory d = new RAMDirectory();
    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);
    for (int i = 0; i < numDeletedDocs; i++)
    {
        w.AddDocument(new Document(), null);
    }
    w.Commit(null);
    w.DeleteDocuments(null, new MatchAllDocsQuery());
    w.Commit(null);

    if (0 < numDeletedDocs)
    {
        Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
    }

    Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
    w.Close();

    IndexReader r = IndexReader.Open((Directory)d, true, null);
    Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
    r.Close();
    return d;
}
public virtual void TestVerifyIndex()
{
    IndexReader reader = IndexReader.Open(mDirectory, true, null);
    Assert.AreEqual(8, reader.NumDocs());
    reader.Close();
}
public virtual void TestFieldCacheRangeFilterRand()
{
    IndexReader reader = IndexReader.Open((Directory)signedIndex.index, true, null);
    IndexSearcher Search = new IndexSearcher(reader);

    System.String minRP = Pad(signedIndex.minR);
    System.String maxRP = Pad(signedIndex.maxR);

    int numDocs = reader.NumDocs();
    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    // test extremes, bounded on both ends
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but biggest");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but smallest");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but extremes");

    // unbounded
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "smallest and up");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "biggest and down");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not smallest, but up");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not biggest, but down");

    // very small sets
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, minRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");

    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, null, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,F");
}
private static void doSpellCheckerIndexing(string LuceneIndexDir, string SpellCheckerIndexDir)
{
    try
    {
        // http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/spell/SpellChecker.html
        FSDirectory spellCheckerIndexDir = FSDirectory.GetDirectory(SpellCheckerIndexDir, false);
        FSDirectory indexDir = FSDirectory.GetDirectory(LuceneIndexDir, false);

        SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellCheckerIndexDir);
        spellchecker.ClearIndex();

        IndexReader r = IndexReader.Open(indexDir);
        try
        {
            // To index a field of a user index:
            Dictionary dict = new SpellChecker.Net.Search.Spell.LuceneDictionary(r, "title");
            spellchecker.IndexDictionary(dict);
        }
        finally
        {
            r.Close();
        }
    }
    catch (Exception ex)
    {
        Console.Write("Could not create spell-checking index: " + ex.Message);
    }
}
public string getSpellingSuggestion(string query)
{
    FSDirectory indexDir = FSDirectory.GetDirectory(this.spellingIndexDir, false);
    SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexDir);
    IndexReader my_lucene_reader = IndexReader.Open(indexDir);

    string[] words = query.Split(new char[] { ' ', ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
    List<string> allSuggestions = new List<string>();
    foreach (string word in words)
    {
        string[] suggestions = spellchecker.SuggestSimilar(word, 1);
        if (suggestions.Length > 0)
        {
            allSuggestions.Add(suggestions[0]);
        }
        else
        {
            allSuggestions.Add(word);
        }
    }

    string completeSuggestion = String.Join(" ", allSuggestions.ToArray());
    return completeSuggestion;
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    long theLong = System.Int64.MaxValue;
    double theDouble = System.Double.MaxValue;
    sbyte theByte = System.SByte.MaxValue;
    short theShort = System.Int16.MaxValue;
    int theInt = System.Int32.MaxValue;
    float theFloat = System.Single.MaxValue;
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Close();
    reader = IndexReader.Open((Directory)directory, true, null);
}
public virtual void TestFieldCacheRangeFilterDoubles()
{
    IndexReader reader = IndexReader.Open((Directory)signedIndex.index, true, null);
    IndexSearcher Search = new IndexSearcher(reader);

    int numDocs = reader.NumDocs();
    System.Double minIdO = (double)(minId + .5);
    System.Double medIdO = (double)((float)minIdO + ((double)(maxId - minId)) / 2.0);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs / 2, result.Length, "find all");

    int count = 0;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs, null).ScoreDocs;
    count += result.Length;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs, null).ScoreDocs;
    count += result.Length;
    Assert.AreEqual(numDocs, count, "sum of two concatenated ranges");

    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    System.Double tempAux = System.Double.PositiveInfinity;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", tempAux, null, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");

    System.Double tempAux2 = System.Double.NegativeInfinity;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, tempAux2, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
}
public virtual void TestCompressionTools()
{
    IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
    IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

    Document doc = new Document();
    doc.Add(binaryFldCompressed);
    doc.Add(stringFldCompressed);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.AddDocument(doc, null);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open((Directory)dir, false, null);
    Document docFromReader = reader.Document(0, null);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary compressed field and compare its content with the original one */
    System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
    Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
    Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

    reader.Close();
    dir.Close();
}
public override void SetUp()
{
    base.SetUp();
    System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };

    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    for (int i = 0; i < data.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", String.valueOf(i)));
        doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("all", "all"));
        if (null != data[i])
        {
            doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("data", data[i]));
        }
        writer.AddDocument(doc, null);
    }

    writer.Optimize(null);
    writer.Close();

    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    //System.out.println("Set up " + getName());
}
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
    doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
    doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
    doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
    doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc1, null);
    writer.AddDocument(doc2, null);
    writer.AddDocument(doc3, null);
    writer.AddDocument(doc4, null);
    writer.AddDocument(doc5, null);
    writer.Optimize(null);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

    //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open((Directory)indexStore, true, null);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix + "*"), null);
    do
    {
        if (te.Term.Text.StartsWith(prefix))
        {
            termsWithPrefix.Add(te.Term);
        }
    }
    while (te.Next(null));

    query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, result.Length);

    result = searcher.Search(query2, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, result.Length);
}
public virtual void TestBoost()
{
    // NOTE: uses index built in *this* setUp
    IndexReader reader = IndexReader.Open(small, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    // test for correct application of query normalization
    // must use a non score normalizing method for this.
    Query q = Csrq("data", "1", "6", T, T);
    q.Boost = 100;
    search.Search(q, null, new AnonymousClassCollector(this), null);

    //
    // Ensure that boosting works to score one clause of a query higher
    // than another.
    //
    Query q1 = Csrq("data", "A", "A", T, T); // matches document #0
    q1.Boost = .1f;
    Query q2 = Csrq("data", "Z", "Z", T, T); // matches document #1
    BooleanQuery bq = new BooleanQuery(true);
    bq.Add(q1, Occur.SHOULD);
    bq.Add(q2, Occur.SHOULD);

    ScoreDoc[] hits = search.Search(bq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(1, hits[0].Doc);
    Assert.AreEqual(0, hits[1].Doc);
    Assert.IsTrue(hits[0].Score > hits[1].Score);

    q1 = Csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0
    q1.Boost = .1f;
    q2 = Csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1
    bq = new BooleanQuery(true);
    bq.Add(q1, Occur.SHOULD);
    bq.Add(q2, Occur.SHOULD);

    hits = search.Search(bq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(1, hits[0].Doc);
    Assert.AreEqual(0, hits[1].Doc);
    Assert.IsTrue(hits[0].Score > hits[1].Score);

    q1 = Csrq("data", "A", "A", T, T); // matches document #0
    q1.Boost = 10f;
    q2 = Csrq("data", "Z", "Z", T, T); // matches document #1
    bq = new BooleanQuery(true);
    bq.Add(q1, Occur.SHOULD);
    bq.Add(q2, Occur.SHOULD);

    hits = search.Search(bq, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, hits[0].Doc);
    Assert.AreEqual(1, hits[1].Doc);
    Assert.IsTrue(hits[0].Score > hits[1].Score);
}
public override void SetUp()
{
    base.SetUp();

    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetSimilarity(sim);

    // hed is the most important field, dek is secondary

    // d1 is an "ok" match for: albino elephant
    {
        Document d1 = new Document();
        d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", "d1"));
        d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant"));
        d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "elephant"));
        writer.AddDocument(d1, null);
    }

    // d2 is a "good" match for: albino elephant
    {
        Document d2 = new Document();
        d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", "d2"));
        d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant"));
        d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "albino"));
        d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "elephant"));
        writer.AddDocument(d2, null);
    }

    // d3 is a "better" match for: albino elephant
    {
        Document d3 = new Document();
        d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", "d3"));
        d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "albino"));
        d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant"));
        writer.AddDocument(d3, null);
    }

    // d4 is the "best" match for: albino elephant
    {
        Document d4 = new Document();
        d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", "d4"));
        d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "albino"));
        d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant"));
        d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "albino"));
        writer.AddDocument(d4, null);
    }

    writer.Close();

    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    s.Similarity = sim;
}
public virtual void TestSetBufferSize()
{
    System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize"));
    MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom());
    try
    {
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
        writer.UseCompoundFile = false;
        for (int i = 0; i < 37; i++)
        {
            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
        }
        writer.Close();

        dir.allIndexInputs.Clear();

        IndexReader reader = IndexReader.Open((Directory)dir, false, null);
        Term aaa = new Term("content", "aaa");
        Term bbb = new Term("content", "bbb");
        Term ccc = new Term("content", "ccc");
        Assert.AreEqual(37, reader.DocFreq(ccc, null));
        reader.DeleteDocument(0, null);
        Assert.AreEqual(37, reader.DocFreq(aaa, null));
        dir.tweakBufferSizes();
        reader.DeleteDocument(4, null);
        Assert.AreEqual(reader.DocFreq(bbb, null), 37);
        dir.tweakBufferSizes();

        IndexSearcher searcher = new IndexSearcher(reader);
        ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        dir.tweakBufferSizes();
        hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(1, hits.Length);
        hits = searcher.Search(new TermQuery(aaa), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        searcher.Close();
        reader.Close();
    }
    finally
    {
        _TestUtil.RmDir(indexDir);
    }
}
public virtual void TestLazyLoadThreadSafety()
{
    r = NewRandom();
    dir1 = new RAMDirectory();
    // test w/ field sizes bigger than the buffer of an index input
    BuildDir(dir1, 15, 5, 2000);

    // do many small tests so the thread locals go away in between
    for (int i = 0; i < 100; i++)
    {
        ir1 = IndexReader.Open(dir1, false, null);
        DoTest(10, 100);
    }
}
// test using a sparse index (with deleted docs). The DocIdSet should not be cacheable, as it uses TermDocs if the range contains 0
public void TestSparseIndex()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

    for (int d = -20; d <= 20; d++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }

    writer.Optimize(null);
    writer.DeleteDocuments(null, new Term("id", "0"));
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)dir, true, null);
    IndexSearcher Search = new IndexSearcher(reader);
    Assert.True(reader.HasDeletions);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    FieldCacheRangeFilter<sbyte?> fcrf;

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
    Assert.AreEqual(40, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    // ranges that do not contain 0 never need TermDocs, so their DocIdSet is cacheable
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100, null).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100, null).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");
}
private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode)
{
    if (indexCreationMode != IndexCreationMode.AppendToExistingIndex)
    {
        return;
    }

    IndexReader reader = IndexReader.Open(LuceneIndexDir);
    try
    {
        int numDocs = reader.NumDocs();
        for (int i = 0; i < numDocs; i++)
        {
            Document docToCheck = reader.Document(i);
            bool removeDocFromIndex = true;
            string filenameField = docToCheck.GetField("filename").StringValue();
            string lastModified = (docToCheck.GetField("LastModified").StringValue());

            foreach (IndexableFileInfo fi in fileInfos)
            {
                if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified)
                {
                    removeDocFromIndex = false;
                    break;
                }
            } // foreach

            if (removeDocFromIndex)
            {
                reader.DeleteDocument(i);
                if (!reader.HasDeletions())
                {
                    throw new Exception("error: deletion failed!!");
                }
            }
        } // for each lucene doc
    }
    finally
    {
        reader.Close();
    }

    LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again
    indexer.CloseIndexWriter(OptimizeMode.DoOptimization); // just to optimize the index (which removes deleted items).
}
public virtual void TestBinaryFieldInIndex()
{
    IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
    IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

    // binary fields with store off are not allowed
    Assert.Throws<ArgumentException>(() => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

    Document doc = new Document();
    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    /* test for field count */
    Assert.AreEqual(2, doc.fields_ForNUnit.Count);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.AddDocument(doc, null);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open((Directory)dir, false, null);
    Document docFromReader = reader.Document(0, null);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary stored field and compare its content with the original one */
    System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored", null)));
    Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

    /* fetch the string field and compare its content with the original one */
    System.String stringFldStoredTest = docFromReader.Get("stringStored", null);
    Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

    /* delete the document from index */
    reader.DeleteDocument(0, null);
    Assert.AreEqual(0, reader.NumDocs());

    reader.Close();
    dir.Close();
}
public virtual void TestCaching()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    TokenStream stream = new AnonymousClassTokenStream(this);

    stream = new CachingTokenFilter(stream);

    doc.Add(new Field("preanalyzed", stream, TermVector.NO));

    // 1) we consume all tokens twice before we add the doc to the index
    checkTokens(stream);
    stream.Reset();
    checkTokens(stream);

    // 2) now add the document to the index and verify if all tokens are indexed
    //    don't reset the stream here, the DocumentWriter should do that implicitly
    writer.AddDocument(doc, null);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);
    TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(1, termPositions.Freq);
    Assert.AreEqual(0, termPositions.NextPosition(null));

    termPositions.Seek(new Term("preanalyzed", "term2"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(2, termPositions.Freq);
    Assert.AreEqual(1, termPositions.NextPosition(null));
    Assert.AreEqual(3, termPositions.NextPosition(null));

    termPositions.Seek(new Term("preanalyzed", "term3"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(1, termPositions.Freq);
    Assert.AreEqual(2, termPositions.NextPosition(null));
    reader.Close();

    // 3) reset stream and consume tokens again
    stream.Reset();
    checkTokens(stream);
}
public virtual void TestMutipleDocument()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    doc = new Document();
    doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)dir, true, null);
    TermDocs td = reader.TermDocs(new Term("partnum", "Q36"), null);
    Assert.IsTrue(td.Next(null));
    td = reader.TermDocs(new Term("partnum", "Q37"), null);
    Assert.IsTrue(td.Next(null));
}
public virtual void TestMissingTerms()
{
    System.String fieldName = "field1";
    MockRAMDirectory rd = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(rd, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED, null);
    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        int term = i * 10; // terms are units of 10
        doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED));
        w.AddDocument(doc, null);
    }
    w.Close();

    IndexReader reader = IndexReader.Open((Directory)rd, true, null);
    IndexSearcher searcher = new IndexSearcher(reader);
    int numDocs = reader.NumDocs();
    ScoreDoc[] results;
    MatchAllDocsQuery q = new MatchAllDocsQuery();

    System.Collections.ArrayList terms = new System.Collections.ArrayList();
    terms.Add("5");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, results.Length, "Must match nothing");

    terms = new System.Collections.ArrayList();
    terms.Add("10");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, results.Length, "Must match 1");

    terms = new System.Collections.ArrayList();
    terms.Add("10");
    terms.Add("20");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(2, results.Length, "Must match 2");

    reader.Close();
    rd.Close();
}
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < 500; i++)
    {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);

    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader, null);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
    {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        // The doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        // There should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public virtual void TestSetNorm_Renamed()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    // add the same document four times
    IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
    Document d1 = new Document();
    d1.Add(f1);
    writer.AddDocument(d1, null);
    writer.AddDocument(d1, null);
    writer.AddDocument(d1, null);
    writer.AddDocument(d1, null);
    writer.Close();

    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.Open((Directory)store, false, null);
    reader.SetNorm(0, "field", 1.0f, null);
    reader.SetNorm(1, "field", 2.0f, null);
    reader.SetNorm(2, "field", 4.0f, null);
    reader.SetNorm(3, "field", 16.0f, null);
    reader.Close();

    // check that searches are ordered by this boost
    float[] scores = new float[4];
    new IndexSearcher(store, true, null).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this), null);

    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}