/// <summary>
/// Regression test: searching a nested span query whose inner span matches
/// nothing used to throw a NullPointerException; it should simply return hits.
/// </summary>
public virtual void TestNPESpanQuery()
{
    Directory dir = new MockRAMDirectory();
    // Empty stop-word set so every token in the test sentences is indexed.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), IndexWriter.MaxFieldLength.LIMITED);
    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");
    // Commit
    writer.Close();
    // Get searcher
    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));
    // This throws exception (it shouldn't)
    Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
    reader.Close();
    dir.Close();
}
/// <summary>
/// Round-trips two compressed stored fields (raw UTF-8 bytes and a string)
/// through a RAM index and verifies decompression restores the original value.
/// </summary>
public virtual void TestCompressionTools()
{
    Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
    Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);
    Document doc = new Document();
    doc.Add(binaryFldCompressed);
    doc.Add(stringFldCompressed);
    /** add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    writer.Close();
    /** open a reader and fetch the document */
    IndexReader reader = IndexReader.Open(dir);
    Document docFromReader = reader.Document(0);
    Assert.IsTrue(docFromReader != null);
    /** fetch the binary compressed field and compare its content with the original one */
    System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed"))));
    Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
    Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed")).Equals(binaryValCompressed));
    reader.Close();
    dir.Close();
}
/// <summary>
/// Verifies that a collated range query using Danish collation includes
/// "H\u00C5T" in the range ["H\u00D8T", "MAND"] even though plain Unicode
/// code-point order would exclude it.
/// </summary>
public virtual void TestDanish()
{
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0; docnum < words.Length; ++docnum)
    {
        Document doc = new Document();
        doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    IndexReader reader = IndexReader.Open(danishIndex);
    IndexSearcher search = new IndexSearcher(reader);
    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da-dk").CompareInfo;
    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
    AssertEquals("The index Term should be included.", 1, result.Length);
    result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
    AssertEquals("The index Term should not be included.", 0, result.Length);
    search.Close();
    // FIX: the searcher does not own the reader it was handed, so the reader
    // must be closed explicitly to avoid leaking it.
    reader.Close();
}
/// <summary>
/// Verifies that a collated range query using Arabic collation (a stand-in
/// for Farsi) excludes the single indexed Farsi term from a range that plain
/// Unicode code-point order would include it in.
/// </summary>
public virtual void TestFarsi()
{
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexReader reader = IndexReader.Open(farsiIndex);
    IndexSearcher search = new IndexSearcher(reader);
    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("ar").CompareInfo;
    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a ConstantScoreRangeQuery
    // with a Farsi Collator (or an Arabic one for the case when Farsi is
    // not supported).
    ScoreDoc[] result = search.Search(Csrq("content", "\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
    AssertEquals("The index Term should not be included.", 0, result.Length);
    result = search.Search(Csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
    AssertEquals("The index Term should be included.", 1, result.Length);
    search.Close();
    // FIX: the searcher does not own the reader it was handed, so the reader
    // must be closed explicitly to avoid leaking it.
    reader.Close();
}
/// <summary> Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
/// is a MultiReader containing the Reader of the original IndexSearcher,
/// as well as several "empty" IndexReaders -- some of which will have
/// deleted documents in them. This new IndexSearcher should
/// behave exactly the same as the original IndexSearcher.
/// </summary>
/// <param name="s">the searcher to wrap
/// </param>
/// <param name="edge">if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
/// </param>
public static IndexSearcher WrapUnderlyingReader(IndexSearcher s, int edge)
{
    IndexReader r = s.IndexReader;
    // we can't put deleted docs before the nested reader, because
    // it will throw off the docIds
    IndexReader[] readers = new IndexReader[]
    {
        // slot 0: the real reader when edge < 0, otherwise an empty reader
        edge < 0 ? r : IndexReader.Open(MakeEmptyIndex(0), true),
        IndexReader.Open(MakeEmptyIndex(0), true),
        // middle MultiReader: only holds deleted docs (4) when the real
        // reader already came first, so docIds are not shifted
        new MultiReader(new IndexReader[] { IndexReader.Open(MakeEmptyIndex(edge < 0 ? 4 : 0), true), IndexReader.Open(MakeEmptyIndex(0), true), 0 == edge ? r : IndexReader.Open(MakeEmptyIndex(0), true) }),
        // deleted docs (7) only when the real reader is NOT last
        IndexReader.Open(MakeEmptyIndex(0 < edge ? 0 : 7), true),
        IndexReader.Open(MakeEmptyIndex(0), true),
        // trailing MultiReader: real reader goes last when edge > 0
        new MultiReader(new IndexReader[] { IndexReader.Open(MakeEmptyIndex(0 < edge ? 0 : 5), true), IndexReader.Open(MakeEmptyIndex(0), true), 0 < edge ? r : IndexReader.Open(MakeEmptyIndex(0), true) })
    };
    IndexSearcher out_Renamed = new IndexSearcher(new MultiReader(readers));
    // Preserve the wrapped searcher's similarity so scoring is unchanged.
    out_Renamed.Similarity = s.Similarity;
    return(out_Renamed);
}
/// <summary>
/// Builds a NUM_DOCS-document index where each numeric-typed field starts at
/// its type's maximum value and decreases by one per document; used by the
/// field-cache tests below.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    long theLong = System.Int64.MaxValue;
    double theDouble = System.Double.MaxValue;
    sbyte theByte = (sbyte)System.SByte.MaxValue;
    short theShort = System.Int16.MaxValue;
    int theInt = System.Int32.MaxValue;
    float theFloat = System.Single.MaxValue;
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        // Each field value is the current counter, which is then decremented
        // (post-decrement) so successive docs hold successively smaller values.
        doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        // "E16"/"E8" scientific formats keep full precision for double/float.
        doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    reader = IndexReader.Open(directory);
}
/// <summary>
/// Copies an on-disk index into a MockRAMDirectory, closes the disk
/// directory, and verifies size accounting, document count, and that every
/// document is still readable from the RAM copy.
/// </summary>
public virtual void TestRAMDirectory_Renamed()
{
    Directory dir = FSDirectory.Open(indexDir);
    MockRAMDirectory ramDir = new MockRAMDirectory(dir);
    // close the underlying directory
    dir.Close();
    // Check size
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
    // open reader to test document count
    IndexReader reader = IndexReader.Open(ramDir, true);
    Assert.AreEqual(docsToAdd, reader.NumDocs());
    // open search to check if all doc's are there
    IndexSearcher searcher = new IndexSearcher(reader);
    // search for all documents
    for (int i = 0; i < docsToAdd; i++)
    {
        Document doc = searcher.Doc(i);
        Assert.IsTrue(doc.GetField("content") != null);
    }
    // cleanup
    reader.Close();
    searcher.Close();
}
/// <summary>
/// Opens a reader over the shared directory and runs the term-position-vector
/// check once for each thread count from 1 to numThreads; any IOException
/// fails the test, and the reader is always released.
/// </summary>
public virtual void Test()
{
    IndexReader indexReader = null;
    try
    {
        indexReader = IndexReader.Open(directory);
        int run = 1;
        while (run <= numThreads)
        {
            TestTermPositionVectors(indexReader, run);
            run++;
        }
    }
    catch (System.IO.IOException ioe)
    {
        Assert.Fail(ioe.Message);
    }
    finally
    {
        // Release the reader, but never let a close failure mask the result.
        if (indexReader != null)
        {
            try
            {
                /** close the opened reader */
                indexReader.Close();
            }
            catch (System.IO.IOException ioe)
            {
                System.Console.Error.WriteLine(ioe.StackTrace);
            }
        }
    }
}
/// <summary>
/// Builds a small RAM index of eight docs; every doc gets "id" and "all"
/// fields, and docs with non-null data additionally get an analyzed "data"
/// field (null entries deliberately produce docs missing that field).
/// </summary>
public override void SetUp()
{
    base.SetUp();
    System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < data.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
        doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("all","all"));
        if (null != data[i])
        {
            doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("data",data[i]));
        }
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    r = IndexReader.Open(index);
    s = new IndexSearcher(r);
    //System.out.println("Set up " + getName());
}
/// <summary>
/// Constant-score range queries must give every hit the same score
/// regardless of how many terms in the range each doc matches — both for the
/// default rewrite and the boolean-query rewrite.
/// </summary>
public virtual void TestEqualScores()
{
    // NOTE: uses index build in *this* setUp
    IndexReader reader = IndexReader.Open(small, true);
    IndexSearcher search = new IndexSearcher(reader);
    ScoreDoc[] result;
    // some hits match more terms then others, score should be the same
    result = search.Search(Csrq("data", "1", "6", T, T), null, 1000).ScoreDocs;
    int numHits = result.Length;
    AssertEquals("wrong number of results", 6, numHits);
    float score = result[0].Score;
    for (int i = 1; i < numHits; i++)
    {
        AssertEquals("score for " + i + " was not the same", score, result[i].Score);
    }
    // repeat with the boolean-query rewrite mode; scores must still be flat
    result = search.Search(Csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).ScoreDocs;
    numHits = result.Length;
    AssertEquals("wrong number of results", 6, numHits);
    for (int i = 0; i < numHits; i++)
    {
        AssertEquals("score for " + i + " was not the same", score, result[i].Score);
    }
}
/* Walk directory hierarchy in uid order, while keeping uid iterator from
 * existing index in sync. Mismatches indicate one of: (a) old documents to
 * be deleted; (b) unchanged documents, to be left alone; or (c) new
 * documents, to be indexed.
 */
private static void IndexDocs(System.IO.FileInfo file, System.String index, bool create)
{
    if (!create)
    {
        // incrementally update
        reader = IndexReader.Open(index); // open existing index
        uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
        IndexDocs(file);
        if (deleting)
        {
            // delete rest of stale docs
            // NOTE: field names are interned, so reference comparison against
            // the literal "uid" is how this version tests field equality.
            while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid")
            {
                System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                reader.Delete(uidIter.Term());
                uidIter.Next();
            }
            deleting = false;
        }
        uidIter.Close(); // close uid iterator
        reader.Close(); // close existing index
    }
    // don't have existing
    else
    {
        IndexDocs(file);
    }
}
/// <summary>
/// Adding a constant-score range clause to a boolean query must not change
/// the relative ordering produced by the scoring TermRangeQuery clause.
/// </summary>
public virtual void TestBooleanOrderUnAffected()
{
    // NOTE: uses index build in *this* setUp
    IndexReader reader = IndexReader.Open(small);
    IndexSearcher search = new IndexSearcher(reader);
    // first do a regular TermRangeQuery which uses term expansion so
    // docs with more terms in range get higher scores
    Query rq = new TermRangeQuery("data", "1", "4", T, T);
    ScoreDoc[] expected = search.Search(rq, null, 1000).scoreDocs;
    int numHits = expected.Length;
    // now do a boolean where which also contains a
    // ConstantScoreRangeQuery and make sure the order is the same
    BooleanQuery q = new BooleanQuery();
    q.Add(rq, BooleanClause.Occur.MUST); // T, F);
    q.Add(Csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST); // T, F);
    ScoreDoc[] actual = search.Search(q, null, 1000).scoreDocs;
    AssertEquals("wrong numebr of hits", numHits, actual.Length);
    for (int i = 0; i < numHits; i++)
    {
        AssertEquals("mismatch in docid for hit#" + i, expected[i].doc, actual[i].doc);
    }
}
/// <summary>
/// Exercises FieldCacheRangeFilter.NewDoubleRange: a bounded half-range,
/// two complementary open-ended ranges summing to all docs, a fully open
/// range, and the positive/negative-infinity edge cases which match nothing.
/// </summary>
public virtual void TestFieldCacheRangeFilterDoubles()
{
    IndexReader reader = IndexReader.Open(signedIndex.index);
    IndexSearcher search = new IndexSearcher(reader);
    int numDocs = reader.NumDocs();
    // +.5 offsets put the bounds strictly between integer id values.
    System.Double minIdO = (double)(minId + .5);
    System.Double medIdO = (double)((float)minIdO + ((double)(maxId - minId)) / 2.0);
    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs).scoreDocs;
    Assert.AreEqual(numDocs / 2, result.Length, "find all");
    // The two ranges below partition the id space, so their hit counts must
    // sum to the full document count.
    int count = 0;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs).scoreDocs;
    count += result.Length;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs).scoreDocs;
    count += result.Length;
    Assert.AreEqual(numDocs, count, "sum of two concenatted ranges");
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs).scoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    // Exclusive bounds at +/- infinity cannot match any finite value.
    System.Double tempAux = (double)System.Double.PositiveInfinity;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", tempAux, null, F, F), numDocs).scoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
    System.Double tempAux2 = (double)System.Double.NegativeInfinity;
    result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, tempAux2, F, F), numDocs).scoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
}
/// <summary>
/// Creates a RAMDirectory whose index contains numDeletedDocs documents,
/// all of which have been deleted — i.e. an index that is "empty" for
/// searching but still carries deletion bookkeeping.
/// </summary>
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED);
    // Add the requested number of empty documents...
    int added = 0;
    while (added < numDeletedDocs)
    {
        writer.AddDocument(new Document());
        added++;
    }
    writer.Commit();
    // ...then delete every one of them.
    writer.DeleteDocuments(new MatchAllDocsQuery());
    writer.Commit();
    if (numDeletedDocs > 0)
    {
        Assert.IsTrue(writer.HasDeletions(), "writer has no deletions");
    }
    Assert.AreEqual(numDeletedDocs, writer.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, writer.NumDocs(), "writer has non-deleted docs");
    writer.Close();
    // Sanity-check the on-disk state with a fresh reader before returning.
    IndexReader reader = IndexReader.Open(dir);
    Assert.AreEqual(numDeletedDocs, reader.NumDeletedDocs(), "reader has wrong number of deleted docs");
    reader.Close();
    return(dir);
}
/// <summary>
/// Exercises FieldCacheRangeFilter.NewStringRange over the padded random
/// "rand" field: bounded/unbounded ranges at the extremes, then degenerate
/// single-value and empty ranges.
/// </summary>
public virtual void TestFieldCacheRangeFilterRand()
{
    IndexReader reader = IndexReader.Open(signedIndex.index);
    IndexSearcher search = new IndexSearcher(reader);
    // Pad() zero-pads values so lexicographic order matches numeric order.
    System.String minRP = Pad(signedIndex.minR);
    System.String maxRP = Pad(signedIndex.maxR);
    int numDocs = reader.NumDocs();
    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");
    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    // test extremes, bounded on both ends
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but biggest");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but smallest");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but extremes");
    // unbounded
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, T, F), numDocs).scoreDocs;
    Assert.AreEqual(numDocs, result.Length, "smallest and up");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, T), numDocs).scoreDocs;
    Assert.AreEqual(numDocs, result.Length, "biggest and down");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, F, F), numDocs).scoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not smallest, but up");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, F), numDocs).scoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not biggest, but down");
    // very small sets
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, F, F), numDocs).scoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, T, T), numDocs).scoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, minRP, F, T), numDocs).scoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    // NOTE(review): the label below says "T,T" but the filter uses T,F —
    // the assertion message looks stale; verify against upstream test.
    result = search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, null, T, F), numDocs).scoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");
}
/// <summary>
/// Verifies that a TermRangeFilter using Arabic collation (a stand-in for
/// Farsi) excludes the single indexed Farsi term from a range that plain
/// Unicode code-point order would include it in.
/// </summary>
public virtual void TestFarsi()
{
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexReader reader = IndexReader.Open(farsiIndex, true);
    IndexSearcher search = new IndexSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));
    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;
    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000).TotalHits;
    Assert.AreEqual(0, numHits, "The index Term should not be included.");
    numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000).TotalHits;
    Assert.AreEqual(1, numHits, "The index Term should be included.");
    search.Close();
    // FIX: the searcher does not own the reader it was handed, so the reader
    // must be closed explicitly to avoid leaking it.
    reader.Close();
}
/// <summary>
/// CachingWrapperFilter must delegate to the wrapped filter on the first
/// request for a reader and serve subsequent requests from its cache.
/// </summary>
public virtual void TestCachingWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.Close();
    IndexReader reader = IndexReader.Open(dir);
    MockFilter filter = new MockFilter();
    CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
    // first time, nested filter is called
    cacher.GetDocIdSet(reader);
    Assert.IsTrue(filter.WasCalled(), "first time");
    // make sure no exception if cache is holding the wrong bitset
    // (deliberately hits the deprecated Bits API before re-requesting)
    cacher.Bits(reader);
    cacher.GetDocIdSet(reader);
    // second time, nested filter should not be called
    filter.Clear();
    cacher.GetDocIdSet(reader);
    Assert.IsFalse(filter.WasCalled(), "second time");
    reader.Close();
}
/// <summary> Check whether the word exists in the index.</summary>
/// <param name="word">the word to look up
/// </param>
/// <throws> IOException </throws>
/// <returns> true iff the word exists in the index
/// </returns>
public virtual bool Exist(System.String word)
{
    // Lazily open the shared reader over the spell index on first use.
    if (reader == null)
    {
        reader = IndexReader.Open(spellindex);
    }
    Term wordTerm = new Term(F_WORD, word);
    return reader.DocFreq(wordTerm) > 0;
}
/// <summary>
/// Builds a MultiPhraseQuery whose second position is expanded from all
/// indexed terms starting with "pi"; "blueberry pi*" must match two docs
/// while "strawberry pi*" matches none.
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
    doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
    doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
    doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
    doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(indexStore);
    //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));
    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open(indexStore);
    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix + "*"));
    // NOTE(review): do/while assumes the enum is positioned on a non-null
    // term; an index with no terms >= "pi*" would NPE here — fine for this
    // fixed fixture, but worth confirming against upstream.
    do
    {
        if (te.Term().Text().StartsWith(prefix))
        {
            termsWithPrefix.Add(te.Term());
        }
    }while (te.Next());
    query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length);
    result = searcher.Search(query2, null, 1000).scoreDocs;
    Assert.AreEqual(0, result.Length);
}
/// <summary>
/// Populates the spell checker from two fields of the user index, then
/// checks suggestion behavior: small/misspelled words, no self-suggestion,
/// and per-field restriction of suggestions.
/// </summary>
public virtual void TestBuild()
{
    try
    {
        IndexReader r = IndexReader.Open(userindex);
        spellChecker.ClearIndex();
        Addwords(r, "field1");
        int num_field1 = this.Numdoc();
        Addwords(r, "field2");
        int num_field2 = this.Numdoc();
        // field2 contributes exactly one word beyond field1
        Assert.AreEqual(num_field2, num_field1 + 1);
        // test small word
        System.String[] similar = spellChecker.SuggestSimilar("fvie", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "five");
        similar = spellChecker.SuggestSimilar("five", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "nine"); // don't suggest a word for itself
        similar = spellChecker.SuggestSimilar("fiv", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "five");
        similar = spellChecker.SuggestSimilar("ive", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "five");
        similar = spellChecker.SuggestSimilar("fives", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "five");
        similar = spellChecker.SuggestSimilar("fie", 2);
        Assert.AreEqual(1, similar.Length);
        Assert.AreEqual(similar[0], "five");
        // two-letter input is too short to produce a suggestion
        similar = spellChecker.SuggestSimilar("fi", 2);
        Assert.AreEqual(0, similar.Length);
        // test restraint to a field
        similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
        Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
        similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
        Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.Fail();
    }
}
/// <summary>
/// Boosting one constant-score range clause over another must reorder the
/// results accordingly, under both the default and boolean rewrite modes.
/// </summary>
public virtual void TestBoost()
{
    // NOTE: uses index build in *this* setUp
    IndexReader reader = IndexReader.Open(small);
    IndexSearcher search = new IndexSearcher(reader);
    // test for correct application of query normalization
    // must use a non score normalizing method for this.
    Query q = Csrq("data", "1", "6", T, T);
    q.SetBoost(100);
    search.Search(q, null, new AnonymousClassCollector(this));
    //
    // Ensure that boosting works to score one clause of a query higher
    // than another.
    //
    Query q1 = Csrq("data", "A", "A", T, T); // matches document #0
    q1.SetBoost(.1f);
    Query q2 = Csrq("data", "Z", "Z", T, T); // matches document #1
    BooleanQuery bq = new BooleanQuery(true);
    bq.Add(q1, BooleanClause.Occur.SHOULD);
    bq.Add(q2, BooleanClause.Occur.SHOULD);
    ScoreDoc[] hits = search.Search(bq, null, 1000).scoreDocs;
    // down-boosted clause's doc (#0) must rank below #1
    Assert.AreEqual(1, hits[0].doc);
    Assert.AreEqual(0, hits[1].doc);
    Assert.IsTrue(hits[0].score > hits[1].score);
    // same expectation under the boolean-query rewrite mode
    q1 = Csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0
    q1.SetBoost(.1f);
    q2 = Csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1
    bq = new BooleanQuery(true);
    bq.Add(q1, BooleanClause.Occur.SHOULD);
    bq.Add(q2, BooleanClause.Occur.SHOULD);
    hits = search.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits[0].doc);
    Assert.AreEqual(0, hits[1].doc);
    Assert.IsTrue(hits[0].score > hits[1].score);
    // boosting the first clause up instead flips the order
    q1 = Csrq("data", "A", "A", T, T); // matches document #0
    q1.SetBoost(10f);
    q2 = Csrq("data", "Z", "Z", T, T); // matches document #1
    bq = new BooleanQuery(true);
    bq.Add(q1, BooleanClause.Occur.SHOULD);
    bq.Add(q2, BooleanClause.Occur.SHOULD);
    hits = search.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits[0].doc);
    Assert.AreEqual(1, hits[1].doc);
    Assert.IsTrue(hits[0].score > hits[1].score);
}
/// <summary>
/// Returns the number of documents currently in the spell-check index,
/// asserting that the index is non-empty.
/// </summary>
private int Numdoc()
{
    IndexReader spellReader = IndexReader.Open(spellindex);
    int docCount = spellReader.NumDocs();
    Assert.IsTrue(docCount != 0);
    //System.out.println("num docs: " + num);
    spellReader.Close();
    return(docCount);
}
/// <summary>
/// Returns the number of documents currently in the spell-check index
/// (opened read-only), asserting that the index is non-empty.
/// </summary>
private int Numdoc()
{
    var spellReader = IndexReader.Open(spellindex, true);
    int docCount = spellReader.NumDocs();
    Assert.IsTrue(docCount != 0);
    spellReader.Close();
    return(docCount);
}
/// <summary>
/// Rewriting a PrefixQuery directly and rewriting it inside a BooleanQuery
/// must expand to the same number of clauses.
/// </summary>
public virtual void TestMethod()
{
    RAMDirectory directory = new RAMDirectory();
    System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" };
    Query rw1 = null;
    Query rw2 = null;
    try
    {
        // NOTE: uses the legacy 2-arg IndexWriter / Field.Keyword API.
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        for (int i = 0; i < categories.Length; i++)
        {
            Document doc = new Document();
            doc.Add(Field.Keyword("category", categories[i]));
            writer.AddDocument(doc);
        }
        writer.Close();
        IndexReader reader = IndexReader.Open(directory);
        PrefixQuery query = new PrefixQuery(new Term("category", "foo"));
        rw1 = query.Rewrite(reader);
        BooleanQuery bq = new BooleanQuery();
        bq.Add(query, true, false); // legacy (required, prohibited) flags
        rw2 = bq.Rewrite(reader);
    }
    catch (System.IO.IOException e)
    {
        Assert.Fail(e.Message);
    }
    BooleanQuery bq1 = null;
    if (rw1 is BooleanQuery)
    {
        bq1 = (BooleanQuery)rw1;
    }
    BooleanQuery bq2 = null;
    if (rw2 is BooleanQuery)
    {
        bq2 = (BooleanQuery)rw2;
    }
    else
    {
        Assert.Fail("Rewrite");
    }
    Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch");
}
/// <summary>
/// Builds the "albino elephant" fixture: four documents ranked d1 (ok) to
/// d4 (best) by how well their "hed" (primary) and "dek" (secondary) fields
/// match the query "albino elephant".
/// </summary>
public override void SetUp()
{
    base.SetUp();
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(sim);
    // hed is the most important field, dek is secondary
    // d1 is an "ok" match for: albino elephant
    {
        Document d1 = new Document();
        d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d1"));
        d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d1);
    }
    // d2 is a "good" match for: albino elephant
    {
        Document d2 = new Document();
        d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d2"));
        d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
        d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d2);
    }
    // d3 is a "better" match for: albino elephant
    {
        Document d3 = new Document();
        d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d3"));
        d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
        d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        writer.AddDocument(d3);
    }
    // d4 is the "best" match for: albino elephant
    {
        Document d4 = new Document();
        d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d4"));
        d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
        d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
        writer.AddDocument(d4);
    }
    writer.Close();
    r = IndexReader.Open(index, true);
    s = new IndexSearcher(r);
    s.Similarity = sim;
}
/// <summary>
/// Writes a 37-doc non-compound index, then repeatedly tweaks the mock
/// directory's input buffer sizes between reads/deletes/searches to verify
/// results are independent of buffer size.
/// </summary>
public virtual void TestSetBufferSize()
{
    System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
    MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom());
    try
    {
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        // non-compound so individual files are read through the mock inputs
        writer.SetUseCompoundFile(false);
        for (int i = 0; i < 37; i++)
        {
            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }
        writer.Close();
        dir.allIndexInputs.Clear();
        IndexReader reader = IndexReader.Open(dir);
        Term aaa = new Term("content", "aaa");
        Term bbb = new Term("content", "bbb");
        Term ccc = new Term("content", "ccc");
        Assert.AreEqual(37, reader.DocFreq(ccc));
        // DocFreq is unaffected by deletions; only live-hit counts change.
        reader.DeleteDocument(0);
        Assert.AreEqual(37, reader.DocFreq(aaa));
        dir.tweakBufferSizes();
        reader.DeleteDocument(4);
        Assert.AreEqual(reader.DocFreq(bbb), 37);
        dir.tweakBufferSizes();
        IndexSearcher searcher = new IndexSearcher(reader);
        ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
        dir.tweakBufferSizes();
        // 37 docs minus the two deleted above
        Assert.AreEqual(35, hits.Length);
        dir.tweakBufferSizes();
        hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(1, hits.Length);
        hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        searcher.Close();
        reader.Close();
    }
    finally
    {
        _TestUtil.RmDir(indexDir);
    }
}
/// <summary>
/// Runs several SpellCheckWorker threads against the spell checker while the
/// spell index is concurrently reset, then verifies no worker failed, all
/// terminated, and the searcher bookkeeping adds up.
/// </summary>
public void TestConcurrentAccess()
{
    Assert.AreEqual(1, searchers.Count);
    IndexReader r = IndexReader.Open(userindex, true);
    spellChecker.ClearIndex();
    Assert.AreEqual(2, searchers.Count);
    Addwords(r, "field1");
    Assert.AreEqual(3, searchers.Count);
    int num_field1 = this.Numdoc();
    Addwords(r, "field2");
    Assert.AreEqual(4, searchers.Count);
    int num_field2 = this.Numdoc();
    Assert.AreEqual(num_field2, num_field1 + 1);
    int numThreads = 5 + this.random.Next(5);
    SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
    for (int i = 0; i < numThreads; i++)
    {
        SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r, this);
        spellCheckWorker.start();
        workers[i] = spellCheckWorker;
    }
    int iterations = 5 + random.Next(5);
    for (int i = 0; i < iterations; i++)
    {
        Thread.Sleep(100);
        // concurrently reset the spell index
        spellChecker.SetSpellIndex(this.spellindex);
        // for debug - prints the internal Open searchers
        // showSearchersOpen();
    }
    spellChecker.Close();
    joinAll(workers, 5000);
    for (int i = 0; i < workers.Length; i++)
    {
        Assert.False(workers[i].failed);
        Assert.True(workers[i].terminated);
    }
    // 4 searchers more than iterations
    // 1. at creation
    // 2. ClearIndex()
    // 3. and 4. during Addwords
    Assert.AreEqual(iterations + 4, searchers.Count);
    AssertSearchersClosed();
}
public virtual void TestLazyLoadThreadSafety()
{
    // Runs many short lazy-field-load stress rounds, reopening the reader
    // each round so thread-local state is discarded in between.
    r = NewRandom();
    dir1 = new RAMDirectory();
    // field sizes exceed an index input's buffer on purpose
    BuildDir(dir1, 15, 5, 2000);

    int round = 0;
    while (round < 100)
    {
        ir1 = IndexReader.Open(dir1);
        DoTest(10, 100);
        round++;
    }
}
// test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
public void TestSparseIndex()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

    // Index ids -20..20, then delete id 0 so the index contains deletions.
    for (int d = -20; d <= 20; d++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }

    writer.Optimize();
    writer.DeleteDocuments(new Term("id", "0"));
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher Search = new IndexSearcher(reader);
    Assert.True(reader.HasDeletions);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    FieldCacheRangeFilter<sbyte?> fcrf;

    // Ranges that contain 0 must yield a non-cacheable DocIdSet (TermDocs path).
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(40, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    // BUGFIX: the messages below said "find all" although only a sub-range is matched.
    Assert.AreEqual(20, result.Length, "find values in [0, 20]");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find values in [-20, 0]");

    // Ranges that exclude 0 avoid TermDocs, so their DocIdSet IS cacheable.
    // BUGFIX: these two assertion messages claimed "must be not cacheable"
    // while asserting the opposite condition.
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find values in [10, 20]");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find values in [-20, -10]");
}
public virtual void TestBinaryFieldInIndex()
{
    // Round-trips a binary stored field and a string stored field through a
    // RAM index and verifies both come back byte/char-identical.
    IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
    IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

    // binary fields with store off are not allowed
    Assert.Throws<ArgumentException>(() => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

    Document doc = new Document();
    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    /* test for field count */
    Assert.AreEqual(2, doc.fields_ForNUnit.Count);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open(dir, false);
    Document docFromReader = reader.Document(0);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary stored field and compare its content with the original one */
    System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored")));
    Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

    /* fetch the string field and compare its content with the original one */
    System.String stringFldStoredTest = docFromReader.Get("stringStored");
    Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

    /* delete the document from index */
    reader.DeleteDocument(0);
    Assert.AreEqual(0, reader.NumDocs());

    reader.Close();
    dir.Close();
}