/// <summary> Minimal end-to-end walkthrough: writes one document to an
/// in-memory index, searches it for the word "text" and checks that the
/// single hit returns its stored field value unchanged.
/// </summary>
public virtual void TestDemo_Renamed_Method()
{
    const string fieldName = "fieldname";
    const string indexedText = "This is the text to be indexed.";

    Analyzer stdAnalyzer = new StandardAnalyzer();

    // The index lives in memory. For an on-disk index the following could be
    // used instead (the "true" argument overwrites any index that already
    // exists in that directory):
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
    Directory ramIndex = new RAMDirectory();

    // Index phase: a single document with one stored, tokenized field.
    IndexWriter indexWriter = new IndexWriter(ramIndex, stdAnalyzer, true);
    indexWriter.SetMaxFieldLength(25000);
    Document toIndex = new Document();
    toIndex.Add(new Field(fieldName, indexedText, Field.Store.YES, Field.Index.TOKENIZED));
    indexWriter.AddDocument(toIndex);
    indexWriter.Close();

    // Search phase: parse a simple query for "text" and run it.
    IndexSearcher indexSearcher = new IndexSearcher(ramIndex);
    Lucene.Net.QueryParsers.QueryParser textParser = new Lucene.Net.QueryParsers.QueryParser(fieldName, stdAnalyzer);
    Query textQuery = textParser.Parse("text");
    Hits results = indexSearcher.Search(textQuery);
    Assert.AreEqual(1, results.Length());

    // Walk every hit and verify the stored text round-trips.
    int position = 0;
    while (position < results.Length())
    {
        Document hit = results.Doc(position);
        Assert.AreEqual(indexedText, hit.Get(fieldName));
        position++;
    }

    indexSearcher.Close();
    ramIndex.Close();
}
/// <summary> Checks that a PerFieldAnalyzerWrapper routes the "partnum" field
/// through a KeywordAnalyzer (so "Q36" is kept verbatim) while every other
/// field falls back to the SimpleAnalyzer (so "SPACE" becomes "space").
/// </summary>
public virtual void TestPerFieldAnalyzer()
{
    PerFieldAnalyzerWrapper perField = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    perField.AddAnalyzer("partnum", new KeywordAnalyzer());

    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("description", perField);
    Query parsed = parser.Parse("partnum:Q36 AND SPACE");

    // Run the search first, then inspect the parsed form.
    Hits found = searcher.Search(parsed);
    string rendered = parsed.ToString("description");

    Assert.AreEqual("+partnum:Q36 +space", rendered, "Q36 kept as-is");
    Assert.AreEqual(1, found.Length(), "doc found!");
}
/// <summary> Verifies that the query "a NOT b" matches nothing in an index
/// whose only document contains both "a" and "b".
/// </summary>
public virtual void TestNot_Renamed_Method()
{
    RAMDirectory store = new RAMDirectory();
    try
    {
        // Build a one-document index: field "field" holds the tokens "a" and "b".
        IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
        Lucene.Net.Documents.Document d1 = new Lucene.Net.Documents.Document();
        d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(d1);
        writer.Optimize();
        writer.Close();

        Searcher searcher = new IndexSearcher(store);
        try
        {
            Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("field", new SimpleAnalyzer());
            Lucene.Net.Search.Query query = parser.Parse("a NOT b");

            Hits hits = searcher.Search(query);
            Assert.AreEqual(0, hits.Length());
        }
        finally
        {
            // Release the searcher even when the assertion above fails.
            searcher.Close();
        }
    }
    finally
    {
        store.Close();
    }
}
/*
public void testTermRepeatedQuery() throws IOException, ParseException {
// TODO: this corner case yields different results.
checkQuery("multi* multi* foo");
}
*/

/// <summary> Runs the same query against the single IndexSearcher holding
/// all documents and against the MultiSearcher aggregating sub-searchers,
/// and asserts that both return the same number of hits, with matching
/// scores (within 0.001) and matching field values at every rank.
/// </summary>
/// <param name="queryStr"> the query text to parse and compare.
/// </param>
private void CheckQuery(System.String queryStr)
{
    if (verbose)
    {
        System.Console.Out.WriteLine("Query: " + queryStr);
    }

    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(FIELD_NAME, new StandardAnalyzer());
    Lucene.Net.Search.Query parsed = parser.Parse(queryStr);

    // The multi searcher is queried first, then the single searcher.
    Hits fromMulti = multiSearcher.Search(parsed);
    Hits fromSingle = singleSearcher.Search(parsed);

    Assert.AreEqual(fromMulti.Length(), fromSingle.Length());

    // Compare the two result lists rank by rank.
    int rank = 0;
    while (rank < fromMulti.Length())
    {
        Lucene.Net.Documents.Document multiDoc = fromMulti.Doc(rank);
        Lucene.Net.Documents.Document singleDoc = fromSingle.Doc(rank);

        if (verbose)
        {
            System.Console.Out.WriteLine("Multi: " + multiDoc.Get(FIELD_NAME) + " score=" + fromMulti.Score(rank));
            System.Console.Out.WriteLine("Single: " + singleDoc.Get(FIELD_NAME) + " score=" + fromSingle.Score(rank));
        }

        Assert.AreEqual(fromMulti.Score(rank), fromSingle.Score(rank), 0.001f);
        Assert.AreEqual(multiDoc.Get(FIELD_NAME), singleDoc.Get(FIELD_NAME));
        rank++;
    }

    if (verbose)
    {
        System.Console.Out.WriteLine();
    }
}
/// <summary> Test that the CustomScoreQuery variants built on top of a
/// FieldScoreQuery return docs with the expected scores. Builds a plain
/// parsed query plus four custom-score wrappers around it, runs all five
/// searches and hands the results to VerifyResults.
/// </summary>
/// <param name="field"> name of the field used by the FieldScoreQuery value source.
/// </param>
/// <param name="tp"> FieldScoreQuery type passed to the value source.
/// </param>
/// <param name="dboost"> boost applied to every custom query (narrowed to float).
/// </param>
private void DoTestCustomScore(System.String field, FieldScoreQuery.Type tp, double dboost)
{
    float boost = (float) dboost;
    IndexSearcher s = new IndexSearcher(dir);
    try
    {
        FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field
        Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(TEXT_FIELD, anlzr);
        System.String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.

        // regular (boolean) query.
        Query q1 = qp.Parse(qtxt);
        Log(q1);

        // custom query, that should score the same as q1.
        CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
        q2CustomNeutral.SetBoost(boost);
        Log(q2CustomNeutral);

        // custom query, that should (by default) multiply the scores of q1 by that of the field
        CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1, qValSrc);
        q3CustomMul.SetStrict(true);
        q3CustomMul.SetBoost(boost);
        Log(q3CustomMul);

        // custom query, that should add the scores of q1 to that of the field
        CustomScoreQuery q4CustomAdd = new CustomAddQuery(q1, qValSrc);
        q4CustomAdd.SetStrict(true);
        q4CustomAdd.SetBoost(boost);
        Log(q4CustomAdd);

        // custom query, that multiplies and adds the field score to that of q1
        CustomScoreQuery q5CustomMulAdd = new CustomMulAddQuery(q1, qValSrc, qValSrc);
        q5CustomMulAdd.SetStrict(true);
        q5CustomMulAdd.SetBoost(boost);
        Log(q5CustomMulAdd);

        // do all the searches
        TopDocs td1 = s.Search(q1, null, 1000);
        TopDocs td2CustomNeutral = s.Search(q2CustomNeutral, null, 1000);
        TopDocs td3CustomMul = s.Search(q3CustomMul, null, 1000);
        TopDocs td4CustomAdd = s.Search(q4CustomAdd, null, 1000);
        TopDocs td5CustomMulAdd = s.Search(q5CustomMulAdd, null, 1000);

        // put results in map so we can verify the scores although they have changed
        System.Collections.Hashtable h1 = TopDocsToMap(td1);
        System.Collections.Hashtable h2CustomNeutral = TopDocsToMap(td2CustomNeutral);
        System.Collections.Hashtable h3CustomMul = TopDocsToMap(td3CustomMul);
        System.Collections.Hashtable h4CustomAdd = TopDocsToMap(td4CustomAdd);
        System.Collections.Hashtable h5CustomMulAdd = TopDocsToMap(td5CustomMulAdd);

        VerifyResults(boost, s, h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
    }
    finally
    {
        // The searcher is still needed by VerifyResults, so it is released only
        // here, and also when a search or verification step throws.
        s.Close();
    }
}
/// <summary> Exercises PhraseQuery and MultiPhraseQuery matching, both with
/// implicit term positions and with positions passed explicitly to Add, and
/// then checks how the position-increment switches on the QueryParser and on
/// StopFilter affect a parsed phrase that contains a stop word.
/// </summary>
/// <remarks> The token stream for the indexed document comes from
/// AnonymousClassAnalyzer, which is defined outside this method; the expected
/// hit counts below depend on the positions it assigns.
/// NOTE(review): the IndexSearcher created here is never closed in this
/// method -- confirm whether that is intentional.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);
    PhraseQuery q;
    Hits hits;

    // "1 2" with default (consecutive) positions is expected not to match.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    // phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
    hits = searcher.Search(mq);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // analyzer to introduce stopwords and increment gaps
    Analyzer stpa = new AnonymousClassAnalyzer1(this);

    // should not find "1 2" because there is a gap of 1 in the index
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field", stpa);
    q = (PhraseQuery) qp.Parse("\"1 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // omitted stop word cannot help because stop filter swallows the increments.
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // query parser alone won't help, because stop filter swallows the increments.
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Remember the current StopFilter default so the finally block can restore
    // it; the default is static state that outlives this method.
    bool dflt = StopFilter.GetEnablePositionIncrementsDefault();
    try
    {
        // stop filter alone won't help, because query parser swallows the increments.
        qp.SetEnablePositionIncrements(false);
        StopFilter.SetEnablePositionIncrementsDefault(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q);
        Assert.AreEqual(0, hits.Length());

        // when both qp and stopFilter propagate increments, we should find the doc.
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q);
        Assert.AreEqual(1, hits.Length());
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(dflt);
    }
}
/// <summary> Searches across two indices through the multi searcher returned
/// by GetMultiSearcherInstance, in three scenarios: (1) the first sub-index is
/// empty, (2) the first sub-index holds one document, (3) that document has
/// been deleted again and the index optimized. In every scenario each hit
/// document is loaded to make sure that no exception is raised.
/// </summary>
public virtual void TestEmptyIndex()
{
    // Index A receives three documents up front; index B starts out empty.
    Directory storeA = new MockRAMDirectory();
    Directory storeB = new MockRAMDirectory();

    // Three documents that all share handle "1" but differ in id and text.
    Document firstDoc = new Document();
    firstDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.TOKENIZED));
    firstDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    firstDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));

    Document secondDoc = new Document();
    secondDoc.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.TOKENIZED));
    secondDoc.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.UN_TOKENIZED));
    secondDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));

    Document thirdDoc = new Document();
    thirdDoc.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.TOKENIZED));
    thirdDoc.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.UN_TOKENIZED));
    thirdDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));

    // One writer per index; the writer for B is opened but writes nothing.
    IndexWriter writerForA = new IndexWriter(storeA, new StandardAnalyzer(), true);
    IndexWriter writerForEmptyB = new IndexWriter(storeB, new StandardAnalyzer(), true);

    //--------------------------------------------------------------------
    // scenario 1: B is empty
    //--------------------------------------------------------------------

    writerForA.AddDocument(firstDoc);
    writerForA.AddDocument(secondDoc);
    writerForA.AddDocument(thirdDoc);
    writerForA.Optimize();
    writerForA.Close();

    writerForEmptyB.Close();

    // The same query is reused by all three scenarios.
    Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("fulltext", new StandardAnalyzer());
    Query handleQuery = queryParser.Parse("handle:1");

    // VITAL STEP: the searcher for the empty index goes first, ahead of the
    // searcher for the populated index.
    Searcher[] subSearchers1 = new Searcher[] { new IndexSearcher(storeB), new IndexSearcher(storeA) };
    Searcher multi1 = GetMultiSearcherInstance(subSearchers1);

    Hits found1 = multi1.Search(handleQuery);
    Assert.AreEqual(3, found1.Length());

    // Load every hit document.
    for (int n = 0; n < found1.Length(); n++)
    {
        found1.Doc(n);
    }
    multi1.Close();

    //--------------------------------------------------------------------
    // scenario 2: B holds one document
    //--------------------------------------------------------------------

    IndexWriter writerAppendB = new IndexWriter(storeB, new StandardAnalyzer(), false);
    writerAppendB.AddDocument(firstDoc);
    writerAppendB.Optimize();
    writerAppendB.Close();

    // Same searcher order as before: B first, then A.
    Searcher[] subSearchers2 = new Searcher[] { new IndexSearcher(storeB), new IndexSearcher(storeA) };
    MultiSearcher multi2 = GetMultiSearcherInstance(subSearchers2);

    Hits found2 = multi2.Search(handleQuery);
    Assert.AreEqual(4, found2.Length());

    // No exception should happen while loading the hit documents.
    for (int n = 0; n < found2.Length(); n++)
    {
        found2.Doc(n);
    }

    // SubSearcher() must map each hit back to the sub-searcher it came from.
    Query byIdQuery = queryParser.Parse("id:doc1");
    found2 = multi2.Search(byIdQuery);
    Assert.AreEqual(2, found2.Length());
    Assert.AreEqual(0, multi2.SubSearcher(found2.Id(0))); // hit from subSearchers2[0]
    Assert.AreEqual(1, multi2.SubSearcher(found2.Id(1))); // hit from subSearchers2[1]

    byIdQuery = queryParser.Parse("id:doc2");
    found2 = multi2.Search(byIdQuery);
    Assert.AreEqual(1, found2.Length());
    Assert.AreEqual(1, multi2.SubSearcher(found2.Id(0))); // hit from subSearchers2[1]
    multi2.Close();

    //--------------------------------------------------------------------
    // scenario 3: the document in B is deleted again
    //--------------------------------------------------------------------

    // Deleting the document that was just added; the original test notes
    // that this caused a different exception to take place.
    Term doc1Term = new Term("id", "doc1");
    IndexReader readerForB = IndexReader.Open(storeB);
    readerForB.DeleteDocuments(doc1Term);
    readerForB.Close();

    // Optimize B with a writer after the deletion.
    IndexWriter writerOptimizeB = new IndexWriter(storeB, new StandardAnalyzer(), false);
    writerOptimizeB.Optimize();
    writerOptimizeB.Close();

    Searcher[] subSearchers3 = new Searcher[] { new IndexSearcher(storeB), new IndexSearcher(storeA) };
    Searcher multi3 = GetMultiSearcherInstance(subSearchers3);

    Hits found3 = multi3.Search(handleQuery);
    Assert.AreEqual(3, found3.Length());

    for (int n = 0; n < found3.Length(); n++)
    {
        found3.Doc(n);
    }
    multi3.Close();

    storeA.Close();
    storeB.Close();
}
/// <summary> Indexes three small documents and checks that wildcard-style
/// query strings are parsed into the expected query class (PrefixQuery or
/// WildcardQuery) and match the expected documents: all of them, none of
/// them, or exactly the one document at the corresponding index.
/// </summary>
public virtual void TestParsingAndSearching()
{
    System.String field = "content";
    bool dbg = false;
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(field, new WhitespaceAnalyzer());
    qp.SetAllowLeadingWildcard(true);

    System.String[] docs = new System.String[]{"\\ abcdefg1", "\\79 hijklmn1", "\\\\ opqrstu1"};

    // queries that should find all docs
    System.String[] matchAll = new System.String[]{"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"};

    // queries that should find no docs
    System.String[] matchNone = new System.String[]{"a*h", "a?h", "*a*h", "?a", "a?"};

    // queries that should be parsed to prefix queries; row i must match only docs[i]
    System.String[][] matchOneDocPrefix = new System.String[][]{
        new System.String[]{"a*", "ab*", "abc*"},
        new System.String[]{"h*", "hi*", "hij*", "\\\\7*"},
        new System.String[]{"o*", "op*", "opq*", "\\\\\\\\*"}};

    // queries that should be parsed to wildcard queries; row i must match only docs[i]
    System.String[][] matchOneDocWild = new System.String[][]{
        new System.String[]{"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"},
        new System.String[]{"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"},
        new System.String[]{"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}};

    // prepare the index
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(field, docs[i], Field.Store.NO, Field.Index.TOKENIZED));
        iw.AddDocument(doc);
    }
    iw.Close();

    IndexSearcher searcher = new IndexSearcher(dir);
    try
    {
        // test queries that must find all
        for (int i = 0; i < matchAll.Length; i++)
        {
            System.String qtxt = matchAll[i];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("matchAll: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Hits hits = searcher.Search(q);
            Assert.AreEqual(docs.Length, hits.Length());
        }

        // test queries that must find none
        for (int i = 0; i < matchNone.Length; i++)
        {
            System.String qtxt = matchNone[i];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("matchNone: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Hits hits = searcher.Search(q);
            Assert.AreEqual(0, hits.Length());
        }

        // test queries that must be prefix queries and must find only one doc
        for (int i = 0; i < matchOneDocPrefix.Length; i++)
        {
            for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
            {
                System.String qtxt = matchOneDocPrefix[i][j];
                Query q = qp.Parse(qtxt);
                if (dbg)
                {
                    System.Console.Out.WriteLine("match 1 prefix: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
                }
                Assert.AreEqual(typeof(PrefixQuery), q.GetType());
                Hits hits = searcher.Search(q);
                Assert.AreEqual(1, hits.Length());
                Assert.AreEqual(i, hits.Id(0));
            }
        }

        // test queries that must be wildcard queries and must find only one doc.
        // The outer bound is the wildcard table's own length: bounding it by the
        // prefix table would silently skip rows or overrun if the two tables
        // ever differed in size.
        for (int i = 0; i < matchOneDocWild.Length; i++)
        {
            for (int j = 0; j < matchOneDocWild[i].Length; j++)
            {
                System.String qtxt = matchOneDocWild[i][j];
                Query q = qp.Parse(qtxt);
                if (dbg)
                {
                    System.Console.Out.WriteLine("match 1 wild: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
                }
                Assert.AreEqual(typeof(WildcardQuery), q.GetType());
                Hits hits = searcher.Search(q);
                Assert.AreEqual(1, hits.Length());
                Assert.AreEqual(i, hits.Id(0));
            }
        }
    }
    finally
    {
        // Close the searcher even when one of the assertions above fails.
        searcher.Close();
    }
}
/// <summary> Searches for "Document" sorted on DATE_TIME_FIELD in reverse
/// order using SortField.AUTO and checks that the hits come back as
/// Document 5, 4, 3, 2, 1.
/// </summary>
public virtual void TestReverseDateSort()
{
    // Expected order of the TEXT_FIELD values (i.e. Document 5, 4, 3, 2, 1).
    System.String[] expectedOrder = new System.String[]{"Document 5", "Document 4", "Document 3", "Document 2", "Document 1"};
    System.String[] actualOrder = new System.String[expectedOrder.Length];

    IndexSearcher searcher = new IndexSearcher(directory);
    try
    {
        // Create a Sort object. reverse is set to true.
        // problem occurs only with SortField.AUTO:
        Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));

        Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
        Query query = queryParser.Parse("Document");

        // Execute the search and process the search results.
        Hits hits = searcher.Search(query, sort);

        // Fail with a clear message on an unexpected hit count, rather than
        // overrunning the result array or comparing against null entries.
        Assert.AreEqual(expectedOrder.Length, hits.Length(), "unexpected number of hits");

        for (int i = 0; i < hits.Length(); i++)
        {
            Document document = hits.Doc(i);
            actualOrder[i] = document.Get(TEXT_FIELD);
        }
    }
    finally
    {
        // Release the searcher even when the search or an assertion fails.
        searcher.Close();
    }

    for (int i = 0; i < expectedOrder.Length; i++)
    {
        Assert.AreEqual(expectedOrder[i], actualOrder[i]);
    }
}