/// <summary>
/// Builds the on-disk index (via CanCreateLuceneIndexOnDisk) and then queries it
/// for the term "lorem" in the "content" field, reading back stored fields of each hit.
/// </summary>
public void CanQueryLuceneIndexCreatedOnDisk()
{
    // Build the on-disk index first so there is something to query.
    CanCreateLuceneIndexOnDisk();

    System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(System.IO.Path.GetTempPath());
    using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(di))
    // FIX: IndexReader and IndexSearcher are IDisposable in Lucene.Net 3.x and were
    // previously never released; nested usings dispose them in reverse order.
    using (Lucene.Net.Index.IndexReader ir = Lucene.Net.Index.IndexReader.Open(directory, true)) // read-only reader
    using (Lucene.Net.Search.Searcher searcher = new Lucene.Net.Search.IndexSearcher(ir))
    using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "content", analyzer);
        Lucene.Net.Search.Query query = parser.Parse("lorem");

        // Collect at most the top 100 scoring documents, in docs-in-order mode.
        Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);

        Lucene.Net.Search.ScoreDoc[] docs = collector.TopDocs().ScoreDocs;
        foreach (Lucene.Net.Search.ScoreDoc scoreDoc in docs)
        {
            // Get the document that represents the search result.
            Document document = searcher.Doc(scoreDoc.Doc);
            var id = document.Get("Id");
            var content = document.Get("content");
        }
    }
}
/// <summary>
/// End-to-end demo: index one document in RAM, then search it for "text"
/// and verify the stored field round-trips exactly.
/// </summary>
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory.
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));

    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    // Now search the index with a read-only searcher.
    IndexSearcher searcher = new IndexSearcher(directory, true); // read-only=true

    // Parse a simple query that searches for "text".
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Every hit must carry back exactly the text we indexed.
    foreach (ScoreDoc hit in hits)
    {
        Document hitDoc = searcher.Doc(hit.doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }

    searcher.Close();
    directory.Close();
}
/// <summary>
/// Searches for "Document" sorted descending on the date/time field and verifies
/// the results come back in reverse order (Document 5 .. Document 1).
/// The problem under test occurs only with SortField.AUTO.
/// </summary>
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory);

    // Sort descending (reverse=true) on the date/time field using SortField.AUTO.
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));

    QueryParser parser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = parser.Parse("Document");

    // Execute the search and capture the text of each hit in result order.
    System.String[] actualOrder = new System.String[5];
    ScoreDoc[] hits = searcher.Search(query, null, 1000, sort).scoreDocs;
    for (int rank = 0; rank < hits.Length; rank++)
    {
        Document document = searcher.Doc(hits[rank].doc);
        actualOrder[rank] = document.Get(TEXT_FIELD);
    }
    searcher.Close();

    // Expected order is the documents reversed: 5, 4, 3, 2, 1.
    System.String[] expectedOrder = new System.String[]
    {
        "Document 5",
        "Document 4",
        "Document 3",
        "Document 2",
        "Document 1"
    };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
/// <summary>
/// Parses <paramref name="queryString"/> against the "contents" field with the
/// standard analyzer and returns at most the top 100 matches.
/// </summary>
public Lucene.Net.Search.TopDocs Search(string queryString)
{
    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "contents", analyzer);
    Lucene.Net.Search.Query query = parser.Parse(queryString);
    return this.IndexSearcher.Search(query, 100);
}
/// <summary>
/// Verifies per-field analysis: "partnum" is analyzed as a single keyword
/// (so "Q36" survives untouched) while the default SimpleAnalyzer lowercases
/// the rest of the query.
/// </summary>
public virtual void TestPerFieldAnalyzer()
{
    // SimpleAnalyzer is the default; "partnum" gets the keyword analyzer instead.
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

    QueryParser parser = new QueryParser("description", analyzer);
    Query query = parser.Parse("partnum:Q36 AND SPACE");
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;

    Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
    Assert.AreEqual(1, hits.Length, "doc found!");
}
/// <summary>
/// Runs the given query string against the "body" field of the supplied searcher
/// and prints the hit count together with the elapsed wall-clock time.
/// </summary>
public static void searchFor(Searcher searcher, string querystr)
{
    // Parser could be hoisted outside this function; kept local for simplicity.
    QueryParser parser = new QueryParser("body", new StandardAnalyzer());
    Query query = parser.Parse(querystr);

    var hits = new AnonymousClassCollector();

    // Stopwatch gives a more accurate timing than DateTime arithmetic.
    var timer = Stopwatch.StartNew();
    searcher.Search(query, hits);
    timer.Stop();

    Console.WriteLine("search for [{0}] returned {1} hits in {2}ms )", query, hits.Count, timer.ElapsedMilliseconds);
}
/// <summary>
/// Searches the "Body" field for the given phrase, reports the total hit count
/// and prints the ids of at most the first 100 matching documents.
/// </summary>
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    // AutoStopWatch reports the elapsed time for the whole search when disposed.
    using (new AutoStopWatch(string.Format("Search for {0}", phrase)))
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_CURRENT, "Body", analyzer);
        Lucene.Net.Search.Query query = parser.Parse(phrase);

        var hits = searcher.Search(query, 100);
        Console.WriteLine("Found {0} results for {1}", hits.TotalHits, phrase);

        // Only up to 100 ScoreDocs were collected, so cap the print loop accordingly.
        int limit = Math.Min(hits.TotalHits, 100);
        for (int i = 0; i < limit; i++)
        {
            Console.WriteLine(hits.ScoreDocs[i].Doc);
        }
    }
}
/// <summary>
/// Indexes a single document containing both "a" and "b", then verifies that
/// the query "a NOT b" excludes it (zero hits).
/// </summary>
public virtual void TestNot_Renamed()
{
    // Build a one-document index holding both terms.
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(store, true);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());

    // The document contains "b", so "a NOT b" must not match it.
    Query query = parser.Parse("a NOT b");
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Wraps a parsed text query in a CustomExternalQuery and checks that every one
/// of the N_DOCS results scores according to the external function
/// 1 + (4 * doc) % N_DOCS.
/// </summary>
public void TestCustomExternalQuery()
{
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String queryText = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query parsed = parser.Parse(queryText);
    Query q = new CustomExternalQuery(parsed);
    Log(q);

    IndexSearcher s = new IndexSearcher(dir);
    TopDocs hits = s.Search(q, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);

    // Every document's score must follow the external scoring function.
    for (int i = 0; i < N_DOCS; i++)
    {
        int doc = hits.ScoreDocs[i].Doc;
        float score = hits.ScoreDocs[i].Score;
        Assert.AreEqual(score, (float)1 + (4 * doc) % N_DOCS, 0.0001, "doc=" + doc);
    }
    s.Close();
}
/* public void testTermRepeatedQuery() throws IOException, ParseException { // TODO: this corner case yields different results. checkQuery("multi* multi* foo"); } */

/// <summary> Checks that a query yields the same result when executed on a
/// single IndexSearcher containing all documents and on a MultiSearcher
/// aggregating sub-searchers.
/// </summary>
/// <param name="queryStr"> the query to check.
/// </param>
/// <throws> IOException </throws>
/// <throws> ParseException </throws>
private void CheckQuery(System.String queryStr)
{
    if (verbose)
        System.Console.Out.WriteLine("Query: " + queryStr);

    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    Query query = parser.Parse(queryStr);

    // Run the identical query against both searchers.
    ScoreDoc[] multiHits = multiSearcher.Search(query, null, 1000).scoreDocs;
    ScoreDoc[] singleHits = singleSearcher.Search(query, null, 1000).scoreDocs;

    // Hit count, ranking (scores) and returned documents must all agree.
    Assert.AreEqual(multiHits.Length, singleHits.Length);
    for (int i = 0; i < multiHits.Length; i++)
    {
        Document docMulti = multiSearcher.Doc(multiHits[i].doc);
        Document docSingle = singleSearcher.Doc(singleHits[i].doc);
        if (verbose)
            System.Console.Out.WriteLine("Multi: " + docMulti.Get(FIELD_NAME) + " score=" + multiHits[i].score);
        if (verbose)
            System.Console.Out.WriteLine("Single: " + docSingle.Get(FIELD_NAME) + " score=" + singleHits[i].score);
        Assert.AreEqual(multiHits[i].score, singleHits[i].score, 0.001f);
        Assert.AreEqual(docMulti.Get(FIELD_NAME), docSingle.Get(FIELD_NAME));
    }
    if (verbose)
        System.Console.Out.WriteLine();
}
/// <summary>
/// Parses the given query string (with scoring boolean rewrite for multi-term
/// queries, so expanded terms keep scores) and delegates to DoSearching(Query).
/// </summary>
public virtual void DoSearching(System.String queryString)
{
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse(queryString);
    DoSearching(query);
}
// Searches the student Lucene index by school year, test time and (optionally) student name,
// then hydrates one page (20 rows) of results from SQL Server and returns them as JSON.
// NOTE(review): the schoolId parameter is accepted but never used in any query — confirm intent.
public string GetStudentsByYearIdAndTimesIdAndSchoolIdAndStudentName(string schoolYear, string times, string schoolId, string StudentName, string pIndex)
{
    string result = string.Empty;
    // pIndex is a 1-based page number; page size is fixed at 20 below.
    int pageIndex = Int32.Parse(pIndex);
    ArrayList students = new ArrayList();
    // Root folder of the Lucene index, taken from configuration; years >= 2000 live in a per-year subfolder.
    string pathOfIndexFile = Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["StudentIndexing"].ToString());
    if (Int32.Parse(schoolYear) >= 2000)
    {
        pathOfIndexFile += "\\" + schoolYear + "\\Index";
    }
    // Strip embedded quotes from the user-supplied name, then wrap it in quotes
    // so the query parser treats it as a phrase.
    string studentName = StudentName.Replace("\"", "");
    studentName = "\"" + studentName + "\"";
    Lucene.Net.Search.IndexSearcher iSearcher = new Lucene.Net.Search.IndexSearcher(pathOfIndexFile);
    Lucene.Net.QueryParsers.QueryParser qYearParser = new Lucene.Net.QueryParsers.QueryParser("YearId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query iYearQuery = qYearParser.Parse(schoolYear);
    Lucene.Net.QueryParsers.QueryParser qTestDayParser = new Lucene.Net.QueryParsers.QueryParser("TestDayId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query iTestDayQuery = qTestDayParser.Parse(times);
    Lucene.Net.QueryParsers.QueryParser qStudentIdParser = new Lucene.Net.QueryParsers.QueryParser("StudentID", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    // NOTE(review): iStudentIdQuery is built but never added to the boolean query below —
    // confirm whether this is dead code or a missing MUST clause.
    Lucene.Net.Search.Query iStudentIdQuery = qStudentIdParser.Parse("1");
    //////////////////////////////////////////////////////////////////////
    // All clauses are MUST: year AND test day AND (optionally) student name.
    Lucene.Net.Search.BooleanQuery bQuery = new Lucene.Net.Search.BooleanQuery();
    bQuery.Add(iYearQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
    bQuery.Add(iTestDayQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
    if (StudentName != " " && StudentName != "")
    {
        Lucene.Net.QueryParsers.QueryParser qStudentParser = new Lucene.Net.QueryParsers.QueryParser("StudentName", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
        Lucene.Net.Search.Query iStudentQuery = qStudentParser.Parse(studentName);
        bQuery.Add(iStudentQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
    }
    Lucene.Net.Search.Hits iHits = iSearcher.Search(bQuery);
    using (System.Data.SqlClient.SqlConnection con = new System.Data.SqlClient.SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PSCPortalConnectionString"].ConnectionString))
    {
        con.Open();
        // Paging: only Lucene hits [pageIndex*20-20, pageIndex*20) are hydrated from SQL.
        for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < iHits.Length(); i++)
        {
            string yId = iHits.Doc(i).Get("YearId");
            string stuId = iHits.Doc(i).Get("StudentID");
            string testDayId = iHits.Doc(i).Get("TestDayId");
            // One parameterized lookup per hit: fetches the student's marks and section.
            System.Data.SqlClient.SqlCommand com = new System.Data.SqlClient.SqlCommand();
            com.Connection = con;
            com.CommandType = CommandType.Text;
            com.CommandText = @" select StudentTHPT.TotalMark,[RoundTotalMark],StudentTHPT.YearId,StudentTHPT.TestDayId,StudentId,FirstName+' '+MiddleName+' '+LastName as FullName,Sex,Birthday,MarkEncourage,Section.Name from StudentTHPT inner join Section on StudentTHPT.SectionId = Section.SectionId where StudentTHPT.YearId=@yearId and StudentTHPT.TestDayId=@timeId and StudentId = @studentId Order by LastName " ;
            com.Parameters.Add("@yearId", SqlDbType.NChar);
            com.Parameters["@yearId"].Value = yId;
            com.Parameters.Add("@timeId", SqlDbType.NVarChar);
            com.Parameters["@timeId"].Value = testDayId;
            com.Parameters.Add("@studentId", SqlDbType.NVarChar);
            com.Parameters["@studentId"].Value = stuId;
            using (System.Data.SqlClient.SqlDataReader reader = com.ExecuteReader())
            {
                while (reader.Read())
                {
                    string fullName = reader["FullName"].ToString();
                    string birthday = reader["Birthday"].ToString().Trim();
                    string studentId = reader["StudentId"].ToString();
                    // Total carries the overall Lucene hit count (not the page size) to the client.
                    string total = iHits.Length().ToString();
                    // string markEncourage = reader["MarkEncourage"].ToString();
                    string totalMark = reader["TotalMark"].ToString();
                    string section = reader["Name"].ToString();
                    string roundTotalMark = reader["RoundTotalMark"].ToString();
                    Student s = new Student { StudentId = studentId, FullName = fullName, Birthday = birthday, Total = total, Section = section, TotalMark = totalMark, RoundTotalMark = roundTotalMark };
                    students.Add(s);
                }
            }
        }
    }
    iSearcher.Close();
    // Serialize the page of Student objects to JSON for the caller.
    System.Web.Script.Serialization.JavaScriptSerializer serialize = new System.Web.Script.Serialization.JavaScriptSerializer();
    result = serialize.Serialize(students);
    return(result);
}
/// <summary>
/// Verifies the highlighter handles overlapping (same-position) tokens produced
/// by a synonym analyzer: all synonym positions must be wrapped in &lt;B&gt; tags.
/// </summary>
public virtual void TestOverlapAnalyzer()
{
    // Map "football" to two synonyms so the analyzer emits overlapping tokens.
    System.Collections.Hashtable synonyms = new System.Collections.Hashtable();
    synonyms["football"] = "soccer,footie";
    Analyzer analyzer = new SynonymAnalyzer(synonyms);

    System.String srchkey = "football";
    System.String s = "football-soccer in the euro 2004 footie competition";

    QueryParser parser = new QueryParser("bookid", analyzer);
    Query query = parser.Parse(srchkey);

    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    TokenStream tokenStream = analyzer.TokenStream(null, new System.IO.StringReader(s));

    // Get the 3 best fragments separated with "..."; here the whole sentence fits in one.
    System.String result = highlighter.GetBestFragments(tokenStream, s, 3, "...");
    System.String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
    Assert.IsTrue(expectedResult.Equals(result), "overlapping analyzer should handle highlights OK");
}
// Verifies highlighting for a wildcard query executed through a MultiSearcher:
// two single-document indexes each contribute one hit, and the highlighter
// (fed the query rewritten+combined across both readers) must produce exactly
// two highlights (checked via the numHighlights instance counter).
public virtual void TestMultiSearcher()
{
    //setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1);

    //setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2);

    IndexSearcher[] searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1);
    searchers[1] = new IndexSearcher(ramDir2);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    // Scoring boolean rewrite keeps per-term scores so the QueryScorer below
    // can recognize the expanded wildcard terms.
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse("multi*");
    System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    //at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query);

    //query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
    // Rewrite the wildcard query against each sub-reader and combine the expansions,
    // mirroring what the MultiSearcher did internally, so the highlighter sees
    // the concrete terms ("multiOne", "multiTwo") rather than the wildcard.
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);

    //create an instance of the highlighter with the tags used to surround highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    for (int i = 0; i < hits.Length(); i++)
    {
        System.String text = hits.Doc(i).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        System.Console.Out.WriteLine(highlightedText);
    }
    // One highlight per index; the formatter callback increments numHighlights.
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Tests wildcard/prefix query parsing and searching, including leading wildcards
/// and escaped backslashes in both indexed terms and query terms. Each doc i is
/// the unique match for matchOneDocPrefix[i] / matchOneDocWild[i].
/// </summary>
public virtual void TestParsingAndSearching()
{
    System.String field = "content";
    bool dbg = false;
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer());
    qp.AllowLeadingWildcard = true;
    System.String[] docs = new System.String[] { "\\ abcdefg1", "\\79 hijklmn1", "\\\\ opqrstu1" };
    // queries that should find all docs
    System.String[] matchAll = new System.String[] { "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" };
    // queries that should find no docs
    System.String[] matchNone = new System.String[] { "a*h", "a?h", "*a*h", "?a", "a?" };
    // queries that should be parsed to prefix queries
    System.String[][] matchOneDocPrefix = new System.String[][] { new System.String[] { "a*", "ab*", "abc*" }, new System.String[] { "h*", "hi*", "hij*", "\\\\7*" }, new System.String[] { "o*", "op*", "opq*", "\\\\\\\\*" } };
    // queries that should be parsed to wildcard queries
    System.String[][] matchOneDocWild = new System.String[][] { new System.String[] { "*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**" }, new System.String[] { "*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**" }, new System.String[] { "*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**" } };

    // prepare the index
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(field, docs[i], Field.Store.NO, Field.Index.ANALYZED));
        iw.AddDocument(doc);
    }
    iw.Close();

    IndexSearcher searcher = new IndexSearcher(dir, true);

    // test queries that must find all
    for (int i = 0; i < matchAll.Length; i++)
    {
        System.String qtxt = matchAll[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchAll: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    for (int i = 0; i < matchNone.Length; i++)
    {
        System.String qtxt = matchNone[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchNone: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test queries that must be prefix queries and must find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            System.String qtxt = matchOneDocPrefix[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 prefix: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(PrefixQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }

    // test queries that must be wildcard queries and must find only one doc
    // FIX: the outer loop previously used matchOneDocPrefix.Length as its bound
    // while indexing matchOneDocWild; it only worked because the two arrays
    // happen to have the same length. Bound now tracks the array actually indexed.
    for (int i = 0; i < matchOneDocWild.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            System.String qtxt = matchOneDocWild[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 wild: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(WildcardQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }
    searcher.Close();
}
/// <summary>Parses the given query text with this instance's query parser.</summary>
public virtual Query MakeQuery(System.String queryText)
{
    return qp.Parse(queryText);
}
/// <summary>
/// Searches the "Body" field for the given phrase and returns the total hit count
/// (collecting at most 100 score docs).
/// </summary>
static int SearchForPhrase(IndexSearcher searcher, string phrase)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", analyzer);
    Lucene.Net.Search.Query query = parser.Parse(phrase);
    return searcher.Search(query, 100).TotalHits;
}
/// <summary>
/// Tests wildcard/prefix query parsing and searching (pre-3.0 API variant),
/// including leading wildcards and escaped backslashes. Each doc i is the unique
/// match for matchOneDocPrefix[i] / matchOneDocWild[i].
/// </summary>
public virtual void TestParsingAndSearching()
{
    System.String field = "content";
    bool dbg = false;
    QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
    qp.SetAllowLeadingWildcard(true);
    System.String[] docs = new System.String[] { "\\ abcdefg1", "\\79 hijklmn1", "\\\\ opqrstu1" };
    // queries that should find all docs
    System.String[] matchAll = new System.String[] { "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" };
    // queries that should find no docs
    System.String[] matchNone = new System.String[] { "a*h", "a?h", "*a*h", "?a", "a?" };
    // queries that should be parsed to prefix queries
    System.String[][] matchOneDocPrefix = new System.String[][] { new System.String[] { "a*", "ab*", "abc*" }, new System.String[] { "h*", "hi*", "hij*", "\\\\7*" }, new System.String[] { "o*", "op*", "opq*", "\\\\\\\\*" } };
    // queries that should be parsed to wildcard queries
    System.String[][] matchOneDocWild = new System.String[][] { new System.String[] { "*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**" }, new System.String[] { "*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**" }, new System.String[] { "*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**" } };

    // prepare the index
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(field, docs[i], Field.Store.NO, Field.Index.ANALYZED));
        iw.AddDocument(doc);
    }
    iw.Close();

    IndexSearcher searcher = new IndexSearcher(dir);

    // test queries that must find all
    for (int i = 0; i < matchAll.Length; i++)
    {
        System.String qtxt = matchAll[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchAll: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    for (int i = 0; i < matchNone.Length; i++)
    {
        System.String qtxt = matchNone[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchNone: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test queries that must be prefix queries and must find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            System.String qtxt = matchOneDocPrefix[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 prefix: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(PrefixQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].doc);
        }
    }

    // test queries that must be wildcard queries and must find only one doc
    // FIX: the outer loop previously used matchOneDocPrefix.Length as its bound
    // while indexing matchOneDocWild; it only worked because the two arrays
    // happen to have the same length. Bound now tracks the array actually indexed.
    for (int i = 0; i < matchOneDocWild.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            System.String qtxt = matchOneDocWild[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 wild: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(WildcardQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].doc);
        }
    }
    searcher.Close();
}
/// <summary>
/// Command-line search driver: parses options, opens the index read-only, and runs
/// queries interactively or from a file, with optional paging and benchmarking.
/// </summary>
public static void Main(System.String[] args)
{
    System.String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(0);
    }

    // Option defaults.
    System.String index = "index";
    System.String field = "contents";
    System.String queries = null;
    int repeat = 0;
    bool raw = false;
    System.String normsField = null;
    bool paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i])) { index = args[i + 1]; i++; }
        else if ("-field".Equals(args[i])) { field = args[i + 1]; i++; }
        else if ("-queries".Equals(args[i])) { queries = args[i + 1]; i++; }
        else if ("-repeat".Equals(args[i])) { repeat = System.Int32.Parse(args[i + 1]); i++; }
        else if ("-raw".Equals(args[i])) { raw = true; }
        else if ("-norms".Equals(args[i])) { normsField = args[i + 1]; i++; }
        else if ("-paging".Equals(args[i]))
        {
            if (args[i + 1].Equals("false"))
            {
                paging = false;
            }
            else
            {
                hitsPerPage = System.Int32.Parse(args[i + 1]);
                if (hitsPerPage == 0)
                {
                    paging = false;
                }
            }
            i++;
        }
    }

    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true
    if (normsField != null)
    {
        reader = new OneNormsReader(reader, normsField);
    }
    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // FIX: each branch previously constructed the underlying StreamReader twice
    // (once just to read back its own encoding), leaking the extra reader.
    // A single reader with the same encoding is equivalent.
    System.IO.StreamReader in_Renamed;
    if (queries != null)
    {
        in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
    }
    else
    {
        in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8"));
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true)
    {
        if (queries == null)
        {
            // prompt the user
            System.Console.Out.WriteLine("Enter query: ");
        }
        System.String line = in_Renamed.ReadLine();
        // FIX: dropped the dead "line.Length == -1" check (Length is never negative).
        if (line == null)
        {
            break;
        }
        line = line.Trim();
        if (line.Length == 0)
        {
            break;
        }
        Query query = parser.Parse(line);
        System.Console.Out.WriteLine("Searching for: " + query.ToString(field));
        if (repeat > 0)
        {
            // repeat & time as benchmark
            System.DateTime start = System.DateTime.Now;
            for (int i = 0; i < repeat; i++)
            {
                searcher.Search(query, null, 100);
            }
            System.DateTime end = System.DateTime.Now;
            // FIX: DateTime.Millisecond is the sub-second component (0-999);
            // subtracting components gives nonsense whenever the run crosses a
            // second boundary. Use the full elapsed TimeSpan instead.
            System.Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
        }
        if (paging)
        {
            DoPagingSearch(in_Renamed, searcher, query, hitsPerPage, raw, queries == null);
        }
        else
        {
            DoStreamingSearch(searcher, query);
        }
    }
    reader.Close();
}
// Exercises MultiSearcher with an empty sub-index in three scenarios:
// (1) empty index first + populated index, (2) after adding a doc to the
// formerly-empty index (including subSearcher routing checks), and
// (3) after deleting that doc again and optimizing.
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();
    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

    //--------------------------------------------------------------------
    // scenario 1: index A holds all three docs, index B is completely empty
    //--------------------------------------------------------------------
    // writing the documents to the first index
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();
    // closing the second index
    writerB.Close();

    // creating the query: all three docs share handle "1"
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
    Query query = parser.Parse("handle:1");

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB, true);
    searchers[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    // iterating over the hit documents: fetching each doc must not throw
    for (int i = 0; i < hits.Length; i++)
    {
        mSearcher.Doc(hits[i].Doc);
    }
    mSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 2: the formerly-empty index B now gains one document
    //--------------------------------------------------------------------
    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB, true);
    searchers2[1] = new IndexSearcher(indexStoreA, true);
    // creating the mulitSearcher
    MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search: now 3 (index A) + 1 (index B) = 4 hits
    ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(4, hits2.Length);
    // iterating over the hit documents
    for (int i = 0; i < hits2.Length; i++)
    {
        // no exception should happen at this point
        mSearcher2.Doc(hits2[i].Doc);
    }

    // test the subSearcher() method: doc1 now exists in both sub-indexes,
    // so each hit must be routed back to the correct sub-searcher.
    Query subSearcherQuery = parser.Parse("id:doc1");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits2.Length);
    Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[0]
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc)); // hit from searchers2[1]
    subSearcherQuery = parser.Parse("id:doc2");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits2.Length);
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[1]
    mSearcher2.Close();

    //--------------------------------------------------------------------
    // scenario 3: delete the added doc again so index B has deletions
    //--------------------------------------------------------------------
    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB, false);
    readerB.DeleteDocuments(term);
    readerB.Close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB, true);
    searchers3[1] = new IndexSearcher(indexStoreA, true);
    // creating the mulitSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search: back to the original 3 hits
    ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits3.Length);
    // iterating over the hit documents
    for (int i = 0; i < hits3.Length; i++)
    {
        mSearcher3.Doc(hits3[i].Doc);
    }
    mSearcher3.Close();
    indexStoreA.Close();
    indexStoreB.Close();
}
/// <summary>
/// Verifies that PhraseQuery.ToString() renders stop-word position gaps as
/// "?" and alternative terms at the same position with a "|" separator.
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer stopAnalyzer = new StopAnalyzer();
    StopFilter.SetEnablePositionIncrementsDefault(true);

    QueryParser parser = new QueryParser("field", stopAnalyzer);
    parser.SetEnablePositionIncrements(true);

    // Stop words ("this", "is", "a") leave position holes rendered as "?".
    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term added at position 1 is rendered with the "|" separator.
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
/// <summary>
/// Command-line entry point: parses options, opens the index and runs an
/// interactive (or file-driven) search loop using the pre-2.9 Hits API.
/// </summary>
public static void Main(System.String[] args)
{
    System.String usage = "Usage: " + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(0);
    }

    System.String index = "index";
    System.String field = "contents";
    System.String queries = null;
    int repeat = 0;
    bool raw = false;
    System.String normsField = null;

    // Walk the argument list; options that take a value consume the next slot.
    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i])) { index = args[i + 1]; i++; }
        else if ("-field".Equals(args[i])) { field = args[i + 1]; i++; }
        else if ("-queries".Equals(args[i])) { queries = args[i + 1]; i++; }
        else if ("-repeat".Equals(args[i])) { repeat = System.Int32.Parse(args[i + 1]); i++; }
        else if ("-raw".Equals(args[i])) { raw = true; }
        else if ("-norms".Equals(args[i])) { normsField = args[i + 1]; i++; }
    }

    IndexReader reader = IndexReader.Open(index);
    if (normsField != null)
        reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    // FIX: the original built each StreamReader twice (wrapping a second
    // reader just to copy the first one's encoding), which double-opened the
    // queries file / stdin and leaked a reader. One reader is sufficient.
    System.IO.StreamReader in_Renamed;
    if (queries != null)
    {
        in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
    }
    else
    {
        in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8"));
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true)
    {
        if (queries == null)
            // prompt the user
            System.Console.Out.Write("Query: ");

        System.String line = in_Renamed.ReadLine();
        if (line == null || line.Length == 0)
            break;

        Query query = parser.Parse(line);
        System.Console.Out.WriteLine("Searching for: " + query.ToString(field));

        Hits hits = searcher.Search(query);

        if (repeat > 0)
        {
            // repeat & time as benchmark.
            // FIX: the original printed (end.Millisecond - start.Millisecond),
            // i.e. the difference of the millisecond *components* (0-999),
            // which is wrong for any run crossing a second boundary. Use the
            // elapsed TimeSpan instead.
            System.DateTime timerStart = System.DateTime.Now;
            for (int i = 0; i < repeat; i++)
            {
                hits = searcher.Search(query);
            }
            System.DateTime timerEnd = System.DateTime.Now;
            System.Console.Out.WriteLine("Time: " + (timerEnd - timerStart).TotalMilliseconds + "ms");
        }

        System.Console.Out.WriteLine(hits.Length() + " total matching documents");

        // Page through the hits, HITS_PER_PAGE at a time.
        int HITS_PER_PAGE = 10;
        for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
        {
            int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++)
            {
                if (raw)
                {
                    // output raw format
                    System.Console.Out.WriteLine("doc=" + hits.Id(i) + " score=" + hits.Score(i));
                    continue;
                }

                Document doc = hits.Doc(i);
                System.String path = doc.Get("path");
                if (path != null)
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + path);
                    System.String title = doc.Get("title");
                    if (title != null)
                    {
                        System.Console.Out.WriteLine(" Title: " + doc.Get("title"));
                    }
                }
                else
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
                }
            }

            if (queries != null)
                // non-interactive
                break;

            if (hits.Length() > end)
            {
                System.Console.Out.Write("more (y/n) ? ");
                line = in_Renamed.ReadLine();
                // FIX: guard against EOF (null) before indexing line[0].
                if (line == null || line.Length == 0 || line[0] == 'n')
                    break;
            }
        }
    }
    reader.Close();
}
/// <summary>
/// Checks that MultiSearcher.Explain() produces explanations whose merged
/// statistics (maxDocs, docFreq, per-term frequencies) span BOTH
/// sub-searchers, for term, phrase and span-near queries.
/// </summary>
public virtual void TestTermQueryMultiSearcherExplain()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();

    // three identical documents; two are indexed into A, one into B
    Document lDoc = new Document();
    lDoc.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));

    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.Optimize();
    writerA.Close();

    writerB.AddDocument(lDoc3);
    writerB.Close();

    QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer());
    Query query = parser.Parse("handle:1");

    Searcher[] searchers = new Searcher[2];
    searchers[0] = new IndexSearcher(indexStoreB);
    searchers[1] = new IndexSearcher(indexStoreA);
    Searcher mSearcher = new MultiSearcher(searchers);

    // term query: all 3 documents match, and the explanation's collection
    // statistics must reflect all 3 docs across both sub-indices
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    Explanation explain = mSearcher.Explain(query, hits[0].doc);
    System.String exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("maxDocs=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("docFreq=3") > -1, exp);

    // phrase query: per-term statistics in the explanation are merged as well
    query = parser.Parse("handle:\"1 2\"");
    hits = mSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    explain = mSearcher.Explain(query, hits[0].doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > -1, exp);

    // span-near query: same merged statistics expected
    query = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("handle", "1")), new SpanTermQuery(new Term("handle", "2")) }, 0, true);
    hits = mSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    explain = mSearcher.Explain(query, hits[0].doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > -1, exp);

    mSearcher.Close();
}
/// <summary>
/// Regression test for LUCENENET-174: a RAMDirectory must survive a binary
/// serialization round-trip and remain usable for further indexing and
/// searching afterwards.
/// </summary>
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    //Index 1 Doc
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    // Serialize the whole directory. NOTE: BinaryFormatter is insecure for
    // untrusted data (and removed in .NET 9); it is kept here deliberately
    // because this test asserts RAMDirectory's [Serializable] contract.
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    //Close DIR
    ramDIR.Close();
    ramDIR = null;

    //now deserialize
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory) serializer.Deserialize(memoryStream);

    //Add 1 more doc
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    //Search
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();

    // FIX: NUnit's Assert.AreEqual takes (expected, actual); the original
    // passed them reversed, which garbles the failure message.
    Assert.AreEqual(2, topDocs.TotalHits, "See the issue: LUCENENET-174");
}
/// <summary>
/// Exercises token-position handling end to end: the AnonymousClassAnalyzer
/// emits tokens with explicit position increments (leaving gaps), and the
/// assertions below check how PhraseQuery / MultiPhraseQuery and the query
/// parser's EnablePositionIncrements interact with those gaps.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    // the analyzer replaces this text with its own token stream — the stored
    // value "bogus" is never what gets indexed
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store, true);

    TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));
    pos.Next();
    // first token should be at position 0
    Assert.AreEqual(0, pos.NextPosition());

    pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
    pos.Next();
    // second token should be at position 2
    Assert.AreEqual(2, pos.NextPosition());

    PhraseQuery q;
    ScoreDoc[] hits;

    // adjacent phrase "1 2" must NOT match: the index has a gap between them
    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // phrase query should fail for non existing searched term
    // even if there exist another searched terms in the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // multi-phrase query should succeed for non existing searched term
    // because there exist another searched terms in the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // should not find "1 2" because there is a gap of 1 in the index
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));
    q = (PhraseQuery) qp.Parse("\"1 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // omitted stop word cannot help because stop filter swallows the increments.
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // query parser alone won't help, because stop filter swallows the increments.
    qp.EnablePositionIncrements = true;
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // stop filter alone won't help, because query parser swallows the increments.
    qp.EnablePositionIncrements = false;
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // when both qp and stopFilter propagate increments, we should find the doc.
    qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
    qp.EnablePositionIncrements = true;
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
}
/// <summary>
/// Interactive console search demo over the "index" directory using the
/// legacy static QueryParser.Parse API and the pre-2.9 Hits result class.
/// </summary>
public static void Main(System.String[] args)
{
    try
    {
        Searcher searcher = new IndexSearcher(@"index");
        Analyzer analyzer = new StandardAnalyzer();

        // FIX: the original wrapped Console.OpenStandardInput() twice (a
        // second time just to copy the encoding), opening stdin twice; a
        // single reader is sufficient.
        System.IO.StreamReader in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default);
        while (true)
        {
            System.Console.Out.Write("Query: ");
            System.String line = in_Renamed.ReadLine();
            // FIX: ReadLine() returns null at end-of-input; the original test
            // (line.Length == -1) could never be true, so EOF crashed with a
            // NullReferenceException. Stop on EOF or an empty line.
            if (line == null || line.Length == 0)
            {
                break;
            }

            Query query = QueryParser.Parse(line, "contents", analyzer);
            System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));

            Hits hits = searcher.Search(query);
            System.Console.Out.WriteLine(hits.Length() + " total matching documents");

            // page through the results, ten at a time
            int HITS_PER_PAGE = 10;
            for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
            {
                int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
                for (int i = start; i < end; i++)
                {
                    Document doc = hits.Doc(i);
                    System.String path = doc.Get("path");
                    if (path != null)
                    {
                        System.Console.Out.WriteLine(i + ". " + path);
                    }
                    else
                    {
                        System.String url = doc.Get("url");
                        if (url != null)
                        {
                            System.Console.Out.WriteLine(i + ". " + url);
                            System.Console.Out.WriteLine(" - " + doc.Get("title"));
                        }
                        else
                        {
                            System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
                        }
                    }
                }

                if (hits.Length() > end)
                {
                    System.Console.Out.Write("more (y/n) ? ");
                    line = in_Renamed.ReadLine();
                    // FIX: also guard against EOF here before indexing line[0].
                    if (line == null || line.Length == 0 || line[0] == 'n')
                    {
                        break;
                    }
                }
            }
        }
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// MatchAllDocsQuery behavior over a multi-segment index: with and without
/// norms-based scoring, after a norm change, combined in BooleanQueries
/// (exercising skipTo), after a deletion, and round-tripped through its
/// parsable ToString() form.
/// </summary>
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(2); // force multi-segment
    AddDoc("one", iw, 1f);
    AddDoc("two", iw, 20f);
    AddDoc("three four", iw, 300f);
    iw.Close();

    IndexReader ir = IndexReader.Open(dir);
    IndexSearcher is_Renamed = new IndexSearcher(ir);
    ScoreDoc[] hits;

    // NOTE(review): the AreEqual calls below pass (actual, expected) —
    // reversed relative to NUnit's (expected, actual) convention. Equality
    // still holds either way; only failure messages are affected.

    // assert with norms scoring turned off: docs come back in index order
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "three four");

    // assert with norms scoring turned on: boost order (300 > 20 > 1) wins
    MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "one");

    // change norm & retest: doc 0 is boosted to the top
    ir.SetNorm(0, "key", 400f);
    normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "two");

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    // delete a document:
    is_Renamed.GetIndexReader().DeleteDocument(0);
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString()
    QueryParser qp = new QueryParser("key", analyzer);
    hits = is_Renamed.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString() with non default boost
    Query maq = new MatchAllDocsQuery();
    maq.SetBoost(2.3f);
    Query pq = qp.Parse(maq.ToString());
    hits = is_Renamed.Search(pq, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    is_Renamed.Close();
    ir.Close();
    dir.Close();
}
/// <summary>
/// Builds an in-memory index of N_DOCS documents (cycling through the sample
/// texts), prepares a deliberately large disjunction query, and warms the
/// searcher so timing-sensitive tests are not skewed by first-use cost.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    System.String[] texts = new System.String[]{"docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero", "one blah three", "one foo three multiOne", "one foobar three multiThree", "blueberry pancakes", "blueberry pie", "blueberry strudel", "blueberry pizza"};

    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
    for (int docIndex = 0; docIndex < N_DOCS; docIndex++)
    {
        Add(texts[docIndex % texts.Length], writer);
    }
    writer.Close();

    searcher = new IndexSearcher(directory);

    // large query so that search will be longer
    System.String queryText = "one";
    foreach (System.String text in texts)
    {
        queryText += ' ' + text;
    }
    QueryParser parser = new QueryParser(FIELD_NAME, new WhitespaceAnalyzer());
    query = parser.Parse(queryText);

    // warm the searcher
    searcher.Search(query, null, 1000);
}
/// <summary>
/// Command-line search demo: parses options, opens the index read-only and
/// runs either a paging or a streaming search loop.
/// </summary>
public static void MainOld(System.String[] args)
{
    System.String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(0);
    }

    System.String index = "index";
    System.String field = "contents";
    System.String queries = null;
    int repeat = 0;
    bool raw = false;
    System.String normsField = null;
    bool paging = true;
    int hitsPerPage = 10;

    // Options that take a value consume the following argument slot.
    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i])) { index = args[i + 1]; i++; }
        else if ("-field".Equals(args[i])) { field = args[i + 1]; i++; }
        else if ("-queries".Equals(args[i])) { queries = args[i + 1]; i++; }
        else if ("-repeat".Equals(args[i])) { repeat = System.Int32.Parse(args[i + 1]); i++; }
        else if ("-raw".Equals(args[i])) { raw = true; }
        else if ("-norms".Equals(args[i])) { normsField = args[i + 1]; i++; }
        else if ("-paging".Equals(args[i]))
        {
            // "-paging false" (or a page size of 0) selects streaming search.
            if (args[i + 1].Equals("false"))
            {
                paging = false;
            }
            else
            {
                hitsPerPage = System.Int32.Parse(args[i + 1]);
                if (hitsPerPage == 0)
                {
                    paging = false;
                }
            }
            i++;
        }
    }

    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true
    if (normsField != null)
        reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // FIX: the original constructed each StreamReader twice (a second reader
    // built just to read the first one's encoding), double-opening the
    // queries file / stdin and leaking a reader. One reader is sufficient.
    System.IO.StreamReader in_Renamed;
    if (queries != null)
    {
        in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
    }
    else
    {
        in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8"));
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true)
    {
        if (queries == null)
            // prompt the user
            System.Console.Out.WriteLine("Enter query: ");

        System.String line = in_Renamed.ReadLine();
        // FIX: ReadLine() signals end-of-input with null; the original's
        // extra "line.Length == -1" test could never be true (dead code).
        if (line == null)
            break;

        line = line.Trim();
        if (line.Length == 0)
            break;

        Query query = parser.Parse(line);
        System.Console.Out.WriteLine("Searching for: " + query.ToString(field));

        if (repeat > 0)
        {
            // repeat & time as benchmark.
            // FIX: the original printed (end.Millisecond - start.Millisecond),
            // the difference of millisecond *components* (0-999), which is
            // wrong for any run crossing a second boundary. Use the elapsed
            // TimeSpan instead.
            System.DateTime timerStart = System.DateTime.Now;
            for (int i = 0; i < repeat; i++)
            {
                searcher.Search(query, null, 100);
            }
            System.DateTime timerEnd = System.DateTime.Now;
            System.Console.Out.WriteLine("Time: " + (timerEnd - timerStart).TotalMilliseconds + "ms");
        }

        if (paging)
        {
            DoPagingSearch(in_Renamed, searcher, query, hitsPerPage, raw, queries == null);
        }
        else
        {
            DoStreamingSearch(searcher, query);
        }
    }
    reader.Close();
}
/// <summary>
/// Highlighting of an old-style RangeQuery: forces the parser to produce a
/// RangeQuery (rather than a constant-score range filter) and verifies the
/// expected number of highlighted fragments.
/// </summary>
public virtual void TestGetRangeFragments()
{
    System.String rangeQueryText = FIELD_NAME + ":[kannedy TO kznnedy]";

    //Need to explicitly set the QueryParser property to use RangeQuery rather than RangeFilters
    QueryParser rangeParser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    rangeParser.SetUseOldRangeQuery(true);
    query = rangeParser.Parse(rangeQueryText);

    DoSearching(query);
    DoStandardHighlights();
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Parses the given phrase against the "Body" field and reports the hit
/// count, timing the whole operation with an AutoStopWatch.
/// </summary>
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    using (new AutoStopWatch($"Search for {phrase}"))
    {
        Lucene.Net.QueryParsers.QueryParser bodyParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        Lucene.Net.Search.Query parsedQuery = bodyParser.Parse(phrase);

        // NOTE(review): the result of this first search is discarded; it
        // looks like a warm-up pass before the timed search — confirm before
        // removing it.
        var results = searcher.Search(new TermQuery(new Term("Title", "find me")), 100);
        results = searcher.Search(parsedQuery, 100);

        Console.WriteLine("Found {0} results for {1}", results.TotalHits, phrase);
    }
}
/// <summary>
/// Shows that a multi-term (wildcard/prefix) query which has NOT been
/// rewritten produces zero highlights: the scorer only recognizes the
/// primitive term queries that rewriting would have produced.
/// </summary>
public virtual void TestUnRewrittenQuery()
{
    //test to show how rewritten query can still be used
    searcher = new IndexSearcher(ramDir);
    Analyzer analyzer = new StandardAnalyzer();

    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("JF? or Kenned*");
    System.Console.Out.WriteLine("Searching with primitive query");
    //forget to set this and...
    //query=query.rewrite(reader);
    Hits hits = searcher.Search(query);

    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));

    int maxNumFragmentsRequired = 3;
    for (int hitIndex = 0; hitIndex < hits.Length(); hitIndex++)
    {
        System.String docText = hits.Doc(hitIndex).Get(FIELD_NAME);
        TokenStream stream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(docText));
        System.String fragments = highlighter.GetBestFragments(stream, docText, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine(fragments);
    }

    //We expect to have zero highlights if the query is multi-terms and is not rewritten!
    Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Translates an abstraction-layer <see cref="Query"/> into the equivalent
/// Lucene.Net 3.0 query object, recursing through boolean clauses and
/// delegating parser-backed queries to the Lucene query parsers.
/// </summary>
/// <param name="query">The abstract query to convert; must not be null.</param>
/// <returns>The equivalent Lucene.Net query, with any boost applied.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="query"/> is null.</exception>
/// <exception cref="ArgumentException">If the concrete query type is not recognized.</exception>
/// <exception cref="InvalidOperationException">If a boolean clause has an unknown Occur value.</exception>
public static Lucene.Net.Search.Query ConvertQueryToLuceneQuery(Query query)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    // Dispatch on the concrete abstraction type; each branch builds the
    // matching Lucene.Net query. Note the abstraction types shadow the
    // Lucene names, hence the fully-qualified Lucene.Net.Search.* targets.
    Lucene.Net.Search.Query lQuery;
    if (query is MatchAllDocsQuery)
    {
        var lMatchAllDocsQuery = new Lucene.Net.Search.MatchAllDocsQuery();
        lQuery = lMatchAllDocsQuery;
    }
    else if (query is TermQuery)
    {
        var termQuery = query as TermQuery;
        var term = Term.ConvertToLuceneTerm(termQuery.Term);
        lQuery = new Lucene.Net.Search.TermQuery(term);
    }
    else if (query is TermRangeQuery)
    {
        var termRangeQuery = query as TermRangeQuery;
        var lTermRangeQuery = new Lucene.Net.Search.TermRangeQuery(termRangeQuery.FieldName, termRangeQuery.LowerTerm, termRangeQuery.UpperTerm, termRangeQuery.LowerInclusive, termRangeQuery.UpperInclusive);
        lQuery = lTermRangeQuery;
    }
    else if (query is PhraseQuery)
    {
        var phraseQuery = query as PhraseQuery;
        var lPhraseQuery = new Lucene.Net.Search.PhraseQuery();
        foreach (var term in phraseQuery.Terms)
        {
            var lTerm = Term.ConvertToLuceneTerm(term);
            lPhraseQuery.Add(lTerm);
        }
        // Slop is optional on the abstraction; only set when present.
        if (phraseQuery.Slop.HasValue)
        {
            lPhraseQuery.Slop = phraseQuery.Slop.Value;
        }
        lQuery = lPhraseQuery;
    }
    else if (query is PrefixQuery)
    {
        var prefixQuery = query as PrefixQuery;
        var term = Term.ConvertToLuceneTerm(prefixQuery.Term);
        var lPrefixQuery = new Lucene.Net.Search.PrefixQuery(term);
        lQuery = lPrefixQuery;
    }
    else if (query is RegexQuery)
    {
        var regexQuery = query as RegexQuery;
        var term = Term.ConvertToLuceneTerm(regexQuery.Term);
        var lRegexQuery = new Contrib.Regex.RegexQuery(term);
        lQuery = lRegexQuery;
    }
    else if (query is FuzzyQuery)
    {
        var fuzzyQuery = query as FuzzyQuery;
        var term = Term.ConvertToLuceneTerm(fuzzyQuery.Term);
        var lFuzzyQuery = new Lucene.Net.Search.FuzzyQuery(term);
        lQuery = lFuzzyQuery;
    }
    else if (query is BooleanQuery)
    {
        var booleanQuery = query as BooleanQuery;
        var lBooleanQuery = new Lucene.Net.Search.BooleanQuery();
        // Recursively convert each nested clause and map its Occur flag.
        foreach (var clause in booleanQuery.Clauses)
        {
            var lNestedQuery = Query.ConvertQueryToLuceneQuery(clause.Query);
            Lucene.Net.Search.Occur lOccur;
            switch (clause.Occur)
            {
                case Occur.Must:
                    lOccur = Lucene.Net.Search.Occur.MUST;
                    break;
                case Occur.MustNot:
                    lOccur = Lucene.Net.Search.Occur.MUST_NOT;
                    break;
                case Occur.Should:
                    lOccur = Lucene.Net.Search.Occur.SHOULD;
                    break;
                default:
                    throw new InvalidOperationException("Occur not implemented or defined.");
            }
            var lClause = new Lucene.Net.Search.BooleanClause(lNestedQuery, lOccur);
            lBooleanQuery.Add(lClause);
        }
        if (booleanQuery.MinimumNumberShouldMatch.HasValue)
        {
            lBooleanQuery.MinimumNumberShouldMatch = booleanQuery.MinimumNumberShouldMatch.Value;
        }
        lQuery = lBooleanQuery;
    }
    else if (query is WildcardQuery)
    {
        var wildcardQuery = query as WildcardQuery;
        var lTerm = Term.ConvertToLuceneTerm(wildcardQuery.Term);
        var lWildcardQuery = new Lucene.Net.Search.WildcardQuery(lTerm);
        lQuery = lWildcardQuery;
    }
    else if (query is DoubleNumericRangeQuery)
    {
        var doubleNumericRangeQuery = query as DoubleNumericRangeQuery;
        var ldoubleNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewDoubleRange(doubleNumericRangeQuery.FieldName, doubleNumericRangeQuery.Min, doubleNumericRangeQuery.Max, doubleNumericRangeQuery.MinInclusive, doubleNumericRangeQuery.MaxInclusive);
        lQuery = ldoubleNumericRangeQuery;
    }
    else if (query is FloatNumericRangeQuery)
    {
        var floatNumericRangeQuery = query as FloatNumericRangeQuery;
        var lfloatNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewFloatRange(floatNumericRangeQuery.FieldName, floatNumericRangeQuery.Min, floatNumericRangeQuery.Max, floatNumericRangeQuery.MinInclusive, floatNumericRangeQuery.MaxInclusive);
        lQuery = lfloatNumericRangeQuery;
    }
    else if (query is IntNumericRangeQuery)
    {
        var intNumericRangeQuery = query as IntNumericRangeQuery;
        var lintNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewIntRange(intNumericRangeQuery.FieldName, intNumericRangeQuery.Min, intNumericRangeQuery.Max, intNumericRangeQuery.MinInclusive, intNumericRangeQuery.MaxInclusive);
        lQuery = lintNumericRangeQuery;
    }
    else if (query is LongNumericRangeQuery)
    {
        var longNumericRangeQuery = query as LongNumericRangeQuery;
        var llongNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewLongRange(longNumericRangeQuery.FieldName, longNumericRangeQuery.Min, longNumericRangeQuery.Max, longNumericRangeQuery.MinInclusive, longNumericRangeQuery.MaxInclusive);
        lQuery = llongNumericRangeQuery;
    }
    else if (query is QueryParserQuery)
    {
        // Free-text query: run it through a Lucene QueryParser.
        var queryParserQuery = query as QueryParserQuery;
        var queryParser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30, queryParserQuery.DefaultField, new StandardAnalyzer(Version.LUCENE_30)) { AllowLeadingWildcard = queryParserQuery.AllowLeadingWildcard };
        lQuery = queryParser.Parse(queryParserQuery.Query);
    }
    else if (query is MultiFieldQueryParserQuery)
    {
        var multiFieldQueryParserQuery = query as MultiFieldQueryParserQuery;
        // NOTE: mutates the input query's FieldNames when null — visible to
        // the caller.
        if (multiFieldQueryParserQuery.FieldNames == null)
        {
            multiFieldQueryParserQuery.FieldNames = new List <string>();
        }
        var queryParser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, multiFieldQueryParserQuery.FieldNames.ToArray(), new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        lQuery = queryParser.Parse(multiFieldQueryParserQuery.Query);
    }
    else
    {
        throw new ArgumentException(@"Unknown or invalid query object", "query");
    }

    // Propagate the optional boost onto the converted query.
    if (query.Boost.HasValue)
    {
        lQuery.Boost = query.Boost.Value;
    }

    return (lQuery);
}
/// <summary>
/// A field-aware QueryScorer only highlights terms belonging to the scored
/// field, while a field-agnostic scorer highlights terms from every field in
/// the query.
/// </summary>
public virtual void TestFieldSpecificHighlighting()
{
    System.String docMainText = "fred is one of the people";
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("fred category:people");

    //highlighting respects fieldnames used in query
    QueryScorer scopedScorer = new QueryScorer(query, "contents");
    Highlighter scopedHighlighter = new Highlighter(new SimpleHTMLFormatter(), scopedScorer);
    scopedHighlighter.SetTextFragmenter(new NullFragmenter());
    System.String result = scopedHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the people", "Should match");

    //highlighting does not respect fieldnames used in query
    QueryScorer unscopedScorer = new QueryScorer(query);
    Highlighter unscopedHighlighter = new Highlighter(new SimpleHTMLFormatter(), unscopedScorer);
    unscopedHighlighter.SetTextFragmenter(new NullFragmenter());
    result = unscopedHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the <B>people</B>", "Should match");

    reader.Close();
}
/// <summary>
/// Runs a CustomExternalQuery wrapped around a parsed text query and checks
/// that all N_DOCS documents match, each scored 1 + (4 * doc) % N_DOCS by the
/// external score provider.
/// </summary>
public void TestCustomExternalQuery()
{
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query q1 = qp.Parse(qtxt);

    Query q = new CustomExternalQuery(q1);
    Log(q);

    IndexSearcher s = new IndexSearcher(dir);
    TopDocs hits = s.Search(q, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);
    for (int i = 0; i < N_DOCS; i++)
    {
        int doc = hits.ScoreDocs[i].Doc;
        float score = hits.ScoreDocs[i].Score;
        // FIX: Assert.AreEqual(expected, actual, delta, message) — the
        // original passed the actual score as the "expected" argument,
        // garbling failure messages.
        Assert.AreEqual((float) 1 + (4 * doc) % N_DOCS, score, 0.0001, "doc=" + doc);
    }
    s.Close();
}
/// <summary>
/// PhraseQuery.ToString() should render stop-word position gaps as "?" and
/// multiple terms at one position separated by "|".
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer stopAnalyzer = new StopAnalyzer(Util.Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", stopAnalyzer);
    parser.EnablePositionIncrements = true;

    // "this", "is" and "a" are stop words; their positions render as "?".
    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term at position 1 is rendered with the "|" separator.
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
// Test that FieldScoreQuery returns docs with expected score.
// Builds four CustomScoreQuery variants around the same plain query — neutral,
// multiply, add, multiply-and-add — runs all five searches, and hands the
// result maps to VerifyResults for score verification.
private void DoTestCustomScore(System.String field, FieldScoreQuery.Type tp, double dboost)
{
    float boost = (float) dboost;
    IndexSearcher searcher = new IndexSearcher(dir, true);

    // A query that would score by the field.
    FieldScoreQuery fieldQuery = new FieldScoreQuery(field, tp);

    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    System.String queryText = "first aid text"; // from the doc texts in FunctionQuerySetup.

    // Regular (boolean) query.
    Query plain = parser.Parse(queryText);
    Log(plain);

    // Custom query that should score the same as the plain query.
    CustomScoreQuery neutral = new CustomScoreQuery(plain);
    neutral.Boost = boost;
    Log(neutral);

    // Custom query that should (by default) multiply the plain score by the field score.
    CustomScoreQuery mul = new CustomScoreQuery(plain, fieldQuery);
    mul.SetStrict(true);
    mul.Boost = boost;
    Log(mul);

    // Custom query that should add the plain score to the field score.
    CustomScoreQuery add = new CustomAddQuery(plain, fieldQuery);
    add.SetStrict(true);
    add.Boost = boost;
    Log(add);

    // Custom query that multiplies and adds the field score to the plain score.
    CustomScoreQuery mulAdd = new CustomMulAddQuery(plain, fieldQuery, fieldQuery);
    mulAdd.SetStrict(true);
    mulAdd.Boost = boost;
    Log(mulAdd);

    // Run all the searches.
    TopDocs tdPlain = searcher.Search(plain, null, 1000);
    TopDocs tdNeutral = searcher.Search(neutral, null, 1000);
    TopDocs tdMul = searcher.Search(mul, null, 1000);
    TopDocs tdAdd = searcher.Search(add, null, 1000);
    TopDocs tdMulAdd = searcher.Search(mulAdd, null, 1000);

    // Put the results in maps so scores can be verified although they have changed.
    System.Collections.Hashtable mapPlain = TopDocsToMap(tdPlain);
    System.Collections.Hashtable mapNeutral = TopDocsToMap(tdNeutral);
    System.Collections.Hashtable mapMul = TopDocsToMap(tdMul);
    System.Collections.Hashtable mapAdd = TopDocsToMap(tdAdd);
    System.Collections.Hashtable mapMulAdd = TopDocsToMap(tdMulAdd);

    VerifyResults(boost, searcher, mapPlain, mapNeutral, mapMul, mapAdd, mapMulAdd, plain, neutral, mul, add, mulAdd);
}
// MultiSearcher.Explain must merge collection statistics (maxDocs/docFreq)
// across its sub-searchers for term, phrase and span queries alike.
public virtual void TestTermQueryMultiSearcherExplain()
{
    // Two directories so the MultiSearcher spans two separate indices.
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();

    Document docOne = new Document();
    docOne.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document docTwo = new Document();
    docTwo.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document docThree = new Document();
    docThree.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));

    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

    // Two documents go into index A, one into index B.
    writerA.AddDocument(docOne);
    writerA.AddDocument(docTwo);
    writerA.Optimize();
    writerA.Close();
    writerB.AddDocument(docThree);
    writerB.Close();

    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
    Query query = parser.Parse("handle:1");

    Searcher[] searchers = new Searcher[2];
    searchers[0] = new IndexSearcher(indexStoreB, true);
    searchers[1] = new IndexSearcher(indexStoreA, true);
    Searcher mSearcher = new MultiSearcher(searchers);

    // Term query: the explanation must reflect all 3 docs across both indices.
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Explanation explain = mSearcher.Explain(query, hits[0].Doc);
    System.String exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("maxDocs=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("docFreq=3") > -1, exp);

    // Phrase query: each term's docFreq must likewise be merged to 3.
    query = parser.Parse("handle:\"1 2\"");
    hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    explain = mSearcher.Explain(query, hits[0].Doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > -1, exp);

    // Span query: same expectation via SpanNearQuery.
    query = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("handle", "1")), new SpanTermQuery(new Term("handle", "2")) }, 0, true);
    hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    explain = mSearcher.Explain(query, hits[0].Doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > -1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > -1, exp);

    mSearcher.Close();
}
// Exercises MatchAllDocsQuery: default scoring order, norms-based scoring
// (including a norm update), use inside BooleanQuery to trigger skipTo(),
// behaviour after a deletion, and round-tripping through ToString()/parse.
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(2); // force multi-segment
    AddDoc("one", iw, 1f);
    AddDoc("two", iw, 20f);
    AddDoc("three four", iw, 300f);
    iw.Close();

    IndexReader ir = IndexReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(ir);
    ScoreDoc[] hits;

    // With norms scoring turned off, docs come back in index order.
    hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].Doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].Doc).Get("key"), "three four");

    // With norms scoring turned on, the per-doc boosts determine the order.
    MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");
    hits = searcher.Search(normsQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[1].Doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].Doc).Get("key"), "one");

    // Change a norm and re-test: "one" should now rank first.
    ir.SetNorm(0, "key", 400f);
    normsQuery = new MatchAllDocsQuery("key");
    hits = searcher.Search(normsQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].Doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].Doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[2].Doc).Get("key"), "two");

    // Some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Delete a document: MatchAllDocsQuery must skip it.
    searcher.GetIndexReader().DeleteDocument(0);
    hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // Test parsable ToString().
    QueryParser qp = new QueryParser("key", analyzer);
    hits = searcher.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // Test parsable ToString() with a non-default boost.
    Query maq = new MatchAllDocsQuery();
    maq.SetBoost(2.3f);
    Query pq = qp.Parse(maq.ToString());
    hits = searcher.Search(pq, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    searcher.Close();
    ir.Close();
    dir.Close();
}
// Verifies that a MultiSearcher copes with an empty sub-index in three
// scenarios: (1) one index empty, (2) the empty index later populated,
// (3) the added doc deleted again and the index re-optimized. Also checks
// SubSearcher() maps hit ids back to the right sub-searcher.
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();
    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    //--------------------------------------------------------------------
    // scenario 1: search across one populated and one completely empty index
    //--------------------------------------------------------------------
    // writing the documents to the first index
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();
    // closing the second index
    writerB.Close();
    // creating the query
    QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer());
    Query query = parser.Parse("handle:1");
    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the
    // searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB);
    searchers[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    // iterating over the hit documents; fetching each one must not throw
    for (int i = 0; i < hits.Length; i++)
    {
        mSearcher.Doc(hits[i].doc);
    }
    mSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 2: the previously empty index now contains one document
    //--------------------------------------------------------------------
    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();
    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP: adding the searcher for the (formerly) empty index first,
    // before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB);
    searchers2[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search; now 4 hits (doc1 exists in both indices)
    ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(4, hits2.Length);
    // iterating over the hit documents
    for (int i = 0; i < hits2.Length; i++)
    {
        // no exception should happen at this point
        mSearcher2.Doc(hits2[i].doc);
    }
    // test the subSearcher() method: each hit id maps to its sub-searcher index
    Query subSearcherQuery = parser.Parse("id:doc1");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits2.Length);
    Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].doc)); // hit from searchers2[0]
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].doc)); // hit from searchers2[1]
    subSearcherQuery = parser.Parse("id:doc2");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits2.Length);
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].doc)); // hit from searchers2[1]
    mSearcher2.Close();

    //--------------------------------------------------------------------
    // scenario 3: the added document is deleted again
    //--------------------------------------------------------------------
    // deleting the document just added, this will cause a different exception
    // to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB);
    readerB.DeleteDocuments(term);
    readerB.Close();
    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.Optimize();
    writerB.Close();
    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB);
    searchers3[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search; back to the original 3 hits
    ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits3.Length);
    // iterating over the hit documents
    for (int i = 0; i < hits3.Length; i++)
    {
        mSearcher3.Doc(hits3[i].doc);
    }
    mSearcher3.Close();
    indexStoreA.Close();
    indexStoreB.Close();
}
// Verifies that token position increments produced by the analyzer are stored
// in the index and honoured by PhraseQuery, MultiPhraseQuery and QueryParser.
// NOTE(review): the expected positions imply AnonymousClassAnalyzer emits
// tokens with deliberate position gaps (e.g. "2" at position 2, "3"/"4"
// sharing a position) — confirm against its definition elsewhere in the file.
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    // the stored text is ignored; the analyzer supplies the token stream
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);

    TermPositions pos = searcher.GetIndexReader().TermPositions(new Term("field", "1"));
    pos.Next();
    // first token should be at position 0
    Assert.AreEqual(0, pos.NextPosition());

    pos = searcher.GetIndexReader().TermPositions(new Term("field", "2"));
    pos.Next();
    // second token should be at position 2
    Assert.AreEqual(2, pos.NextPosition());

    PhraseQuery q;
    ScoreDoc[] hits;

    // adjacent phrase "1 2" must NOT match: there is a position gap
    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // phrase query should fail for a non-existing searched term
    // even if there exist other searched terms in the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // multi-phrase query should succeed for a non-existing searched term
    // because there exist other searched terms in the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // should not find "1 2" because there is a gap of 1 in the index
    QueryParser qp = new QueryParser("field", new StopWhitespaceAnalyzer(false));
    q = (PhraseQuery) qp.Parse("\"1 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // omitted stop word cannot help because the stop filter swallows the increments.
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // query parser alone won't help, because the stop filter swallows the increments.
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // stop filter alone won't help, because the query parser swallows the increments.
    qp.SetEnablePositionIncrements(false);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // when both the query parser and the stop filter propagate increments,
    // we should find the doc.
    qp = new QueryParser("field", new StopWhitespaceAnalyzer(true));
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
}