public void CanQueryLuceneIndexCreatedOnDisk()
{
    // Build the on-disk index first so there is something to query.
    CanCreateLuceneIndexOnDisk();
    System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(System.IO.Path.GetTempPath());
    // Fix: the IndexReader and IndexSearcher were previously never disposed
    // (only the directory and analyzer were); all four now share one using chain.
    using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(di))
    using (Lucene.Net.Index.IndexReader ir = Lucene.Net.Index.IndexReader.Open(directory, true)) // read-only
    using (Lucene.Net.Search.Searcher searcher = new Lucene.Net.Search.IndexSearcher(ir))
    using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30, "content", analyzer);
        Lucene.Net.Search.Query query = parser.Parse("lorem");
        Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);
        Lucene.Net.Search.ScoreDoc[] docs = collector.TopDocs().ScoreDocs;
        foreach (Lucene.Net.Search.ScoreDoc scoreDoc in docs)
        {
            // Get the document that represents the search result.
            Document document = searcher.Doc(scoreDoc.Doc);
            var id = document.Get("Id");
            var content = document.Get("content");
        }
    }
}
public virtual void TestReverseDateSort()
{
    // Run a descending sort on the date-time field and verify the order is reversed.
    IndexSearcher searcher = new IndexSearcher(directory, true, null);
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true));
    QueryParser queryParser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");

    // Execute the search and collect the text field of each hit, in rank order.
    ScoreDoc[] matches = searcher.Search(query, null, 1000, sort, null).ScoreDocs;
    System.String[] actualOrder = new System.String[5];
    for (int n = 0; n < matches.Length; n++)
    {
        Document hitDoc = searcher.Doc(matches[n].Doc, null);
        actualOrder[n] = hitDoc.Get(TEXT_FIELD, null);
    }
    searcher.Close();

    // Reverse sort should yield Document 5, 4, 3, 2, 1.
    System.String[] expectedOrder = new System.String[]
    {
        "Document 5", "Document 4", "Document 3", "Document 2", "Document 1"
    };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
public IEnumerable <int> Get(string search, int max = 100, int minScore = 1)
{
    // Query the on-disk index and return the ids of hits scoring above minScore.
    // NOTE(review): Lucene scores are floats typically < 1.0, so minScore = 1
    // may filter out everything — confirm intent with callers.

    // Lazily build the indexes on first use.
    if (!built)
    {
        BuildIndexes();
        built = true;
    }

    // Fix: directory, analyzer and searcher were previously never disposed;
    // the first hit was fetched unconditionally (threw on an empty result set)
    // and then discarded; and `catch (Exception e) { throw; }` was a no-op.
    using (var dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\lucene")))
    using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", analyzer);
        Lucene.Net.Search.Query query = parser.Parse(search);
        using (var searcher = new Lucene.Net.Search.IndexSearcher(dir))
        {
            var hits = searcher.Search(query, max);
            // Materialize with ToList so the searcher is not used after disposal
            // (the original returned a deferred LINQ query).
            return hits.ScoreDocs
                       .Where(s => s.Score > minScore)
                       .Select(h => int.Parse(searcher.Doc(h.Doc).GetField("id").StringValue))
                       .ToList();
        }
    }
}
public virtual void TestDemo_Renamed()
{
    // Index a single document in a RAM directory, then search it back out.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    Directory directory = new RAMDirectory();
    // To store an index on disk, use FSDirectory instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    System.String text = "This is the text to be indexed.";
    Document doc = new Document();
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    // Now search the index for the term "text".
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Every hit must carry the original field text.
    foreach (ScoreDoc hit in hits)
    {
        Document hitDoc = isearcher.Doc(hit.Doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
static int SearchForPhrase(IndexSearcher searcher, string phrase)
{
    // Parse the phrase against the "Body" field and report the total hit count
    // (the search itself is capped at 100 collected docs).
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", analyzer);
    Lucene.Net.Search.Query parsed = queryParser.Parse(phrase);
    return searcher.Search(parsed, 100).TotalHits;
}
/// <summary>
/// Search for files.
/// </summary>
/// <param name="queryText">The query text.</param>
/// <returns>The files that match the query text.</returns>
public SourceFile[] Search(string queryText)
{
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", _analyzer);
    Lucene.Net.Search.Query query = parser.Parse(queryText);

    using (var searcher = new Lucene.Net.Search.IndexSearcher(_directory, true))
    {
        // Collect every match; each stored document maps onto a SourceFile.
        Lucene.Net.Search.TopDocs topDocs = searcher.Search(query, int.MaxValue);
        var matches = new List <SourceFile>(topDocs.ScoreDocs.Length);
        foreach (Lucene.Net.Search.ScoreDoc scoreDoc in topDocs.ScoreDocs)
        {
            Lucene.Net.Documents.Document document = searcher.Doc(scoreDoc.Doc);
            matches.Add(new SourceFile(
                            document.Get("id"),
                            document.Get("type"),
                            document.Get("name"),
                            document.Get("fileName"),
                            null));
        }
        return matches.ToArray();
    }
}
private static void SearchByFld2(string fld, string txt)
{
    // Searches field <fld> of the index at D:\Index for <txt> and dumps each
    // hit's add-time and field value to the console.
    // Fix: the directory, reader and searcher were previously never disposed.
    string strIndexDir = @"D:\Index";
    Analyzer std = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(strIndexDir)))
    using (Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(directory, true))
    using (Lucene.Net.Search.Searcher srchr = new Lucene.Net.Search.IndexSearcher(reader))
    {
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, fld, std);
        Lucene.Net.Search.Query qry = parser.Parse(txt);
        var cllctr = srchr.Search(qry, 1000);
        Console.WriteLine(cllctr.TotalHits);
        ScoreDoc[] hits = cllctr.ScoreDocs;
        for (int i = 0; i < hits.Length; i++)
        {
            int docId = hits[i].Doc;
            float score = hits[i].Score;
            Lucene.Net.Documents.Document doc = srchr.Doc(docId);
            Console.WriteLine("索引时间:" + doc.Get("addtime"));
            Console.WriteLine("Searched from Text: " + doc.Get(fld));
        }
    }
    Console.WriteLine("over");
}
public string Search(string strQuery)
{
    // Search the article index for strQuery and render the hits as an HTML
    // fragment with the matched terms highlighted.
    Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
    // Fix: one analyzer is created and reused; the original allocated a new
    // StandardAnalyzer on every loop iteration.
    Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", analyzer);
    Lucene.Net.Search.Query query = parser.Parse(strQuery);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
    Lucene.Net.Search.Hits hits = searcher.Search(query);
    Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
    Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
    Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);

    // Fix: build with StringBuilder instead of O(n^2) string concatenation.
    System.Text.StringBuilder result = new System.Text.StringBuilder();
    result.Append("<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : " + hits.Length() + " </font></div>");
    result.Append("<div style='padding: 10px 10px 10px 10px;'>");
    for (int i = 0; i < hits.Length(); i++)
    {
        string id = hits.Doc(i).Get("ArticleId");
        string title = hits.Doc(i).Get("ArticleTitle");
        string detail = hits.Doc(i).Get("ArticleDetail");
        Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));
        result.Append(string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title));
        result.Append(string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail)));
    }
    result.Append("</div>");
    // Fix: the searcher was previously never closed (only the reader was).
    searcher.Close();
    reader.Close();
    return(result.ToString());
}
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory);

    // Create a Sort object with reverse set to true.
    // The problem occurs only with SortField.AUTO:
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));
    QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");

    // Execute the search and record the text field of each hit in rank order.
    ScoreDoc[] matches = searcher.Search(query, null, 1000, sort).ScoreDocs;
    System.String[] actualOrder = new System.String[5];
    for (int n = 0; n < matches.Length; n++)
    {
        Document hitDoc = searcher.Doc(matches[n].doc);
        actualOrder[n] = hitDoc.Get(TEXT_FIELD);
    }
    searcher.Close();

    // Reverse sort should yield Document 5, 4, 3, 2, 1.
    System.String[] expectedOrder = new System.String[]
    {
        "Document 5", "Document 4", "Document 3", "Document 2", "Document 1"
    };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
public virtual void TestDemo_Renamed()
{
    // Index a single document in a RAM directory, then search it back out.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    Directory directory = new RAMDirectory();
    // To store an index on disk, use FSDirectory instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    System.String text = "This is the text to be indexed.";
    Document doc = new Document();
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    // Now search the index for the term "text".
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Every hit must carry the original field text.
    for (int n = 0; n < hits.Length; n++)
    {
        Document hitDoc = isearcher.Doc(hits[n].doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
public override void SetUp()
{
    base.SetUp();
    System.String[] docText = new System.String[]
    {
        "docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero",
        "one blah three",
        "one foo three multiOne",
        "one foobar three multiThree",
        "blueberry pancakes",
        "blueberry pie",
        "blueberry strudel",
        "blueberry pizza"
    };

    // Populate a RAM index with N_DOCS documents, cycling through the texts above.
    Directory directory = new RAMDirectory();
    IndexWriter iw = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < N_DOCS; i++)
    {
        Add(docText[i % docText.Length], iw);
    }
    iw.Close();
    searcher = new IndexSearcher(directory, true);

    // Start from "one" so that the 0th doc never matches, then append every doc
    // text to make the query large enough that the search takes measurable time.
    System.String qtxt = "one";
    for (int i = 0; i < docText.Length; i++)
    {
        qtxt += (' ' + docText[i]);
    }
    QueryParser queryParser = new QueryParser(Util.Version.LUCENE_CURRENT, FIELD_NAME, new WhitespaceAnalyzer());
    query = queryParser.Parse(qtxt);

    // Warm the searcher before the timed tests run.
    searcher.Search(query, null, 1000);
}
/*
 * public void testTermRepeatedQuery() throws IOException, ParseException {
 *   // TODO: this corner case yields different results.
 *   checkQuery("multi* multi* foo");
 * }
 */

/// <summary> Verifies that a query yields the same result when executed on a
/// single IndexSearcher containing all documents and on a MultiSearcher
/// aggregating sub-searchers.
/// </summary>
/// <param name="queryStr"> the query to check.
/// </param>
/// <throws> IOException </throws>
/// <throws> ParseException </throws>
private void CheckQuery(System.String queryStr)
{
    // check result hit ranking
    if (verbose)
    {
        System.Console.Out.WriteLine("Query: " + queryStr);
    }

    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    Query parsed = parser.Parse(queryStr);
    ScoreDoc[] fromMulti = multiSearcher.Search(parsed, null, 1000).scoreDocs;
    ScoreDoc[] fromSingle = singleSearcher.Search(parsed, null, 1000).scoreDocs;

    // Both searchers must agree on hit count, then per rank on score and content.
    Assert.AreEqual(fromMulti.Length, fromSingle.Length);
    for (int i = 0; i < fromMulti.Length; i++)
    {
        Document docMulti = multiSearcher.Doc(fromMulti[i].doc);
        Document docSingle = singleSearcher.Doc(fromSingle[i].doc);
        if (verbose)
        {
            System.Console.Out.WriteLine("Multi: " + docMulti.Get(FIELD_NAME) + " score=" + fromMulti[i].score);
            System.Console.Out.WriteLine("Single: " + docSingle.Get(FIELD_NAME) + " score=" + fromSingle[i].score);
        }
        Assert.AreEqual(fromMulti[i].score, fromSingle[i].score, 0.001f);
        Assert.AreEqual(docMulti.Get(FIELD_NAME), docSingle.Get(FIELD_NAME));
    }
    if (verbose)
    {
        System.Console.Out.WriteLine();
    }
}
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory);

    // Create a Sort object with reverse set to true.
    // The problem occurs only with SortField.AUTO:
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));
    QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");

    // Execute the search and record the text field of each hit in rank order.
    ScoreDoc[] matches = searcher.Search(query, null, 1000, sort).scoreDocs;
    System.String[] actualOrder = new System.String[5];
    for (int n = 0; n < matches.Length; n++)
    {
        Document hitDoc = searcher.Doc(matches[n].doc);
        actualOrder[n] = hitDoc.Get(TEXT_FIELD);
    }
    searcher.Close();

    // Reverse sort should yield Document 5, 4, 3, 2, 1.
    System.String[] expectedOrder = new System.String[]
    {
        "Document 5", "Document 4", "Document 3", "Document 2", "Document 1"
    };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
/// <summary>
/// Searches published articles of the current sub-domain for
/// <paramref name="strQuery"/> and returns page <paramref name="index"/>
/// (20 hits per page) serialized as JSON: { "Count": total, "Data": [hits] }.
/// On any failure the partially built string is returned (see catch below).
/// </summary>
public string SearchAndPaging(string strQuery, string index)
{
    string result = string.Empty;
    try
    {
        List <SearchArticle> searchArticleList = new List <SearchArticle>();
        PSCPortal.CMS.ArticleCollection ArticleList = ArticleCollection.GetArticleCollectionPublish();
        // Resolve the current sub-domain; fall back to "HomePage" when none.
        string nameSub = Libs.Ultility.GetSubDomain() == string.Empty ? "HomePage" : Libs.Ultility.GetSubDomain();
        SubDomain subDomain = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
        PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();
        // Build an "id1 OR id2 OR ..." clause restricted to this sub-domain's pages.
        string strId = string.Empty;
        foreach (var page in pagesBelongTo)
        {
            foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id))
            {
                strId += ar.Id + " OR ";
            }
            // NOTE(review): the trailing " OR " is trimmed inside the OUTER loop,
            // i.e. once per page rather than once at the end — verify intent.
            if (strId.Length > 0)
            {
                strId = strId.Remove(strId.Length - 3, 3);
            }
        }
        int pageIndex = Int32.Parse(index);
        string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )";
        Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
        Lucene.Net.Search.Query query = parser.Parse(strSearch);
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
        Lucene.Net.Search.Hits hits = searcher.Search(query);
        Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
        Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
        // NOTE(review): the HTML appended to `result` below is dead code — `result`
        // is unconditionally overwritten with the serialized dictionary before return.
        result += hits.Length() + "_" + "<div class='blog_news'><div class='topic_news_title1'><div class='topic_news_title'><a href='#'>Kết quả tìm thấy: " + hits.Length() + "</a></div></div>";
        result += "<div class='ct_topic_l'><div class='ct_topic_r1'>";
        // Collect the hits belonging to the requested page (20 per page).
        for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < hits.Length(); i++)
        {
            string detail = hits.Doc(i).Get("ArticleDetail");
            Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
            SearchArticle searchArticle = new SearchArticle();
            searchArticle.Id = hits.Doc(i).Get("ArticleId");;
            searchArticle.Title = hits.Doc(i).Get("ArticleTitle");
            searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
            searchArticleList.Add(searchArticle);
        }
        reader.Close();
        // NOTE(review): `hits` is still read below after reader.Close(); the
        // searcher is never closed — confirm resource handling.
        JavaScriptSerializer serializer = new JavaScriptSerializer();
        Dictionary <string, object> resultDic = new Dictionary <string, object>();
        resultDic["Count"] = hits.Length();
        resultDic["Data"] = searchArticleList;
        result = serializer.Serialize(resultDic);
    }
    catch (Exception e)
    {
        // NOTE(review): all exceptions are silently swallowed; the caller receives
        // whatever was in `result` at the point of failure.
    }
    return(result);
}
// Test that FieldScoreQuery returns docs with expected score.
// Builds one regular boolean query and four CustomScoreQuery variants around it
// (neutral, multiply, add, multiply+add), runs all five searches, converts each
// result to a doc->score map and delegates the comparisons to VerifyResults.
private void DoTestCustomScore(System.String field, FieldScoreQuery.Type tp, double dboost)
{
    float boost = (float) dboost;
    IndexSearcher s = new IndexSearcher(dir, true);
    FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    System.String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.

    // regular (boolean) query.
    Query q1 = qp.Parse(qtxt);
    Log(q1);

    // custom query, that should score the same as q1.
    CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
    q2CustomNeutral.Boost = boost;
    Log(q2CustomNeutral);

    // custom query, that should (by default) multiply the scores of q1 by that of the field
    CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1, qValSrc);
    q3CustomMul.SetStrict(true);
    q3CustomMul.Boost = boost;
    Log(q3CustomMul);

    // custom query, that should add the scores of q1 to that of the field
    CustomScoreQuery q4CustomAdd = new CustomAddQuery(q1, qValSrc);
    q4CustomAdd.SetStrict(true);
    q4CustomAdd.Boost = boost;
    Log(q4CustomAdd);

    // custom query, that multiplies and adds the field score to that of q1
    CustomScoreQuery q5CustomMulAdd = new CustomMulAddQuery(q1, qValSrc, qValSrc);
    q5CustomMulAdd.SetStrict(true);
    q5CustomMulAdd.Boost = boost;
    Log(q5CustomMulAdd);

    // do al the searches
    TopDocs td1 = s.Search(q1, null, 1000);
    TopDocs td2CustomNeutral = s.Search(q2CustomNeutral, null, 1000);
    TopDocs td3CustomMul = s.Search(q3CustomMul, null, 1000);
    TopDocs td4CustomAdd = s.Search(q4CustomAdd, null, 1000);
    TopDocs td5CustomMulAdd = s.Search(q5CustomMulAdd, null, 1000);

    // put results in map so we can verify the scores although they have changed
    System.Collections.Hashtable h1 = TopDocsToMap(td1);
    System.Collections.Hashtable h2CustomNeutral = TopDocsToMap(td2CustomNeutral);
    System.Collections.Hashtable h3CustomMul = TopDocsToMap(td3CustomMul);
    System.Collections.Hashtable h4CustomAdd = TopDocsToMap(td4CustomAdd);
    System.Collections.Hashtable h5CustomMulAdd = TopDocsToMap(td5CustomMulAdd);
    VerifyResults(boost, s, h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
}
public List <LuceneData> MemberSearch(string searchTerm)
{
    // Best-effort search of the member index on "SearchContent"; maps each hit's
    // stored fields onto a LuceneData record. Returns an empty (or partial) list
    // on any failure — the swallow-all catch is kept to preserve that contract.
    var searchData = new List <LuceneData>();
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation);

        // Create an analyzer to process the text.
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();

        // Create the query parser, with the default search field set to "SearchContent".
        Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("SearchContent", analyzer);

        // Parse the query string into a Query object.
        Lucene.Net.Search.Query query = queryParser.Parse(searchTerm);

        // Create an index searcher that will perform the search.
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
        try
        {
            // Execute the query and iterate over the results.
            Lucene.Net.Search.Hits hits = searcher.Search(query);
            for (int i = 0; i < hits.Length(); i++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(i);
                searchData.Add(new LuceneData
                {
                    MemberID = Convert.ToInt32(doc.Get("MemberID")),
                    FirstName = doc.Get("FirstName"),
                    LastName = doc.Get("LastName"),
                    CompanyName = doc.Get("CompanyName"),
                    City = doc.Get("City"),
                    State = doc.Get("State"),
                    PostalCode = doc.Get("PostalCode")
                });
            }
        }
        finally
        {
            // Fix: the searcher was previously never closed.
            searcher.Close();
        }
    }
    catch (Exception)
    {
        // Swallowed by design: search failures degrade to "no results".
        // TODO(review): consider logging the exception instead of discarding it.
    }
    return(searchData);
}
public Lucene.Net.Search.TopDocs Search(string queryString)
{
    // Parse the query against the "contents" field with a standard analyzer and
    // return the top 100 matching documents.
    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "contents", analyzer);
    Lucene.Net.Search.Query parsed = queryParser.Parse(queryString);
    return this.IndexSearcher.Search(parsed, 100);
}
public virtual void TestToString()
{
    // Stop words are removed from the phrase but leave position holes,
    // which PhraseQuery.ToString renders as '?'.
    StopAnalyzer analyzer = new StopAnalyzer(Util.Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer);
    parser.EnablePositionIncrements = true;

    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term added at position 1 renders as an alternative: hi|hello.
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
public virtual void TestPerFieldAnalyzer()
{
    // "partnum" uses a KeywordAnalyzer, so "Q36" must not be lowercased or tokenized.
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

    Query query = new QueryParser("description", analyzer).Parse("partnum:Q36 AND SPACE");
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;

    Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
    Assert.AreEqual(1, hits.Length, "doc found!");
}
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    // Time the parse + search of the given phrase and print the hit count.
    using (new AutoStopWatch($"Search for {phrase}"))
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        Lucene.Net.Search.Query query = parser.Parse(phrase);
        // Fix: the original also ran a throwaway TermQuery("Title", "find me")
        // search whose result was immediately overwritten; that dead search
        // only distorted the timing reported by AutoStopWatch.
        var hits = searcher.Search(query, 100);
        Console.WriteLine("Found {0} results for {1}", hits.TotalHits, phrase);
    }
}
public virtual void TestPerFieldAnalyzer()
{
    // "partnum" uses a KeywordAnalyzer, so "Q36" must not be lowercased or tokenized.
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

    Query query = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer).Parse("partnum:Q36 AND SPACE");
    ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;

    Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
    Assert.AreEqual(1, hits.Length, "doc found!");
}
public virtual void TestToString()
{
    // NOTE: mutates the global position-increments default before parsing.
    StopAnalyzer analyzer = new StopAnalyzer();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    QueryParser parser = new QueryParser("field", analyzer);
    parser.SetEnablePositionIncrements(true);

    // Stop words leave position holes, rendered by ToString as '?'.
    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term added at position 1 renders as an alternative: hi|hello.
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
private Lucene.Net.Search.Query ParseQuery(string searchQuery, Lucene.Net.QueryParsers.QueryParser parser)
{
    // Parse the user input as a prefix query; if the raw input is not valid
    // query syntax, fall back to a literal (escaped) query.
    Lucene.Net.Search.Query query;
    try
    {
        query = parser.Parse(searchQuery.ToLower().Trim() + "*");
    }
    catch (Lucene.Net.QueryParsers.ParseException)
    {
        // Fix: the original computed this fallback and then unconditionally
        // rethrew (`throw;`), so the escaped query could never be returned.
        query = parser.Parse(Lucene.Net.QueryParsers.QueryParser.Escape(searchQuery.Trim()));
    }
    return(query);
}
public Lucene.Net.Search.Query GetTextQuery(string sValue)
{
    // Parse sValue against the "Body" field. On a parse failure the user is
    // notified via a message box and null is returned.
    try
    {
        var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", analyzer);
        return(queryParser.Parse(sValue));
    }
    catch (Exception)
    {
        System.Windows.Forms.MessageBox.Show("Can not parse: " + sValue);
        return(null);
    }
}
// Exercises a QueryParser backed by MultiAnalyzer, which can emit several
// tokens at one position; verifies the parsed query's ToString rendering for
// single tokens, multi-token positions, phrases, fields, slop and boosts.
public virtual void TestMultiAnalyzer_Renamed_Method()
{
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("", new MultiAnalyzer(this));

    // trivial, no multiple tokens:
    Assert.AreEqual("foo", qp.Parse("foo").ToString());
    Assert.AreEqual("foo", qp.Parse("\"foo\"").ToString());
    Assert.AreEqual("foo foobar", qp.Parse("foo foobar").ToString());
    Assert.AreEqual("\"foo foobar\"", qp.Parse("\"foo foobar\"").ToString());
    Assert.AreEqual("\"foo foobar blah\"", qp.Parse("\"foo foobar blah\"").ToString());

    // two tokens at the same position:
    Assert.AreEqual("(multi multi2) foo", qp.Parse("multi foo").ToString());
    Assert.AreEqual("foo (multi multi2)", qp.Parse("foo multi").ToString());
    Assert.AreEqual("(multi multi2) (multi multi2)", qp.Parse("multi multi").ToString());
    Assert.AreEqual("+(foo (multi multi2)) +(bar (multi multi2))", qp.Parse("+(foo multi) +(bar multi)").ToString());
    Assert.AreEqual("+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp.Parse("+(foo multi) field:\"bar multi\"").ToString());

    // phrases:
    Assert.AreEqual("\"(multi multi2) foo\"", qp.Parse("\"multi foo\"").ToString());
    Assert.AreEqual("\"foo (multi multi2)\"", qp.Parse("\"foo multi\"").ToString());
    Assert.AreEqual("\"foo (multi multi2) foobar (multi multi2)\"", qp.Parse("\"foo multi foobar multi\"").ToString());

    // fields:
    Assert.AreEqual("(field:multi field:multi2) field:foo", qp.Parse("field:multi field:foo").ToString());
    Assert.AreEqual("field:\"(multi multi2) foo\"", qp.Parse("field:\"multi foo\"").ToString());

    // three tokens at one position:
    Assert.AreEqual("triplemulti multi3 multi2", qp.Parse("triplemulti").ToString());
    Assert.AreEqual("foo (triplemulti multi3 multi2) foobar", qp.Parse("foo triplemulti foobar").ToString());

    // phrase with non-default slop:
    Assert.AreEqual("\"(multi multi2) foo\"~10", qp.Parse("\"multi foo\"~10").ToString());

    // phrase with non-default boost:
    Assert.AreEqual("\"(multi multi2) foo\"^2.0", qp.Parse("\"multi foo\"^2").ToString());

    // phrase after changing default slop
    qp.SetPhraseSlop(99);
    Assert.AreEqual("\"(multi multi2) foo\"~99 bar", qp.Parse("\"multi foo\" bar").ToString());
    Assert.AreEqual("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp.Parse("\"multi foo\" \"foo bar\"~2").ToString());
    qp.SetPhraseSlop(0);

    // non-default operator:
    qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.AND_OPERATOR);
    Assert.AreEqual("+(multi multi2) +foo", qp.Parse("multi foo").ToString());
}
public static void searchFor(Searcher searcher, string querystr)
{
    // The parser could be hoisted outside this function; kept local for simplicity.
    QueryParser parser = new QueryParser("body", new StandardAnalyzer());
    Query query = parser.Parse(querystr);
    var hits = new AnonymousClassCollector();

    // Stopwatch is a more accurate timer than DateTime arithmetic.
    var timer = Stopwatch.StartNew();
    searcher.Search(query, hits);
    timer.Stop();

    Console.WriteLine("search for [{0}] returned {1} hits in {2}ms )", query, hits.Count, timer.ElapsedMilliseconds);
}
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    // Time the search for the phrase, then print the hit count and the first
    // (at most 100) matching document ids.
    using (new AutoStopWatch(string.Format("Search for {0}", phrase)))
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_CURRENT, "Body", analyzer);
        var results = searcher.Search(parser.Parse(phrase), 100);
        Console.WriteLine("Found {0} results for {1}", results.TotalHits, phrase);
        int limit = Math.Min(results.TotalHits, 100);
        for (int i = 0; i < limit; i++)
        {
            Console.WriteLine(results.ScoreDocs[i].Doc);
        }
    }
}
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    // Time the search for the phrase, then print the hit count and the first
    // (at most 100) matching document ids.
    using (new AutoStopWatch(string.Format("Search for {0}", phrase)))
    {
        Lucene.Net.Search.Query parsed = new Lucene.Net.QueryParsers.QueryParser(
            Lucene.Net.Util.Version.LUCENE_CURRENT,
            "Body",
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT)).Parse(phrase);
        var topDocs = searcher.Search(parsed, 100);
        Console.WriteLine("Found {0} results for {1}", topDocs.TotalHits, phrase);
        int shown = 0;
        while (shown < topDocs.TotalHits && shown < 100)
        {
            Console.WriteLine(topDocs.ScoreDocs[shown].Doc);
            shown++;
        }
    }
}
// Verifies that LucQuery's analyzer splits "Mr.John Smith" into the two phrase
// terms "mr.john" and "smith", then compares parse timings of LucQuery.Parse
// vs. a raw Lucene QueryParser. NOTE(review): the measured times (t0/t1/t2)
// are captured but never asserted or reported — presumably inspected in the
// debugger; confirm whether this benchmark tail is still needed.
public void Querying_Analyzers()
{
    var query = LucQuery.Parse("'Mr.John Smith'");
    var s = query.ToString();
    var pq = query.Query as Lucene.Net.Search.PhraseQuery;
    Assert.IsNotNull(pq, String.Concat("Parsed query is: ", pq.GetType().Name, ". Expected: PhraseQuery"));
    var terms = pq.GetTerms();
    Assert.IsTrue(terms.Length == 2, String.Concat("Count of terms is: ", terms.Length, ". Expected: 2"));
    Assert.IsTrue(terms[0].Text() == "mr.john", String.Concat("First term is ", terms[0].Text(), ". Expected: 'mr.john'"));
    Assert.IsTrue(terms[1].Text() == "smith", String.Concat("Second term is ", terms[1].Text(), ". Expected: 'smith'"));

    var qtext = "\"Mr.John Smith\"";
    //var qtext = "(InTree:/Root/Site1/Folder1/Folder2/Folder3 OR InTree:/Root/Site2/Folder1/Folder2/Folder3/Folder5/Folder6) AND Type:Folder AND _Text:\"Mr.John Smith\"";
    Lucene.Net.Search.Query q;

    // t0: baseline — an empty counting loop, to gauge loop overhead.
    var k = 0;
    var stopper = Stopwatch.StartNew();
    for (int i = 0; i < 10000000; i++)
    {
        k++;
    }
    var t0 = stopper.ElapsedMilliseconds;
    stopper.Stop();

    // t1: 1000 parses through LucQuery.Parse.
    stopper = Stopwatch.StartNew();
    for (int i = 0; i < 1000; i++)
    {
        q = LucQuery.Parse(qtext).Query;
    }
    var t1 = stopper.ElapsedMilliseconds;
    stopper.Stop();

    // t2: 1000 parses through a raw Lucene QueryParser on the _Text field.
    stopper = Stopwatch.StartNew();
    for (int i = 0; i < 1000; i++)
    {
        q = new Lucene.Net.QueryParsers.QueryParser(LuceneManager.LuceneVersion, "_Text", IndexManager.GetAnalyzer()).Parse(qtext);
    }
    var t2 = stopper.ElapsedMilliseconds;
    stopper.Stop();
}
private List <LuceneData> GetSearchResultByField(string searchQuery, string searchField, Lucene.Net.Search.IndexSearcher searcher, Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer)
{
    // Search a single field of the index for searchQuery and return the top 5
    // mapped results. Exceptions propagate to the caller.
    // Fix: the original wrapped the body in `try { ... } catch { throw; }`,
    // which is a no-op, and threaded the result through a redundant local.
    const int hitsLimit = 1000;
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, searchField, analyzer);
    var query = ParseQuery(searchQuery, parser);
    var hits = searcher.Search(query, hitsLimit).ScoreDocs;
    var results = MapLuceneDataToIDList(hits, searcher);
    return(results.Take(5).ToList <LuceneData>());
}
public virtual void TestNot_Renamed()
{
    // Index one doc containing both "a" and "b"; the query "a NOT b" must exclude it.
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d1 = new Document();
    d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(store, true);
    Query query = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new SimpleAnalyzer()).Parse("a NOT b");
    //System.out.println(query);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
}
public virtual void TestNot_Renamed()
{
    // Index one doc containing both "a" and "b"; the query "a NOT b" must exclude it.
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d1 = new Document();
    d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(store);
    Query query = new QueryParser("field", new SimpleAnalyzer()).Parse("a NOT b");
    //System.out.println(query);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
}
// Translates a `lucene(field, 'raw query')` method expression into a Lucene
// query by running the raw string through Lucene's own QueryParser against
// the resolved index field.
private static Lucene.Net.Search.Query HandleLucene(Query query, MethodExpression expression, QueryMetadata metadata, BlittableJsonReaderObject parameters, Analyzer analyzer)
{
    var fieldName = ExtractIndexFieldName(query, parameters, expression.Arguments[0], metadata);
    var (value, valueType) = GetValue(fieldName, query, metadata, parameters, (ValueExpression)expression.Arguments[1]);

    // The raw lucene() argument must be a string literal/parameter.
    if (valueType != ValueTokenType.String)
    {
        ThrowMethodExpectsArgumentOfTheFollowingType("lucene", ValueTokenType.String, valueType, metadata.QueryText, parameters);
    }

    // Dynamic (auto) indexes store search fields under a generated name.
    if (metadata.IsDynamic)
    {
        fieldName = new QueryFieldName(AutoIndexField.GetSearchAutoIndexFieldName(fieldName.Value), fieldName.IsQuoted);
    }

    var parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_29, fieldName, analyzer);
    return (parser.Parse(GetValueAsString(value)));
}
// Round-trips a RAMDirectory through BinaryFormatter serialization and checks
// the deserialized copy is still a usable, writable index
// (regression test for LUCENENET-174).
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    // Index one document into the original directory.
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    // Serialize the directory. NOTE(review): BinaryFormatter is insecure and
    // obsolete in modern .NET; its use here is deliberate because the test
    // exists to verify RAMDirectory's [Serializable] contract.
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    // Close the original so only the deserialized copy remains in play.
    ramDIR.Close();
    ramDIR = null;

    // Deserialize a second, independent directory from the stream.
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

    // Append one more identical document to the deserialized directory
    // (create:false — the existing index must be readable).
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    // Both documents must be found => serialization preserved the index.
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_CURRENT, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT));
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();
    Assert.AreEqual(topDocs.TotalHits, 2, "See the issue: LUCENENET-174");
}
// Wraps a parsed query in CustomExternalQuery and checks that every doc in
// the fixture index scores exactly 1 + (4 * doc) % N_DOCS.
public void TestCustomExternalQuery()
{
    QueryParser queryParser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query inner = queryParser.Parse(qtxt);

    Query custom = new CustomExternalQuery(inner);
    Log(custom);

    IndexSearcher searcher = new IndexSearcher(dir);
    TopDocs hits = searcher.Search(custom, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);

    for (int i = 0; i < N_DOCS; i++)
    {
        ScoreDoc sd = hits.ScoreDocs[i];
        int doc = sd.Doc;
        Assert.AreEqual(sd.Score, (float)1 + (4 * doc) % N_DOCS, 0.0001, "doc=" + doc);
    }
    searcher.Close();
}
// Fuzzy query "giga~0.9" over a small corpus must match exactly one doc
// ("Giga byte") — the other titles are too far by edit distance.
public virtual void TestGiga()
{
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Directory index = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    string[] titles = new string[]
    {
        "Lucene in Action", "Lucene for Dummies",
        // "Giga" itself is deliberately not indexed.
        "Giga byte",
        "ManagingGigabytesManagingGigabyte", "ManagingGigabytesManagingGigabytes",
        "The Art of Computer Science",
        "J. K. Rowling", "JK Rowling", "Joanne K Roling",
        "Bruce Willis", "Willis bruce", "Brute willis", "B. willis"
    };
    foreach (string title in titles)
    {
        AddDoc(title, w);
    }

    IndexReader r = w.GetReader();
    w.Close();

    Query q = new QueryParser("field", analyzer).Parse("giga~0.9");
    IndexSearcher searcher = new IndexSearcher(r);
    ScoreDoc[] hits = searcher.Search(q, 10).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), "Giga byte");
    r.Close();
}
// Installs either a single-field or a multi-field parser depending on `type`.
// "Muti-Term" (sic) must match the caller's literal exactly. Any other value
// (including the not-yet-implemented multi-phrase mode) leaves `parser` as-is.
public void CreateParser(String type, String field)
{
    if (type == "Single Term")
    {
        parser = new Lucene.Net.QueryParsers.QueryParser(VERSION, field, analyzer);
    }
    else if (type == "Muti-Term")
    {
        string[] multiFields = { "DocID", "Title", "Author", "Bibliographic", "Words" };
        parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(VERSION, multiFields, analyzer);
    }
}
// Demonstrates that an UN-rewritten multi-term query (wildcard/prefix) yields
// zero highlights: the highlighter only sees the primitive terms produced by
// rewrite(), which is deliberately skipped here.
public virtual void TestUnRewrittenQuery()
{
    //test to show how rewritten query can still be used
    searcher = new IndexSearcher(ramDir);
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("JF? or Kenned*");
    System.Console.Out.WriteLine("Searching with primitive query");
    // Deliberately "forget" to rewrite the query:
    //query=query.rewrite(reader);
    Hits hits = searcher.Search(query);

    //create an instance of the highlighter with the tags used to surround highlighted text
    // QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this, query, new StandardAnalyzer());
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));
    int maxNumFragmentsRequired = 3;
    for (int i = 0; i < hits.Length(); i++)
    {
        System.String text = hits.Doc(i).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine(highlightedText);
    }
    //We expect to have zero highlights if the query is multi-terms and is not rewritten!
    Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
}
// A synonym analyzer emits overlapping tokens at the same position; the
// highlighter must still surround each matched token correctly.
public virtual void TestOverlapAnalyzer()
{
    System.Collections.Hashtable synonyms = new System.Collections.Hashtable();
    synonyms["football"] = "soccer,footie";
    Analyzer analyzer = new SynonymAnalyzer(synonyms);

    System.String srchkey = "football";
    System.String s = "football-soccer in the euro 2004 footie competition";

    Query query = new QueryParser("bookid", analyzer).Parse(srchkey);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    TokenStream tokenStream = analyzer.TokenStream(null, new System.IO.StringReader(s));

    // Get 3 best fragments and separate with a "..."
    System.String result = highlighter.GetBestFragments(tokenStream, s, 3, "...");
    System.String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
    Assert.IsTrue(expectedResult.Equals(result), "overlapping analyzer should handle highlights OK");
}
// Highlighting a range query requires old-style RangeQuery objects (not
// constant-score range filters), so the parser is switched explicitly.
public virtual void TestGetRangeFragments()
{
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.SetUseOldRangeQuery(true);
    query = parser.Parse(FIELD_NAME + ":[kannedy TO kznnedy]");

    DoSearching(query);
    DoStandardHighlights();
    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
// Builds an in-memory index of N_DOCS docs cycling through docText, then
// prepares a deliberately large disjunction query ("one" plus every doc text)
// so that searches run long enough for time-limited collection tests.
// Finishes with one warm-up search against the shared `searcher` fixture.
public override void SetUp()
{
    base.SetUp();
    System.String[] docText = new System.String[]{"docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero", "one blah three", "one foo three multiOne", "one foobar three multiThree", "blueberry pancakes", "blueberry pie", "blueberry strudel", "blueberry pizza"};
    Directory directory = new RAMDirectory();
    IndexWriter iw = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < N_DOCS; i++)
    {
        Add(docText[i % docText.Length], iw);
    }
    iw.Close();
    searcher = new IndexSearcher(directory);

    System.String qtxt = "one";
    for (int i = 0; i < docText.Length; i++)
    {
        qtxt += (' ' + docText[i]); // large query so that search will be longer
    }
    QueryParser queryParser = new QueryParser(FIELD_NAME, new WhitespaceAnalyzer());
    query = queryParser.Parse(qtxt);

    // warm the searcher
    searcher.Search(query, null, 1000);
}
// Checks leading-wildcard parsing: which query strings match all / none /
// exactly one of three indexed docs, and whether each single-match query
// parses to a PrefixQuery or a WildcardQuery. Row i of each jagged table
// is expected to match docs[i].
public virtual void TestParsingAndSearching()
{
    System.String field = "content";
    bool dbg = false; // flip to true to dump each parsed query to the console
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer());
    qp.AllowLeadingWildcard = true;
    System.String[] docs = new System.String[]{"\\ abcdefg1", "\\79 hijklmn1", "\\\\ opqrstu1"};
    // queries that should find all docs
    System.String[] matchAll = new System.String[]{"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"};
    // queries that should find no docs
    System.String[] matchNone = new System.String[]{"a*h", "a?h", "*a*h", "?a", "a?"};
    // queries that should be parsed to prefix queries
    System.String[][] matchOneDocPrefix = new System.String[][]{new System.String[]{"a*", "ab*", "abc*"}, new System.String[]{"h*", "hi*", "hij*", "\\\\7*"}, new System.String[]{"o*", "op*", "opq*", "\\\\\\\\*"}};
    // queries that should be parsed to wildcard queries
    System.String[][] matchOneDocWild = new System.String[][]{new System.String[]{"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"}, new System.String[]{"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"}, new System.String[]{"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}};

    // prepare the index
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(field, docs[i], Field.Store.NO, Field.Index.ANALYZED));
        iw.AddDocument(doc);
    }
    iw.Close();
    IndexSearcher searcher = new IndexSearcher(dir, true);

    // test queries that must find all
    for (int i = 0; i < matchAll.Length; i++)
    {
        System.String qtxt = matchAll[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchAll: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    for (int i = 0; i < matchNone.Length; i++)
    {
        System.String qtxt = matchNone[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchNone: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test queries that must be prefix queries and must find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            System.String qtxt = matchOneDocPrefix[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 prefix: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(PrefixQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }

    // test queries that must be wildcard queries and must find only one doc
    // NOTE(review): the outer bound reuses matchOneDocPrefix.Length; this is
    // safe only because both jagged arrays have the same number of rows.
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            System.String qtxt = matchOneDocWild[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 wild: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(WildcardQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].Doc);
        }
    }
    searcher.Close();
}
// MultiSearcher regression test in three ordered scenarios:
//   1. one empty sub-index + one populated sub-index;
//   2. formerly-empty index gains a doc (also exercises SubSearcher mapping);
//   3. that doc is deleted again and the index optimized.
// Each scenario must search without throwing and return the expected counts.
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();

    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    //--------------------------------------------------------------------
    // scenario 1: B stays empty, A holds all three docs
    //--------------------------------------------------------------------
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();
    // closing the second index
    writerB.Close();

    // creating the query (all docs share handle "1")
    QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer());
    Query query = parser.Parse("handle:1");

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB);
    searchers[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    // iterating over the hit documents; Doc() must not throw for any hit
    for (int i = 0; i < hits.Length; i++)
    {
        mSearcher.Doc(hits[i].doc);
    }
    mSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 2: add one document to the previously empty index
    //--------------------------------------------------------------------
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB);
    searchers2[1] = new IndexSearcher(indexStoreA);
    // creating the mulitSearcher
    MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search; now 4 docs carry handle "1"
    ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(4, hits2.Length);
    for (int i = 0; i < hits2.Length; i++)
    {
        // no exception should happen at this point
        mSearcher2.Doc(hits2[i].doc);
    }

    // test the subSearcher() method: "doc1" now exists in BOTH indices
    Query subSearcherQuery = parser.Parse("id:doc1");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits2.Length);
    Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].doc)); // hit from searchers2[0]
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].doc)); // hit from searchers2[1]
    subSearcherQuery = parser.Parse("id:doc2");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits2.Length);
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].doc)); // hit from searchers2[1]
    mSearcher2.Close();

    //--------------------------------------------------------------------
    // scenario 3: delete the doc added in scenario 2 and optimize
    //--------------------------------------------------------------------
    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB);
    readerB.DeleteDocuments(term);
    readerB.Close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB);
    searchers3[1] = new IndexSearcher(indexStoreA);
    // creating the mulitSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search; back to the original 3 hits
    ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits3.Length);
    for (int i = 0; i < hits3.Length; i++)
    {
        mSearcher3.Doc(hits3[i].doc);
    }
    mSearcher3.Close();
    indexStoreA.Close();
    indexStoreB.Close();
}
// Parses `queryString` with scoring-boolean rewrite so multi-term queries
// (wildcard/prefix/fuzzy) keep per-term scores for highlighting, stores the
// result in the shared `query` field, then delegates to DoSearching(Query).
public virtual void DoSearching(System.String queryString)
{
    QueryParser qp = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    qp.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = qp.Parse(queryString);
    DoSearching(query);
}
// Highlighting across a MultiSearcher: each sub-index holds one matching doc.
// The wildcard query must be rewritten against EACH reader and recombined
// (Combine) before the highlighter can see the primitive terms; expects one
// highlight per sub-index (2 total).
public virtual void TestMultiSearcher()
{
    //setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1);

    //setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2);

    IndexSearcher[] searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1);
    searchers[1] = new IndexSearcher(ramDir2);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse("multi*");
    System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    //at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query);

    //query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());

    // Rewrite the wildcard query per-reader, then combine so the highlighter
    // sees concrete terms from both indices.
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);

    //create an instance of the highlighter with the tags used to surround highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    for (int i = 0; i < hits.Length(); i++)
    {
        System.String text = hits.Doc(i).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        System.Console.Out.WriteLine(highlightedText);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
// Verifies that MultiSearcher.Explain reports MERGED index-wide statistics
// (maxDocs/docFreq across both sub-indices), for a term query, a phrase
// query, and a SpanNearQuery. Each of the 3 docs holds handle:"1 2".
public virtual void TestTermQueryMultiSearcherExplain()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();
    Document lDoc = new Document();
    lDoc.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));

    // two docs go to A, one to B
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.Optimize();
    writerA.Close();
    writerB.AddDocument(lDoc3);
    writerB.Close();

    // default field is "fulltext", but every query below names "handle" explicitly
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
    Query query = parser.Parse("handle:1");

    Searcher[] searchers = new Searcher[2];
    searchers[0] = new IndexSearcher(indexStoreB, true);
    searchers[1] = new IndexSearcher(indexStoreA, true);
    Searcher mSearcher = new MultiSearcher(searchers);
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // term query: explanation must reflect the combined corpus (3 docs total)
    Explanation explain = mSearcher.Explain(query, hits[0].Doc);
    System.String exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("maxDocs=3") > - 1, exp);
    Assert.IsTrue(exp.IndexOf("docFreq=3") > - 1, exp);

    // phrase query: per-term docFreq across both indices is 3 for "1" and "2"
    query = parser.Parse("handle:\"1 2\"");
    hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    explain = mSearcher.Explain(query, hits[0].Doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > - 1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > - 1, exp);

    // span query equivalent of the phrase: same merged statistics expected
    query = new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term("handle", "1")), new SpanTermQuery(new Term("handle", "2"))}, 0, true);
    hits = mSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    explain = mSearcher.Explain(query, hits[0].Doc);
    exp = explain.ToString(0);
    Assert.IsTrue(exp.IndexOf("1=3") > - 1, exp);
    Assert.IsTrue(exp.IndexOf("2=3") > - 1, exp);
    mSearcher.Close();
}
// Test that FieldScoreQuery returns docs with expected score.
// Runs the same text query five ways — plain, neutral CustomScoreQuery,
// multiply-by-field, add-field, and multiply+add — all with the same boost,
// then hands the per-doc score maps to VerifyResults for cross-checking.
private void DoTestCustomScore(System.String field, FieldScoreQuery.Type tp, double dboost)
{
    float boost = (float) dboost;
    IndexSearcher s = new IndexSearcher(dir, true);
    FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    System.String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.

    // regular (boolean) query.
    Query q1 = qp.Parse(qtxt);
    Log(q1);

    // custom query, that should score the same as q1.
    CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
    q2CustomNeutral.Boost = boost;
    Log(q2CustomNeutral);

    // custom query, that should (by default) multiply the scores of q1 by that of the field
    CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1, qValSrc);
    q3CustomMul.SetStrict(true);
    q3CustomMul.Boost = boost;
    Log(q3CustomMul);

    // custom query, that should add the scores of q1 to that of the field
    CustomScoreQuery q4CustomAdd = new CustomAddQuery(q1, qValSrc);
    q4CustomAdd.SetStrict(true);
    q4CustomAdd.Boost = boost;
    Log(q4CustomAdd);

    // custom query, that multiplies and adds the field score to that of q1
    CustomScoreQuery q5CustomMulAdd = new CustomMulAddQuery(q1, qValSrc, qValSrc);
    q5CustomMulAdd.SetStrict(true);
    q5CustomMulAdd.Boost = boost;
    Log(q5CustomMulAdd);

    // do all the searches
    TopDocs td1 = s.Search(q1, null, 1000);
    TopDocs td2CustomNeutral = s.Search(q2CustomNeutral, null, 1000);
    TopDocs td3CustomMul = s.Search(q3CustomMul, null, 1000);
    TopDocs td4CustomAdd = s.Search(q4CustomAdd, null, 1000);
    TopDocs td5CustomMulAdd = s.Search(q5CustomMulAdd, null, 1000);

    // put results in map so we can verify the scores although they have changed
    System.Collections.Hashtable h1 = TopDocsToMap(td1);
    System.Collections.Hashtable h2CustomNeutral = TopDocsToMap(td2CustomNeutral);
    System.Collections.Hashtable h3CustomMul = TopDocsToMap(td3CustomMul);
    System.Collections.Hashtable h4CustomAdd = TopDocsToMap(td4CustomAdd);
    System.Collections.Hashtable h5CustomMulAdd = TopDocsToMap(td5CustomMulAdd);
    VerifyResults(boost, s, h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
}
// Same scenario as its sibling: a CustomExternalQuery wrapping a parsed text
// query must score each doc exactly 1 + (4 * doc) % N_DOCS.
public void TestCustomExternalQuery()
{
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String queryText = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query wrapped = parser.Parse(queryText);
    Query external = new CustomExternalQuery(wrapped);
    Log(external);

    IndexSearcher s = new IndexSearcher(dir);
    TopDocs hits = s.Search(external, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);

    for (int i = 0; i < N_DOCS; i++)
    {
        int doc = hits.ScoreDocs[i].Doc;
        float score = hits.ScoreDocs[i].Score;
        Assert.AreEqual(score, (float)1 + (4 * doc) % N_DOCS, 0.0001, "doc=" + doc);
    }
    s.Close();
}
// Legacy-API variant: round-trips a RAMDirectory through BinaryFormatter and
// checks the deserialized copy is still a usable, writable index
// (regression test for LUCENENET-174).
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    // Index a single document into the original directory.
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    // Serialize it. NOTE(review): BinaryFormatter is insecure/obsolete in
    // modern .NET; deliberate here — the test verifies the [Serializable]
    // contract of RAMDirectory.
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    // Close the original so only the deserialized copy remains.
    ramDIR.Close();
    ramDIR = null;

    // Deserialize a second, independent directory.
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

    // Append one more identical document (create:false — must read the
    // deserialized index).
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();

    // Both documents must be found => serialization preserved the index.
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();
    Assert.AreEqual(topDocs.TotalHits, 2, "See the issue: LUCENENET-174");
}
// Fuzzy query "giga~0.9" over a small corpus must match exactly one doc
// ("Giga byte") — every other title is too far by edit distance.
public virtual void TestGiga()
{
    StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    Directory index = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    string[] titles = new string[]
    {
        "Lucene in Action", "Lucene for Dummies",
        // "Giga" itself is deliberately not indexed.
        "Giga byte",
        "ManagingGigabytesManagingGigabyte", "ManagingGigabytesManagingGigabytes",
        "The Art of Computer Science",
        "J. K. Rowling", "JK Rowling", "Joanne K Roling",
        "Bruce Willis", "Willis bruce", "Brute willis", "B. willis"
    };
    foreach (string title in titles)
    {
        AddDoc(title, w);
    }

    IndexReader r = w.GetReader();
    w.Close();

    Query q = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer).Parse("giga~0.9");
    IndexSearcher searcher = new IndexSearcher(r);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), "Giga byte");
    r.Close();
}
// A field-restricted QueryScorer must only highlight terms queried on that
// field; an unrestricted scorer highlights terms from every field.
public virtual void TestFieldSpecificHighlighting()
{
    System.String docMainText = "fred is one of the people";
    Query query = new QueryParser(FIELD_NAME, analyzer).Parse("fred category:people");

    // Scorer restricted to "contents": only "fred" is highlighted.
    QueryScorer fieldSpecificScorer = new QueryScorer(query, "contents");
    Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer);
    fieldSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
    System.String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the people", "Should match");

    // Unrestricted scorer: "people" (queried on another field) also matches.
    QueryScorer fieldInSpecificScorer = new QueryScorer(query);
    Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer);
    fieldInSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
    result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the <B>people</B>", "Should match");

    reader.Close();
}
// Legacy-API variant of the leading-wildcard test: which query strings match
// all / none / exactly one of three indexed docs, and whether each
// single-match query parses to PrefixQuery or WildcardQuery. Row i of each
// jagged table is expected to match docs[i].
public virtual void TestParsingAndSearching()
{
    System.String field = "content";
    bool dbg = false; // flip to true to dump each parsed query to the console
    QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
    qp.SetAllowLeadingWildcard(true);
    System.String[] docs = new System.String[] { "\\ abcdefg1", "\\79 hijklmn1", "\\\\ opqrstu1" };
    // queries that should find all docs
    System.String[] matchAll = new System.String[] { "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" };
    // queries that should find no docs
    System.String[] matchNone = new System.String[] { "a*h", "a?h", "*a*h", "?a", "a?" };
    // queries that should be parsed to prefix queries
    System.String[][] matchOneDocPrefix = new System.String[][] { new System.String[] { "a*", "ab*", "abc*" }, new System.String[] { "h*", "hi*", "hij*", "\\\\7*" }, new System.String[] { "o*", "op*", "opq*", "\\\\\\\\*" } };
    // queries that should be parsed to wildcard queries
    System.String[][] matchOneDocWild = new System.String[][] { new System.String[] { "*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**" }, new System.String[] { "*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**" }, new System.String[] { "*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**" } };

    // prepare the index
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docs.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(field, docs[i], Field.Store.NO, Field.Index.ANALYZED));
        iw.AddDocument(doc);
    }
    iw.Close();
    IndexSearcher searcher = new IndexSearcher(dir);

    // test queries that must find all
    for (int i = 0; i < matchAll.Length; i++)
    {
        System.String qtxt = matchAll[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchAll: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
        Assert.AreEqual(docs.Length, hits.Length);
    }

    // test queries that must find none
    for (int i = 0; i < matchNone.Length; i++)
    {
        System.String qtxt = matchNone[i];
        Query q = qp.Parse(qtxt);
        if (dbg)
        {
            System.Console.Out.WriteLine("matchNone: qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
        }
        ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
        Assert.AreEqual(0, hits.Length);
    }

    // test queries that must be prefix queries and must find only one doc
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
        {
            System.String qtxt = matchOneDocPrefix[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 prefix: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(PrefixQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].doc);
        }
    }

    // test queries that must be wildcard queries and must find only one doc
    // NOTE(review): the outer bound reuses matchOneDocPrefix.Length; this is
    // safe only because both jagged arrays have the same number of rows.
    for (int i = 0; i < matchOneDocPrefix.Length; i++)
    {
        for (int j = 0; j < matchOneDocWild[i].Length; j++)
        {
            System.String qtxt = matchOneDocWild[i][j];
            Query q = qp.Parse(qtxt);
            if (dbg)
            {
                System.Console.Out.WriteLine("match 1 wild: doc=" + docs[i] + " qtxt=" + qtxt + " q=" + q + " " + q.GetType().FullName);
            }
            Assert.AreEqual(typeof(WildcardQuery), q.GetType());
            ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(i, hits[0].doc);
        }
    }
    searcher.Close();
}
public virtual void TestToString()
{
    // Stop words are dropped but, with position increments enabled, leave
    // holes that render as '?' placeholders in the phrase query's ToString().
    StopAnalyzer analyzer = new StopAnalyzer(Util.Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer);
    parser.EnablePositionIncrements = true;

    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term added at position 1 renders as an alternation ("hi|hello").
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
/// <summary>
/// Runs <paramref name="phrase"/> through a standard-analyzer query parser
/// against the "Body" field and returns the total hit count (capped search
/// of the top 100 docs).
/// </summary>
static int SearchForPhrase(IndexSearcher searcher, string phrase)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Body", analyzer);
    return searcher.Search(parser.Parse(phrase), 100).TotalHits;
}
public static void Main(System.String[] args)
{
    // Command-line demo: parses queries (from a file or interactively) and
    // searches an existing index, printing paged results or raw doc ids.
    System.String usage = "Usage: " + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(0);
    }

    System.String index = "index";
    System.String field = "contents";
    System.String queries = null;
    int repeat = 0;
    bool raw = false;
    System.String normsField = null;

    // Simple flag/value argument scan; value-taking flags consume args[i + 1].
    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i])) { index = args[i + 1]; i++; }
        else if ("-field".Equals(args[i])) { field = args[i + 1]; i++; }
        else if ("-queries".Equals(args[i])) { queries = args[i + 1]; i++; }
        else if ("-repeat".Equals(args[i])) { repeat = System.Int32.Parse(args[i + 1]); i++; }
        else if ("-raw".Equals(args[i])) { raw = true; }
        else if ("-norms".Equals(args[i])) { normsField = args[i + 1]; i++; }
    }

    IndexReader reader = IndexReader.Open(index);
    if (normsField != null)
        reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    // FIX: the original constructed the underlying StreamReader twice (opening
    // the file / stdin twice) just to copy its encoding, leaking the first
    // reader. A single reader with the desired encoding is equivalent.
    System.IO.StreamReader in_Renamed;
    if (queries != null)
    {
        in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
    }
    else
    {
        in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8"));
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true)
    {
        if (queries == null)
            // prompt the user
            System.Console.Out.Write("Query: ");

        System.String line = in_Renamed.ReadLine();
        if (line == null || line.Length == 0)
            break;

        Query query = parser.Parse(line);
        System.Console.Out.WriteLine("Searching for: " + query.ToString(field));

        Hits hits = searcher.Search(query);

        if (repeat > 0)
        {
            // repeat & time as benchmark
            System.DateTime start = System.DateTime.Now;
            for (int i = 0; i < repeat; i++)
            {
                hits = searcher.Search(query);
            }
            System.DateTime end = System.DateTime.Now;
            // FIX: report elapsed wall time. The original subtracted the
            // Millisecond *components* (each 0-999), which is wrong for any
            // run longer than a second.
            System.Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
        }

        System.Console.Out.WriteLine(hits.Length() + " total matching documents");

        int HITS_PER_PAGE = 10;
        for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
        {
            int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++)
            {
                if (raw)
                {
                    // output raw format
                    System.Console.Out.WriteLine("doc=" + hits.Id(i) + " score=" + hits.Score(i));
                    continue;
                }
                Document doc = hits.Doc(i);
                System.String path = doc.Get("path");
                if (path != null)
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + path);
                    System.String title = doc.Get("title");
                    if (title != null)
                    {
                        System.Console.Out.WriteLine(" Title: " + doc.Get("title"));
                    }
                }
                else
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
                }
            }

            if (queries != null)
                // non-interactive
                break;

            if (hits.Length() > end)
            {
                System.Console.Out.Write("more (y/n) ? ");
                line = in_Renamed.ReadLine();
                // FIX: guard against EOF (null line) before indexing line[0].
                if (line == null || line.Length == 0 || line[0] == 'n')
                    break;
            }
        }
    }
    in_Renamed.Close();
    reader.Close();
}
public virtual void TestToString()
{
    // With position increments enabled, removed stop words leave gaps that
    // print as '?' placeholders in the phrase query's string form.
    StopAnalyzer analyzer = new StopAnalyzer();
    StopFilter.SetEnablePositionIncrementsDefault(true);

    QueryParser parser = new QueryParser("field", analyzer);
    parser.SetEnablePositionIncrements(true);

    PhraseQuery phrase = (PhraseQuery) parser.Parse("\"this hi this is a test is\"");
    Assert.AreEqual("field:\"? hi ? ? ? test\"", phrase.ToString());

    // A second term at position 1 is rendered as an alternation ("hi|hello").
    phrase.Add(new Term("field", "hello"), 1);
    Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", phrase.ToString());
}
/* public void testTermRepeatedQuery() throws IOException, ParseException { // TODO: this corner case yields different results. checkQuery("multi* multi* foo"); } */
/// <summary> checks if a query yields the same result when executed on
/// a single IndexSearcher containing all documents and on a
/// MultiSearcher aggregating sub-searchers
/// </summary>
/// <param name="queryStr"> the query to check.
/// </param>
/// <throws>  IOException </throws>
/// <throws>  ParseException </throws>
private void CheckQuery(System.String queryStr)
{
    // check result hit ranking
    if (verbose)
        System.Console.Out.WriteLine("Query: " + queryStr);

    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    Query parsedQuery = parser.Parse(queryStr);

    ScoreDoc[] multiHits = multiSearcher.Search(parsedQuery, null, 1000).scoreDocs;
    ScoreDoc[] singleHits = singleSearcher.Search(parsedQuery, null, 1000).scoreDocs;
    Assert.AreEqual(multiHits.Length, singleHits.Length);

    // Both searchers must agree pairwise on score (within tolerance) and on
    // the stored field content at each rank.
    for (int rank = 0; rank < multiHits.Length; rank++)
    {
        Document docMulti = multiSearcher.Doc(multiHits[rank].doc);
        Document docSingle = singleSearcher.Doc(singleHits[rank].doc);
        if (verbose)
            System.Console.Out.WriteLine("Multi: " + docMulti.Get(FIELD_NAME) + " score=" + multiHits[rank].score);
        if (verbose)
            System.Console.Out.WriteLine("Single: " + docSingle.Get(FIELD_NAME) + " score=" + singleHits[rank].score);
        Assert.AreEqual(multiHits[rank].score, singleHits[rank].score, 0.001f);
        Assert.AreEqual(docMulti.Get(FIELD_NAME), docSingle.Get(FIELD_NAME));
    }
    if (verbose)
        System.Console.Out.WriteLine();
}
public virtual void TestQuery()
{
    // Exercises MatchAllDocsQuery with and without norms-based scoring across
    // a multi-segment index, after a norm change, after a deletion, and via a
    // round-trip through its parsable ToString() form.
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2); // force multi-segment
    AddDoc("one", writer, 1f);
    AddDoc("two", writer, 20f);
    AddDoc("three four", writer, 300f);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    ScoreDoc[] hits;

    // assert with norms scoring turned off: index order is preserved
    hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(reader.Document(hits[0].Doc).Get("key"), "one");
    Assert.AreEqual(reader.Document(hits[1].Doc).Get("key"), "two");
    Assert.AreEqual(reader.Document(hits[2].Doc).Get("key"), "three four");

    // assert with norms scoring turned on: higher boost ranks first
    MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");
    hits = searcher.Search(normsQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(reader.Document(hits[0].Doc).Get("key"), "three four");
    Assert.AreEqual(reader.Document(hits[1].Doc).Get("key"), "two");
    Assert.AreEqual(reader.Document(hits[2].Doc).Get("key"), "one");

    // change norm & retest: doc 0 now outranks the others
    reader.SetNorm(0, "key", 400f);
    normsQuery = new MatchAllDocsQuery("key");
    hits = searcher.Search(normsQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(reader.Document(hits[0].Doc).Get("key"), "one");
    Assert.AreEqual(reader.Document(hits[1].Doc).Get("key"), "three four");
    Assert.AreEqual(reader.Document(hits[2].Doc).Get("key"), "two");

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = searcher.Search(bq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // delete a document:
    searcher.GetIndexReader().DeleteDocument(0);
    hits = searcher.Search(new MatchAllDocsQuery(), null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString()
    QueryParser qp = new QueryParser("key", analyzer);
    hits = searcher.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString() with non default boost
    Query boosted = new MatchAllDocsQuery();
    boosted.SetBoost(2.3f);
    Query reparsed = qp.Parse(boosted.ToString());
    hits = searcher.Search(reparsed, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    searcher.Close();
    reader.Close();
    dir.Close();
}
/// <summary>
/// Translates an abstraction-layer <see cref="Query"/> into the equivalent
/// Lucene.Net query object, recursing through boolean clauses.
/// </summary>
/// <param name="query">The query wrapper to translate; must not be null.</param>
/// <returns>The corresponding <see cref="Lucene.Net.Search.Query"/>, with the boost copied when set.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="query"/> is null.</exception>
/// <exception cref="ArgumentException">When the concrete query type is not supported.</exception>
/// <exception cref="InvalidOperationException">When a boolean clause carries an unknown occur value.</exception>
public static Lucene.Net.Search.Query ConvertQueryToLuceneQuery(Query query)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    Lucene.Net.Search.Query result;

    if (query is MatchAllDocsQuery)
    {
        result = new Lucene.Net.Search.MatchAllDocsQuery();
    }
    else if (query is TermQuery)
    {
        var termQuery = (TermQuery)query;
        result = new Lucene.Net.Search.TermQuery(Term.ConvertToLuceneTerm(termQuery.Term));
    }
    else if (query is TermRangeQuery)
    {
        var rangeQuery = (TermRangeQuery)query;
        result = new Lucene.Net.Search.TermRangeQuery(
            rangeQuery.FieldName,
            rangeQuery.LowerTerm,
            rangeQuery.UpperTerm,
            rangeQuery.LowerInclusive,
            rangeQuery.UpperInclusive);
    }
    else if (query is PhraseQuery)
    {
        var phraseQuery = (PhraseQuery)query;
        var lPhraseQuery = new Lucene.Net.Search.PhraseQuery();
        foreach (var term in phraseQuery.Terms)
        {
            lPhraseQuery.Add(Term.ConvertToLuceneTerm(term));
        }
        if (phraseQuery.Slop.HasValue)
        {
            lPhraseQuery.Slop = phraseQuery.Slop.Value;
        }
        result = lPhraseQuery;
    }
    else if (query is PrefixQuery)
    {
        var prefixQuery = (PrefixQuery)query;
        result = new Lucene.Net.Search.PrefixQuery(Term.ConvertToLuceneTerm(prefixQuery.Term));
    }
    else if (query is RegexQuery)
    {
        var regexQuery = (RegexQuery)query;
        result = new Contrib.Regex.RegexQuery(Term.ConvertToLuceneTerm(regexQuery.Term));
    }
    else if (query is FuzzyQuery)
    {
        var fuzzyQuery = (FuzzyQuery)query;
        result = new Lucene.Net.Search.FuzzyQuery(Term.ConvertToLuceneTerm(fuzzyQuery.Term));
    }
    else if (query is BooleanQuery)
    {
        var booleanQuery = (BooleanQuery)query;
        var lBooleanQuery = new Lucene.Net.Search.BooleanQuery();
        foreach (var clause in booleanQuery.Clauses)
        {
            // Recursively translate the nested query and map the occur flag.
            var lNestedQuery = Query.ConvertQueryToLuceneQuery(clause.Query);
            Lucene.Net.Search.Occur lOccur;
            switch (clause.Occur)
            {
                case Occur.Must:
                    lOccur = Lucene.Net.Search.Occur.MUST;
                    break;

                case Occur.MustNot:
                    lOccur = Lucene.Net.Search.Occur.MUST_NOT;
                    break;

                case Occur.Should:
                    lOccur = Lucene.Net.Search.Occur.SHOULD;
                    break;

                default:
                    throw new InvalidOperationException("Occur not implemented or defined.");
            }
            lBooleanQuery.Add(new Lucene.Net.Search.BooleanClause(lNestedQuery, lOccur));
        }
        if (booleanQuery.MinimumNumberShouldMatch.HasValue)
        {
            lBooleanQuery.MinimumNumberShouldMatch = booleanQuery.MinimumNumberShouldMatch.Value;
        }
        result = lBooleanQuery;
    }
    else if (query is WildcardQuery)
    {
        var wildcardQuery = (WildcardQuery)query;
        result = new Lucene.Net.Search.WildcardQuery(Term.ConvertToLuceneTerm(wildcardQuery.Term));
    }
    else if (query is DoubleNumericRangeQuery)
    {
        var doubleRange = (DoubleNumericRangeQuery)query;
        result = Lucene.Net.Search.NumericRangeQuery.NewDoubleRange(
            doubleRange.FieldName,
            doubleRange.Min,
            doubleRange.Max,
            doubleRange.MinInclusive,
            doubleRange.MaxInclusive);
    }
    else if (query is FloatNumericRangeQuery)
    {
        var floatRange = (FloatNumericRangeQuery)query;
        result = Lucene.Net.Search.NumericRangeQuery.NewFloatRange(
            floatRange.FieldName,
            floatRange.Min,
            floatRange.Max,
            floatRange.MinInclusive,
            floatRange.MaxInclusive);
    }
    else if (query is IntNumericRangeQuery)
    {
        var intRange = (IntNumericRangeQuery)query;
        result = Lucene.Net.Search.NumericRangeQuery.NewIntRange(
            intRange.FieldName,
            intRange.Min,
            intRange.Max,
            intRange.MinInclusive,
            intRange.MaxInclusive);
    }
    else if (query is LongNumericRangeQuery)
    {
        var longRange = (LongNumericRangeQuery)query;
        result = Lucene.Net.Search.NumericRangeQuery.NewLongRange(
            longRange.FieldName,
            longRange.Min,
            longRange.Max,
            longRange.MinInclusive,
            longRange.MaxInclusive);
    }
    else if (query is QueryParserQuery)
    {
        var queryParserQuery = (QueryParserQuery)query;
        var queryParser = new Lucene.Net.QueryParsers.QueryParser(
            Version.LUCENE_30,
            queryParserQuery.DefaultField,
            new StandardAnalyzer(Version.LUCENE_30))
        {
            AllowLeadingWildcard = queryParserQuery.AllowLeadingWildcard
        };
        result = queryParser.Parse(queryParserQuery.Query);
    }
    else if (query is MultiFieldQueryParserQuery)
    {
        var multiFieldQuery = (MultiFieldQueryParserQuery)query;
        // NOTE(review): this intentionally mutates the incoming query object
        // when FieldNames is null; callers may observe the assigned list.
        if (multiFieldQuery.FieldNames == null)
        {
            multiFieldQuery.FieldNames = new List<string>();
        }
        var queryParser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(
            Lucene.Net.Util.Version.LUCENE_30,
            multiFieldQuery.FieldNames.ToArray(),
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        result = queryParser.Parse(multiFieldQuery.Query);
    }
    else
    {
        throw new ArgumentException(@"Unknown or invalid query object", "query");
    }

    if (query.Boost.HasValue)
    {
        result.Boost = query.Boost.Value;
    }

    return result;
}
/// <summary>
/// get parser by fieldname
/// </summary>
/// <param name="fieldName">default field the returned parser searches</param>
/// <returns>a cached <see cref="Lucene.Net.QueryParsers.QueryParser"/> for the field</returns>
private static Lucene.Net.QueryParsers.QueryParser GetParser(string fieldName)
{
    // FIX: the original read parserCache[fieldName] OUTSIDE the lock, so a
    // concurrent writer could mutate the Dictionary during the lookup
    // (Dictionary is not thread-safe for concurrent read/write). Keep the
    // lookup and the insert inside the same critical section.
    lock (parserCache)
    {
        Lucene.Net.QueryParsers.QueryParser parser;
        if (!parserCache.TryGetValue(fieldName, out parser))
        {
            // TODO: extract the Lucene version number into a single shared definition
            parser = new Lucene.Net.QueryParsers.QueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                fieldName,
                new LuceneAnalyser());
            parserCache[fieldName] = parser;
        }
        return parser;
    }
}