public void Close()
{
    lock (searcherLock)
    {
        EnsureOpen();
        closed = true;
        if (searcher != null)
        {
            searcher.Close();
        }
        searcher = null;
    }
}
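// NOTE: a hedged sketch, not part of the original snippet. In Lucene.Net 3.x
// Close() was folded into IDisposable.Dispose(), so the same teardown is
// usually written as a Dispose method (field names below match the snippet
// above and are otherwise illustrative):
public void Dispose()
{
    lock (searcherLock)
    {
        closed = true;
        if (searcher != null)
        {
            searcher.Dispose();   // Dispose() supersedes Close() in Lucene.Net 3.x
        }
        searcher = null;
    }
}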
// main search method
private static IEnumerable<SampleData> _search(string searchQuery, string searchField = "")
{
    // validation: a query consisting only of wildcard characters is not searchable
    if (string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
        return new List<SampleData>();

    // set up lucene searcher; the using block disposes it, so no explicit
    // Close()/Dispose() calls on the searcher are needed
    using (var searcher = new IndexSearcher(_directory, false))
    {
        const int hits_limit = 1000;
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);

        // search by single field
        if (!string.IsNullOrEmpty(searchField))
        {
            var parser = new QueryParser(Version.LUCENE_29, searchField, analyzer);
            var query = parseQuery(searchQuery, parser);
            var hits = searcher.Search(query, hits_limit).ScoreDocs;
            var results = _mapLuceneToDataList(hits, searcher);
            analyzer.Close();
            return results;
        }
        // search by multiple fields (Sort.INDEXORDER returns hits in index
        // order; drop the Sort argument to rank by relevance instead)
        else
        {
            var parser = new MultiFieldQueryParser(
                Version.LUCENE_29, new[] { "Id", "Name", "Description" }, analyzer);
            var query = parseQuery(searchQuery, parser);
            var hits = searcher.Search(query, null, hits_limit, Sort.INDEXORDER).ScoreDocs;
            var results = _mapLuceneToDataList(hits, searcher);
            analyzer.Close();
            return results;
        }
    }
}
public static LuceneResult SearchBIMXchange(string field, string key, int pageSize, int pageNumber)
{
    const string luceneIndexPath = "C:\\LuceneIndex";

    var directory = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    var parser = new QueryParser(Version.LUCENE_29, field, analyzer);
    var query = parser.Parse(String.Format("{0}*", key));
    var searcher = new IndexSearcher(directory, true);
    var topDocs = searcher.Search(query, 1000000);

    var docs = new List<Document>();
    var start = (pageNumber - 1) * pageSize;
    for (var i = start; i < start + pageSize && i < topDocs.TotalHits; i++)
    {
        var scoreDoc = topDocs.ScoreDocs[i];
        var docId = scoreDoc.doc;
        var doc = searcher.Doc(docId);
        docs.Add(doc);
    }
    searcher.Close();
    directory.Close();

    var result = new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
    return result;
}
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
    AddDoc("one", iw);
    AddDoc("two", iw);
    AddDoc("three four", iw);
    iw.Close();

    IndexSearcher is_Renamed = new IndexSearcher(dir);
    Hits hits = is_Renamed.Search(new MatchAllDocsQuery());
    Assert.AreEqual(3, hits.Length());

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq);
    Assert.AreEqual(3, hits.Length());

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq);
    Assert.AreEqual(1, hits.Length());

    // delete a document:
    is_Renamed.GetIndexReader().DeleteDocument(0);
    hits = is_Renamed.Search(new MatchAllDocsQuery());
    Assert.AreEqual(2, hits.Length());

    is_Renamed.Close();
}
private bool isInIndex(IndexableFileInfo fileInfo)
{
    IndexSearcher searcher = new IndexSearcher(this.luceneIndexDir);
    try
    {
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(new Term("filename", fileInfo.Filename)), BooleanClause.Occur.MUST);
        bq.Add(new TermQuery(new Term("LastModified", DateTools.DateToString(fileInfo.LastModified, DateTools.Resolution.SECOND))), BooleanClause.Occur.MUST);

        Hits hits = searcher.Search(bq);
        int count = hits.Length();
        if (count > 0)
        {
            return true;
        }
    }
    catch (Exception ex)
    {
        Console.Write(ex.Message);
    }
    finally
    {
        searcher.Close();
    }
    return false;
}
public void TestBooleanQuerySerialization()
{
    Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();
    lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Occur.MUST);

    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
        new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream ms = new System.IO.MemoryStream();
    bf.Serialize(ms, lucQuery);
    ms.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
    ms.Close();

    Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
    int hitCount = searcher.Search(lucQuery, 20).TotalHits;
    searcher.Close();

    searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
    int hitCount2 = searcher.Search(lucQuery2, 20).TotalHits;
    Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
}
public void Initialize_Indexes_All_Nodes()
{
    string elementIdForTestingSearch = _deepNodeFinder.GetNodesForIndexing()[0].Id;
    int expectedNumNodes = _deepNodeFinder.GetNodesForIndexing().Length;
    Assert.AreEqual("usfr-pte_NetCashFlowsProvidedUsedOperatingActivitiesDirectAbstract", elementIdForTestingSearch, "TEST SANITY: element id for test search");
    Assert.AreEqual(1595, expectedNumNodes, "TEST SANITY: Number of nodes found in the test taxonomy");

    IndexReader indexReader = IndexReader.Open(_indexMgr.LuceneDirectory_ForTesting);
    Assert.AreEqual(expectedNumNodes, indexReader.NumDocs(), "An incorrect number of documents were found in the Lucene directory after initialization");

    IndexSearcher searcher = new IndexSearcher(_indexMgr.LuceneDirectory_ForTesting);
    try
    {
        Hits results = searcher.Search(new TermQuery(new Term(LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD, elementIdForTestingSearch)));
        Assert.AreEqual(1, results.Length(), "Search results should only have 1 hit");
        Assert.AreEqual(elementIdForTestingSearch, results.Doc(0).Get(LuceneNodeIndexer.ELEMENTID_FIELD), "Search results yielded the wrong element!");
    }
    finally
    {
        searcher.Close();
    }
}
public void HelloWorldTest()
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

    Document doc = new Document();
    doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Commit();
    writer.Close();

    QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
    Query query = parser.Parse("sample test");

    //Setup searcher
    IndexSearcher searcher = new IndexSearcher(directory, true);
    //Do the search; iterate over the returned page (ScoreDocs), not TotalHits,
    //which may exceed the 10 hits requested above
    var hits = searcher.Search(query, null, 10);
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        var doc1 = hits.ScoreDocs[i];
    }
    searcher.Close();
    directory.Close();
}
public override List<ISearchEntity> GetSearchResult(out int MatchCount)
{
    Analyzer analyzer = new StandardAnalyzer();
    IndexSearcher searcher = new IndexSearcher(searchInfo.ConfigElement.IndexDirectory);
    MultiFieldQueryParser parserName = new MultiFieldQueryParser(new string[] { "title", "content", "keywords" }, analyzer);
    Query queryName = parserName.Parse(searchInfo.QueryString);
    Hits hits = searcher.Search(queryName);

    List<ISearchEntity> ResultList = new List<ISearchEntity>();
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        ResultList.Add((ISearchEntity)new NewsModel()
        {
            EntityIdentity = Convert.ToInt32(doc.Get("newsid")),
            Title = Convert.ToString(doc.Get("title")),
            Content = Convert.ToString(doc.Get("content")),
            Keywords = doc.Get("keywords")
        });
    }

    // read the hit count before closing the searcher
    MatchCount = hits.Length();
    searcher.Close();
    return ResultList;
}
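// NOTE: a hedged sketch, not part of the original snippet. Hits was deprecated
// in Lucene 2.9 and removed in 3.0; the same loop against the TopDocs API that
// replaced it looks roughly like this. The explicit hit cap (1000) is an
// assumption, not something the original code specified:
TopDocs topDocs = searcher.Search(queryName, 1000);
foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
{
    Document doc = searcher.Doc(scoreDoc.doc);
    // ... map doc to the result model exactly as above ...
}
int matchCount = topDocs.TotalHits;   // total matches, not just the page returned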
private void SwapSearcher(Directory dir)
{
    /*
     * opening a searcher is possibly very expensive.
     * We rather close it again if the Spellchecker was closed during
     * this operation than block access to the current searcher while opening.
     */
    IndexSearcher indexSearcher = CreateSearcher(dir);
    lock (searcherLock)
    {
        if (closed)
        {
            indexSearcher.Close();
            throw new AlreadyClosedException("Spellchecker has been closed");
        }
        if (searcher != null)
        {
            searcher.Close();
        }
        // set the spellindex in the sync block - ensure consistency.
        searcher = indexSearcher;
        this.spellindex = dir;
    }
}
public virtual void TestSorting()
{
    Directory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(1000);
    writer.AddDocument(Adoc(new System.String[] { "id", "a", "title", "ipod", "str_s", "a" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "b", "title", "ipod ipod", "str_s", "b" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "c", "title", "ipod ipod ipod", "str_s", "c" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "x", "title", "boosted", "str_s", "x" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "y", "title", "boosted boosted", "str_s", "y" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "z", "title", "boosted boosted boosted", "str_s", "z" }));
    IndexReader r = writer.GetReader();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(r);
    RunTest(searcher, true);
    RunTest(searcher, false);

    searcher.Close();
    r.Close();
    directory.Close();
}
public SearchResults Find(string terms)
{
    Directory directory = FSDirectory.GetDirectory("./index", false);

    // Now search the index:
    var isearcher = new IndexSearcher(directory);

    // Parse a simple query that searches for "text":
    //Query query = QueryParser.Parse("text", "fieldname", analyzer);
    var qp = new QueryParser("description", _analyzer);
    Query query = qp.Parse(terms);
    Hits hits = isearcher.Search(query);

    var sr = new SearchResults();
    // Iterate through the results:
    for (int i = 0; i < hits.Length(); i++)
    {
        Document hitDoc = hits.Doc(i);
        sr.Add(new Result()
        {
            Name = hitDoc.Get("name"),
            Description = hitDoc.Get("description")
        });
    }
    isearcher.Close();
    directory.Close();
    return sr;
}
public static LuceneResult MultiSearchBIMXchange(Dictionary<string, string> terms, int pageSize, int pageNumber)
{
    var directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
    var booleanQuery = new BooleanQuery();
    foreach (var term in terms)
    {
        var query = new TermQuery(new Term(term.Key, term.Value));
        booleanQuery.Add(query, BooleanClause.Occur.MUST);
    }

    var searcher = new IndexSearcher(directory, true);
    // request enough hits to cover the page being asked for; the fixed limit
    // of 10 in the original would make ScoreDocs[i] throw once paging moved
    // past the first ten results
    var topDocs = searcher.Search(booleanQuery, pageNumber * pageSize);

    var docs = new List<Document>();
    var start = (pageNumber - 1) * pageSize;
    for (var i = start; i < start + pageSize && i < topDocs.TotalHits; i++)
    {
        var scoreDoc = topDocs.ScoreDocs[i];
        var docId = scoreDoc.doc;
        var doc = searcher.Doc(docId);
        docs.Add(doc);
    }
    searcher.Close();
    directory.Close();

    var result = new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
    return result;
}
public Data searchLucene(Data data)
{
    Search_gl search = new Search_gl();
    List<string> item = new List<string>();
    Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\LuceneIndex"));
    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    //QueryParser queryParser = new QueryParser(Version.LUCENE_29, "summary", analyzer); // search a single field
    MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "name", "summary" }, analyzer); // search multiple fields
    Query query = parser.Parse((data.getString("search")) + "*"); // can't search blank text with a wildcard as the first character

    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, collector);
    ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

    int count = hits.Length;
    for (int i = 0; i < count; i++)
    {
        int docId = hits[i].doc;
        float score = hits[i].score;
        Document doc = searcher.Doc(docId);
        string id = doc.Get("id");
        item.Add(id);
    }
    Data list = search.search(data, item.ToArray());
    reader.Close();
    searcher.Close();
    return list;
}
private void btnExecuteSearch_Click(object sender, EventArgs e)
{
    Directory indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(tempPath));
    IndexSearcher searcher = new IndexSearcher(indexDirectory, true); // read-only=true

    // TODO: QueryParser support for Hebrew terms (most concerning issue is with acronyms - mid-word quotes)
    QueryParser qp = new QueryParser("content", analyzer);
    qp.SetDefaultOperator(QueryParser.Operator.AND);
    Query query = qp.Parse(txbSearchQuery.Text);

    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;

    // Iterate through the results:
    BindingList<SearchResult> l = new BindingList<SearchResult>();
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = searcher.Doc(hits[i].doc);
        SearchResult sr = new SearchResult(hitDoc.GetField("title").StringValue(), hitDoc.GetField("path").StringValue(), hits[i].score);
        l.Add(sr);
    }
    searcher.Close();
    indexDirectory.Close();

    dgvResults.DataSource = l;
}
protected int getHitCount(String fieldName, String searchString)
{
    IndexSearcher searcher = new IndexSearcher(directory, true); //4
    Term t = new Term(fieldName, searchString);
    Query query = new TermQuery(t);                              //5
    int hitCount = TestUtil.hitCount(searcher, query);           //6
    searcher.Close();
    return hitCount;
}
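// NOTE: TestUtil.hitCount is a helper the snippet above relies on but does not
// show. A minimal sketch of what it presumably does: run the query asking for
// a single hit and read the total match count from TopDocs.
public static class TestUtil
{
    public static int hitCount(IndexSearcher searcher, Query query)
    {
        return searcher.Search(query, 1).TotalHits;
    }
}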
protected void Page_Load(object sender, EventArgs e)
{
    //if (Session["KeyWords"] == null ? false : true)
    //{
    //    Response.Redirect("Search.aspx");
    //}
    String text = Session["KeyWords"].ToString();
    ChineseAnalyzer analyzer = new ChineseAnalyzer();
    TokenStream ts = analyzer.TokenStream("ItemName", new System.IO.StringReader(text));
    Lucene.Net.Analysis.Token token;
    try
    {
        int n = 0;
        while ((token = ts.Next()) != null)
        {
            this.lbMsg.Text += (n++) + "->" + token.TermText() + " " + token.StartOffset() + " "
                + token.EndOffset() + " " + token.Type() + "<br>";
        }
    }
    catch
    {
        this.lbMsg.Text = "wrong";
    }

    //Analyzer analyzer = new StandardAnalyzer();
    Directory directory = FSDirectory.GetDirectory(Server.MapPath("/indexFile/"), false);
    IndexSearcher isearcher = new IndexSearcher(directory);
    Query query;
    query = QueryParser.Parse(Session["KeyWords"].ToString(), "ItemName", analyzer);
    //query = QueryParser.Parse("2", "nid", analyzer);
    Hits hits = isearcher.Search(query);
    // "共找到 N 条记录" = "found N matching records"
    this.lbMsg.Text += "<font color=red>共找到" + hits.Length() + "条记录</font><br>";
    for (int i = 0; i < hits.Length(); i++)
    {
        Document hitDoc = hits.Doc(i);
        // 编号 = item id, 分类 = category, 专题 = product, 标题 = title
        this.lbMsg.Text += "编号:" + hitDoc.Get("ItemID").ToString() + "<br>"
            + "分类:" + hitDoc.Get("CategoryName").ToString() + "<br>"
            + "专题:" + hitDoc.Get("ProductName").ToString() + "<br>"
            + "标题:<a href=" + hitDoc.Get("visiturl").ToString() + ">" + hitDoc.Get("ItemName").ToString() + "</a><br>";
    }
    isearcher.Close();
    directory.Close();
}
public virtual ActionResult SearchIndex(string term)
{
    //Setup indexer
    Directory directory = FSDirectory.GetDirectory("LuceneIndex", true);
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    IndexReader red = IndexReader.Open(directory);
    int totDocs = red.MaxDoc();
    red.Close();

    foreach (var ticket in _ticketRepo.GetTicketsByProject(CurrentProject, 0, 1000).Items)
    {
        AddListingToIndex(ticket, writer);
    }
    writer.Optimize();

    //Close the writer
    writer.Close();

    //Setup searcher
    IndexSearcher searcher = new IndexSearcher(directory);
    MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "summary", "keyName" }, analyzer);
    Query query = parser.Parse(term);
    Hits hits = searcher.Search(query);

    var tickets = new List<Ticket>();
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        int id = 0;
        if (int.TryParse(doc.Get("id"), out id))
        {
            tickets.Add(_ticketRepo.GetTicketById(id));
        }
    }

    //Clean up everything
    searcher.Close();
    directory.Close();

    return View(new SearchIndexModel() { Tickets = tickets });
}
public void TestMemLeakage()
{
    CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = true;

    int LoopCount = 100;
    Analyzer[] analyzers = new Analyzer[LoopCount];
    RAMDirectory[] dirs = new RAMDirectory[LoopCount];
    IndexWriter[] indexWriters = new IndexWriter[LoopCount];

    System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
    {
        analyzers[i] = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT);
        dirs[i] = new RAMDirectory();
        indexWriters[i] = new IndexWriter(dirs[i], analyzers[i], true, IndexWriter.MaxFieldLength.UNLIMITED);
    });

    System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
    {
        Document document = new Document();
        document.Add(new Field("field", "some test", Field.Store.NO, Field.Index.ANALYZED));
        indexWriters[i].AddDocument(document);
    });

    System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
    {
        analyzers[i].Dispose();
        indexWriters[i].Dispose();
    });

    System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
    {
        IndexSearcher searcher = new IndexSearcher(dirs[i]);
        TopDocs d = searcher.Search(new TermQuery(new Term("field", "test")), 10);
        searcher.Close();
    });

    System.Threading.Tasks.Parallel.For(0, LoopCount, (i) => dirs[i].Dispose());

    GC.Collect(GC.MaxGeneration);
    GC.WaitForPendingFinalizers();

    int aliveObjects = 0;
    foreach (WeakReference w in CloseableThreadLocalProfiler.Instances)
    {
        object o = w.Target;
        if (o != null)
            aliveObjects++;
    }
    CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = false;

    Assert.AreEqual(0, aliveObjects);
}
public static void Main(System.String[] args)
{
    try
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        int MAX_DOCS = 225;
        for (int j = 0; j < MAX_DOCS; j++)
        {
            Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
            d.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.TOKENIZED));
            d.Add(new Field(ID_FIELD, System.Convert.ToString(j), Field.Store.YES, Field.Index.TOKENIZED));
            writer.AddDocument(d);
        }
        writer.Close();

        // try a search without OR
        Searcher searcher = new IndexSearcher(directory);
        Hits hits = null;

        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);

        Query query = parser.Parse(HIGH_PRIORITY);
        System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));

        hits = searcher.Search(query);
        PrintHits(hits);
        searcher.Close();

        // try a new search with OR
        searcher = new IndexSearcher(directory);
        hits = null;

        parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
        query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
        System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));

        hits = searcher.Search(query);
        PrintHits(hits);
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Method for retrieving a list of documents where the keyword is present
/// </summary>
/// <param name="ObjectType">[not implemented] search only available for documents</param>
/// <param name="Keyword">The word being searched for</param>
/// <param name="Max">The maximum limit on results returned</param>
/// <returns>A list of document names indexed by the id of the document</returns>
public static Hashtable Search(Guid ObjectType, string Keyword, int Max)
{
    Hashtable results = new Hashtable();
    IndexSearcher searcher = new IndexSearcher(index.Indexer.IndexDirectory);
    Query query = QueryParser.Parse(Keyword, "Content", new StandardAnalyzer());
    Hits hits;

    // Sorting
    SortField[] sf = { new SortField("SortText") };

    try
    {
        hits = searcher.Search(query, new Sort(sf));
        if (hits.Length() < Max)
            Max = hits.Length();
        for (int i = 0; i < Max; i++)
        {
            try
            {
                results.Add(hits.Doc(i).Get("Id"), hits.Doc(i).Get("Text"));
            }
            catch
            {
                // ignore documents missing either field
            }
        }
    }
    finally
    {
        // close the searcher on both the success and failure paths; moving
        // cleanup into finally also avoids the original `throw ee;`, which
        // would have reset the exception's stack trace
        searcher.Close();
    }
    return results;
}
public static void Main(System.String[] args)
{
    try
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        System.String[] docs = new System.String[]
        {
            "a b c d e",
            "a b c d e a b c d e",
            "a b c d e f g h i j",
            "a c e",
            "e c a",
            "a c e a c e",
            "a c e a b c"
        };
        for (int j = 0; j < docs.Length; j++)
        {
            Document d = new Document();
            d.Add(Field.Text("contents", docs[j]));
            writer.AddDocument(d);
        }
        writer.Close();

        Searcher searcher = new IndexSearcher(directory);

        System.String[] queries = new System.String[] { "\"a c e\"" };
        Hits hits = null;

        QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
        parser.SetPhraseSlop(4);
        for (int j = 0; j < queries.Length; j++)
        {
            Query query = parser.Parse(queries[j]);
            System.Console.Out.WriteLine("Query: " + query.ToString("contents"));

            //DateFilter filter = new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
            //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
            //System.out.println(filter);

            hits = searcher.Search(query);
            System.Console.Out.WriteLine(hits.Length() + " total results");
            for (int i = 0; i < hits.Length() && i < 10; i++)
            {
                Document d = hits.Doc(i);
                System.Console.Out.WriteLine(i + " " + hits.Score(i) + " " + d.Get("contents"));
            }
        }
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
static void Main(string[] args)
{
    //Setup indexer
    Directory directory = FSDirectory.GetDirectory("LuceneIndex");
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer);
    IndexReader red = IndexReader.Open(directory);
    int totDocs = red.MaxDoc();
    red.Close();

    //Add documents to the index
    string text = String.Empty;
    Console.WriteLine("Enter the text you want to add to the index:");
    Console.Write(">");
    int txts = totDocs;
    int j = 0;
    while ((text = Console.ReadLine()) != String.Empty)
    {
        AddTextToIndex(txts++, text, writer);
        j++;
        Console.Write(">");
    }
    writer.Optimize();

    //Close the writer
    writer.Flush();
    writer.Close();
    Console.WriteLine(j + " lines added, " + txts + " documents total");

    //Setup searcher
    IndexSearcher searcher = new IndexSearcher(directory);
    QueryParser parser = new QueryParser("postBody", analyzer);

    Console.WriteLine("Enter the search string:");
    Console.Write(">");
    while ((text = Console.ReadLine()) != String.Empty)
    {
        Search(text, searcher, parser);
        Console.Write(">");
    }

    //Clean up everything
    searcher.Close();
    directory.Close();
}
private void CloseSearchers(IndexReader primary_reader, LNS.IndexSearcher primary_searcher,
                            IndexReader secondary_reader, LNS.IndexSearcher secondary_searcher)
{
    primary_searcher.Close();
    if (secondary_searcher != null)
        secondary_searcher.Close();

    ReleaseReader(primary_reader);
    if (secondary_reader != null)
        ReleaseReader(secondary_reader);
}
public virtual void TestDeprecatedCstrctors()
{
    Query query = new RangeQuery(null, new Term("content", "C"), false);
    InitializeIndex(new System.String[] { "A", "B", "C", "D" });
    IndexSearcher searcher = new IndexSearcher(dir);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "A,B,C,D, only B in range");
    searcher.Close();

    query = new RangeQuery(new Term("content", "C"), null, false);
    InitializeIndex(new System.String[] { "A", "B", "C", "D" });
    searcher = new IndexSearcher(dir);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "A,B,C,D, only B in range");
    searcher.Close();
}
// search methods
public static IEnumerable<SampleData> GetAllIndexRecords()
{
    // validate search index
    if (!System.IO.Directory.EnumerateFiles(_luceneDir).Any())
        return new List<SampleData>();

    // set up lucene searcher
    var searcher = new IndexSearcher(_directory, false);
    var reader = IndexReader.Open(_directory, false);
    var docs = new List<Document>();
    var term = reader.TermDocs();
    while (term.Next())
        docs.Add(searcher.Doc(term.Doc()));

    reader.Close();
    reader.Dispose();
    searcher.Close();
    searcher.Dispose();
    return _mapLuceneToDataList(docs);
}
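// NOTE: a hedged alternative sketch, not part of the original snippet. The
// same "fetch everything" result can be had with a MatchAllDocsQuery instead
// of walking TermDocs; the Math.Max guard is an assumption to keep the hit
// cap positive when the index is empty.
var allDocs = searcher.Search(new MatchAllDocsQuery(), Math.Max(1, reader.MaxDoc())).ScoreDocs;
foreach (var scoreDoc in allDocs)
    docs.Add(searcher.Doc(scoreDoc.doc));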
/***
 * Understands the lucene query syntax
 */
public List<Utilities.Language.TextIndexing.IndexResult> GetDocumentsWithQuery(string query)
{
    List<Utilities.Language.TextIndexing.IndexResult> fingerprints = new List<Utilities.Language.TextIndexing.IndexResult>();
    HashSet<string> fingerprints_already_seen = new HashSet<string>();

    try
    {
        using (Lucene.Net.Index.IndexReader index_reader = Lucene.Net.Index.IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (Lucene.Net.Search.IndexSearcher index_searcher = new Lucene.Net.Search.IndexSearcher(index_reader))
            {
                Lucene.Net.QueryParsers.QueryParser query_parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_29, "content", analyzer);
                Lucene.Net.Search.Query query_object = query_parser.Parse(query);
                Lucene.Net.Search.Hits hits = index_searcher.Search(query_object);

                var i = hits.Iterator();
                while (i.MoveNext())
                {
                    Lucene.Net.Search.Hit hit = (Lucene.Net.Search.Hit)i.Current;
                    string fingerprint = hit.Get("fingerprint");
                    string page = hit.Get("page");

                    if (!fingerprints_already_seen.Contains(fingerprint))
                    {
                        fingerprints_already_seen.Add(fingerprint);
                        IndexResult index_result = new IndexResult { fingerprint = fingerprint, score = hit.GetScore() };
                        fingerprints.Add(index_result);
                    }
                }

                // Close the index
                index_searcher.Close();
            }
            index_reader.Close();
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "GetDocumentsWithQuery: There was a problem opening the index file for searching.");
    }

    return fingerprints;
}
public TextSearchResult Search(string query, Tenant tenant)
{
    var result = new TextSearchResult(module);
    if (string.IsNullOrEmpty(query) || !Directory.Exists(path))
    {
        return result;
    }

    var dir = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo(path));
    var searcher = new IndexSearcher(dir, false);
    try
    {
        var analyzer = new AnalyzersProvider().GetAnalyzer(tenant.GetCulture().TwoLetterISOLanguageName);
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Text", analyzer);
        parser.SetDefaultOperator(QueryParser.Operator.AND);

        if (TextIndexCfg.MaxQueryLength < query.Length)
        {
            query = query.Substring(0, TextIndexCfg.MaxQueryLength);
        }

        Query q = null;
        try
        {
            q = parser.Parse(query);
        }
        catch (Lucene.Net.QueryParsers.ParseException)
        {
            // fall through and retry with the escaped query below
        }
        if (q == null)
        {
            q = parser.Parse(QueryParser.Escape(query));
        }

#pragma warning disable 618
        var hits = searcher.Search(q);
#pragma warning restore 618

        for (int i = 0; i < hits.Length(); i++)
        {
            var doc = hits.Doc(i);
            result.AddIdentifier(doc.Get("Id"));
        }
    }
    finally
    {
        searcher.Close();
        dir.Close();
    }
    return result;
}
/// <summary>
/// Open the index
/// </summary>
private void OpenIndex()
{
    // If no index exists yet, create an empty one
    if (!File.Exists(Directorys.IndexDirectory + "segments.gen"))
    {
        IndexWriter empty = new IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), true);
        empty.Optimize();
        empty.Close();
    }
    // If a searcher is already open, close it first
    if (searcher != null)
    {
        searcher.Close();
    }
    searcher = new Lucene.Net.Search.IndexSearcher(Directorys.IndexDirectory);
}
public int getNumDocsInIndex()
{
    try
    {
        IndexSearcher searcher = new IndexSearcher(luceneIndexDir);
        try
        {
            return searcher.MaxDoc();
        }
        finally
        {
            searcher.Close();
        }
    }
    catch
    {
    }
    return -1;
}
public virtual void TestMultiValuedNRQ()
{
    System.Random rnd = NewRandom();

    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);

    //DecimalFormat format = new DecimalFormat("00000000000", new System.Globalization.CultureInfo("en-US").NumberFormat);

    for (int l = 0; l < 5000; l++)
    {
        Document doc = new Document();
        for (int m = 0, c = rnd.Next(10); m <= c; m++)
        {
            int value_Renamed = rnd.Next(System.Int32.MaxValue);
            doc.Add(new Field("asc", value_Renamed.ToString().PadLeft(11, '0'), Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new NumericField("trie", Field.Store.NO, true).SetIntValue(value_Renamed));
        }
        writer.AddDocument(doc);
    }
    writer.Close();

    Searcher searcher = new IndexSearcher(directory, true);
    for (int i = 0; i < 50; i++)
    {
        int lower = rnd.Next(System.Int32.MaxValue);
        int upper = rnd.Next(System.Int32.MaxValue);
        if (lower > upper)
        {
            int a = lower; lower = upper; upper = a;
        }
        TermRangeQuery cq = new TermRangeQuery("asc", lower.ToString().PadLeft(11, '0'), upper.ToString().PadLeft(11, '0'), true, true);
        System.Int32 tempAux = (System.Int32)lower;
        System.Int32 tempAux2 = (System.Int32)upper;
        NumericRangeQuery tq = NumericRangeQuery.NewIntRange("trie", tempAux, tempAux2, true, true);
        TopDocs trTopDocs = searcher.Search(cq, 1);
        TopDocs nrTopDocs = searcher.Search(tq, 1);
        Assert.AreEqual(trTopDocs.totalHits, nrTopDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
    }
    searcher.Close();
    directory.Close();
}
public static void Main(String[] args)
{
    if (args.Length != 2)
    {
        Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
        return;
    }

    var directory = FSDirectory.Open(new DirectoryInfo(args[0]));
    var searcher = new IndexSearcher(directory, true);

    String query = args[1];
    const string field = "contents";

    Query q = Expand(query, searcher, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), field, 0.9f);
    System.Console.Out.WriteLine("Query: " + q.ToString(field));

    searcher.Close();
    directory.Close();
}
public override List<ISearchEntity> GetSearchResult(out int MatchCount)
{
    Analyzer analyzer = new StandardAnalyzer();
    IndexSearcher searcher = new IndexSearcher(searchInfo.ConfigElement.IndexDirectory);
    MultiFieldQueryParser parserName = new MultiFieldQueryParser(new string[] { "productname", "keywords", "description" }, analyzer);
    Query queryName = parserName.Parse(searchInfo.QueryString);
    Query queryCategory = new WildcardQuery(new Term("catepath", "*" + searchInfo.Category + "*"));

    BooleanQuery bQuery = new BooleanQuery();
    bQuery.Add(queryName, BooleanClause.Occur.MUST);
    if (searchInfo.Category != 0)
        bQuery.Add(queryCategory, BooleanClause.Occur.MUST);

    Hits hits = searcher.Search(bQuery, GetSort());
    List<ISearchEntity> ResultList = new List<ISearchEntity>();
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        ResultList.Add((ISearchEntity)new ProductModel()
        {
            EntityIdentity = Convert.ToInt32(doc.Get("productid")),
            ProductName = doc.Get("productname"),
            CategoryID = Convert.ToInt32(doc.Get("cateid")),
            CategoryPath = doc.Get("catepath"),
            Keywords = doc.Get("keywords"),
            Description = doc.Get("description"),
            Price = Convert.ToDecimal(doc.Get("price")),
            CreateTime = Convert.ToDateTime(doc.Get("createtime")),
            UpdateTime = Convert.ToDateTime(doc.Get("updatetime")),
            ProductImage = Convert.ToString(doc.Get("mainimage"))
        });
    }

    // read the hit count before closing the searcher
    MatchCount = hits.Length();
    searcher.Close();
    return ResultList;
}
public virtual void TestExclusive()
{
    Query query = new TermRangeQuery("content", "A", "C", false, false);
    InitializeIndex(new System.String[] { "A", "B", "C", "D" });
    IndexSearcher searcher = new IndexSearcher(dir);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "A,B,C,D, only B in range");
    searcher.Close();

    InitializeIndex(new System.String[] { "A", "B", "D" });
    searcher = new IndexSearcher(dir);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "A,B,D, only B in range");
    searcher.Close();

    AddDoc("C");
    searcher = new IndexSearcher(dir);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "C added, still only B in range");
    searcher.Close();
}
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    //Index 1 Doc
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    //now serialize it
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    //Close DIR
    ramDIR.Close();
    ramDIR = null;

    //now deserialize
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

    //Add 1 more doc
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    //Search
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_CURRENT, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT));
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();

    Assert.AreEqual(topDocs.TotalHits, 2, "See the issue: LUCENENET-174");
}
protected void SearchButton_Click(object sender, EventArgs e)
{
    if (!string.IsNullOrEmpty(SearchTextBox.Text))
    {
        Lucene.Net.Store.RAMDirectory ramDir = new Lucene.Net.Store.RAMDirectory(luceneDBPath);
        String srch = SearchTextBox.Text;
        Lucene.Net.Search.IndexSearcher idx = new Lucene.Net.Search.IndexSearcher(ramDir);
        Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("_searchtxt", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
        qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.Operator.AND);
        Lucene.Net.Search.BooleanQuery.SetMaxClauseCount(100);
        Lucene.Net.Search.Hits hits = idx.Search(qp.Parse(srch));

        List<int> aIds = new List<int>();
        for (int i = 0; i < hits.Length(); i++)
        {
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            int aid = 0;
            if (int.TryParse(doc.Get("id"), out aid))
            {
                aIds.Add(aid);
            }
        }

        using (DataClassesDataContext dtx = new DataClassesDataContext())
        {
            var arts = from a in dtx.Articles
                       where aIds.Contains(a.Id)
                       select a;
            DataList1.DataSource = arts;
            DataList1.DataBind();
        }

        idx.Close();
        GC.Collect();
    }
}
public void TestCustomExternalQuery()
{
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query q1 = qp.Parse(qtxt);

    Query q = new CustomExternalQuery(q1);
    Log(q);

    IndexSearcher s = new IndexSearcher(dir);
    TopDocs hits = s.Search(q, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);
    for (int i = 0; i < N_DOCS; i++)
    {
        int doc = hits.ScoreDocs[i].Doc;
        float score = hits.ScoreDocs[i].Score;
        Assert.AreEqual(score, (float)1 + (4 * doc) % N_DOCS, 0.0001, "doc=" + doc);
    }
    s.Close();
}
public static void Main(System.String[] args)
{
    if (args.Length != 2)
    {
        System.Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
        return;
    }

    FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
    IndexSearcher searcher = new IndexSearcher(directory);

    System.String query = args[1];
    System.String field = "contents";

    Query q = Expand(query, searcher, new StandardAnalyzer(), field, 0.9f);
    System.Console.Out.WriteLine("Query: " + q.ToString(field));

    searcher.Close();
    directory.Close();
}
public virtual void TestInclusive()
{
    Query query = new TermRangeQuery("content", "A", "C", true, true);
    InitializeIndex(new System.String[] { "A", "B", "C", "D" });
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "A,B,C,D - A,B,C in range");
    searcher.Close();

    InitializeIndex(new System.String[] { "A", "B", "D" });
    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "A,B,D - A and B in range");
    searcher.Close();

    AddDoc("C");
    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "C added - A, B, C in range");
    searcher.Close();
}
public void TearDown()
{
    reader.Close();
    searcher.Close();
    directory.Close();
}
public override void TearDown()
{
    base.TearDown();
    searcher.Close();
    directory.Close();
}
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(phraseQuery, searcher);

    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(termQuery, searcher);

    searcher.Close();

    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);

    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));

    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(booleanQuery, searcher);

    searcher.Close();
    directory.Close();
}
public virtual void TestKnownSetOfDocuments()
{
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new MockRAMDirectory();

    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1, null);
        writer.AddDocument(testDoc2, null);
        writer.AddDocument(testDoc3, null);
        writer.AddDocument(testDoc4, null);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir, true, null);
        TermEnum termEnum = knownSearcher.reader_ForNUnit.Terms(null);
        TermDocs termDocs = knownSearcher.reader_ForNUnit.TermDocs(null);
        //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

        Similarity sim = knownSearcher.Similarity;
        while (termEnum.Next(null) == true)
        {
            Term term = termEnum.Term;
            //System.out.println("Term: " + term);
            termDocs.Seek(term, null);
            while (termDocs.Next(null))
            {
                int docId = termDocs.Doc;
                int freq = termDocs.Freq;
                //System.out.println("Doc Id: " + docId + " freq " + freq);
                ITermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field", null);
                float tf = sim.Tf(freq);
                float idf = sim.Idf(knownSearcher.DocFreq(term, null), knownSearcher.MaxDoc);
                //float qNorm = sim.queryNorm()
                //This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                //float coord = sim.coord()
                //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                Assert.IsTrue(vector != null);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text.Equals(vTerms[i]))
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
            //System.out.println("--------");
        }

        Query query = new TermQuery(new Term("field", "chocolate"));
        ScoreDoc[] hits = knownSearcher.Search(query, null, 1000, null).ScoreDocs;
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length == 3);
        float score = hits[0].Score;
        /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
        System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
        System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
        System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
        System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
        System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
        Assert.IsTrue(hits[0].Doc == 2);
        Assert.IsTrue(hits[1].Doc == 3);
        Assert.IsTrue(hits[2].Doc == 0);

        ITermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, "field", null);
        Assert.IsTrue(vector2 != null);
        //System.out.println("Vector: " + vector);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            //System.out.println("Term: " + term);
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            System.Int32 freqInt = -1;
            try
            {
                freqInt = (System.Int32)test4Map[term];
            }
            catch (Exception)
            {
                Assert.IsTrue(false);
            }
            Assert.IsTrue(freqInt == freq);
        }

        SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, mapper, null);
        var vectorEntrySet = mapper.TermVectorEntrySet;
        Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
        TermVectorEntry last = null;
        foreach (TermVectorEntry tve in vectorEntrySet)
        {
            if (tve != null && last != null)
            {
                Assert.IsTrue(last.Frequency >= tve.Frequency, "terms are not properly sorted");
                System.Int32 expectedFreq = (System.Int32)test4Map[tve.Term];
                //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                Assert.IsTrue(tve.Frequency == 2 * expectedFreq, "Frequency is not correct:");
            }
            last = tve;
        }

        FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, fieldMapper, null);
        var map = fieldMapper.FieldToTerms;
        Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
        vectorEntrySet = map["field"];
        Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
        Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);

        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.MergeFactor = 2;
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();
        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.OmitTermFreqAndPositions = true;
        d.Add(noTf);

        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);

        writer.AddDocument(d);
        //System.out.println(d);
    }

    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir, true);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
private void TestNormalization(int nDocs, System.String message)
{
    Query query = new TermQuery(new Term("contents", "doc0"));

    RAMDirectory ramDirectory1;
    IndexSearcher indexSearcher1;
    ScoreDoc[] hits;

    ramDirectory1 = new MockRAMDirectory();

    // First put the documents in the same index
    InitIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    InitIndex(ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1);
    indexSearcher1.SetDefaultFieldSortScoring(true, true);

    hits = indexSearcher1.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, message);

    // Store the scores for use later
    float[] scores = new float[] { hits[0].score, hits[1].score };
    Assert.IsTrue(scores[0] > scores[1], message);

    indexSearcher1.Close();
    ramDirectory1.Close();
    hits = null;

    RAMDirectory ramDirectory2;
    IndexSearcher indexSearcher2;

    ramDirectory1 = new MockRAMDirectory();
    ramDirectory2 = new MockRAMDirectory();

    // Now put the documents in a different index
    InitIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    InitIndex(ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1);
    indexSearcher1.SetDefaultFieldSortScoring(true, true);
    indexSearcher2 = new IndexSearcher(ramDirectory2);
    indexSearcher2.SetDefaultFieldSortScoring(true, true);

    Searcher searcher = GetMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });

    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, message);

    // The scores should be the same (within reason)
    Assert.AreEqual(scores[0], hits[0].score, 1e-6, message); // This will be a document from ramDirectory1
    Assert.AreEqual(scores[1], hits[1].score, 1e-6, message); // This will be a document from ramDirectory2

    // Adding a Sort.RELEVANCE object should not change anything
    hits = searcher.Search(query, null, 1000, Sort.RELEVANCE).ScoreDocs;
    Assert.AreEqual(2, hits.Length, message);
    Assert.AreEqual(scores[0], hits[0].score, 1e-6, message); // This will be a document from ramDirectory1
    Assert.AreEqual(scores[1], hits[1].score, 1e-6, message); // This will be a document from ramDirectory2

    searcher.Close();
    ramDirectory1.Close();
    ramDirectory2.Close();
}
public virtual void TestQuery()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(2); // force multi-segment
    AddDoc("one", iw, 1f);
    AddDoc("two", iw, 20f);
    AddDoc("three four", iw, 300f);
    iw.Close();

    IndexReader ir = IndexReader.Open(dir);
    IndexSearcher is_Renamed = new IndexSearcher(ir);
    ScoreDoc[] hits;

    // assert with norms scoring turned off
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "three four");

    // assert with norms scoring turned on
    MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "two");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "one");

    // change norm & retest
    ir.SetNorm(0, "key", 400f);
    normsQuery = new MatchAllDocsQuery("key");
    hits = is_Renamed.Search(normsQuery, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(ir.Document(hits[0].doc).Get("key"), "one");
    Assert.AreEqual(ir.Document(hits[1].doc).Get("key"), "three four");
    Assert.AreEqual(ir.Document(hits[2].doc).Get("key"), "two");

    // some artificial queries to trigger the use of skipTo():
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    bq = new BooleanQuery();
    bq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bq.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    hits = is_Renamed.Search(bq, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    // delete a document:
    is_Renamed.GetIndexReader().DeleteDocument(0);
    hits = is_Renamed.Search(new MatchAllDocsQuery(), null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString()
    QueryParser qp = new QueryParser("key", analyzer);
    hits = is_Renamed.Search(qp.Parse(new MatchAllDocsQuery().ToString()), null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    // test parsable toString() with non default boost
    Query maq = new MatchAllDocsQuery();
    maq.SetBoost(2.3f);
    Query pq = qp.Parse(maq.ToString());
    hits = is_Renamed.Search(pq, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);

    is_Renamed.Close();
    ir.Close();
    dir.Close();
}