/// <summary>
/// Runs a prefix search ("key*") on <paramref name="field"/> against the on-disk
/// index and returns one page of matching documents plus the total hit count.
/// </summary>
/// <param name="field">Index field to query.</param>
/// <param name="key">Search term; a trailing wildcard is appended.</param>
/// <param name="pageSize">Number of documents per page.</param>
/// <param name="pageNumber">1-based page number.</param>
public static LuceneResult SearchBIMXchange(string field, string key, int pageSize, int pageNumber)
{
    // NOTE(review): hard-coded index location — consider moving to configuration.
    const string luceneIndexPath = "C:\\LuceneIndex";

    var directory = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
    try
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var parser = new QueryParser(Version.LUCENE_29, field, analyzer);
        var query = parser.Parse(String.Format("{0}*", key)); // prefix query

        var searcher = new IndexSearcher(directory, true); // read-only
        try
        {
            var topDocs = searcher.Search(query, 1000000);

            // Page window [start, start + pageSize), clamped to both the hit
            // count and the actual ScoreDocs array (which holds at most the
            // requested 1,000,000 entries even if TotalHits is larger).
            var docs = new List<Document>();
            var start = (pageNumber - 1) * pageSize;
            for (var i = start; i < start + pageSize && i < topDocs.TotalHits && i < topDocs.ScoreDocs.Length; i++)
            {
                var scoreDoc = topDocs.ScoreDocs[i];
                docs.Add(searcher.Doc(scoreDoc.doc));
            }

            return new LuceneResult { Results = docs, TotalCount = topDocs.TotalHits };
        }
        finally
        {
            // Previously leaked when Parse/Search/Doc threw.
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Puts the object into search mode: opens the index directory found at
/// <paramref name="path"/> and builds a SimpleAnalyzer-backed query parser
/// over the default text field. No writer is needed for searching.
/// </summary>
public void Searcher(string path)
{
    writer = null; // search-only: no index writer required
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(path);
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(VERSION, TEXT, analyzer);
}
// Indexes 2000 Email entities, optimizes the Lucene index through the
// SearchFactory, then verifies a term query still finds all of them.
public void Optimize()
{
    // Phase 1: persist and index 2000 emails.
    IFullTextSession s = Search.CreateFullTextSession(OpenSession());
    ITransaction tx = s.BeginTransaction();
    int loop = 2000;
    for (int i = 0; i < loop; i++)
    {
        s.Persist(new Email(i + 1, "JBoss World Berlin", "Meet the guys who wrote the software"));
    }
    tx.Commit();
    s.Close();

    // Phase 2: optimize the Email index in a fresh session/transaction.
    s = Search.CreateFullTextSession(OpenSession());
    tx = s.BeginTransaction();
    s.SearchFactory.Optimize(typeof(Email));
    tx.Commit();
    s.Close();

    // Check non-indexed object get indexed by s.index;
    s = new FullTextSessionImpl(OpenSession());
    tx = s.BeginTransaction();
    QueryParser parser = new QueryParser("id", new StopAnalyzer());
    // Every persisted email contains "wrote" in its body, so all 2000 must match.
    int result = s.CreateFullTextQuery(parser.Parse("Body:wrote")).List().Count;
    Assert.AreEqual(2000, result);
    // Cleanup: delete everything that was persisted.
    s.Delete("from System.Object");
    tx.Commit();
    s.Close();
}
// Main search method: runs searchQuery against a single field when
// searchField is given, otherwise against Id/Name/Description.
// Returns an empty list for queries consisting only of wildcards.
private static IEnumerable<SampleData> _search(string searchQuery, string searchField = "")
{
    // Validation: a query that is nothing but * / ? would match everything.
    if (string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
        return new List<SampleData>();

    const int hits_limit = 1000;

    // The using block owns searcher disposal; the old code additionally
    // called Close() AND Dispose() inside it (double dispose) and leaked
    // the analyzer whenever Search threw.
    using (var searcher = new IndexSearcher(_directory, false))
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        try
        {
            if (!string.IsNullOrEmpty(searchField))
            {
                // Search by single field.
                var parser = new QueryParser(Version.LUCENE_29, searchField, analyzer);
                var query = parseQuery(searchQuery, parser);
                var hits = searcher.Search(query, hits_limit).ScoreDocs;
                return _mapLuceneToDataList(hits, searcher);
            }
            else
            {
                // Search by multiple fields. NOTE(review): the original comment
                // said "ordered by RELEVANCE" but Sort.INDEXORDER sorts by
                // document order — confirm which is intended.
                var parser = new MultiFieldQueryParser(
                    Version.LUCENE_29, new[] { "Id", "Name", "Description" }, analyzer);
                var query = parseQuery(searchQuery, parser);
                var hits = searcher.Search(query, null, hits_limit, Sort.INDEXORDER).ScoreDocs;
                return _mapLuceneToDataList(hits, searcher);
            }
        }
        finally
        {
            analyzer.Close();
        }
    }
}
// Verifies that a term indexed by WhitespaceAnalyzer keeps its original
// case, and that a prefix query ("MRS.*") with lowercase expansion
// disabled still matches the upper-case term.
public void MrsJones()
{
    var dir = new RAMDirectory();
    var analyzer = new WhitespaceAnalyzer(); // splits on whitespace only; no lower-casing
    var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    var document = new Lucene.Net.Documents.Document();
    document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
    writer.AddDocument(document);
    writer.Close(true);

    var searcher = new IndexSearcher(dir, true);
    // Dump every indexed term (debug aid).
    var termEnum = searcher.GetIndexReader().Terms();
    while (termEnum.Next())
    {
        var buffer = termEnum.Term().Text();
        Console.WriteLine(buffer);
    }

    var queryParser = new QueryParser(Version.LUCENE_29, "", analyzer);
    // By default the parser lower-cases wildcard/prefix terms, which would
    // turn MRS.* into mrs.* and miss the indexed term.
    queryParser.SetLowercaseExpandedTerms(false);
    var query = queryParser.Parse("Name:MRS.*");
    Console.WriteLine(query);
    var result = searcher.Search(query, 10);
    Assert.NotEqual(0, result.totalHits);
}
// Verifies custom field/string bridges are applied during indexing:
// tokens from the raw property values ("This") must match, while words
// the bridges are expected to transform away ("by", "is") must not.
public void CustomBridges()
{
    Cloud cloud = new Cloud();
    cloud.CustomFieldBridge = ("This is divided by 2");
    cloud.CustomStringBridge = ("This is div by 4");

    // Persist so the entity gets indexed.
    ISession s = OpenSession();
    ITransaction tx = s.BeginTransaction();
    s.Save(cloud);
    s.Flush();
    tx.Commit();

    tx = s.BeginTransaction();
    IFullTextSession session = Search.CreateFullTextSession(s);
    QueryParser parser = new QueryParser("id", new SimpleAnalyzer());
    Lucene.Net.Search.Query query = parser.Parse("CustomFieldBridge:This AND CustomStringBridge:This");
    IList result = session.CreateFullTextQuery(query).List();
    Assert.AreEqual(1, result.Count, "Properties not mapped");

    query = parser.Parse("CustomFieldBridge:by AND CustomStringBridge:is");
    result = session.CreateFullTextQuery(query).List();
    Assert.AreEqual(0, result.Count, "Custom types not taken into account");

    // Cleanup.
    s.Delete(s.Get(typeof(Cloud), cloud.Id));
    tx.Commit();
    s.Close();
}
/// <summary>
/// Runs <paramref name="strQuery"/> against the "text" field and buckets the
/// hits by calendar day of their "created" field, returning parallel arrays
/// of distinct dates and per-date hit counts.
/// </summary>
/// <param name="strQuery">Lucene query text.</param>
/// <param name="datesArray">Distinct dates, in index order of first appearance.</param>
/// <param name="amountsArray">Hit count for each corresponding date.</param>
public void PerformAmountSearch(string strQuery, out string[] datesArray, out int[] amountsArray)
{
    List<string> dates = new List<string>();
    List<int> amounts = new List<int>();
    int amount = 0;

    Query query = new QueryParser("text", an).Parse(strQuery);
    Hits results = searcher.Search(query, Sort.INDEXORDER);
    for (int i = 0; i < results.Length(); i++)
    {
        Document doc = results.Doc(i);
        string date = DateTime.Parse(doc.Get("created")).Date.ToString();
        if (!dates.Contains(date))
        {
            // A new date starts: flush the running count for the previous one.
            if (dates.Count > 0)
            {
                amounts.Add(amount);
            }
            amount = 0;
            dates.Add(date);
        }
        amount++;
    }
    // Flush the count for the final date — but only if at least one date was
    // seen. Previously an empty result set produced dates.Length == 0 with
    // amounts.Length == 1, breaking the parallel-array contract.
    if (dates.Count > 0)
    {
        amounts.Add(amount);
    }

    datesArray = dates.ToArray();
    amountsArray = amounts.ToArray();
}
// Demo handler: builds a tiny in-memory index of book titles, searches it
// for "Lucene in Action", and prints each hit's title and score.
private void button1_Click(object sender, EventArgs e)
{
    Directory index = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    IndexWriter w = new IndexWriter(index, analyzer);
    addDoc(w, "Lucene in Action");
    addDoc(w, "Lucene for Dummies");
    addDoc(w, "Managing Gigabytes");
    addDoc(w, "The Art of Computer Science");
    w.Close();

    String querystr = "Lucene in Action";
    Query q = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "title", analyzer).Parse(querystr);

    int hitsPerPage = 10;
    IndexReader reader = IndexReader.Open(index, true);
    try
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        try
        {
            TopScoreDocCollector collector = TopScoreDocCollector.Create(hitsPerPage, true);
            searcher.Search(q, collector);
            ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
            System.Console.WriteLine("Found {0} Hits", hits.Length);
            foreach (var item in hits)
            {
                int docId = item.Doc;
                Document d = searcher.Doc(docId);
                System.Console.WriteLine(d.Get("title") + " " + item.Score);
            }
        }
        finally
        {
            searcher.Close(); // previously never released
        }
    }
    finally
    {
        reader.Close(); // previously never released
    }
}
// Runs the query typed into the search textbox against the "content" field
// of the on-disk index and binds (title, path, score) rows to the grid.
private void btnExecuteSearch_Click(object sender, EventArgs e)
{
    Directory indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(tempPath));
    IndexSearcher searcher = new IndexSearcher(indexDirectory, true); // read-only=true

    // TODO: QueryParser support for Hebrew terms (most concerning issue is with acronyms - mid-word quotes)
    QueryParser qp = new QueryParser("content", analyzer);
    qp.SetDefaultOperator(QueryParser.Operator.AND); // all terms required
    Query query = qp.Parse(txbSearchQuery.Text);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;

    // Iterate through the results:
    BindingList<SearchResult> l = new BindingList<SearchResult>();
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = searcher.Doc(hits[i].doc);
        SearchResult sr = new SearchResult(hitDoc.GetField("title").StringValue(), hitDoc.GetField("path").StringValue(), hits[i].score);
        l.Add(sr);
    }
    // NOTE(review): Close() is skipped if Parse/Search throws — consider try/finally.
    searcher.Close();
    indexDirectory.Close();
    dgvResults.DataSource = l;
}
// Runs the query against the "text" field of the on-disk index, mapping the
// top 20 hits to LuceneDocuments and recording the elapsed time in seconds.
public ISearchResult Search(string query)
{
    var timer = new Stopwatch();
    timer.Start();
    var directory = FSDirectory.Open(new DirectoryInfo(path));
    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    var searcher = new IndexSearcher(directory, true); // read-only
    var queryParser = new QueryParser(Version.LUCENE_29, "text", analyzer);
    var result = searcher.Search(queryParser.Parse(query), 20);
    // Materialize title/text per hit; the Lucene doc id doubles as the result id.
    var docs = (from scoreDoc in result.scoreDocs
                let doc = searcher.Doc(scoreDoc.doc)
                let fields = new Dictionary<string, string> { { "title", doc.Get("title") }, { "text", doc.Get("text") } }
                select new LuceneDocument { Id = scoreDoc.doc.ToString(), Fields = fields }).ToList();
    var ret = new SearchResult { Query = query, Total = result.totalHits, Documents = docs, Source = Name };
    // NOTE(review): searcher/directory leak if Parse/Search throws — consider try/finally.
    searcher.Close();
    directory.Close();
    timer.Stop();
    ret.Duration = (decimal)timer.Elapsed.TotalSeconds;
    return ret;
}
// Verifies that ProjectionToDelimStringResultTransformer flattens each
// projected row into a single comma-delimited string.
public void ResultTransformToDelimString()
{
    IFullTextSession s = Search.CreateFullTextSession(this.OpenSession());
    this.PrepEmployeeIndex(s);
    s.Clear();
    ITransaction tx = s.BeginTransaction();
    QueryParser parser = new QueryParser("Dept", new StandardAnalyzer());
    Query query = parser.Parse("Dept:ITech");
    IFullTextQuery hibQuery = s.CreateFullTextQuery(query, typeof(Employee));
    // Project entity fields plus Lucene metadata (entity, score, boost, id).
    hibQuery.SetProjection("Id", "Lastname", "Dept", ProjectionConstants.THIS, ProjectionConstants.SCORE, ProjectionConstants.BOOST, ProjectionConstants.ID);
    hibQuery.SetResultTransformer(new ProjectionToDelimStringResultTransformer());
    IList result = hibQuery.List();
    Assert.IsTrue(((string)result[0]).StartsWith("1000, Griffin, ITech"), "incorrect transformation");
    Assert.IsTrue(((string)result[1]).StartsWith("1002, Jimenez, ITech"), "incorrect transformation");

    // cleanup
    s.Delete("from System.Object");
    tx.Commit();
    s.Close();
}
/// <summary>
/// Runs <paramref name="search"/> against the "All" field on a background
/// task, resolving each hit's stored id through <c>LookupTable</c>.
/// Errors are logged and an empty collection returned.
/// </summary>
public Task<SearchResultCollection> Search(string search)
{
    return System.Threading.Tasks.Task.Run(() =>
    {
        var src = new SearchResultCollection();
        if (string.IsNullOrWhiteSpace(search)) return src;
        try
        {
            // "All" is the default field the parser falls back to.
            // (The old code also built an unused TermQuery here — dead code, removed.)
            var parser = new QueryParser(Version.LUCENE_30, "All", analyzer);
            using (var indexSearcher = new IndexSearcher(directory, true))
            {
                Query query = parser.Parse(search);
                TopDocs result = indexSearcher.Search(query, 50);
                foreach (ScoreDoc h in result.ScoreDocs)
                {
                    Document doc = indexSearcher.Doc(h.Doc);
                    string id = doc.Get("id");
                    BaseContent value;
                    // Skip hits whose id no longer resolves to live content.
                    if (LookupTable.TryGetValue(id, out value))
                        src.Add(new SearchResult { Relevance = h.Score, Content = value });
                }
            }
        }
        catch (Exception e)
        {
            Logger.Log("DataServer", "Error lucene search", e.Message, Logger.Level.Error);
        }
        return src;
    });
}
/// <summary>
/// Creates an indexer with no resources attached yet; the directory,
/// writer, analyzer and parser are all initialized later.
/// </summary>
public LuceneIndexer()
{
    parser = null;
    analyzer = null;
    writer = null;
    luceneIndexDirectory = null;
}
/// <summary>
/// Searches the "./index" directory's "description" field for
/// <paramref name="terms"/> and returns (name, description) results.
/// </summary>
public SearchResults Find(string terms)
{
    // Open the existing on-disk index (create: false).
    Directory directory = FSDirectory.GetDirectory("./index", false);
    try
    {
        var isearcher = new IndexSearcher(directory);
        try
        {
            var qp = new QueryParser("description", _analyzer);
            Query query = qp.Parse(terms);
            Hits hits = isearcher.Search(query);

            var sr = new SearchResults();
            // Iterate through the results:
            for (int i = 0; i < hits.Length(); i++)
            {
                Document hitDoc = hits.Doc(i);
                sr.Add(new Result() { Name = hitDoc.Get("name"), Description = hitDoc.Get("description") });
            }
            return sr;
        }
        finally
        {
            // Previously leaked when Parse/Search threw.
            isearcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Searches the "contents" field for <paramref name="query"/> and returns up
/// to <paramref name="maxResults"/> hits with highlighted excerpts.
/// </summary>
public IEnumerable<Hit> Search(string query, int maxResults)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "contents", analyzer);
    Query q = qp.Parse(query);
    TopDocs top = searcher.Search(q, maxResults);

    // top.totalHits counts ALL matching documents, but top.scoreDocs holds at
    // most maxResults entries. The old loop indexed scoreDocs by totalHits and
    // threw IndexOutOfRangeException whenever more than maxResults matched.
    int count = top.scoreDocs.Length;

    // Highlighter setup is loop-invariant; build it once.
    var scorer = new QueryScorer(q, searcher.GetIndexReader(), "contents");
    var highlighter = new Highlighter(scorer);

    List<Hit> result = new List<Hit>(count);
    for (int index = 0; index < count; index++)
    {
        var doc = searcher.Doc(top.scoreDocs[index].doc);
        string contents = doc.Get("contents");
        result.Add(new Hit()
        {
            Relevance = top.scoreDocs[index].score,
            Title = doc.Get("title"),
            Url = doc.Get("path"),
            Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
        });
    }
    return result;
}
// Demo entry point: queries the "messages" index for a Timestamp range and
// dumps every stored field of each hit to the console.
public static void Main(string[] args)
{
    BasicConfigurator.Configure();
    //using (var reader = new MsmqLogReader(new Uri("msmq://localhost/test_queue2")))
    //{
    //    reader.Start();
    //    Console.ReadLine();
    //}
    var searcher = new IndexSearcher("messages");
    var parser = new QueryParser("", new StandardAnalyzer());
    //var query = parser.Parse("MessageId:\"b5005080-800c-43c3-a20b-16db773d7663\" AND MessageId:2307015");
    // NOTE(review): the range's lower bound (…9749900) is later than its upper
    // bound (…6343650) — the bounds look swapped; confirm intended.
    var query = parser.Parse("Timestamp:[\"2008-12-16T08:14:53.9749900\" TO \"2008-12-16T08:14:53.6343650\"]");
    var hits = searcher.Search(query);
    for (int i = 0; i < hits.Length(); i++)
    {
        var doc = hits.Doc(i);
        Console.WriteLine();
        foreach (Fieldable field in doc.GetFields())
        {
            Console.WriteLine("{0}: {1}", field.Name(), field.StringValue());
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Bundles the collaborators the Lucene search pipeline needs: the searcher
/// manager, query parser, metadata resolver and work queue.
/// </summary>
internal LuceneSearchDeps(SearcherManager searcherManager, QueryParser parser, IMetaDataResolver resolver, IQueue queue)
{
    this.Queue = queue;
    this.Resolver = resolver;
    this.Parser = parser;
    this.SearcherManager = searcherManager;
}
/// <summary>
/// Runs a prefix search ("query*") on the "SearchBody" field of the
/// App_Data index and maps up to 200 hits to SearchResult objects.
/// </summary>
public static ICollection<SearchResult> Execute(string query)
{
    ICollection<SearchResult> searchResults = new List<SearchResult>();
    string directoryPath = AppDomain.CurrentDomain.BaseDirectory + @"\App_Data\LuceneIndexes";

    // using blocks guarantee disposal; previously Dispose() was skipped
    // whenever Parse or Search threw.
    using (var directory = FSDirectory.Open(directoryPath))
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "SearchBody", analyzer);
        Query searchQuery = parser.Parse(query + "*"); // prefix search

        using (IndexSearcher searcher = new IndexSearcher(directory))
        {
            TopDocs hits = searcher.Search(searchQuery, 200);
            int results = hits.ScoreDocs.Length;
            for (int i = 0; i < results; i++)
            {
                Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                var searchResult = new SearchResult();
                searchResult.EntityId = int.Parse(doc.Get("EntityId"));
                searchResult.EntityTypeName = doc.Get("EntityTypeName");
                searchResult.SearchTitle = doc.Get("SearchTitle");
                searchResult.SearchBody = doc.Get("SearchBody");
                searchResults.Add(searchResult);
            }
        }
    }
    return searchResults;
}
// Smoke test: index a single two-field document into a RAMDirectory and
// confirm the search path runs end-to-end.
public void HelloWorldTest()
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);

    // Index one document.
    IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Commit();
    writer.Close();

    QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
    Query query = parser.Parse("sample test");

    //Setup searcher
    IndexSearcher searcher = new IndexSearcher(directory, true);
    //Do the search
    var hits = searcher.Search(query, null, 10);
    // Iterate over ScoreDocs.Length, not TotalHits: only up to 10 hits are
    // returned even when more documents match, so the old TotalHits bound
    // was a latent IndexOutOfRangeException.
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        var doc1 = hits.ScoreDocs[i];
    }
    searcher.Close();
    directory.Close();
}
/// <summary>
/// Parses <paramref name="q"/> against the "text" field and returns the top
/// 10 matching documents as TestDocument (id, text) pairs.
/// </summary>
public Task<List<TestDocument>> Query(string q)
{
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", Analyzer);
    Query query = parser.Parse(q);
    IndexSearcher searcher = new IndexSearcher(Index, true);
    try
    {
        //Do the search
        TopDocs docs = searcher.Search(query, 10);
        // docs.TotalHits counts every match in the index, but ScoreDocs holds
        // at most 10 entries; the old loop indexed ScoreDocs by TotalHits and
        // threw IndexOutOfRangeException whenever more than 10 documents matched.
        List<TestDocument> ret = new List<TestDocument>();
        foreach (ScoreDoc d in docs.ScoreDocs)
        {
            Document idoc = searcher.Doc(d.Doc);
            ret.Add(new TestDocument()
            {
                Id = Convert.ToInt32(idoc.GetField("id").StringValue),
                Text = idoc.GetField("text").StringValue
            });
        }
        return Task.FromResult(ret);
    }
    finally
    {
        searcher.Dispose(); // previously skipped when Search/Doc threw
    }
}
/// <summary>
/// Runs <paramref name="searchString"/> against the "Content" field and
/// returns up to 100 hits as (BookId, Score) results.
/// </summary>
public SearchResult[] Search(string searchString)
{
    Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_29);
    QueryParser parser = new QueryParser(Version.LUCENE_29, "Content", analyzer);
    var query = parser.Parse(searchString);

    Searcher searcher = new IndexSearcher(Lucene.Net.Index.IndexReader.Open(directory, true));
    TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
    searcher.Search(query, collector);

    List<SearchResult> results = new List<SearchResult>();
    foreach (var hit in collector.TopDocs().ScoreDocs)
    {
        Lucene.Net.Documents.Document doc = searcher.Doc(hit.Doc);
        results.Add(new SearchResult { BookId = Guid.Parse(doc.Get("BookId")), Score = hit.Score });
    }
    return results.ToArray();
}
// Returns notes whose title (boosted 4x) or content starts with
// <paramref name="search"/>. Errors are logged to the console and an empty
// list is returned.
public ArrayList getNotesMatchingTitle(string search)
{
    ArrayList snotes = new ArrayList();
    try
    {
        QueryParser parser = new QueryParser("title", analyzer);
        // Prefix query on the default title field (boost ^4) OR on content.
        string lucsearch = search + "*^4" + " content:" + search + "*";
        Query query = parser.Parse(lucsearch);
        IndexSearcher searcher = new IndexSearcher(lucIdx);
        Hits hits = searcher.Search(query);
        int results = hits.Length();
        Console.WriteLine("Found {0} results", results);
        for (int i = 0; i < results; i++)
        {
            Document doc = hits.Doc(i);
            //float score = hits.Score (i);
            snotes.Add(new Note(doc.Get("title"), doc.Get("lastmod")));
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("ERROR Search: " + e.Message);
    }
    return snotes;
}
// Builds a 10,000-document in-memory index (Id stored/not analyzed, Name
// analyzed), then runs a fuzzy query ("Name37~") against it.
public Engine()
{
    var directory = new RAMDirectory();
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    using (var indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
    {
        for (int i = 0; i < 10000; i++)
        {
            Console.Write("."); // progress indicator
            var document = new Document();
            document.Add(new Field("Id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("Name", "Name" + i.ToString(), Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(document);
        }
    }
    // NOTE(review): blocks construction until a key is pressed — confirm intended.
    Console.ReadKey();
    var queryParser = new QueryParser(Version.LUCENE_30, "Name", analyzer);
    var query = queryParser.Parse("Name37~"); // fuzzy match on "Name37"
    IndexReader indexReader = IndexReader.Open(directory, true);
    var searcher = new IndexSearcher(indexReader);
    TopDocs resultDocs = searcher.Search(query, indexReader.MaxDoc);
}
// Searches the "content" field with the text from textBox1 and shows the
// total hit count in a message box (the result listing is commented out).
private void button2_Click(object sender, EventArgs e)
{
    String field = "content";
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new DirectoryInfo(INDEX_DIR.FullName)), true);
    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, field, analyzer);
    Query query = parser.Parse(textBox1.Text.Trim());
    // Collect up to every document in the index.
    TopScoreDocCollector collector = TopScoreDocCollector.Create(searcher.MaxDoc, false);
    searcher.Search(query, collector);
    ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
    MessageBox.Show(this, "共 " + collector.TotalHits.ToString() + " 条记录");
    //ltrResult.Text = "共 " + collector.GetTotalHits().ToString() + " 条记录<br>";
    //for (Int32 i = 0; i < collector.GetTotalHits(); i++)
    //{
    //    ltrResult.Text += "doc=" + hits[i].doc + " score=" + hits[i].score + "<br>";
    //    Document doc = searcher.Doc(hits[i].doc);
    //    ltrResult.Text += "Path:" + doc.Get("path") + "<br>";
    //}
    // NOTE(review): the searcher is never closed; only the reader is disposed.
    reader.Dispose();
}
// Verifies a class-level bridge: the "branchnetwork" Lucene field combines
// Department.Branch and Department.Network and exists only in the index,
// not on the entity itself.
public void ClassBridge()
{
    // Index three departments.
    ISession s = this.OpenSession();
    ITransaction tx = s.BeginTransaction();
    s.Save(this.getDept1());
    s.Save(this.getDept2());
    s.Save(this.getDept3());
    s.Flush();
    tx.Commit();

    tx = s.BeginTransaction();
    IFullTextSession session = Search.CreateFullTextSession(s);

    // The branchnetwork field is the concatenation of both
    // the branch field and the network field of the Department
    // class. This is in the Lucene document but not in the
    // Department entity itself.
    QueryParser parser = new QueryParser("branchnetwork", new SimpleAnalyzer());
    Query query = parser.Parse("branchnetwork:layton 2B");
    IFullTextQuery hibQuery = session.CreateFullTextQuery(query, typeof(Department));
    IList result = hibQuery.List();
    Assert.IsNotNull(result);
    Assert.AreEqual("2B", ((Department)result[0]).Network, "incorrect entity returned, wrong network");
    Assert.AreEqual("Layton", ((Department)result[0]).Branch, "incorrect entity returned, wrong branch");
    Assert.AreEqual(1, result.Count, "incorrect number of results returned");

    // Partial match.
    query = parser.Parse("branchnetwork:3c");
    hibQuery = session.CreateFullTextQuery(query, typeof(Department));
    result = hibQuery.List();
    Assert.IsNotNull(result);
    Assert.AreEqual("3C", ((Department)result[0]).Network, "incorrect entity returned, wrong network");
    Assert.AreEqual("West Valley", ((Department)result[0]).Branch, "incorrect entity returned, wrong branch");
    Assert.AreEqual(1, result.Count, "incorrect number of results returned");

    // No data cross-ups .
    query = parser.Parse("branchnetwork:Kent Lewin");
    hibQuery = session.CreateFullTextQuery(query, typeof(Department));
    result = hibQuery.List();
    Assert.IsNotNull(result);
    Assert.IsTrue(result.Count == 0, "problem with field cross-ups");

    // Non-ClassBridge field.
    parser = new QueryParser("BranchHead", new SimpleAnalyzer());
    query = parser.Parse("BranchHead:Kent Lewin");
    hibQuery = session.CreateFullTextQuery(query, typeof(Department));
    result = hibQuery.List();
    Assert.IsNotNull(result);
    Assert.IsTrue(result.Count == 1, "incorrect entity returned, wrong branch head");
    Assert.AreEqual("Kent Lewin", ((Department)result[0]).BranchHead, "incorrect entity returned");

    // Cleanup
    foreach (object element in s.CreateQuery("from " + typeof(Department).FullName).List())
    {
        s.Delete(element);
    }
    tx.Commit();
    s.Close();
}
// Verifies that a full-text hit whose backing row was deleted out-of-band
// (raw SQL DELETE) is filtered from the result list instead of throwing.
public void ObjectNotFound()
{
    ISession sess = OpenSession();
    ITransaction tx = sess.BeginTransaction();
    Author author = new Author();
    author.Name = "Moo Cow";
    sess.Persist(author);
    tx.Commit();
    sess.Clear();

    // Delete the row behind NHibernate's back so the index is now stale.
    IDbCommand statement = sess.Connection.CreateCommand();
    statement.CommandText = "DELETE FROM Author";
    statement.ExecuteNonQuery();

    IFullTextSession s = Search.CreateFullTextSession(sess);
    tx = s.BeginTransaction();
    QueryParser parser = new QueryParser("title", new KeywordAnalyzer());
    Lucene.Net.Search.Query query = parser.Parse("name:moo");
    IFullTextQuery hibQuery = s.CreateFullTextQuery(query, typeof(Author), typeof(Music));
    IList result = hibQuery.List();
    Assert.AreEqual(0, result.Count, "Should have returned no author");

    // Cleanup (no-op when the result list is empty).
    foreach (object o in result)
    {
        s.Delete(o);
    }
    tx.Commit();
    s.Close();
}
// Checks the parser's defaults: SimpleAnalyzer lower-cases terms and the
// AND operator produces two required (+) clauses.
public void TestBasicQueryParser()
{
    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "description", new SimpleAnalyzer());
    var query = parser.Parse("partnum:Q36 AND SPACE");

    Assert.AreEqual("+partnum:q +description:space", query.ToString(), "note Q36 -> q");

    var hits = searcher.Search(query, 10).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "doc not found :(");
}
/// <summary>
/// Autocomplete helper: prefix-searches the "Content" field for
/// <paramref name="args"/> and returns up to 5 matching article ids.
/// Best-effort: any failure yields whatever was collected so far.
/// </summary>
public List<int> AiComplete(string args)
{
    List<int> list = new List<int>();
    try
    {
        // using blocks release the reader/searcher; previously both leaked.
        using (IndexReader reader = IndexReader.Open(this.folder, true))
        using (var searcher = new IndexSearcher(reader))
        {
            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Content", new KeywordAnalyzer());
            queryParser.AllowLeadingWildcard = true;
            var query = queryParser.Parse(args + "*"); // prefix match
            TopDocs result = searcher.Search(query, 5);
            foreach (var hit in result.ScoreDocs)
            {
                var doc = searcher.Doc(hit.Doc);
                list.Add(doc.Get("ArticlesID").ToInt());
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort swallow. NOTE(review): consider logging the
        // exception instead of discarding it silently.
    }
    return list;
}
/// <summary>
/// Archives (url, text) into the language-specific index. If the url is
/// already indexed with the same content hash, nothing is done; if the
/// content changed, the stale document is replaced. Access to each index is
/// serialized by a per-language monitor.
/// </summary>
public static void Arquive(string text, string url, bool isEnglish)
{
    // Pick the language-specific index path, analyzer and lock object.
    string directory;
    Lucene.Net.Analysis.Analyzer analyzer;
    Object _mon = isEnglish ? _monEn : _monPt;
    InitPathAndAnalyzer(out directory, out analyzer, isEnglish);

    // Build the document: stored url + hash, analyzed (unstored) text.
    string hash = text.GetHashCode().ToString();
    Document document = new Document();
    document.Add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.Add(new Field("text", text, Field.Store.NO, Field.Index.ANALYZED));
    document.Add(new Field("hash", hash, Field.Store.YES, Field.Index.NOT_ANALYZED));

    Monitor.Enter(_mon);
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo(directory));

        // First use: create an empty index so the searcher below can open it.
        if (System.IO.Directory.GetFiles(directory).Length == 0)
        {
            IndexWriter init = new IndexWriter(dir, analyzer, true, new IndexWriter.MaxFieldLength(25000));
            init.Dispose();
        }

        // Look for an existing entry with this url.
        using (IndexSearcher isearcher = new IndexSearcher(dir, false))
        {
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "url", analyzer);
            Query query = parser.Parse(url);
            ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
            if (hits.Length > 0)
            {
                Document doc = isearcher.Doc(hits[0].Doc);
                if (doc.Get("hash").Equals(hash))
                {
                    // Identical content already archived: nothing to do.
                    // (The old early-return path leaked the searcher.)
                    return;
                }
                // Stale content: remove the old document; the new one is added below.
                isearcher.IndexReader.DeleteDocument(hits[0].Doc);
            }
        }

        // Append the fresh document and optimize.
        IndexWriter iwriter = new IndexWriter(dir, analyzer, false, new IndexWriter.MaxFieldLength(25000));
        iwriter.AddDocument(document);
        iwriter.Optimize();
        iwriter.Dispose();
    }
    finally
    {
        // Previously Monitor.Exit was skipped when Parse/IndexWriter threw,
        // leaving the per-language lock held forever.
        Monitor.Exit(_mon);
    }
}
// Initializes objects: the directory and writer are created later; the
// English Snowball stemmer feeds both indexing and query parsing.
public LuceneApplication()
{
    luceneIndexDirectory = null;
    writer = null;
    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
    customSimilarity = new CustomSimilarity();
}
/// <summary>
/// Returns the cached QueryParser for <paramref name="highlightField"/>,
/// creating and caching one on first use.
/// </summary>
protected QueryParser GetQueryParser(string highlightField)
{
    // TryGetValue avoids the double dictionary lookup of the previous
    // ContainsKey-then-indexer pattern.
    QueryParser parser;
    if (!QueryParsers.TryGetValue(highlightField, out parser))
    {
        parser = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
        QueryParsers[highlightField] = parser;
    }
    return parser;
}
/// <summary>
/// Escapes <paramref name="value"/> (so Lucene metacharacters are literal)
/// and parses it with <paramref name="parser"/>.
/// </summary>
/// <exception cref="InvalidOperationException">Value is null or empty.</exception>
private Query Parse(QueryParser parser, string value)
{
    if (!string.IsNullOrEmpty(value))
    {
        return parser.Parse(QueryParser.Escape(value));
    }
    throw new InvalidOperationException("The key value for field " + parser.Field + " must not be blank.");
}
// Wires up the search service: a Lithuanian analyzer, a parser defaulting
// to the "text" field, and a scoring rewrite for multi-term queries.
public SearchService(Func<INoSqlSession> noSqlSessionFactory, ILogger logger)
{
    this.noSqlSessionFactory = noSqlSessionFactory;
    this.logger = logger;
    analyzer = new LithuanianAnalyzer();
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer);
    // Expand wildcard/prefix queries into scoring boolean clauses instead of
    // the default constant-score rewrite, so relevance is preserved.
    parser.SetMultiTermRewriteMethod(Lucene.Net.Search.MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
}
/// <summary>
/// Runs <paramref name="query"/> against either the OCR (TEXT_SPOT) or the
/// caption (TEXT_CAPTION) field and returns the top <paramref name="topRank"/>
/// hits as TextSpot/TextCaption objects; scores come back via
/// <paramref name="listScores"/>. Returns null for an empty query, an
/// unknown search type, or when nothing matches.
/// </summary>
public static List<Object> SearchByQueryPlusScore(IndexStorage indexStorage, int topRank, String query, SearchType searchType, out List<float> listScores)
{
    listScores = new List<float>();
    if (String.IsNullOrEmpty(query))
    {
        return (null);
    }

    Lucene.Net.QueryParsers.QueryParser queryParser = null;
    if (searchType == SearchType.OCR)
    {
        queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Indexing.TEXT_SPOT, indexStorage.analyzer);
    }
    else if (searchType == SearchType.CAPTION)
    {
        queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Indexing.TEXT_CAPTION, indexStorage.analyzer);
    }
    else
    {
        // Previously an unknown search type fell through to a
        // NullReferenceException on queryParser.Parse below.
        return (null);
    }

    IndexSearcher indexSearch = new IndexSearcher(indexStorage.DirectoryIndexing);
    try
    {
        Query q = queryParser.Parse(query);
        TopDocs topDocs = indexSearch.Search(q, topRank);
        if (topDocs == null || topDocs.ScoreDocs.Length == 0)
        {
            return (null);
        }

        List<Object> result = new List<Object>();
        for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
        {
            ScoreDoc scoreDoc = topDocs.ScoreDocs[i];
            listScores.Add(scoreDoc.Score);
            Document doc = indexSearch.Doc(scoreDoc.Doc);
            if (searchType == SearchType.OCR)
            {
                result.Add(new TextSpot(doc.Get(Indexing.FRAME_NAME), doc.Get(Indexing.TEXT_SPOT)));
            }
            else if (searchType == SearchType.CAPTION)
            {
                result.Add(new TextCaption(doc.Get(Indexing.FRAME_NAME), doc.Get(Indexing.TEXT_CAPTION)));
            }
        }
        return (result);
    }
    finally
    {
        // The searcher was never released before.
        indexSearch.Dispose();
    }
}
/// <summary>
/// Initialises the parser object: a multi-field parser over the title and
/// abstract fields with per-field boosts.
/// </summary>
public void CreateParser()
{
    string[] fields = { TITLE_FN, ABSTRACT_FN };
    // Populate the boost map BEFORE constructing the parser. The old code
    // added the entries after construction, which only worked because the
    // parser happened to keep a live reference to the dictionary.
    var boosts = new Dictionary<string, float>
    {
        { TITLE_FN, 5000 },
        { ABSTRACT_FN, 10000 },
    };
    parser = new MultiFieldQueryParser(VERSION, fields, analyzer, boosts);
}
/// <summary>
/// Executes the query: optionally preprocesses the user-entered text,
/// searches the index with per-field boosts, records the elapsed time in
/// <c>queryTime</c>, and returns the total hit count.
/// </summary>
/// <param name="text">Raw query text entered by the user.</param>
/// <param name="preproc">Whether to run PreprocessQuery on the text first.</param>
/// <param name="qText">Receives the parsed query's string form.</param>
public int RunQuery(string text, bool preproc, out string qText)
{
    // Monotonic timing. The old DateTime.Now subtraction combined with
    // duration.Seconds (which truncates to the seconds COMPONENT, discarding
    // whole minutes) could badly misreport long query times.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    // get the query settings from the collection
    IRQueryParams queryParams = myCollection.GetQueryParams();
    string[] queryFields = queryParams.Fields;
    float[] queryFieldBoosts = queryParams.FieldBoosts;

    // build field boost dictionary
    IDictionary<string, float> boosts = new Dictionary<string, float>();
    for (int i = 0; i < queryFields.Length; i++)
    {
        boosts.Add(queryFields[i], queryFieldBoosts[i]);
    }

    // setup searcher, query and parser
    CreateSearcher();
    Query query;
    parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, queryFields, analyzer, boosts);

    // preprocess query (if required)
    if (preproc == true)
    {
        query = PreprocessQuery(text, parser);
    }
    else
    {
        // no preprocessing
        query = parser.Parse(text);
    }

    // print query text to form
    qText = query.ToString();

    // execute the search
    searchResults = searcher.Search(query, maxResults);

    timer.Stop();
    queryTime = (float)timer.Elapsed.TotalSeconds;

    CleanUpSearcher();
    return (searchResults.TotalHits);
}
// Full-text query against the "text" field, post-filtered by per-document
// authorization for userId; returns at most `max` formatted results.
public static List<Models.SearchResult> Query(string text, int max = 8, string userId = null)
{
    // Skip empty queries and bare "field:" fragments, which the parser rejects.
    if (!string.IsNullOrEmpty(text) && !text.EndsWith(":"))
    {
        userId = string.IsNullOrEmpty(userId) ? Account.AuditId : userId;
        var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29); //todo: what version?
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer); //text is just the default field to search
        var query = parser.Parse(text);
        //var term = new Term("text", text.ToLower());
        //var query = new Lucene.Net.Search.PrefixQuery(term); //parser.Parse(text);
        //var query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse(text.ToLower());
        using (var dir = FSDirectory.Open(new DirectoryInfo(IndexDir)))
        {
            using (var searcher = new IndexSearcher(dir, true))
            {
                var collector = TopScoreDocCollector.create(max * 2, true); //todo: mini-hack to accomidate the post-filtering of search results - attempt to get twice as many results as we need. hopefully we won't filter out that many...
                searcher.Search(query, collector);
                var hits = collector.TopDocs().ScoreDocs;
                //var hits = searcher.Search(query);
                var ret = new List<Models.SearchResult>();
                for (var i = 0; i < hits.Length; i++)
                {
                    var docId = hits[i].doc;
                    var doc = new Models.SearchDocument(searcher.Doc(docId));
                    var provider = GetDocumentProvider(doc.Type);
                    if (provider != null)
                    {
                        // Drop hits the current user is not allowed to see.
                        if (provider.IsAuthorized(doc, userId))
                        {
                            ret.Add(provider.FormatResult(doc));
                        }
                    }
                    else
                    {
                        throw new Exception(string.Format("Formatter for type {0} not found", doc.Type));
                    }
                    if (ret.Count >= max) //todo: mini-hack to accomidate the post-filtering of search results
                    {
                        break;
                    }
                }
                return (ret);
            }
        }
    }
    return (new List<SearchResult>());
}
// Constructor: clears all search collaborators; only the analyzer and the
// custom similarity are created eagerly.
public LuceneSearch()
{
    luceneIndexDirectory = null;
    writer = null;
    searcher = null;
    parser = null;
    ScoreDocs = null;
    //default settings of analyzer: Standard Analyzer, with lower case filter and no stop word filter
    // (the empty HashSet disables the default English stop-word list)
    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION, new HashSet<string>());
    //changes to Lucene score
    newSimilarity = new NewSimilarity();
}
/// <summary>
/// Parses the configured query text and executes the search, storing the
/// hits in <c>QueryHits</c>. The Begin/End hooks always run as a pair,
/// even when parsing or searching throws.
/// </summary>
public void RunSearch()
{
    BeginRunSearch();
    try
    {
        LuceneQueryParsers.QueryParser queryParser = CreateQueryParser();
        LuceneSearch.Query parsedQuery = queryParser.Parse(IndexQueryDefinition.QueryText);
        QueryHits = IndexSearcher.Search(parsedQuery, IndexQueryDefinition.TopDocsToFind);
    }
    finally
    {
        EndRunSearch();
    }
}
/// <summary>
/// Constructor: initialises every member to its empty/default state and
/// installs the custom similarity (used for task 6).
/// </summary>
public LuceneApplication()
{
    // Lucene plumbing starts unset until the index is opened.
    luceneIndexDirectory = null;
    analyzerstandard = null;
    analyzerkeyword = null;
    writer = null;
    analysor = null;
    searcher = null;
    parser = null;

    // Custom scoring for task 6.
    customSimilarity = new CustomSimilarity();

    // Bookkeeping containers and counters.
    tokenCount = new Dictionary <string, int>();
    numofdoc = 0;
    numofrelevant = 0;
    option = new List <string>();
    infneed = new Dictionary <string, string>();
}
/// <summary>
/// Understands the Lucene query syntax: runs <paramref name="query"/> against the
/// "content" field and returns one IndexResult per distinct fingerprint
/// (first/highest-scoring hit wins). Returns an empty list if the index
/// cannot be opened or searched.
/// </summary>
public List <Utilities.Language.TextIndexing.IndexResult> GetDocumentsWithQuery(string query)
{
    List <Utilities.Language.TextIndexing.IndexResult> fingerprints = new List <Utilities.Language.TextIndexing.IndexResult>();
    HashSet <string> fingerprints_already_seen = new HashSet <string>();

    try
    {
        using (Lucene.Net.Index.IndexReader index_reader = Lucene.Net.Index.IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (Lucene.Net.Search.IndexSearcher index_searcher = new Lucene.Net.Search.IndexSearcher(index_reader))
            {
                Lucene.Net.QueryParsers.QueryParser query_parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_29, "content", analyzer);
                Lucene.Net.Search.Query query_object = query_parser.Parse(query);
                Lucene.Net.Search.Hits hits = index_searcher.Search(query_object);

                var i = hits.Iterator();
                while (i.MoveNext())
                {
                    Lucene.Net.Search.Hit hit = (Lucene.Net.Search.Hit)i.Current;
                    string fingerprint = hit.Get("fingerprint");
                    // Deduplicate: keep only the first hit for each fingerprint.
                    // (The original also fetched hit.Get("page") into an unused local; removed.)
                    if (!fingerprints_already_seen.Contains(fingerprint))
                    {
                        fingerprints_already_seen.Add(fingerprint);
                        IndexResult index_result = new IndexResult {
                            fingerprint = fingerprint,
                            score = hit.GetScore()
                        };
                        fingerprints.Add(index_result);
                    }
                }
                // Fix: the explicit Close() calls on the searcher and reader were
                // redundant - the using blocks already dispose both.
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "GetDocumentsWithQuery: There was a problem opening the index file for searching.");
    }

    return(fingerprints);
}
/// <summary>
/// Processes a query and ranks results: parses <paramref name="querytext"/>
/// against the text field and returns the top 1400 hits via the ref parameters.
/// </summary>
/// <param name="querytext">User query; lower-cased before parsing to match the
/// lower-casing analyzer. NOTE(review): this also lower-cases operators such as
/// AND/OR, turning them into plain terms - pre-existing behaviour, kept.</param>
/// <param name="results">Receives the ranked TopDocs.</param>
/// <param name="numberOfDocuments">Receives the total hit count.</param>
public void SearchIndex(string querytext, ref TopDocs results, ref int numberOfDocuments)
{
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
    querytext = querytext.ToLower();
    Query query = parser.Parse(querytext);
    results = searcher.Search(query, 1400);
    numberOfDocuments = results.TotalHits;
    // Fix: removed the dead loop that re-fetched every document and built a
    // rank counter and field string whose only consumer was a commented-out
    // Console.WriteLine - it performed O(hits) wasted document loads.
}
/// <summary>
/// Searches the "Body" field for <paramref name="phrase"/> (legacy Hits API),
/// prints the hit count, then the titles of at most the first 100 hits.
/// Timed via AutoStopWatch.
/// </summary>
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    using (new AutoStopWatch(string.Format("Search for {0}", phrase)))
    {
        var queryParser = new Lucene.Net.QueryParsers.QueryParser("Body", new StandardAnalyzer());
        Lucene.Net.Search.Query parsed = queryParser.Parse(phrase);

        Hits hits = searcher.Search(parsed);
        Console.WriteLine("Found {0} results for {1}", hits.Length(), phrase);

        // Cap the printed titles at 100.
        int limit = Math.Min(hits.Length(), 100);
        for (int i = 0; i < limit; i++)
        {
            Console.WriteLine(hits.Doc(i).GetField("Title").StringValue());
        }
    }
}
/// <summary>
/// Searches the "Body" field for <paramref name="phrase"/> (TopDocs API, top 100),
/// prints the total hit count, then the doc ids of at most 100 hits.
/// Timed via AutoStopWatch.
/// </summary>
static void SearchForPhrase(IndexSearcher searcher, string phrase)
{
    using (new AutoStopWatch(string.Format("Search for {0}", phrase)))
    {
        var queryParser = new Lucene.Net.QueryParsers.QueryParser(
            Lucene.Net.Util.Version.LUCENE_CURRENT,
            "Body",
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT));
        Lucene.Net.Search.Query parsed = queryParser.Parse(phrase);

        var topDocs = searcher.Search(parsed, 100);
        Console.WriteLine("Found {0} results for {1}", topDocs.TotalHits, phrase);

        // Only 100 ScoreDocs were requested, so clamp the print count.
        int limit = Math.Min(topDocs.TotalHits, 100);
        for (int i = 0; i < limit; i++)
        {
            Console.WriteLine(topDocs.ScoreDocs[i].Doc);
        }
    }
}
/// <summary>
/// Creates the query parser over the default text field.
/// </summary>
public void CreateParser()
{
    parser = new QueryParser(VERSION, TEXT_FN, analyzer);
}
/// <summary>
/// Initialises the parser object to search across all indexed fields.
/// </summary>
public void CreateParser()
{
    string[] searchableFields = { "url", "passage_text", "query_id", "query", "answers" };
    parser = new MultiFieldQueryParser(VERSION, searchableFields, analyzer);
}
/// <summary>
/// Defines a parser over <paramref name="fieldname"/> using the mixture
/// analyser, translating human-readable queries into Lucene queries.
/// NOTE(review): the method name "CreateParter" is a pre-existing typo,
/// kept unchanged for caller compatibility.
/// </summary>
public void CreateParter(string fieldname)
{
    parser = new Lucene.Net.QueryParsers.QueryParser(VERSION, fieldname, analysor);
}
/// <summary>
/// Initialises the multi-field parser with the configured fields, shingle
/// analyzer and per-field boost weights.
/// </summary>
public void CreateParser()
{
    parser = new MultiFieldQueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        parserFields,
        shingleAnalyzer,
        fieldWeights);
}
/// <summary>
/// Initialises the multi-field parser over the text field plus the "aaa" field.
/// </summary>
public void CreateParser()
{
    string[] searchFields = { TEXT_FN, "aaa" };
    parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(VERSION, searchFields, analyzer);
}
/// <summary>
/// Returns up to <paramref name="itemsToReturn"/> posts similar to the post with
/// id <paramref name="postid"/>, using Lucene's MoreLikeThis over the
/// title/body/tag fields. Returns an empty list for invalid ids or on any
/// index failure (best-effort).
/// </summary>
public List <Post> Similar(int postid, int itemsToReturn)
{
    var list = new List <Post>();
    if (postid <= 0)
    {
        return(list);
    }

    IndexSearcher searcher = null;
    IndexReader reader = null;
    EnsureIndexExists();
    var query = GetIdSearchQuery(postid);
    lck.AcquireReaderLock(ReaderTimeOut);
    try
    {
        searcher = new IndexSearcher(rd);

        // Locate the source document for the given post id.
        TopDocs hits = searcher.Search(query, itemsToReturn);
        if (hits == null || hits.ScoreDocs.Length <= 0)
        {
            return(list);
        }

        int docNum = hits.ScoreDocs[0].Doc;
        if (docNum > -1)
        {
            // Fix: removed the unused "LQ.QueryParser parser = GetQueryParser();"
            // local the original fetched but never used.
            reader = IndexReader.Open(rd, true);
            var mlt = new MoreLikeThis(reader);
            mlt.Analyzer = _analyzer;
            mlt.SetFieldNames(new[] { SearchFields.Title, SearchFields.Body, SearchFields.Tag });
            mlt.MinDocFreq = 5;  // ignore terms in fewer than 5 docs
            mlt.MinTermFreq = 2; // ignore terms occurring fewer than 2 times
            mlt.Boost = true;

            var moreResultsQuery = mlt.Like(docNum);
            TopDocs similarhits = searcher.Search(moreResultsQuery, itemsToReturn);
            for (int i = 0; i < similarhits.ScoreDocs.Length; i++)
            {
                Document doc = searcher.Doc(similarhits.ScoreDocs[i].Doc);
                var post = CreatePostFromDocument(doc, null);
                if (postid != post.Id) // never include the source post itself
                {
                    list.Add(post);
                }
                if (list.Count >= itemsToReturn)
                {
                    break;
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort: any index failure yields an empty/partial result.
        // TODO(review): consider logging here instead of swallowing silently.
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Dispose();
        }
        if (reader != null)
        {
            reader.Dispose();
        }
        lck.ReleaseReaderLock();
    }
    return(list);
}
/// <summary>
/// Sets up the searcher over the index directory and a multi-field parser
/// covering the word, author, bili and title fields.
/// </summary>
public void SetupSearch()
{
    searcher = new IndexSearcher(luceneIndexDirectory);

    string[] searchFields = { WORD_FN, AUTHOR_FN, BILI_FN, TITLE_FN };
    parser = new MultiFieldQueryParser(VERSION, searchFields, analyzer);
}
/// <summary>
/// Initialises the parser object over the field named by <paramref name="label"/>.
/// </summary>
public void CreateParser(string label)
{
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, label, analyzer);
}
/// <summary>
/// Initialises the parser object over the default text field.
/// </summary>
public void CreateParser()
{
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
}
/// <summary>
/// Initialises the parser object over the URL text field.
/// </summary>
public void CreateParser()
{
    parser = new QueryParser(VERSION, TEXT_URL, analyzer);
}
/// <summary>
/// Opens the Lucene index at <paramref name="indexPath"/> and prepares a
/// boosted multi-field parser (AND semantics) over the schema fields.
/// </summary>
/// <param name="indexPath">Directory containing the Lucene index.</param>
/// <param name="resultsLimit">Maximum number of results to return per search.</param>
public SqloogleMiaSearcher(string indexPath, int resultsLimit = 50)
{
    _resultsLimit = resultsLimit;

    var fields = new[] { "server", "database", "schema", "name", "equality", "inequality", "included" };

    // Weight matches higher the further "up" the object hierarchy they occur.
    var boosts = new Dictionary<string, float>
    {
        { "server", .4F },
        { "database", .3F },
        { "schema", .2F },
        { "name", .1F },
        { "equality", .0F },
        { "inequality", .0F },
        { "included", .0F }
    };

    _directory = FSDirectory.Open(new DirectoryInfo(indexPath));
    _searcher = new IndexSearcher(_directory, true);
    _analyzer = new StandardAnalyzer(Version.LUCENE_30);
    _parser = new MultiFieldQueryParser(Version.LUCENE_30, fields, _analyzer, boosts)
    {
        DefaultOperator = QueryParser.Operator.AND
    };

    _logger.Trace("Searcher is ready.");
}