////////////////////////////////////////////////////////////////

public bool HasUri (Uri uri)
{
	IndexReader primary_reader;
	primary_reader = LuceneCommon.GetReader (PrimaryStore);

	Term term;
	term = new Term ("Uri", UriFu.UriToEscapedString (uri));

	TermDocs term_docs;
	term_docs = primary_reader.TermDocs ();
	term_docs.Seek (term);

	bool has_uri = false;
	if (term_docs.Next ())
		has_uri = true;

	term_docs.Close ();

	LuceneCommon.ReleaseReader (primary_reader);

	return has_uri;
}
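// A minimal usage sketch: skip re-indexing a URI that is already present in
// the primary index. The `driver` instance and QueueForIndexing helper are
// hypothetical, and UriFu.PathToFileUri is assumed to exist alongside the
// UriFu.UriToEscapedString helper used above.
Uri candidate = UriFu.PathToFileUri ("/home/user/notes.txt");
if (!driver.HasUri (candidate))
	QueueForIndexing (candidate); // hypothetical scheduling helper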
private static void HandleSignal (int signal)
{
	Logger.Log.Debug ("Handling signal {0} ({1})", signal, (Mono.Unix.Native.Signum) signal);

	// Pass the signals to the helper too.
	GLib.Idle.Add (new GLib.IdleHandler (delegate () {
		RemoteIndexer.SignalRemoteIndexer ((Mono.Unix.Native.Signum) signal);
		return false;
	}));

	// If we get SIGUSR1, turn the debugging level up.
	if ((Mono.Unix.Native.Signum) signal == Mono.Unix.Native.Signum.SIGUSR1) {
		LogLevel old_level = Log.Level;
		Log.Level = LogLevel.Debug;
		Log.Debug ("Moving from log level {0} to Debug", old_level);
		return;
	} else if ((Mono.Unix.Native.Signum) signal == Mono.Unix.Native.Signum.SIGUSR2) {
		// Debugging hook for beagrepd
		QueryDriver.DebugHook ();
		LuceneCommon.DebugHook ();
		return;
	}

	Logger.Log.Debug ("Initiating shutdown in response to signal.");
	Shutdown.BeginShutdown ();
}
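// A registration sketch (the daemon's actual wiring is not shown here):
// HandleSignal's (int) signature matches Mono.Unix.Native.SignalHandler, so
// it can be hooked up with Mono.Posix's Stdlib.signal. Mono's UnixSignal is
// an alternative for blocking-wait style handling.
Mono.Unix.Native.Stdlib.signal (Mono.Unix.Native.Signum.SIGUSR1,
				new Mono.Unix.Native.SignalHandler (HandleSignal));
Mono.Unix.Native.Stdlib.signal (Mono.Unix.Native.Signum.SIGUSR2,
				new Mono.Unix.Native.SignalHandler (HandleSignal));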
public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text, int context_length, int snippet_length)
{
	this.line_reader = line_reader;
	this.found_snippet_length = 0;
	this.full_text = full_text;
	this.context_length = (context_length > 0 ? context_length : context_length_default);
	this.snippet_length = (snippet_length > 0 ? snippet_length : snippet_length_default);

	if (query_terms == null)
		return;

	this.sliding_window = new SlidingWindow (this.context_length);

	// Remove stop words from query_terms
	query_terms_list = new ArrayList (query_terms.Length);
	foreach (string term in query_terms) {
		if (LuceneCommon.IsStopWord (term))
			continue;
		query_terms_list.Add (term);
	}

	//Console.WriteLine ("Creating snippet reader");
}
////////////////////////////////////////////////////////////////

public Uri[] PropertyQuery (Property prop)
{
	// FIXME: Should we support scanning the secondary
	// index as well?

	IndexReader primary_reader;
	primary_reader = LuceneCommon.GetReader (PrimaryStore);

	Term term;
	term = new Term (PropertyToFieldName (prop.Type, prop.Key), prop.Value.ToLower ());

	TermDocs term_docs;
	term_docs = primary_reader.TermDocs ();
	term_docs.Seek (term);

	ArrayList uri_list = new ArrayList ();
	while (term_docs.Next ()) {
		Document doc = primary_reader.Document (term_docs.Doc ());
		uri_list.Add (GetUriFromDocument (doc));
	}

	term_docs.Close ();

	LuceneCommon.ReleaseReader (primary_reader);

	return (Uri[]) uri_list.ToArray (typeof (Uri));
}
private List<ContentViewModel> ShowSearchContent ()
{
	string indexPath = System.Configuration.ConfigurationManager.AppSettings["LuceneNetDir"];
	string searchString = Request["txtSearch"];

	// Split the user's search input into individual terms.
	List<string> list = LuceneCommon.PanGuSplitWord ("Title", searchString);

	FSDirectory directory = FSDirectory.Open (new DirectoryInfo (indexPath), new NoLockFactory ());
	IndexReader reader = IndexReader.Open (directory, true);
	IndexSearcher searcher = new IndexSearcher (reader);

	// Build the search condition
	PhraseQuery queryBody = new PhraseQuery ();

	//// Multi-field query
	//PhraseQuery queryTitle = new PhraseQuery ();

	// (Alternatively, users could split terms themselves with spaces,
	// e.g. "computer science" as two space-separated words.)
	foreach (string word in list) {
		queryBody.Add (new Term ("Price", word));
		//queryTitle.Add (new Term ("Title", word));
	}

	// Maximum distance allowed between the query terms. Terms that sit too far
	// apart in a document are meaningless as a phrase (e.g. if many words
	// separate "college student" and "resume", the match is no longer useful).
	queryBody.Slop = 100;
	//queryTitle.SetSlop (100);

	#region Multi-field query
	//BooleanQuery query = new BooleanQuery ();
	//query.Add (queryTitle, BooleanClause.Occur.SHOULD);
	//query.Add (queryBody, BooleanClause.Occur.SHOULD);
	#endregion

	// TopScoreDocCollector is the container that holds the query results.
	TopScoreDocCollector collector = TopScoreDocCollector.Create (1000, true);

	// Run the query and gather the results into the collector.
	searcher.Search (queryBody, null, collector);

	// Fetch the matching documents. TotalHits is the total hit count;
	// TopDocs(300, 20) would return the 20 documents starting at offset 300,
	// so this call can be used to implement paging.
	ScoreDoc[] docs = collector.TopDocs (0, collector.TotalHits).ScoreDocs;

	List<ContentViewModel> viewModelList = new List<ContentViewModel> ();
	for (int i = 0; i < docs.Length; i++) {
		// ScoreDoc[] only carries document ids, so the matching Documents are
		// not all loaded into memory at once; this keeps memory pressure low.
		// When the details are needed, searcher.Doc loads the Document by id.
		ContentViewModel viewModel = new ContentViewModel ();
		int docId = docs[i].Doc;             // Lucene's internal document id
		Document doc = searcher.Doc (docId); // load the full document for that id
		viewModel.Id = Convert.ToInt32 (doc.Get ("Id")); // read back the stored fields
		viewModel.Title = doc.Get ("Title");
		// Highlight the search terms in the result.
		viewModel.Content = LuceneCommon.CreateHightLight (searchString, doc.Get ("Price"));
		viewModelList.Add (viewModel);
	}

	// Record the search terms in the search-details table first.
	SearchDetails searchDetail = new SearchDetails ();
	searchDetail.Id = Guid.NewGuid ();
	searchDetail.KeyWords = Request["txtSearch"];
	searchDetail.SearchTime = DateTime.Now;
	SearchDetailsService.AddEntity (searchDetail);

	return viewModelList;
}
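// A paging sketch using the same collector API as above: TopDocs(start, n)
// returns n hits beginning at offset start, so a given page can be fetched
// without materializing every result. pageSize and pageIndex are
// illustrative variables, not part of the original method.
int pageSize = 20;
int pageIndex = 2; // zero-based: skips the first 40 hits
ScoreDoc[] page = collector.TopDocs (pageIndex * pageSize, pageSize).ScoreDocs;
foreach (ScoreDoc hit in page) {
	Document d = searcher.Doc (hit.Doc);
	Console.WriteLine ("{0}: {1}", d.Get ("Id"), d.Get ("Title"));
}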
// Returns a list of all files and directories in dir
static ICollection GetAllItemsInDirectory (DirectoryInfo dir)
{
	// Form the query
	string parent_uri_str = PathToUri (dir.FullName).ToString ();

	// Instead of taking the painful route of using BeagrepAnalyzer, let's just add the prefix manually.
	// LuceneCommon thinks exposing the secret property-type encoding is bad; I think so too... except for now.
	string key = "prop:k:" + Property.ParentDirUriPropKey;
	//Logger.Log.Debug ("Querying for {0}={1}", parent_uri_str, key);

	LNS.Query query = new LNS.TermQuery (new Term (key, parent_uri_str));

	// Do the search
	LNS.IndexSearcher searcher;
	searcher = LuceneCommon.GetSearcher (driver.PrimaryStore);

	BetterBitArray matches;
	matches = new BetterBitArray (searcher.MaxDoc ());

	BitArrayHitCollector collector;
	collector = new BitArrayHitCollector (matches);

	searcher.Search (query, null, collector);

	// Finally we pull all of the matching documents,
	// convert them to Dirent, and store them in a list.
	ArrayList match_list = new ArrayList ();

	int i = 0;
	while (i < matches.Count) {
		i = matches.GetNextTrueIndex (i);
		if (i >= matches.Count)
			break;

		Document doc;
		doc = searcher.Doc (i);

		Dirent info;
		info = DocumentToDirent (doc);

		match_list.Add (info);

		++i;
	}

	LuceneCommon.ReleaseSearcher (searcher);

	//Logger.Log.Debug ("Found {0} items in {1}", match_list.Count, dir.FullName);

	return match_list;
}
// FIXME: Move these to LuceneCommon if and when we decide to
// support adding/removing arbitrary backends at runtime

internal void Close ()
{
	Log.Debug ("Removing static queryable {0}", IndexName);

	if (text_cache != null)
		text_cache.Dispose ();

	// Free the cached IndexReaders
	LuceneCommon.CloseReader (LuceneCommon.GetReader (Driver.PrimaryStore));
	LuceneCommon.CloseReader (LuceneCommon.GetReader (Driver.SecondaryStore));

	Driver.PrimaryStore.Close ();
	Driver.SecondaryStore.Close ();

	FileAttributesStore.Dispose ();
}
public StoredInfo GetStoredInfo (Uri uri)
{
	StoredInfo info = new StoredInfo ();

	LNS.Query query = UriQuery ("Uri", uri);

	SingletonCollector collector = new SingletonCollector ();

	LNS.IndexSearcher searcher = LuceneCommon.GetSearcher (PrimaryStore);
	searcher.Search (query, null, collector);

	if (collector.MatchId != -1) {
		Document doc = searcher.Doc (collector.MatchId);
		info = DocumentToStoredInfo (doc);
	}

	LuceneCommon.ReleaseSearcher (searcher);

	return info;
}
////////////////////////////////////////////////////////////////

public NameInfo GetNameInfoById (Guid id)
{
	Uri uri;
	uri = GuidFu.ToUri (id);

	IndexReader reader;
	reader = LuceneCommon.GetReader (SecondaryStore);

	TermDocs term_docs;
	term_docs = reader.TermDocs ();

	Term term = new Term ("Uri", UriFu.UriToEscapedString (uri));
	term_docs.Seek (term);

	int match_id = -1;
	if (term_docs.Next ())
		match_id = term_docs.Doc ();

	term_docs.Close ();

	NameInfo info = null;

	if (match_id != -1) {
		Document doc;
		doc = reader.Document (match_id, fields_nameinfo);
		info = DocumentToNameInfo (doc);
	}

	LuceneCommon.ReleaseReader (reader);

	return info;
}
public void Merge (LuceneCommon index_to_merge)
{
	// FIXME: Error recovery

	// Merge the primary index
	IndexWriter primary_writer;
	Lucene.Net.Store.Directory[] primary_store = { index_to_merge.PrimaryStore };
	primary_writer = new IndexWriter (PrimaryStore, null, false);

	primary_writer.AddIndexes (primary_store);
	primary_writer.Close ();

	// Merge the secondary index
	IndexWriter secondary_writer;
	Lucene.Net.Store.Directory[] secondary_store = { index_to_merge.SecondaryStore };
	secondary_writer = new IndexWriter (SecondaryStore, null, false);

	secondary_writer.AddIndexes (secondary_store);
	secondary_writer.Close ();
}
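// The same merge pattern expressed in raw Lucene.Net calls, as a point of
// reference rather than Beagle API: open an IndexWriter on the destination
// store with create=false and feed it the source store via AddIndexes. The
// paths are illustrative, and FSDirectory.Open follows the 2.9-style API
// used elsewhere in this file.
Lucene.Net.Store.Directory dest = FSDirectory.Open (new DirectoryInfo ("/tmp/main-index"));
Lucene.Net.Store.Directory src = FSDirectory.Open (new DirectoryInfo ("/tmp/batch-index"));
IndexWriter writer = new IndexWriter (dest, null, false);
writer.AddIndexes (new Lucene.Net.Store.Directory[] { src });
writer.Close ();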
// Returns true if there are docs to search and creates the readers and
// searchers in that case. Otherwise, returns false.
private bool BuildSearchers (out IndexReader primary_reader,
			     out LNS.IndexSearcher primary_searcher,
			     out IndexReader secondary_reader,
			     out LNS.IndexSearcher secondary_searcher)
{
	primary_searcher = null;
	secondary_reader = null;
	secondary_searcher = null;

	primary_reader = LuceneCommon.GetReader (PrimaryStore);
	if (primary_reader.NumDocs () == 0) {
		ReleaseReader (primary_reader);
		primary_reader = null;
		return false;
	}

	primary_searcher = new LNS.IndexSearcher (primary_reader);

	if (SecondaryStore != null) {
		secondary_reader = LuceneCommon.GetReader (SecondaryStore);
		if (secondary_reader.NumDocs () == 0) {
			ReleaseReader (secondary_reader);
			secondary_reader = null;
		}
	}

	if (secondary_reader != null)
		secondary_searcher = new LNS.IndexSearcher (secondary_reader);

	return true;
}
public Hashtable GetStoredUriStrings (string server, string file)
{
	Hashtable uris = new Hashtable ();

	Term term = new Term (PropertyToFieldName (PropertyType.Keyword, "fixme:file"), file);
	LNS.QueryFilter filter = new LNS.QueryFilter (new LNS.TermQuery (term));

	term = new Term (PropertyToFieldName (PropertyType.Keyword, "fixme:account"), server);
	LNS.TermQuery query = new LNS.TermQuery (term);

	LNS.IndexSearcher searcher = LuceneCommon.GetSearcher (PrimaryStore);
	LNS.Hits hits = searcher.Search (query, filter);

	for (int i = 0; i < hits.Length (); i++) {
		StoredInfo info = DocumentToStoredInfo (hits.Doc (i));
		uris.Add (info.Uri.ToString (), info.FullyIndexed);
	}

	LuceneCommon.ReleaseSearcher (searcher);

	return uris;
}
static void AddSearchTermInfo (QueryPart part, SearchTermResponse response, StringBuilder sb)
{
	if (part.Logic == QueryPartLogic.Prohibited)
		return;

	if (part is QueryPart_Or) {
		ICollection sub_parts;
		sub_parts = ((QueryPart_Or) part).SubParts;
		foreach (QueryPart qp in sub_parts)
			AddSearchTermInfo (qp, response, sb);
		return;
	}

	if (!(part is QueryPart_Text))
		return;

	QueryPart_Text tp;
	tp = (QueryPart_Text) part;

	string[] split;
	split = tp.Text.Split (' ');

	// First, remove stop words
	for (int i = 0; i < split.Length; ++i)
		if (LuceneCommon.IsStopWord (split[i]))
			split[i] = null;

	// Assemble the phrase minus stop words
	sb.Length = 0;
	for (int i = 0; i < split.Length; ++i) {
		if (split[i] == null)
			continue;
		if (sb.Length > 0)
			sb.Append (' ');
		sb.Append (split[i]);
	}
	response.ExactText.Add (sb.ToString ());

	// Now assemble a stemmed version
	sb.Length = 0; // clear the previous value
	for (int i = 0; i < split.Length; ++i) {
		if (split[i] == null)
			continue;
		if (sb.Length > 0)
			sb.Append (' ');
		sb.Append (LuceneCommon.Stem (split[i].ToLower ()));
	}
	response.StemmedText.Add (sb.ToString ());
}
// Starts scanning at character pos of string text for occurrences of any word
// in stemmed_terms. Returns a list of (words)*[(matched word)(words)*]+
private SnippetLine MarkTerms (ArrayList stemmed_terms, string text, ref int pos)
{
	SnippetLine snippet_line = null;
	int prev_match_end_pos = pos; // misnomer; means 1 + end_pos of previous word

	// 1. Get next word.
	// 2. If no next word, return the list.
	// 3. If the word is not a match, following_words++.
	// 4. Else:
	//    4a. add the pending words to the list
	//    4b. add the matched word to the list
	//    4c. clear the pending words
	//    4d. following_words = 0
	// 5. If (following_words >= max_following_words):
	//    5a. add the pending words to the list
	//    5b. clear the pending words
	//    5c. return the list

	while (pos < text.Length) {
		// Find the beginning of the next token
		if (IsTokenSeparator (text[pos])) {
			++pos;
			continue;
		}

		// Find the end of the next token
		int end_pos = pos + 1;
		while (end_pos < text.Length && !IsTokenSeparator (text[end_pos]))
			++end_pos;

		string token = text.Substring (pos, end_pos - pos);
		string stemmed_token = null;
		bool found_match = false;

		// Iterate through the stemmed terms and match the token
		for (int i = 0; i < stemmed_terms.Count; i++) {
			// If this term is longer than the token in question, skip it.
			if (end_pos - pos < ((string) stemmed_terms[i]).Length)
				continue;

			// We cache the stemmed token, so as to avoid stemming it more
			// than once when considering multiple terms.
			if (stemmed_token == null)
				stemmed_token = LuceneCommon.Stem (token.ToLower ());

			if (String.Compare ((string) stemmed_terms[i], stemmed_token, true) != 0)
				continue;

			// We have a match!
			found_match = true;
			//Console.WriteLine ("Found match");

			if (snippet_line == null)
				snippet_line = new SnippetLine ();

			// Find the fragment before the match
			int start_pos = sliding_window.StartValue;
			if (start_pos == -1)                // If no non-match words were seen after the last match,
				start_pos = prev_match_end_pos; // use wherever the previous word ended.
			sliding_window.Reset ();

			string before_match = text.Substring (start_pos, pos - start_pos);
			snippet_line.AddNonMatchFragment (before_match);
			//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", start_pos, pos - 1, before_match);

			snippet_line.AddMatchFragment (i, token);
			//Console.WriteLine ("Adding word [{0}, {1}]:[{2}]", pos, end_pos - 1, token);

			prev_match_end_pos = end_pos;
			break;
		}

		if (!found_match) {
			// Add the start pos of the token to the window
			sliding_window.Add (pos);

			// If we found a match previously and have since seen enough
			// following words, stop.
			if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == context_length) {
				sliding_window.Reset ();
				string after_match = text.Substring (prev_match_end_pos, end_pos - prev_match_end_pos);
				snippet_line.AddNonMatchFragment (after_match);
				//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", prev_match_end_pos, end_pos - 1, after_match);
				return snippet_line;
			}
		}

		pos = end_pos;
	}

	// If fewer than context_length words came after the last match, add the rest here
	if (snippet_line != null && snippet_line.Count > 0) {
		sliding_window.Reset ();
		string after_match = text.Substring (prev_match_end_pos, pos - prev_match_end_pos);
		snippet_line.AddNonMatchFragment (after_match);
		//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", prev_match_end_pos, pos - 1, after_match);
		//Console.WriteLine ("Sending snippet: {0}", snippet_line.ToString ());
		return snippet_line;
	}

	sliding_window.Reset ();
	return null;
}
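// A worked trace, derived from the logic above rather than from test output:
// with context_length = 3, text = "one two three four MATCH five six seven eight"
// and a single query term stemming to "match", the sliding window holds the
// start positions of "two", "three" and "four" when the match is found. The
// returned SnippetLine therefore contains the non-match fragment
// "two three four ", the match fragment "MATCH", and then the three
// following words "five six seven", at which point the window is full and
// the method returns.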
// Returns all directories whose name matches 'name',
// or all directories if name is null.
public ICollection GetAllDirectoryNameInfo (string name)
{
	// First we assemble a query to find all of the directories.
	string field_name;
	field_name = PropertyToFieldName (PropertyType.Keyword, Property.IsDirectoryPropKey);
	LNS.Query isdir_query = new LNS.TermQuery (new Term (field_name, "true"));

	LNS.Query query = null;

	if (name == null) {
		query = isdir_query;
	} else {
		string dirname_field;
		dirname_field = PropertyToFieldName (PropertyType.Text, Property.TextFilenamePropKey);

		LNS.Query dirname_query;
		dirname_query = LuceneCommon.StringToQuery (dirname_field, name, null);

		LNS.BooleanQuery bool_query = new LNS.BooleanQuery ();
		bool_query.Add (isdir_query, LNS.BooleanClause.Occur.MUST);
		bool_query.Add (dirname_query, LNS.BooleanClause.Occur.MUST);

		query = bool_query;
	}

	// Then we actually run the query
	LNS.IndexSearcher searcher;
	//searcher = new LNS.IndexSearcher (SecondaryStore);
	searcher = LuceneCommon.GetSearcher (SecondaryStore);

	BetterBitArray matches;
	matches = new BetterBitArray (searcher.MaxDoc ());

	BitArrayHitCollector collector;
	collector = new BitArrayHitCollector (matches);

	searcher.Search (query, null, collector);

	// Finally we pull all of the matching documents,
	// convert them to NameInfo, and store them in a list.
	ArrayList match_list = new ArrayList ();

	int i = 0;
	while (i < matches.Count) {
		i = matches.GetNextTrueIndex (i);
		if (i >= matches.Count)
			break;

		Document doc;
		doc = searcher.Doc (i, fields_nameinfo);

		NameInfo info;
		info = DocumentToNameInfo (doc);

		match_list.Add (info);

		++i;
	}

	LuceneCommon.ReleaseSearcher (searcher);

	return match_list;
}
////////////////////////////////////////////////////////////////

public Guid GetIdByNameAndParentId (string name, Guid parent_id)
{
	string parent_uri_str;
	parent_uri_str = GuidFu.ToUriString (parent_id);

	string key1, key2;
	key1 = PropertyToFieldName (PropertyType.Keyword, Property.ParentDirUriPropKey);
	key2 = PropertyToFieldName (PropertyType.Keyword, Property.ExactFilenamePropKey);

	Term term1, term2;
	term1 = new Term (key1, parent_uri_str);
	term2 = new Term (key2, name.ToLower ());

	// Let's walk the exact file name terms first (term2),
	// since there are probably fewer of them than parent
	// directory Uri terms.
	List<int> term2_doc_ids = new List<int> ();

	IndexReader reader = LuceneCommon.GetReader (SecondaryStore);
	TermDocs term_docs = reader.TermDocs ();

	term_docs.Seek (term2);
	while (term_docs.Next ())
		term2_doc_ids.Add (term_docs.Doc ());

	term_docs.Seek (term1);

	int match_id = -1;
	while (term_docs.Next ()) {
		int doc_id = term_docs.Doc ();
		// TermDocs iterates in ascending doc id order, so term2_doc_ids is
		// already sorted and binary search is safe here.
		if (term2_doc_ids.BinarySearch (doc_id) >= 0) {
			match_id = doc_id;
			break;
		}
	}

	term_docs.Close ();

	Guid id;
	if (match_id != -1) {
		Document doc;
		doc = reader.Document (match_id);
		id = GuidFu.FromUriString (doc.Get ("Uri"));
	} else {
		id = Guid.Empty;
	}

	LuceneCommon.ReleaseReader (reader);

	return id;
}
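// A minimal usage sketch (root_id is assumed to be a known directory id, and
// a NameInfo.Name member is assumed): resolve a child file's internal id and
// then look up its name info with GetNameInfoById above.
Guid docs_id = GetIdByNameAndParentId ("documents", root_id);
Guid file_id = GetIdByNameAndParentId ("report.pdf", docs_id);
if (file_id != Guid.Empty) {
	NameInfo info = GetNameInfoById (file_id);
	Console.WriteLine ("Found {0}", info.Name); // Name member assumed
}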