/// <summary>
/// Searches the Lucene index with optional sorting and paging.
/// </summary>
/// <param name="directoryPath">Path to the index directory.</param>
/// <param name="query">Query to execute.</param>
/// <param name="sort">Optional sort; pass null for relevance order.</param>
/// <param name="pagerInfo">Paging state; RecordCount is set to the total hit count.</param>
/// <param name="callback">Optional callback invoked for each returned document.</param>
/// <returns>The documents on the requested page.</returns>
public static List<Document> SearchLuceneData(string directoryPath, Query query, Sort sort, PagerInfo pagerInfo, Action<Document> callback)
{
    List<Document> list = new List<Document>();
    FSDirectory directory = FSDirectory.Open(new System.IO.DirectoryInfo(directoryPath), new NoLockFactory());
    IndexReader indexReader = IndexReader.Open(directory, true);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // The sorted and unsorted branches differed only in the Search overload,
    // so run the query once and share the paging logic.
    TopDocs resultDocs = sort != null
        ? (TopDocs)indexSearcher.Search(query, null, indexSearcher.MaxDoc(), sort)
        : indexSearcher.Search(query, null, indexSearcher.MaxDoc());
    int totalCount = resultDocs.totalHits;
    pagerInfo.RecordCount = totalCount;
    int startOffset = (pagerInfo.PageIndex - 1) * pagerInfo.PageSize;
    int endOffset = pagerInfo.PageIndex * pagerInfo.PageSize;
    if (endOffset >= totalCount)
    {
        endOffset = totalCount;
    }
    ScoreDoc[] docs = resultDocs.scoreDocs;

    if (totalCount > 0)
    {
        for (int i = startOffset; i < endOffset; i++)
        {
            ScoreDoc hit = docs[i];
            Document doc = indexSearcher.Doc(hit.doc);
            list.Add(doc);
            if (callback != null)
            {
                callback(doc);
            }
        }
    }
    indexSearcher.Close();
    directory.Close();
    return list;
}
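A minimal call-site sketch for the helper above, assuming PagerInfo exposes settable PageIndex/PageSize (as the method implies); the index path and the "title"/"date" field names are illustrative only:

// Hypothetical usage: first page, 20 hits per page, newest-first by a "date" field.
var pager = new PagerInfo { PageIndex = 1, PageSize = 20 };
Query query = new TermQuery(new Term("title", "lucene"));             // illustrative field/term
Sort sort = new Sort(new SortField("date", SortField.STRING, true));  // assumes "date" was indexed untokenized
List<Document> page = SearchLuceneData(@"C:\index", query, sort, pager,
    doc => Console.WriteLine(doc.Get("title")));
Console.WriteLine("Total hits: {0}", pager.RecordCount);              // set by the helper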
private static void SearchSomething(String searchText)
{
    Directory directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
    // A WildcardQuery works on raw index terms, so no analyzer is needed here.
    IndexSearcher searcher = new IndexSearcher(directory, true);
    int results = 0;
    if (searcher.MaxDoc() > 0)
    {
        BooleanQuery booleanQuery = new BooleanQuery();
        Lucene.Net.Search.Query query1 = new WildcardQuery(new Term("path", searchText));
        booleanQuery.Add(query1, BooleanClause.Occur.SHOULD);
        TopDocs topDocs = searcher.Search(booleanQuery, searcher.MaxDoc());
        results = topDocs.ScoreDocs.Length;
        Console.WriteLine("Found {0} results", results);
        for (int i = 0; i < results; i++)
        {
            ScoreDoc scoreDoc = topDocs.ScoreDocs[i];
            float score = scoreDoc.Score;
            int docId = scoreDoc.Doc;
            Document doc = searcher.Doc(docId);
            Console.WriteLine("Result num {0}, score {1}", i + 1, score);
            Console.WriteLine("Text found: {0}\r\n", doc.Get("path"));
        }
    }
    searcher.Close();
    directory.Close();
}
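Hypothetical calls to the method above. WildcardQuery matches the pattern against whole indexed terms, so this only behaves as expected if the "path" field was indexed as a single untokenized term, and a leading * forces a scan over the term dictionary:

// Illustrative patterns only; adjust to how "path" was actually indexed.
SearchSomething("*.cs");       // suffix match over untokenized path terms
SearchSomething("*report*");   // leading wildcard: correct, but slow on large indexes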
public static List<T> RawQuery<T>(Query query, SortOptions sortOptions = null) where T : class
{
    List<T> results = new List<T>();
    IndexSearcher searcher = GetIndexSearcher();
    TopDocsCollector collector;
    if (sortOptions == null)
    {
        collector = TopScoreDocCollector.create(searcher.MaxDoc(), true);
    }
    else
    {
        collector = TopFieldCollector.create(
            new Sort(new SortField(sortOptions.FieldName, (int)sortOptions.FieldType, sortOptions.Ascending)),
            searcher.MaxDoc(),
            false, false, false, true);
    }
    searcher.Search(query, collector);
    var scoreDocs = collector.TopDocs().ScoreDocs;
    foreach (ScoreDoc scoreDoc in scoreDocs)
    {
        Document doc = searcher.Doc(scoreDoc.doc);
        // Each hit stores its payload as JSON in the "Data" field.
        var data = doc.Get("Data");
        var result = JsonConvert.DeserializeObject(data, _jsonSerializerSettings) as T;
        if (result == null)
        {
            continue;
        }
        results.Add(result);
    }
    return results;
}
public static List<SearchItem> GetAllItems()
{
    string path = Directory.GetCurrentDirectory() + @"\mtad\";
    // Open a searcher on the index directory (read-only).
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new System.IO.DirectoryInfo(path)), true);
    // MaxDoc() is the upper bound on document ids, so we can walk the whole index.
    // Note: this assumes the index has no deletions (see the deletion-aware variant below).
    var count = searcher.MaxDoc();
    var searchItems = new List<SearchItem>();
    for (int i = 0; i < count; i++)
    {
        var document = searcher.Doc(i);
        var id = document.GetValues("id")[0];
        var tablename = document.GetValues("tablename")[0];
        var acronym = document.GetValues("acronym")[0];
        var english = document.GetValues("english")[0];
        var chinese = document.GetValues("chinese")[0];
        var explain = document.GetValues("explain")[0];
        var searchItem = new SearchItem
        {
            Id = id,
            TableName = tablename,
            Acronym = acronym,
            English = english,
            Chinese = chinese,
            Explain = explain
        };
        searchItems.Add(searchItem);
    }
    return searchItems;
}
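One caveat worth noting: MaxDoc() also counts deleted documents, so walking raw doc ids as above can touch deleted slots if the index has ever had deletions. A defensive variant, assuming the Lucene.Net 2.9-era GetIndexReader()/IsDeleted() accessors:

// Sketch: skip deleted slots when enumerating the index by doc id.
IndexReader reader = searcher.GetIndexReader();
for (int i = 0; i < reader.MaxDoc(); i++)
{
    if (reader.IsDeleted(i))
    {
        continue; // this id belongs to a deleted document
    }
    Document document = reader.Document(i);
    // ... map the document into a SearchItem as above ...
}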
internal static ScoreDoc[] Search(IndexSearcher searcher, string searchfield, List<string> keywords, int max_doc_num)
{
    QueryParser queryparser = new QueryParser(version, searchfield, standardAnalyzer);
    // Join the keywords into a single "a OR b OR c" query string.
    string queryStr = string.Join(" OR ", keywords.ToArray());
    Query query = queryparser.Parse(queryStr);
    TopDocs hits = searcher.Search(query, null, Math.Min(searcher.MaxDoc(), max_doc_num));
    return hits.scoreDocs;
}
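If the keywords can contain QueryParser syntax characters (+, -, *, quotes), building the OR query programmatically sidesteps parse errors. A sketch under that assumption; note that TermQuery bypasses the analyzer, so it presumes the keywords are already lowercased/normalized the way they were indexed:

// Sketch: same OR semantics as the parsed "a OR b OR c" string above,
// but keyword text can never be misread as query syntax.
internal static ScoreDoc[] SearchKeywords(IndexSearcher searcher, string searchfield, List<string> keywords, int max_doc_num)
{
    BooleanQuery query = new BooleanQuery();
    foreach (string keyword in keywords)
    {
        // SHOULD clauses OR together, matching the parser-based version.
        query.Add(new TermQuery(new Term(searchfield, keyword)), BooleanClause.Occur.SHOULD);
    }
    TopDocs hits = searcher.Search(query, null, Math.Min(searcher.MaxDoc(), max_doc_num));
    return hits.scoreDocs;
}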
public CardDescription[] Read(Lucene.Net.Store.Directory dir, ReadProcessChangedInvoker processchanged)
{
    ArrayList cards = new ArrayList(MinCapacity);
    Query query = new MatchAllDocsQuery();
    Searcher searcher = new IndexSearcher(dir, true);
    TopDocs td = searcher.Search(query, null, searcher.MaxDoc());
    ScoreDoc[] docs = td.scoreDocs;
    int length = docs.Length;
    for (int i = 0; i < length; i++)
    {
        Document doc = searcher.Doc(docs[i].doc);
        cards.Add(ParseCard(doc));
        if (processchanged != null)
        {
            processchanged.Invoke(length, i + 1);
        }
    }
    searcher.Close();
    return (CardDescription[])cards.ToArray(typeof(CardDescription));
}
public string checkIndex()
{
    try
    {
        searcher = new IndexSearcher(this.pathIndex);
    }
    catch (IOException)
    {
        return "-";
    }
    // Read the count while the searcher is still open; the original called
    // MaxDoc() after Close(), which relies on undefined closed-reader behavior.
    string count = searcher.MaxDoc().ToString();
    searcher.Close();
    return count;
}
public void checkIndex()
{
    try
    {
        searcher = new IndexSearcher(this.pathIndex);
    }
    catch (IOException)
    {
        FncRebuildIndex(this.FilePath);
        //status("The index doesn't exist or is damaged. Please rebuild it.", true);
        return;
    }
    // Read MaxDoc() before closing the searcher, not after.
    int docCount = searcher.MaxDoc();
    searcher.Close();
    string msg = String.Format("Index is ready. It contains {0} documents.", docCount);
    status(msg);
}
public List<LuceneResult> Search(Query query, Sort sort)
{
    var searcher = new IndexSearcher(_rd);
    var collector = TopFieldCollector.create(sort ?? new Sort(), searcher.MaxDoc(), false, true, true, sort == null);
    searcher.Search(query, collector);
    var docs = collector.TopDocs();
    var maxscore = docs.GetMaxScore();
    // Note: cheap way to avoid div/zero
    if (maxscore == 0)
    {
        maxscore = 1;
    }
    return (from hit in docs.scoreDocs
            let score = hit.score / maxscore
            where score >= 0.001f
            select new LuceneResult(searcher.Doc(hit.doc), score)).ToList();
}
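A hypothetical call site for the method above; LuceneResult and _rd belong to the surrounding class, and the "title" field name is illustrative:

// Relevance-ordered search: passing null for sort ranks by normalized score.
List<LuceneResult> hits = Search(new TermQuery(new Term("title", "lucene")), null);
Console.WriteLine("Kept {0} hits with normalized score >= 0.001", hits.Count);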
public override CardDescription[] Read(string dirname, ReadProcessChangedInvoker processchanged)
{
    if (dirname == null || dirname.Length <= 0)
    {
        return null;
    }
    if (!Directory.Exists(dirname))
    {
        return null;
    }
    if (dirname[dirname.Length - 1] != '\\')
    {
        dirname += "\\";
    }
    ArrayList cards = new ArrayList(MinCapacity);
    Query query = new MatchAllDocsQuery();
    Lucene.Net.Store.Directory dir = new Lucene.Net.Store.SimpleFSDirectory(new DirectoryInfo(dirname), new Lucene.Net.Store.SimpleFSLockFactory());
    Searcher searcher = new IndexSearcher(dir, true);
    TopDocs td = searcher.Search(query, null, searcher.MaxDoc());
    ScoreDoc[] docs = td.scoreDocs;
    int length = docs.Length;
    for (int i = 0; i < length; i++)
    {
        Document doc = searcher.Doc(docs[i].doc);
        cards.Add(ParseCard(doc));
        if (processchanged != null)
        {
            processchanged.Invoke(length, i + 1);
        }
    }
    searcher.Close();
    return (CardDescription[])cards.ToArray(typeof(CardDescription));
}
public static List<int> Search(IndexSearcher searcher, string queryStr, string queryField, int docCnt = -1)
{
    if (docCnt == -1)
    {
        docCnt = searcher.MaxDoc();
    }
    QueryParser queryparser = new QueryParser(version, queryField, standardAnalyzer);
    // Strip '-' characters, likely so QueryParser does not treat them as the NOT operator.
    queryStr = queryStr.Replace("-", "");
    if (String.IsNullOrEmpty(queryStr))
    {
        return new List<int>();
    }
    Query query = queryparser.Parse(queryStr);
    var docs = searcher.Search(query, null, docCnt).scoreDocs;
    List<int> docIDs = new List<int>();
    foreach (var scoreDoc in docs)
    {
        docIDs.Add(scoreDoc.doc);
    }
    return docIDs;
}
static TestIndex()
{
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, null, true);
    writer.SetMaxFieldLength(MaxNumberOfTermsPerDocument);
    var pathTokenStream = new PathTokenStream("");
    var contentTokenStream = new SimpleTokenStream("");
    var externalsTokenStream = new PathTokenStream("");
    Field field_id = new Field("id", "", Field.Store.YES, Field.Index.UN_TOKENIZED);
    Field field_rev_first = new Field(FieldName.RevisionFirst, "", Field.Store.NO, Field.Index.UN_TOKENIZED);
    Field field_rev_last = new Field(FieldName.RevisionLast, "", Field.Store.NO, Field.Index.UN_TOKENIZED);
    Document doc = new Document();
    doc.Add(field_id);
    doc.Add(new Field(FieldName.Path, pathTokenStream));
    doc.Add(new Field(FieldName.Content, contentTokenStream));
    doc.Add(new Field(FieldName.Externals, externalsTokenStream));
    doc.Add(field_rev_first);
    doc.Add(field_rev_last);
    for (int i = 0; i < Data.GetLength(0); ++i)
    {
        string id = Data[i, 1];
        field_id.SetValue(id);
        pathTokenStream.SetText(id);
        int rev_first = Revision.Head;
        if (id.StartsWith("/revisions"))
        {
            contentTokenStream.SetText("");
            externalsTokenStream.SetText("");
            rev_first = int.Parse(Data[i, 2]);
        }
        else
        {
            contentTokenStream.SetText(Data[i, 2]);
            externalsTokenStream.SetText(Data[i, 3]);
        }
        field_rev_first.SetValue(RevisionFieldValue(rev_first));
        field_rev_last.SetValue(HeadRevisionFieldValue());
        writer.AddDocument(doc);
        if (id.StartsWith("/revisions") && Data[i, 3] != null) // update last revision
        {
            // Change the last revision.
            // Warning: it is not possible to load a document from the index;
            // we have to rebuild/reparse it from scratch.
            writer.DeleteDocuments(new Term("id", id));
            pathTokenStream.SetText(id);
            contentTokenStream.SetText("");
            externalsTokenStream.SetText("");
            int rev_last = int.Parse(Data[i, 3]);
            field_rev_last.SetValue(RevisionFieldValue(rev_last));
            id += "@" + rev_first;
            Data[i, 1] = id;
            field_id.SetValue(id);
            writer.AddDocument(doc);
        }
    }
    // Delete a non-existent document as a smoke test.
    writer.DeleteDocuments(new Term("id", "bliflaiwj123dj33"));
    writer.Optimize();
    writer.Close();
    Searcher = new IndexSearcher(directory);
    Assert.AreEqual(Data.GetLength(0), Searcher.MaxDoc()); // smoke test for index creation
}
public virtual PagedList<Models.ResultObject> Search(string key, int pageIndex, int pageSize, params string[] folders)
{
    var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexDir));
    if (!IndexReader.IndexExists(indexDirectory) || string.IsNullOrEmpty(key) && (folders == null || folders.Length == 0))
    {
        return new PagedList<ResultObject>(new ResultObject[0], pageIndex, pageSize, 0);
    }
    var query = new BooleanQuery();
    key = QueryParser.Escape(key.Trim().ToLower());
    if (string.IsNullOrEmpty(key))
    {
        key = "*:*";
    }
    // Match the key against the title (boost 2) and body (boost 1) fields.
    QueryParser titleParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.TitleFieldName, this.Analyzer);
    var titleQuery = titleParser.Parse(key);
    titleQuery.SetBoost(2);
    query.Add(titleQuery, BooleanClause.Occur.SHOULD);
    QueryParser bodyParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.BodyFieldName, this.Analyzer);
    var bodyQuery = bodyParser.Parse(key);
    bodyQuery.SetBoost(1);
    query.Add(bodyQuery, BooleanClause.Occur.SHOULD);
    // Optionally restrict results to the given folders via a filter.
    QueryWrapperFilter filter = null;
    if (folders != null && folders.Length > 0)
    {
        var folderQuery = new BooleanQuery();
        foreach (var folder in folders)
        {
            var termQuery = new TermQuery(new Term("FolderName", folder));
            termQuery.SetBoost(3);
            folderQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
        }
        filter = new QueryWrapperFilter(folderQuery);
    }
    var searcher = new IndexSearcher(indexDirectory, true);
    TopDocsCollector collector = TopScoreDocCollector.create(searcher.MaxDoc(), false);
    if (filter == null)
    {
        searcher.Search(query, collector);
    }
    else
    {
        searcher.Search(query, filter, collector);
    }
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<strong class='highlight'>", "</strong>"), new Lucene.Net.Highlight.QueryScorer(query));
    var startIndex = (pageIndex - 1) * pageSize;
    List<ResultObject> results = new List<ResultObject>();
    foreach (var doc in collector.TopDocs(startIndex, pageSize).ScoreDocs)
    {
        var document = searcher.Doc(doc.doc);
        ResultObject result = Converter.ToResultObject(highlighter, document);
        if (result != null)
        {
            results.Add(result);
        }
    }
    return new PagedList<ResultObject>(results, pageIndex, pageSize, collector.GetTotalHits());
}
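A hypothetical call, assuming an instance of the surrounding search service; the folder names are illustrative:

// Page 2, 10 results per page, restricted to two folders by the filter above.
var page = searchService.Search("lucene", 2, 10, "News", "Docs");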
public static void RawIndexToIndex(string inputFolder, string outputFolder)
{
    IndexWriter tweetWriter = new IndexWriter(new SimpleFSDirectory(new DirectoryInfo(outputFolder)), new StandardAnalyzer(Version.LUCENE_29), new IndexWriter.MaxFieldLength(int.MaxValue));
    HashSet<string> stopwords = new HashSet<string>(Stopwords);
    var iodir = new DirectoryInfo(inputFolder);
    var directory = FSDirectory.Open(iodir);
    IndexSearcher searcher = new IndexSearcher(directory);
    // Walk every document in the source index by doc id.
    for (int i = 0; i < searcher.MaxDoc(); i++)
    {
        if (i % 10000 == 0)
        {
            Console.Out.WriteLine(i);
        }
        Document doc = searcher.Doc(i);
        string text = doc.Get("Text");
        var type = AnalyzeTweet(text, stopwords);
        var dic = RefineTweet(text, type, stopwords);
        string[] items = text.Split(' ');
        List<string> words = new List<string>();
        List<string> hashtags = new List<string>();
        List<string> mentions = new List<string>();
        List<string> retweets = new List<string>();
        // Bucket each refined token by its word type.
        for (int j = 0; j < items.Length; j++)
        {
            if (dic.ContainsKey(j))
            {
                if (type[j] == WordType.Hashtag) { hashtags.Add(dic[j]); }
                if (type[j] == WordType.Mention) { mentions.Add(dic[j]); }
                if (type[j] == WordType.Retweet) { retweets.Add(dic[j]); }
                if (type[j] == WordType.Word)    { words.Add(dic[j]); }
            }
        }
        // string.Join handles the empty case and replaces the original
        // Aggregate(...).Substring(1) pattern, which also left a stray
        // leading space on the "Word" field.
        doc.Add(new Field("Hashtag", string.Join(" ", hashtags.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Mention", string.Join(" ", mentions.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Retweet", string.Join(" ", retweets.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Word", string.Join(" ", words.ToArray()), Field.Store.YES, Field.Index.ANALYZED));
        // Keep only tweets that are mostly words rather than tags.
        if (hashtags.Count < 5 && words.Count > 3)
        {
            tweetWriter.AddDocument(doc);
        }
    }
    tweetWriter.Optimize();
    tweetWriter.Close();
}