/// <summary>
/// Tokenizes <paramref name="keyword"/> with the PanGu analyzer and returns the terms.
/// </summary>
/// <param name="keyword">Raw text to split.</param>
/// <returns>The tokens produced by the analyzer, in input order (may be empty).</returns>
public static IEnumerable<string> SplitWords(string keyword)
{
    IList<string> list = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer();
    try
    {
        // The field name is irrelevant when tokenizing free text; keyword doubles as both.
        TokenStream stream = analyzer.TokenStream(keyword, new StringReader(keyword));
        // Advance first, then read the term attribute for each emitted token.
        while (stream.IncrementToken())
        {
            ITermAttribute ita = stream.GetAttribute<ITermAttribute>();
            list.Add(ita.Term);
        }
    }
    finally
    {
        analyzer.Close(); // FIX: the original leaked the analyzer
    }
    return list;
}
/// <summary>
/// Searches <paramref name="field"/> for <paramref name="queryString"/>, sorting hits by the
/// "Time" field descending. '&amp;' separates sub-queries that must ALL match (logical AND).
/// </summary>
/// <param name="searcher">Open searcher over the target index.</param>
/// <param name="queryString">Query text; '&amp;'-separated clauses are AND-ed.</param>
/// <param name="field">Default field the parser targets.</param>
/// <param name="numHit">Maximum number of hits to return.</param>
/// <param name="inOrder">Unused here; kept for signature parity with the collector-based overload.</param>
/// <returns>The top hits, sorted by Time descending.</returns>
static ScoreDoc[] SearchTime(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
{
    Analyzer analyser = new PanGuAnalyzer();
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyser);
    var querys = queryString.Split('&');
    Sort sort = new Sort(new SortField("Time", SortField.STRING_VAL, true));
    // BUG FIX: original condition was "querys != null || querys.Length > 1", which is
    // always true (Split never returns null), making the single-query branch dead.
    if (querys.Length > 1)
    {
        // Combine each '&'-separated clause with MUST (AND semantics).
        BooleanQuery query = new BooleanQuery();
        foreach (var str in querys)
        {
            query.Add(parser.Parse(str), BooleanClause.Occur.MUST);
        }
        // FIX: honor numHit instead of the hard-coded 20.
        TopFieldDocs topField = searcher.Search(query, null, numHit, sort);
        return topField.scoreDocs;
    }
    else
    {
        Query query = parser.Parse(queryString);
        TopFieldDocs topField = searcher.Search(query, null, numHit, sort);
        return topField.scoreDocs;
    }
}
/// <summary>
/// Runs a full-text query against the "title" field of the commodity index
/// and maps every hit back to a <see cref="Commodity"/>.
/// </summary>
/// <param name="queryString">User query, parsed with the PanGu analyzer.</param>
/// <returns>All matching commodities (up to 10000 hits).</returns>
public List<Commodity> QueryIndex(string queryString)
{
    IndexSearcher searcher = null;
    try
    {
        var results = new List<Commodity>();
        Directory dir = FSDirectory.Open(StaticConstant.IndexPath);
        searcher = new IndexSearcher(dir);

        // Build the query from the raw user input.
        Analyzer analyzer = new PanGuAnalyzer();
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(queryString);
        Console.WriteLine(query.ToString()); // echo the parsed search expression

        TopDocs docs = searcher.Search(query, (Filter)null, 10000);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            results.Add(DocumentToCommodityInfo(searcher.Doc(sd.Doc)));
        }
        return results;
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Dispose();
        }
    }
}
/// <summary>
/// Builds (or appends to) a Lucene index at <paramref name="path"/> from the given sources.
/// </summary>
/// <param name="path">Folder in which the index files are stored.</param>
/// <param name="sources">Items to index; Title/Content are analyzed, Url is stored verbatim.</param>
static void CreateIndex(string path, List<Meta> sources)
{
    var root = new DirectoryInfo(path);
    var lockFactory = new NativeFSLockFactory();
    using (FSDirectory fsRoot = FSDirectory.Open(root, lockFactory))
    {
        // Opening an IndexWriter locks the index directory automatically.
        // Create a fresh index only when none exists yet, otherwise append.
        Analyzer analyzer = new PanGuAnalyzer();
        bool createNew = !IndexReader.IndexExists(fsRoot);
        using (var writer = new IndexWriter(fsRoot, analyzer, createNew, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            foreach (var source in sources)
            {
                var document = new Document();
                document.Add(new Field("Title", source.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("Url", source.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("Content", source.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(document);
            }
        }
    }
}
/// <summary>
/// Tokenizes the incoming text with PanGu, looks up the best-matching answer in
/// tb_U_Message, and appends a WeChat-style text reply to <c>Repeater</c>.
/// </summary>
/// <param name="postStr">Raw text posted by the user.</param>
/// <returns>The accumulated reply markup.</returns>
public string gettxt3(string postStr)
{
    String text = postStr.Trim();
    Analyzer anal = new PanGuAnalyzer(); // PanGu Chinese word segmentation
    StringReader sb = new StringReader(text);
    TokenStream ts = anal.ReusableTokenStream("", sb);
    Token t = null;
    // SECURITY NOTE(review): building SQL by string concatenation is injection-prone.
    // Tokens are quote-escaped below as a stop-gap; this should move to parameterized
    // queries if SQLHelper supports them.
    string word = "select top 1 Content from tb_U_Message where 1=1";
    while ((t = ts.Next()) != null)
    {
        Console.WriteLine(t.TermText());
        string term = t.TermText().Replace("'", "''"); // escape single quotes
        // BUG FIX: a leading space is required; the original produced "...1=1and Title...",
        // which is invalid SQL.
        word += " and Title like '%" + term + "%'";
    }
    string count = HXD.DBUtility.SQLHelper.ExecuteScalar(word).ToString();
    Repeater += "<MsgType><![CDATA[text]]></MsgType>";
    Repeater += "<Content><![CDATA[" + count + "\n\n如词答案任没有解决您的问题,你可以点击直接询问我们在泰国的达人。]]></Content>";
    return Repeater;
}
/// <summary>
/// Splits a search phrase into PanGu tokens and returns the distinct terms,
/// always including the original phrase itself, each passed through Escape.
/// </summary>
/// <param name="searchTerm">The raw user search phrase.</param>
/// <returns>Distinct escaped terms to query with.</returns>
private static IEnumerable<string> GetSearchTerms(string searchTerm)
{
    List<string> result = new List<string>();
    var analyzer = new PanGuAnalyzer();
    StringReader sr = new StringReader(searchTerm);
    TokenStream stream = analyzer.TokenStream(null, sr);
    // Advance the stream token by token, collecting each term.
    while (stream.IncrementToken())
    {
        ITermAttribute ita = stream.GetAttribute<ITermAttribute>();
        result.Add(ita.Term);
    }
    // CLEANUP: removed a discarded stream.CloneAttributes() call and an unused
    // DateTime stopwatch variable left over from debugging.
    sr.Close();
    analyzer.Dispose();

    return string.Join(" ", result)
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Concat(new[] { searchTerm })
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .Select(Escape);
}
/// <summary>
/// Searches the post index for every entry matching <paramref name="kw"/>,
/// highlighting hits and truncating content previews to <paramref name="segment"/> characters.
/// </summary>
/// <param name="kw">Search keyword(s); expanded into variants via CutKeywords.</param>
/// <param name="segment">Extract/fragment length for the content preview.</param>
/// <returns>Matching posts with non-empty titles, de-duplicated by Id.</returns>
public static IEnumerable<PostOutputDto> Search(string kw, int segment = 200)
{
    if (string.IsNullOrEmpty(IndexPath))
    {
        throw new Exception("未设置索引文件夹路径,参数名:" + IndexPath);
    }
    string indexPath = IndexPath;
    using (var analyzer = new PanGuAnalyzer())
    {
        var list = CutKeywords(kw);
        var result = new ConcurrentQueue<PostOutputDto>();
        // Each keyword variant is searched in parallel; hits merge into the queue.
        Parallel.ForEach(list, k =>
        {
            // Skip keywords containing Lucene special characters or the Unicode
            // replacement char. NOTE(review): relies on a Contains(string[])
            // extension — presumably "contains any of"; confirm its semantics.
            if (k.Contains(new[] { @"\?", @"\*", @"\+", @"\-", @"\[", @"\]", @"\{", @"\}", @"\(", @"\)", "�" }))
            {
                return;
            }
            // NOTE(review): directory/reader/searcher are opened per keyword and
            // never disposed here — potential handle leak; confirm.
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = IndexReader.Open(directory, true);
            var searcher = new IndexSearcher(reader);
            // Query across all indexed post fields at once.
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new[]
            {
                nameof(Post.Id), nameof(Post.Title), nameof(Post.Content),
                nameof(Post.Author), nameof(Post.Label), nameof(Post.Email),
                nameof(Post.Keyword)
            }, analyzer);
            Query query = parser.Parse(k);
            int n = 100000;
            TopDocs docs = searcher.Search(query, null, n);
            if (docs?.TotalHits != 0 && docs?.ScoreDocs != null)
            {
                foreach (ScoreDoc sd in docs.ScoreDocs) // iterate over the raw hits
                {
                    Document doc = searcher.Doc(sd.Doc);
                    // Skip posts already collected by another keyword's search.
                    if (result.Any(p => p.Id == doc.Get(nameof(Post.Id)).ToInt32()))
                    {
                        continue;
                    }
                    var simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                    var highlighter = new Highlighter(simpleHtmlFormatter, new Segment())
                    {
                        FragmentSize = segment
                    };
                    var content = doc.Get(nameof(Post.Content));
                    // NOTE(review): 'segment' is shrunk here yet shared across
                    // parallel iterations (captured variable) — possible race /
                    // cross-talk between keywords; confirm intended.
                    if (content.Length <= segment)
                    {
                        segment = content.Length;
                    }
                    // Highlight whichever fields actually contain this keyword;
                    // otherwise return the stored value (content is truncated).
                    result.Enqueue(new PostOutputDto()
                    {
                        Id = doc.Get(nameof(Post.Id)).ToInt32(),
                        Title = doc.Get(nameof(Post.Title)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Title))) : doc.Get(nameof(Post.Title)),
                        Content = content.ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, content) : content.Substring(0, segment),
                        Author = doc.Get(nameof(Post.Author)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Author))) : doc.Get(nameof(Post.Author)),
                        Label = doc.Get(nameof(Post.Label)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Label))) : doc.Get(nameof(Post.Label)),
                        Email = doc.Get(nameof(Post.Email)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Email))) : doc.Get(nameof(Post.Email)),
                        Keyword = doc.Get(nameof(Post.Keyword)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Keyword))) : doc.Get(nameof(Post.Keyword))
                    });
                }
            }
        });
        return result.Where(p => !string.IsNullOrEmpty(p.Title)).DistinctBy(p => p.Id);
    }
}
/// <summary>
/// Returns the cached IndexWriter for <paramref name="project"/>, creating and caching
/// one on first use (double-checked locking). Any cached searcher for the project is
/// evicted so it will be reopened against the fresh writer.
/// </summary>
/// <param name="project">Project key; blank names collapse to "NoneName".</param>
/// <returns>The shared writer for the project.</returns>
private static IndexWriter GetWriter(String project)
{
    if (String.IsNullOrWhiteSpace(project))
    {
        project = "NoneName";
    }
    String path = AllConfig.m_path;
    // Fast path: skip the lock when a writer is already cached.
    // (TryGetValue replaces the original ContainsKey + indexer double lookup.)
    if (m_indexWrite.TryGetValue(project, out var cached))
    {
        m_indexSearch.TryRemove(project, out _);
        return cached;
    }
    lock (m_lock)
    {
        // Re-check under the lock in case another thread created it meanwhile.
        if (m_indexWrite.TryGetValue(project, out var existing))
        {
            m_indexSearch.TryRemove(project, out _);
            return existing;
        }
        // Presence of write.lock is treated as "index already exists"; a brand-new
        // index is created only when it is absent. TODO(review): confirm this is
        // the intended existence test (IndexReader.IndexExists is the usual check).
        Boolean isExiested = File.Exists(Path.Combine(path, "write.lock"));
        FSDirectory fsDir = FSDirectory.Open(new DirectoryInfo(path));
        Analyzer analyser = new PanGuAnalyzer();
        IndexWriter fsWriter = new IndexWriter(fsDir, analyser, !isExiested, IndexWriter.MaxFieldLength.UNLIMITED);
        m_indexWrite.TryAdd(project, fsWriter);
        return fsWriter;
    }
}
/// <summary>
/// Rebuilds the demo index from a fixed set of sample documents.
/// </summary>
/// <param name="isPangu">true: PanGu Chinese analyzer; false: StandardAnalyzer.</param>
public static void PrepareIndex(bool isPangu)
{
    Analyzer analyzer = isPangu
        ? (Analyzer)new PanGuAnalyzer()            // PanGu Chinese segmentation
        : new StandardAnalyzer(Version.LUCENE_29);

    DirectoryInfo dirInfo = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
    LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);

    // 'true' recreates the index from scratch on every call.
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    CreateIndex(writer, "jeffreyzhao", "博客园有一个老赵,人格魅力巨大,洋名就叫jeffreyzhao。据我所知,他还是一个胖子,一个钢琴业余爱好者。");
    CreateIndex(writer, "lucene测试", "这是一个测试,关于lucene.net的 关注老赵");
    CreateIndex(writer, "博客园里有牛人", "Hello World. 我认识的一个高手,他拥有广博的知识,有极客的态度,还经常到园子里来看看");
    CreateIndex(writer, "奥巴马", "美国现任总统是奥巴马?确定不是奥巴牛和奥巴羊 不知道问老赵");
    CreateIndex(writer, "奥林匹克", "奥林匹克运动会将来到南美美丽热情的国度巴西,也就是亚马逊河流域的一个地方");
    CreateIndex(writer, "写给自己", "博客园的jeffwong,新的开始,继续努力了");
    writer.Optimize();
    writer.Close();
}
/// <summary>
/// Splits <paramref name="msg"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="msg">Text to tokenize.</param>
/// <returns>The list of tokens, in input order.</returns>
public static List<string> PanGuSplitWord(string msg)
{
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(msg));
    List<string> list = new List<string>();
    // BUG FIX: the original read the term attribute BEFORE the first IncrementToken()
    // (hasNext was seeded to true), pushing an uninitialized/stale term into the list.
    // Advance the stream first, then read each term.
    while (tokenStream.IncrementToken())
    {
        ITermAttribute ita = tokenStream.GetAttribute<ITermAttribute>();
        list.Add(ita.Term);
    }
    analyzer.Close();
    return list;
}
/// <summary>
/// Queries the student index on the "title" field and maps hits to Student models.
/// On failure the error is logged and whatever was collected so far is returned.
/// </summary>
/// <param name="queryString">User query, parsed with the PanGu analyzer.</param>
/// <returns>Matching students (up to 10000 hits), or null if setup failed before the list was created.</returns>
public List<Student> QueryIndex(string queryString)
{
    IndexSearcher searcher = null;
    List<Student> ciList = null;
    try
    {
        ciList = new List<Student>();
        Directory dir = FSDirectory.Open(IndexHelper.Path);
        searcher = new IndexSearcher(dir);

        // Parse the user input into a Lucene query over "title".
        Analyzer analyzer = new PanGuAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(queryString);

        TopDocs docs = searcher.Search(query, (Filter)null, 10000);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            ciList.Add(DocumentToCommodityInfo(searcher.Doc(sd.Doc)));
        }
        return ciList;
    }
    catch (Exception e)
    {
        logger.Error("Error:", e);
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Close();
        }
    }
    return ciList;
}
/// <summary>
/// Builds the full-text index for all Chinese universities (id, name, pinyin).
/// </summary>
/// <param name="callback">Invoked by the index builder for each document written.</param>
public void MakeLuceneIndex(Action<Document> callback)
{
    #region 中国大学
    ChinaUniversityDao chinaUniverisityDao = new ChinaUniversityDao();
    List<ChinaUniversity> universityList = chinaUniverisityDao.GetChinaUniversityList();

    // NOTE(review): this analyzer local is never referenced below; kept in case the
    // constructor has initialization side effects — confirm and remove if not.
    PanGuAnalyzer analyzer = new PanGuAnalyzer(true);
    string textIndexDir = LuceneManager.GetLuceneTextIndexDirectoryPath(LuceneTextIndexType.ChinaUniversity, null);

    var documentList = new List<Document>();
    foreach (ChinaUniversity university in universityList)
    {
        // Index the numeric id plus the analyzed, lower-cased name and pinyin.
        var indexDoc = new Document();
        indexDoc.Add(new NumericField("UniversityId", Field.Store.YES, true).SetIntValue(university.UniversityId));
        indexDoc.Add(new Field("CnName", university.Name.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
        indexDoc.Add(new Field("Pinyin", university.Pinyin.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
        documentList.Add(indexDoc);
    }
    LuceneManager.MakeIndex(documentList, textIndexDir, callback);
    #endregion 中国大学
}
/// <summary>
/// Searches the book index for <paramref name="searchWords"/> (tokenized with PanGu),
/// builds result models with highlighted snippets, and records the search phrase.
/// </summary>
/// <param name="searchWords">Raw user search input.</param>
/// <returns>All matching books with highlighted content descriptions.</returns>
private List<BookSearchModel> SearchBookContent(string searchWords)
{
    List<BookSearchModel> bookSearchModelList = new List<BookSearchModel>();
    // 1. Tokenize the search phrase with the PanGu analyzer.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(searchWords));
    Lucene.Net.Analysis.Token token = null;
    string indexPath = @"D:\lucenedir"; // NOTE(review): hard-coded index location
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Phrase query: every token must appear in "body" within the slop distance.
    // (Terms are AND-ed; their order in the query does not matter.)
    PhraseQuery query = new PhraseQuery();
    while ((token = tokenStream.Next()) != null)
    {
        query.Add(new Term("body", token.TermText()));
    }
    // Maximum distance between the query terms inside a document; matches whose
    // terms lie too far apart are considered meaningless.
    query.SetSlop(100);
    // Collector that holds the top-scoring documents.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query, filling the collector
    // GetTotalHits() = total match count; TopDocs(offset, count) supports paging,
    // e.g. TopDocs(300, 20) returns documents 300..320.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc only carries the Lucene-internal doc id, which keeps the full
        // documents out of memory; fetch each document lazily via searcher.Doc.
        int docId = docs[i].doc;
        Document doc = searcher.Doc(docId);
        BookSearchModel searchModel = new BookSearchModel();
        searchModel.Id = int.Parse(doc.Get("ID"));
        searchModel.Title = doc.Get("title");
        searchModel.ContenDescription = SearchWordHighlight.CreateHightLight(searchWords, doc.Get("body"));
        bookSearchModelList.Add(searchModel);
    }
    // Record the search phrase into the search-history word bank.
    SearchDetails entity = new SearchDetails()
    {
        Id = Guid.NewGuid(),
        KeyWords = searchWords,
        SearchDateTime = DateTime.Now
    };
    SearchDetailsService.AddEntity(entity);
    return bookSearchModelList;
}
/// <summary>
/// Handles a search request: tokenizes Request["kw"] with PanGu, runs a phrase
/// query over the "content" field, and renders highlighted results on Index.
/// </summary>
/// <returns>The Index view with ViewBag.books / ViewBag.kw populated.</returns>
public ActionResult Search()
{
    string kw = Request["kw"]; // user-supplied search text
    string indexPath = Server.MapPath("~/lucenedir"); // where the index lives
    // Tokenize the input into individual search terms.
    List<string> kws = new List<string>(); // holds the split tokens
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));
    Lucene.Net.Analysis.Token token = null;
    while ((token = tokenStream.Next()) != null)
    {
        kws.Add(token.TermText());
    }
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // PhraseQuery targets a single field only; a different query type is needed
    // for multi-field search.
    PhraseQuery query = new PhraseQuery();
    foreach (var word in kws)
    {
        query.Add(new Term("content", word)); // search the "content" column
    }
    // Maximum distance between query terms; far-apart matches are meaningless.
    query.SetSlop(100);
    // Collector that receives the top-scoring hits.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query into the collector
    // GetTotalHits() = total matches; TopDocs(offset, count) supports paging,
    // e.g. TopDocs(300, 20) returns documents 300..320.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    // Materialize the hits into view models.
    List<BookVieModel> bookList = new List<BookVieModel>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc only carries the internal doc id; load each document on demand
        // to avoid pulling every result into memory at once.
        int docId = docs[i].doc;
        Document doc = searcher.Doc(docId);
        BookVieModel model = new BookVieModel();
        // Field names must match those used when the index was built.
        model.Id = Convert.ToInt32(doc.Get("Id"));
        model.Title = CreateHightLight(kw, doc.Get("title"));     // highlight hits in the title
        model.Content = CreateHightLight(kw, doc.Get("content")); // highlight hits in the content
        bookList.Add(model);
    }
    ViewBag.books = bookList;
    ViewBag.kw = kw;
    return View("Index");
}
/// <summary>
/// Configures the MQ routing for log messages and opens the index writer.
/// Note: the 'true' create flag recreates the index on every construction.
/// </summary>
public LoggerMqConsume()
{
    RouteKey = "Logger_Route_Key";
    QueueName = "Logger_Queue";

    var dir = FSDirectory.Open(new DirectoryInfo(INDEX_STORE_PATH));
    var panGu = new PanGuAnalyzer();
    fsWriter = new IndexWriter(dir, panGu, true, IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Click handler: tokenizes the text box input with PanGu, runs a phrase query
/// over the "body" field of the desktop index, and lists the hits in listBox1.
/// </summary>
private void button3_Click(object sender, EventArgs e)
{
    string indexPath = @"C:\Users\杨ShineLon\Desktop\lucenedir"; // where the index lives
    string kw = textBox1.Text; // user search text, e.g. "面向对象"
    // Tokenize the input into individual search terms.
    List<string> kws = new List<string>(); // holds the split tokens
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));
    Net.Analysis.Token token = null;
    while ((token = tokenStream.Next()) != null)
    {
        kws.Add(token.TermText());
    }
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Phrase query over "body": terms are AND-ed and their order does not matter.
    PhraseQuery query = new PhraseQuery();
    foreach (var word in kws)
    {
        query.Add(new Term("body", word));
    }
    // Maximum distance between query terms; far-apart matches are meaningless.
    query.SetSlop(100);
    // Collector holding the top-scoring documents.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query into the collector
    // GetTotalHits() = total matches; TopDocs(offset, count) supports paging,
    // e.g. TopDocs(300, 20) returns documents 300..320.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    this.listBox1.Items.Clear();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc only carries the internal doc id; load each document lazily
        // via searcher.Doc to keep memory usage low.
        int docId = docs[i].doc;
        Document doc = searcher.Doc(docId);
        this.listBox1.Items.Add(doc.Get("number") + "\n"); // stored field value
        this.listBox1.Items.Add(doc.Get("body") + "\n");
        this.listBox1.Items.Add("-----------------------\n");
    }
}
/// <summary>
/// Writes index documents for the given news item into /Indexs.
/// NOTE(review): the loop below inserts the SAME model ten times with randomized
/// dates — this looks like leftover test/demo code; confirm whether intended.
/// </summary>
/// <param name="Model">News entity to index.</param>
private void createIndex(News Model)
{
    // Index directory path.
    string path = Server.MapPath("/Indexs");
    // Analyzer used for the analyzed fields.
    Analyzer analyzer = new PanGuAnalyzer();
    // The directory lock prevents several IndexWriters from writing the same
    // index simultaneously, which could corrupt it. While locked, a write.lock
    // file exists in the folder (its presence alone does not prove locked).
    // IndexWriter defaults to NativeFSLockFactory: if the process dies, the OS
    // releases the lock even though write.lock may remain on disk; on normal
    // exit the file also remains, Lucene merely releases its handle.
    Lucene.Net.Store.Directory d = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
    // True when an index already exists in the directory.
    bool isUpdate = IndexReader.IndexExists(d);
    if (isUpdate)
    {
        // If a previous writer left the index locked, unlock it before writing.
        if (IndexWriter.IsLocked(d))
        {
            IndexWriter.Unlock(d);
        }
    }
    // Third argument: true = create/overwrite the index, false = append.
    using (IndexWriter iw = new IndexWriter(d, analyzer, !isUpdate, IndexWriter.MaxFieldLength.LIMITED))
    {
        Random random = new Random();
        for (int i = 100; i < 110; i++)
        {
            DateTime dt = DateTime.Now.AddHours(random.Next(100, 1000));
            Document doc = new Document();
            // Store the id as a numeric field.
            doc.Add(new NumericField("Id", Field.Store.YES, true).SetIntValue(Model.Id));
            doc.Add(new Field("Title", Model.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("Content", Model.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("AddTime", dt.ToString("yyyy-MM-dd HH:mm:ss"), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            // Sortable order key derived from the timestamp (second resolution).
            doc.Add(new NumericField("OrderId", Field.Store.YES, true).SetLongValue(Convert.ToInt64(DateTools.DateToString(dt, DateTools.Resolution.SECOND))));
            iw.AddDocument(doc);
        }
        iw.Optimize();
    }
}
/// <summary>
/// Demo click handler: prints the PanGu tokens of a fixed sample sentence to the console.
/// </summary>
private void button3_Click(object sender, EventArgs e)
{
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream stream = analyzer.TokenStream("", new StringReader("面向世界,面向现代化"));
    for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
    {
        Console.WriteLine(tok.TermText());
    }
}
/// <summary>
/// Demo click handler: shows each PanGu token of textBox2's text in a message box.
/// </summary>
private void button6_Click(object sender, EventArgs e)
{
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream stream = analyzer.TokenStream("", new StringReader(textBox2.Text));
    for (Token tok = stream.Next(); tok != null; tok = stream.Next())
    {
        MessageBox.Show(tok.TermText());
    }
}
/// <summary>
/// Opens a writer on the shared directory and merges the index down to one segment.
/// </summary>
public static void Optimize()
{
    var analyzer = new PanGuAnalyzer(); //new StandardAnalyzer(Version.LUCENE_30);
    // BUG FIX: the original closed the analyzer BEFORE Optimize() ran and also
    // called writer.Dispose() inside the writer's own using block (double dispose).
    using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        writer.Optimize();
    }
    analyzer.Close(); // release analyzer resources after the writer is done
}
//
// GET: /Test/
/// <summary>
/// Demo action: writes the PanGu tokens of a sample product title to the response.
/// </summary>
public ActionResult Index()
{
    var analyzer = new PanGuAnalyzer();
    TokenStream ts = analyzer.ReusableTokenStream("", new StringReader("供应子母门 非标门 钢质门 进户门 金属门"));
    for (Token tok = ts.Next(); tok != null; tok = ts.Next())
    {
        Response.Write(tok.TermText() + ";");
    }
    return View();
}
/// <summary>
/// Deletes every document whose "id" field equals <paramref name="id"/> from the index.
/// Deleting an id that is already gone is a harmless no-op.
/// </summary>
/// <param name="id">Document id to remove; must be non-empty.</param>
/// <returns>Empty string (kept for caller compatibility).</returns>
/// <exception cref="Exception">Thrown when <paramref name="id"/> is null or empty.</exception>
public static string Delete(string id)
{
    #region argument validation
    if (string.IsNullOrEmpty(id))
    {
        throw new Exception("参数id不能为空");
    }
    #endregion

    // (Removed a large block of commented-out legacy implementation.)
    Analyzer analyzer = new PanGuAnalyzer();
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    // Writer used to delete the matching index entries.
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("id", id);
    TermQuery query = new TermQuery(term);
    indexWriter.DeleteDocuments(query);
    // The deletion would flush on close anyway, but commit explicitly for clarity.
    indexWriter.Commit();
    indexWriter.Optimize();
    indexWriter.Dispose(); // flushes and closes the writer
    return "";
}
/// <summary>
/// Removes every document from the index. Repeating the call on an already
/// empty index is a harmless no-op.
/// </summary>
/// <returns>Empty string (kept for caller compatibility).</returns>
public static string DeleteAll()
{
    // (Removed a large block of commented-out Java/IKAnalyzer reference code.)
    Analyzer analyzer = new PanGuAnalyzer();
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    // Writer used to wipe the whole index.
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.DeleteAll();
    // The deletion would flush on close anyway, but commit explicitly for clarity.
    indexWriter.Commit();
    indexWriter.Dispose(); // flushes and closes the writer
    return "";
}
/// <summary>
/// PanGu word segmentation.
/// </summary>
/// <param name="msg">The string to split.</param>
/// <returns>The split tokens, in input order.</returns>
public static List<string> PanguSplitWords(string msg)
{
    var words = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(msg));
    // Next() yields one segmented word per call until the stream is exhausted.
    for (Lucene.Net.Analysis.Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
    {
        words.Add(token.TermText());
    }
    return words;
}
/// <summary>
/// Tokenizes the user's search input with the PanGu analyzer.
/// </summary>
/// <param name="str">Raw search text.</param>
/// <returns>The tokens as an array.</returns>
private static string[] SplitWord(string str)
{
    var tokens = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer(); // PanGu segmentation
    TokenStream stream = analyzer.TokenStream("", new StringReader(str));
    for (Lucene.Net.Analysis.Token token = stream.Next(); token != null; token = stream.Next())
    {
        tokens.Add(token.TermText());
    }
    return tokens.ToArray();
}
/// <summary>
/// Pages through index hits for <paramref name="queryString"/> on <paramref name="fieldName"/>.
/// </summary>
/// <param name="queryString">Raw query text, parsed with the PanGu analyzer.</param>
/// <param name="fieldName">Default field the parser targets.</param>
/// <param name="pageIndex">1-based page number.</param>
/// <param name="pageSize">Hits per page.</param>
/// <param name="totalCount">Receives the total hit count.</param>
/// <param name="filter">Optional Lucene filter.</param>
/// <param name="sort">Sort order for the hits.</param>
/// <param name="fieldModelList">Field mapping used to materialize models.</param>
/// <returns>The requested page of models.</returns>
public List<T> QueryIndexPage(string queryString, string fieldName, int pageIndex, int pageSize, out int totalCount, Filter filter, Sort sort, IEnumerable<FieldDataModel> fieldModelList)
{
    using (Analyzer analyzer = new PanGuAnalyzer())
    {
        // Parse the raw text into a query, then delegate to the query-based overload.
        var parser = new QueryParser(LuceneUtil.Version.LUCENE_30, fieldName, analyzer);
        Query query = parser.Parse(queryString);
        return QueryIndexPage(query, pageIndex, pageSize, out totalCount, filter, sort, fieldModelList);
    }
}
//private CustomLogger m_logger = new CustomLogger(typeof(LuceneQuery<T>));

#endregion Identity

#region QueryIndex

/// <summary>
/// Runs a query against <paramref name="fieldName"/> and returns up to
/// <paramref name="listCount"/> matching models.
/// </summary>
/// <param name="queryString">Raw query text, parsed with the PanGu analyzer.</param>
/// <param name="fieldName">Default field the parser targets.</param>
/// <param name="fieldModelList">Field mapping used to materialize models.</param>
/// <param name="listCount">Maximum number of results (default 1000).</param>
/// <returns>The matching models.</returns>
public List<T> QueryIndex(string queryString, string fieldName, IEnumerable<FieldDataModel> fieldModelList, int listCount = 1000)
{
    using (Analyzer analyzer = new PanGuAnalyzer())
    {
        // Parse the raw text into a query, then delegate to the query-based overload.
        var parser = new QueryParser(LuceneUtil.Version.LUCENE_30, fieldName, analyzer);
        Query query = parser.Parse(queryString);
        return QueryIndex(query, fieldModelList, listCount);
    }
}
/// <summary>
/// Splits <paramref name="content"/> into terms using the PanGu segmentation algorithm.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The segmented terms as an array.</returns>
private string[] SplitWords(string content)
{
    var parts = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer(); // PanGu segmentation algorithm
    TokenStream stream = analyzer.TokenStream("", new StringReader(content));
    // Each Next() call yields one segmented word until the stream runs out.
    for (Lucene.Net.Analysis.Token token = stream.Next(); token != null; token = stream.Next())
    {
        parts.Add(token.TermText());
    }
    return parts.ToArray();
}
/// <summary>
/// Extracts keywords from <paramref name="str"/> via PanGu tokenization.
/// </summary>
/// <param name="str">Text to tokenize.</param>
/// <returns>The keywords as an array.</returns>
public static string[] GetKeyWords(string str)
{
    var keywords = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream stream = analyzer.TokenStream("", new StringReader(str));
    Token token;
    while ((token = stream.Next()) != null)
    {
        keywords.Add(token.TermText());
    }
    return keywords.ToArray();
}
/// <summary>
/// Parses <paramref name="queryString"/> against <paramref name="field"/> and
/// returns the top <paramref name="numHit"/> hits via a score collector.
/// </summary>
/// <param name="searcher">Open searcher over the target index.</param>
/// <param name="queryString">Query text, parsed with the PanGu analyzer.</param>
/// <param name="field">Default field the parser targets.</param>
/// <param name="numHit">Maximum number of hits to collect.</param>
/// <param name="inOrder">Whether the collector assumes in-order doc ids.</param>
/// <returns>The collected top hits.</returns>
static ScoreDoc[] Search(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
{
    TopScoreDocCollector collector = TopScoreDocCollector.create(numHit, inOrder);
    Analyzer analyser = new PanGuAnalyzer();
    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyser);
    Query parsed = parser.Parse(queryString);
    searcher.Search(parsed, collector);
    return collector.TopDocs().scoreDocs;
}