/// <summary>
/// Indexes the ten sample text files (1.txt .. 10.txt) into the Lucene index stored under C:\lucenedir.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    // Must match the on-disk folder name exactly (including case); the index files are written here.
    string indexPath = @"C:\lucenedir";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the folder already contains an index; in that case we append instead of creating.
        bool isUpdate = IndexReader.IndexExists(directory);

        // Clear a lock left behind by a crashed run.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer takes the write lock.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            for (int i = 1; i <= 10; i++)
            {
                // The files are read with the system default encoding.
                string txt = File.ReadAllText(@"D:\传智讲课\0413班\OA\OA项目,第七天搜索,Lucene.Net,盘古分词,Quartz.Net\资料\测试文件\" + i + ".txt", System.Text.Encoding.Default);

                Document document = new Document();
                // Stored verbatim and not tokenized, so it can be read back with doc.Get("number").
                document.Add(new Field("number", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Tokenized for full-text search; term positions and offsets are kept as well.
                document.Add(new Field("body", txt, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Always close the writer so the write lock is released even if a file is missing or unreadable.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Opens (or creates) the index at <paramref name="indexPath"/> and runs the configured Lucene callbacks.
/// </summary>
/// <param name="indexPath">Folder that holds the index files.</param>
/// <param name="adds">Receives the first result list produced by ExecuteLuceneMethod.</param>
/// <param name="updates">Receives the second result list produced by ExecuteLuceneMethod.</param>
/// <param name="error">Receives the third result list produced by ExecuteLuceneMethod.</param>
private void ExcuteCreate(string indexPath, out List<string> adds, out List<string> updates, out List<string> error)
{
    // FSDirectory wraps a file-system folder that stores the index.
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // An existing index may still carry a lock from an aborted run; release it first.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // IndexWriter(directory, analyzer, create, maxFieldLength): create is true only when no index exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            // NOTE(review): _update is passed as both the second and third argument — confirm that is intended.
            _baseLucene.ExecuteLuceneMethod(this._add, this._update, this._update, out adds, out updates, out error);
        }
        finally
        {
            // Close even when the callbacks throw, otherwise the write lock stays held.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Adds one Lucene document per sell offer to the index configured under the "Indexpath" app setting.
/// Indexing failures are logged through Utility.writelog and not rethrown.
/// </summary>
/// <param name="list">Sell offers to index.</param>
private static void CreateIndex(List<SellOffer> list)
{
    string indexpath = ConfigurationManager.AppSettings["Indexpath"];
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexpath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its segment files already exist.
        bool isExist = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        ISellOfferDetail selldetailBll = new SellOfferDetailService();
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            try
            {
                foreach (var pitem in list)
                {
                    SellOfferDetail offerdetail = selldetailBll.SearchById(pitem.Id);
                    // A missing detail row is indexed as empty text instead of aborting the whole batch.
                    string detailText = (offerdetail == null || offerdetail.Detail == null) ? string.Empty : offerdetail.Detail;

                    Field id = new Field("id", pitem.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                    Field title = new Field("title", pitem.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    title.SetBoost(1.0f);
                    Field keywords = new Field("keywords", pitem.Keywords, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    keywords.SetBoost(0.7f);
                    Field detail = new Field("detail", detailText, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    Field sysattr = new Field("sysattr", pitem.SysAttr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    sysattr.SetBoost(0.4f);
                    Field cusattr = new Field("cusattr", pitem.CusAttr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    cusattr.SetBoost(0.1f);

                    Document document = new Document();
                    document.Add(id);
                    document.Add(title);
                    document.Add(keywords);
                    document.Add(detail);
                    document.Add(sysattr);
                    document.Add(cusattr);
                    writer.AddDocument(document);
                }
                writer.Optimize();
            }
            finally
            {
                // Close on every path so the write lock is released even after a failure.
                writer.Close();
            }
        }
        catch (Exception ex)
        {
            Utility.writelog("创建索引出问题:" + ex + "");
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Drains the job queue and applies each add / edit / delete job to the Lucene index at G:/index.
/// </summary>
private void ProessQueue()
{
    // Must match the on-disk folder name exactly (including case).
    string indexPath = @"G:/index";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            while (queue.Count > 0)
            {
                JobInfo jobInfo = queue.Dequeue();

                if (jobInfo.JobType == JobType.Delete)
                {
                    writer.DeleteDocuments(new Term("GoodID", jobInfo.GoodId.ToString()));
                    continue;
                }

                if (jobInfo.JobType != JobType.Add && jobInfo.JobType != JobType.Edit)
                {
                    continue;
                }

                Model.T007店铺货物表 good = operateContext.BLLSession.IT007店铺货物表BLL.GetListBy(m => m.GoodID == jobInfo.GoodId).FirstOrDefault();
                if (good == null)
                {
                    // The record no longer exists, so there is nothing to index for this job.
                    continue;
                }

                Document document = BuildGoodDocument(good);
                if (jobInfo.JobType == JobType.Add)
                {
                    writer.AddDocument(document);
                }
                else
                {
                    // Replaces whatever is currently indexed under this GoodID.
                    writer.UpdateDocument(new Term("GoodID", good.GoodID.ToString()), document);
                }
            }
            writer.Optimize();
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}

// Builds the Lucene document (one per goods row) shared by the add and edit jobs.
private static Document BuildGoodDocument(Model.T007店铺货物表 good)
{
    Document document = new Document();
    document.Add(new Field("GoodID", good.GoodID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.Add(new Field("GoodIntroduction", good.GoodIntroduction, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("GoodName", good.GoodName, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("GoodNumber", good.GoodNumber.ToString(), Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("GoodPhoto", good.GoodPhoto, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("GoodPrice", good.GoodPrice.ToString(), Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("ShopID", good.ShopID.ToString(), Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
    return document;
}
/// <summary>
/// Rebuilds the whole index under _indexSavePath from the models returned by GetLists().
/// Failures are logged through LogHelper.Error and not rethrown.
/// </summary>
public void OneDoWork()
{
    try
    {
        FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_indexSavePath), new NativeFSLockFactory());
        try
        {
            // True when the folder already holds an index.
            bool isUpdate = IndexReader.IndexExists(directory);

            // Release a lock left behind by an aborted run.
            // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
            if (isUpdate && IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }

            IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
            bool completed = false;
            try
            {
                var models = GetLists();
                writer.DeleteAll();
                models.ForEach(p =>
                {
                    Document document = new Document();
                    _addDocAction(p, document);
                    writer.AddDocument(document);
                });
                writer.Optimize();
                writer.Close();
                completed = true;
            }
            finally
            {
                if (!completed)
                {
                    // A rebuild that failed half-way must not commit the DeleteAll();
                    // Rollback discards the pending changes and releases the write lock.
                    writer.Rollback();
                }
            }
        }
        finally
        {
            directory.Close();
        }
    }
    catch (Exception ex)
    {
        LogHelper.Error(ex);
    }
}
/// <summary>
/// Adds one Lucene document per item of <paramref name="list"/> to the index at indexPath.
/// </summary>
/// <typeparam name="T">Type of the items being indexed.</typeparam>
/// <param name="list">Items to index.</param>
/// <param name="AddField">Callback that fills the fields of the document for a given item.</param>
public void CreateIndexByData<T>(List<T> list, Action<Document, T> AddField)
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExist = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of another writer that is still active.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var item in list)
            {
                Document doc = new Document();
                // The caller decides which fields are stored / analyzed for this item.
                AddField(doc, item);
                writer.AddDocument(doc);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even if the callback throws.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Adds a single document whose "title" field is <paramref name="name"/> to the index under /Indexdata.
/// </summary>
/// <param name="name">Text to index as the title.</param>
public void CreateTestIndex(string name)
{
    string indexpath = HttpContext.Server.MapPath("/Indexdata");
    // NOTE(review): NoLockFactory disables index locking, so the lock check below presumably never fires
    // and nothing stops two writers from running at once — confirm this is intended.
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexpath), new NoLockFactory());
    try
    {
        // True when the index folder and its segment files already exist.
        bool isExist = IndexReader.IndexExists(directory);
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            Document document = new Document();
            Field title = new Field("title", name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            document.Add(title);
            writer.AddDocument(document);
            writer.Optimize();
        }
        finally
        {
            // Close on every path so the writer's files are released.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Deletes every document whose <paramref name="key"/> field equals <paramref name="id"/> from the index
/// under _indexSavePath. Failures are logged through LogHelper.Error and not rethrown.
/// </summary>
/// <param name="key">Name of the field that identifies the document.</param>
/// <param name="id">Value to match; its ToString() form is used as the term text.</param>
public void DeleteDoc(string key, object id)
{
    try
    {
        FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_indexSavePath), new NativeFSLockFactory());
        try
        {
            // True when the folder already holds an index.
            bool isUpdate = IndexReader.IndexExists(directory);

            // Release a lock left behind by an aborted run.
            // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
            if (isUpdate && IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }

            IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                writer.DeleteDocuments(new Term(key, id.ToString()));
                writer.Optimize();
            }
            finally
            {
                // Close on every path so the write lock is released even after a failure.
                writer.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }
    catch (Exception ex)
    {
        LogHelper.Error(ex);
    }
}
/// <summary>
/// Runs a query against the Lucene index and returns the documents of the requested page.
/// </summary>
/// <param name="directoryPath">Folder that holds the index files.</param>
/// <param name="query">Query to execute.</param>
/// <param name="sort">Sort order, or null to use the default ordering of the search.</param>
/// <param name="pagerInfo">Supplies PageIndex / PageSize; RecordCount is set to the total hit count.</param>
/// <param name="callback">Optional callback invoked for every returned document.</param>
/// <returns>The documents on the requested page; empty when the page is out of range.</returns>
public static List<Document> SearchLuceneData(string directoryPath, Query query, Sort sort, PagerInfo pagerInfo, Action<Document> callback)
{
    List<Document> list = new List<Document>();
    FSDirectory directory = FSDirectory.Open(new System.IO.DirectoryInfo(directoryPath), new NoLockFactory());
    try
    {
        IndexReader indexReader = IndexReader.Open(directory, true);
        try
        {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            try
            {
                // The search needs a positive hit limit; MaxDoc() is 0 for an empty index.
                int maxHits = System.Math.Max(1, indexSearcher.MaxDoc());

                TopDocs results;
                if (sort != null)
                {
                    results = indexSearcher.Search(query, null, maxHits, sort);
                }
                else
                {
                    results = indexSearcher.Search(query, null, maxHits);
                }

                int totalCount = results.totalHits;
                pagerInfo.RecordCount = totalCount;
                ScoreDoc[] docs = results.scoreDocs;

                // Clamp the page window to what was actually returned.
                int startOffset = System.Math.Max(0, (pagerInfo.PageIndex - 1) * pagerInfo.PageSize);
                int endOffset = System.Math.Min(pagerInfo.PageIndex * pagerInfo.PageSize, System.Math.Min(totalCount, docs.Length));

                for (int i = startOffset; i < endOffset; i++)
                {
                    Document doc = indexSearcher.Doc(docs[i].doc);
                    list.Add(doc);
                    if (callback != null)
                    {
                        callback(doc);
                    }
                }
            }
            finally
            {
                indexSearcher.Close();
            }
        }
        finally
        {
            // The reader was opened here, so it is closed here as well; the original never closed it.
            indexReader.Close();
        }
    }
    finally
    {
        directory.Close();
    }
    return(list);
}
/// <summary>
/// Adds the prepared documents to the index stored at <paramref name="indexPath"/>.
/// </summary>
/// <param name="docList">Documents to add; each one is written as-is.</param>
/// <param name="indexPath">Folder that holds the index files.</param>
public static void CreateIndex(List<Document> docList, string indexPath)
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExist = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of another writer that is still active.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var doc in docList)
            {
                writer.AddDocument(doc);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Index worker loop: waits until jobs are queued, then opens the index under IndexDic and hands a
/// writer to ProcessJobs. The loop never returns.
/// </summary>
private void IndexOn()
{
    Logger.Debug("索引任务线程启动");
    while (true)
    {
        if (jobs.Count <= 0)
        {
            // Nothing to do; poll again in five seconds.
            Thread.Sleep(5 * 1000);
            continue;
        }

        // Make sure the index folder exists before opening it.
        if (!System.IO.Directory.Exists(IndexDic))
        {
            System.IO.Directory.CreateDirectory(IndexDic);
        }

        FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDic), new NativeFSLockFactory());
        try
        {
            bool isUpdate = IndexReader.IndexExists(directory);
            Logger.Debug("索引库存在状态" + isUpdate);

            // Release a lock left behind by an aborted run.
            if (isUpdate && IndexWriter.IsLocked(directory))
            {
                Logger.Debug("开始解锁索引库");
                IndexWriter.Unlock(directory);
                Logger.Debug("解锁索引库完成");
            }

            IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                ProcessJobs(writer);
            }
            finally
            {
                // Close on every path so the write lock is released even if a job throws.
                writer.Close();
            }
        }
        finally
        {
            directory.Close();
        }
        Logger.Debug("全部索引完毕");
    }
}
/// <summary>
/// Writes one record to the index as a single document built from the given fields.
/// </summary>
/// <param name="CreateIndexDirectionPathInfo">Folder in which the index is stored.</param>
/// <param name="field">Fields to add to the document.</param>
private static void CreateIndex(DirectoryInfo CreateIndexDirectionPathInfo, Field[] field)
{
    string indexPath = CreateDirectory(CreateIndexDirectionPathInfo.FullName).FullName;
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExists = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isExists && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExists, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            Document document = new Document();
            foreach (Field fie in field)
            {
                document.Add(fie);
            }
            writer.AddDocument(document);
            writer.Optimize();
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Closes the temporary storage directory (only when Enabled) and then the real directory.
/// </summary>
public override void Close()
{
    try
    {
        if (Enabled)
        {
            _tempStorageDir.Close();
        }
    }
    finally
    {
        // The real directory must be closed even if closing the temporary one fails.
        _realDirectory.Close();
    }
}
/// <summary>
/// Indexes every user that is not flagged as deleted into the contact index at ContactIndexPath,
/// replacing any document previously indexed under the same id.
/// </summary>
public static void CreateContactIndex()
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(ContactIndexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Only meant to recover from a lock left behind by a crashed run; not safe with concurrent writers.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer takes the write lock.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            var users = UserInfoService.LoadEntities(u => u.IsDeleted == false).ToList();
            foreach (var userInfo in users)
            {
                string phone = string.IsNullOrEmpty(userInfo.Contact.PhoneNumber) ? "" : userInfo.Contact.PhoneNumber;
                string position = string.IsNullOrEmpty(userInfo.Position.Name) ? "" : userInfo.Position.Name;

                // Fully qualified because Document clashes with a type in this project's namespace.
                Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

                // Stored but not tokenized: these are read back from the index, not searched as text.
                document.Add(new Field("id", userInfo.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("name", userInfo.Name, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("email", userInfo.Email, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("phone", phone, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("position", position, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Searchable but not stored. Every part is comma separated so that the phone number
                // and the position name cannot run together into a single token.
                string contactInfo = userInfo.Name + "," + userInfo.Email + "," + phone + "," + position;
                document.Add(new Field("contactInfo", contactInfo, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                // Remove any earlier document for this user so the index holds no duplicates.
                writer.DeleteDocuments(new Term("id", userInfo.Id.ToString()));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Rebuilds the news index under ~/IndexData from all SUC_NEWS rows and reports completion to the browser.
/// </summary>
private void CreateIndexByData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its segment files already exist.
        bool isExist = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of another writer that is still active.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // This is a full rebuild, so the writer is always opened with create = true, which replaces
        // any existing index. The previous code deleted the index files by hand and then opened the
        // writer with create = false whenever an index had existed, leaving the writer no index to open.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            List<SUC_NEWS> news = new SUC_NEWS().FindAll();
            foreach (var m in news)
            {
                // One document per news row; every value is stored as a string.
                Document document = new Document();
                document.Add(new Field("ID", m.ID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Tokenized for full-text search; term positions and offsets are kept as well.
                document.Add(new Field("TITLE", m.TITLE, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                // NOTE(review): CONTENT is stored but not tokenized, so it cannot be searched by word — confirm.
                document.Add(new Field("CONTENT", m.CONTENT, Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("URL", m.pandaWebUrl, Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
    Response.Write("<script language='javascript'>alert('生成完毕');</script>");
}
/// <summary>
/// Releases the analyzer, detaches the current searcher from its holder, and closes the writer and directory.
/// Each resource is released even if an earlier one fails or was never created.
/// </summary>
public void Dispose()
{
    try
    {
        if (analyzer != null)
        {
            analyzer.Close();
        }
        if (currentIndexSearcherHolder != null)
        {
            currentIndexSearcherHolder.SetIndexSearcher(null);
        }
    }
    finally
    {
        try
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
        finally
        {
            if (directory != null)
            {
                directory.Close();
            }
        }
    }
}
/// <summary>
/// Drains every queue registered in ExecuteQueueDic, passing each dequeued item and the shared index
/// writer to the queue's handler. Returns after the first pass in which all queues are empty.
/// </summary>
private static void ExecuteDequeue()
{
    FSDirectory directory = null;
    IndexWriter writer = null;
    try
    {
        // Must match the on-disk folder name exactly (including case).
        string indexPath = IndexPath;
        directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
        bool isExist = IndexReader.IndexExists(directory);
        if (isExist && IndexWriter.IsLocked(directory))
        {
            // Release a lock left behind by an aborted run.
            IndexWriter.Unlock(directory);
        }
        writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

        while (true)
        {
            bool hasData = false;
            foreach (string queueName in ExecuteQueueDic.Keys)
            {
                string data = RedisHelper.Dequeue(queueName);
                if (data != null)
                {
                    hasData = true;
                    var executeMethod = ExecuteQueueDic[queueName];
                    executeMethod(data, writer);
                }
            }
            if (!hasData)
            {
                // Every queue was empty on this pass: pause briefly, then finish.
                Thread.Sleep(200);
                return;
            }
        }
    }
    finally
    {
        try
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
        finally
        {
            // Guard on the directory itself: the previous check tested the writer, so the directory
            // stayed open whenever creating the writer failed.
            if (directory != null)
            {
                directory.Close();
            }
        }
    }
}
/// <summary>
/// Opens the index at <paramref name="idxDirPath"/> and returns the reader's MaxDoc() value.
/// </summary>
/// <param name="idxDirPath">Path of the index folder.</param>
/// <returns>The value reported by MaxDoc() for the index.</returns>
public int GetDocumentCount(string idxDirPath)
{
    FSDirectory fsIdxDirPath = null;
    IndexReader ir = null;
    try
    {
        fsIdxDirPath = FSDirectory.Open(FileSystems.getDefault().getPath(idxDirPath));
        ir = DirectoryReader.Open(fsIdxDirPath);
        // NOTE(review): MaxDoc() presumably still counts deleted documents — confirm this is the count wanted.
        return(ir.MaxDoc());
    }
    finally
    {
        // Either handle is still null when opening failed; closing it unguarded would hide the
        // original exception behind a NullReferenceException.
        try
        {
            if (ir != null)
            {
                ir.Close();
            }
        }
        finally
        {
            if (fsIdxDirPath != null)
            {
                fsIdxDirPath.Close();
            }
        }
    }
}
/// <summary>
/// Drains the "addLucene" Redis queue and applies each entry to the index under C:\lucenedir:
/// the document with the entry's Id is removed and, unless the entry is a delete, re-added.
/// </summary>
public void CreateSearchIndex()
{
    // Must match the on-disk folder name exactly (including case); the index files are written here.
    string indexPath = @"C:\lucenedir";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the folder already contains an index.
        bool isUpdate = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer takes the write lock.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            // Keep going until the queue is empty.
            while (Common.RedisHelper.GetEqueueCount("addLucene") > 0)
            {
                string str = Common.RedisHelper.Dequeue("addLucene");
                SearchContent model = Common.SerializeHelper.DeserializeToObject<SearchContent>(str);

                // Remove whatever is indexed under this Id first.
                // NOTE(review): matching on Id alone may remove unrelated entries that share the same Id.
                writer.DeleteDocuments(new Term("Id", model.Id.ToString()));
                if (model.LuceneActionType == LuceneActionType.Delete)
                {
                    continue;
                }

                Document document = new Document();
                // Stored verbatim and not tokenized.
                document.Add(new Field("Id", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("AddDate", model.AddDate.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("Flag", model.Flag.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Tokenized for full-text search; term positions and offsets are kept as well.
                document.Add(new Field("Title", model.Title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("Content", model.Content, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Adds one Lucene document per sell offer to the index under /Indexdata, looking up each offer's
/// detail text through the entity context.
/// </summary>
/// <param name="prolist">Sell offers to index.</param>
public void CreateIndex(List<Products_SellOffer> prolist)
{
    // NOTE(review): the entity context is never disposed here — confirm whether it should be.
    MultiSite_GangGuanEntities Bll = new Models.MultiSite_GangGuanEntities();
    string indexpath = HttpContext.Server.MapPath("/Indexdata");
    // NOTE(review): NoLockFactory disables index locking, so nothing stops two writers from running at once.
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexpath), new NoLockFactory());
    try
    {
        // True when the index folder and its segment files already exist.
        bool isExist = IndexReader.IndexExists(directory);
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var pitem in prolist)
            {
                var detailRow = Bll.Products_SellOffer_Detail.Where(d => d.SellOfferId == pitem.Id).FirstOrDefault();
                // An offer without a detail row is indexed with empty detail text instead of failing the batch.
                string detailText = (detailRow == null || detailRow.Detail == null) ? string.Empty : detailRow.Detail;

                Field id = new Field("id", pitem.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                Field title = new Field("title", pitem.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                title.SetBoost(1.0f);
                Field keywords = new Field("keywords", pitem.Keywords, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                keywords.SetBoost(0.7f);
                Field detail = new Field("detail", detailText, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Field sysattr = new Field("sysattr", pitem.SysAttr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                sysattr.SetBoost(0.4f);
                Field cusattr = new Field("cusattr", pitem.CusAttr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                cusattr.SetBoost(0.1f);

                Document document = new Document();
                document.Add(id);
                document.Add(title);
                document.Add(keywords);
                document.Add(detail);
                document.Add(sysattr);
                document.Add(cusattr);
                writer.AddDocument(document);
            }
            writer.Optimize();
        }
        finally
        {
            // Close on every path so the writer's files are released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Closes the searcher, the reader and the directory, skipping any that were never created.
/// </summary>
public void Dispose()
{
    // Released in the reverse order of use: searcher, then reader, then the directory they read from.
    // Each close still runs if an earlier one throws.
    try
    {
        if (searcher != null)
        {
            searcher.Close();
        }
    }
    finally
    {
        try
        {
            if (reader != null)
            {
                reader.Close();
            }
        }
        finally
        {
            if (directory != null)
            {
                directory.Close();
            }
        }
    }
}
/// <summary>
/// Adds one Lucene document per book returned by BooksManager.GetModelList to the index under ~/IndexData.
/// </summary>
private void CreateIndexByData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its segment files already exist.
        bool isExist = IndexReader.IndexExists(directory);

        // Release a lock left behind by an aborted run.
        // NOTE(review): this also breaks the lock of another writer that is still active.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer takes the write lock.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            BooksManager bookManager = new BooksManager();
            List<PZYM.Shop.Model.Books> bookList = bookManager.GetModelList("");
            foreach (var book in bookList)
            {
                // One document per book; every value is stored as a string.
                Document document = new Document();
                // Stored verbatim and not tokenized.
                document.Add(new Field("Id", book.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Tokenized for full-text search. Null text is indexed as empty, matching CreativeHTML below.
                document.Add(new Field("Description", book.Description ?? "", Field.Store.YES, Field.Index.ANALYZED));
                document.Add(new Field("MenuId", book.MenuId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                document.Add(new Field("CreativeHTML", string.IsNullOrWhiteSpace(book.CreativeHTML) ? "" : book.CreativeHTML, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Close on every path so the write lock is released even after a failure.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Indexes the article files 1000.txt .. 1099.txt from ~/Upload/Articles/ into the index under ~/Index
/// and updates the page controls once finished.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void btnCreateIndex_Click(object sender, EventArgs e)
{
    string indexPath = Context.Server.MapPath("~/Index");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Only meant to recover from a lock left behind by a crashed run; not safe with concurrent writers.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer takes the write lock.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            string articleFolder = Context.Server.MapPath("~/Upload/Articles/");
            for (int i = 1000; i < 1100; i++)
            {
                string txt = File.ReadAllText(articleFolder + i + ".txt");

                // One document per article; field names are free-form and all values are strings.
                Document document = new Document();
                // Stored verbatim and not tokenized.
                document.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Tokenized for full-text search; term positions and offsets are kept as well.
                document.Add(new Field("msg", txt, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                // Remove any document already indexed under this id to avoid duplicates.
                writer.DeleteDocuments(new Term("id", i.ToString()));
                writer.AddDocument(document);
                Console.WriteLine("索引{0}创建完毕", i.ToString());
            }
        }
        finally
        {
            // Close on every path so the write lock is released even if an article file is missing.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    lblIndexStatus.Text = "索引文件创建成功!";
    lblIndexStatus.Visible = true;
    btnCreateIndex.Enabled = false;
}
/// <summary>
/// Rebuilds the Lucene index in the "Index" folder under the current working
/// directory from the 1000 text files named 天骄战纪_1.txt to 天骄战纪_1000.txt
/// found in "Upload/Articles".
/// </summary>
/// <returns>A fixed completion message.</returns>
/// <remarks>
/// The writer is always opened with <c>create = true</c>, so any existing index is
/// replaced. The writer and the directory are closed in <c>finally</c> blocks so a
/// failure while reading a file does not leave the index locked.
/// </remarks>
public string CreateIndex()
{
    // Index location; created on first use.
    var indexPath = Directory.GetCurrentDirectory() + "/Index";
    if (!Directory.Exists(indexPath))
    {
        Directory.CreateDirectory(indexPath);
    }

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // Clear a lock left behind by an abnormally terminated indexing run.
        if (IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Arguments: index directory, analyzer (PanGu word segmentation), create flag
        // (true = start a fresh index), and the per-field length limit (unlimited here).
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            var txtPath = Directory.GetCurrentDirectory() + "/Upload/Articles";

            for (int i = 1; i <= 1000; i++)
            {
                // Built once and reused for both the file name and the stored title.
                var title = "天骄战纪_" + i;
                var content = System.IO.File.ReadAllText(txtPath + "/" + title + ".txt", Encoding.Default);

                // One document per file.
                Document document = new Document();

                // Title is stored and matched exactly (not analyzed).
                document.Add(new Field("Title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Content is analyzed for full-text search; the original text is stored
                // and term positions and offsets are kept.
                document.Add(new Field("Content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    return "索引创建完毕";
}
/// <summary>
/// Drains <c>bookQueue</c> and applies each queued insert, delete or modify
/// operation to the Lucene index at <c>indexPath</c>.
/// </summary>
/// <remarks>
/// Delete and Modify remove the existing document by its "id" term; Insert and
/// Modify then add a freshly built document. Entries with any other
/// <c>IndexType</c> are dequeued and ignored. The writer and the directory are
/// closed in <c>finally</c> blocks so an exception does not leave the index locked.
/// </remarks>
private void CRUDIndex()
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExist = IndexReader.IndexExists(directory);

        // Clear a lock left behind by an abnormally terminated indexing run.
        // NOTE(review): a forced unlock is not safe when another writer is really active.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // A new index is created only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            while (bookQueue.Count > 0)
            {
                BookViewMode book = bookQueue.Dequeue();
                string id = book.Id.ToString();

                bool removeOld = book.IT == IndexType.Delete || book.IT == IndexType.Modify;
                bool addNew = book.IT == IndexType.Insert || book.IT == IndexType.Modify;

                if (removeOld)
                {
                    writer.DeleteDocuments(new Term("id", id));
                }

                if (addNew)
                {
                    Document document = new Document();

                    // Key field: stored and matched exactly (not analyzed).
                    document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("MenuId", book.MenuId.ToString(), Field.Store.YES, Field.Index.ANALYZED));

                    // Full-text fields; null text is indexed as an empty string.
                    document.Add(new Field("Description", book.Description ?? "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    document.Add(new Field("CreativeHTML", book.CreativeHTML ?? "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                    writer.AddDocument(document);
                }
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Click handler that downloads topic pages 1..<c>GetMaxID()</c> from the local
/// forum site, extracts each page's title and body text, and (re)indexes them in
/// the Lucene index at <c>C:/index</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The <c>WebClient</c> is disposed and the writer and directory are closed even
/// when a download or parse fails, so the index is not left locked.
/// </remarks>
protected void searchButton_Click(object sender, EventArgs e)
{
    // Location of the index.
    string indexPath = "C:/index";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Clear a lock left behind by an abnormally terminated indexing run.
        // NOTE(review): a forced unlock is not safe when another writer is really active.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // A new index is created only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;

                int maxID = GetMaxID();
                for (int i = 1; i <= maxID; i++)
                {
                    string number = i.ToString();
                    string url = "http://localhost:8080/showtopic-" + i + ".aspx";
                    string html = wc.DownloadString(url);

                    // Parse the downloaded HTML to obtain the title and the plain body text.
                    HTMLDocumentClass doc = new HTMLDocumentClass();
                    doc.designMode = "on";
                    doc.IHTMLDocument2_write(html);
                    doc.close();

                    string title = doc.title;
                    string body = doc.body.innerText;

                    // Delete any earlier copy so the page is not indexed twice.
                    writer.DeleteDocuments(new Term("number", number));

                    Document document = new Document();
                    document.Add(new Field("number", number, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    writer.AddDocument(document);

                    logger.Debug("索引" + number + "完毕");
                }
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    logger.Debug("全部索引完毕");
}
/// <summary>
/// Click handler that indexes every row of the first table returned by
/// <c>Business.DictProcess.GetItemAll()</c> into the Lucene index at <c>c:/index</c>,
/// using the row's "code" as the key and its "name" as the searchable text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The writer and the directory are closed in <c>finally</c> blocks so an exception
/// does not leave the index locked.
/// </remarks>
private void btnfc_Click(object sender, EventArgs e)
{
    DataTable dt = new Business.DictProcess().GetItemAll().Tables[0];

    string indexPath = "c:/index";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Clear a lock left behind by an abnormally terminated indexing run.
        // NOTE(review): a forced unlock is not safe when another writer is really active.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // A new index is created only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            for (int i = 0; i < dt.Rows.Count; i++)
            {
                string code = dt.Rows[i]["code"].ToString();

                // Delete any earlier document with the same code to avoid duplicates.
                writer.DeleteDocuments(new Term("code", code));

                Document document = new Document();

                // Only the field that needs full-text search is analyzed.
                document.Add(new Field("code", code, Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("name", dt.Rows[i]["name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Drains <c>queue</c> and applies each queued item to the Lucene index at
/// <c>IndexPath</c>: the existing document with the same "Id" is deleted, and unless
/// the item is a delete request a new document is added in its place.
/// </summary>
/// <remarks>
/// Exceptions propagate to the caller with their original stack trace (the previous
/// <c>catch (Exception ex) { throw ex; }</c> discarded it). The directory is closed
/// in a <c>finally</c> block and the writer is disposed by <c>using</c>, so a failure
/// does not leave the index locked.
/// </remarks>
private void CreateIndexContent()
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its marker files already exist.
        bool isUpdate = IndexReader.IndexExists(directory);

        // Only one writer may hold the index at a time. Clear a lock left behind by
        // an abnormally terminated indexing run.
        // NOTE(review): a forced unlock is not safe when another writer is really active.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer locks the index; disposing it releases the lock.
        using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
        {
            while (queue.Count > 0)
            {
                IndexContent indexContent = queue.Dequeue() as IndexContent;

                // Remove the previous version first; for delete requests this is all there is to do.
                writer.DeleteDocuments(new Term("Id", indexContent.Id));
                if (indexContent.LuceneEnum == LuceneEnum.DeleType)
                {
                    continue;
                }

                Document document = new Document();

                // Key field: stored so it can be read back, not analyzed so it matches exactly.
                document.Add(new Field("Id", indexContent.Id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Full-text fields: analyzed, original text stored, term positions and offsets kept.
                document.Add(new Field("Title", indexContent.Title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("Content", indexContent.Content, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("CreateTime", indexContent.CreateTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                writer.AddDocument(document);
            }
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Drains <c>queue</c> and applies each queued item to the Lucene index at
/// <c>E:\lucenedir</c>: the existing document with the same "id" is deleted, and
/// unless the item is a delete request a new document is added in its place.
/// </summary>
/// <remarks>
/// The writer and the directory are closed in <c>finally</c> blocks so an exception
/// does not leave the index locked.
/// </remarks>
private void WriteSearchContent()
{
    // Folder that receives the index files.
    string indexPath = @"E:\lucenedir";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its marker files already exist.
        bool isUpdate = IndexReader.IndexExists(directory);

        // Only one writer may hold the index at a time. Clear a lock left behind by
        // an abnormally terminated indexing run.
        // NOTE(review): forcing the unlock also breaks the lock of a writer that is
        // still running; the original author flagged this as unresolved.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // Opening the writer locks the index; a new index is created only when none exists.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            while (queue.Count > 0)
            {
                IndexContent indexContent = queue.Dequeue();
                string id = indexContent.id.ToString();

                // Remove the previous version first; for delete requests this is all there is to do.
                writer.DeleteDocuments(new Term("id", id));
                if (indexContent.LuceneEnumType == LuceneEnumType.Delete)
                {
                    continue;
                }

                Document document = new Document();

                // Key field: stored so it can be read back, not analyzed so it matches exactly.
                document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Remaining fields: analyzed, original text stored, term positions and offsets kept.
                document.Add(new Field("img", indexContent.img, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("title", indexContent.title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("jiage", indexContent.jiage, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("chuxiaojia", indexContent.chuxiaojia, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Drains <c>queue</c> and applies each task to the Lucene index whose location is
/// read from the "IndexPath" application setting.
/// Pipeline: data → text → analyzer → Field → Document → IndexWriter → Directory.
/// </summary>
/// <remarks>
/// For every task the existing document with the same "id" is deleted; unless the
/// task is a delete, a new document with id, title and content is then added.
/// The writer and the directory are closed in <c>finally</c> blocks so an exception
/// does not leave the index locked.
/// </remarks>
private void IndexHandler()
{
    // Folder that receives the index files.
    string indexPath = System.Configuration.ConfigurationManager.AppSettings["IndexPath"];
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);

        // Clear a lock left behind by an abnormally terminated indexing run.
        // NOTE(review): a forced unlock is not safe when another writer is really active.
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // A new index is created only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            // Process queued tasks until the queue is empty.
            while (queue.Count > 0)
            {
                IndexTask task = queue.Dequeue();
                string id = task.Id.ToString();

                // Remove the previous version first; for delete tasks this is all there is to do.
                writer.DeleteDocuments(new Term("id", id));
                if (task.Type == TaskType.Delete)
                {
                    continue;
                }

                Document document = new Document();

                // Key field: stored and matched exactly (not analyzed).
                document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Full-text fields: analyzed, original text stored, term positions and offsets kept.
                document.Add(new Field("title", task.Title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("content", task.Content, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer releases the index lock.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}