/// <summary>
/// Click handler that replaces the indexed document whose "id" term equals the
/// text entered in <c>txtFileId</c> with a freshly built document, then
/// optimizes the index and reports the result through <c>SetOutput</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnUpdateIndex_Click(object sender, EventArgs e)
{
    string id = txtFileId.Text.Trim();
    if (string.IsNullOrEmpty(id))
    {
        SetOutput("请输入文件id(整数)");
        return;
    }

    string filename = "think in lucene......";

    // Passed straight through as the "create" flag of GetDirectory below.
    // NOTE(review): the original comment reads "whether the index files have
    // already been created" - confirm the polarity of IsEnableCreated().
    bool enableCreate = IsEnableCreated();

    Term term = new Term("id", id);

    // Build the replacement document; every field is stored and indexed.
    Document doc = new Document();
    doc.Add(new Field("id", id, Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("filename", filename, Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("contents", filename, Field.Store.YES, Field.Index.TOKENIZED));

    LuceneIO.Directory directory = LuceneIO.FSDirectory.GetDirectory(INDEX_STORE_PATH, enableCreate);
    try
    {
        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        try
        {
            writer.UpdateDocument(term, doc);
            writer.Optimize();
        }
        finally
        {
            // Always close the writer, even when the update or optimize step throws.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    SetOutput(string.Format("更新索引.Id:{0},已经优化成功", id));
}
/// <summary>
/// Rebuilds the index under <c>Config.INDEX_STORE_PATH</c> from six hard-coded
/// sample articles and optimizes it.
/// </summary>
/// <param name="isPangu">
/// When true the documents are analyzed with <c>PanGuAnalyzer</c>; otherwise
/// with <c>StandardAnalyzer(Version.LUCENE_29)</c>.
/// </param>
public static void PrepareIndex(bool isPangu)
{
    Analyzer analyzer;
    if (isPangu)
    {
        analyzer = new PanGuAnalyzer(); // PanGu analyzer
    }
    else
    {
        analyzer = new StandardAnalyzer(Version.LUCENE_29);
    }

    DirectoryInfo dirInfo = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
    LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
    try
    {
        // The third argument (true) is the writer's "create" flag.
        IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        try
        {
            // Sample articles used for testing.
            CreateIndex(writer, "jeffreyzhao", "博客园有一个老赵,人格魅力巨大,洋名就叫jeffreyzhao。据我所知,他还是一个胖子,一个钢琴业余爱好者。");
            CreateIndex(writer, "lucene测试", "这是一个测试,关于lucene.net的 关注老赵");
            CreateIndex(writer, "博客园里有牛人", "Hello World. 我认识的一个高手,他拥有广博的知识,有极客的态度,还经常到园子里来看看");
            CreateIndex(writer, "奥巴马", "美国现任总统是奥巴马?确定不是奥巴牛和奥巴羊 不知道问老赵");
            CreateIndex(writer, "奥林匹克", "奥林匹克运动会将来到南美美丽热情的国度巴西,也就是亚马逊河流域的一个地方");
            CreateIndex(writer, "写给自己", "博客园的jeffwong,新的开始,继续努力了");
            writer.Optimize();
        }
        finally
        {
            // Close the writer even if indexing fails part-way through.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
private static IndexSearcher searcher; // index searcher
#endregion

#region methods
/// <summary>
/// Opens the index folder at <c>Config.INDEX_STORE_PATH</c> (creating the folder
/// when it does not exist), builds a new <c>IndexSearcher</c> over it, stores it
/// in the static <c>searcher</c> field and returns it.
/// </summary>
/// <returns>The newly created searcher, which is also kept in the static field.</returns>
public static IndexSearcher GenerateSearcher()
{
    DirectoryInfo indexFolder = Directory.CreateDirectory(Config.INDEX_STORE_PATH);

    // The second constructor argument is passed as true.
    // NOTE(review): this is the read-only flag in Lucene.Net 2.9 - confirm
    // against the library version the project references.
    searcher = new IndexSearcher(LuceneIO.FSDirectory.Open(indexFolder), true);
    return searcher;
}
///// <summary>
///// Deletes the folder in which the index is stored (handler currently disabled).
///// </summary>
///// <param name="sender"></param>
///// <param name="e"></param>
//private void btnDeleteAllIndex_Click(object sender, EventArgs e)
//{
//    DeleteFiles(INDEX_STORE_PATH);
//    SetOutput(string.Format("{0}文件夹保存的索引已经全部删除", INDEX_STORE_PATH));
//}

/// <summary>
/// Click handler that deletes from the index every document whose "id" term
/// equals the text entered in <c>txtFileId</c>, then reports the result
/// through <c>SetOutput</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnDeleteIndex_Click(object sender, EventArgs e)
{
    string id = txtFileId.Text.Trim();
    if (string.IsNullOrEmpty(id))
    {
        SetOutput("请输入文件id(整数)");
        return;
    }

    // Both "create" flags are false: the handler works on the existing index.
    LuceneIO.Directory directory = LuceneIO.FSDirectory.GetDirectory(INDEX_STORE_PATH, false);
    try
    {
        IndexModifier modifier = new IndexModifier(directory, new StandardAnalyzer(), false);
        try
        {
            modifier.DeleteDocuments(new Term("id", id));
        }
        finally
        {
            // Always close the modifier, even when the delete throws.
            modifier.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    SetOutput(string.Format("删除文件索引成功,ID为{0}!", id));
}
/// <summary>
/// Click handler that rebuilds the index under <c>Config.INDEX_STORE_PATH</c>
/// from the <c>Title</c> and <c>p</c> columns of <c>test4Table</c> using the
/// PanGu analyzer, then writes a success message to <c>richTextBox1</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    // Always true here: this handler only ever builds a PanGu-analyzed index.
    bool isPangu = true;
    wznr_Servise wznr = new wznr_Servise();
    Analyzer analyzer = new PanGuAnalyzer(); // PanGu analyzer

    DirectoryInfo dirInfo = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
    LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
    try
    {
        // The third argument (true) is the writer's "create" flag.
        IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        try
        {
            DataTable dt = wznr.GetDataTable("SELECT Title, p FROM test4Table");
            foreach (DataRow row in dt.Rows)
            {
                string title = Convert.ToString(row[0]);
                string content = Convert.ToString(row[1]);
                CreateIndex(writer, title, content);
            }
            writer.Optimize();
        }
        finally
        {
            // Close the writer even if the query or indexing fails.
            writer.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    this.richTextBox1.Text = string.Format("{0}索引创建成功", isPangu ? "盘古分词" : string.Empty);
}
// Create the index and import data; added 2012-5-29 -- modified class (deprecated).
/// <summary>
/// Reads title/content rows from the <c>Content</c> table, segments them with
/// <c>FenCiHelper</c>, assigns categories from <c>App_Data\sDict.txt</c>, splits
/// each content into tag-delimited fragments, and finally hands both resulting
/// tables to <c>wznr.TableValuedToDB</c>.
/// </summary>
/// <param name="getTotalRecordsDelegate">
/// Callback invoked with the running row count after each row is started and
/// once more after the database writes complete.
/// </param>
/// <param name="sw">Stopwatch started and stopped around the two database writes only.</param>
/// <param name="rich">Text box that receives one progress line per processed row.</param>
public void CreateIndexImport(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, System.Windows.Forms.RichTextBox rich)
{
    int totalRecords = 0;
    FenCiHelper fch = new FenCiHelper();
    wznr_Servise wznr = new wznr_Servise();
    DataTable dt = wznr.GetOldbDataTable("select 标题,内容 from Content");

    // Append two new columns: index 2 = "keys", index 3 = "bztype".
    dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

    DataTable newdt = wznr.GetTableSchema();

    for (int i = 0; i < dt.Rows.Count; i++)
    {
        totalRecords++;
        getTotalRecordsDelegate(totalRecords);

        string title = Convert.ToString(dt.Rows[i][0]);   // original title
        string content = Convert.ToString(dt.Rows[i][1]); // original content

        // Segment the title; the result is split on '/' below.
        title = fch.PanguFenCi(title);

        // The keyword file is deliberately re-read for every row (per the
        // original author's note), so it is opened inside the loop.
        string txtpath = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";
        string[] titleTokens = title.Split('/');

        // Dispose the reader on every iteration; the original leaked one handle per row.
        using (StreamReader file = new StreamReader(txtpath))
        {
            string line;
            while ((line = file.ReadLine()) != null)
            {
                string[] entry = line.Split(',');
                if (entry.Length < 2)
                {
                    // A line without a comma carries no category; skip it instead of throwing.
                    continue;
                }

                for (int j = 0; j < titleTokens.Length; j++)
                {
                    if (entry[0].Equals(titleTokens[j]))
                    {
                        dt.Rows[i][3] += entry[1] + ","; // category
                        break; // one match per dictionary line is enough
                    }
                }
            }
        }

        // Segment the content and store the segmented title as the keys.
        dt.Rows[i][1] = fch.PanGuContentFenCi(content);
        dt.Rows[i][2] = title;

        // NOTE(review): a writer is opened with create=true for every row and no
        // document is added (the CreateIndex call below is disabled). This is kept
        // as in the original; confirm whether resetting the index per row is intended.
        Analyzer analyzer = new PanGuAnalyzer(); // PanGu analyzer
        DirectoryInfo dirInfo = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
        LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
        try
        {
            IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                // One output row per regex match in the ORIGINAL (unsegmented) content.
                foreach (Match m in Regex.Matches(content, @"<(\w+)>[^P]*[^<]*</(\w+)>"))
                {
                    DataRow newdr = newdt.NewRow();
                    newdr["Title"] = dt.Rows[i][0];
                    newdr["p"] = m.Value;
                    newdr["p1"] = content;
                    newdr["keys"] = title;
                    newdr["bztype"] = dt.Rows[i][3];
                    newdt.Rows.Add(newdr);
                    // CreateIndex(writer, dt.Rows[i][0].ToString(), m.Value); // index creation (disabled)
                }
                writer.Optimize();
            }
            finally
            {
                // Close the writer even if row building fails.
                writer.Close();
            }
        }
        finally
        {
            directory.Close();
        }

        rich.Text += dt.Rows[i][2].ToString() + "-----索引创建成功\n";
        rich.ForeColor = System.Drawing.Color.Green;
    }

    string sql = "insert into test3Table (Title,Content,keys,bztype)" + " SELECT nc.Title,nc.Content,nc.keys,nc.bztype" + " FROM @NewBulkTestTvp AS nc";
    string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" + " SELECT nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" + " FROM @NewBulkTestTvp AS nc";

    sw.Start();
    try
    {
        wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
        wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
    }
    finally
    {
        // Do not leave the caller's stopwatch running when a write fails.
        sw.Stop();
    }

    getTotalRecordsDelegate(totalRecords);
}