/// <summary>
/// Appends .txt file indexes for a user-selected folder into the existing index
/// and reports the elapsed time.
/// </summary>
private void btnAppendIndex_Click(object sender, EventArgs e)
{
    DialogResult result = this.fbdSelectFile.ShowDialog();
    if (result != DialogResult.OK)
    {
        return;
    }

    IndexModifier modifier = null;
    try
    {
        SetOutput("======================.txt文件索引追加开始===============================");
        modifier = new IndexModifier(INDEX_STORE_PATH, new StandardAnalyzer(), IsEnableCreated());

        Stopwatch watch = new Stopwatch();
        watch.Start();
        IndexDirectory(modifier, new FileInfo(this.fbdSelectFile.SelectedPath));
        modifier.Optimize(); // optimize the index
        modifier.Close();    // release the index writer
        modifier = null;     // closed successfully; the finally block must not close twice
        watch.Stop();

        StringBuffer sb = "索引追加完成,共用时:" + watch.Elapsed.Hours + "时 " + watch.Elapsed.Minutes + "分 "
            + watch.Elapsed.Seconds + "秒 " + watch.Elapsed.Milliseconds + "毫秒";
        SetOutput(sb);
    }
    catch (Exception ex)
    {
        SetOutput(ex.Message);
    }
    finally
    {
        // BUG FIX: the original leaked the IndexModifier (and its index write lock)
        // whenever an exception occurred before Close() was reached.
        if (modifier != null)
        {
            try { modifier.Close(); } catch { /* best-effort cleanup */ }
        }
    }
}
/// <summary>
/// Initializes an empty CREATE INDEX query: default type and modifier,
/// an unnamed index on an unnamed table, and no target columns.
/// </summary>
public CreateIndexQuery()
{
    Type = IndexType.DEFAULT;
    Modifier = IndexModifier.DEFAULT;
    Name = new Identifier();
    OnTable = new TableRef("");
    OnColumns = new List<ColumnRef>();
    MsSqlFullTextKeyIndex = null;
}
/// <summary>
/// Callback invoked when the asynchronous indexing delegate completes:
/// optimizes the index, closes the writer and reports the elapsed time.
/// (Optimization could be scheduled by policy, e.g. nightly.)
/// </summary>
/// <param name="result">Async result whose AsyncState carries the IndexModifier.</param>
private void IndexCallback(IAsyncResult result)
{
    // BUG FIX: the original used "as" without a null check and would have thrown
    // a NullReferenceException if AsyncState was not an IndexModifier.
    IndexModifier modifier = result.AsyncState as IndexModifier;
    if (modifier == null)
    {
        return;
    }

    modifier.Optimize(); // optimize the index
    modifier.Close();    // release the index writer

    TimeSpan span = DateTime.Now - dtStart;
    StringBuffer sb = "索引完成,共用时:" + span.Hours + "时 " + span.Minutes + "分 "
        + span.Seconds + "秒 " + span.Milliseconds + "毫秒";
    SetOutput(sb);
}
/// <summary>
/// Writes one index document per person loaded from the database.
/// Both fields ("id" and "fullname") are stored and tokenized.
/// </summary>
private void IndexDB(IndexModifier modifier, IList<Person> listModels)
{
    SetOutput(string.Format("正在建立数据库索引,共{0}人", listModels.Count));

    foreach (Person person in listModels)
    {
        string fullName = string.Format("{0} {1}", person.FirstName, person.LastName);

        Document document = new Document();
        document.Add(new Field("id", person.Id.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
        document.Add(new Field("fullname", fullName, Field.Store.YES, Field.Index.TOKENIZED));

        modifier.AddDocument(document);
    }
}
/// <summary>
/// Initializes a CREATE INDEX query for the given table and columns.
/// </summary>
/// <param name="name">Index name.</param>
/// <param name="table">Table the index is created on.</param>
/// <param name="type">Index type; defaults to the dialect default.</param>
/// <param name="modifier">Index modifier (e.g. UNIQUE); defaults to none.</param>
/// <param name="msSqlFullTextKeyIndex">MSSQL full-text key index name, or null when not used.</param>
/// <param name="columns">Columns the index covers.</param>
public CreateIndexQuery(string name, TableRef table, IndexType type = IndexType.DEFAULT, IndexModifier modifier = IndexModifier.DEFAULT, string msSqlFullTextKeyIndex = null, params ColumnRef[] columns)
{
    Name = new Identifier(name);
    OnTable = table;
    Type = type;
    Modifier = modifier;
    // FIX: copy the columns directly; the original's AddRange(columns.Select(x => x))
    // was a redundant identity projection over an already-sized list.
    OnColumns = new List<ColumnRef>(columns);
    if (msSqlFullTextKeyIndex != null)
    {
        MsSqlFullTextKeyIndex = new Identifier(msSqlFullTextKeyIndex);
    }
}
/// <summary>
/// Adds a product to the search index.
/// </summary>
/// <param name="model">Index entity to add.</param>
/// <remarks>Created 2013-3-8.</remarks>
public void AddIndex(Hyt.Model.PdProductIndex model)
{
    try
    {
        _modifier = new IndexModifier(IndexStorePath, new PanGuAnalyzer(), false);
        _modifier.AddDocument(ModelToDocument(model));
    }
    catch
    {
        // TODO: log the exception here (deliberate best-effort: indexing must not crash callers).
    }
    finally
    {
        // BUG FIX: if the IndexModifier constructor threw, _modifier could be null and
        // the original finally block itself raised a NullReferenceException.
        if (_modifier != null)
        {
            _modifier.Flush();
            _modifier.Close();
        }
    }
}
/// <summary>
/// Deletes a product from the search index.
/// </summary>
/// <param name="productSysNo">Product system number to remove.</param>
/// <remarks>Created 2013-3-8.</remarks>
public void DeleteIndex(int productSysNo)
{
    try
    {
        _modifier = new IndexModifier(IndexStorePath, new PanGuAnalyzer(), false);
        Term term = new Term("SysNo", productSysNo.ToString());
        _modifier.DeleteDocuments(term); // remove every document matching the product number
    }
    catch
    {
        // TODO: log the exception here (deliberate best-effort: indexing must not crash callers).
    }
    finally
    {
        // BUG FIX: if the IndexModifier constructor threw, _modifier could be null and
        // the original finally block itself raised a NullReferenceException.
        if (_modifier != null)
        {
            _modifier.Close();
        }
    }
}
/// <summary>
/// Deletes every index document whose "id" field matches the id typed into txtFileId.
/// </summary>
private void btnDeleteIndex_Click(object sender, EventArgs e)
{
    string id = txtFileId.Text.Trim();
    if (string.IsNullOrEmpty(id))
    {
        SetOutput("请输入文件id(整数)");
        return;
    }

    LuceneIO.Directory directory = LuceneIO.FSDirectory.GetDirectory(INDEX_STORE_PATH, false);
    IndexModifier modifier = null;
    try
    {
        modifier = new IndexModifier(directory, new StandardAnalyzer(), false);
        Term term = new Term("id", id);
        modifier.DeleteDocuments(term); // delete all matching documents
    }
    finally
    {
        // BUG FIX: the original leaked the modifier and the directory handle
        // whenever the delete threw.
        if (modifier != null)
        {
            modifier.Close();
        }
        directory.Close();
    }
    SetOutput(string.Format("删除文件索引成功,ID为{0}!", id));
}
/// <summary>
/// Recursively walks a directory tree and indexes every .txt file found.
/// A non-directory argument is indexed directly when its extension is .txt.
/// </summary>
/// <param name="modifier">Open index writer documents are added to.</param>
/// <param name="file">File or directory to process.</param>
private void IndexDirectory(IndexModifier modifier, FileInfo file)
{
    if (Directory.Exists(file.FullName))
    {
        // an IO error could occur
        string[] entries = Directory.GetFileSystemEntries(file.FullName);
        if (entries != null)
        {
            foreach (string entry in entries)
            {
                IndexDirectory(modifier, new FileInfo(entry)); // recurse into children
            }
        }
    }
    else if (string.Equals(file.Extension, ".txt", StringComparison.OrdinalIgnoreCase))
    {
        // BUG FIX: the original used ToLower() + culture-sensitive string.Compare,
        // which can misbehave under some cultures (e.g. Turkish dotless I).
        IndexFile(file, modifier);
    }
}
/// <summary>
/// Removes the index documents for <paramref name="node"/> (matched by path and order)
/// from every per-language/per-view Lucene directory and, when
/// <paramref name="reindexAfterRemovingOld"/> is true, re-adds the node to the index.
/// NOTE(review): locks on "this" — a private gate object is the usual pattern;
/// left unchanged in case other code synchronizes on this instance.
/// </summary>
protected override void DoIndexNode(Node node, bool reindexAfterRemovingOld)
{
    lock (this)
    {
        foreach (string langKey in luceneDirectoryIndexedByLanguageCodeThenView.Keys)
        {
            foreach (string viewKey in luceneDirectoryIndexedByLanguageCodeThenView[langKey].Keys)
            {
                Directory d = luceneDirectoryIndexedByLanguageCodeThenView[langKey][viewKey];
                // Open a modifier over this language/view directory to delete stale docs.
                IndexModifier modifier = new IndexModifier(d, LuceneLibrarySearchCriteriaAdaptor.ANALYZER, false);
                try
                {
                    foreach (int i in GetDocumentIdsForElementId(node.Id))
                    {
                        Document doc = GetDocument(i);
                        // Only delete documents whose stored path AND order match this node.
                        if (MakePath(node) == doc.GetField(LuceneNodeIndexer.PATH_FIELD).StringValue()
                            && node.Order.ToString() == doc.GetField(LuceneNodeIndexer.ORDER_FIELD).StringValue())
                        {
                            Debug.WriteLine(
                                string.Format("DoIndexNode: Removing document {0} for element id {1}", i, node.Id));
                            modifier.DeleteDocument(i);
                        }
                    }
                }
                finally
                {
                    Debug.WriteLine("DoIndexNode: Closing index reader");
                    modifier.Close();
                }
                if (!reindexAfterRemovingOld)
                {
                    // NOTE(review): this returns from the whole method after the FIRST
                    // language/view directory, skipping deletions in the remaining
                    // directories — confirm that is intended and not a bug.
                    return;
                }
                // Re-add the node; indexWriter is a field — presumably closed elsewhere
                // (e.g. by LuceneNodeIndexer or a caller) — TODO confirm, not visible here.
                indexWriter = new IndexWriter(d, LuceneLibrarySearchCriteriaAdaptor.ANALYZER, false);
                LuceneNodeIndexer indexer = new LuceneNodeIndexer(indexWriter, langKey);
                indexer.AddToIndex(node, node.PreferredLabel);
            }
        }
    }
}
/// <summary>
/// Creates an in-memory (RAMDirectory) index for all .txt files under a
/// user-selected folder and reports the elapsed time.
/// </summary>
private void btnMemoryCreateIndex_Click(object sender, EventArgs e)
{
    DialogResult result = this.fbdSelectFile.ShowDialog();
    if (result != DialogResult.OK)
    {
        return;
    }

    id = 1; // restart the per-file id sequence for this run
    IndexModifier modifier = null;
    try
    {
        SetOutput("======================.txt文件内存索引创建开始===============================");
        ramDir = new LuceneIO.RAMDirectory();
        modifier = new IndexModifier(ramDir, new StandardAnalyzer(), true);

        Stopwatch watch = new Stopwatch();
        watch.Start();
        IndexDirectory(modifier, new FileInfo(this.fbdSelectFile.SelectedPath));
        modifier.Optimize(); // optimize the index
        modifier.Close();    // release the index writer
        modifier = null;     // closed successfully; the finally block must not close twice
        watch.Stop();

        StringBuffer sb = "索引完成,共用时:" + watch.Elapsed.Hours + "时 " + watch.Elapsed.Minutes + "分 "
            + watch.Elapsed.Seconds + "秒 " + watch.Elapsed.Milliseconds + "毫秒";
        SetOutput(sb);
    }
    catch (Exception ex)
    {
        SetOutput(ex.Message);
    }
    finally
    {
        // BUG FIX: the original leaked the IndexModifier when indexing threw
        // before Close() was reached.
        if (modifier != null)
        {
            try { modifier.Close(); } catch { /* best-effort cleanup */ }
        }
    }
}
/// <summary>
/// Loops through a list of stories and adds them to the index. If the crawl is an
/// incremental update then each story is first removed and then added again.
/// </summary>
/// <param name="modifier">IndexModifier used to update the index.</param>
/// <param name="isIncrementalCrawl">True when the stories should be removed from
/// the existing index before being added again.</param>
/// <param name="stories">StoryCollection containing the stories to add/update
/// in the index.</param>
private void AddStoriesToIndex(IndexModifier modifier, bool isIncrementalCrawl, StoryCollection stories)
{
    if (isIncrementalCrawl)
    {
        // Remove the stories from the index that have been updated.
        Log.DebugFormat("Updating index, removing {0} stories", stories.Count);
        foreach (Story s in stories)
        {
            Term existingItem = new Term("id", s.StoryID.ToString());
            // FIX: the original captured DeleteDocuments' return value in an unused local.
            modifier.DeleteDocuments(existingItem);
        }
    }

    // Add the new documents.
    Log.DebugFormat("Adding batch of {0} stories to the index", stories.Count);
    foreach (Story story in stories)
    {
        // Spam stories shouldn't be added to the index.
        if (story.IsSpam)
        {
            continue;
        }

        Document doc = new Document();
        doc.Add(new Field("url", story.Url, Field.Store.NO, Field.Index.TOKENIZED));
        doc.Add(new Field("title", story.Title, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
        doc.Add(new Field("description", story.Description, Field.Store.NO, Field.Index.TOKENIZED));
        doc.Add(new Field("users", GetUserWhoKickedSearchString(story), Field.Store.NO, Field.Index.TOKENIZED));
        doc.Add(new Field("category", story.Category.Name, Field.Store.NO, Field.Index.TOKENIZED));
        doc.Add(new Field("tags", GetStoryTags(story), Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
        // "id" is the only stored field: it is what search results return.
        doc.Add(new Field("id", story.StoryID.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("kickCount", story.KickCount.ToString(), Field.Store.NO, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("dateAdded", DateField.DateToString(story.CreatedOn), Field.Store.NO, Field.Index.UN_TOKENIZED));

        modifier.AddDocument(doc);
        Log.DebugFormat("StoryId {0} added to index", story.StoryID);
    }
}
/// <summary>
/// Adds a single .txt file to the index: an auto-incremented "id", the full
/// file name, and the file contents (all stored and tokenized).
/// </summary>
/// <param name="file">Text file to index.</param>
/// <param name="modifier">Open index writer the document is added to.</param>
private void IndexFile(FileInfo file, IndexModifier modifier)
{
    try
    {
        SetOutput("正在建立索引,文件名:" + file.FullName);

        Document document = new Document();
        document.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
        id++; // the next file gets the next sequential id
        document.Add(new Field("filename", file.FullName, Field.Store.YES, Field.Index.TOKENIZED));

        using (TextReader reader = new StreamReader(file.FullName, System.Text.Encoding.Default))
        {
            string contents = reader.ReadToEnd(); // pull the whole file into the index
            document.Add(new Field("contents", contents, Field.Store.YES, Field.Index.TOKENIZED));
        }

        modifier.AddDocument(document);
    }
    catch (FileNotFoundException fnfe)
    {
        SetOutput(fnfe.Message);
    }
}
/// <summary>
/// Creates the index from the first 1000 Person rows in the database and
/// reports the elapsed time.
/// </summary>
private void btnDBCreateIndex_Click(object sender, EventArgs e)
{
    StringBuffer sql = "SELECT TOP 1000 Id,FirstName,LastName FROM Person(NOLOCK)";
    IndexModifier modifier = null;
    try
    {
        IList<Person> listPersons = EntityConvertor.QueryForList<Person>(sql.ToString(), strSqlConn, null);
        SetOutput("======================DB索引创建开始===============================");
        modifier = new IndexModifier(INDEX_STORE_PATH, new StandardAnalyzer(), true);

        Stopwatch watch = new Stopwatch();
        watch.Start();
        IndexDB(modifier, listPersons);
        modifier.Optimize(); // optimize the index
        modifier.Close();    // release the index writer
        modifier = null;     // closed successfully; the finally block must not close twice
        watch.Stop();

        StringBuffer sb = "索引完成,共用时:" + watch.Elapsed.Hours + "时 " + watch.Elapsed.Minutes + "分 "
            + watch.Elapsed.Seconds + "秒 " + watch.Elapsed.Milliseconds + "毫秒";
        SetOutput(sb);
    }
    catch (Exception ex)
    {
        SetOutput(ex.Message);
    }
    finally
    {
        // BUG FIX: the original leaked the IndexModifier (and its index write lock)
        // whenever indexing threw before Close() was reached.
        if (modifier != null)
        {
            try { modifier.Close(); } catch { /* best-effort cleanup */ }
        }
    }
}
/// <summary>
/// Convenience overload: accepts the table name as a string and delegates to the
/// TableRef-based constructor.
/// </summary>
/// <param name="name">Index name.</param>
/// <param name="table">Name of the table the index is created on.</param>
/// <param name="type">Index type; defaults to the dialect default.</param>
/// <param name="modifier">Index modifier; defaults to none.</param>
/// <param name="msSqlFullTextKeyIndex">MSSQL full-text key index name, or null when not used.</param>
/// <param name="columns">Columns the index covers.</param>
public CreateIndexQuery(string name, string table, IndexType type = IndexType.DEFAULT, IndexModifier modifier = IndexModifier.DEFAULT, string msSqlFullTextKeyIndex = null, params ColumnRef[] columns)
    : this(name, new TableRef(table), type, modifier, msSqlFullTextKeyIndex, columns)
{
}
/// <summary>
/// Deletes every indexed document whose "_key" field equals <paramref name="val"/>.
/// </summary>
private static void DeleteDocument(IndexModifier modifier, string val)
{
    Term keyTerm = new Term("_key", val);
    modifier.Delete(keyTerm);
}
/// <summary>
/// (Re)builds the Lucene search index from the documentation model when the index
/// is stale or the searcher is not operational. Serialized via SynchLock; on any
/// failure the searcher is marked non-operational.
/// </summary>
/// <param name="documentation">Documentation entry points to index.</param>
/// <param name="lastModified">Timestamp used to decide whether reindexing is needed.</param>
public void CreateIndexIfNeeded(List<MsDocEntryPoint> documentation, DateTime? lastModified)
{
    lock (SynchLock)
    {
        if (IsIndexingNeeded(lastModified) || !IsOperational)
        {
            try
            {
                // Drop the whole index: close open handles, then delete the directory.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (_directory != null)
                {
                    _directory.Close();
                }
                Directory.Delete(_indexDirectory, true);

                // Recreate the directory and open a fresh index writer over it.
                _directory = FSDirectory.GetDirectory(_indexDirectory, true);
                var indexModifier = new IndexModifier(_directory, new StandardAnalyzer(), true);

                foreach (var entryPoint in documentation)
                {
                    var pointDoc = new Document();
                    // Id keys
                    pointDoc.Add(new Field("point", entryPoint.Name, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                    if (!string.IsNullOrEmpty(entryPoint.Summary))
                    {
                        pointDoc.Add(new Field("summary", entryPoint.Summary, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    }
                    if (!string.IsNullOrEmpty(entryPoint.Example))
                    {
                        pointDoc.Add(new Field("example", entryPoint.Example, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    }
                    // BUG FIX: the original built pointDoc but never added it to the index,
                    // so entry points themselves were not searchable.
                    indexModifier.AddDocument(pointDoc);

                    foreach (var pointMethod in entryPoint.Methods)
                    {
                        var doc = new Document();
                        // Id keys
                        doc.Add(new Field("point", entryPoint.Name, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                        doc.Add(new Field("path", pointMethod.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                        doc.Add(new Field("url", pointMethod.Path, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                        if (!string.IsNullOrEmpty(pointMethod.Notes))
                        {
                            doc.Add(new Field("notes", pointMethod.Notes, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        if (!string.IsNullOrEmpty(pointMethod.Remarks))
                        {
                            doc.Add(new Field("remarks", pointMethod.Remarks, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        if (!string.IsNullOrEmpty(pointMethod.Example))
                        {
                            // NOTE(review): field name "examlpe" is a typo, kept as-is because
                            // query-side code may already search on it — fix both sides together.
                            doc.Add(new Field("examlpe", pointMethod.Example, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        if (!string.IsNullOrEmpty(pointMethod.Returns))
                        {
                            doc.Add(new Field("returns", pointMethod.Returns, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        if (!string.IsNullOrEmpty(pointMethod.ShortName))
                        {
                            doc.Add(new Field("short", pointMethod.ShortName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        if (!string.IsNullOrEmpty(pointMethod.Summary))
                        {
                            doc.Add(new Field("summary", pointMethod.Summary, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        }
                        foreach (var param in pointMethod.Params)
                        {
                            if (!string.IsNullOrEmpty(param.Description))
                            {
                                doc.Add(new Field("param-" + param.Name + "-description", param.Description, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                            }
                            // BUG FIX: the original guarded this field with pointMethod.Remarks
                            // instead of param.Remarks, so null/empty param remarks could be
                            // indexed (or non-empty ones skipped).
                            if (!string.IsNullOrEmpty(param.Remarks))
                            {
                                doc.Add(new Field("param-" + param.Name + "-remarks", param.Remarks, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                            }
                        }
                        indexModifier.AddDocument(doc);
                    }
                }

                indexModifier.Optimize();
                indexModifier.Close();
                searcher = new IndexSearcher(_directory);
                IsOperational = true;
            }
            catch (Exception)
            {
                // Any failure leaves the index unusable until the next rebuild attempt.
                IsOperational = false;
            }
        }
    }
}
/// <summary>
/// Indexes a list of XML files according to the configured rule sets. For each file
/// whose root element matches a rule set, global fields are cached and duplicated
/// into every document extracted from the file; per-document "text" rule values are
/// concatenated into one field. Existing documents with the same key are replaced.
/// </summary>
/// <param name="fnames">File names of the XML documents to index.</param>
/// <returns>ArrayList of failure messages (empty when everything succeeded).</returns>
public ArrayList IndexDocuments(ArrayList fnames)
{
    ArrayList failures = new ArrayList();

    // Open the existing index when a segments file is present, otherwise create one.
    IndexModifier writer = File.Exists(Path.Combine(_indexName, "segments"))
        ? new IndexModifier(_indexName, new StandardAnalyzer(), false)
        : new IndexModifier(_indexName, new StandardAnalyzer(), true);

    try
    {
        foreach (string fname in fnames)
        {
            try
            {
                if (_verbose)
                {
                    _messageStringBuilder.AppendFormat("Indexing file: {0}\n", fname);
                }

                XPathDocument pathDocument = new XPathDocument(new StreamReader(fname));
                XPathNavigator pathNavigator = pathDocument.CreateNavigator();
                pathNavigator.MoveToFirstChild();
                string docname = pathNavigator.Name; // root element name selects the rule set
                if (_verbose)
                {
                    _messageStringBuilder.AppendFormat("Found document type: {0}\n", docname);
                }

                foreach (RuleSet ruleSet in _ruleSets)
                {
                    if (ruleSet.Name != docname)
                    {
                        continue;
                    }
                    if (_verbose)
                    {
                        _messageStringBuilder.Append("Found matching ruleset, indexing.\n");
                    }
                    pathNavigator.MoveToRoot();

                    // First read and cache the global fields; these are duplicated
                    // into every document produced from this file.
                    ArrayList globals = new ArrayList();
                    foreach (Rule rule in ruleSet.GlobalRules)
                    {
                        XPathExpression pathExpression = pathNavigator.Compile(rule.XPath);
                        XPathNodeIterator pathNodeIterator = pathNavigator.Select(pathExpression);
                        while (pathNodeIterator.MoveNext())
                        {
                            if (_verbose)
                            {
                                _messageStringBuilder.AppendFormat("Found field '{0}' value '{1}'\n", rule.Name, pathNodeIterator.Current.Value);
                            }
                            globals.Add(new RuleValue(rule, pathNodeIterator.Current.Value));
                        }
                    }

                    // All globals cached — now index the documents themselves.
                    pathNavigator.MoveToRoot();
                    foreach (DocItem docItem in ruleSet.DocItems)
                    {
                        XPathExpression docExpression = pathNavigator.Compile(docItem.XPath);
                        XPathNodeIterator docNodeIterator = pathNavigator.Select(docExpression);
                        while (docNodeIterator.MoveNext())
                        {
                            XPathNavigator clone = docNodeIterator.Current.Clone();
                            if (_verbose)
                            {
                                _messageStringBuilder.Append("Found document\n");
                            }

                            // Compute the document key: evaluate docItem.Key when present
                            // (last node wins for node-set results), else use the file name.
                            string key = string.Empty;
                            if (docItem.Key != string.Empty)
                            {
                                object result = clone.Evaluate(docItem.Key);
                                XPathNodeIterator nodeIterator = result as XPathNodeIterator;
                                if (nodeIterator != null)
                                {
                                    while (nodeIterator.MoveNext())
                                    {
                                        key = nodeIterator.Current.ToString();
                                    }
                                }
                                else
                                {
                                    key = result.ToString();
                                }
                            }
                            else
                            {
                                key = fname;
                            }
                            if (_verbose)
                            {
                                _messageStringBuilder.AppendFormat("Key is {0}\n", key);
                            }

                            // Replace any previously indexed document with the same key.
                            DeleteDocument(writer, key);
                            Document ludoc = new Document();
                            AddKeyField(ludoc, key);
                            AddNameField(ludoc, ruleSet.Name);

                            // "text" rule values are concatenated into a single field.
                            string textField = string.Empty;
                            Rule ruleText = null;
                            foreach (Rule rule in docItem.Rules)
                            {
                                XPathExpression expr = clone.Compile(rule.XPath);
                                XPathNodeIterator iter = clone.Select(expr);
                                while (iter.MoveNext())
                                {
                                    string textVal = parseHtml(iter.Current.Value);
                                    if (_verbose)
                                    {
                                        _messageStringBuilder.AppendFormat("Found field '{0}' value '{1}'\n", rule.Name, textVal);
                                    }
                                    // index away!
                                    if (rule.Name == "text")
                                    {
                                        if (ruleText == null)
                                        {
                                            ruleText = rule;
                                            textField = textVal;
                                        }
                                        else
                                        {
                                            // NOTE(review): separator reconstructed as " ... " from a
                                            // garbled source line — confirm against the original file.
                                            textField += " ... " + textVal;
                                        }
                                    }
                                    else
                                    {
                                        AddField(ludoc, rule, iter.Current.Value);
                                    }
                                }
                            }
                            if (textField.Length > 0)
                            {
                                AddField(ludoc, ruleText, textField);
                            }

                            // Duplicate the cached global fields into this document.
                            foreach (RuleValue ruleValue in globals)
                            {
                                AddField(ludoc, ruleValue.Rule, ruleValue.Value);
                            }
                            writer.AddDocument(ludoc);
                        }
                    }
                }
            } // end of fnames loop body
            catch (Exception e)
            {
                failures.Add(String.Format("Failed to index file {0}, exception: {1}", fname, e));
                if (_verbose)
                {
                    _messageStringBuilder.AppendFormat("Failed to index {0}\n", fname);
                    _messageStringBuilder.Append(e.StackTrace + "\n");
                }
            }
        }

        writer.Optimize();
    }
    finally
    {
        // BUG FIX: the original leaked the IndexModifier (and the index write lock)
        // if an exception escaped outside the per-file catch (e.g. from Optimize).
        writer.Close();
    }
    return failures;
}
/// <summary>
/// Creates/Updates the index for a given host. If the index already exists (and is
/// the current schema version) it is updated incrementally, otherwise a full index
/// is built. Each host index is stored in its own folder off the base directory.
/// </summary>
/// <param name="hostId">Host whose index is generated.</param>
/// <param name="lastUpdateTime">Time of the last successful crawl; only newer stories are re-indexed.</param>
private void GenerateIndex(int hostId, DateTime lastUpdateTime)
{
    Log.InfoFormat("Starting index generation for HostID: {0}", hostId);
    IndexModifier modifier = null;
    bool isIncrementalCrawl = false;
    int storiesToIndexCount = 0;
    int pageSize;
    try
    {
        bool indexExists = IndexExists(hostId);
        if (indexExists)
        {
            // The lucene index is versioned (value stored in the settings table) so we
            // can force a rebuild when the document schema changes; an old version
            // counts as "no index".
            indexExists = IsLuceneIndexCorrectVersion(hostId);
            if (!indexExists)
            {
                Log.Debug("Lucene index exists but is older version, need to overwrite with new document fields");
            }
        }

        StoryCollection stories = null;
        if (!indexExists)
        {
            // Full crawl: index every story in the database.
            Log.InfoFormat("Creating a new index HostID: {0}", hostId);
            isIncrementalCrawl = false;
            storiesToIndexCount = Story.GetAllStoriesCount(hostId);
        }
        else
        {
            if (!HostCrawlSuccessful(hostId))
            {
                // Force the last update time far into the past so all records are
                // re-crawled after a failed run.
                // FIX: construct the date explicitly; the original DateTime.Parse("1/1/1975")
                // depends on the current thread culture's date format and can throw.
                lastUpdateTime = new DateTime(1975, 1, 1);
                Log.InfoFormat("Last crawl didnt complete successfully, attempting a full crawl");
            }
            else
            {
                Log.InfoFormat("Updating existing index");
            }
            isIncrementalCrawl = true;
            storiesToIndexCount = Story.GetUpdatedStoriesCount(hostId, lastUpdateTime);
            Log.InfoFormat("Found: {0} stories to add to index since last update at: {1}", storiesToIndexCount, lastUpdateTime);
        }

        if (storiesToIndexCount == 0)
        {
            Log.InfoFormat("Nothing todo, no new stories to crawl for HostID: {0}", hostId);
            isUpdateRunning = false;
            return;
        }

        modifier = new IndexModifier(IndexHostPath(hostId), new DnkAnalyzer(), !indexExists);
        modifier.SetMaxBufferedDocs(50);
        modifier.SetMergeFactor(150);

        SearchSettings searchSettings = new SearchSettings();
        pageSize = searchSettings.StoriesPageSize;
        int pageTotal = CalculateNumberOfPages(storiesToIndexCount, pageSize);

        // Page through the stories so the full result set is never held in memory.
        for (int i = 1; i <= pageTotal; i++)
        {
            stories = isIncrementalCrawl
                ? Story.GetUpdatedStories(hostId, lastUpdateTime, i, pageSize)
                : Story.GetAllStories(hostId, i, pageSize);
            AddStoriesToIndex(modifier, isIncrementalCrawl, stories);
        }

        modifier.Optimize();
        Log.InfoFormat("index optimized for HostID:{0}", hostId);
        modifier.Close();
        Log.InfoFormat("Index Modifier closed for Host:{0}", hostId);
        modifier = null; // signal the finally block that cleanup already happened

        // We completed ok.
        RecordHostCrawlSuccess(hostId, true);
    }
    catch (Exception ex)
    {
        RecordHostCrawlSuccess(hostId, false);
        Log.ErrorFormat("Error occurred while adding items to the index HostID:{0}, message: {1}", hostId, ex.Message);
    }
    finally
    {
        // Attempt to close the modifier if it is still open (i.e. an error occurred).
        if (modifier != null)
        {
            try
            {
                modifier.Close();
                modifier = null;
                Log.InfoFormat("Able to close the Index Modifier in the final block, HostID:{0}", hostId);
            }
            catch (Exception ex)
            {
                Log.ErrorFormat("Unable to close the modifer in the final block HostID:{0} Message:{1}", hostId, ex.Message);
            }
        }
    }
}