/// <summary>
/// Populates the settings labels (base directory, recrawl rate, last crawl time)
/// and binds the cached host list to the delete-index repeater.
/// </summary>
private void LoadData()
{
    SearchSettings settings = new SearchSettings();

    lblBaseDirectory.Text = settings.SearchBaseDirectory;
    lblRecrawlRate.Text = settings.SearchUpdateInterval.ToString();
    lblLastCrawl.Text = settings.SearchLastCrawl.ToString("F");

    // Bind the cached hosts so each host gets a row in the delete-index repeater.
    Dictionary<string, Host> cachedHosts = HostCache.Hosts;
    rptDeleteIndex.DataSource = cachedHosts;
    rptDeleteIndex.DataBind();
}
/// <summary>
/// Creates/Updates the index for a given host. If the index already exists then we'll
/// update it, otherwise we create a new index for the host. Each host index is stored
/// in its own folder off the base directory.
/// </summary>
/// <param name="hostId">Identifier of the host whose index is generated.</param>
/// <param name="lastUpdateTime">On an incremental crawl, only stories updated after this time are indexed.</param>
private void GenerateIndex(int hostId, DateTime lastUpdateTime)
{
    Log.InfoFormat("Starting index generation for HostID: {0}", hostId);
    IndexModifier modifier = null;
    bool isIncrementalCrawl = false;
    int storiesToIndexCount = 0;
    int pageSize;
    try
    {
        bool indexExists = IndexExists(hostId);
        if (indexExists)
        {
            // The index exists on disk, but it may have been written by an older
            // version of the code with different document fields. If the version
            // check fails we treat the index as missing so it gets rebuilt fully.
            // (The expected version should eventually live in the settings table.)
            indexExists = IsLuceneIndexCorrectVersion(hostId);
            if (!indexExists)
            {
                Log.Debug("Lucene index exists but is older version, need to overwrite with new document fields");
            }
        }

        StoryCollection stories = null;
        if (!indexExists)
        {
            // No usable index: do a full crawl of every story in the database.
            Log.InfoFormat("Creating a new index HostID: {0}", hostId);
            isIncrementalCrawl = false;
            storiesToIndexCount = Story.GetAllStoriesCount(hostId);
        }
        else
        {
            if (!HostCrawlSuccessful(hostId))
            {
                // Force the last update time to a low value to get all the records,
                // since we need to recrawl the index fully.
                // BUG FIX: DateTime.Parse("1/1/1975") is culture-sensitive (throws or
                // misparses under non-M/d/y cultures); construct the date explicitly.
                lastUpdateTime = new DateTime(1975, 1, 1);
                Log.InfoFormat("Last crawl didnt complete successfully, attempting a full crawl");
            }
            else
            {
                Log.InfoFormat("Updating existing index");
            }
            isIncrementalCrawl = true;
            storiesToIndexCount = Story.GetUpdatedStoriesCount(hostId, lastUpdateTime);
            Log.InfoFormat("Found: {0} stories to add to index since last update at: {1}", storiesToIndexCount, lastUpdateTime);
        }

        if (storiesToIndexCount == 0)
        {
            Log.InfoFormat("Nothing todo, no new stories to crawl for HostID: {0}", hostId);
            isUpdateRunning = false;
            return;
        }

        // Third argument: create a brand-new index when no (valid) index exists.
        modifier = new IndexModifier(IndexHostPath(hostId), new DnkAnalyzer(), !indexExists);
        modifier.SetMaxBufferedDocs(50);
        modifier.SetMergeFactor(150);

        SearchSettings searchSettings = new SearchSettings();
        pageSize = searchSettings.StoriesPageSize;
        int pageTotal = CalculateNumberOfPages(storiesToIndexCount, pageSize);

        // Page through the stories so we never hold the whole result set in memory.
        for (int i = 1; i <= pageTotal; i++)
        {
            if (isIncrementalCrawl)
            {
                stories = Story.GetUpdatedStories(hostId, lastUpdateTime, i, pageSize);
            }
            else
            {
                stories = Story.GetAllStories(hostId, i, pageSize);
            }
            AddStoriesToIndex(modifier, isIncrementalCrawl, stories);
        }

        modifier.Optimize();
        Log.InfoFormat("index optimized for HostID:{0}", hostId);
        modifier.Close();
        Log.InfoFormat("Index Modifier closed for Host:{0}", hostId);
        modifier = null;

        // We completed ok.
        RecordHostCrawlSuccess(hostId, true);
    }
    catch (Exception ex)
    {
        RecordHostCrawlSuccess(hostId, false);
        Log.ErrorFormat("Error occurred while adding items to the index HostID:{0}, message: {1}", hostId, ex.Message);
    }
    finally
    {
        // Attempt to close the modifier if it still exists (e.g. an exception was
        // thrown mid-crawl), so the Lucene write lock is released.
        if (modifier != null)
        {
            try
            {
                modifier.Close();
                modifier = null;
                Log.InfoFormat("Able to close the Index Modifier in the final block, HostID:{0}", hostId);
            }
            catch (Exception ex)
            {
                Log.ErrorFormat("Unable to close the modifer in the final block HostID:{0} Message:{1}", hostId, ex.Message);
            }
        }
    }
}
/// <summary>
/// Explicit private constructor for the SearchUpdate singleton. Resolves the index
/// base directory from settings and starts the periodic crawl timer.
/// </summary>
SearchUpdate()
{
    Log.Debug("Creating SearchUpdate singleton");
    int crawlIntervalMinutes;
    hostCrawlError = new Dictionary<int, bool>();
    SearchSettings searchSettings = new SearchSettings();
    try
    {
        // Attempt to look up the index location from the settings.
        string baseDirectory = searchSettings.SearchBaseDirectory;
        indexBasePath = HttpContext.Current.Server.MapPath(baseDirectory);
    }
    catch (ArgumentException)
    {
        Log.Fatal("The setting key is missing, we cant create an index of the site without it. This value holds the location of the index");
        // BUG FIX: was "throw ex;", which resets the stack trace; a bare rethrow
        // preserves the original trace for the caller.
        throw;
    }
    crawlIntervalMinutes = searchSettings.SearchUpdateInterval;

    // Kick off the recurring index update; TimeSpan.Zero makes the first run fire immediately.
    TimerCallback callback = new TimerCallback(UpdateIndex);
    updateTimer = new Timer(callback, null, TimeSpan.Zero, TimeSpan.FromMinutes(crawlIntervalMinutes));
}
/// <summary>
/// Creates/Updates the index for a given host. If the index already exists then we'll
/// update it, otherwise we create a new index for the host. Each host index is stored
/// in its own folder off the base directory.
/// </summary>
/// <param name="hostId">Identifier of the host whose index is generated.</param>
/// <param name="lastUpdateTime">On an incremental crawl, only stories updated after this time are indexed.</param>
private void GenerateIndex(int hostId, DateTime lastUpdateTime)
{
    Log.InfoFormat("Starting index generation for HostID: {0}", hostId);
    IndexModifier modifier = null;
    bool isIncrementalCrawl = false;
    int storiesToIndexCount = 0;
    int pageSize;
    try
    {
        bool indexExists = IndexExists(hostId);
        if (indexExists)
        {
            // The index exists on disk, but it may have been written by an older
            // version of the code with different document fields. If the version
            // check fails we treat the index as missing so it gets rebuilt fully.
            // (The expected version should eventually live in the settings table.)
            indexExists = IsLuceneIndexCorrectVersion(hostId);
            if (!indexExists)
            {
                Log.Debug("Lucene index exists but is older version, need to overwrite with new document fields");
            }
        }

        StoryCollection stories = null;
        if (!indexExists)
        {
            // No usable index: do a full crawl of every story in the database.
            Log.InfoFormat("Creating a new index HostID: {0}", hostId);
            isIncrementalCrawl = false;
            storiesToIndexCount = Story.GetAllStoriesCount(hostId);
        }
        else
        {
            if (!HostCrawlSuccessful(hostId))
            {
                // Force the last update time to a low value to get all the records,
                // since we need to recrawl the index fully.
                // BUG FIX: DateTime.Parse("1/1/1975") is culture-sensitive (throws or
                // misparses under non-M/d/y cultures); construct the date explicitly.
                lastUpdateTime = new DateTime(1975, 1, 1);
                Log.InfoFormat("Last crawl didnt complete successfully, attempting a full crawl");
            }
            else
            {
                Log.InfoFormat("Updating existing index");
            }
            isIncrementalCrawl = true;
            storiesToIndexCount = Story.GetUpdatedStoriesCount(hostId, lastUpdateTime);
            Log.InfoFormat("Found: {0} stories to add to index since last update at: {1}", storiesToIndexCount, lastUpdateTime);
        }

        if (storiesToIndexCount == 0)
        {
            Log.InfoFormat("Nothing todo, no new stories to crawl for HostID: {0}", hostId);
            isUpdateRunning = false;
            return;
        }

        // Third argument: create a brand-new index when no (valid) index exists.
        modifier = new IndexModifier(IndexHostPath(hostId), new DnkAnalyzer(), !indexExists);
        modifier.SetMaxBufferedDocs(50);
        modifier.SetMergeFactor(150);

        SearchSettings searchSettings = new SearchSettings();
        pageSize = searchSettings.StoriesPageSize;
        int pageTotal = CalculateNumberOfPages(storiesToIndexCount, pageSize);

        // Page through the stories so we never hold the whole result set in memory.
        for (int i = 1; i <= pageTotal; i++)
        {
            if (isIncrementalCrawl)
            {
                stories = Story.GetUpdatedStories(hostId, lastUpdateTime, i, pageSize);
            }
            else
            {
                stories = Story.GetAllStories(hostId, i, pageSize);
            }
            AddStoriesToIndex(modifier, isIncrementalCrawl, stories);
        }

        modifier.Optimize();
        Log.InfoFormat("index optimized for HostID:{0}", hostId);
        modifier.Close();
        Log.InfoFormat("Index Modifier closed for Host:{0}", hostId);
        modifier = null;

        // We completed ok.
        RecordHostCrawlSuccess(hostId, true);
    }
    catch (Exception ex)
    {
        RecordHostCrawlSuccess(hostId, false);
        Log.ErrorFormat("Error occurred while adding items to the index HostID:{0}, message: {1}", hostId, ex.Message);
    }
    finally
    {
        // Attempt to close the modifier if it still exists (e.g. an exception was
        // thrown mid-crawl), so the Lucene write lock is released.
        if (modifier != null)
        {
            try
            {
                modifier.Close();
                modifier = null;
                Log.InfoFormat("Able to close the Index Modifier in the final block, HostID:{0}", hostId);
            }
            catch (Exception ex)
            {
                Log.ErrorFormat("Unable to close the modifer in the final block HostID:{0} Message:{1}", hostId, ex.Message);
            }
        }
    }
}