Example No. 1
        private void LoadData()
        {
            SearchSettings searchSettings = new SearchSettings();
            lblBaseDirectory.Text = searchSettings.SearchBaseDirectory;
            lblRecrawlRate.Text = searchSettings.SearchUpdateInterval.ToString();
            lblLastCrawl.Text = searchSettings.SearchLastCrawl.ToString("F");

            Dictionary<string, Host> hosts = HostCache.Hosts;
            rptDeleteIndex.DataSource = hosts;
            rptDeleteIndex.DataBind();
        }
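
The SearchSettings type used above (and again in the examples below) is not shown on this page. Below is a minimal sketch of what such a wrapper might look like, assuming the values live in appSettings; the key names and defaults are illustrative guesses, not the project's actual ones.

using System;
using System.Configuration;

//hypothetical sketch of a SearchSettings wrapper; key names, defaults and the
//appSettings storage are assumptions - the real class may read a settings table
public class SearchSettings
{
    //folder (relative to the site root) that holds the per-host Lucene indexes
    public string SearchBaseDirectory
    {
        get { return ConfigurationManager.AppSettings["Search.BaseDirectory"] ?? "~/App_Data/SearchIndex"; }
    }

    //minutes between index update runs
    public int SearchUpdateInterval
    {
        get { return int.Parse(ConfigurationManager.AppSettings["Search.UpdateIntervalMinutes"] ?? "30"); }
    }

    //timestamp of the last completed crawl
    public DateTime SearchLastCrawl
    {
        get
        {
            DateTime lastCrawl;
            string raw = ConfigurationManager.AppSettings["Search.LastCrawl"];
            return DateTime.TryParse(raw, out lastCrawl) ? lastCrawl : DateTime.MinValue;
        }
    }

    //number of stories fetched per page while (re)building the index
    public int StoriesPageSize
    {
        get { return int.Parse(ConfigurationManager.AppSettings["Search.StoriesPageSize"] ?? "500"); }
    }
}
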
Example No. 2
        /// <summary>
        /// Creates/updates the index for a given host. If the index already exists then we update it; otherwise
        /// we create a new index for the host. Each host index is stored in its own folder off the base directory.
        /// </summary>
        /// <param name="hostId">Id of the host whose index is being built or updated.</param>
        /// <param name="lastUpdateTime">Time of the last crawl; only stories changed since then are indexed.</param>
        private void GenerateIndex(int hostId, DateTime lastUpdateTime)
        {
            Log.InfoFormat("Starting index generation for HostID: {0}", hostId);

            IndexModifier modifier = null;
            bool isIncrementalCrawl = false;
            int storiesToIndexCount = 0;
            int pageSize;

            try
            {
                bool indexExists = IndexExists(hostId);

                if (indexExists)
                {
                    //logic to version the lucene index so we can
                    //replace it with a new version should we need to;
                    //this should be some kind of value stored in the settings table.
                    //check if the version of the lucene index is the latest version
                    indexExists = IsLuceneIndexCorrectVersion(hostId);

                    if (!indexExists)
                    {
                        Log.Debug("Lucene index exists but is older version, need to overwrite with new document fields");
                    }

                }

                StoryCollection stories = null;

                if (!indexExists)
                {
                    //the index doesn't exist so we are going to do a full index of
                    //all stories in the database
                    Log.InfoFormat("Creating a new index HostID: {0}", hostId);

                    isIncrementalCrawl = false;
                    storiesToIndexCount = Story.GetAllStoriesCount(hostId);
                }
                else
                {
                    if (!HostCrawlSuccessful(hostId))
                    {
                        //force the last update time to a low value to get all the records,
                        //since we need to fully recrawl the index
                        lastUpdateTime = new DateTime(1975, 1, 1); //avoids the culture-dependent DateTime.Parse("1/1/1975")
                        Log.InfoFormat("Last crawl didn't complete successfully, attempting a full crawl");
                    }
                    else
                        Log.InfoFormat("Updating existing index");

                    isIncrementalCrawl = true;
                    storiesToIndexCount = Story.GetUpdatedStoriesCount(hostId, lastUpdateTime);

                    Log.InfoFormat("Found: {0} stories to add to index since last update at: {1}",
                                    storiesToIndexCount,
                                    lastUpdateTime);
                }

                if (storiesToIndexCount == 0)
                {
                    Log.InfoFormat("Nothing todo, no new stories to crawl for HostID: {0}", hostId);
                    isUpdateRunning = false;
                    return;
                }

                modifier = new IndexModifier(IndexHostPath(hostId), new DnkAnalyzer(), !indexExists);
                modifier.SetMaxBufferedDocs(50);
                modifier.SetMergeFactor(150);

                SearchSettings searchSettings = new SearchSettings();
                pageSize = searchSettings.StoriesPageSize;

                int pageTotal = CalculateNumberOfPages(storiesToIndexCount, pageSize);
                for (int i = 1; i <= pageTotal; i++)
                {
                    if (isIncrementalCrawl)
                        stories = Story.GetUpdatedStories(hostId, lastUpdateTime, i, pageSize);
                    else
                        stories = Story.GetAllStories(hostId, i, pageSize);

                    AddStoriesToIndex(modifier, isIncrementalCrawl, stories);
                }

                modifier.Optimize();
                Log.InfoFormat("index optimized for HostID:{0}", hostId);

                modifier.Close();
                Log.InfoFormat("Index Modifier closed for Host:{0}", hostId);

                modifier = null;

                //we completed ok
                RecordHostCrawlSuccess(hostId, true);
            }
            catch (Exception ex)
            {
                RecordHostCrawlSuccess(hostId, false);
                Log.ErrorFormat("Error occurred while adding items to the index HostID:{0}, message: {1}",hostId, ex.Message);
            }
            finally
            {
                //attempt to close the modifier if it still exists
                if (modifier != null)
                {
                    try
                    {
                        modifier.Close();
                        modifier = null;
                        Log.InfoFormat("Able to close the Index Modifier in the final block, HostID:{0}", hostId);
                    }
                    catch (Exception ex)
                    {
                        Log.ErrorFormat("Unable to close the modifer in the final block HostID:{0} Message:{1}", hostId, ex.Message);
                    }
                }
            }
        }
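
CalculateNumberOfPages and IndexHostPath are referenced above but not listed on this page. Here is a plausible sketch of the paging helper, assuming it simply rounds the story count up to whole pages; the real implementation may differ.

        //hypothetical paging helper: rounds the story count up to a whole number of pages,
        //e.g. 1050 stories with a page size of 500 gives 3 pages, so the loop above fetches pages 1..3
        private static int CalculateNumberOfPages(int itemCount, int pageSize)
        {
            if (pageSize <= 0)
                throw new ArgumentOutOfRangeException("pageSize");

            return (itemCount + pageSize - 1) / pageSize; //integer ceiling division
        }
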
Example No. 3
        /// <summary>
        /// Explicit private constructor (the class is a singleton).
        /// </summary>
        SearchUpdate()
        {
            Log.Debug("Creating SearchUpdate singleton");

            int crawlIntervalMinutes;
            hostCrawlError = new Dictionary<int, bool>();

            SearchSettings searchSettings = new SearchSettings();

            try
            {
                //attempt to look up the index location from the settings
                string baseDirectory = searchSettings.SearchBaseDirectory;
                indexBasePath = HttpContext.Current.Server.MapPath(baseDirectory);
            }
            catch (ArgumentException)
            {
                Log.Fatal("The setting key is missing, we can't create an index of the site without it. This value holds the location of the index");
                throw; //rethrow without resetting the stack trace
            }

            crawlIntervalMinutes = searchSettings.SearchUpdateInterval;

            TimerCallback callback = new TimerCallback(UpdateIndex);
            updateTimer = new Timer(callback, null, TimeSpan.Zero, TimeSpan.FromMinutes(crawlIntervalMinutes));
        }
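
For context, here is a minimal sketch of what the UpdateIndex timer callback wired up above might look like. The reentrancy guard reuses the isUpdateRunning flag that GenerateIndex resets, and the loop walks the same HostCache.Hosts dictionary as Example No. 1; the Host.HostID property is an assumption, and the real callback may differ.

        //hypothetical sketch of the timer callback; Host.HostID is an assumed property name
        private void UpdateIndex(object state)
        {
            //skip this tick if a previous update is still running
            if (isUpdateRunning)
            {
                Log.Debug("Previous index update is still running, skipping this interval");
                return;
            }

            isUpdateRunning = true;

            try
            {
                DateTime lastCrawl = new SearchSettings().SearchLastCrawl;

                foreach (Host host in HostCache.Hosts.Values)
                {
                    GenerateIndex(host.HostID, lastCrawl);
                }
            }
            finally
            {
                isUpdateRunning = false;
            }
        }
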