Пример #1
0
        /// <summary>
        /// Entry point for indexing: reads the crawl depth from configuration, creates a
        /// site-index record for <paramref name="pageName"/>, saves that first page, and
        /// then hands off to <see cref="StartCrawler(int, int)"/>.
        /// </summary>
        /// <param name="pageName">
        /// The page to start from. It is passed to the SearchUtils helpers to resolve the
        /// directory and load the content, and to Path.GetFileName to get the file name.
        /// </param>
        /// <returns>
        /// The result of StartCrawler, or <c>null</c> when any step throws (the exception
        /// is logged through System.Diagnostics.Trace and not rethrown).
        /// </returns>
        public JsonResult StartIndexProcess(string pageName)
        {
            try
            {
                // Parse with the invariant culture so the configured value is read the same
                // way regardless of the server's locale. A missing or malformed setting
                // throws here and is handled by the catch below.
                CRAWLER_DEPTH = Int16.Parse(
                    ConfigurationManager.AppSettings["DepthLevels"],
                    System.Globalization.CultureInfo.InvariantCulture);

                string folder     = SearchUtils.GetDirectoryForFile(pageName, -1);
                string actualPage = System.IO.Path.GetFileName(pageName);

                // Create a record that serves as the group ID for the site (or group of pages) to index.
                int siteIndexID = SearchServices.GetNewSiteIndex(folder, actualPage);

                // Save the first page up front so the crawler has stored links to start from.
                SearchResult firstPage = SearchUtils.LoadPageContent(pageName, -1, siteIndexID);
                SearchUtils.GetLinksAndKeywords(firstPage);
                firstPage.PageID = SearchServices.SaveSearchResults(firstPage);

                // Everything is ready; the crawler loops until all pages have been indexed.
                return StartCrawler(-1, siteIndexID);
            }
            catch (Exception ex)
            {
                // Log the full exception (type, message and stack trace) to the trace
                // listeners; a Console write of just the message loses the stack trace.
                System.Diagnostics.Trace.TraceError(
                    "StartIndexProcess failed for page '{0}': {1}", pageName, ex);
            }

            // Failure path: callers receive null, as before.
            return null;
        }
Пример #2
0
        /// <summary>
        /// Loads, parses and saves the pages linked for a site index, calling itself once
        /// per saved page until SearchServices.CanReachMaxDepth reports <c>true</c>, and
        /// returns the indexed-page totals as JSON.
        /// </summary>
        /// <param name="parentID">
        /// ID passed by the caller (-1 from StartIndexProcess, the saved page ID on
        /// recursive calls). It is not read anywhere in this method body.
        /// </param>
        /// <param name="siteIndexID">
        /// ID of the site-index record; used for the depth check, the link lookup, page
        /// loading and the final totals.
        /// </param>
        /// <returns>
        /// A JSON result (GET allowed) wrapping the value of
        /// SearchServices.GetIndexedPagesTotals for <paramref name="siteIndexID"/>.
        /// Exceptions raised while crawling are logged and suppressed, so the totals are
        /// returned even after a failure.
        /// </returns>
        public JsonResult StartCrawler(int parentID, int siteIndexID)
        {
            IndexResultVM finalCount;

            try
            {
                // A true result is treated as "stop": no links are loaded for this call.
                bool maximumDepthReached = SearchServices.CanReachMaxDepth(CRAWLER_DEPTH, siteIndexID);
                if (!maximumDepthReached)
                {
                    // Get the links from the saved links for this site index.
                    List<PageContentVM> pageLinks = SearchServices.GetLinkDataForSiteIndexID(siteIndexID);

                    // Page content is fetched in parallel; the bag is a thread-safe
                    // collection that receives results from the worker threads.
                    ConcurrentBag<SearchResult> loadedPages = new ConcurrentBag<SearchResult>();
                    Parallel.ForEach(pageLinks, link =>
                    {
                        string fullUrl = string.Join("", link.PageDirectory, link.PageName);
                        SearchResult loaded = SearchUtils.LoadPageContent(fullUrl, link.ParentID, siteIndexID);
                        loadedPages.Add(loaded);
                    });

                    // Parsing and saving run sequentially once all content is loaded.
                    foreach (SearchResult page in loadedPages)
                    {
                        SearchUtils.GetLinksAndKeywords(page);
                        page.PageID = SearchServices.SaveSearchResults(page);

                        // Recurse for the page just saved; the nested call's JSON result
                        // is intentionally discarded.
                        // NOTE(review): each nested call also recomputes the totals in its
                        // own finally block; consider a private helper if that is costly.
                        StartCrawler(page.PageID, siteIndexID);
                    }
                }
            }
            catch (Exception ex)
            {
                // A single handler replaces the former DbEntityValidationException and
                // Exception handlers, which were identical. The failure is now logged
                // instead of being dropped silently; the crawl remains best-effort.
                System.Diagnostics.Trace.TraceError(
                    "StartCrawler failed for siteIndexID {0}: {1}", siteIndexID, ex);
                Server.ClearError();
            }
            finally
            {
                // Totals are read whether or not the crawl above succeeded.
                finalCount = SearchServices.GetIndexedPagesTotals(siteIndexID);
            }

            return Json(finalCount, JsonRequestBehavior.AllowGet);
        }