Пример #1
0
        /// <summary>
        /// Main save routine for a crawled page: inserts or updates the page row, then saves its
        /// links and keywords and finally flags the page as indexed.
        /// </summary>
        /// <param name="searchResults">Crawl result for a single page.</param>
        /// <returns>
        /// The <c>PageID</c> of the inserted or updated page, or 0 when any step throws.
        /// </returns>
        public static int SaveSearchResults(SearchResult searchResults)
        {
            // Longest title that is stored; longer titles are cut to exactly this many characters.
            const int MaxTitleLength = 50;

            try
            {
                // Normalise the title once so the insert and update paths store the same value.
                string title = searchResults.Title ?? string.Empty;
                if (title.Length > MaxTitleLength)
                {
                    title = title.Substring(0, MaxTitleLength);
                }

                string pageName = GetFileWithFolder(searchResults.PageURL);
                IndexedPages pg;

                if (!IsPageAlreadySaved(searchResults.PageURL, pageName))
                {
                    // New page: create and persist it so that PageID gets populated.
                    pg = new IndexedPages();
                    pg.DateCreated     = DateTime.Now;
                    pg.ParentID        = searchResults.ParentID;
                    pg.PageName        = pageName;
                    pg.PageURL         = searchResults.PageURL;
                    pg.ParentDirectory = searchResults.ParentDirectory;
                    pg.IndexedSiteID   = searchResults.IndexedSiteID;
                    pg.Title           = title;

                    DB.IndexedPages.Add(pg);
                    DB.SaveChanges();
                }
                else
                {
                    // The page already exists, so only refresh the fields that come from the content.
                    pg             = GetPageByName(searchResults.PageURL, pageName);
                    pg.DateCreated = DateTime.Now;
                    pg.Title       = title;

                    DB.SaveChanges();
                }

                int pageIDAfterInsert = pg.PageID;

                SaveLinks(searchResults, pg);       // save the links for this page
                SaveTheKeywords(searchResults, pg); // save the keywords
                UpdateIsIndexedFlag(pg.PageID);     // update the IsIndexed flag so it is not run again

                return pageIDAfterInsert;
            }
            catch (Exception ex)
            {
                // Callers are told about the failure through the 0 return value; the exception
                // (including DbEntityValidationException) is traced so the cause is not lost.
                System.Diagnostics.Trace.TraceError("SaveSearchResults failed: " + ex);
                return 0;
            }
        }
Пример #2
0
        /// <summary>
        /// Builds an <c>IndexedPages</c> instance holding one uncrawled link for every entry in the
        /// <c>AdditionalUrl</c> list of the <c>Indexable</c> attribute on each indexable method.
        /// </summary>
        /// <remarks>
        /// When the attribute has no <c>Path</c>, the URL is derived from the declaring type name
        /// (with "Controller" removed) and the method name. An empty <c>AdditionalUrl</c> entry
        /// yields the base path itself; any other entry is appended as an extra path segment.
        /// Methods whose attribute has a null or empty <c>AdditionalUrl</c> contribute no links.
        /// </remarks>
        /// <returns>The populated <c>IndexedPages</c> instance (never null).</returns>
        protected internal static IndexedPages GetIndexable()
        {
            List<MethodInfo> methods = new List<MethodInfo>(GetAllIndexableMethods());
            IndexedPages pages = new IndexedPages();

            foreach (MethodInfo method in methods)
            {
                Indexable i = (Indexable)Attribute.GetCustomAttribute(method, typeof(Indexable));

                // Nothing to add when the attribute is missing or declares no URLs.
                if (i == null || i.AdditionalUrl == null || i.AdditionalUrl.Length == 0)
                {
                    continue;
                }

                // The base path does not depend on the individual additional URL, so compute it once.
                String basePath = String.IsNullOrEmpty(i.Path)
                    ? "/" + method.DeclaringType.Name.Replace("Controller", "") + "/" + method.Name
                    : i.Path;

                foreach (String addurl in i.AdditionalUrl)
                {
                    String urlPath;
                    if (addurl == "")
                    {
                        urlPath = basePath;
                    }
                    else if (basePath.EndsWith("/"))
                    {
                        // Avoid a double slash when the configured path already ends with one.
                        urlPath = basePath + addurl;
                    }
                    else
                    {
                        // Append the current entry, not the whole AdditionalUrl collection.
                        urlPath = basePath + "/" + addurl;
                    }

                    pages.AddLink(new Link()
                    {
                        Crawled = false,
                        Page = new Page(urlPath, i.Priority,
                            i.KeywordsAndPriority,
                            i.Keywords)
                    });
                }
            }

            return pages;
        }
Пример #3
0
        /// <summary>
        /// Queues the links found on a page as new child <c>IndexedPages</c> rows of that page and
        /// saves them in one batch.
        /// </summary>
        /// <remarks>
        /// A link is skipped when it is null or at most one character long (e.g. the root "/"),
        /// when <c>IsValidLink</c> rejects its URL, when <c>IsPageAlreadySaved</c> reports it, or
        /// when the same URL/name pair was already queued during this call.
        /// Saving links is best-effort: failures are traced and never propagate to the caller.
        /// </remarks>
        /// <param name="searchResults">Crawl result whose <c>Links</c> are processed.</param>
        /// <param name="pg">Page the links were found on; supplies the parent id and site id.</param>
        public static void SaveLinks(SearchResult searchResults, IndexedPages pg)
        {
            List<IndexedPages> linkPages = new List<IndexedPages>();

            // URL/name pairs seen in this call. IsPageAlreadySaved cannot detect these repeats
            // because the queued rows are only written after the loop has finished.
            HashSet<string> seenKeys = new HashSet<string>();

            try
            {
                foreach (string singleLink in searchResults.Links)
                {
                    // Skip missing links and the root ("/") instead of failing the whole batch.
                    if (singleLink == null || singleLink.Length <= 1)
                    {
                        continue;
                    }

                    IndexedPages cp = new IndexedPages();
                    cp.DateCreated   = DateTime.Now;
                    cp.ParentID      = pg.PageID;
                    cp.PageName      = GetFileWithFolder(singleLink);
                    cp.IndexedSiteID = pg.IndexedSiteID;
                    // Get the directory for the file, not only the filename.
                    cp.ParentDirectory = Services.SearchUtils.GetDirectoryForFile(singleLink, pg.PageID);
                    cp.PageURL         = GetFullURLFromName(cp.PageName, cp.ParentDirectory);
                    cp.Title           = ""; // the title comes only from the page content

                    if (!IsValidLink(cp.PageURL))
                    {
                        continue;
                    }

                    // Add returns false for a repeat, which also saves a lookup for that link.
                    if (!seenKeys.Add(cp.PageURL + "\n" + cp.PageName))
                    {
                        continue;
                    }

                    if (!SearchServices.IsPageAlreadySaved(cp.PageURL, cp.PageName))
                    {
                        linkPages.Add(cp);
                    }
                }

                DB.IndexedPages.AddRange(linkPages);
                DB.SaveChanges();
            }
            catch (Exception ex)
            {
                // Best-effort by design, but record the cause (this also covers
                // DbEntityValidationException) instead of discarding it silently.
                // NOTE(review): if DB is a long-lived shared context, rows queued by a failed
                // save may still be pending on it — confirm whether they need to be detached.
                System.Diagnostics.Trace.TraceError("SaveLinks failed: " + ex);
            }
        }
Пример #4
0
        /// <summary>
        /// Saves each keyword found on a page together with its count, one <c>PageKeyWords</c> row
        /// per entry of <c>searchResults.KeyWordRankingList</c>.
        /// </summary>
        /// <remarks>
        /// Saving keywords is best-effort: failures are traced and never propagate to the caller.
        /// </remarks>
        /// <param name="searchResults">Crawl result whose keyword ranking list is saved.</param>
        /// <param name="pg">Page the keywords belong to; supplies the <c>PageID</c>.</param>
        public static void SaveTheKeywords(SearchResult searchResults, IndexedPages pg)
        {
            List<PageKeyWords> keywordRows = new List<PageKeyWords>();

            try
            {
                foreach (WordRankVM kw in searchResults.KeyWordRankingList)
                {
                    keywordRows.Add(new PageKeyWords
                    {
                        PageID       = pg.PageID,
                        Keyword      = kw.Keyword,
                        KeywordCount = kw.Rank // the rank is stored as the keyword count
                    });
                }

                DB.PageKeyWords.AddRange(keywordRows);
                DB.SaveChanges();
            }
            catch (Exception ex)
            {
                // Keep the original non-throwing contract, but record the cause instead of
                // discarding it silently.
                // NOTE(review): if DB is a long-lived shared context, rows queued by a failed
                // save may still be pending on it — confirm whether they need to be detached.
                System.Diagnostics.Trace.TraceError("SaveTheKeywords failed: " + ex);
            }
        }
Пример #5
0
 /// <summary>
 /// Called when a crawl has finished; clears the <c>IndexRunning</c> flag.
 /// </summary>
 /// <param name="index">The crawled index. Currently unused (see the TODO in the body).</param>
 /// <returns>An already-completed task; the method does no asynchronous work.</returns>
 public static Task CrawlCompleteAsync(IndexedPages index)
 {
     // TODO: 1) fill the IndexCache with the index (not implemented yet).
     // 2) Set IndexRunning to false.
     IndexRunning = false;

     // There is nothing to await, so return a completed task rather than using `async`.
     return Task.CompletedTask;
 }