/// <summary>
/// Main entry point for persisting one crawled page: inserts the page (or refreshes it
/// when it already exists), then saves its links and keywords and finally flags the
/// page as indexed so it is not processed again.
/// </summary>
/// <param name="searchResults">Crawl result carrying the page URL, title, parent info, links and keyword ranking.</param>
/// <returns>The PageID of the inserted or refreshed page, or 0 when any step threw.</returns>
public static int SaveSearchResults(SearchResult searchResults)
{
    // Longest title that is stored. NOTE(review): presumably matches the Title column
    // length - confirm against the entity mapping.
    const int maxTitleLength = 50;

    try
    {
        // The previous code cut long titles to 49 characters (Substring(0, 49)) while
        // leaving 50-character titles intact; truncate to the full limit instead.
        // A null title is passed through rather than throwing on .Length.
        string title = searchResults.Title;
        if (title != null && title.Length > maxTitleLength)
        {
            title = title.Substring(0, maxTitleLength);
        }

        IndexedPages pg = new IndexedPages();
        pg.DateCreated = DateTime.Now;
        pg.ParentID = searchResults.ParentID;
        pg.PageName = GetFileWithFolder(searchResults.PageURL);
        pg.PageURL = searchResults.PageURL;
        pg.ParentDirectory = searchResults.ParentDirectory;
        pg.IndexedSiteID = searchResults.IndexedSiteID;
        pg.Title = title;

        if (!IsPageAlreadySaved(pg.PageURL, pg.PageName))
        {
            DB.IndexedPages.Add(pg);
            DB.SaveChanges();
        }
        else
        {
            // The page already exists, so only refresh the fields that come from the content.
            // The title gets the same length limit as on insert; the untruncated value was
            // written here before.
            pg = GetPageByName(pg.PageURL, pg.PageName);
            pg.DateCreated = DateTime.Now;
            pg.Title = title;
            DB.SaveChanges();
        }

        int savedPageId = pg.PageID;

        SaveLinks(searchResults, pg);        // save the links for this page.
        SaveTheKeywords(searchResults, pg);  // save the keywords.
        UpdateIsIndexedFlag(pg.PageID);      // update the IsIndexed flag so it is not run again.

        return savedPageId;
    }
    catch (Exception ex)
    {
        // Callers rely on 0 as the failure signal, so the exception is not rethrown;
        // it is traced so the cause is no longer lost. This single clause also covers
        // the validation exception that was previously caught separately with the
        // same result.
        System.Diagnostics.Trace.TraceError("SaveSearchResults failed: {0}", ex);
        return 0;
    }
}
/// <summary>
/// Builds the set of pages to crawl from every method returned by
/// GetAllIndexableMethods. For each entry in the method's Indexable.AdditionalUrl a
/// link is added whose URL is the attribute's Path (or "/{Controller}/{Action}" when
/// no Path is set), optionally followed by "/" and the additional URL segment.
/// </summary>
/// <returns>An IndexedPages instance holding one not-yet-crawled link per generated URL.</returns>
protected internal static IndexedPages GetIndexable()
{
    List<MethodInfo> indexableMethods = new List<MethodInfo>(GetAllIndexableMethods());
    IndexedPages pages = new IndexedPages();

    foreach (MethodInfo method in indexableMethods)
    {
        Indexable attribute = (Indexable)Attribute.GetCustomAttribute(method, typeof(Indexable));

        // Methods without the attribute, or without any additional URL entries,
        // contribute no links.
        if (attribute == null || attribute.AdditionalUrl == null || attribute.AdditionalUrl.Length == 0)
        {
            continue;
        }

        // Without an explicit Path the URL is derived from the controller and action names.
        string basePath = string.IsNullOrEmpty(attribute.Path)
            ? "/" + method.DeclaringType.Name.Replace("Controller", "") + "/" + method.Name
            : attribute.Path;

        foreach (String addurl in attribute.AdditionalUrl)
        {
            string urlPath;
            if (string.IsNullOrEmpty(addurl))
            {
                // An empty (or null) entry stands for the base URL itself.
                urlPath = basePath;
            }
            else if (basePath.EndsWith("/", StringComparison.Ordinal))
            {
                // Avoid a double slash when the configured path already ends with one.
                urlPath = basePath + addurl;
            }
            else
            {
                // The previous code appended the whole AdditionalUrl collection here
                // for the default-path case, which put the collection's ToString()
                // into the URL; the current segment is what belongs in the path.
                urlPath = basePath + "/" + addurl;
            }

            pages.AddLink(new Link()
            {
                Crawled = false,
                Page = new Page(urlPath, attribute.Priority, attribute.KeywordsAndPriority, attribute.Keywords)
            });
        }
    }

    return pages;
}
/// <summary>
/// Stores the links found on a crawled page as child page rows of <paramref name="pg"/>.
/// Links of length 0 or 1 (such as the root "/"), invalid links, links already saved,
/// and links repeated within the same page are skipped. Failures are traced and do not
/// propagate to the caller.
/// </summary>
/// <param name="searchResults">Crawl result whose Links collection is saved.</param>
/// <param name="pg">The parent page; supplies the ParentID and IndexedSiteID for every link row.</param>
public static void SaveLinks(SearchResult searchResults, IndexedPages pg)
{
    try
    {
        if (searchResults.Links == null)
        {
            return;
        }

        List<IndexedPages> linkPages = new List<IndexedPages>();

        // IsPageAlreadySaved only sees rows already in the database, and nothing is written
        // until after the loop, so a link occurring twice on the same page would be queued
        // twice. Track what has been seen in this batch as well.
        HashSet<string> seenInBatch = new HashSet<string>(StringComparer.Ordinal);

        foreach (string singleLink in searchResults.Links)
        {
            // Skip null entries and anything of length <= 1, to avoid the root (/).
            if (singleLink == null || singleLink.Length <= 1)
            {
                continue;
            }

            IndexedPages cp = new IndexedPages();
            cp.DateCreated = DateTime.Now;
            cp.ParentID = pg.PageID;
            cp.PageName = GetFileWithFolder(singleLink);
            cp.IndexedSiteID = pg.IndexedSiteID;
            // get directory for the file, not only the filename.
            cp.ParentDirectory = Services.SearchUtils.GetDirectoryForFile(singleLink, pg.PageID);
            cp.PageURL = GetFullURLFromName(cp.PageName, cp.ParentDirectory);
            cp.Title = ""; // the title comes only from the page content.

            // Same (URL, name) pair that the database duplicate check uses.
            string batchKey = cp.PageURL + "\n" + cp.PageName;
            if (!seenInBatch.Add(batchKey))
            {
                continue;
            }

            if (IsValidLink(cp.PageURL) && !SearchServices.IsPageAlreadySaved(cp.PageURL, cp.PageName))
            {
                linkPages.Add(cp);
            }
        }

        DB.IndexedPages.AddRange(linkPages);
        DB.SaveChanges();
    }
    catch (Exception ex)
    {
        // Saving links stays best-effort (the caller continues with keywords), but the
        // failure is traced instead of being silently discarded.
        System.Diagnostics.Trace.TraceError("SaveLinks failed: {0}", ex);
    }
}
/// <summary>
/// Saves each keyword found on a page together with its rank value, one row per
/// entry in the crawl result's keyword ranking list. Failures are traced and do not
/// propagate to the caller.
/// </summary>
/// <param name="searchResults">Crawl result whose KeyWordRankingList is saved.</param>
/// <param name="pg">The page the keywords belong to; supplies the PageID for every row.</param>
public static void SaveTheKeywords(SearchResult searchResults, IndexedPages pg)
{
    try
    {
        // Nothing to save; previously this case surfaced as a swallowed exception.
        if (searchResults.KeyWordRankingList == null)
        {
            return;
        }

        List<PageKeyWords> keywordRows = new List<PageKeyWords>();

        foreach (WordRankVM kw in searchResults.KeyWordRankingList)
        {
            keywordRows.Add(new PageKeyWords
            {
                PageID = pg.PageID,
                Keyword = kw.Keyword,
                KeywordCount = kw.Rank
            });
        }

        DB.PageKeyWords.AddRange(keywordRows);
        DB.SaveChanges();
    }
    catch (Exception ex)
    {
        // Keyword saving stays best-effort, but the failure is traced instead of
        // being silently discarded.
        System.Diagnostics.Trace.TraceError("SaveTheKeywords failed: {0}", ex);
    }
}
/// <summary>
/// Signals that a crawl has finished by clearing the IndexRunning flag.
/// </summary>
/// <param name="index">The pages produced by the crawl. Currently unused.</param>
/// <returns>An already-completed task; the method does no asynchronous work.</returns>
public static Task CrawlCompleteAsync(IndexedPages index)
{
    // TODO: step 1 of the intended flow - fill the IndexCache with the index - is not
    // implemented yet; only step 2 (resetting the running flag) happens here.
    IndexRunning = false;

    // The method was declared async without any await (compiler warning CS1998), which
    // only added a state machine around synchronous code. Callers can still await the result.
    return Task.FromResult(0);
}