/// <summary>
/// Creates a SearchResult for a single page and, unless the page is already
/// indexed, loads its raw content into <c>SearchContent</c>.
/// </summary>
/// <param name="pageURL">URL of the page; stored on the result and passed to the filename, index-check and content helpers.</param>
/// <param name="parentID">Value stored in the result's <c>ParentID</c>.</param>
/// <param name="siteIndexID">Value stored in the result's <c>IndexedSiteID</c>.</param>
/// <returns>
/// A SearchResult instance, never null. If any step throws, the instance is
/// returned with only the properties that were assigned before the failure.
/// </returns>
public static SearchResult LoadPageContent(string pageURL, int parentID, int siteIndexID)
{
    // Created outside the try so the catch path still has an instance to return.
    SearchResult searchResult = new SearchResult();

    try
    {
        // Assignment order is deliberate: if a helper throws, the caller
        // receives exactly the properties that were set up to that point.
        searchResult.ParentID = parentID;
        searchResult.PageName = GetFilenameFromURL(pageURL);
        searchResult.IndexedSiteID = siteIndexID;
        searchResult.PageURL = pageURL;

        // Check whether this page has been indexed BEFORE getting the content.
        if (!SearchServices.IsPageContentIndexed(pageURL, searchResult.PageName))
        {
            searchResult.SearchContent = GetPageContent(pageURL);
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers always get a result back. A single handler
        // replaces the duplicate AggregateException/Exception handlers (the
        // former is covered by the latter) and records the failure instead
        // of discarding it silently.
        System.Diagnostics.Trace.TraceError("LoadPageContent failed for '{0}': {1}", pageURL, ex);
    }

    return searchResult;
}
/// <summary>
/// Populates the derived fields of a SearchResult whose page content has
/// already been loaded: title, parent directory, plain-text content, links
/// and keyword counts. Nothing is changed when the page is already indexed.
/// </summary>
/// <remarks>
/// No save call appears in this method; the original author's notes indicate
/// the populated container is handed to a save step later.
/// Any exception raised while extracting is traced and suppressed, so the
/// container may be left partially populated.
/// </remarks>
/// <param name="sr">
/// Container object for all page properties. Its <c>PageURL</c>, <c>PageName</c>,
/// <c>ParentID</c> and <c>SearchContent</c> are read. A null value is ignored.
/// </param>
public static void GetLinksAndKeywords(SearchResult sr)
{
    // Previously a null container surfaced as a swallowed NullReferenceException;
    // the outcome (nothing happens) is the same, but now it is explicit.
    if (sr == null)
    {
        return;
    }

    try
    {
        // Check whether this page has been indexed BEFORE doing any extraction.
        if (SearchServices.IsPageContentIndexed(sr.PageURL, sr.PageName))
        {
            return;
        }

        sr.Title = GetPageTitle(sr.SearchContent, sr.PageName);
        sr.ParentDirectory = GetDirectoryForFile(sr.PageURL, sr.ParentID);
        sr.TextContent = GetTextFromHTML(sr.SearchContent);

        // Use the full page content to extract the links.
        sr.Links = GetLinks(sr.SearchContent);

        // Use ONLY the cleaned text to find the keyword ranking.
        sr.KeyWordRankingList = GetKeywordCounts(sr.TextContent);
    }
    catch (Exception ex)
    {
        // Best effort: one bad page must not stop the caller. The two empty
        // handlers are merged into one that records the failure.
        System.Diagnostics.Trace.TraceError("GetLinksAndKeywords failed for '{0}': {1}", sr.PageURL, ex);
    }
}