Ejemplo n.º 1
0
        /// <summary>
        /// Creates a <see cref="SearchResult"/> for a page and, if the page has not
        /// been indexed yet, loads the page content into it.
        /// </summary>
        /// <param name="pageURL">URL of the page; stored in <c>PageURL</c> and used to derive <c>PageName</c>.</param>
        /// <param name="parentID">Value stored in <c>ParentID</c>.</param>
        /// <param name="siteIndexID">Value stored in <c>IndexedSiteID</c>.</param>
        /// <returns>
        /// A non-null result. <c>SearchContent</c> is only assigned when
        /// <c>SearchServices.IsPageContentIndexed</c> reports the page as not indexed.
        /// If any step throws, the failure is traced and the result is returned with
        /// whatever properties were assigned before the failure.
        /// </returns>
        public static SearchResult LoadPageContent(string pageURL, int parentID, int siteIndexID)
        {
            SearchResult searchResult = new SearchResult();

            try
            {
                searchResult.ParentID      = parentID;
                searchResult.PageName      = GetFilenameFromURL(pageURL);
                searchResult.IndexedSiteID = siteIndexID;
                searchResult.PageURL       = pageURL;

                // Check whether this page has been indexed BEFORE fetching the content,
                // so already-indexed pages are not downloaded again.
                if (!SearchServices.IsPageContentIndexed(pageURL, searchResult.PageName))
                {
                    searchResult.SearchContent = GetPageContent(pageURL);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a single failing page must not abort the caller, but the
                // failure is recorded instead of being silently discarded.
                // (AggregateException derives from Exception, so one handler covers both.)
                System.Diagnostics.Trace.TraceError("LoadPageContent failed for '{0}': {1}", pageURL, ex);
            }

            return searchResult;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Extracts the title, parent directory, plain text, links and keyword counts
        /// from the already-loaded content of a single page and stores them on the
        /// supplied <see cref="SearchResult"/> container.
        /// Nothing is extracted when <c>SearchServices.IsPageContentIndexed</c>
        /// reports the page as already indexed.
        /// </summary>
        /// <param name="sr">
        /// Container whose <c>PageURL</c>, <c>PageName</c>, <c>ParentID</c> and
        /// <c>SearchContent</c> are read and whose <c>Title</c>, <c>ParentDirectory</c>,
        /// <c>TextContent</c>, <c>Links</c> and <c>KeyWordRankingList</c> are written.
        /// A null value is ignored.
        /// </param>
        public static void GetLinksAndKeywords(SearchResult sr)
        {
            // Previously a null container surfaced as a swallowed NullReferenceException;
            // make the no-op explicit instead.
            if (sr == null)
            {
                return;
            }

            try
            {
                // Check whether this page has been indexed BEFORE processing the content.
                if (SearchServices.IsPageContentIndexed(sr.PageURL, sr.PageName))
                {
                    return;
                }

                sr.Title           = GetPageTitle(sr.SearchContent, sr.PageName);
                sr.ParentDirectory = GetDirectoryForFile(sr.PageURL, sr.ParentID);
                sr.TextContent     = GetTextFromHTML(sr.SearchContent);

                // Use the full page content to extract the links.
                sr.Links = GetLinks(sr.SearchContent);

                // Use ONLY the cleaned text to find the keyword ranking.
                sr.KeyWordRankingList = GetKeywordCounts(sr.TextContent);
            }
            catch (Exception ex)
            {
                // Best effort: keep the original non-throwing contract, but record the
                // failure instead of discarding it. One handler covers every exception type.
                System.Diagnostics.Trace.TraceError("GetLinksAndKeywords failed for '{0}': {1}", sr.PageURL, ex);
            }
        }