//get the totals by page and by keyword.
        /// <summary>
        /// Builds the index totals for one site: how many pages are stored for it and
        /// how many page/keyword rows are attached to those pages.
        /// </summary>
        /// <param name="indexedSiteID">ID of the indexed site whose totals are requested.</param>
        /// <returns>
        /// A populated <see cref="SearchTotal"/>, or <c>null</c> when a query fails
        /// (the exception is passed to <c>MessageLogger.LogThis</c>).
        /// </returns>
        public static SearchTotal GetIndexedPageTotals(int indexedSiteID)
        {
            SearchTotal st = new SearchTotal();

            try
            {
                // Summing the size of every group is the same as counting the rows, so
                // let the database count instead of loading one entry per group.
                st.PagesIndexed = DB.IndexedPages.Count(px => px.IndexedSiteID == indexedSiteID);

                // One joined row exists per keyword row of a page that belongs to the site.
                st.KeywordsIndexed = (from p in DB.IndexedPages
                                      join pkw in DB.PageKeyWords
                                      on p.PageID equals pkw.PageID
                                      where p.IndexedSiteID == indexedSiteID
                                      select pkw).Count();

                return st;
            }
            catch (Exception ex)
            {
                MessageLogger.LogThis(ex);
                return null;
            }
        }
        ///group the pages by pageURL and sum up the keyword counts
        /// <summary>
        /// Ranks pages for a keyword. Keyword rows are joined to their pages, grouped by
        /// page URL, and the keyword counts of each group are added up into the rank.
        /// </summary>
        /// <param name="keyWord">
        /// Text to look for inside the stored keywords. When <c>null</c>, no keyword
        /// filter is applied and every page that has keyword rows is ranked.
        /// </param>
        /// <returns>
        /// One <see cref="KeywordRanking"/> per page URL, or <c>null</c> when the query
        /// fails (the exception is passed to <c>MessageLogger.LogThis</c>).
        /// </returns>
        public static List <KeywordRanking> GetKeywordRanking(string keyWord)
        {
            try
            {
                // Contains already matches everything StartsWith would match, so a single
                // test is enough. A missing keyword count is treated as 0 so that a null
                // in the data cannot break the sum.
                var results = (from pg in DB.IndexedPages
                               join pgLinks in DB.PageKeyWords
                               on pg.PageID equals pgLinks.PageID
                               where keyWord == null || pgLinks.Keyword.Contains(keyWord)
                               group new { pg, pgLinks } by pg.PageURL into grup1
                               select new KeywordRanking
                               {
                                   // The grouping key is the page URL itself.
                                   PageURL = grup1.Key,
                                   Title   = grup1.FirstOrDefault().pg.Title,
                                   Rank    = grup1.Sum(g => g.pgLinks.KeywordCount ?? 0)
                               }).ToList();

                return results;
            }
            catch (Exception ex)
            {
                MessageLogger.LogThis(ex);
                return null;
            }
        }
 //Get the data for a page so it can be indexed.
 //used for requesting the content based on the URL.
 /// <summary>
 /// Looks up a stored page. A page matches when its URL equals
 /// <paramref name="pageURL"/>, or when its name equals <paramref name="pageName"/>
 /// (ignoring case) and its URL starts with the scheme and authority of
 /// <paramref name="pageURL"/>.
 /// </summary>
 /// <param name="pageURL">Absolute URL of the page; it is parsed with <see cref="Uri"/>.</param>
 /// <param name="pageName">Page name to compare against the stored page names.</param>
 /// <returns>
 /// The first matching page, or <c>null</c> when nothing matches or when the lookup
 /// fails (the exception is passed to <c>MessageLogger.LogThis</c>).
 /// </returns>
 public static IndexedPage GetPageByName(string pageURL, string pageName)
 {
     try
     {
         Uri    siteURL    = new Uri(pageURL);
         string domainName = siteURL.GetLeftPart(UriPartial.Authority);

         // Only one page is returned, so ask for one row instead of loading every
         // match. The parentheses spell out how && and || were already being combined.
         return (from p in DB.IndexedPages
                 where (p.PageName.ToLower() == pageName.ToLower() &&
                        p.PageURL.StartsWith(domainName)) ||
                       p.PageURL == pageURL
                 select p).FirstOrDefault();
     }
     catch (Exception ex)
     {
         MessageLogger.LogThis(ex);
         return null;
     }
 }
        //Main Method that calls several Save methods for the content, links and keywords of a page.
        /// <summary>
        /// Saves one crawled page and the data that belongs to it. The page row is
        /// inserted, or refreshed when it already exists; then its links and keywords
        /// are saved and the page is flagged as indexed.
        /// </summary>
        /// <param name="searchResults">Crawl result holding the page data, its links and its keyword rankings.</param>
        /// <returns>
        /// The ID of the inserted or refreshed page, or 0 when the page could not be
        /// saved (the failure is passed to <c>MessageLogger.LogThis</c>).
        /// </returns>
        public static int SaveSearchResults(ContentSearchResult searchResults)
        {
            // Longest title that is stored; anything longer is cut to this length.
            const int maxTitleLength = 50;

            try
            {
                // Prepare the title once so the insert and the update path store the same
                // value. A missing title is stored as empty text.
                string title = searchResults.Title ?? string.Empty;
                if (title.Length > maxTitleLength)
                {
                    title = title.Substring(0, maxTitleLength);
                }

                IndexedPage pg = new IndexedPage();

                pg.DateCreated     = DateTime.Now;
                pg.ParentID        = searchResults.ParentID;
                pg.PageName        = GetFileWithFolder(searchResults.PageURL);
                pg.PageURL         = searchResults.PageURL;
                pg.ParentDirectory = searchResults.ParentDirectory;
                pg.IndexedSiteID   = searchResults.IndexedSiteID;
                pg.Title           = title;

                if (!IsPageAlreadySaved(pg.PageURL, pg.PageName))
                {
                    DB.IndexedPages.Add(pg);
                    DB.SaveChanges();
                }
                else
                {
                    // The page already exists, so refresh the fields that come from the content.
                    IndexedPage existing = GetPageByName(pg.PageURL, pg.PageName);
                    if (existing == null)
                    {
                        // The page was reported as saved but could not be loaded, so
                        // there is no row to refresh or to attach links and keywords to.
                        MessageLogger.LogThis(new InvalidOperationException(
                            "The page was reported as already saved but could not be loaded: " + pg.PageURL));
                        return 0;
                    }

                    pg             = existing;
                    pg.DateCreated = DateTime.Now;
                    pg.Title       = title;
                    DB.SaveChanges();
                }

                int savedPageID = pg.PageID;

                SaveTheLinks(searchResults, pg);    // save the links for this page.
                SaveTheKeywords(searchResults, pg); // save the keywords
                UpdateIsIndexedFlag(pg.PageID);     // update the IsIndexed flag so it is not run again.

                return savedPageID;
            }
            catch (Exception ex)
            {
                MessageLogger.LogThis(ex);
                return 0;
            }
        }
        //Saves the links which are pulled from the HTML of a page.
        /// <summary>
        /// Saves the links found on a page as new pages that have the given page as
        /// parent. Only the first ten links are considered, and a link is stored only
        /// when it passes <c>IsValidLink</c>, has not been queued earlier in the same
        /// call and is not already saved.
        /// </summary>
        /// <param name="searchResults">
        /// Crawl result whose <c>Links</c> are saved. The list is read only; it is not
        /// shortened or otherwise changed.
        /// </param>
        /// <param name="pg">The saved page the links were found on; it supplies the parent ID and the site ID.</param>
        /// <remarks>
        /// Failures are not thrown to the caller. They are passed to
        /// <c>MessageLogger.LogThis</c> together with the serialized crawl result.
        /// </remarks>
        public static void SaveTheLinks(ContentSearchResult searchResults, IndexedPage pg)
        {
            // For speed, only the first links of a page are looked at.
            const int maxLinksPerPage = 10;

            try
            {
                List <IndexedPage> linkPages = new List <IndexedPage>();

                // URLs already handled in this call. The database check cannot see pages
                // that are still waiting to be inserted, so a link that occurs twice on
                // the page would otherwise be stored twice. Case is ignored, as it is for
                // the page-name comparison in IsPageAlreadySaved.
                HashSet <string> seenUrls = new HashSet <string>(StringComparer.OrdinalIgnoreCase);

                // Take reads the first links without removing anything from the caller's list.
                foreach (string singleLink in searchResults.Links.Take(maxLinksPerPage))
                {
                    if (singleLink == null || singleLink.Length <= 1)
                    {
                        continue;   // it might be only a /
                    }

                    IndexedPage cp = new IndexedPage();
                    cp.DateCreated   = DateTime.Now;
                    cp.ParentID      = pg.PageID;
                    cp.PageName      = GetFileWithFolder(singleLink);
                    cp.IndexedSiteID = pg.IndexedSiteID;
                    // get directory for the file, not only the filename.
                    cp.ParentDirectory = Services.SearchLibrary.GetDirectoryForFile(singleLink, pg.PageID);
                    cp.PageURL         = GetFullURLFromPartial(cp.PageName, cp.ParentDirectory);
                    cp.Title           = ""; // THIS COMES ONLY FROM THE CONTENT;

                    // Avoid duplicates, both within this batch and against saved pages.
                    if (IsValidLink(cp.PageURL) &&
                        seenUrls.Add(cp.PageURL) &&
                        !DBSearchResult.IsPageAlreadySaved(cp.PageURL, cp.PageName))
                    {
                        linkPages.Add(cp);
                    }
                }

                DB.IndexedPages.AddRange(linkPages);
                DB.SaveChanges();
            }
            catch (Exception ex)
            {
                string data = Services.SerializeIt.SerializeThis(searchResults);
                MessageLogger.LogThis(ex, data);
            }
        }
        //clears the AppLogs table.
        /// <summary>
        /// Deletes every row of the AppLogs table.
        /// </summary>
        /// <remarks>
        /// All rows are removed with a single <c>SaveChanges</c> call instead of one
        /// call per row. Failures are not thrown to the caller; they are passed to
        /// <c>MessageLogger.LogThis</c>.
        /// </remarks>
        public static void ClearEventLog()
        {
            try
            {
                var msgs = DB.AppLogs.ToList();

                DB.AppLogs.RemoveRange(msgs);
                DB.SaveChanges();
            }
            catch (Exception ex)
            {
                MessageLogger.LogThis(ex);
            }
        }
        //Has the page been saved already?
        //A link might have been inserted already. This avoids duplicates.
        /// <summary>
        /// Tells whether a page is already stored. A stored page counts as the same page
        /// when its URL equals <paramref name="pageURL"/>, or when its name equals
        /// <paramref name="pageName"/> (ignoring case) and its URL starts with the scheme
        /// and authority of <paramref name="pageURL"/>.
        /// </summary>
        /// <param name="pageURL">Absolute URL of the page; it is parsed with <see cref="Uri"/>.</param>
        /// <param name="pageName">Page name to compare against the stored page names.</param>
        /// <returns>
        /// <c>true</c> when a matching page exists. <c>true</c> is also returned when the
        /// check itself fails (for example because the URL cannot be parsed); the
        /// exception is passed to <c>MessageLogger.LogThis</c>.
        /// </returns>
        public static bool IsPageAlreadySaved(string pageURL, string pageName)
        {
            try
            {
                Uri    siteURL    = new Uri(pageURL);
                string domainName = siteURL.GetLeftPart(UriPartial.Authority);

                // Only existence matters here, so no rows are loaded. The parentheses
                // spell out how && and || were already being combined.
                return DB.IndexedPages.Any(p =>
                    (p.PageName.ToLower() == pageName.ToLower() &&
                     p.PageURL.StartsWith(domainName)) ||
                    p.PageURL == pageURL);
            }
            catch (Exception ex)
            {
                MessageLogger.LogThis(ex);
                return true;
            }
        }
        //Some links point to a place on the same page or only to the domain itself; skip these.
        /// <summary>
        /// Tells whether a link is worth saving: it must be an absolute URL and must not
        /// be a fragment link that starts with <c>#</c>.
        /// </summary>
        /// <param name="pageURL">The URL to check; <c>null</c> and empty text are rejected.</param>
        /// <returns><c>true</c> when the text parses as an absolute URL, otherwise <c>false</c>.</returns>
        /// <remarks>
        /// Links that cannot be used are an expected case while crawling, so they are
        /// rejected through <see cref="Uri.TryCreate(string, UriKind, out Uri)"/> rather
        /// than by catching and logging an exception for each of them.
        /// </remarks>
        public static bool IsValidLink(string pageURL)
        {
            // Fragment links stay on the same page.
            if (string.IsNullOrEmpty(pageURL) || pageURL[0] == '#')
            {
                return false;
            }

            // A URL that is too short or is not absolute does not parse and is skipped.
            Uri siteURL;
            return Uri.TryCreate(pageURL, UriKind.Absolute, out siteURL);
        }
        //save each word and the # of times it occurs, word by word found on a parent page.
        /// <summary>
        /// Stores the keywords of a page: one <c>PageKeyWord</c> row per entry of
        /// <c>searchResults.KeyWordRankingList</c>, holding the page ID, the keyword and
        /// its count.
        /// </summary>
        /// <param name="searchResults">Crawl result whose keyword rankings are saved.</param>
        /// <param name="pg">The saved page the keywords belong to; it supplies the page ID.</param>
        /// <remarks>
        /// Failures are not thrown to the caller. They are passed to
        /// <c>MessageLogger.LogThis</c> together with the serialized keyword list.
        /// </remarks>
        public static void SaveTheKeywords(ContentSearchResult searchResults, IndexedPage pg)
        {
            var rows = new List <PageKeyWord>();

            try
            {
                // Turn every ranking of the page into a row for the keyword table.
                foreach (KeywordRanking ranking in searchResults.KeyWordRankingList)
                {
                    rows.Add(new PageKeyWord
                    {
                        PageID       = pg.PageID,
                        Keyword      = ranking.Keyword,
                        KeywordCount = ranking.Rank
                    });
                }

                // All rows are inserted with a single save.
                DB.PageKeyWords.AddRange(rows);
                DB.SaveChanges();
            }
            catch (Exception failure)
            {
                MessageLogger.LogThis(failure, Services.SerializeIt.SerializeThis(searchResults.KeyWordRankingList));
            }
        }