Exemple #1
0
        /// <summary>
        /// Parses and indexes every link of <paramref name="page"/> that is not yet recorded in
        /// <paramref name="parsedLinks"/>, recursing into successfully parsed child pages while
        /// <paramref name="level"/> is below <c>LevelsDeep</c>. Progress messages are rebuilt and
        /// published through <c>IndexMessageAll</c> after every link.
        /// </summary>
        /// <param name="messages">Scratch list that is cleared and refilled with the progress text for each link.</param>
        /// <param name="page">Page whose <c>PageLinks</c> are walked.</param>
        /// <param name="parsedLinks">
        /// URLs of pages parsed so far. It is queried with <c>BinarySearch</c>, so it must be kept sorted
        /// (default string comparer); this method only ever inserts at the sorted position.
        /// </param>
        /// <param name="linksCount">Running total of links discovered; incremented by this page's link count.</param>
        /// <param name="linksParsed">Running total of links for which a parse was attempted.</param>
        /// <param name="level">Current recursion depth; recursion stops once it reaches <c>LevelsDeep</c>.</param>
        void recursiveIndexLinks(List <string> messages, FoogleEngine.Page page, List <string> parsedLinks, ref int linksCount, ref int linksParsed, int level = 2)
        {
            linksCount += page.PageLinks.Count;
            foreach (string link in page.PageLinks)
            {
                if (parsedLinks.BinarySearch(link) < 0)
                {
                    linksParsed++;
                    FoogleEngine.Page childPage;
                    if (FoogleEngine.IndexWorker.TryPageParse(link, out childPage))
                    {
                        // The recorded string is the parsed page's URL, which can differ from the raw
                        // link text. Look up the insertion point for the string actually stored;
                        // reusing the position computed for `link` could leave the list unsorted and
                        // make later BinarySearch calls unreliable.
                        string parsedUrl = childPage.Url.ToString();
                        int insertAt     = parsedLinks.BinarySearch(parsedUrl);
                        if (insertAt < 0)
                        {
                            // A negative result is the bitwise complement of the sorted insertion index.
                            parsedLinks.Insert(~insertAt, parsedUrl);
                        }
                    }
                    if (level < LevelsDeep && childPage != null)
                    {
                        recursiveIndexLinks(messages, childPage, parsedLinks, ref linksCount, ref linksParsed, level + 1);
                    }

                    // NOTE(review): childPage may be null here when parsing failed; the link is still
                    // handed to IndexPage, presumably so it is recorded as unparsable - confirm against IndexPage.
                    fb.IndexPage(childPage, link);
                }
                messages.Clear();
                messages.Add("Foogle is beginning indexing...");
                messages.Add("Foogle has currently found (" + linksCount + ") connections to Index");
                messages.Add("Foogle has indexed, " + linksParsed + " of " + linksCount + ".");
                IndexMessageAll(messages);
            }
        }
Exemple #2
0
 /// <summary>
 /// Downloads <paramref name="url"/> once and tries to decode it into a <see cref="Page"/>.
 /// The body is first decoded with the charset declared in the Content-Type response header
 /// (UTF-8 when no charset is declared). If the decoded text does not contain "html", every
 /// encoding returned by <c>Encoding.GetEncodings()</c> is tried against the same downloaded bytes.
 /// </summary>
 /// <param name="url">Address of the page to download.</param>
 /// <param name="parsedPage">The parsed page on success; <c>null</c> on failure.</param>
 /// <returns><c>true</c> when a decoding containing "html" was found and the page was constructed; otherwise <c>false</c>.</returns>
 public static bool TryPageParse(string url, out Page parsedPage)
 {
     parsedPage = null;
     try
     {
         byte[]   data;
         Encoding declaredEncoding;
         using (WebClient client = new WebClient())
         {
             data = client.DownloadData(url);

             // A missing header or a header without a charset used to throw (and thus fail the
             // whole parse) before the fallback probing below could run; default to UTF-8 instead.
             string contentTypeHeader = client.ResponseHeaders[HttpResponseHeader.ContentType];
             string charSet           = string.IsNullOrEmpty(contentTypeHeader)
                 ? null
                 : new System.Net.Mime.ContentType(contentTypeHeader).CharSet;
             declaredEncoding = string.IsNullOrEmpty(charSet)
                 ? Encoding.UTF8
                 : Encoding.GetEncoding(charSet);
         }

         string pageString    = declaredEncoding.GetString(data);
         bool   foundEncoding = pageString.Contains("html");
         if (!foundEncoding)
         {
             // Probe the remaining encodings against the bytes already in memory rather than
             // issuing a fresh HTTP request per candidate encoding.
             foreach (var encoding in Encoding.GetEncodings())
             {
                 pageString = encoding.GetEncoding().GetString(data);
                 if (pageString.Contains("html"))
                 {
                     foundEncoding = true;
                     break;
                 }
             }
         }

         if (foundEncoding)
         {
             parsedPage = new FoogleEngine.Page(pageString, new Uri(url));
         }
         return(parsedPage != null);
     }
     catch (Exception)
     {
         // Deliberate best-effort: any download, header, decoding or URI failure is reported
         // to the caller as "not parsable" rather than propagated.
         parsedPage = null;
         return(false);
     }
 }
Exemple #3
0
        /// <summary>
        /// Lazily scans the tables returned by <c>fb.RetrievePages()</c> and yields a
        /// <see cref="SearchMatch"/> for every row flagged "Parsable" whose match percentage
        /// against <paramref name="searchText"/> is greater than zero.
        /// </summary>
        /// <param name="searchText">Text each stored page is matched against.</param>
        /// <returns>A deferred sequence of matches; rows are read and evaluated as the sequence is enumerated.</returns>
        public IEnumerable <SearchMatch> Search(string searchText)
        {
            // foreach (instead of a hand-driven IEnumerator) guarantees the underlying enumerator
            // is disposed, including when the caller stops enumerating early.
            foreach (DataTable table in fb.RetrievePages())
            {
                if (table == null)
                {
                    continue;
                }

                foreach (DataRow dr in table.Rows)
                {
                    if (!(bool)dr["Parsable"])
                    {
                        continue;
                    }

                    // Rebuild the page from its stored HTML and URL, then score it against the query.
                    Page        page = new FoogleEngine.Page(dr["Html"].ToString(), new Uri(dr["Url"].ToString()));
                    SearchMatch sm   = new FoogleEngine.SearchWorker.SearchMatch(page, searchText);
                    if (sm.MatchPercentage > 0)
                    {
                        yield return(sm);
                    }
                }
            }
        }