/// <summary>
/// Walks every link of <paramref name="page"/>, parsing and indexing each link that is not
/// already present in <paramref name="parsedLinks"/>, and recursing into successfully parsed
/// child pages while <paramref name="level"/> is below <c>LevelsDeep</c>. After each link the
/// progress messages are rebuilt and passed to <c>IndexMessageAll</c>.
/// </summary>
/// <param name="messages">Scratch list; cleared and refilled with three progress lines after every link.</param>
/// <param name="page">Page whose <c>PageLinks</c> are visited.</param>
/// <param name="parsedLinks">
/// Sorted list of already parsed URLs. It is searched with <c>BinarySearch</c> using the default
/// string comparer, so it must stay sorted under that comparer.
/// </param>
/// <param name="linksCount">Running total of links discovered; increased by the link count of <paramref name="page"/>.</param>
/// <param name="linksParsed">Running total of parse attempts; incremented once per link not found in <paramref name="parsedLinks"/>.</param>
/// <param name="level">Current recursion depth; recursion continues only while it is less than <c>LevelsDeep</c>.</param>
void recursiveIndexLinks(List<string> messages, FoogleEngine.Page page, List<string> parsedLinks, ref int linksCount, ref int linksParsed, int level = 2)
{
    linksCount += page.PageLinks.Count;

    foreach (string link in page.PageLinks)
    {
        if (parsedLinks.BinarySearch(link) < 0)
        {
            linksParsed++;

            FoogleEngine.Page childPage;
            if (FoogleEngine.IndexWorker.TryPageParse(link, out childPage))
            {
                // The stored value is the page's Url string, which is not necessarily the same
                // text as `link`. Look up the slot for the string that is actually stored so the
                // list stays sorted (BinarySearch is undefined on an unsorted list) and free of
                // duplicates.
                string parsedUrl = childPage.Url.ToString();
                int slot = parsedLinks.BinarySearch(parsedUrl);
                if (slot < 0)
                {
                    // A negative result is the bitwise complement of the insertion index.
                    parsedLinks.Insert(~slot, parsedUrl);
                }
            }

            if (level < LevelsDeep && childPage != null)
            {
                recursiveIndexLinks(messages, childPage, parsedLinks, ref linksCount, ref linksParsed, level + 1);
            }

            // NOTE(review): childPage is null here when parsing failed; presumably IndexPage
            // records the link as unparsable in that case - confirm against fb.IndexPage.
            fb.IndexPage(childPage, link);
        }

        // Progress is rebuilt from scratch and sent after every link, parsed or skipped.
        messages.Clear();
        messages.Add("Foogle is beginning indexing...");
        messages.Add("Foogle has currently found (" + linksCount + ") connections to Index");
        messages.Add("Foogle has indexed, " + linksParsed + " of " + linksCount + ".");
        IndexMessageAll(messages);
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and attempts to build a <see cref="Page"/> from its content.
/// </summary>
/// <remarks>
/// The body is first decoded with the charset declared in the response's Content-Type header,
/// falling back to UTF-8 when the header or its charset is missing or not recognised. If the
/// decoded text does not contain the substring "html", every encoding reported by
/// <see cref="Encoding.GetEncodings"/> is tried against the bytes already downloaded until one
/// yields text that does.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="parsedPage">The parsed page on success; <c>null</c> on failure.</param>
/// <returns><c>true</c> when a page was produced; <c>false</c> on any failure, including exceptions.</returns>
public static bool TryPageParse(string url, out Page parsedPage)
{
    parsedPage = null;
    try
    {
        byte[] data;
        string contentTypeHeader;
        using (WebClient client = new WebClient())
        {
            data = client.DownloadData(url);
            contentTypeHeader = client.ResponseHeaders[HttpResponseHeader.ContentType];
        }

        string pageString = ResolveDeclaredEncoding(contentTypeHeader).GetString(data);
        bool foundEncoding = pageString.Contains("html");

        if (!foundEncoding)
        {
            // Decode the bytes we already hold instead of issuing a fresh request per encoding.
            foreach (var candidate in Encoding.GetEncodings())
            {
                pageString = candidate.GetEncoding().GetString(data);
                if (pageString.Contains("html"))
                {
                    foundEncoding = true;
                    break;
                }
            }
        }

        if (foundEncoding)
        {
            parsedPage = new FoogleEngine.Page(pageString, new Uri(url));
        }

        return parsedPage != null;
    }
    catch (Exception)
    {
        // Deliberate best-effort: any download, decoding or parsing failure means "not parsable".
        parsedPage = null;
        return false;
    }
}

/// <summary>
/// Returns the encoding named by the charset of a Content-Type header value, or UTF-8 when the
/// header is absent, malformed, has no charset, or names an encoding that is not available.
/// </summary>
private static Encoding ResolveDeclaredEncoding(string contentTypeHeader)
{
    if (string.IsNullOrEmpty(contentTypeHeader))
    {
        return Encoding.UTF8;
    }

    try
    {
        string charSet = new System.Net.Mime.ContentType(contentTypeHeader).CharSet;
        return string.IsNullOrEmpty(charSet) ? Encoding.UTF8 : Encoding.GetEncoding(charSet);
    }
    catch (FormatException)
    {
        // Header value could not be parsed as a content type.
        return Encoding.UTF8;
    }
    catch (ArgumentException)
    {
        // Charset name is not a supported encoding.
        return Encoding.UTF8;
    }
}
/// <summary>
/// Lazily searches the stored pages for <paramref name="searchText"/>.
/// </summary>
/// <remarks>
/// Iterates every non-null table yielded by <c>fb.RetrievePages()</c>. For each row whose
/// "Parsable" column is <c>true</c>, a <see cref="Page"/> is built from the "Html" and "Url"
/// columns and matched against the search text; only matches with a
/// <c>MatchPercentage</c> greater than zero are yielded.
/// </remarks>
/// <param name="searchText">Text passed to each <see cref="SearchMatch"/>.</param>
/// <returns>A deferred sequence of matches, in the order the rows are read.</returns>
public IEnumerable<SearchMatch> Search(string searchText)
{
    // foreach disposes the underlying enumerator, including when the caller stops
    // enumerating the results early.
    foreach (DataTable table in fb.RetrievePages())
    {
        if (table == null)
        {
            continue;
        }

        foreach (DataRow dr in table.Rows)
        {
            if (!(bool)dr["Parsable"])
            {
                continue;
            }

            Page page = new FoogleEngine.Page(dr["Html"].ToString(), new Uri(dr["Url"].ToString()));
            SearchMatch sm = new FoogleEngine.SearchWorker.SearchMatch(page, searchText);
            if (sm.MatchPercentage > 0)
            {
                yield return sm;
            }
        }
    }
}