Esempio n. 1
0
        /// <summary>
        /// Queries a search engine for <paramref name="searchTerm"/>, extracts the result links from the
        /// returned page and hands each accepted link to <c>SearchLink</c>; every non-null result is
        /// added to <c>_results</c>.
        /// </summary>
        /// <param name="searchEngine">
        /// Engine description; its <c>Url</c>, <c>Name</c>, <c>NodeSelector</c> and <c>EscapeString</c>
        /// members are read.
        /// </param>
        /// <param name="searchTerm">Term to search for; it is trimmed and appended to the engine URL.</param>
        /// <remarks>
        /// Exceptions raised while downloading or parsing the page are not caught here, so they reach the
        /// caller with their original type and stack trace. A page on which the node selector matches
        /// nothing produces no results instead of failing.
        /// </remarks>
        public void SearchEngine(SearchEngines searchEngine, string searchTerm)
        {
            // Build the search url from the engine url and the search term.
            // NOTE(review): the term is appended without URL-escaping, so characters such as '&' or '#'
            // would alter the query - confirm whether callers pre-encode it.
            string searchUrl = searchEngine.Url + searchTerm.Trim();
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(searchUrl);

            // Download the whole page; the using blocks release the connection even when reading fails.
            // NOTE(review): the body is decoded as ASCII (as before), so non-ASCII characters become '?'.
            string pageSource;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.ASCII, false))
            {
                pageSource = reader.ReadToEnd();
            }

            // Parse the response into an html document with the help of HtmlAgilityPack.
            HtmlDocument html = new HtmlDocument();
            html.OptionOutputAsXml = true;
            html.LoadHtml(pageSource);

            // SelectNodes yields null (not an empty collection) when the selector matches nothing.
            var links = html.DocumentNode.SelectNodes(searchEngine.NodeSelector);
            if (links == null)
            {
                return;
            }

            foreach (HtmlNode link in links)
            {
                string hrefValue = link.GetAttributeValue("href", string.Empty);

                // Try to eliminate ads and engine-internal links: keep only hrefs that do not mention
                // the engine name, contain the engine's escape string and carry an absolute http(s) url.
                if (hrefValue.IndexOf(searchEngine.Name, StringComparison.OrdinalIgnoreCase) >= 0 ||
                    !hrefValue.Contains(searchEngine.EscapeString) ||
                    (hrefValue.IndexOf("http://", StringComparison.OrdinalIgnoreCase) < 0 &&
                     hrefValue.IndexOf("https://", StringComparison.OrdinalIgnoreCase) < 0))
                {
                    continue;
                }

                // Drop additional parameters appended by the engine (like google does), if any.
                int ampersandIndex = hrefValue.IndexOf('&');
                if (ampersandIndex > 0)
                {
                    hrefValue = hrefValue.Substring(0, ampersandIndex);
                }

                // Open the link and count the occurrences of the term inside it.
                LinkResults currentLink = SearchLink(hrefValue, searchTerm, searchEngine.Name);
                if (currentLink != null)
                {
                    _results.Add(currentLink);
                }
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a new <c>LinkResults</c> from <c>_apiApp</c> and <c>Name</c> and stores it in
 /// <c>Results</c>, replacing whatever was there before.
 /// </summary>
 public override void FillData()
 {
     var freshResults = new LinkResults(_apiApp, Name);
     Results = freshResults;
 }
Esempio n. 3
0
        /// <summary>
        /// Opens a result link and counts how often the search term occurs in the page.
        /// </summary>
        /// <param name="url">Link taken from the search engine result page (still url-encoded).</param>
        /// <param name="searchTerm">Term to look for; it is matched literally, not as a regex pattern.</param>
        /// <param name="engine">Name of the engine that returned the link.</param>
        /// <returns>
        /// A new <c>LinkResults</c> for a page that has not been recorded yet. <c>null</c> when the url is
        /// already present in <c>Results</c> (the existing entry gets the engine name appended if it
        /// did not mention it yet) or when anything fails while fetching or parsing the page.
        /// </returns>
        public LinkResults SearchLink(string url, string searchTerm, string engine)
        {
            try
            {
                // Google links get their first 7 characters dropped before decoding.
                // NOTE(review): presumably this strips a "/url?q=" redirect prefix - confirm.
                string searchURL = engine == "Google"
                    ? HttpUtility.UrlDecode(url.Substring(7))
                    : HttpUtility.UrlDecode(url);

                // Check if we already searched this link.
                if (Results.Any(x => x.LinkUrl == searchURL))
                {
                    // Known page: if it was found through a different engine, record this engine too
                    // so the user knows the page was found in these engines. Either way there is
                    // nothing new to return.
                    LinkResults foundLink = Results.FirstOrDefault(x => x.LinkUrl == searchURL &&
                                                                        !x.SearchEngine.Contains(engine));
                    if (foundLink != null)
                    {
                        foundLink.SearchEngine = foundLink.SearchEngine + " and " + engine;
                    }
                    return null;
                }

                // If we reached here it is a new page, so record the page and the engine.
                LinkResults result = new LinkResults();
                result.LinkUrl      = searchURL;
                result.SearchEngine = engine;

                // Download the page; the using blocks release the connection even when reading fails.
                // NOTE(review): the body is decoded as ASCII (as before), so non-ASCII characters become '?'.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(searchURL);
                string pageSource;
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.ASCII, false))
                {
                    pageSource = reader.ReadToEnd();
                }

                // Parse the response into an html document with the help of HtmlAgilityPack.
                HtmlDocument html = new HtmlDocument();
                html.OptionOutputAsXml = true;
                html.LoadHtml(pageSource);
                HtmlNode doc = html.DocumentNode;

                // SelectNodes yields null when the page has no anchors; MentionedInLinks is then left untouched.
                var anchors = doc.SelectNodes("//a[@href]");
                if (anchors != null)
                {
                    foreach (HtmlNode anchor in anchors)
                    {
                        string hrefValue = anchor.GetAttributeValue("href", string.Empty);
                        // This check is case-sensitive, unlike the total count below.
                        if (hrefValue.Contains(searchTerm))
                        {
                            result.MentionedInLinks += 1;
                        }
                    }
                }

                // Count case-insensitive occurrences of the term in the page text. The term is escaped
                // so that regex metacharacters in it (e.g. "C++" or "a.b") are matched literally.
                result.MentionedTimesTotal =
                    Regex.Matches(doc.InnerText, Regex.Escape(searchTerm), RegexOptions.IgnoreCase).Count;
                return result;
            }
            catch (Exception ex)
            {
                // Best effort: unreachable pages (404, 403, timeouts, ...) must not abort the whole
                // search, so the link is skipped; the failure is traced instead of vanishing silently.
                System.Diagnostics.Trace.TraceWarning("SearchLink failed for '{0}': {1}", url, ex.Message);
                return null;
            }
        }