/// <summary>
/// Issues an HTTP GET for <c>scrapeInfo.Url</c>, hands the response body and
/// <c>scrapeInfo.XPath</c> to <c>Parser.ParseResultsFromHtml</c>, and wraps the
/// outcome in a <c>ResultLinkInfo</c>.
/// </summary>
/// <param name="scrapeInfo">
/// Supplies the target URL, the extra request headers (via <c>BuildHeaders</c>),
/// the XPath passed to the parser, the display name, and the success/failure
/// indicators consumed by <c>DetermineSuccess</c>.
/// </param>
/// <returns>
/// A result with <c>Success = false</c> and only <c>Name</c> populated when
/// <c>GetResponse</c> throws; otherwise a result carrying the name, link,
/// parsed text and the success flag computed by <c>DetermineSuccess</c>.
/// </returns>
public ResultLinkInfo GetResult(ScrapeInfo scrapeInfo)
{
    // NOTE: this is a process-wide setting; it affects every request made through ServicePointManager.
    ServicePointManager.SecurityProtocol = System.Net.SecurityProtocolType.Tls12;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(scrapeInfo.Url);
    request.Headers = BuildHeaders(scrapeInfo.Headers);

    // The current second (0-59) is used as a cheap way to vary the advertised browser version.
    int versionSeed = DateTime.Now.Second;

    // User-Agent is a restricted header on HttpWebRequest: adding it through Headers.Add
    // throws on .NET Framework, so it is assigned through the dedicated property instead.
    request.UserAgent = $"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{versionSeed}) Gecko/20100101 Firefox/{versionSeed}";
    request.Method = "GET";
    request.Timeout = 10000; // milliseconds

    // Lets the framework transparently inflate gzip/deflate bodies, so the response
    // stream never needs to be wrapped in GZipStream/DeflateStream by hand.
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
    //request.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials;

    WebResponse response;
    try
    {
        response = request.GetResponse();
    }
    catch
    {
        // Deliberate best-effort: any failure to obtain a response is reported as an
        // unsuccessful result rather than surfaced to the caller.
        return new ResultLinkInfo()
        {
            Name = scrapeInfo.Name,
            Success = false
        };
    }

    // Dispose the response, its stream and the reader so the underlying connection
    // is released even if reading or decoding throws.
    string html;
    using (response)
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream))
    {
        html = reader.ReadToEnd();
    }

    Parser parser = new Parser();
    string result = parser.ParseResultsFromHtml(html, scrapeInfo.XPath);

    return new ResultLinkInfo()
    {
        Name = scrapeInfo.Name,
        Link = new Uri(scrapeInfo.Url),
        ResultText = result,
        Success = DetermineSuccess(scrapeInfo, result)
    };
}
/// <summary>
/// Decides whether a piece of text counts as a successful result, based on the
/// indicators configured on <paramref name="info"/>.
/// </summary>
/// <param name="info">
/// Provides <c>SuccessIndicator</c> and <c>FailureIndicator</c>. When
/// <c>SuccessIndicator</c> is set it takes precedence and <c>FailureIndicator</c>
/// is ignored.
/// </param>
/// <param name="html">
/// The text to inspect. May be null, in which case the result is <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> if <c>SuccessIndicator</c> is set and found in the text, or if only
/// <c>FailureIndicator</c> is set and is absent from the text; <c>false</c> when the
/// text is null or neither indicator is configured.
/// </returns>
public bool DetermineSuccess(ScrapeInfo info, string html) // consider private
{
    // No text to inspect: report failure instead of throwing NullReferenceException
    // from html.Contains below.
    if (html == null)
    {
        return false;
    }

    if (info.SuccessIndicator != null)
    {
        return html.Contains(info.SuccessIndicator);
    }

    if (info.FailureIndicator != null)
    {
        return !html.Contains(info.FailureIndicator);
    }

    // Neither indicator configured: success cannot be established.
    return false;
}