/// <summary>
/// Downloads the page for <paramref name="domain"/> and reads the domain name from its heading.
/// </summary>
/// <param name="domain">Domain to look up; also used as the fallback <c>Url</c>.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c>.</param>
/// <returns>
/// A result whose <c>Url</c> is the first space-separated token of the heading text.
/// When the page could not be fetched, the heading is missing, or an exception occurs,
/// the result carries <paramref name="domain"/> as <c>Url</c> and "-" for both dates.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    try
    {
        ParsedResult result = new ParsedResult();
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

        string html = getHTML(domain, proxy);
        if (html == null)
        {
            result.Url = domain;
            result.expiridate = "-";
            result.creationdate = "-";
            return result;
        }

        doc.LoadHtml(html);
        HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//span[@class=\'span8\']//h1[@class=\'h3\']");
        if (node == null)
        {
            result.Url = domain;
            result.expiridate = "-";
            result.creationdate = "-";
            return result;
        }

        result.Url = node.InnerText.Split(' ')[0].Trim();

        // Return the populated object: handing back a blank ParsedResult here would
        // lose the Url that was just parsed. No dates are extracted on this path,
        // so expiridate/creationdate keep their default values.
        return result;
    }
    catch (Exception)
    {
        // Best-effort parser: any failure degrades to the placeholder result.
        return new ParsedResult { Url = domain, creationdate = "-", expiridate = "-" };
    }
}
/// <summary>
/// Appends <paramref name="parsed"/> to the collected results and calls
/// <c>WriteResult</c> once the collection holds exactly one entry more than
/// there are sources.
/// </summary>
/// <param name="parsed">Entry to append to the result list.</param>
public void insertParsed(ParsedResult parsed)
{
    result.Add(parsed);

    // Presumably the extra entry is a header row placed in the list up front;
    // verify against wherever the list is created.
    bool allCollected = result.Count == 1 + Sources.Count;
    if (!allCollected)
    {
        return;
    }

    WriteResult();
}
/// <summary>
/// Downloads a JSON document from <c>apiurl + domain</c>, deserializes it as a
/// <c>ParsedResult</c> and copies its <c>data</c> member onto <paramref name="pr"/>.
/// </summary>
/// <param name="domain">Domain appended to <c>apiurl</c> to build the request address.</param>
/// <param name="pr">Result object that receives the downloaded <c>data</c>.</param>
/// <remarks>
/// Errors raised by the download or by deserialization are not caught here and
/// propagate to the caller. An empty response body or a payload that deserializes
/// to null leaves <paramref name="pr"/> untouched.
/// </remarks>
public void getDA_PA(string domain, ParsedResult pr)
{
    string json;

    // WebClient is IDisposable; release it as soon as the download finishes.
    using (WebClient webClient = new WebClient())
    {
        json = webClient.DownloadString(apiurl + domain);
    }

    if (string.IsNullOrEmpty(json))
    {
        return;
    }

    ParsedResult response = JsonConvert.DeserializeObject<ParsedResult>(json);
    if (response != null)
    {
        pr.data = response.data;
    }
}
/// <summary>
/// Creates a reader/writer: loads the source domains from <paramref name="filepath"/>,
/// starts the result list with the header entry and stores the output path and option flags.
/// </summary>
/// <param name="filepath">Path handed to <c>GetCsvDomains</c> to load the sources.</param>
/// <param name="resultpath">Stored as the result file path.</param>
/// <param name="dp">Stored in <c>da_pa</c>.</param>
/// <param name="ct">Stored in <c>cf_tf</c>.</param>
/// <param name="write_headers">Stored in <c>headers</c>.</param>
/// <param name="dateparsing">Stored in <c>dateparse</c>.</param>
public CsvReaderWriter(string filepath, string resultpath, bool dp, bool ct, bool write_headers, bool dateparsing)
{
    // Input side.
    Sources = GetCsvDomains(filepath);
    nextposition = 0;

    // Output side: the first entry of the result list is the header entry.
    result = new List<ParsedResult> { ParsedResult.getHeaders() };
    fileresultpath = resultpath;

    // Option flags.
    da_pa = dp;
    cf_tf = ct;
    headers = write_headers;
    dateparse = dateparsing;
}
/// <summary>
/// Fetches the page for <paramref name="domain"/> and extracts the expiration and
/// creation dates from the element with class <c>whois-response</c>.
/// </summary>
/// <param name="domain">Domain to look up; always used as the result's <c>Url</c>.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c>.</param>
/// <returns>
/// A result with both dates (the text before the first 'T' after each label) when
/// the expiration date is found. Otherwise, including on any exception,
/// <c>expiridate</c> is "not found" and <c>creationdate</c> is empty.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    try
    {
        ParsedResult parsed = new ParsedResult();
        parsed.Url = domain;

        var document = new HtmlAgilityPack.HtmlDocument();
        string markup = getHTML(domain, proxy);

        string expires = string.Empty;
        string created = string.Empty;

        if (markup != null)
        {
            document.LoadHtml(markup);
            HtmlAgilityPack.HtmlNode whoisBlock = document.DocumentNode.SelectSingleNode("//div[@class=\'whois-response\']");
            if (whoisBlock != null)
            {
                expires = Regex.Match(whoisBlock.InnerHtml, @"Registrar Registration Expiration Date: (.*?)T").Groups[1].Value;

                // The creation date is only looked up once an expiration date exists.
                if (expires != string.Empty)
                {
                    created = Regex.Match(whoisBlock.InnerHtml, @"Creation Date: (.*?)T").Groups[1].Value;
                }
            }
        }

        // Every failure mode (no page, no whois block, no expiration match)
        // ends with an empty expiration string and yields the placeholder pair.
        bool hasExpiration = expires != string.Empty;
        parsed.expiridate = hasExpiration ? expires : "not found";
        parsed.creationdate = hasExpiration ? created : "";
        return parsed;
    }
    catch (Exception)
    {
        return new ParsedResult { Url = domain, creationdate = "", expiridate = "not found" };
    }
}
/// <summary>
/// Fetches the statistics page for <paramref name="domain"/> and extracts rank,
/// engagement figures, organic search share and the per-country breakdown.
/// </summary>
/// <param name="domain">Domain to look up; used as the fallback <c>Url</c>.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c>.</param>
/// <returns>
/// The populated result. When the page cannot be fetched, the domain heading is
/// missing, or an exception occurs, a result with <paramref name="domain"/> as
/// <c>Url</c> and "-" as <c>Rank</c> is returned instead.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    try
    {
        ParsedResult result = new ParsedResult();
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

        string html = getHTML(domain, proxy);
        if (html == null)
        {
            result.Url = domain;
            result.Rank = "-";
            return result;
        }

        doc.LoadHtml(html);
        HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//h1[@class=\"domain text-center-xs text-left-not-xs\"]");
        if (node == null)
        {
            result.Url = domain;
            result.Rank = "-";
            return result;
        }

        // Domain names are not linguistic text: lower-case them culture-independently
        // so the result does not change with the machine's locale.
        result.Url = node.InnerText.Trim().ToLowerInvariant();

        node = doc.DocumentNode.SelectSingleNode("//div[@id=\"worldRanking-item\"]//div[@class=\"rankValue\"]");
        if (node != null)
        {
            // Rank is the text between '#' and the next tag.
            var rg = new Regex(@"#(.*?)<");
            result.Rank = rg.Match(node.InnerHtml.TrimEnd()).Groups[1].Value;
        }

        HtmlAgilityPack.HtmlNodeCollection nc = doc.DocumentNode.SelectNodes("//td[@class=\"text-right\"]");

        // The four engagement cells are read by position; skip them when the page
        // has fewer cells instead of throwing and losing everything parsed so far.
        if (nc != null && nc.Count >= 4)
        {
            result.Overall_Visits = nc[0].InnerText;
            result.Time_On_Site = nc[1].InnerText;
            result.Pages_per_Visit = nc[2].InnerText.Replace('.', ',');
            result.Bounce_Rate = nc[3].InnerText;
        }

        node = doc.DocumentNode.SelectSingleNode("//div[@id=\"review\"]");
        if (node != null)
        {
            var rg = new Regex("\"Organic Search\" [(](.*?)%");
            string organic = rg.Match(node.InnerText).Groups[1].Value.TrimEnd();

            // Keep only the part before the first '.'.
            int dot = organic.IndexOf('.');
            if (dot >= 0)
            {
                organic = organic.Remove(dot);
            }

            result.Organic_Search = organic;
        }

        HtmlAgilityPack.HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//table[@id=\"countriesBreakdownTable\"]//tr");
        if (htmlNodes != null)
        {
            foreach (HtmlAgilityPack.HtmlNode row in htmlNodes)
            {
                HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td");
                HtmlAgilityPack.HtmlNode shareCell = row.SelectSingleNode("td/div[@class=\"shareValue\"]");

                // Rows without both cells (e.g. a header row) are skipped rather
                // than aborting the whole parse with a NullReferenceException.
                if (countryCell == null || shareCell == null)
                {
                    continue;
                }

                result.CountresAdd(countryCell.InnerText, shareCell.InnerText);
            }
        }

        result.CountraseRelease();
        return result;
    }
    catch (Exception ex)
    {
        // NOTE(review): this shows a modal dialog from inside the parser for every
        // failed domain — confirm that is intended rather than logging.
        MessageBox.Show(ex.ToString());
        return new ParsedResult { Url = domain, Rank = "-" };
    }
}
/// <summary>
/// Fetches the statistics page for <paramref name="domain"/> and extracts rank,
/// engagement figures, search share, a visits figure and the per-country breakdown.
/// </summary>
/// <param name="domain">Domain to look up; used as the fallback <c>Url</c>.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c>.</param>
/// <returns>
/// The populated result. When the page cannot be fetched, the heading is missing,
/// or an exception occurs, a result with <paramref name="domain"/> as <c>Url</c>
/// and "-" as <c>Rank</c> is returned. When the page itself reports "-" as rank,
/// the result is returned immediately with <paramref name="domain"/> as <c>Url</c>.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    try
    {
        ParsedResult result = new ParsedResult();
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

        string html = getHTML(domain, proxy);
        if (html == null)
        {
            result.Url = domain;
            result.Rank = "-";
            return result;
        }

        doc.LoadHtml(html);
        HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//span[@class=\'span8\']//h1[@class=\'h3\']");
        if (node == null)
        {
            result.Url = domain;
            result.Rank = "-";
            return result;
        }

        result.Url = node.InnerText.Split(' ')[0].Trim();

        node = doc.DocumentNode.SelectSingleNode("//section[@id=\'rank-panel-content\']//span[@class=\'globleRank\']//strong[@class=\'metrics-data align-vmiddle\']");
        if (node != null)
        {
            result.Rank = node.InnerText.Trim();
        }

        // A "-" rank on the page ends parsing early; only Url and Rank are reported.
        if (result.Rank == "-")
        {
            result.Url = domain;
            return result;
        }

        HtmlAgilityPack.HtmlNodeCollection nc = doc.DocumentNode.SelectNodes("//section[@id=\'engagement-content\']//strong[@class=\'metrics-data align-vmiddle\']");

        // The three engagement values are read by position; skip them when fewer
        // are present instead of throwing and discarding the whole result.
        if (nc != null && nc.Count >= 3)
        {
            result.Bounce_Rate = nc[0].InnerText.Trim();
            result.Pages_per_Visit = nc[1].InnerText.Trim().Replace('.', ',');
            result.Time_On_Site = nc[2].InnerText.Trim();
        }

        node = doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//strong[@class=\'metrics-data align-vmiddle\']");
        if (node != null)
        {
            result.Organic_Search = node.InnerText.Trim();
            if (result.Organic_Search != "-")
            {
                decimal firstvisitors = ParseDecimalWithoutSuffix(result.Organic_Search);
                decimal addvisitors = 0;

                node = doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-down color-gen2 \']");
                if (node == null)
                {
                    node = doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-up \']");
                }

                if (node != null)
                {
                    addvisitors = ParseDecimalWithoutSuffix(node.InnerText.Trim());
                }

                string total = (firstvisitors + addvisitors).ToString().Replace(',', '.');

                // Only strip trailing zeros that follow a decimal point; trimming
                // an integer would turn "10" into "1" and "0" into "".
                if (total.IndexOf('.') >= 0)
                {
                    total = total.TrimEnd('0').TrimEnd('.');
                }

                result.Overall_Visits = total + "K";
            }
            else
            {
                result.Overall_Visits = "-";
            }

            // Keep only the part before the first '.'.
            int dot = result.Organic_Search.IndexOf('.');
            if (dot >= 0)
            {
                result.Organic_Search = result.Organic_Search.Remove(dot);
            }
        }

        HtmlAgilityPack.HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//table[@id=\'demographics_div_country_table\']/tbody//tr");

        // Short-circuit '&&': with '&' the Count access ran even for a null collection.
        if (htmlNodes != null && htmlNodes.Count > 1)
        {
            foreach (HtmlAgilityPack.HtmlNode row in htmlNodes)
            {
                HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td[1]");
                HtmlAgilityPack.HtmlNode shareCell = row.SelectSingleNode("td[2]");
                if (countryCell == null || shareCell == null)
                {
                    continue;
                }

                // The country name is the text after the first ';' in the cell
                // (presumably following an HTML entity — verify against the page).
                string[] parts = countryCell.InnerText.Split(';');
                if (parts.Length < 2)
                {
                    continue;
                }

                result.CountresAdd(parts[1].Trim(), shareCell.InnerText.Trim());
            }
        }

        result.CountraseRelease();
        return result;
    }
    catch (Exception)
    {
        // Best-effort parser: any failure degrades to the placeholder result.
        return new ParsedResult { Url = domain, Rank = "-" };
    }
}

/// <summary>
/// Drops the last two characters of <paramref name="text"/>, replaces '.' with ','
/// and parses the remainder as a decimal using the current culture.
/// </summary>
/// <returns>The parsed value, or 0 when the text is too short or not a number.</returns>
private static decimal ParseDecimalWithoutSuffix(string text)
{
    if (text == null || text.Length < 2)
    {
        return 0;
    }

    string number = text.Remove(text.Length - 2).Replace('.', ',');
    decimal value;
    return decimal.TryParse(number, out value) ? value : 0;
}