/// <summary>
/// Initializes the instance: fills <c>Sources</c> from <c>GetCsvDomains</c>, resets the
/// read position, seeds the result list with <c>ParsedResult.getHeaders()</c> and keeps
/// the result path for later use.
/// </summary>
/// <param name="filepath">Path handed to <c>GetCsvDomains</c>.</param>
/// <param name="resultpath">Path stored in <c>fileresultpath</c>.</param>
public CsvReaderWriter(string filepath, string resultpath)
{
    // Load the sources first, exactly as before, so the call observes untouched fields.
    Sources = GetCsvDomains(filepath);

    nextposition = 0;

    // The list starts with the headers entry as its first element.
    result = new List<ParsedResult>
    {
        ParsedResult.getHeaders()
    };

    fileresultpath = resultpath;
}
/// <summary>
/// Fetches the page for <paramref name="domain"/> through <c>getHTML</c> and extracts the
/// URL, rank, visit metrics, organic-search share and per-country shares from its markup.
/// </summary>
/// <param name="domain">Domain to look up; also used as the URL when the page heading is missing.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c> unchanged.</param>
/// <returns>
/// The populated result. When <c>getHTML</c> returns null only <c>Url</c> (the requested
/// domain) and <c>Rank</c> ("-") are set. Sections of the page that cannot be found are
/// skipped instead of raising an exception.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    ParsedResult result = new ParsedResult();

    string html = getHTML(domain, proxy);
    if (html == null)
    {
        result.Url = domain;
        result.Rank = "-";
        return result;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    HtmlAgilityPack.HtmlNode root = doc.DocumentNode;

    // SelectSingleNode yields null when nothing matches; fall back to the requested
    // domain rather than dereferencing a missing heading.
    HtmlAgilityPack.HtmlNode titleNode = root.SelectSingleNode("//h1[@class=\"domain text-center-xs text-left-not-xs\"]");
    result.Url = titleNode != null ? titleNode.InnerText.Trim().ToLower() : domain;

    // The rank is the text between '#' and the next tag inside the rank element.
    HtmlAgilityPack.HtmlNode rankNode = root.SelectSingleNode("//div[@id=\"worldRanking-item\"]//div[@class=\"rankValue\"]");
    if (rankNode != null)
    {
        result.Rank = Regex.Match(rankNode.InnerHtml.TrimEnd(), @"#(.*?)<").Groups[1].Value;
    }

    // The four metrics are read positionally, so all four cells must be present.
    HtmlAgilityPack.HtmlNodeCollection metricCells = root.SelectNodes("//td[@class=\"text-right\"]");
    if (metricCells != null && metricCells.Count >= 4)
    {
        result.Overall_Visits = metricCells[0].InnerText;
        result.Time_On_Site = metricCells[1].InnerText;
        // The decimal point is written out as a comma.
        result.Pages_per_Visit = metricCells[2].InnerText.Replace('.', ',');
        result.Bounce_Rate = metricCells[3].InnerText;
    }

    // Organic-search share: the value between "(" and "%" after the quoted label.
    HtmlAgilityPack.HtmlNode reviewNode = root.SelectSingleNode("//div[@id=\"review\"]");
    if (reviewNode != null)
    {
        result.Organic_Search = Regex.Match(reviewNode.InnerText, "\"Organic Search\" [(](.*?)%").Groups[1].Value.TrimEnd();
    }

    HtmlAgilityPack.HtmlNodeCollection countryRows = root.SelectNodes("//table[@id=\"countriesBreakdownTable\"]//tr");
    if (countryRows != null)
    {
        foreach (HtmlAgilityPack.HtmlNode row in countryRows)
        {
            HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td");
            HtmlAgilityPack.HtmlNode shareCell = row.SelectSingleNode("td/div[@class=\"shareValue\"]");

            // Rows lacking either cell (e.g. a header row) carry no country data.
            if (countryCell == null || shareCell == null)
            {
                continue;
            }

            result.CountresAdd(countryCell.InnerText, shareCell.InnerText);
        }
    }

    result.CountraseRelease();
    return result;
}
/// <summary>
/// Fetches the page for <paramref name="domain"/> through <c>getHTML</c> and extracts the
/// URL, rank, engagement metrics, search share, a derived overall-visits figure and the
/// per-country shares from its markup.
/// </summary>
/// <param name="domain">Domain to look up; also used as the URL when the page heading is missing.</param>
/// <param name="proxy">Optional proxy, passed through to <c>getHTML</c> unchanged.</param>
/// <returns>
/// The populated result. When <c>getHTML</c> returns null, or the page reports a rank of "-",
/// only <c>Url</c> (the requested domain) and <c>Rank</c> ("Error 404") are set. Sections of
/// the page that cannot be found or parsed are skipped instead of raising an exception.
/// </returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    ParsedResult result = new ParsedResult();

    string html = getHTML(domain, proxy);
    if (html == null)
    {
        result.Url = domain;
        result.Rank = "Error 404";
        return result;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    HtmlAgilityPack.HtmlNode root = doc.DocumentNode;

    // SelectSingleNode yields null when nothing matches; fall back to the requested
    // domain rather than dereferencing a missing heading. The URL is the first
    // space-separated token of the heading.
    HtmlAgilityPack.HtmlNode titleNode = root.SelectSingleNode("//span[@class=\'span8\']//h1[@class=\'h3\']");
    result.Url = titleNode != null ? titleNode.InnerText.Split(' ')[0].Trim() : domain;

    // Alternative selector left by the original author:
    //   //span[@class='col-pad']//strong[@class='metrics-data align-vmiddle']
    HtmlAgilityPack.HtmlNode rankNode = root.SelectSingleNode("//section[@id=\'rank-panel-content\']//span[@class=\'globleRank\']//strong[@class=\'metrics-data align-vmiddle\']");
    if (rankNode != null)
    {
        result.Rank = rankNode.InnerText.Trim();
    }

    // A rank of "-" is reported the same way as a failed download.
    if (result.Rank == "-")
    {
        result.Url = domain;
        result.Rank = "Error 404";
        return result;
    }

    // The three engagement metrics are read positionally, so all three must be present.
    HtmlAgilityPack.HtmlNodeCollection engagementCells = root.SelectNodes("//section[@id=\'engagement-content\']//strong[@class=\'metrics-data align-vmiddle\']");
    if (engagementCells != null && engagementCells.Count >= 3)
    {
        result.Bounce_Rate = engagementCells[0].InnerText.Trim();
        // The decimal point is written out as a comma.
        result.Pages_per_Visit = engagementCells[1].InnerText.Trim().Replace('.', ',');
        result.Time_On_Site = engagementCells[2].InnerText.Trim();
    }

    HtmlAgilityPack.HtmlNode searchNode = root.SelectSingleNode("//section[@id=\'keyword-content\']//strong[@class=\'metrics-data align-vmiddle\']");
    if (searchNode != null)
    {
        result.Organic_Search = searchNode.InnerText.Trim();

        decimal baseValue;
        if (result.Organic_Search != "-" && TryParseKeywordMetric(result.Organic_Search, out baseValue))
        {
            HtmlAgilityPack.HtmlNode changeNode =
                root.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-down color-gen2 \']")
                ?? root.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-up \']");

            // NOTE(review): the change value is added whether the "change-down" or the
            // "change-up" span matched — confirm that this is intended.
            decimal changeValue = 0;
            if (changeNode != null)
            {
                decimal parsedChange;
                if (TryParseKeywordMetric(changeNode.InnerText.Trim(), out parsedChange))
                {
                    changeValue = parsedChange;
                }
            }

            result.Overall_Visits = FormatVisitsInThousands(baseValue + changeValue);
        }
        else
        {
            // No usable figure on the page: use the same placeholder as for "-".
            result.Overall_Visits = "-";
        }
    }

    // Alternative selector left by the original author:
    //   //table[@id='demographics_div_country_table']/tbody//tr/td[2]
    HtmlAgilityPack.HtmlNodeCollection countryRows = root.SelectNodes("//table[@id=\'demographics_div_country_table\']/tbody//tr");

    // Short-circuit && so a missing table (null collection) is skipped, not dereferenced.
    // A table with a single row is ignored — presumably a "no data" placeholder; confirm.
    if (countryRows != null && countryRows.Count > 1)
    {
        foreach (HtmlAgilityPack.HtmlNode row in countryRows)
        {
            HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td[1]");
            HtmlAgilityPack.HtmlNode shareCell = row.SelectSingleNode("td[2]");
            if (countryCell == null || shareCell == null)
            {
                continue;
            }

            // The country name is the part after the first ';' in the cell text; when
            // there is no ';' the whole cell text is used.
            string[] parts = countryCell.InnerText.Split(';');
            string country = (parts.Length > 1 ? parts[1] : parts[0]).Trim();

            result.CountresAdd(country, shareCell.InnerText.Trim());
        }
    }

    result.CountraseRelease();
    return result;
}

// Drops the last two characters of the text (presumably a unit suffix — confirm) and parses
// the remainder as an invariant-culture decimal; returns false when that is not possible.
private static bool TryParseKeywordMetric(string text, out decimal value)
{
    value = 0;
    if (text == null || text.Length < 2)
    {
        return false;
    }

    return decimal.TryParse(
        text.Remove(text.Length - 2),
        System.Globalization.NumberStyles.Number,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
}

// Formats the value with '.' as decimal separator, strips trailing fractional zeros only
// (whole numbers such as 120 stay intact) and appends the "K" suffix.
private static string FormatVisitsInThousands(decimal value)
{
    string text = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
    if (text.IndexOf('.') >= 0)
    {
        text = text.TrimEnd('0').TrimEnd('.');
    }

    return text + "K";
}
/// <summary>
/// Appends <paramref name="parsed"/> to the end of the accumulated result list.
/// </summary>
/// <param name="parsed">Entry to append; it is stored as given.</param>
public void insertParsed(ParsedResult parsed)
{
    result.Add(parsed);
}