Пример #1
0
 /// <summary>
 /// Creates a reader/writer: loads the source domains from <paramref name="filepath"/>,
 /// resets the read cursor and seeds the result buffer with the header row.
 /// </summary>
 /// <param name="filepath">Path handed to <c>GetCsvDomains</c> to obtain the source domains.</param>
 /// <param name="resultpath">Path stored in <c>fileresultpath</c> for the result file.</param>
 public CsvReaderWriter(string filepath, string resultpath)
 {
     Sources      = GetCsvDomains(filepath);
     nextposition = 0;

     // The first entry of the result buffer is always the header row.
     result = new List <ParsedResult> { ParsedResult.getHeaders() };

     fileresultpath = resultpath;
 }
Пример #2
0
        /// <summary>
        /// Fetches the page for <paramref name="domain"/> via <c>getHTML</c> and scrapes rank, engagement,
        /// organic-search and per-country values out of it.
        /// </summary>
        /// <param name="domain">Domain to look up; also used as the result URL when the page yields none.</param>
        /// <param name="proxy">Optional proxy, forwarded unchanged to <c>getHTML</c>.</param>
        /// <returns>
        /// A <see cref="ParsedResult"/> filled with whatever could be located on the page. When
        /// <c>getHTML</c> returns <c>null</c>, only <c>Url</c> (the requested domain) and <c>Rank</c> ("-") are set.
        /// </returns>
        public override ParsedResult Parse(string domain, string proxy = null)
        {
            ParsedResult result = new ParsedResult();

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            string html = getHTML(domain, proxy);

            if (html == null)
            {
                result.Url  = domain;
                result.Rank = "-";
                return(result);
            }
            doc.LoadHtml(html);

            // SelectSingleNode returns null when nothing matches, so fall back to the requested
            // domain instead of dereferencing a missing title node.
            HtmlAgilityPack.HtmlNode titleNode =
                doc.DocumentNode.SelectSingleNode("//h1[@class=\"domain text-center-xs text-left-not-xs\"]");
            result.Url = titleNode != null
                ? titleNode.InnerText.Trim().ToLowerInvariant()   // domain names must not be lower-cased per culture
                : domain;

            HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id=\"worldRanking-item\"]//div[@class=\"rankValue\"]");
            if (node != null)
            {
                // The rank is the text between '#' and the next tag opening.
                var rg = new Regex(@"#(.*?)<");
                result.Rank = rg.Match(node.InnerHtml.TrimEnd()).Groups[1].Value;
            }

            HtmlAgilityPack.HtmlNodeCollection nc = doc.DocumentNode.SelectNodes("//td[@class=\"text-right\"]");
            // All four engagement cells are read by index, so require that they all exist.
            if (nc != null && nc.Count >= 4)
            {
                result.Overall_Visits  = nc[0].InnerText;
                result.Time_On_Site    = nc[1].InnerText;
                // Decimal point is rewritten to a comma for the output.
                result.Pages_per_Visit = nc[2].InnerText.Replace('.', ',');
                result.Bounce_Rate     = nc[3].InnerText;
            }

            node = doc.DocumentNode.SelectSingleNode("//div[@id=\"review\"]");
            if (node != null)
            {
                var rg = new Regex("\"Organic Search\" [(](.*?)%");
                result.Organic_Search = rg.Match(node.InnerText).Groups[1].Value.TrimEnd();
            }

            HtmlAgilityPack.HtmlNodeCollection htmlNodes =
                doc.DocumentNode.SelectNodes("//table[@id=\"countriesBreakdownTable\"]//tr");
            if (htmlNodes != null)
            {
                foreach (HtmlAgilityPack.HtmlNode row in htmlNodes)
                {
                    HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td");
                    HtmlAgilityPack.HtmlNode shareCell   = row.SelectSingleNode("td/div[@class=\"shareValue\"]");

                    // Rows without both cells (e.g. header rows) carry no country data; skip them.
                    if (countryCell == null || shareCell == null)
                    {
                        continue;
                    }
                    result.CountresAdd(countryCell.InnerText, shareCell.InnerText);
                }
            }
            result.CountraseRelease();
            return(result);
        }
Пример #3
0
        /// <summary>
        /// Fetches the page for <paramref name="domain"/> via <c>getHTML</c> and scrapes rank, engagement,
        /// search-traffic and per-country values out of it.
        /// </summary>
        /// <param name="domain">Domain to look up; also used as the result URL when the page yields none.</param>
        /// <param name="proxy">Optional proxy, forwarded unchanged to <c>getHTML</c>.</param>
        /// <returns>
        /// A <see cref="ParsedResult"/> filled with whatever could be located on the page. When
        /// <c>getHTML</c> returns <c>null</c>, or the scraped rank is "-", only <c>Url</c> (the requested
        /// domain) and <c>Rank</c> ("Error 404") are meaningful.
        /// </returns>
        /// <exception cref="FormatException">A scraped numeric value is not a valid decimal.</exception>
        public override ParsedResult Parse(string domain, string proxy = null)
        {
            ParsedResult result = new ParsedResult();

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            string html = getHTML(domain, proxy);

            if (html == null)
            {
                result.Url  = domain;
                result.Rank = "Error 404";
                return(result);
            }
            doc.LoadHtml(html);

            // SelectSingleNode returns null when nothing matches, so fall back to the requested
            // domain instead of dereferencing a missing title node.
            HtmlAgilityPack.HtmlNode node =
                doc.DocumentNode.SelectSingleNode("//span[@class=\'span8\']//h1[@class=\'h3\']");
            result.Url = node != null ? node.InnerText.Split(' ')[0].Trim() : domain;

            // Alternative XPath: //span[@class='col-pad']//strong[@class='metrics-data align-vmiddle']
            node = doc.DocumentNode.SelectSingleNode("//section[@id=\'rank-panel-content\']//span[@class=\'globleRank\']//strong[@class=\'metrics-data align-vmiddle\']");
            if (node != null)
            {
                result.Rank = node.InnerText.Trim();
            }

            // A rank of "-" is reported the same way as a page that could not be fetched.
            if (result.Rank == "-")
            {
                result.Url  = domain;
                result.Rank = "Error 404";
                return(result);
            }

            HtmlAgilityPack.HtmlNodeCollection nc = doc.DocumentNode.SelectNodes("//section[@id=\'engagement-content\']//strong[@class=\'metrics-data align-vmiddle\']");
            // All three engagement values are read by index, so require that they all exist.
            if (nc != null && nc.Count >= 3)
            {
                result.Bounce_Rate     = nc[0].InnerText.Trim();
                // Decimal point is rewritten to a comma for the output.
                result.Pages_per_Visit = nc[1].InnerText.Trim().Replace('.', ',');
                result.Time_On_Site    = nc[2].InnerText.Trim();
            }


            decimal firstvisitors = 0, addvisitors = 0;

            node =
                doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//strong[@class=\'metrics-data align-vmiddle\']");
            if (node != null)
            {
                string organic = node.InnerText.Trim();
                result.Organic_Search = organic;
                if (result.Organic_Search != "-")
                {
                    // Drop the two-character suffix and parse with the invariant culture, so the
                    // result does not depend on the decimal separator of the machine's culture.
                    string s = result.Organic_Search.Remove(organic.Length - 2);
                    firstvisitors = Convert.ToDecimal(s, System.Globalization.CultureInfo.InvariantCulture);

                    node =
                        doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-down color-gen2 \']");
                    if (node == null)
                    {
                        node =
                            doc.DocumentNode.SelectSingleNode("//section[@id=\'keyword-content\']//span[@class=\'align-vmiddle change-wrapper change-up \']");
                    }
                    if (node != null)
                    {
                        string change = node.InnerText.Trim();
                        s           = change.Remove(change.Length - 2);
                        addvisitors = Convert.ToDecimal(s, System.Globalization.CultureInfo.InvariantCulture);
                    }

                    string total = (firstvisitors + addvisitors).ToString(System.Globalization.CultureInfo.InvariantCulture);
                    // Strip trailing zeros only from a fractional part; trimming a whole number
                    // would turn "100" into "1".
                    if (total.IndexOf('.') >= 0)
                    {
                        total = total.TrimEnd('0').TrimEnd('.');
                    }
                    result.Overall_Visits = total + "K";
                }
                else
                {
                    result.Overall_Visits = "-";
                }
            }


            // Alternative XPath: //table[@id='demographics_div_country_table']/tbody//tr/td[2]
            HtmlAgilityPack.HtmlNodeCollection htmlNodes =
                doc.DocumentNode.SelectNodes("//table[@id=\'demographics_div_country_table\']/tbody//tr");
            // '&&' short-circuits, so Count is never read on a null collection.
            if (htmlNodes != null && htmlNodes.Count > 1)
            {
                foreach (HtmlAgilityPack.HtmlNode row in htmlNodes)
                {
                    HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td[1]");
                    HtmlAgilityPack.HtmlNode shareCell   = row.SelectSingleNode("td[2]");

                    // Rows without both cells carry no country data; skip them.
                    if (countryCell == null || shareCell == null)
                    {
                        continue;
                    }

                    // The country name is the text after the first ';' in the cell; when there is
                    // no ';', use the whole text.
                    string[] parts   = countryCell.InnerText.Split(';');
                    string   country = (parts.Length > 1 ? parts[1] : parts[0]).Trim();
                    string   procent = shareCell.InnerText.Trim();
                    result.CountresAdd(country, procent);
                }
            }
            result.CountraseRelease();
            return(result);
        }
Пример #4
0
 /// <summary>
 /// Appends <paramref name="parsed"/> to the <c>result</c> collection.
 /// </summary>
 /// <param name="parsed">The parsed entry to store; it is added as-is.</param>
 public void insertParsed(ParsedResult parsed) => result.Add(parsed);