예제 #1
0
            /// <summary>
            /// Reads the HTML file at <paramref name="path"/> and builds one <see cref="OriginRow"/>
            /// for every node matched by <c>//article/div[@class='list']/div[@class='item']</c>.
            /// </summary>
            /// <param name="path">Path of the HTML file to parse.</param>
            /// <returns>The parsed rows; empty when no item node is found.</returns>
            public async Task <IEnumerable <OriginRow> > CrawlerMaster(string path)
            {
                List <OriginRow> rows = new List <OriginRow>();
                string           html = await File.ReadAllTextAsync(path);

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(html);
                HtmlNode           root  = doc.DocumentNode;
                HtmlNodeCollection items = root.SelectNodes("//article/div[@class='list']/div[@class='item']");

                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (items == null)
                {
                    return(rows);
                }

                foreach (HtmlNode item in items)
                {
                    OriginRow row = new OriginRow();

                    // A missing summary leaves row.Summary untouched instead of throwing.
                    HtmlNode summary = item.SelectSingleNode("./div[@class='item-summary']");
                    if (summary != null)
                    {
                        row.Summary = WebUtility.HtmlDecode(summary.InnerText).Trim();
                    }

                    // An item without detail paragraphs is still emitted, with only the summary set.
                    HtmlNodeCollection details = item.SelectNodes("./div[@class='item-details']/p");
                    if (details != null)
                    {
                        foreach (HtmlNode detail in details)
                        {
                            HtmlNode keyNode   = detail.SelectSingleNode("./span[1]");
                            HtmlNode valueNode = detail.SelectSingleNode("./span[2]");

                            // Each paragraph is expected to hold a label span and a value span;
                            // paragraphs that lack either one are skipped.
                            if (keyNode == null || valueNode == null)
                            {
                                continue;
                            }

                            string key   = keyNode.InnerText.Trim();
                            string value = valueNode.InnerText.Trim();
                            switch (key)
                            {
                            // Recognised labels that are intentionally not stored on the row.
                            case "Notice Type:":
                            case "Approval Number:":
                                break;

                            case "Executing Agency:":
                                row.ExecutingAgency = value;
                                break;

                            case "Contractor Name:":
                                row.ContractorName = value;
                                break;

                            case "Address:":
                                row.ContractorAddress = value;
                                break;

                            case "Total Contract Amount (US$):":
                                row.TotalContractAmount = value;
                                break;

                            case "Contract Amount Financed by ADB (US$):":
                                row.FinancedByAdb = value;
                                break;
                            }
                        }
                    }
                    rows.Add(row);
                }
                return(rows);
            }
예제 #2
0
            /// <summary>
            /// Parses the HTML returned by <c>GetHtml(url)</c> and appends one <see cref="OriginRow"/>
            /// to <paramref name="rows"/> for every <c>tr</c> under <c>//tbody[@id='posts']</c>.
            /// For each row with a link in the fourth cell, the link is made absolute and
            /// passed to <c>CrawlerDetail</c> to fill <c>ProjectDetail</c>.
            /// </summary>
            /// <param name="rows">List that receives the parsed rows.</param>
            /// <param name="url">Address of the listing page to fetch.</param>
            /// <returns>
            /// <c>true</c> when at least one table row was found; <c>false</c> when
            /// <c>GetHtml</c> returned <c>null</c> or the table has no rows.
            /// </returns>
            public async Task <bool> CrawlerMaster(List <OriginRow> rows, string url)
            {
                string html = await GetHtml(url);

                if (html == null)
                {
                    return(false);
                }

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(html);
                HtmlNode           root  = doc.DocumentNode;
                HtmlNodeCollection items = root.SelectNodes("//tbody[@id='posts']/tr");

                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (items == null || !items.Any())
                {
                    return(false);
                }

                foreach (HtmlNode item in items)
                {
                    OriginRow row = new OriginRow();

                    // Missing cells leave the corresponding field null instead of aborting the crawl.
                    row.IssueDate   = item.SelectSingleNode("./td[1]")?.InnerText.Trim();
                    row.ClosingDate = item.SelectSingleNode("./td[2]")?.InnerText.Trim();
                    row.Location    = item.SelectSingleNode("./td[3]")?.InnerText.Trim();

                    // Look the anchor up once; it supplies both the project name and the link.
                    HtmlNode anchor = item.SelectSingleNode("./td[4]/a");
                    row.ProjectName = anchor?.InnerText.Trim();

                    string rawHref = anchor?.Attributes["href"]?.Value;
                    if (rawHref != null)
                    {
                        string href = WebUtility.HtmlDecode(rawHref);

                        // Ordinal comparisons: URL prefixes are not linguistic text.
                        if (href.StartsWith("//", System.StringComparison.Ordinal))
                        {
                            // Protocol-relative link.
                            row.ProjectLink = $"https:{href}";
                        }
                        else if (href.StartsWith("/", System.StringComparison.Ordinal))
                        {
                            // Site-root-relative link.
                            row.ProjectLink = $"https://www.ebrd.com{href}";
                        }
                        else if (href.StartsWith("http:", System.StringComparison.OrdinalIgnoreCase) ||
                                 href.StartsWith("https:", System.StringComparison.OrdinalIgnoreCase))
                        {
                            // Already absolute; URL schemes are case-insensitive.
                            row.ProjectLink = href;
                        }
                        else
                        {
                            // Bare relative path: resolved against the site root.
                            row.ProjectLink = $"https://www.ebrd.com/{href}";
                        }
                        row.ProjectDetail = await CrawlerDetail(row.ProjectLink);
                    }

                    row.Sector   = item.SelectSingleNode("./td[5]")?.InnerText.Trim();
                    row.Contract = item.SelectSingleNode("./td[6]")?.InnerText.Trim();
                    row.Type     = item.SelectSingleNode("./td[7]")?.InnerText.Trim();
                    rows.Add(row);
                }
                return(true);
            }
예제 #3
0
            /// <summary>
            /// Parses the HTML returned by <c>GetHtml(url)</c>, appends one <see cref="OriginRow"/>
            /// to <paramref name="rows"/> for every node matched by
            /// <c>//article/div[@class='list']/div[@class='item']</c>, and writes a copy of the
            /// page to <c>D:\temp\output\html</c>.
            /// </summary>
            /// <param name="rows">List that receives the parsed rows.</param>
            /// <param name="url">
            /// Address of the listing page. The text after the first <c>?</c> (or the whole
            /// value when there is none) plus <c>.html</c> becomes the saved file name.
            /// </param>
            /// <returns>
            /// <c>true</c> when at least one item node was found; <c>false</c> when
            /// <c>GetHtml</c> returned <c>null</c> or no item matched.
            /// </returns>
            public async Task <bool> CrawlerMaster(List <OriginRow> rows, string url)
            {
                string html = await GetHtml(url);

                if (html == null)
                {
                    return(false);
                }
                string fileName = $"{url.Substring(url.IndexOf('?') + 1)}.html";
                string savePath = System.IO.Path.Combine(@"D:\temp\output\html", fileName);
                // Started here so the write overlaps with parsing; it is awaited before returning.
                var    saveTask = System.IO.File.WriteAllTextAsync(savePath, html);

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(html);
                HtmlNode           root  = doc.DocumentNode;
                HtmlNodeCollection items = root.SelectNodes("//article/div[@class='list']/div[@class='item']");

                // SelectNodes yields null (not an empty collection) when nothing matches.
                bool found = items != null && items.Any();

                if (found)
                {
                    foreach (HtmlNode item in items)
                    {
                        OriginRow row = new OriginRow();

                        // A missing summary leaves row.Summary untouched instead of throwing.
                        HtmlNode summary = item.SelectSingleNode("./div[@class='item-summary']");
                        if (summary != null)
                        {
                            row.Summary = WebUtility.HtmlDecode(summary.InnerText).Trim();
                        }

                        // An item without detail paragraphs is still emitted, with only the summary set.
                        HtmlNodeCollection details = item.SelectNodes("./div[@class='item-details']/p");
                        if (details != null)
                        {
                            foreach (HtmlNode detail in details)
                            {
                                HtmlNode keyNode   = detail.SelectSingleNode("./span[1]");
                                HtmlNode valueNode = detail.SelectSingleNode("./span[2]");

                                // Each paragraph is expected to hold a label span and a value span;
                                // paragraphs that lack either one are skipped.
                                if (keyNode == null || valueNode == null)
                                {
                                    continue;
                                }

                                string key   = keyNode.InnerText.Trim();
                                string value = valueNode.InnerText.Trim();
                                switch (key)
                                {
                                // Recognised labels that are intentionally not stored on the row.
                                case "Notice Type:":
                                case "Approval Number:":
                                    break;

                                case "Executing Agency:":
                                    row.ExecutingAgency = value;
                                    break;

                                case "Contractor Name:":
                                    row.ContractorName = value;
                                    break;

                                case "Address:":
                                    row.ContractorAddress = value;
                                    break;

                                case "Total Contract Amount (US$):":
                                    row.TotalContractAmount = value;
                                    break;

                                case "Contract Amount Financed by ADB (US$):":
                                    row.FinancedByAdb = value;
                                    break;
                                }
                            }
                        }
                        rows.Add(row);
                    }
                }

                // The page dump is a side artefact: wait for it so the file is complete when this
                // method returns, but keep it best-effort so an I/O failure does not discard the
                // rows that were just parsed.
                try
                {
                    await saveTask;
                }
                catch (System.Exception ex) when (ex is System.IO.IOException || ex is System.UnauthorizedAccessException)
                {
                    System.Diagnostics.Trace.TraceWarning($"Could not save '{savePath}': {ex.Message}");
                }

                return(found);
            }