コード例 #1
0
ファイル: MakepoloCompany.cs プロジェクト: Qos-xin/Spider
        /// <summary>
        /// Downloads a company detail page and copies its fields into a new <see cref="CompanyDetails"/>.
        /// </summary>
        /// <param name="url">
        /// Address of the detail page. The run of digits directly after a '/' and directly before
        /// ".html" is parsed as the company id.
        /// </param>
        /// <returns>The populated <see cref="CompanyDetails"/> instance.</returns>
        /// <exception cref="Exception">
        /// Thrown for any failure (download error, missing node, unparsable id). The message is the
        /// original message followed by the URL in parentheses; the original exception is attached
        /// as <see cref="Exception.InnerException"/>. The rethrow happens after a five-second pause.
        /// </exception>
        public static async Task <CompanyDetails> GetDetailContext(string url)
        {
            var details = new CompanyDetails();

            try
            {
                var htmlWeb  = new HtmlAgilityPack.HtmlWeb();
                var document = await htmlWeb.LoadFromWebAsync(url);
                var root     = document.DocumentNode;

                var id = System.Text.RegularExpressions.Regex.Match(url, @"(?<=/)\d+(?=\.html)").Value;
                details.Id = Convert.ToInt64(id);

                // The XPaths below are absolute; a node that is not found yields null and the
                // resulting NullReferenceException is handled by the catch block.
                details.Name    = root.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[1]/span").InnerText.Replace("公司名称:", "").Trim();
                details.Address = root.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[3]/span").InnerText.Replace("公司地址:", "").Trim();
                details.Contect = root.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[2]/span").InnerText.Replace("法人代表:", "").Trim();
                // NOTE(review): the label stripped here is the fax label ("公司传真") although the
                // value is stored in Phone — confirm this is the intended field.
                details.Phone   = root.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[4]/div[2]/ul/li[7]/span").InnerText.Replace("公司传真:", "").Trim();
                details.Details = root.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[2]/div[2]").InnerText.Replace("&nbsp;", "").Trim();
            }
            catch (Exception ex)
            {
                // Pause before reporting the failure without blocking the calling thread.
                await Task.Delay(5000);
                // Keep the original exception so its type and stack trace stay available.
                throw new Exception(ex.Message + "(" + url + ")", ex);
            }

            return details;
        }
        /// <summary>
        /// Loads a video page and returns the href of one of the links found under the element
        /// whose id is "html5video_base".
        /// </summary>
        /// <param name="videourl">
        /// Address of the video page, e.g. a link returned by getvideos or one supplied by the user.
        /// </param>
        /// <param name="quality">
        /// 0 selects the first link (the low-resolution one according to the original author's note);
        /// any other value selects the second link (HD/SD according to the same note).
        /// </param>
        /// <returns>The value of the selected link's href attribute.</returns>
        public string getdownloadlink(string videourl, int quality)
        {
            var web = new HtmlAgilityPack.HtmlWeb();

            // Fetch the video info page; .Result blocks until the download has finished.
            var page       = web.LoadFromWebAsync(videourl).Result;
            var playerKids = page.GetElementbyId("html5video_base").ChildNodes;

            // Both qualities live under the second child; only the inner index differs.
            int    linkIndex = quality == 0 ? 0 : 1;
            string href      = playerKids[1].ChildNodes[linkIndex].ChildNodes[0].Attributes["href"].Value;

            Console.WriteLine("ok");

            return href;
        }
コード例 #3
0
ファイル: Program.cs プロジェクト: feiva/ChinesePoetryDotNet
        /// <summary>
        /// Downloads the listing pages in parallel, collects every entry that has non-empty
        /// content, and prints the total followed by each entry.
        /// </summary>
        /// <remarks>
        /// Page URLs are built from the MingJu format string with a one-based page number.
        /// Additions to the shared list are serialized with <c>syncLock</c>.
        /// </remarks>
        public static void Run()
        {
            ConsoleHelper.Info("============================Start============================");

            var htmlWeb   = new HtmlAgilityPack.HtmlWeb();
            var collected = new List <MingJu>();

            var loopResult = Parallel.For(0, MingJuMaxPage, pageIndex =>
            {
                var label   = $"Task#{pageIndex}";
                var pageUrl = string.Format(MingJu, pageIndex + 1, string.Empty, string.Empty);
                ConsoleHelper.Info($"Start: {label}");

                // .Result blocks this worker until the page has been downloaded.
                var document = htmlWeb.LoadFromWebAsync(pageUrl).Result;
                var rows     = document.DocumentNode.SelectNodes("/html/body/div[2]/div[1]/div[2]/div");
                if (rows != null)
                {
                    foreach (var row in rows)
                    {
                        var content = row.SelectSingleNode("a[1]")?.InnerText;
                        var source  = row.SelectSingleNode("a[2]")?.InnerText;

                        // Rows without content text are ignored.
                        if (!string.IsNullOrEmpty(content))
                        {
                            lock (syncLock)
                            {
                                collected.Add(new Spider.MingJu(content, source));
                            }
                        }
                    }
                }

                ConsoleHelper.Success($"Success: {label}");
            });

            if (!loopResult.IsCompleted)
            {
                return;
            }

            ConsoleHelper.Success($"Total: {collected.Count}");
            ConsoleHelper.Info("=============================End=============================");

            foreach (var entry in collected)
            {
                ConsoleHelper.Info($"{entry.Content} —— {entry.Source}");
            }
        }
コード例 #4
0
        /// <summary>
        /// Downloads the price table page and converts each matching table row into a <see cref="Titulo"/>.
        /// </summary>
        /// <param name="name">
        /// Optional case-insensitive substring filter applied to <c>Titulo.Nome</c>. When null or
        /// empty, every parsed entry is returned.
        /// </param>
        /// <returns>
        /// The parsed entries. Rows with at least five cells are created with
        /// <c>TipoDeTitulo.Compra</c>; rows with exactly four cells are created with
        /// <c>TipoDeTitulo.Venda</c> and a null fourth argument. Rows with fewer than four cells
        /// are skipped.
        /// </returns>
        public static async Task <IEnumerable <Titulo> > FetchAsync(string name = "")
        {
            var web = new HtmlAgilityPack.HtmlWeb();
            var doc = await web.LoadFromWebAsync("http://www.tesouro.fazenda.gov.br/tesouro-direto-precos-e-taxas-dos-titulos");

            var rows = doc
                       .DocumentNode
                       .Descendants()
                       .Where(n => n.Name == "table" && n.HasClass("tabelaPrecoseTaxas"))
                       .SelectMany(table => table
                                            .Descendants()
                                            .Where(n => n.Name == "tr" && n.HasClass("camposTesouroDireto")));

            var bonds = new List <Titulo>();

            foreach (var row in rows)
            {
                var cells = row
                            .Descendants()
                            .Where(d => d.Name == "td")
                            .ToArray();

                // Both constructor shapes below read at least four cells; a shorter row would
                // otherwise fail with IndexOutOfRangeException and abort the whole fetch.
                if (cells.Length < 4)
                {
                    continue;
                }

                var bond = cells.Length >= 5
                    ? new Titulo(cells[0].InnerHtml,
                                 cells[1].InnerHtml,
                                 cells[2].InnerHtml,
                                 StripCurrencyFormatting(cells[3].InnerHtml),
                                 StripCurrencyFormatting(cells[4].InnerHtml), TipoDeTitulo.Compra)
                    : new Titulo(cells[0].InnerHtml,
                                 cells[1].InnerHtml,
                                 cells[2].InnerHtml,
                                 null,
                                 StripCurrencyFormatting(cells[3].InnerHtml), TipoDeTitulo.Venda);

                bonds.Add(bond);
            }

            if (string.IsNullOrEmpty(name))
            {
                return bonds;
            }

            // Case-insensitive containment without upper-casing both strings for every element.
            return bonds
                   .Where(t => t.Nome.IndexOf(name, System.StringComparison.OrdinalIgnoreCase) >= 0)
                   .ToList();
        }

        /// <summary>Removes the "R$" marker and every '.' character from a cell's text.</summary>
        private static string StripCurrencyFormatting(string raw)
        {
            return raw.Replace("R$", string.Empty).Replace(".", string.Empty);
        }
コード例 #5
0
        /// <summary>
        /// Loads a property listing page and builds a <see cref="Property"/> from the address found on it.
        /// </summary>
        /// <param name="propertyUrl">Absolute URL of the listing; its host must match the "TradeMeHostUrl" configuration value.</param>
        /// <returns>A <see cref="Property"/> carrying the scraped address, the value returned by GetHomesValue for it, and a fixed Cv.</returns>
        /// <exception cref="ArgumentException">The URL's host differs from the configured host.</exception>
        /// <exception cref="InvalidOperationException">The downloaded page has no node matching the address XPath.</exception>
        public async Task <Property> GetProperty(string propertyUrl)
        {
            var url = new Uri(propertyUrl);

            // Host names are case-insensitive, so the comparison must not depend on how the
            // configured value is capitalised.
            if (!string.Equals(url.Host, Configuration["TradeMeHostUrl"], StringComparison.OrdinalIgnoreCase))
            {
                throw new ArgumentException("Only Trade Me listings are supported");
            }

            var web      = new HtmlAgilityPack.HtmlWeb();
            var document = await web.LoadFromWebAsync(propertyUrl);

            // SelectSingleNode yields null when nothing matches; report that explicitly instead
            // of failing with a NullReferenceException.
            var addressNode = document.DocumentNode.SelectSingleNode(TradeMeAddressXpath);
            if (addressNode == null)
            {
                throw new InvalidOperationException("No address element was found at " + propertyUrl);
            }

            var address    = addressNode.InnerText;
            var homesValue = GetHomesValue(address);

            return(new Property {
                Address = address,
                // NOTE(review): Cv is a hard-coded constant here — confirm whether it should be scraped.
                Cv = 900000,
                HomesValue = homesValue
            });
        }
コード例 #6
0
        /// <summary>
        /// Downloads and parses a page the first time its (lower-cased) URL is seen.
        /// </summary>
        /// <param name="theUrl">Address of the page. It is lower-cased before being compared with and added to DoneUrls.</param>
        /// <returns>
        /// The list of further pages to visit. Link extraction is not implemented, so the list is
        /// currently always empty.
        /// </returns>
        public async Task <List <string> > GetWeb(string theUrl)
        {
            var morePages = new List <string>();

            // Invariant lower-casing keeps the key independent of the current culture.
            // NOTE(review): lower-casing the whole URL also alters case-sensitive paths and
            // queries before the request is made — confirm this is acceptable.
            theUrl = theUrl.ToLowerInvariant();
            if (DoneUrls.Contains(theUrl))
            {
                return morePages;
            }

            DoneUrls.Add(theUrl);

            string pageContent;
            using (var client = new HttpClient())
            {
                pageContent = await client.GetStringAsync(theUrl).ConfigureAwait(continueOnCapturedContext: false);
            }

            // Parse the markup that was just downloaded instead of fetching the page a second time.
            var hdoc = new HtmlAgilityPack.HtmlDocument();
            hdoc.LoadHtml(pageContent);

            // TODO: extract hrefs from hdoc and add them to morePages.
            return morePages;
        }
コード例 #7
0
ファイル: MakepoloCompany.cs プロジェクト: Qos-xin/Spider
        /// <summary>
        /// Downloads a listing page, collects the href of the link inside every
        /// <c>h2.colist_item_title</c> element, and enqueues the result on CompanyDetailsQueue.
        /// </summary>
        /// <param name="CountyName">Key stored alongside the collected URLs (or alongside the re-queued source URL).</param>
        /// <param name="SourceUrl">Address of the listing page; must not be null, empty or whitespace.</param>
        /// <returns>The hrefs found on the page.</returns>
        /// <exception cref="Exception">
        /// Thrown for any failure, after a five-second pause. The message is the original message
        /// followed by the source URL in parentheses; the original exception is attached as
        /// <see cref="Exception.InnerException"/>. When the page contains no matching elements the
        /// source URL is first re-queued on CountyPageUrlQueue (with an additional five-second pause).
        /// </exception>
        public static async Task <List <string> > GetPageContextAsync(string CountyName, string SourceUrl)
        {
            var urlList = new List <string>();

            try
            {
                if (string.IsNullOrWhiteSpace(SourceUrl))
                {
                    throw new Exception("发现空地址");
                }

                var htmlWeb = new HtmlAgilityPack.HtmlWeb();
                var result  = await htmlWeb.LoadFromWebAsync(SourceUrl);

                var titles = result.DocumentNode.SelectNodes("//h2[@class='colist_item_title']");
                if (titles == null)
                {
                    // Nothing matched: put the page back on the queue so it is fetched again later.
                    CountyPageUrlQueue.Enqueue(new KeyValuePair <string, string>(CountyName, SourceUrl));
                    await Task.Delay(5000);
                    throw new Exception("数据下载不完整,已重新入队.");
                }

                foreach (HtmlAgilityPack.HtmlNode title in titles)
                {
                    urlList.Add(title.SelectSingleNode("a").Attributes["href"].Value);
                }

                CompanyDetailsQueue.Enqueue(new KeyValuePair <string, string[]>(CountyName, urlList.ToArray()));
            }
            catch (Exception ex)
            {
                // Pause before reporting the failure without blocking the calling thread.
                await Task.Delay(5000);
                // Keep the original exception so its type and stack trace stay available.
                throw new Exception(ex.Message + "(" + SourceUrl + ")", ex);
            }

            return urlList;
        }
        /// <summary>
        /// Scrapes video entries from the site's home listing or from a search result.
        /// </summary>
        /// <param name="pagecount">Number of pages to download.</param>
        /// <param name="querry">
        /// Search text. When null or empty, the home listing is scraped instead of a search result.
        /// The text is URL-encoded before being placed in the request.
        /// </param>
        /// <param name="page">
        /// When greater than zero, this page number is requested on every iteration instead of the
        /// loop counter.
        /// </param>
        /// <returns>
        /// A pagedata whose <c>navigationmax</c> is the last numeric entry of the page's pagination
        /// list (0 when there is none) and whose <c>videos</c> holds one entry per scraped video
        /// (link, thumb, title, duration).
        /// </returns>
        public Modals.pagedata getvideos(int pagecount, string querry = "", int page = 0)
        {
            var videos    = new List <Modals.videosmodels>();
            var pagedataa = new Modals.pagedata();

            bool hasQuery = !string.IsNullOrEmpty(querry);

            // UrlEncode turns spaces into '+' and also escapes characters such as '&' or '#'
            // that would otherwise corrupt the query string.
            string baseurl = hasQuery
                ? "http://www.xvideos.com/?k=" + WebUtility.UrlEncode(querry)
                : "http://www.xvideos.com/";

            var web = new HtmlAgilityPack.HtmlWeb();

            for (int i = 0; i < pagecount; i++)
            {
                // NOTE(review): with page > 0 every iteration requests the same page, and with no
                // query and page == 0 every iteration requests the home page — confirm intent.
                int pageno = page > 0 ? page : i;

                string pageUrl;
                if (hasQuery)
                {
                    pageUrl = baseurl + "&p=" + pageno;
                }
                else if (page > 0)
                {
                    pageUrl = baseurl + "new/" + (pageno + 1) + "/";
                }
                else
                {
                    pageUrl = baseurl;
                }

                // Fetch the listing page; .Result blocks until the download has finished.
                var htmlDoc2 = web.LoadFromWebAsync(pageUrl).Result;

                pagedataa.navigationmax = ReadNavigationMax(htmlDoc2.DocumentNode);

                var elems = htmlDoc2.DocumentNode.SelectNodes("//*[contains(@class,'thumb-block')]");
                if (elems == null)
                {
                    // SelectNodes yields null when nothing matches; there is nothing to scrape.
                    continue;
                }

                foreach (var xd in elems)
                {
                    var elemento = new Modals.videosmodels();

                    // GetAttributeValue tolerates descendants without a class attribute
                    // (text nodes, for example), where Attributes["class"] would be null.
                    var anchor = xd.Descendants()
                                   .First(node => node.GetAttributeValue("class", "") == "thumb")
                                   .ChildNodes["a"];

                    elemento.link = "http://www.xvideos.com" + anchor.Attributes["href"].Value;
                    if (elemento.link.Contains("/pornstar-channels/") || elemento.link.Contains("/model-channels/") || elemento.link.Contains("/profiles/"))
                    {
                        // Channel and profile tiles are not videos.
                        continue;
                    }

                    // Prefer the data-src attribute and fall back to src when it is absent.
                    var image      = anchor.ChildNodes["img"];
                    var lazySource = image.Attributes["data-src"];
                    elemento.thumb = lazySource != null ? lazySource.Value : image.Attributes["src"].Value;

                    elemento.title    = WebUtility.HtmlDecode(xd.ChildNodes[1].ChildNodes["p"].ChildNodes["a"].Attributes["title"].Value);
                    elemento.duration = xd.ChildNodes[1].ChildNodes[1].ChildNodes["span"].ChildNodes["span"].InnerText;

                    videos.Add(elemento);
                    Console.WriteLine(videos.Count - 1 + "===>" + elemento.title);
                }
            }

            pagedataa.videos = videos;
            return(pagedataa);
        }

        /// <summary>
        /// Reads the last numeric entry of the last pagination element on the page; returns 0 when
        /// there is no usable pagination list.
        /// </summary>
        private static int ReadNavigationMax(HtmlAgilityPack.HtmlNode root)
        {
            var paginations = root.SelectNodes("//*[contains(@class,'pagination')]");
            if (paginations == null || paginations.Count == 0)
            {
                return 0;
            }

            var last = paginations[paginations.Count - 1];
            if (last.GetAttributeValue("class", "").Contains("pagination-with-settings"))
            {
                return 0;
            }

            var list = last.ChildNodes["ul"];
            if (list == null)
            {
                return 0;
            }

            int lastNumber = 0;
            foreach (var child in list.ChildNodes)
            {
                int parsed;
                if (int.TryParse(child.InnerText, out parsed))
                {
                    // Keep overwriting so the final value is the last numeric entry.
                    lastNumber = parsed;
                }
            }

            return lastNumber;
        }