コード例 #1
0
ファイル: HtmlUtil.cs プロジェクト: jcmangubat/YPS.Engine
        public static string GetPageDocument(Uri site)
        {
            var browser = new ScrapySharp.Network.ScrapingBrowser()
            {
                UserAgent = new FakeUserAgent("compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0", "Mozilla/5.0"),
            };
            return browser.DownloadString(site);

            //   var browser = new ScrapingBrowser();
            //   return browser.DownloadString(new Uri(site));
        }
コード例 #2
0
        public static string GetPageDocument(Uri site)
        {
            var browser = new ScrapySharp.Network.ScrapingBrowser()
            {
                UserAgent = new FakeUserAgent("compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0", "Mozilla/5.0"),
            };

            return(browser.DownloadString(site));

            //   var browser = new ScrapingBrowser();
            //   return browser.DownloadString(new Uri(site));
        }
コード例 #3
0
ファイル: WebPage.cs プロジェクト: haptear/scrapysharp
        public WebPage(ScrapingBrowser browser, Uri absoluteUrl, bool autoDownloadPagesResources, RawRequest rawRequest, RawResponse rawResponse,
                       Encoding encoding, bool autoDetectCharsetEncoding)
        {
            this.browser     = browser;
            this.absoluteUrl = absoluteUrl;
            this.rawRequest  = rawRequest;
            this.rawResponse = rawResponse;
            this.autoDetectCharsetEncoding = autoDetectCharsetEncoding;
            Encoding = encoding;

            content   = Encoding.GetString(rawResponse.Body);
            resources = new List <WebResource>();

            LoadHtml();

            if (autoDownloadPagesResources)
            {
                LoadBaseUrl();
                DownloadResources();
            }
        }
コード例 #4
0
        private async void Button_Click(object sender, RoutedEventArgs e)
        {
            HtmlAgilityPack.HtmlNode.ElementsFlags.Remove("option");
            var baseUrl  = @"https://www.infoclimat.fr/observations-meteo/temps-reel/agde/00069.html";
            var baseUrl2 = @"https://www.infoclimat.fr/climatologie-mensuelle/";

            ScrapySharp.Network.ScrapingBrowser browser = new ScrapySharp.Network.ScrapingBrowser();
            var res = await browser.NavigateToPageAsync(new Uri(baseUrl));

            var        sele           = res.Html.CssSelect("#select_station");
            var        nbvi           = sele.First().ChildNodes.Count;
            long       moytimepercity = 1;
            List <int> lstannee       = new List <int>();

            lstannee.Add(2009);
            lstannee.Add(2010);
            lstannee.Add(2011);
            lstannee.Add(2012);
            lstannee.Add(2013);

            Dictionary <string, string> lstmois = new Dictionary <string, string>();

            lstmois.Add("01", "janvier");
            lstmois.Add("02", "fevrier");
            lstmois.Add("03", "mars");
            lstmois.Add("04", "avril");
            lstmois.Add("05", "mai");
            lstmois.Add("06", "juin");
            lstmois.Add("07", "juillet");
            lstmois.Add("08", "aout");
            lstmois.Add("09", "septembre");
            lstmois.Add("10", "octobre");
            lstmois.Add("11", "novembre");
            lstmois.Add("12", "decembre");


            foreach (int annee in lstannee)
            {
                var spendedtime = System.Diagnostics.Stopwatch.StartNew();

                foreach (KeyValuePair <string, string> mois in lstmois)
                {
                    anneetxt.Text = annee.ToString() + "/";
                    moistxt.Text  = mois.Value;
                    var suffix = "/" + mois.Value + "/" + annee.ToString() + "/agde-le-grau.html";
                    var cmpt   = 0;
                    foreach (HtmlAgilityPack.HtmlNode prod in sele.First().ChildNodes)
                    {
                        nbville.Text   = nbvi.ToString();
                        nbcurrent.Text = cmpt.ToString();
                        foreach (HtmlAttribute elem in prod.Attributes)
                        {
                            var timepercity = System.Diagnostics.Stopwatch.StartNew();

                            if (elem.Name == "value" && !String.IsNullOrEmpty(elem.Value))
                            {
                                try
                                {
                                    var result = await browser.NavigateToPageAsync(new Uri(baseUrl2 + elem.Value + suffix));



                                    foreach (HtmlNode day in result.Html.CssSelect(".climday"))
                                    {
                                        Meteo rec = new Meteo();
                                        rec.idMesure = 0;
                                        rec.date     = DateTime.Parse(annee.ToString() + "-" + mois.Key + "-" + day.InnerText);
                                        rec.station  = prod.InnerText;
                                        var mes = day.ParentNode.ParentNode.ParentNode.ChildNodes.CssSelect(".named-units");
                                        if (mes.Count() > 0)
                                        {
                                            var tmp = mes.Where(n => n.InnerText != null && n.InnerText == "&deg;C");

                                            if (tmp.Count() == 2)
                                            {
                                                var mtch = reg5.Match(tmp.First().ParentNode.InnerText);
                                                if (mtch.Groups.Count > 1)
                                                {
                                                    rec.tempmin = float.Parse(mtch.Groups[2].Value.Replace('.', ','));
                                                }
                                                else
                                                {
                                                    Debug.WriteLine(tmp.First().ParentNode.InnerText);
                                                }
                                                var mtch2 = reg5.Match(tmp.Last().ParentNode.InnerText);
                                                if (mtch2.Groups.Count > 1)
                                                {
                                                    rec.tempmax = float.Parse(mtch2.Groups[2].Value.Replace('.', ','));
                                                }
                                                else
                                                {
                                                    Debug.WriteLine(tmp.Last().ParentNode.InnerText);
                                                }
                                            }
                                            var pl = mes.Where(n => n.InnerText != null && n.InnerText == "mm");
                                            if (pl.Count() > 0)
                                            {
                                                rec.precipe = float.Parse(pl.First().ParentNode.InnerText.Replace("mm", "").Replace('.', ','));
                                                // Debug.WriteLine(float.Parse(pl.First().ParentNode.InnerText.Replace("mm", "").Replace('.', ',')).ToString());
                                            }
                                            var vt = mes.Where(n => n.InnerText != null && n.InnerText == " km/h");
                                            if (vt.Count() > 0)
                                            {
                                                rec.ventmax = float.Parse(vt.First().ParentNode.InnerText.Replace(" km/h", "").Replace('.', ','));
                                                // Debug.WriteLine(float.Parse(vt.First().ParentNode.InnerText.Replace(" km/h", "").Replace('.', ',')).ToString());
                                            }



                                            if (rec.date != null && rec.station != null && (rec.tempmax != null || rec.tempmin != null))
                                            {
                                                ctx.Meteo.Add(rec);
                                            }
                                        }
                                    }
                                }
                                catch { }

                                totalcmpt += ctx.SaveChanges();
                                Debug.WriteLine("nb records saved : " + totalcmpt);

                                cmpt++;
                            }
                        }

                        spendedtime.Stop();
                    }
                }
            }
            MessageBox.Show("Done ! total record saved :" + totalcmpt);
        }