Beispiel #1
0
        /// <summary>
        /// Loads the round page at <paramref name="url"/> and builds one <see cref="Match"/> per
        /// fixture row found in the "vevent " and "vevent impar" table rows.
        /// </summary>
        /// <param name="url">Address of the page to download and parse.</param>
        /// <returns>
        /// The extracted matches, alternating rows of class "vevent " and "vevent impar" in the
        /// order they were selected. Empty when no complete row could be read.
        /// </returns>
        public List <Match> ExtractRoundByURL(string url)
        {
            List <Match> res = new List <Match>();

            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so normalise that to an empty list instead of relying on a caught exception.
            Func <string, List <HtmlAgilityPack.HtmlNode> > select = xpath =>
            {
                var nodes = doc.DocumentNode.SelectNodes(xpath);
                return nodes == null ? new List <HtmlAgilityPack.HtmlNode>() : nodes.ToList();
            };

            var equipo1Par      = select("//tr[@class='vevent ']//td[@class='equipo1']");
            var equipo1Impar    = select("//tr[@class='vevent impar']//td[@class='equipo1']");
            var equipo2Par      = select("//tr[@class='vevent ']//td[@class='equipo2']");
            var equipo2Impar    = select("//tr[@class='vevent impar']//td[@class='equipo2']");
            var resultadosPar   = select("//tr[@class='vevent ']//span[@class='clase']");
            var resultadosImpar = select("//tr[@class='vevent impar']//span[@class='clase']");

            // Only rows for which all three cells were found can be turned into a Match.
            int parCount   = Math.Min(equipo1Par.Count, Math.Min(equipo2Par.Count, resultadosPar.Count));
            int imparCount = Math.Min(equipo1Impar.Count, Math.Min(equipo2Impar.Count, resultadosImpar.Count));

            // Walk both row sets in lock-step up to the longer one. The previous bound
            // (Count() - 1 of one list) always dropped the last pair and threw when the
            // two row sets had different lengths.
            int rounds = Math.Max(parCount, imparCount);
            for (int i = 0; i < rounds; i++)
            {
                if (i < parCount)
                {
                    res.Add(new Match()
                    {
                        local = equipo1Par[i].InnerText, resultado = resultadosPar[i].InnerText, visitante = equipo2Par[i].InnerText
                    });
                }
                if (i < imparCount)
                {
                    res.Add(new Match()
                    {
                        local = equipo1Impar[i].InnerText, resultado = resultadosImpar[i].InnerText, visitante = equipo2Impar[i].InnerText
                    });
                }
            }

            if (res.Count == 0)
            {
                Console.WriteLine("This round is not available");
            }
            return(res);
        }
        /*
         * Usage notes for getdownloadlink (as given by the original author):
         * videourl: URL of the video page, either scraped earlier or supplied by the user
         * quality:  pass 0 for the low-resolution link; any other value picks the HD/SD link
         */
        public string getdownloadlink(string videourl, int quality)
        {
            // Fetch the video's info page; .Result blocks until the asynchronous load finishes.
            var page      = new HtmlAgilityPack.HtmlWeb().LoadFromWebAsync(videourl).Result;
            var container = page.GetElementbyId("html5video_base");
            var children  = container.ChildNodes;

            // The second child of the container holds the link variants:
            // index 0 is used for quality 0, index 1 for everything else.
            int    variantIndex = quality == 0 ? 0 : 1;
            string href         = children[1].ChildNodes[variantIndex].ChildNodes[0].Attributes["href"].Value;

            Console.WriteLine("ok");

            return href;
        }
        /// <summary>
        /// Downloads the English Wikipedia page for <paramref name="ArticleName"/> and stores the
        /// inner text of its <c>mw-content-text</c> div in <c>wikipediaContent</c>.
        /// </summary>
        /// <param name="ArticleName">Article title appended verbatim to the wiki URL.</param>
        public WikipediaContentProvider(string ArticleName)
        {
            string articleUrl = "http://en.wikipedia.org/wiki/" + ArticleName;
            var    page       = new HtmlAgilityPack.HtmlWeb().Load(articleUrl);
            var    bodyNode   = page.DocumentNode.SelectSingleNode("//div[@id=\"mw-content-text\"]");

            wikipediaContent = bodyNode.InnerText;
        }
Beispiel #4
0
        /// <summary>
        /// Loads a fixed eBay listing page, follows every item link under <c>h3.lvtitle</c>,
        /// extracts the <c>imgArr</c> JSON array from each item page's script block and downloads
        /// every <c>maxImageUrl</c> into the working directory, named by the MD5 hex of its URL.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            const string startUrl = "http://www.ebay.com/sch/Accessory-Bundles-/176971/i.html";

            // Built once: the pattern does not depend on the page being processed.
            Regex imgArrPattern = new Regex(@".+""imgArr"" : (.+\]), ""islarge"".+", RegexOptions.Multiline);

            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
            var doc = htmlWeb.Load(startUrl);

            // SelectNodes returns null when nothing matches.
            var linkNodes = doc.DocumentNode.SelectNodes("//h3[@class=\"lvtitle\"]//a");
            if (linkNodes == null)
            {
                Console.WriteLine("No listings found at {0}", startUrl);
                return;
            }

            var urls = linkNodes.Select(linkNode => linkNode.GetAttributeValue("href", "/")).ToList();

            // Both the web client and the hash algorithm are disposable; share one of each
            // across all pages and release them when done.
            using (WebClient webClient = new WebClient())
            using (HashAlgorithm algorithm = MD5.Create())
            {
                foreach (var url in urls)
                {
                    var subDoc      = htmlWeb.Load(url);
                    var scriptNodes = subDoc.DocumentNode.SelectNodes("//script[contains(.,\"imgArr\")]");

                    // Take the first script whose text actually matches the pattern; a page
                    // without one is skipped instead of crashing in JArray.Parse.
                    string jsonText = scriptNodes == null
                        ? null
                        : scriptNodes.Select(scriptNode => imgArrPattern.Match(scriptNode.InnerText).Groups[1].Value)
                                     .FirstOrDefault(value => value.Length > 0);
                    if (string.IsNullOrEmpty(jsonText))
                    {
                        Console.WriteLine("No image list found on {0}, skipping", url);
                        continue;
                    }

                    var images = JArray.Parse(jsonText).Children().Select(img => img.Value <string>("maxImageUrl"));
                    foreach (string image_url in images)
                    {
                        if (string.IsNullOrEmpty(image_url))
                        {
                            continue; // entry without a maxImageUrl property
                        }

                        Uri    uri       = new Uri(image_url);
                        string extension = System.IO.Path.GetExtension(uri.LocalPath);
                        string hash      = string.Join("", algorithm.ComputeHash(Encoding.UTF8.GetBytes(image_url)).Select(b => b.ToString("X2")));
                        Console.WriteLine("Downloading {0} file", image_url);
                        webClient.DownloadFile(image_url, string.Format("{0}{1}", hash, extension));
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Scrapes a deal page and its mobile counterpart into a <see cref="GrouponData"/>.
        /// </summary>
        /// <param name="url">Desktop URL of the deal; the mobile URL is derived by replacing "www" with "m".</param>
        /// <returns>
        /// The populated data object. Parsing failures are swallowed: a failed time lookup yields a
        /// zero <c>TimeRemaining</c>, and a failure while reading the mobile page leaves the
        /// remaining fields as they were at that point.
        /// </returns>
        private GrouponData GetPage(string url)
        {
            var result = new GrouponData();
            var client = new HtmlAgilityPack.HtmlWeb();

            string mobileUrl   = url.Replace("www", "m");
            var    desktopPage = client.Load(url);

            // Remaining time is read from a hidden input on the desktop page, in milliseconds.
            try
            {
                var    timeInput    = desktopPage.DocumentNode.SelectSingleNode("//input[@class='jcurrentTimeLeft']");
                string rawMillis    = timeInput.Attributes["value"].Value;
                int    milliseconds = int.Parse(rawMillis);

                result.TimeRemaining = new TimeSpan(0, 0, 0, 0, milliseconds);
            }
            catch (Exception)
            {
                result.TimeRemaining = new TimeSpan(0, 0, 0);
            }

            var mobilePage = client.Load(mobileUrl);

            result.URL = url;

            // Everything else comes from fixed positions in the mobile page layout.
            try
            {
                var root = mobilePage.DocumentNode;

                result.Image = root.SelectSingleNode("/html[1]/body[1]/div[2]/div[1]/img[1]").Attributes["src"].Value;
                result.Title = root.SelectSingleNode("/html[1]/body[1]/div[2]/h1[1]").InnerText.Trim();

                var variationNodes = root.SelectNodes("/html[1]/body[1]/div[3]/ul[1]/li");

                result.Variations = new List<GrouponItem>();

                foreach (var node in variationNodes)
                {
                    result.Variations.Add(new GrouponItem
                    {
                        Title = node.SelectSingleNode(".//tr[1]/td[1]/h3[1]").InnerText,
                        Sold  = node.SelectSingleNode(".//tr[1]/td[1]/div[1]/strong[last()]").InnerText
                    });
                }
            }
            catch (Exception)
            {
                // Best effort: return whatever was collected before the failure.
            }

            return result;
        }
Beispiel #6
0
        /// <summary>
        /// Downloads the coinmarketcap historical-data page for <paramref name="coinName"/> and
        /// converts each non-text row of the first "table-responsive" element into a <see cref="History"/>.
        /// </summary>
        /// <param name="coinName">Coin identifier inserted verbatim into the page URL.</param>
        /// <returns>One <see cref="History"/> per element row, in document order.</returns>
        public static List <History> GetCoinHistory(string coinName)
        {
            string historyUrl = "https://coinmarketcap.com/currencies/" + coinName + "/historical-data/";

            var page   = new HtmlAgilityPack.HtmlWeb().Load(historyUrl);
            var tables = page.DocumentNode.SelectNodes("//*[contains(@class,'table-responsive')]");

            // Rows live at a fixed position inside the first matching container.
            var rows    = tables[0].ChildNodes[1].ChildNodes[3].ChildNodes;
            var entries = new List <History>();

            foreach (var row in rows)
            {
                // Whitespace between rows shows up as "#text" nodes; skip those.
                if (row.Name == "#text")
                {
                    continue;
                }

                // Cells sit at the odd child indexes (even ones are the text nodes in between).
                var cells = row.ChildNodes;
                entries.Add(new History
                {
                    Date      = cells[1].InnerText,
                    Open      = cells[3].InnerText,
                    High      = cells[5].InnerText,
                    Low       = cells[7].InnerText,
                    Close     = cells[9].InnerText,
                    Volume    = cells[11].InnerText,
                    MarketCap = cells[13].InnerText
                });
            }

            return entries;
        }
Beispiel #7
0
        /// <summary>
        /// Loads the page at <c>Link1</c> and collects seven text values about the first search result.
        /// </summary>
        /// <returns>
        /// A list holding, in order: title text, subtitle text, item link, image source,
        /// rating text, price text and the text of the fourth detail row.
        /// </returns>
        public List <string> Scrapdata()
        {
            var root = new HtmlAgilityPack.HtmlWeb().Load(Link1).DocumentNode;

            // Look everything up first; any missing node fails here, before the list is built.
            var    titleNodes   = root.SelectNodes("//a[@class ='s-item__link']").ToList();
            string subtitleText = root.SelectSingleNode("//*[@id='srp-river-results']/ul/li[1]/div/div[2]/div[3]").InnerText;
            string itemLink     = root.SelectSingleNode("//a[@class ='s-item__link']").GetAttributeValue("href", "default");
            string imageSource  = root.SelectSingleNode("//*[@id='srp-river-results']/ul/li[1]/div/div[1]/div/a[1]/div/img").GetAttributeValue("src", "Default");
            var    ratingNode   = root.SelectSingleNode("//*[@class='x-star-rating']/span");
            var    priceNode    = root.SelectSingleNode("//*[@class='s-item__price']");
            string estimateText = root.SelectSingleNode("//*[@id='srp-river-results']/ul/li[1]/div/div[2]/div[4]/span/span").InnerText;

            return new List <string>
            {
                titleNodes[0].InnerText,
                subtitleText,
                itemLink,
                imageSource,
                ratingNode.InnerText,
                priceNode.InnerText,
                estimateText
            };
        }
        /// <summary>
        /// Click handler that downloads the coinmarketcap front page; the loaded document is not used.
        /// </summary>
        private void updateClick(object sender, RoutedEventArgs e)
        {
            var client = new HtmlAgilityPack.HtmlWeb();

            client.Load("https://coinmarketcap.com/");
        }
        /// <summary>
        /// Loads a fixed yellowpages.com search page and prints three groups of node texts to the
        /// console, separated by dashed lines: <c>a.business-name</c>, <c>p.adr</c>, and the links
        /// inside <c>ul.to-columns</c>.
        /// </summary>
        public static void GetWebData()
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.yellowpages.com/search?search_terms=software&geo_location_terms=chennai");

            // Prints the inner text of every node matching the XPath. SelectNodes returns
            // null when nothing matches, which is treated as "nothing to print".
            Action <string> printMatches = xpath =>
            {
                var nodes = doc.DocumentNode.SelectNodes(xpath);
                if (nodes == null)
                {
                    return;
                }
                foreach (var node in nodes)
                {
                    Console.WriteLine(node.InnerText);
                }
            };

            printMatches("//a[@class='business-name']");

            Console.WriteLine("-------------------------------------");
            printMatches("//p[@class='adr']");

            Console.WriteLine("-------------------------------------");
            // "&&" is not an XPath operator and made the original expression fail to parse;
            // select the anchors with an href that sit inside the list instead.
            printMatches("//ul[@class='to-columns']//a[@href]");
        }
Beispiel #10
0
        /// <summary>
        /// Downloads <paramref name="url"/> and returns the outer HTML of the parsed document root.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The document's markup as re-serialised by HtmlAgilityPack.</returns>
        private string GetWebPageHtmlFromUrl(string url)
        {
            var page = new HtmlAgilityPack.HtmlWeb().Load(url);

            return page.DocumentNode.OuterHtml;
        }
Beispiel #11
0
        /// <summary>
        /// Loads <paramref name="url"/>, selects nodes with <paramref name="xPath"/> and collects the
        /// value of every attribute named <paramref name="attribute"/> on those nodes, echoing each
        /// value to the console.
        /// </summary>
        /// <param name="url">Page to download.</param>
        /// <param name="xPath">XPath expression choosing the nodes to inspect.</param>
        /// <param name="attribute">Exact attribute name to collect.</param>
        /// <returns>
        /// The collected values in document order. If selection or iteration throws, the exception
        /// message is printed and the values gathered so far are returned.
        /// </returns>
        private static List <string> scraperLoop(string url, string xPath, string attribute)
        {
            var page   = new HtmlAgilityPack.HtmlWeb().Load(url);
            var values = new List <string>();

            try
            {
                var matches = page.DocumentNode.SelectNodes(xPath);
                foreach (var node in matches)
                {
                    foreach (var candidate in node.Attributes)
                    {
                        if (candidate.Name == attribute)
                        {
                            values.Add(candidate.Value);
                            Console.WriteLine(candidate.Value.ToString());
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }

            return values;
        }
Beispiel #12
0
        /// <summary>
        /// Timer callback: when <c>CheckBox</c> is checked, reloads <c>URL</c> into <c>NewDocument</c>,
        /// rebuilds <c>KontrolHaberler</c> from it and reconciles it with <c>YüklenenHaberler</c>.
        /// Does nothing when the box is unchecked.
        /// </summary>
        public void timeTickMonitorWebSite()
        {
            if (!CheckBox.Checked)
            {
                return;
            }

            try
            {
                var client = new HtmlAgilityPack.HtmlWeb();
                NewDocument = client.Load(URL);
            }
            catch (IOException)
            {
                // Ignored: NewDocument keeps whatever value it had before this tick.
            }
            catch (WebException)
            {
                // Ignored for the same reason as above.
            }

            KontrolHaberler.Clear();
            if (NewDocument != null)
            {
                sonDakikaListesiniAl(NewDocument, KontrolHaberler);
            }

            karşılaştır(YüklenenHaberler, KontrolHaberler);
            eskiHaberleriTemizle(YüklenenHaberler, KontrolHaberler);
        }
Beispiel #13
0
        /// <summary>
        /// Loads the IMDb box-office chart and fills <c>lvNews</c> with one row per title:
        /// title text, rating-column text, "secondaryInfo" span text and weeks-column text.
        /// </summary>
        private void getBoxOffice()
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.imdb.com/chart/boxoffice/?ref_=nv_ch_cht");

            var mtitle  = doc.DocumentNode.SelectNodes("//td[contains(@class,'titleColumn')]/a");
            var mrating = doc.DocumentNode.SelectNodes("//td[contains(@class,'ratingColumn')]");
            var mgross  = doc.DocumentNode.SelectNodes("//span[contains(@class,'secondaryInfo')]");
            var weeks   = doc.DocumentNode.SelectNodes("//td[contains(@class,'weeksColumn')]");

            lvNews.Items.Clear();
            lvNews.View = View.Details; // make column headings visible

            // SelectNodes returns null when nothing matches (e.g. the page layout changed);
            // leave the list empty rather than crashing.
            if (mtitle == null || mrating == null || mgross == null || weeks == null)
            {
                return;
            }

            // The four collections are indexed in parallel, so only go as far as the shortest.
            int rowCount = Math.Min(Math.Min(mtitle.Count, mrating.Count), Math.Min(mgross.Count, weeks.Count));

            for (int i = 0; i < rowCount; i++)
            {
                // One ListViewItem per chart row; sub-items fill the remaining columns.
                ListViewItem lvi = new ListViewItem(mtitle[i].InnerText);
                lvi.SubItems.Add(mrating[i].InnerText);
                lvi.SubItems.Add(mgross[i].InnerText);
                lvi.SubItems.Add(weeks[i].InnerText);
                lvNews.Items.Add(lvi);
            }
        }
Beispiel #14
0
        /// <summary>
        /// Downloads the company page at <paramref name="url"/> and reads its details from fixed
        /// positions in the page layout.
        /// </summary>
        /// <param name="url">Page address; the numeric part before ".html" becomes the company id.</param>
        /// <returns>The populated <see cref="CompanyDetails"/>.</returns>
        /// <exception cref="Exception">
        /// Thrown about five seconds after any download or parse failure. The message is the original
        /// message followed by the URL in parentheses; the original exception is the inner exception.
        /// </exception>
        public static async Task <CompanyDetails> GetDetailContext(string url)
        {
            var       CDetails = new CompanyDetails();
            Exception failure  = null;

            try
            {
                HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
                var result = await htmlWeb.LoadFromWebAsync(url);

                var DN = result.DocumentNode;
                var id = System.Text.RegularExpressions.Regex.Match(url, @"(?<=/)\d+(?=\.html)").Value;
                CDetails.Id      = Convert.ToInt64(id);
                CDetails.Name    = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[1]/span").InnerText.Replace("公司名称:", "").Trim();
                CDetails.Address = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[3]/span").InnerText.Replace("公司地址:", "").Trim();
                CDetails.Contect = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[2]/span").InnerText.Replace("法人代表:", "").Trim();
                CDetails.Phone   = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[4]/div[2]/ul/li[7]/span").InnerText.Replace("公司传真:", "").Trim();
                CDetails.Details = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[2]/div[2]").InnerText.Replace("&nbsp;", "").Trim();
            }
            catch (Exception ex)
            {
                // Remember the failure and handle it after the catch block, so the delay can be
                // awaited even on compilers that do not allow await inside catch.
                failure = ex;
            }

            if (failure != null)
            {
                // Same five-second back-off as before, but without blocking the calling thread.
                await Task.Delay(5000);
                // Keep the original exception as InnerException so its type and stack trace survive.
                throw new Exception(failure.Message + "(" + url + ")", failure);
            }

            return(CDetails);
        }
Beispiel #15
0
        /// <summary>
        /// Loads the IMDb top-TV chart and appends one row per title to <c>lvTopseries</c>:
        /// title text, rating text, "secondaryInfo" span text and the absolute link to the title page.
        /// </summary>
        public void getTopSeries()
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.imdb.com/chart/toptv/?ref_=nv_tvv_250");

            var myear   = doc.DocumentNode.SelectNodes("//span[@class='secondaryInfo']");
            var mrating = doc.DocumentNode.SelectNodes("//tr/td[contains(@class,'ratingColumn imdbRating')]");
            var mtitle  = doc.DocumentNode.SelectNodes("//td[contains(@class,'titleColumn')]/a");

            lvTopseries.View = View.Details;

            // SelectNodes returns null when nothing matches (e.g. the page layout changed).
            if (myear == null || mrating == null || mtitle == null)
            {
                return;
            }

            // The three collections are indexed in parallel, so only go as far as the shortest.
            int rowCount = Math.Min(mtitle.Count, Math.Min(mrating.Count, myear.Count));

            for (int i = 0; i < rowCount; i++)
            {
                // Drop any leading slash from the href so joining it to the site root
                // does not produce "imdb.com//title/...".
                string href = mtitle[i].GetAttributeValue("href", "").Trim().TrimStart('/');
                string link = "https://www.imdb.com/" + href;

                var lvi = lvTopseries.Items.Add(mtitle[i].InnerText);
                lvi.SubItems.Add(mrating[i].InnerText);
                lvi.SubItems.Add(myear[i].InnerText);
                lvi.SubItems.Add(link);
            }
        }
        /// <summary>
        /// Loads the slickcharts S&amp;P 500 page and turns every group of seven table cells into a
        /// <see cref="Company"/> (name, ticker, rank, price).
        /// </summary>
        /// <returns>One <see cref="Company"/> per complete group of seven cells, in table order.</returns>
        public List <Company> GetInfoSlick()
        {
            var ctx = new Context();

            var page  = new HtmlAgilityPack.HtmlWeb().Load("https://www.slickcharts.com/sp500");
            var cells = page.DocumentNode
                        .SelectNodes("//table[@class='table table-hover table-borderless table-sm']")
                        .Descendants("td")
                        .ToList();

            var companies = new List <Company>();

            // Step through the flat cell list one seven-cell row at a time;
            // a trailing partial group is ignored.
            for (int offset = 0; offset + 7 <= cells.Count; offset += 7)
            {
                string name   = cells[offset + 1].InnerText;
                string symbol = cells[offset + 2].InnerText;
                int    rank   = Convert.ToInt32(cells[offset].InnerText);

                // The first 13 characters of the price cell are dropped before parsing.
                // NOTE(review): presumably leading markup/whitespace; confirm against the live page.
                string rawPrice = cells[offset + 4].InnerText;
                double price    = Convert.ToDouble(rawPrice.Remove(0, 13), CultureInfo.InvariantCulture);

                companies.Add(new Company(name, symbol, rank, price));
            }

            return companies;
        }
        } // End Sub button2_Click

        /// <summary>
        /// Loads the benchmarks game home page and lists every link found under
        /// <c>//section[1]//li/a[@href]</c>.
        /// </summary>
        /// <returns>
        /// A table with string columns "Name" (HTML-decoded link text) and "Url" (raw href),
        /// sorted ascending by name.
        /// </returns>
        private System.Data.DataTable GetAllBenchmarks()
        {
            var page = new HtmlAgilityPack.HtmlWeb().Load(@"http://benchmarksgame.alioth.debian.org/");

            var table = new System.Data.DataTable();
            table.Columns.Add("Name", typeof(string));
            table.Columns.Add("Url", typeof(string));

            var anchors = page.DocumentNode.SelectNodes("//section[1]//li/a[@href]");
            foreach (HtmlAgilityPack.HtmlNode anchor in anchors)
            {
                string name = System.Web.HttpUtility.HtmlDecode(anchor.InnerText);
                string href = anchor.Attributes["href"].Value;

                // Values are given in column order: Name, Url.
                table.Rows.Add(name, href);
            }

            // Sorting goes through the default view; ToTable() materialises the sorted rows.
            System.Data.DataView view = table.DefaultView;
            view.Sort = "Name ASC";

            return view.ToTable();
        } // End Function GetAllBenchmarks
Beispiel #18
0
        /*
         *  Constructor: scrapes weather.com's hour-by-hour page for time, temperature, "feels like"
         *  temperature, precipitation %, wind speed and location. The page is chosen by the zip code
         *  in the user's settings.
         *
         *  Try/Catch: the download is attempted once; on failure the error is reported on the console
         *  and the constructor continues with an empty document.
         *
         *  Attributes: each data field holds the nodes selected by class name (null when none match).
         *
         *  lUserSettings.Location: set from the page heading when it is present and long enough;
         *  otherwise left unchanged.
         */
        public WebScrape(ref Settings lUserSettings)
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

            try
            {
                doc = web.Load("https://weather.com/weather/hourbyhour/l/" + lUserSettings.ZipCode + ":4:US");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine("\nThe website this program is trying to reach could be down, or a 5-digit zip code needs to be placed" +
                                  " in the settings.txt file. \n\n  Press any button to close the program.");
                Console.ReadLine();
            }

            this.mTimeData     = doc.DocumentNode.SelectNodes("//div[@class='hourly-time']");
            this.mTempData     = doc.DocumentNode.SelectNodes("//td[@class='temp']");
            this.mFeelData     = doc.DocumentNode.SelectNodes("//td[@class='feels']");
            this.mPrecipData   = doc.DocumentNode.SelectNodes("//td[@class='precip']");
            this.mWindData     = doc.DocumentNode.SelectNodes("//td[@class='wind']");
            this.mLocationData = doc.DocumentNode.SelectNodes("//div[@class='locations-title hourly-page-title']/h1");

            // The heading's last 15 characters are cut off to leave the location.
            // NOTE(review): presumably a fixed " Hourly Weather" suffix; confirm against the live page.
            const int headingSuffixLength = 15;

            // After a failed download (or a layout change) there is no heading node; the
            // unguarded code threw NullReferenceException here, right after the failure had
            // already been reported. A heading shorter than the suffix would likewise have
            // made Substring throw.
            if (this.mLocationData != null && this.mLocationData.Count > 0)
            {
                string heading = this.mLocationData[0].InnerText;
                if (heading.Length >= headingSuffixLength)
                {
                    lUserSettings.Location = heading.Substring(0, heading.Length - headingSuffixLength);
                }
            }
        }
Beispiel #19
0
        /// <summary>
        /// Loads the EvE-Scout storm tracker page and converts each table row with more than one
        /// cell into a <see cref="Storm"/>.
        /// </summary>
        /// <returns>
        /// The storms read so far. Any exception during download or parsing is swallowed, so the
        /// list may be empty or partial.
        /// </returns>
        public static List <Storm> GetStorms()
        {
            var found = new List <Storm>();

            try
            {
                const string pageUrl   = "https://evescoutrescue.com/home/stormtrack.php";
                const string bodyXPath = "/html/body/div/div[4]/div/div/div[2]/table/tbody";

                var page      = new HtmlAgilityPack.HtmlWeb().Load(pageUrl);
                var tableBody = page.DocumentNode.SelectNodes(bodyXPath);

                // Materialise all rows as trimmed cell texts before building any Storm.
                List <List <string> > rows =
                    (from tr in tableBody.Descendants("tr")
                     where tr.Elements("td").Count() > 1
                     select tr.Elements("td").Select(td => td.InnerText.Trim()).ToList())
                    .ToList();

                foreach (List <string> cells in rows)
                {
                    // Column order on the page: region, system, name, type.
                    found.Add(new Storm
                    {
                        Region = cells[0],
                        System = cells[1],
                        Type   = cells[3],
                        Name   = cells[2]
                    });
                }
            }
            catch
            {
                // Best effort: fall through and return what has been collected.
            }

            return found;
        }
        /// <summary>
        /// Loads the benchmarks game home page and lists every link found under
        /// <c>//section[1]//li/a[@href]</c>.
        /// </summary>
        /// <returns>
        /// A table with string columns "Name" (HTML-decoded link text) and "Url" (raw href),
        /// sorted ascending by name.
        /// </returns>
        private System.Data.DataTable GetAllBenchmarks()
        {
            var page = new HtmlAgilityPack.HtmlWeb().Load(@"http://benchmarksgame.alioth.debian.org/");

            var table = new System.Data.DataTable();
            table.Columns.Add("Name", typeof(string));
            table.Columns.Add("Url", typeof(string));

            var anchors = page.DocumentNode.SelectNodes("//section[1]//li/a[@href]");
            foreach (HtmlAgilityPack.HtmlNode anchor in anchors)
            {
                string name = System.Web.HttpUtility.HtmlDecode(anchor.InnerText);
                string href = anchor.Attributes["href"].Value;

                // Values are given in column order: Name, Url.
                table.Rows.Add(name, href);
            }

            // Sorting goes through the default view; ToTable() materialises the sorted rows.
            System.Data.DataView view = table.DefaultView;
            view.Sort = "Name ASC";

            return view.ToTable();
        }
Beispiel #21
0
        /// <summary>
        /// Downloads the page at <c>model.Url</c>, copies its title and meta description into
        /// <paramref name="model"/> and returns the model wrapped in an <c>ItemResponse</c>.
        /// </summary>
        /// <param name="model">Recipe whose <c>Url</c> is scraped; <c>Title</c> and <c>Description</c> are overwritten when the page provides them.</param>
        /// <returns>
        /// 200 with the updated model when the model state is valid; 400 with the model state when
        /// it is not; 400 with the exception message when downloading or parsing fails.
        /// </returns>
        public HttpResponseMessage GetRecipes(Recipe model)
        {
            try
            {
                // Scraping happens inside the try so that a bad URL or unreachable site is
                // reported through the same BadRequest path as every other failure.
                var webGet = new HtmlAgilityPack.HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = webGet.Load(model.Url);

                // Either node may be absent on a given page; keep the model's current
                // value in that case instead of dereferencing null.
                var titleNode       = doc.DocumentNode.SelectSingleNode("//head/title");
                var descriptionNode = doc.DocumentNode.SelectSingleNode("//head/meta[@name='description']");

                if (titleNode != null)
                {
                    model.Title = titleNode.InnerText;
                }
                if (descriptionNode != null)
                {
                    model.Description = descriptionNode.GetAttributeValue("content", model.Description);
                }

                if (ModelState.IsValid)
                {
                    ItemResponse <Recipe> response = new ItemResponse <Recipe>();
                    response.Item = model;
                    return(Request.CreateResponse(HttpStatusCode.OK, response));
                }
                else
                {
                    return(Request.CreateErrorResponse(HttpStatusCode.BadRequest, ModelState));
                }
            }
            catch (Exception ex)
            {
                return(Request.CreateResponse(HttpStatusCode.BadRequest, ex.Message));
            }
        }
Beispiel #22
0
        /// <summary>
        /// Form load handler: downloads a fixed webtekno article, collects the text of every
        /// <c>application/ld+json</c> script into <c>structerData</c> and <c>richTextBox1</c>, then
        /// parses each script (with every "@" removed) and lists its top-level scalar properties in
        /// <c>listBox1</c>.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            var client   = new HtmlAgilityPack.HtmlWeb();
            var article  = client.Load("http://www.webtekno.com/uzaktan-kumandali-oyuncak-araba-boyutunda-bir-kesif-araci-gelecek-sene-ay-i-kesfetmeye-gidecek-h57706.html");
            var ldBlocks = article.DocumentNode
                           .SelectNodes("//script[@type='application/ld+json']")
                           .ToList();

            // Keep the raw script texts and show them, separated by a dashed line.
            foreach (var block in ldBlocks)
            {
                structerData.Add(block.InnerText);
            }
            richTextBox1.Text = string.Concat(
                ldBlocks.Select(block => $"{block.InnerText}\n---------------------------------\n"));

            foreach (var block in ldBlocks)
            {
                // "@" is stripped so keys such as "@type" become plain "type".
                string  cleaned = block.InnerText.ToString().Replace("@", "");
                JObject parsed  = JsonConvert.DeserializeObject <JObject>(cleaned);

                Dictionary <string, JToken> properties =
                    JsonConvert.DeserializeObject <Dictionary <string, JToken> >(parsed.ToString());

                foreach (var property in properties)
                {
                    if (property.Value.GetType() == typeof(JValue))
                    {
                        listBox1.Items.Add($"{{[Key]:{property.Key}-[Value]:{property.Value}}}");
                    }

                    // These four nested objects are parsed into dictionaries, but the
                    // results are not used afterwards.
                    switch (property.Key)
                    {
                        case "author":
                        case "mainEntityOfPage":
                        case "image":
                        case "publisher":
                        {
                            JObject nested = JsonConvert.DeserializeObject <JObject>(property.Value.ToString());
                            Dictionary <string, JToken> nestedProperties =
                                JsonConvert.DeserializeObject <Dictionary <string, JToken> >(nested.ToString());
                            break;
                        }
                    }
                }
            }
        }
Beispiel #23
0
        /// <summary>
        /// Downloads the Kentucky Lottery Powerball past-winning-numbers page and passes the parsed
        /// document to <c>getArray</c>.
        /// </summary>
        public static void getHtml()
        {
            const string pastWinningUrl = "https://www.kylottery.com/apps/draw_games/powerball/powerball_pastwinning.html";

            var page = new HtmlAgilityPack.HtmlWeb().Load(pastWinningUrl);

            getArray(page);
        }
Beispiel #24
0
        /// <summary>
        /// Scrapes the page whose URL is typed into <c>txtScraped</c>, shows its title, rating,
        /// genres and plot in the form's controls, then drives a Chrome session through a
        /// Google image search for the title and finally loads a poster into <c>picboxScraped</c>.
        /// </summary>
        /// <remarks>
        /// The selectors rely on <c>data-testid</c> attributes; when the title, plot or rating
        /// node is missing the method reports it on the console and returns without touching the UI.
        /// The Chrome driver is disposed before the method returns so no browser process is left behind.
        /// </remarks>
        void getscraped()
        {
            try
            {
                string s = txtScraped.Text;
                HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = web.LoadFromBrowser(s);
                var mheader = doc.DocumentNode.SelectSingleNode("//h1[contains(@data-testid,'hero-title-block__title')]");
                var mdesc   = doc.DocumentNode.SelectSingleNode("//span[contains(@data-testid,'plot-xs')]");
                var mrating = doc.DocumentNode.SelectNodes("//div[contains(@data-testid,'genres')]/a");
                var mrate   = doc.DocumentNode.SelectSingleNode("//div[contains(@data-testid,'hero-title-block__aggregate-rating__score')]/span");

                // SelectSingleNode returns null when nothing matches; bail out instead of
                // failing later with a NullReferenceException that the catch below does not cover.
                if (mheader == null || mdesc == null || mrate == null)
                {
                    Console.WriteLine("Could not find title, plot or rating on " + s);
                    return;
                }

                Console.WriteLine("Title " + mheader.InnerText + "Rate " + mrate.InnerText);
                Console.WriteLine("Desc : " + mdesc.InnerText);

                // SelectNodes returns null (not an empty collection) when there are no genre links
                if (mrating != null)
                {
                    foreach (var item in mrating)
                    {
                        Console.WriteLine("Genre " + item.InnerText);
                        lblGenre.Text += item.InnerText + "\n";
                    }
                }

                lblTitle.Text = mheader.InnerText;
                lblRate.Text  = mrate.InnerText + "/10";
                rtbDesc.Text  = mdesc.InnerText;

                // Options object that carries the settings we want for the Chrome driver
                var chromeOptions = new ChromeOptions();
                //chromeOptions.AddArguments("headless"); // option that would make the Chrome driver run headless
                chromeOptions.AddUserProfilePreference("profile.default_content_setting_values.cookies", 2);
                // Options are read when the driver is constructed, so every preference is set first
                chromeOptions.AddLocalStatePreference("browser.enabled_labs_experiments",
                                                      new List <string>());

                var driverService = ChromeDriverService.CreateDefaultService();
                driverService.HideCommandPromptWindow = true; // hide the Selenium command prompt window

                // Disposing the driver shuts the browser session down; without it every call
                // leaves a Chrome/chromedriver process running.
                using (ChromeDriver driver = new ChromeDriver(driverService, chromeOptions))
                {
                    driver.Navigate().GoToUrl("https://www.google.gr/imghp?hl=el&ogbl");
                    Thread.Sleep(2000);

                    var txt = driver.FindElement(By.Name("q"));
                    txt.SendKeys(mheader.InnerText);
                    driver.FindElement(By.ClassName("Tg7LZd")).Click();
                    IWebElement l = driver.FindElement(By.XPath("//a[@rel='noopener']"));
                    Thread.Sleep(2000);
                    Console.WriteLine("Src attribute is: " + l.GetAttribute("href"));
                }

                picboxScraped.Load("https://img.cineplexx.gr/media/gr/inc/movies_licences/AQuietPlace2_Plakat.jpg");
            }
            catch (System.UriFormatException ex)
            {
                // A malformed URL was supplied somewhere in the flow; report it instead of hiding it
                Console.WriteLine("Invalid URL: " + ex.Message);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Stores <paramref name="profile"/>, builds the profile page URL from it and downloads that page.
        /// </summary>
        /// <param name="profile">Profile whose platform, region and user name fill the URL template.</param>
        public MainSiteAccessPoint(Common.Data.General.Profile profile)
        {
            _userProfile = profile;

            // '#' in the user name is replaced by '-' before it is placed in the URL
            var userNameForUrl = profile.UserName.Replace('#', '-');
            var platformText   = PlatformEnumToString(profile.Platform);
            var regionText     = RegionEnumToString(profile.Region);

            _mainProfileURL = String.Format(WebsiteStringURL.MainWebsiteURL, platformText, regionText, userNameForUrl);
            _doc            = new HtmlAgilityPack.HtmlWeb().Load(_mainProfileURL);
        }
Beispiel #26
0
        /// <summary>
        /// Loads <paramref name="wikipediaUrl"/> and returns the child-node collection of every
        /// <c>tr</c> element found below the table body addressed by a fixed absolute XPath.
        /// </summary>
        /// <param name="wikipediaUrl">Address of the page to download.</param>
        /// <returns>
        /// One node collection per table row; an empty list when the XPath matches nothing.
        /// </returns>
        private static List <HtmlAgilityPack.HtmlNodeCollection> GetNames(string wikipediaUrl)
        {
            var web   = new HtmlAgilityPack.HtmlWeb();
            var doc   = web.Load(wikipediaUrl);
            var table = doc.DocumentNode.SelectNodes("/html/body/div[3]/div[3]/div[4]/div/table[2]/tbody");

            // SelectNodes yields null (not an empty collection) when the path matches nothing,
            // which happens as soon as the page layout differs from the hard-coded path.
            if (table == null)
            {
                return(new List <HtmlAgilityPack.HtmlNodeCollection>());
            }

            return(table.Descendants()
                        .Where(d => d.Name == "tr")
                        .Select(t => t.ChildNodes)
                        .ToList());
        }
Beispiel #27
0
        /// <summary>
        /// Counts the job-title links on the seek.co.nz result page for the given search.
        /// </summary>
        /// <param name="searchJob">Search term placed before <c>-jobs</c> in the URL path.</param>
        /// <param name="location">Location placed after <c>in-</c> in the URL path.</param>
        /// <returns>Number of job-title links found; 0 when the page contains none.</returns>
        public static int jobLength(String searchJob, String location)
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.seek.co.nz/" + searchJob + "-jobs/in-" + location);

            // SelectNodes returns null when nothing matches, so an empty result page must
            // be mapped to 0 explicitly. The former, unused job-URL lookup was removed: its
            // null result made the method throw even when titles were present.
            var titles = doc.DocumentNode.SelectNodes("//a[@data-automation='jobTitle']");

            return(titles == null ? 0 : titles.Count);
        }
        /// <summary>
        /// Downloads <paramref name="page"/> and returns the inner text of the first node
        /// selected by <paramref name="xpath"/>.
        /// </summary>
        /// <param name="page">Address of the page to load.</param>
        /// <param name="xpath">XPath expression evaluated against the document root.</param>
        /// <returns>Inner text of the selected node.</returns>
        /// <remarks>No null check is made on the selected node before its text is read.</remarks>
        public string Get(string page, string xpath)
        {
            var document = new HtmlAgilityPack.HtmlWeb().Load(page);
            var selected = document.DocumentNode.SelectSingleNode(xpath);

            return(selected.InnerText);
        }
 /// <summary>
 /// Creates a getter that loads a URL, finds every element whose <c>class</c> attribute
 /// contains <paramref name="cssClassName"/> and reads <paramref name="propertyName"/>
 /// from each element's first child.
 /// </summary>
 /// <param name="cssClassName">Text that must occur in the element's class attribute.</param>
 /// <param name="propertyName">Attribute to read from the first child node.</param>
 /// <returns>
 /// A getter producing one string per matching element (empty string when the element has
 /// no children or the attribute is absent), or an empty array when nothing matches.
 /// </returns>
 public IUrlDataGetter <string[]> CreateCssClassFirstChildPropertyArrayGetter(string cssClassName, string propertyName)
 {
     return(new UrlDataGetter <string[]>((url) =>
     {
         var web = new HtmlAgilityPack.HtmlWeb();
         var doc = web.Load(url);
         var matches = doc.DocumentNode.SelectNodes($"//*[contains(@class,'{cssClassName}')]");

         // SelectNodes returns null when no element matches
         if (matches == null)
         {
             return new string[0];
         }

         return matches
             .Select(node => node.FirstChild == null
                                 ? string.Empty // childless element: nothing to read the attribute from
                                 : node.FirstChild.GetAttributeValue(propertyName, string.Empty))
             .ToArray();
     }));
 }
Beispiel #30
0
        /// <summary>
        /// Runs a product search on belchip.by and converts every <c>cat-item</c> block of the
        /// result page into a <c>Product</c>.
        /// </summary>
        /// <param name="text">Search text; it is URL-escaped before being placed in the query string.</param>
        /// <returns>
        /// The products found. Result blocks without a name link or an image are skipped;
        /// the list is empty when the page has no result blocks.
        /// </returns>
        public static List <Product> search(string text)
        {
            var site = "http://belchip.by/";
            // Escape the text so characters such as '&', '#' or '+' stay inside the query value
            var url  = site + "search/?query=" + Uri.EscapeDataString(text ?? string.Empty);
            var list = new List <Product>();

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlWeb().Load(url);
            var nodes = document.DocumentNode.SelectNodes("//div[@class='cat-item']");

            // SelectNodes returns null when the page has no result blocks
            if (nodes == null)
            {
                return(list);
            }

            foreach (var x in nodes)
            {
                // SelectSingleNode gives null instead of throwing when a part is missing
                var nameNode = x.SelectSingleNode("h3/a");
                var imgNode  = x.SelectSingleNode("div[1]/a[2]/img");

                if (nameNode == null || imgNode == null)
                {
                    continue;
                }

                var costNode = x.SelectSingleNode("*/*/div[@class='denoPrice']");

                decimal?cost = null;
                string  days = null;

                if (costNode != null)
                {
                    cost = Convert.ToDecimal(costNode.FirstChild.InnerText, format);
                    days = "";
                }

                var name        = nameNode.FirstChild.InnerText;
                var productLink = site + nameNode.Attributes["href"].Value;
                var pictureLink = site + imgNode.Attributes["src"].Value;

                var infoDict  = new Dictionary <string, string>();
                var infoNode  = x.SelectSingleNode("div[@class='popup']");
                var infoTable = infoNode == null ? null : infoNode.SelectSingleNode("*/*/*/*/table");

                if (infoTable != null)
                {
                    foreach (var p in infoTable.ChildNodes)
                    {
                        if (p.Name != "tr")
                        {
                            continue;
                        }

                        var keyCell   = p.SelectSingleNode("td[1]");
                        var valueCell = p.SelectSingleNode("td[2]");

                        if (keyCell == null || valueCell == null)
                        {
                            continue;
                        }

                        // Indexer instead of Add: a repeated property name overwrites rather than throws
                        infoDict[keyCell.InnerText] = valueCell.InnerText;
                    }
                }

                list.Add(new Product(name, cost, days, infoDict, productLink, pictureLink));
            }

            return(list);
        }
Beispiel #31
0
        /// <summary>
        /// Loads the archive page below <c>XKCD_URL</c> and returns the number of links found
        /// directly under the element with id <c>middleContainer</c>.
        /// </summary>
        /// <returns>The link count, which this method treats as the ID of the latest comic.</returns>
        private uint GetLastestComicID()
        {
            var loader      = new HtmlAgilityPack.HtmlWeb();
            var archivePage = loader.Load(XKCD_URL + "archive/");
            var comicLinks  = archivePage.DocumentNode.SelectNodes("//*[@id='middleContainer']/a");

            // Last comic ID = comic count
            return((uint)comicLinks.Count);
        }
Beispiel #32
0
        /// <summary>
        /// Scrapes the nz.indeed.com result page for the given search and builds a <c>Jobs</c>
        /// entry for every title block that contains a link.
        /// </summary>
        /// <param name="searchJob">Search term placed before <c>-jobs-in-</c> in the URL path.</param>
        /// <param name="location">Location appended to the URL path and copied into every result.</param>
        /// <returns>The jobs found; an empty list when the page has no title or summary blocks.</returns>
        public static List <Jobs> getFromIndeed(String searchJob, String location)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            String indeedURL            = "https://nz.indeed.com/" + searchJob + "-jobs-in-" + location;

            HtmlAgilityPack.HtmlDocument doc = web.Load(indeedURL);

            // SelectNodes returns null when nothing matches, so the collections are kept
            // as-is and checked before use.
            var jobTitle    = doc.DocumentNode.SelectNodes("//div[@class='title']");
            var description = doc.DocumentNode.SelectNodes("//div[@class='summary']");
            var company     = doc.DocumentNode.SelectNodes("//span[@class='company']");

            List <Jobs> jobs = new List <Jobs>();

            if (jobTitle == null || description == null)
            {
                return(jobs);
            }

            String pattern = "href=\"(.*?).\"";

            // Titles and summaries are paired by position; stop at the shorter list so a
            // mismatch cannot index past the end of either one.
            int pairCount = Math.Min(jobTitle.Count, description.Count);

            for (int i = 0; i < pairCount; i++)
            {
                Match match = Regex.Match(jobTitle[i].InnerHtml, pattern);
                if (match.ToString() == "")
                {
                    continue;
                }

                String title = jobTitle[i].InnerText.Replace("\n", "").Trim();

                // The whole match text is appended; CleanIndeedLink turns it into the final link
                String jobLink = CleanIndeedLink("https://nz.indeed.com/" + match);

                // Some listings have no company span; fall back to a single space as before
                String companyName = (company != null && i < company.Count)
                                         ? company[i].InnerText.Replace("\n", "").Trim()
                                         : " ";

                String desc = description[i].InnerText.Replace("\n", "").Trim();

                Jobs job = new Jobs
                {
                    JobTitle       = title,
                    WebUrl         = jobLink,
                    CompanyName    = companyName,
                    Location       = location,
                    JobDescription = desc,
                    ImageUrl       = "blank",
                    Applied        = false,
                };
                jobs.Add(job);
            }
            return(jobs);
        }
        /// <summary>
        /// Loads the rockland.fm playlist page, parses it with <c>RocklandParser</c> and checks
        /// that at least one song is returned and every timestamp text ends with "Uhr".
        /// </summary>
        public void ParsePlaylistSongs()
        {
            var playlistPage = new HtmlAgilityPack.HtmlWeb().Load("http://www.rockland.fm/start.php?playlist");
            var songs        = new RocklandParser().GetSongs(playlistPage);

            Assert.IsTrue(songs.Count > 0);

            foreach (var entry in songs)
            {
                var timestamp = entry.TimestampText;
                Assert.IsTrue(timestamp.EndsWith("Uhr"));
            }
        }
        /// <summary>
        /// Loads a listing page and converts every article teaser block into a
        /// <c>NewsInfoForJson</c>, then fills in the article details via <c>GetContentText</c>.
        /// </summary>
        /// <param name="url">Address of the listing page.</param>
        /// <returns>One entry per teaser; an empty list when the page contains no teasers.</returns>
        public static List<NewsInfoForJson> GetNewsInfoList(string url)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            HtmlAgilityPack.HtmlNodeCollection teasers = doc.DocumentNode.SelectNodes("//div[@class=\"grid_item visor-article-teaser list_default\"]");

            List<NewsInfoForJson> result = new List<NewsInfoForJson>();
            if (teasers == null || teasers.Count == 0)
            {
                return result;
            }

            // One generator for the whole list instead of a freshly seeded one per item
            Random random = new Random();

            foreach (var c in teasers)
            {
                NewsInfoForJson news_t = new NewsInfoForJson();

                HtmlAgilityPack.HtmlNode imageNode = c.SelectSingleNode(".//img");
                if (imageNode != null)
                {
                    string image_t = imageNode.GetAttributeValue("src", "");
                    // Relative paths are made absolute by prefixing HOST
                    news_t.Titlepic = image_t.StartsWith("http") ? image_t : HOST + image_t.TrimStart('/');
                }

                HtmlAgilityPack.HtmlNode urlNode = c.SelectSingleNode(".//a[@class='grid_img']");
                if (urlNode != null)
                {
                    string url_t = urlNode.GetAttributeValue("href", "");
                    news_t.befrom = url_t.StartsWith("http") ? url_t : HOST + url_t.TrimStart('/');
                }

                HtmlAgilityPack.HtmlNode timeNode = c.SelectSingleNode(".//span[@class='grid_time']");
                if (timeNode != null)
                {
                    news_t.NewsTime = timeNode.InnerText;
                }

                HtmlAgilityPack.HtmlNode titleNode = c.SelectSingleNode(".//*[@class='grid_title']");
                if (titleNode != null)
                {
                    news_t.Title = HttpUtility.HtmlDecode(titleNode.InnerText);
                }

                news_t.NewsForm = "news";

                // A teaser without a link has no article page to fetch; loading a null
                // address would throw and abort the whole list.
                if (!string.IsNullOrEmpty(news_t.befrom))
                {
                    GetContentText(news_t.befrom, ref news_t);
                }

                news_t.Onclick = random.Next(100, 2000).ToString();

                result.Add(news_t);
            }

            return result;
        }
 /// <summary>
 /// Downloads <paramref name="AUrl"/> with encoding auto-detection, retrying a limited
 /// number of times when the load throws.
 /// </summary>
 /// <param name="AUrl">Address of the page to load.</param>
 /// <returns>
 /// The loaded document, or an empty document when every attempt failed. The method
 /// itself never throws for load failures.
 /// </returns>
 private HtmlAgilityPack.HtmlDocument LoadPage(string AUrl)
 {
     // Bounded retry: the former recursive retry discarded its own result and could
     // recurse without limit on a permanently failing address.
     const int maxAttempts = 3;

     var web = new HtmlAgilityPack.HtmlWeb();
     web.AutoDetectEncoding = true;

     for (int attempt = 1; attempt <= maxAttempts; attempt++)
     {
         try
         {
             return web.Load(AUrl);
         }
         catch
         {
             // Best effort: ignore the failure and try again while attempts remain
         }
     }

     return new HtmlAgilityPack.HtmlDocument();
 }
        /// <summary>
        /// Loads an article page and copies its body HTML, publication time and, when a
        /// Microsoft Store app link is present, the decoded store URL into <paramref name="newsModel_t"/>.
        /// </summary>
        /// <param name="url">Address of the article page.</param>
        /// <param name="newsModel_t">Model that receives <c>newstext</c>, <c>NewsTime</c>, <c>Filename</c> and <c>NewsForm</c>.</param>
        public static void GetContentText(string url, ref NewsInfoForJson newsModel_t)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            HtmlAgilityPack.HtmlNode contentNode = doc.DocumentNode.SelectSingleNode("//div[@class=\"field field-name-body field-type-text-with-summary field-label-hidden\"]/div[@class=\"field-items\"]/div[@class=\"field-item even\"]");
            if (contentNode != null)
            {
                newsModel_t.newstext = contentNode.OuterHtml;
            }

            HtmlAgilityPack.HtmlNode articleTimeNode = doc.DocumentNode.SelectSingleNode("//time[@class=\"article-time\"]");
            // Guard the node that is actually dereferenced (the check used to test contentNode)
            if (articleTimeNode != null)
            {
                newsModel_t.NewsTime = articleTimeNode.InnerText.Replace(" at ", " ").Replace(" EDT","");
                newsModel_t.NewsTime = DateTime.Parse(newsModel_t.NewsTime).ToString("yyyy-MM-dd HH:mm:ss");
            }

            HtmlAgilityPack.HtmlNodeCollection nodeCollection = doc.DocumentNode.SelectNodes("//a[@class=\"cta large\"]");
            if (nodeCollection == null || nodeCollection.Count == 0)
            {
                return;
            }

            foreach (var node in nodeCollection)
            {
                string url_t = HttpUtility.HtmlDecode(HttpUtility.UrlDecode(node.GetAttributeValue("href", "")));
                bool isStoreAppLink = url_t.Contains("www.microsoft.com") && url_t.Contains("store") && url_t.Contains("apps");
                if (!isStoreAppLink)
                {
                    continue;
                }

                // The target address follows "&url="; without that marker there is nothing to extract
                int markerIndex = url_t.IndexOf("&url=https");
                if (markerIndex < 0)
                {
                    continue;
                }

                int index_0 = markerIndex + "&url=".Length;
                int index_1 = url_t.LastIndexOf("&token=");

                // Cut at "&token=" only when it comes after the address start
                string fileName_t = HttpUtility.UrlDecode(
                    index_1 > index_0 ? url_t.Substring(index_0, index_1 - index_0) : url_t.Substring(index_0));

                int index_2 = fileName_t.LastIndexOf("&ourl=http");
                newsModel_t.Filename = index_2 > 0 ? fileName_t.Substring(0, index_2) : fileName_t;
                newsModel_t.NewsForm = "pingce";

                Console.WriteLine("url:" + newsModel_t.Filename);
            }
        }
        /// <summary>
        /// Loads the schedule search page for a subject and course number and adds, for each
        /// <c>ddlabel</c> header cell, the text after the last '=' of its first child's first
        /// attribute to <c>listBox2</c>.
        /// </summary>
        /// <param name="subject">Subject code placed in the <c>subject</c> query parameter.</param>
        /// <param name="cnbr">Course number placed in the <c>cnbr</c> query parameter.</param>
        /// <remarks>Failures are written to the debug output; the method does not throw.</remarks>
        public void getCrn(String subject,String cnbr)
        {
            file.Flush();
            HtmlAgilityPack.HtmlWeb getHtml = new HtmlAgilityPack.HtmlWeb();
            try
            {
                HtmlAgilityPack.HtmlDocument doc = getHtml.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_schedule?term=CURRENT&subject=" + subject + "&campus=PWL&levl=UG&cnbr=" + cnbr);

                // SelectNodes returns null when the page has no matching header cells
                HtmlAgilityPack.HtmlNodeCollection labels = doc.DocumentNode.SelectNodes("//th[@class=\"ddlabel\"]");
                if (labels == null)
                {
                    return;
                }

                foreach (HtmlAgilityPack.HtmlNode link in labels)
                {
                    // presumably the attribute is an href whose last query value is the CRN — verify against the page
                    String[] s = link.FirstChild.Attributes.First().Value.Split('=');
                    this.listBox2.Items.Add(s[s.Length-1]);
                }
            }
            catch (Exception e)
            {
                // Still best effort, but leave a trace instead of hiding the failure completely
                System.Diagnostics.Debug.WriteLine("getCrn failed: " + e);
            }
        }
Beispiel #38
0
 /// <summary>
 /// Job entry point: for every tracked item of the selected source that has not been
 /// checked today, resolves its mobile page, extracts price and image URL with a regular
 /// expression and records a price item. All changes are committed once at the end.
 /// </summary>
 /// <param name="context">Job execution context supplied by the scheduler; not read here.</param>
 public virtual void Execute(IJobExecutionContext context)
 {
     // Midnight at the start of today
     var dateTimeMin = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd"));
     var pendingItems = _ppismItemRepository.GetAll(x => (x.ItemSource == PPism.Model.Enum.DictPPItemSource.一号店) && (!x.LastListenTime.HasValue || x.LastListenTime.Value < dateTimeMin)).ToList();
     var itemBll = new PPismJob.Common.PPismItemBll();

     foreach (var item in pendingItems)
     {
         string mUrl = item.ListenUrl;

         // When the stored address is not already a mobile URL, read the mobile URL
         // from the page's "h5" meta entry.
         if (!itemBll.CheckIsMUrl(item.ListenUrl))
         {
             var pageLoader = new HtmlAgilityPack.HtmlWeb();
             var pageHtml   = pageLoader.Load(item.ListenUrl).DocumentNode.InnerHtml.ToString();
             mUrl = Regex.Match(pageHtml, @"name=""h5""\scontent='(?<mUrl>[^']+)'").Groups["mUrl"].Value.Trim();
         }

         if (string.IsNullOrEmpty(mUrl))
         {
             continue;
         }

         using (var req = new xNet.Net.HttpRequest())
         {
             req.UserAgent = xNet.Net.HttpHelper.FirefoxUserAgent();
             //http://p.3.cn/prices/get?callback=cnp&type=1&area=1_72_2799&pdtk=&pduid=2002986638&pdpin=&pdbp=0&skuid=J_540462
             req.CharacterSet = System.Text.Encoding.GetEncoding("utf-8");
             var mobileHtml = req.Get(mUrl).ToString();

             string reg = @"class=""swipeSlide_detail"">[\s\S]+?<img[\s]src=""(?<imgUrl>[^""]+)[\s\S]+?id=""current_price""[\s\S]+?class=""pd_product-price-num"">(?<price>[^<]+)";
             var captured = Regex.Match(mobileHtml, reg).Groups;
             var price    = captured["price"].Value.Trim().ToDecimal(0);
             var imgUrl   = captured["imgUrl"].Value.Trim();

             // Only record a price when both a positive price and an image URL were found
             bool hasPriceAndImage = price > 0 && !string.IsNullOrEmpty(imgUrl);
             if (hasPriceAndImage)
             {
                 var priceItem = itemBll.GetPriceItem(item, price, imgUrl);
                 _priceItemRepository.Add(priceItem);
                 _ppismItemRepository.Update(item);
             }
         }
     }

     _repositoryContext.Commit();
 }
        /// <summary>
        /// Loads the catalog search page for <paramref name="subject"/> and adds the text of the
        /// first child of every <c>nttitle</c> cell to <c>listBox1</c>.
        /// </summary>
        /// <param name="subject">Subject code appended to the catalog search URL.</param>
        /// <remarks>Any exception raised while loading or parsing is silently ignored.</remarks>
        public void getCourses(String subject)
        {
            var client = new HtmlAgilityPack.HtmlWeb();
            try
            {
                var catalogPage = client.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_catalog?term=CURRENT&subject=" + subject);
                var titleCells  = catalogPage.DocumentNode.SelectNodes("//td[@class=\"nttitle\"]");

                foreach (HtmlAgilityPack.HtmlNode cell in titleCells)
                {
                    this.listBox1.Items.Add(cell.FirstChild.InnerText);
                }
            }
            catch (Exception)
            {
                // Intentionally ignored: the list box keeps whatever was added before the failure
            }
        }
        /// <summary>
        /// Loads the schedule detail page for <paramref name="crn"/> and copies the markup of the
        /// first child of the 2nd, 3rd and 4th <c>dddefault</c> cells into <c>textBox1</c>,
        /// <c>textBox2</c> and <c>textBox3</c>, stepping <c>progressBar1</c> along the way.
        /// </summary>
        /// <param name="crn">Course reference number placed in the <c>crn</c> query parameter.</param>
        /// <remarks>Any exception raised while loading or parsing is silently ignored.</remarks>
        public void getSeatRemain(int crn)
        {
            var client = new HtmlAgilityPack.HtmlWeb();
            try
            {
                var detailPage = client.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_schedule_detail?term=CURRENT&crn=" + crn.ToString());

                // Fifty progress steps once the download has finished
                for (int step = 0; step < 50; step++)
                {
                    this.progressBar1.PerformStep();
                }

                int cellNumber = 0;
                foreach (HtmlAgilityPack.HtmlNode cell in detailPage.DocumentNode.SelectNodes("//td[@class=\"dddefault\"]"))
                {
                    this.progressBar1.PerformStep();
                    cellNumber++;

                    switch (cellNumber)
                    {
                        case 2:
                            this.textBox1.Text = cell.FirstChild.WriteTo();
                            break;
                        case 3:
                            this.textBox2.Text = cell.FirstChild.WriteTo();
                            break;
                        case 4:
                            this.textBox3.Text = cell.FirstChild.WriteTo();
                            break;
                    }
                }
            }
            catch (Exception)
            {
                // Intentionally ignored: text boxes keep whatever was set before the failure
            }
        }
 /// <summary>
 /// Checks that the text of the rockland.fm playlist page contains the playlist headline.
 /// </summary>
 public void PlaylistWebsiteShouldContainPlaylistText()
 {
     var playlistPage = new HtmlAgilityPack.HtmlWeb().Load("http://www.rockland.fm/start.php?playlist");
     var pageText     = playlistPage.DocumentNode.ChildNodes["html"].InnerText;

     Assert.IsTrue(pageText.Contains("Playlist - Was lief wann auf ROCKLAND?"));
 }
 /// <summary>
 /// Downloads the English Wikipedia page for <paramref name="ArticleName"/> and stores the
 /// inner text of its <c>mw-content-text</c> element in <c>wikipediaContent</c>.
 /// </summary>
 /// <param name="ArticleName">Article name appended to the wiki URL as given.</param>
 public WikipediaContentProvider(string ArticleName)
 {
     var articleUrl  = "http://en.wikipedia.org/wiki/" + ArticleName;
     var articlePage = new HtmlAgilityPack.HtmlWeb().Load(articleUrl);
     var contentNode = articlePage.DocumentNode.SelectSingleNode("//div[@id=\"mw-content-text\"]");

     wikipediaContent = contentNode.InnerText;
 }
Beispiel #43
0
        /// <summary>
        /// Click handler: downloads the openxcom.org git-builds page (through the configured
        /// proxy when required) and fills <c>advTree1</c> with a "Latest" group holding the
        /// first build and an "Archive" group holding the rest.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        /// <remarks>
        /// Each build node's <c>Tag</c> is a list whose first entry is the download URL,
        /// followed by the HTML of any <c>strong</c>-led paragraphs that come after it.
        /// </remarks>
        private void buttonX2_Click(object sender, EventArgs e)
        {
            HtmlAgilityPack.HtmlWeb website = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument rootDocument = null;

            if (Properties.Settings.Default.requiresProxy)
            {
                rootDocument = website.Load("http://openxcom.org/git-builds/", Properties.Settings.Default.proxyUrl, Properties.Settings.Default.proxyPort, Properties.Settings.Default.proxyUser, Properties.Settings.Default.proxyPwd);
            }
            else
            {
                rootDocument = website.Load("http://openxcom.org/git-builds/");
            }

            int iCount = 0;

            ElementStyle groupStyle = new ElementStyle();
            groupStyle.TextColor = Color.Navy;
            groupStyle.Font =  new Font(this.advTree1.Font.FontFamily, 9.5F);
            groupStyle.Name = "groupstyle";
            advTree1.Styles.Add(groupStyle);

            // Define sub-item style, simply to make text gray
            ElementStyle subItemStyle = new ElementStyle();
            subItemStyle.TextColor = Color.Gray;
            subItemStyle.Name = "subitemstyle";
            advTree1.Styles.Add(subItemStyle);

            Node gnLatest = new Node("Latest", groupStyle);
            gnLatest.Expanded = true;
            advTree1.Nodes.Add(gnLatest);

            Node gnArchive = new Node("Archive", groupStyle);
            gnArchive.Expanded = false;
            advTree1.Nodes.Add(gnArchive);

            // SelectNodes returns null when the page has no matching blocks
            HtmlAgilityPack.HtmlNodeCollection textBlocks = rootDocument.DocumentNode.SelectNodes("//div[@class='text']");
            if (textBlocks == null)
            {
                return;
            }

            List<String> items = null;
            foreach (HtmlAgilityPack.HtmlNode link in textBlocks)
            {
                foreach (HtmlAgilityPack.HtmlNode p in link.ChildNodes)
                {
                    // Whitespace/text nodes between paragraphs have no children; skip them
                    if (p.FirstChild == null)
                    {
                        continue;
                    }

                    if (p.FirstChild.Name.Equals("strong"))
                    {
                        // Extra detail for the most recent build entry, if one has been seen yet
                        if (items != null)
                        {
                            items.Add(p.InnerHtml);
                        }
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode download = p.FirstChild;
                    if (download.Attributes.Count == 0)
                    {
                        // No attribute to take the download URL from
                        continue;
                    }

                    String display = download.InnerHtml.Replace("openxcom_git_master_","").Replace(".zip", "");
                    String subText = p.ChildNodes.Count > 1 ? p.ChildNodes[1].InnerHtml.Replace("- ","") : "";

                    String url = download.Attributes[0].Value;
                    items = new List<String>();
                    items.Add(url);

                    Node item = createChildNode(display, subText, (Image)Properties.Resources.ResourceManager.GetObject("openxcom24"), subItemStyle);

                    // The first build found goes under "Latest", every later one under "Archive"
                    if (iCount == 0)
                    {
                        gnLatest.Nodes.Add(item);
                    }
                    else
                    {
                        gnArchive.Nodes.Add(item);
                    }

                    iCount++;
                    item.Tag = items;
                }
            }
        }
Beispiel #44
0
        /// <summary>
        /// Downloads <paramref name="url"/> and returns the inner HTML of the parsed document.
        /// </summary>
        /// <param name="encoding">
        /// Encoding to force for the download; when null the loader's encoding settings are left unchanged.
        /// </param>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The document's inner HTML, or an empty string when no document was produced.</returns>
        /// <remarks>Exceptions thrown by the load propagate to the caller with their original stack trace.</remarks>
        private static string GetHtmlByXPath(System.Text.Encoding encoding, string url)
        {
            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
            if (encoding != null)
            {
                htmlWeb.AutoDetectEncoding = false;
                htmlWeb.OverrideEncoding = encoding;
            }

            // No try/catch: the former "catch { throw ex; }" only reset the stack trace
            HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

            if (doc == null)
                return "";
            else
                return doc.DocumentNode.InnerHtml;
        }
 /// <summary>
 /// Loads the catalog search page and adds, for every child node of the <c>sel_subj</c>
 /// select element, the text before the first '-' to <c>comboBox1</c> (empty results are skipped).
 /// </summary>
 public void getSubject()
 {
     var client        = new HtmlAgilityPack.HtmlWeb();
     var catalogPage   = client.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_catalog?term=CURRENT");
     var subjectSelect = catalogPage.DocumentNode.SelectSingleNode("//select[@name=\"sel_subj\"]");

     foreach (HtmlAgilityPack.HtmlNode option in subjectSelect.ChildNodes)
     {
         // Keep only the part before the first '-'
         String code = option.InnerText.Split('-')[0];
         if (code.Equals(""))
         {
             continue;
         }

         this.comboBox1.Items.Add(code);
     }
 }
Beispiel #46
0
 /// <summary>
 /// Downloads the page at <paramref name="url"/> and parses it with the document-based
 /// <c>GetSongs</c> overload.
 /// </summary>
 /// <param name="url">Address of the playlist page.</param>
 /// <returns>The songs produced by the document-based overload.</returns>
 public List<Song> GetSongs(Uri url)
 {
     var address  = url.ToString();
     var playlist = new HtmlAgilityPack.HtmlWeb().Load(address);

     return GetSongs(playlist);
 }
        /// <summary>
        /// Downloads a benchmarksgame page and flattens the rows of its first table into a
        /// <see cref="System.Data.DataTable"/>.
        /// </summary>
        /// <param name="path">Path appended to the site's base URL; also stored in each row's <c>url</c> column.</param>
        /// <returns>
        /// A table with the columns <c>url</c> and <c>Rubrique</c> followed by one column per
        /// distinct header cell text; it has no rows when the page contains no table rows.
        /// </returns>
        /// <remarks>
        /// Rows are classified in this order: a row with a linked <c>th colspan="3"</c> sets the
        /// current section name, a row with <c>th</c> cells adds any missing columns, and a row
        /// with <c>td</c> cells becomes a data row. A data row with more cells than there are
        /// columns still throws when the extra cell is assigned.
        /// </remarks>
        private System.Data.DataTable GetData(string path)
        {
            System.Data.DataTable dt = new System.Data.DataTable();

            string baseURL = "http://benchmarksgame.alioth.debian.org/";
            string URL = baseURL + path;

            dt.Columns.Add("url", typeof(string));
            dt.Columns.Add("Rubrique", typeof(string));

            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(URL);

            // SelectNodes returns null when the page has no table rows
            HtmlAgilityPack.HtmlNodeCollection rows = doc.DocumentNode.SelectNodes("//table[1]//tr");
            if (rows == null)
            {
                return dt;
            }

            char[] trimChars = new char[] { ' ', '\t', '\r', '\n' };
            string rubrique = null;

            foreach (HtmlAgilityPack.HtmlNode link in rows)
            {
                // Section heading row: remember its name for the data rows that follow
                HtmlAgilityPack.HtmlNode a = link.SelectSingleNode("./th[@colspan=\"3\"]//a");
                if (a != null)
                {
                    rubrique = a.InnerText;
                    continue;
                }

                // Header row: every header row may contribute columns that do not exist yet
                // (the former "haveHeaders" flag was never set, so it never skipped anything).
                var tableHeaders = link.SelectNodes("./th");
                if (tableHeaders != null)
                {
                    int count = 0;
                    foreach (HtmlAgilityPack.HtmlNode th in tableHeaders)
                    {
                        count++;
                        string colname = th.InnerText.Trim(trimChars);

                        // Unnamed header cells get a positional placeholder name
                        if (string.IsNullOrEmpty(colname))
                            colname = "COLUMN_" + count.ToString();

                        if (!dt.Columns.Contains(colname))
                        {
                            dt.Columns.Add(colname, typeof(string));
                        }
                    } // Next th

                    continue;
                }

                // Data row
                var tableData = link.SelectNodes("./td");
                if (tableData != null)
                {
                    System.Data.DataRow dr = dt.NewRow();

                    dr["url"] = path;
                    dr["Rubrique"] = rubrique;

                    // Cells are stored by position, starting after the two fixed columns
                    int columnIndex = 2;
                    foreach (HtmlAgilityPack.HtmlNode td in tableData)
                    {
                        string val = td.InnerText.Trim(trimChars);
                        val = System.Web.HttpUtility.HtmlDecode(val);
                        val = val.Replace(",", "");

                        dr[columnIndex] = val;
                        columnIndex++;
                    } // Next td

                    dt.Rows.Add(dr);
                }
            } // Next link

            return dt;
        }
Beispiel #48
0
        /// <summary>
        /// Fetches the page named by the request's <c>url</c> parameter and writes a JSON object
        /// with its visible text, image URLs, title and meta description to the response.
        /// </summary>
        /// <remarks>
        /// The page is first decoded as windows-1254; when the extracted text contains 'Ä' the
        /// page is loaded again as UTF-8. presumably 'Ä' marks UTF-8 text that was decoded with
        /// the wrong code page — confirm.
        /// NOTE(review): the server downloads an arbitrary caller-supplied address; consider
        /// restricting the allowed hosts.
        /// </remarks>
        /// <exception cref="Exception">The request carries no <c>url</c> value.</exception>
        private void parseWebPageHtmlAsContent()
        {
            string url = Provider.Request["url"];
            if (string.IsNullOrWhiteSpace(url))
                throw new Exception(Provider.TR("Url belirtiniz"));

            // Only add a scheme when there is none; https addresses must be left alone
            if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                url = "http://" + url;

            HtmlAgilityPack.HtmlDocument doc = loadPageWithoutScripts(url, Encoding.GetEncoding("windows-1254"));
            string metin = collectBodyText(doc);

            if (metin.Contains('Ä'))
            {
                doc = loadPageWithoutScripts(url, Encoding.UTF8);
                metin = collectBodyText(doc);
            }

            // Normalize line breaks: drop blank lines, trim each line, separate lines by one empty line
            metin = metin.Replace("\r", "");
            while (metin.Contains("\n\n"))
                metin = metin.Replace("\n\n","\n");

            metin = metin.Split('\n').Where(l => !string.IsNullOrWhiteSpace(l)).Select(l=>l.Trim()).ToList().StringJoin("\n\n");

            string title = (from x in doc.DocumentNode.Descendants()
                            where x.Name.ToLower() == "title"
                            select x.InnerText).FirstOrDefault();

            // A description meta tag without a content attribute is ignored instead of crashing
            string desc = (from x in doc.DocumentNode.Descendants()
                           where x.Name.ToLower() == "meta"
                           && x.Attributes["name"] != null
                           && x.Attributes["name"].Value.ToLower() == "description"
                           && x.Attributes["content"] != null
                           select x.Attributes["content"].Value).FirstOrDefault();

            // Resolve image sources against the page address; sources that do not form a
            // valid URI are skipped rather than aborting the whole response.
            Uri baseUri = new Uri(url);
            List<string> imgs = new List<string>();
            foreach (var img in doc.DocumentNode.Descendants()
                                   .Where(x => x.Name.ToLower() == "img" && x.Attributes["src"] != null && x.Attributes["src"].Value != null))
            {
                Uri absolute;
                if (Uri.TryCreate(baseUri, img.Attributes["src"].Value, out absolute))
                    imgs.Add(absolute.ToString());
            }

            context.Response.ContentType = "application/json";
            context.Response.Write(JsonConvert.SerializeObject(new { text = Provider.Server.HtmlDecode(metin), imgs = imgs, title = Provider.Server.HtmlDecode(title), desc = Provider.Server.HtmlDecode(desc) }));
        }

        /// <summary>Loads <paramref name="url"/> with a forced encoding and removes all script and style elements.</summary>
        private static HtmlAgilityPack.HtmlDocument loadPageWithoutScripts(string url, Encoding encoding)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            web.OverrideEncoding = encoding;
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            // Materialize first: nodes cannot be removed while Descendants() is being enumerated
            doc.DocumentNode.Descendants()
                .Where(n => n.Name == "script" || n.Name == "style")
                .ToList()
                .ForEach(n => n.Remove());

            return doc;
        }

        /// <summary>Concatenates the text nodes below <c>body</c>; returns an empty string when there are none.</summary>
        private static string collectBodyText(HtmlAgilityPack.HtmlDocument doc)
        {
            var textNodes = doc.DocumentNode.SelectNodes("//body//text()");
            if (textNodes == null)
                return "";

            var buffer = new System.Text.StringBuilder();
            foreach (var node in textNodes)
            {
                buffer.Append(node.InnerText);
            }
            return buffer.ToString();
        }