示例#1
0
        /// <summary>
        /// Searches the site for <paramref name="text"/> and collects the products of every
        /// category link found on the result pages.
        /// </summary>
        /// <remarks>
        /// Result pages are requested one after another (page=1, 2, ...) until the server answers
        /// with a non-OK status or the first <c>span.bc__item</c> element reads "Не найдено"
        /// (Russian for "Not found").
        /// </remarks>
        /// <param name="text">Raw, unencoded search phrase; null is treated as empty.</param>
        /// <returns>Everything <c>processCat</c> returned for the category links; never null.</returns>
        public static List<Product> search(string text)
        {
            // Escape the phrase so characters such as '&', '#' or '+' cannot corrupt the query string.
            var url  = site + "search?searchtext=" + System.Uri.EscapeDataString(text ?? string.Empty) + "&page=";
            var list = new List<Product>();

            for (var page = 1; ; page++)
            {
                var document = web.Load(url + page);
                if (web.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    break;
                }

                // The site signals "past the last page" through this marker text.
                var notFoundMarker = document.DocumentNode.SelectSingleNode("//span[@class='bc__item']");
                if (notFoundMarker != null && notFoundMarker.InnerText == "Не найдено")
                {
                    break;
                }

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var categoryLinks = document.DocumentNode.SelectNodes("//a[@class='link group-header']");
                if (categoryLinks == null)
                {
                    continue;
                }

                foreach (var categoryLink in categoryLinks)
                {
                    // The XPath does not require an href, so guard against anchors without one.
                    var href = categoryLink.Attributes["href"];
                    if (href != null && !string.IsNullOrEmpty(href.Value))
                    {
                        list.AddRange(processCat(href.Value));
                    }
                }
            }

            return list;
        }
示例#2
0
        /// <summary>
        /// Downloads the images of every listing linked from a fixed eBay category page.
        /// </summary>
        /// <remarks>
        /// Each listing page is expected to carry a script containing an "imgArr" JSON array; the
        /// "maxImageUrl" of every entry is downloaded into the current directory. Files are named
        /// by the hexadecimal MD5 digest of the image URL plus the URL's file extension.
        /// Listings without a usable link or without an image array are skipped.
        /// </remarks>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string startUrl = "http://www.ebay.com/sch/Accessory-Bundles-/176971/i.html";

            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
            var doc       = htmlWeb.Load(startUrl);
            var linkNodes = doc.DocumentNode.SelectNodes("//h3[@class=\"lvtitle\"]//a");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (linkNodes == null)
            {
                Console.WriteLine("No listings found at {0}", startUrl);
                return;
            }

            var urls = linkNodes.Select(linkNode => linkNode.GetAttributeValue("href", "/")).ToList();

            // The pattern is loop-invariant, so it is built once instead of once per listing.
            Regex rgx = new Regex(@".+""imgArr"" : (.+\]), ""islarge"".+", RegexOptions.Multiline);

            using (WebClient webClient = new WebClient())
            using (HashAlgorithm algorithm = MD5.Create())
            {
                foreach (var url in urls)
                {
                    // Links without an absolute target (including the "/" fallback) cannot be loaded.
                    Uri listingUri;
                    if (!Uri.TryCreate(url, UriKind.Absolute, out listingUri))
                    {
                        continue;
                    }

                    var subDoc      = htmlWeb.Load(url);
                    var scriptNodes = subDoc.DocumentNode.SelectNodes("//script[contains(.,\"imgArr\")]");
                    if (scriptNodes == null)
                    {
                        continue;
                    }

                    // Use the first script the pattern really matches; several scripts may mention imgArr.
                    var jsonText = scriptNodes
                                   .Select(scriptNode => rgx.Match(scriptNode.InnerText))
                                   .Where(match => match.Success)
                                   .Select(match => match.Groups[1].Value)
                                   .FirstOrDefault();
                    if (string.IsNullOrEmpty(jsonText))
                    {
                        continue;
                    }

                    var images = JArray.Parse(jsonText).Children().Select(img => img.Value<string>("maxImageUrl"));
                    foreach (string image_url in images)
                    {
                        if (string.IsNullOrEmpty(image_url))
                        {
                            continue;
                        }

                        Uri    uri       = new Uri(image_url);
                        string extension = System.IO.Path.GetExtension(uri.LocalPath);
                        string hash      = string.Join("", algorithm.ComputeHash(Encoding.UTF8.GetBytes(image_url)).Select(b => b.ToString("X2")));
                        Console.WriteLine("Downloading {0} file", image_url);
                        webClient.DownloadFile(image_url, string.Format("{0}{1}", hash, extension));
                    }
                }
            }
        }
示例#3
0
        /// <summary>
        /// Scrapes a deal from its desktop page (remaining time) and from its mobile page
        /// (image, title and per-variation title / sold text).
        /// </summary>
        /// <remarks>
        /// Every field is extracted independently: a node missing from the page leaves only that
        /// field unset instead of discarding everything scraped after it.
        /// </remarks>
        /// <param name="url">Address of the desktop deal page.</param>
        /// <returns>The populated data object; <c>Variations</c> is always a list, possibly empty.</returns>
        private GrouponData GetPage(string url)
        {
            GrouponData GD = new GrouponData();
            GD.URL           = url;
            GD.TimeRemaining = TimeSpan.Zero;
            GD.Variations    = new List<GrouponItem>();

            HtmlAgilityPack.HtmlWeb Html = new HtmlAgilityPack.HtmlWeb();

            // Desktop page: the hidden input's value is passed to TimeSpan as milliseconds.
            var desktopDoc   = Html.Load(url);
            var timeLeftNode = desktopDoc.DocumentNode.SelectSingleNode("//input[@class='jcurrentTimeLeft']");
            if (timeLeftNode != null)
            {
                int millisecondsLeft;
                if (int.TryParse(timeLeftNode.GetAttributeValue("value", string.Empty), out millisecondsLeft))
                {
                    GD.TimeRemaining = new TimeSpan(0, 0, 0, 0, millisecondsLeft);
                }
            }

            // Mobile page: swap only the "www." host prefix for "m.". A blanket Replace("www", "m")
            // would also rewrite any "www" occurring in the path or query string.
            const string hostMarker = "://www.";
            int    markerIndex = url.IndexOf(hostMarker, StringComparison.OrdinalIgnoreCase);
            string mobileUrl   = markerIndex >= 0
                                 ? url.Substring(0, markerIndex) + "://m." + url.Substring(markerIndex + hostMarker.Length)
                                 : url.Replace("www", "m"); // no recognisable host prefix: keep the legacy rewrite

            var doc = Html.Load(mobileUrl);

            var imageNode = doc.DocumentNode.SelectSingleNode("/html[1]/body[1]/div[2]/div[1]/img[1]");
            if (imageNode != null)
            {
                var source = imageNode.Attributes["src"];
                if (source != null)
                {
                    GD.Image = source.Value;
                }
            }

            var titleNode = doc.DocumentNode.SelectSingleNode("/html[1]/body[1]/div[2]/h1[1]");
            if (titleNode != null)
            {
                GD.Title = titleNode.InnerText.Trim();
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var variationNodes = doc.DocumentNode.SelectNodes("/html[1]/body[1]/div[3]/ul[1]/li");
            if (variationNodes != null)
            {
                foreach (var item in variationNodes)
                {
                    var itemTitleNode = item.SelectSingleNode(".//tr[1]/td[1]/h3[1]");
                    if (itemTitleNode == null)
                    {
                        continue; // not a variation row
                    }

                    var tmpitem = new GrouponItem();
                    tmpitem.Title = itemTitleNode.InnerText;

                    var soldNode = item.SelectSingleNode(".//tr[1]/td[1]/div[1]/strong[last()]");
                    if (soldNode != null)
                    {
                        tmpitem.Sold = soldNode.InnerText;
                    }

                    GD.Variations.Add(tmpitem);
                }
            }

            return GD;
        }
示例#4
0
        /// <summary>
        /// Timer callback: refreshes the buy/sell labels from dovizborsa.com.
        /// </summary>
        /// <remarks>
        /// Each page is downloaded once and queried twice. When several spans match, the text of
        /// the last one is shown; when none match, the label keeps its previous text.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Timer1_Tick(object sender, EventArgs e)
        {
            // Returns the inner text of the last node matching the XPath, or null when nothing matches
            // (SelectNodes yields null rather than an empty collection in that case).
            System.Func<HtmlAgilityPack.HtmlDocument, string, string> lastText = (page, xpath) =>
            {
                var nodes = page.DocumentNode.SelectNodes(xpath);
                return (nodes != null && nodes.Count > 0) ? nodes[nodes.Count - 1].InnerText : null;
            };

            // [Dollar] span with class "-by-" feeds the buy label, "-sl-" the sell label.
            HtmlAgilityPack.HtmlDocument dollarPage = hweb.Load("https://dovizborsa.com/doviz/dolar");

            string dollarBuy = lastText(dollarPage, "//span[@class='-by-']");
            if (dollarBuy != null)
            {
                dolaral.Text = dollarBuy;
            }

            string dollarSell = lastText(dollarPage, "//span[@class='-sl-']");
            if (dollarSell != null)
            {
                dolarsat.Text = dollarSell;
            }

            // [EURO]
            // NOTE(review): the labels are named euro*, but the page loaded is the pound-sterling
            // one ("ingiliz-sterlini") — confirm which currency is intended.
            HtmlAgilityPack.HtmlDocument secondPage = hweb.Load("https://dovizborsa.com/doviz/ingiliz-sterlini");

            string secondBuy = lastText(secondPage, "//span[@class='-by-']");
            if (secondBuy != null)
            {
                euroal.Text = secondBuy;
            }

            string secondSell = lastText(secondPage, "//span[@class='-sl-']");
            if (secondSell != null)
            {
                eurosat.Text = secondSell;
            }
        }
示例#5
0
        /// <summary>
        /// Collects the city/website pairs listed on the mma.org directory page and probes each
        /// website for an "/agendacenter" page whose h1 reads "Agenda Center".
        /// </summary>
        /// <remarks>
        /// Writes every pair to "MACities.txt" and the pairs whose probe succeeded to
        /// "CivicListMA.txt", both as UTF-8. Sites that fail to load are reported and skipped.
        /// </remarks>
        /// <returns>All records in the form <c>"city","website",</c>.</returns>
        static List<string> FindCitiesUseCivicMA()
        {
            string url        = "https://www.mma.org/print/16150";
            var    web        = new HtmlAgilityPack.HtmlWeb();
            var    doc        = web.Load(url);
            var    linksNodes = doc.DocumentNode.SelectNodes("//div[@class='linkRow']");

            List<string> data     = new List<string>();
            // websites[i] is the address belonging to data[i]. Keeping it separately avoids
            // re-parsing it out of the CSV line, which breaks when the URL contains a comma.
            List<string> websites = new List<string>();

            if (linksNodes != null)
            {
                foreach (var linkNode in linksNodes)
                {
                    var nameNode    = linkNode.SelectSingleNode("./div[@class='comName']");
                    var websiteNode = linkNode.SelectSingleNode(".//a[@href]");

                    // Rows lacking either the name cell or a link cannot produce a record.
                    if (nameNode == null || websiteNode == null)
                    {
                        continue;
                    }

                    string website = websiteNode.Attributes["href"].Value; // the XPath guarantees href
                    data.Add(string.Format("\"{0}\",\"{1}\",", nameNode.InnerText, website));
                    websites.Add(website);
                }
            }

            File.WriteAllLines("MACities.txt", data, Encoding.UTF8);
            List<string> civicList = new List<string>();

            for (int i = 0; i < data.Count; i++)
            {
                string newSite = websites[i].TrimEnd('/') + "/agendacenter";
                try
                {
                    doc = web.Load(newSite);
                    if (doc.DocumentNode.SelectSingleNode("//h1[text()='Agenda Center']") != null)
                    {
                        civicList.Add(data[i]);
                    }
                    Console.WriteLine("{0} found use civic", civicList.Count);
                }
                catch (Exception ex)
                {
                    // Unreachable or malformed sites are expected; report them and move on.
                    Console.WriteLine("Skipping {0}: {1}", newSite, ex.Message);
                }
                // Zero-based position of the record just processed (was an O(n) IndexOf per line).
                Console.WriteLine("{0} visited...", i);
            }

            File.WriteAllLines("CivicListMA.txt", civicList, Encoding.UTF8);

            return data;
        }
示例#6
0
        /// <summary>
        /// Reads the fixtures of one round from the page at <paramref name="url"/>.
        /// </summary>
        /// <remarks>
        /// Rows with class "vevent " and rows with class "vevent impar" are read as two parallel
        /// series; results are emitted pairwise, one from each series. Any failure while selecting
        /// or pairing prints "This round is not available" and returns what was gathered so far.
        /// NOTE(review): the loop bound is the first series' count minus one, so the last pair is
        /// never emitted — confirm this is intended.
        /// </remarks>
        /// <param name="url">Address of the round page.</param>
        /// <returns>The matches extracted; empty when the page has no usable rows.</returns>
        public List <Match> ExtractRoundByURL(string url)
        {
            var matches = new List <Match>();
            var page    = new HtmlAgilityPack.HtmlWeb().Load(url);

            try
            {
                var root = page.DocumentNode;

                // ToList() on a null SelectNodes result throws, which is what routes an
                // unavailable round into the catch block below.
                var homeFirst  = root.SelectNodes("//tr[@class='vevent ']//td[@class='equipo1']").ToList();
                var homeSecond = root.SelectNodes("//tr[@class='vevent impar']//td[@class='equipo1']").ToList();
                var awayFirst  = root.SelectNodes("//tr[@class='vevent ']//td[@class='equipo2']").ToList();
                var awaySecond = root.SelectNodes("//tr[@class='vevent impar']//td[@class='equipo2']").ToList();

                var scoreFirst  = root.SelectNodes("//tr[@class='vevent ']//span[@class='clase']").ToList();
                var scoreSecond = root.SelectNodes("//tr[@class='vevent impar']//span[@class='clase']").ToList();

                int pairCount = homeFirst.Count - 1;
                int index     = 0;
                while (index < pairCount)
                {
                    var first = new Match();
                    first.local     = homeFirst[index].InnerText;
                    first.resultado = scoreFirst[index].InnerText;
                    first.visitante = awayFirst[index].InnerText;
                    matches.Add(first);

                    var second = new Match();
                    second.local     = homeSecond[index].InnerText;
                    second.resultado = scoreSecond[index].InnerText;
                    second.visitante = awaySecond[index].InnerText;
                    matches.Add(second);

                    index++;
                }
            }
            catch
            {
                Console.WriteLine("This round is not available");
            }

            return matches;
        }
        /// <summary>
        /// Scrapes the company table from slickcharts.com/sp500.
        /// </summary>
        /// <remarks>
        /// The table's td cells are read as a flat list and consumed seven at a time: offset 0 is
        /// parsed as the rank, 1 is used as the company name, 2 as the ticker and 4 as the price.
        /// </remarks>
        /// <returns>One entry per complete row; empty when the table is not found.</returns>
        public List <Company> GetInfoSlick()
        {
            // NOTE(review): ctx is never used below; kept in case constructing Context has
            // required side effects — remove it (or dispose it) once that is confirmed.
            var ctx = new Context();

            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.slickcharts.com/sp500");

            var listOfCompanies = new List <Company>();

            // SelectNodes returns null (not an empty collection) when the table is missing.
            var tables = doc.DocumentNode
                         .SelectNodes("//table[@class='table table-hover table-borderless table-sm']");
            if (tables == null)
            {
                return listOfCompanies;
            }

            var cells = tables.Descendants("td").ToList();

            const int cellsPerRow = 7;
            int       rowCount    = cells.Count / cellsPerRow; // a trailing partial row is ignored

            for (int row = 0; row < rowCount; row++)
            {
                int first = row * cellsPerRow;

                var rank        = Convert.ToInt32(cells[first].InnerText, CultureInfo.InvariantCulture);
                var companyName = cells[first + 1].InnerText;
                var ticker      = cells[first + 2].InnerText;

                // The first 13 characters of the price cell are dropped before parsing —
                // presumably fixed leading markup/whitespace; confirm against the live page.
                var priceText = cells[first + 4].InnerText.Remove(0, 13);
                var price     = Convert.ToDouble(priceText, CultureInfo.InvariantCulture);

                listOfCompanies.Add(new Company(companyName, ticker, rank, price));
            }

            return listOfCompanies;
        }
示例#8
0
        /// <summary>
        /// Loads <paramref name="url"/> and returns the values of every attribute named
        /// <paramref name="attribute"/> on the nodes selected by <paramref name="xPath"/>.
        /// Each value is also echoed to the console.
        /// </summary>
        /// <param name="url">Page to download.</param>
        /// <param name="xPath">XPath expression selecting the nodes to inspect.</param>
        /// <param name="attribute">Attribute name to collect (exact match).</param>
        /// <returns>The collected values; empty when nothing matched or the XPath failed.</returns>
        private static List <string> scraperLoop(string url, string xPath, string attribute)
        {
            List <string> returnList = new List <string>();
            var           web        = new HtmlAgilityPack.HtmlWeb();
            var           doc        = web.Load(url);

            try
            {
                // SelectNodes returns null when nothing matches; handle that explicitly rather
                // than relying on a caught NullReferenceException.
                var scraped = doc.DocumentNode.SelectNodes(xPath);
                if (scraped == null)
                {
                    Console.WriteLine("No nodes matched XPath: {0}", xPath);
                    return returnList;
                }

                foreach (var result in scraped)
                {
                    foreach (var attr in result.Attributes)
                    {
                        if (attr.Name == attribute)
                        {
                            returnList.Add(attr.Value);
                            Console.WriteLine(attr.Value);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // E.g. a malformed XPath expression; report it and return what was gathered.
                Console.WriteLine(e.Message);
            }

            return returnList;
        }
示例#9
0
        /// <summary>
        /// Fills <paramref name="model"/>'s Title and Description from the page at its Url and
        /// returns the model wrapped in an item response.
        /// </summary>
        /// <remarks>
        /// Validation now happens before any download, and the download itself runs inside the
        /// try block, so an unreachable or unexpected page yields 400 with the error message
        /// instead of an unhandled exception. A missing title or description leaves the
        /// corresponding model value untouched.
        /// </remarks>
        /// <param name="model">Recipe whose Url is scraped.</param>
        /// <returns>200 with the updated model, or 400 describing the problem.</returns>
        public HttpResponseMessage GetRecipes(Recipe model)
        {
            try
            {
                if (model == null)
                {
                    return(Request.CreateErrorResponse(HttpStatusCode.BadRequest, "A recipe model with a Url is required."));
                }

                if (!ModelState.IsValid)
                {
                    return(Request.CreateErrorResponse(HttpStatusCode.BadRequest, ModelState));
                }

                var url    = model.Url;
                var webGet = new HtmlAgilityPack.HtmlWeb();

                HtmlAgilityPack.HtmlDocument doc = webGet.Load(url);
                // NOTE(review): 'web' is never used; remove it once it is confirmed that
                // constructing it has no required side effect.
                HtmlWeb web = new HtmlWeb(url);

                var titleNode = doc.DocumentNode.SelectSingleNode("//head/title");
                if (titleNode != null)
                {
                    model.Title = titleNode.InnerText;
                }

                var descriptionNode = doc.DocumentNode.SelectSingleNode("//head/meta[@name='description']");
                if (descriptionNode != null)
                {
                    var content = descriptionNode.Attributes["content"];
                    if (content != null)
                    {
                        model.Description = content.Value;
                    }
                }

                ItemResponse <Recipe> response = new ItemResponse <Recipe>();
                response.Item = model;
                return(Request.CreateResponse(HttpStatusCode.OK, response));
            }
            catch (Exception ex)
            {
                return(Request.CreateResponse(HttpStatusCode.BadRequest, ex.Message));
            }
        }
        } // End Sub button2_Click

        /// <summary>
        /// Downloads the benchmarks-game front page and lists the links found in its first section.
        /// </summary>
        /// <returns>
        /// A table with string columns "Name" (HTML-decoded link text) and "Url" (href value),
        /// sorted by Name ascending. The table is empty when the page contains no such links.
        /// </returns>
        private System.Data.DataTable GetAllBenchmarks()
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(@"http://benchmarksgame.alioth.debian.org/");

            System.Data.DataTable dt = new System.Data.DataTable();

            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Url", typeof(string));

            // SelectNodes returns null (not an empty collection) when nothing matches, so it
            // must be checked before it is enumerated.
            HtmlAgilityPack.HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//section[1]//li/a[@href]");
            if (links != null)
            {
                foreach (HtmlAgilityPack.HtmlNode link in links)
                {
                    System.Data.DataRow dr = dt.NewRow();
                    dr["Name"] = System.Web.HttpUtility.HtmlDecode(link.InnerText);
                    dr["Url"]  = link.Attributes["href"].Value; // the XPath guarantees href

                    dt.Rows.Add(dr);
                } // Next link
            }

            System.Data.DataView dv = dt.DefaultView;
            dv.Sort = "Name ASC";
            System.Data.DataTable sortedDT = dv.ToTable();

            return(sortedDT);
        } // End Function GetAllBenchmarks
示例#11
0
        /// <summary>
        /// Scrapes the historical-data table of a coin from coinmarketcap.com.
        /// </summary>
        /// <remarks>
        /// The rows are located positionally inside the first element whose class contains
        /// "table-responsive" (child 1, then child 3), and within a row the cells are read at
        /// the odd child positions 1–13 — presumably because whitespace text nodes occupy the
        /// even positions; confirm if the page layout changes.
        /// </remarks>
        /// <param name="coinName">Coin identifier as used in the site's URLs; null is treated as empty.</param>
        /// <returns>One entry per table row; empty when the table container is not found.</returns>
        public static List <History> GetCoinHistory(string coinName)
        {
            // Escaped so that an unusual name cannot alter the path or inject a query/fragment.
            string historyUrl = "https://coinmarketcap.com/currencies/" + System.Uri.EscapeDataString(coinName ?? string.Empty) + "/historical-data/";

            HtmlAgilityPack.HtmlWeb      htmlweb      = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDocument = htmlweb.Load(historyUrl);
            var            historicalData             = htmlDocument.DocumentNode.SelectNodes("//*[contains(@class,'table-responsive')]");
            List <History> historyList = new List <History>();

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (historicalData == null || historicalData.Count == 0)
            {
                return(historyList);
            }

            // Resolve the row container once instead of walking the same chain for every cell.
            var rows = historicalData[0].ChildNodes[1].ChildNodes[3].ChildNodes;

            foreach (var row in rows)
            {
                if (row.Name == "#text")
                {
                    continue; // text between rows
                }

                var     cells   = row.ChildNodes;
                History history = new History();
                history.Date      = cells[1].InnerText;
                history.Open      = cells[3].InnerText;
                history.High      = cells[5].InnerText;
                history.Low       = cells[7].InnerText;
                history.Close     = cells[9].InnerText;
                history.Volume    = cells[11].InnerText;
                history.MarketCap = cells[13].InnerText;
                historyList.Add(history);
            }

            return(historyList);
        }
示例#12
0
文件: Form1.cs 项目: ljk950327/study
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns the outer HTML of the
        /// parsed document's root node.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The document's markup as produced by the parser.</returns>
        private string GetWebPageHtmlFromUrl(string url)
        {
            HtmlAgilityPack.HtmlWeb  client = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlNode root   = client.Load(url).DocumentNode;

            return root.OuterHtml;
        }
示例#13
0
        /// <summary>
        /// Fills <c>lvNews</c> with the IMDb box-office chart: title, rating-column text,
        /// secondary-info text and weeks-column text per row.
        /// </summary>
        /// <remarks>
        /// When the page yields no title links the list view is left untouched. A column that
        /// has fewer entries than there are titles is shown as empty text for the missing rows.
        /// </remarks>
        private void getBoxOffice()
        {
            HtmlAgilityPack.HtmlWeb      web  = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc  = web.Load("https://www.imdb.com/chart/boxoffice/?ref_=nv_ch_cht");
            HtmlAgilityPack.HtmlNode     root = doc.DocumentNode;

            var mtitle  = root.SelectNodes("//td[contains(@class,'titleColumn')]/a");
            var mrating = root.SelectNodes("//td[contains(@class,'ratingColumn')]");
            var mgross  = root.SelectNodes("//span[contains(@class,'secondaryInfo')]");
            var weeks   = root.SelectNodes("//td[contains(@class,'weeksColumn')]");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (mtitle == null)
            {
                return;
            }

            // The four collections are selected independently, so they need not be equally long.
            System.Func<HtmlAgilityPack.HtmlNodeCollection, int, string> textAt = (nodes, index) =>
                (nodes != null && index < nodes.Count) ? nodes[index].InnerText : string.Empty;

            lvNews.Items.Clear();
            lvNews.View = View.Details; // make column headings visible

            for (int i = 0; i < mtitle.Count; i++)
            {
                // One ListViewItem per chart row; sub-items fill the remaining columns.
                ListViewItem lvi = new ListViewItem(mtitle[i].InnerText);
                lvi.SubItems.Add(textAt(mrating, i));
                lvi.SubItems.Add(textAt(mgross, i));
                lvi.SubItems.Add(textAt(weeks, i));
                lvNews.Items.Add(lvi);
            }
        }
示例#14
0
文件: Storm.cs 项目: Slazanger/SMT
        /// <summary>
        /// Scrapes the storm table from evescoutrescue.com.
        /// </summary>
        /// <remarks>
        /// Cell 0 is stored as Region, 1 as System, 2 as Name and 3 as Type. Rows with fewer
        /// than four cells are skipped. The method is best-effort: any failure is written to
        /// the debug output and whatever was gathered so far is returned.
        /// </remarks>
        /// <returns>The storms found; empty when the table is missing or the download fails.</returns>
        public static List <Storm> GetStorms()
        {
            List <Storm> storms = new List <Storm>();

            try
            {
                string sourceHTML          = "https://evescoutrescue.com/home/stormtrack.php";
                string tableXPath          = "/html/body/div/div[4]/div/div/div[2]/table/tbody";
                HtmlAgilityPack.HtmlWeb hw = new HtmlAgilityPack.HtmlWeb();

                HtmlAgilityPack.HtmlDocument       doc = hw.Load(sourceHTML);
                HtmlAgilityPack.HtmlNodeCollection hnc = doc.DocumentNode.SelectNodes(tableXPath);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                if (hnc == null)
                {
                    return(storms);
                }

                // Index 3 is read below, so a usable row needs at least four cells. Filtering
                // here keeps one short row from aborting every row after it.
                const int requiredCells = 4;

                List <List <string> > table = hnc.Descendants("tr")
                                              .Select(tr => tr.Elements("td").Select(td => td.InnerText.Trim()).ToList())
                                              .Where(cells => cells.Count >= requiredCells)
                                              .ToList();

                foreach (List <string> ls in table)
                {
                    Storm s = new Storm();
                    s.Region = ls[0];
                    s.System = ls[1];
                    s.Name   = ls[2];
                    s.Type   = ls[3];

                    storms.Add(s);
                }
            }
            catch (global::System.Exception ex)
            {
                // global:: keeps the namespace from being confused with a member named System.
                global::System.Diagnostics.Debug.WriteLine("GetStorms failed: " + ex.Message);
            }

            return(storms);
        }
示例#15
0
        /// <summary>
        /// Appends the IMDb top-TV chart to <c>lvTopseries</c>: title, rating text,
        /// secondary-info text and an absolute link per row.
        /// </summary>
        /// <remarks>
        /// When the page yields no title links nothing is changed. A column with fewer entries
        /// than there are titles is shown as empty text for the missing rows.
        /// NOTE(review): existing items are not cleared, so calling this twice duplicates the
        /// rows — confirm whether that is intended.
        /// </remarks>
        public void getTopSeries()
        {
            HtmlAgilityPack.HtmlWeb      web  = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc  = web.Load("https://www.imdb.com/chart/toptv/?ref_=nv_tvv_250");
            HtmlAgilityPack.HtmlNode     root = doc.DocumentNode;

            var myear   = root.SelectNodes("//span[@class='secondaryInfo']");
            var mrating = root.SelectNodes("//tr/td[contains(@class,'ratingColumn imdbRating')]");
            var mtitle  = root.SelectNodes("//td[contains(@class,'titleColumn')]/a");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (mtitle == null)
            {
                return;
            }

            // The collections are selected independently, so they need not be equally long.
            System.Func<HtmlAgilityPack.HtmlNodeCollection, int, string> textAt = (nodes, index) =>
                (nodes != null && index < nodes.Count) ? nodes[index].InnerText : string.Empty;

            lvTopseries.View = View.Details;

            for (int i = 0; i < mtitle.Count; i++)
            {
                // Drop the href's leading slash: the base already ends with one, and joining
                // them verbatim produced "https://www.imdb.com//...".
                string href = mtitle[i].GetAttributeValue("href", string.Empty).Trim();
                string link = "https://www.imdb.com/" + href.TrimStart('/');

                var lvi = lvTopseries.Items.Add(mtitle[i].InnerText);
                lvi.SubItems.Add(textAt(mrating, i));
                lvi.SubItems.Add(textAt(myear, i));
                lvi.SubItems.Add(link);
            }
        }
示例#16
0
        /*
         *  Constructor: Scrapes weather.com's hour-by-hour data for time, temperature, temperature feeling, precipitation %,
         *  wind speed, and location.  Location comes from the zip code provided by the user.
         *
         *  Try/Catch: Pulls data based on the zip code provided by the user.  If the page cannot be loaded, the problem is
         *  reported, the user is asked to confirm, and the original exception is rethrown so construction stops there
         *  instead of continuing with an empty document.
         *
         *  Attributes: Selects nodes of HTML by specifically looking for class names.  Each node collection is null when
         *  the page contains no matching element.
         *
         *  lUserSettings.Location: Set from the page's title heading with its last 15 characters removed (presumably a
         *  fixed trailing suffix - confirm against the live page).  Left unchanged when the heading is missing.
         */
        public WebScrape(ref Settings lUserSettings)
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc;

            try
            {
                doc = web.Load("https://weather.com/weather/hourbyhour/l/" + lUserSettings.ZipCode + ":4:US");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine("\nThe website this program is trying to reach could be down, or a 5-digit zip code needs to be placed" +
                                  " in the settings.txt file. \n\n  Press any button to close the program.");
                Console.ReadLine();

                // Nothing below can work without the page; surface the real cause to the caller.
                throw;
            }

            this.mTimeData     = doc.DocumentNode.SelectNodes("//div[@class='hourly-time']");
            this.mTempData     = doc.DocumentNode.SelectNodes("//td[@class='temp']");
            this.mFeelData     = doc.DocumentNode.SelectNodes("//td[@class='feels']");
            this.mPrecipData   = doc.DocumentNode.SelectNodes("//td[@class='precip']");
            this.mWindData     = doc.DocumentNode.SelectNodes("//td[@class='wind']");
            this.mLocationData = doc.DocumentNode.SelectNodes("//div[@class='locations-title hourly-page-title']/h1");

            if (this.mLocationData != null && this.mLocationData.Count > 0)
            {
                const int suffixLength = 15;
                string    heading      = this.mLocationData[0].InnerText;

                // Substring would throw on a heading shorter than the suffix; keep it whole then.
                lUserSettings.Location = heading.Length >= suffixLength
                                         ? heading.Substring(0, heading.Length - suffixLength)
                                         : heading;
            }
        }
        /// <summary>
        /// Downloads the benchmarks-game front page and lists the links found in its first section.
        /// </summary>
        /// <returns>
        /// A table with string columns "Name" (HTML-decoded link text) and "Url" (href value),
        /// sorted by Name ascending. The table is empty when the page contains no such links.
        /// </returns>
        private System.Data.DataTable GetAllBenchmarks()
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(@"http://benchmarksgame.alioth.debian.org/");

            System.Data.DataTable dt = new System.Data.DataTable();

            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Url", typeof(string));

            // SelectNodes returns null (not an empty collection) when nothing matches; fall
            // through with an empty table in that case instead of failing in the foreach.
            HtmlAgilityPack.HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//section[1]//li/a[@href]");

            if (links != null)
            {
                foreach (HtmlAgilityPack.HtmlNode link in links)
                {
                    System.Data.DataRow dr = dt.NewRow();
                    dr["Name"] = System.Web.HttpUtility.HtmlDecode(link.InnerText);
                    dr["Url"] = link.Attributes["href"].Value; // the XPath guarantees href

                    dt.Rows.Add(dr);
                } // Next link
            }

            System.Data.DataView dv = dt.DefaultView;
            dv.Sort = "Name ASC";
            System.Data.DataTable sortedDT = dv.ToTable();

            return sortedDT;
        }
示例#18
0
        /// <summary>
        /// Scrapes the first result of the listing page at <c>Link1</c>.
        /// </summary>
        /// <remarks>
        /// A node that is missing from the page no longer aborts the call: its text becomes an
        /// empty string, and a missing link/image falls back to the same defaults already used
        /// for a missing attribute ("default" / "Default").
        /// </remarks>
        /// <returns>
        /// Always seven entries, in this order: title, subtitle, link, image source, review
        /// text, price text, and the text of the fourth detail row (stored as "est").
        /// </returns>
        public List <string> Scrapdata()
        {
            HtmlAgilityPack.HtmlWeb      web  = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc  = web.Load(Link1);
            HtmlAgilityPack.HtmlNode     root = doc.DocumentNode;

            // Inner text of the first node matching the XPath, or "" when there is none.
            System.Func<string, string> textOf = xpath =>
            {
                var node = root.SelectSingleNode(xpath);
                return node != null ? node.InnerText : string.Empty;
            };

            // Attribute of the first node matching the XPath, or the fallback when either is missing.
            System.Func<string, string, string, string> attributeOf = (xpath, name, fallback) =>
            {
                var node = root.SelectSingleNode(xpath);
                return node != null ? node.GetAttributeValue(name, fallback) : fallback;
            };

            // Scraping the data using particular nodes.
            var title    = textOf("//a[@class ='s-item__link']");
            var subtitle = textOf("//*[@id='srp-river-results']/ul/li[1]/div/div[2]/div[3]");
            var link     = attributeOf("//a[@class ='s-item__link']", "href", "default");
            var image    = attributeOf("//*[@id='srp-river-results']/ul/li[1]/div/div[1]/div/a[1]/div/img", "src", "Default");
            var review   = textOf("//*[@class='x-star-rating']/span");
            var price    = textOf("//*[@class='s-item__price']");
            var est      = textOf("//*[@id='srp-river-results']/ul/li[1]/div/div[2]/div[4]/span/span");

            // Callers read the result by position, so the order below must not change.
            List <string> scrap = new List <string>();
            scrap.Add(title);
            scrap.Add(subtitle);
            scrap.Add(link);
            scrap.Add(image);
            scrap.Add(review);
            scrap.Add(price);
            scrap.Add(est);
            return(scrap);
        }
示例#19
0
        /// <summary>
        /// Click handler: downloads and parses the CoinMarketCap front page.
        /// The parsed document is not used further.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void updateClick(object sender, RoutedEventArgs e)
        {
            var client = new HtmlAgilityPack.HtmlWeb();

            client.Load("https://coinmarketcap.com/");
        }
        /// <summary>
        /// Downloads the English Wikipedia article named <paramref name="ArticleName"/> and
        /// stores the text of its <c>mw-content-text</c> element in <c>wikipediaContent</c>.
        /// </summary>
        /// <param name="ArticleName">
        /// Unencoded article title; characters such as '#' or '?' are escaped so they cannot
        /// truncate the URL. Null is treated as empty.
        /// </param>
        public WikipediaContentProvider(string ArticleName)
        {
            var hapHtmlWeb   = new HtmlAgilityPack.HtmlWeb();
            var articleUrl   = "http://en.wikipedia.org/wiki/" + System.Uri.EscapeDataString(ArticleName ?? string.Empty);
            var htmlDocument = hapHtmlWeb.Load(articleUrl);

            // SelectSingleNode returns null when the element is absent (e.g. an error page).
            var contentNode = htmlDocument.DocumentNode.SelectSingleNode("//div[@id=\"mw-content-text\"]");

            wikipediaContent = contentNode != null ? contentNode.InnerText : string.Empty;
        }
示例#21
0
        /// <summary>
        /// Prints three groups of text from a fixed yellowpages.com search: business names,
        /// address paragraphs, and the links inside the "to-columns" list, each group separated
        /// by a dashed line.
        /// </summary>
        /// <remarks>
        /// A group whose selector matches nothing prints no lines.
        /// </remarks>
        public static void GetWebData()
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.yellowpages.com/search?search_terms=software&geo_location_terms=chennai");

            // Prints the inner text of every node matching the XPath. SelectNodes returns null
            // (not an empty collection) when nothing matches.
            System.Action<string> printAll = xpath =>
            {
                var nodes = doc.DocumentNode.SelectNodes(xpath);
                if (nodes == null)
                {
                    return;
                }

                foreach (var node in nodes)
                {
                    Console.WriteLine(node.InnerText);
                }
            };

            printAll("//a[@class='business-name']");

            Console.WriteLine("-------------------------------------");
            printAll("//p[@class='adr']");

            Console.WriteLine("-------------------------------------");
            // "&&" is not an XPath operator and made the old expression fail at runtime; the
            // links nested inside the list are selected with a descendant step instead.
            printAll("//ul[@class='to-columns']//a[@href]");
        }
示例#22
0
        /// <summary>
        /// Timer tick: while <c>CheckBox</c> is checked, reloads <c>URL</c>, rebuilds
        /// <c>KontrolHaberler</c> from the document and reconciles it with <c>YüklenenHaberler</c>.
        /// </summary>
        /// <remarks>
        /// IOException and WebException raised by the download are ignored, in which case
        /// <c>NewDocument</c> keeps whatever value it held before this tick.
        /// </remarks>
        public void timeTickMonitorWebSite()
        {
            if (!CheckBox.Checked)
            {
                return;
            }

            try
            {
                var client = new HtmlAgilityPack.HtmlWeb();
                NewDocument = client.Load(URL);
            }
            catch (IOException)
            {
                // Ignored: the next tick tries again.
            }
            catch (WebException)
            {
                // Ignored: the next tick tries again.
            }

            // The control list is rebuilt from scratch on every tick.
            KontrolHaberler.Clear();

            bool haveDocument = NewDocument != null;
            if (haveDocument)
            {
                sonDakikaListesiniAl(NewDocument, KontrolHaberler);
            }

            karşılaştır(YüklenenHaberler, KontrolHaberler);
            eskiHaberleriTemizle(YüklenenHaberler, KontrolHaberler);
        }
示例#23
0
        /// <summary>
        /// Loads a fixed article page, shows every JSON-LD script block in <c>richTextBox1</c>
        /// (also appending each to <c>structerData</c>), and lists the top-level scalar
        /// key/value pairs of each block in <c>listBox1</c>.
        /// </summary>
        /// <remarks>
        /// The former per-key deserializations for "author", "mainEntityOfPage", "image" and
        /// "publisher" produced values that were never read and threw whenever the value was
        /// not a JSON object, so they were removed.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            HtmlAgilityPack.HtmlWeb      hweb = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument hdoc = hweb.Load("http://www.webtekno.com/uzaktan-kumandali-oyuncak-araba-boyutunda-bir-kesif-araci-gelecek-sene-ay-i-kesfetmeye-gidecek-h57706.html");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var result = hdoc.DocumentNode.SelectNodes("//script[@type='application/ld+json']");
            if (result == null)
            {
                richTextBox1.Text = string.Empty;
                return;
            }

            var richResult = new System.Text.StringBuilder();

            foreach (var item in result)
            {
                structerData.Add(item.InnerText);
                richResult.Append(item.InnerText).Append("\n---------------------------------\n");
            }
            richTextBox1.Text = richResult.ToString();

            foreach (var ldJson in result)
            {
                // Removes every '@' so keys such as "@type" become "type".
                // NOTE(review): this also strips '@' inside values (e.g. e-mail addresses).
                string  textData    = ldJson.InnerText.Replace("@", "");
                JObject deserialize = JsonConvert.DeserializeObject <JObject>(textData);
                if (deserialize == null)
                {
                    continue; // empty script block or literal null
                }

                // Walk the parsed object directly; serialising it back to text just to parse
                // it again into a dictionary yields the same tokens.
                foreach (JProperty property in deserialize.Properties())
                {
                    if (property.Value is JValue)
                    {
                        listBox1.Items.Add($"{{[Key]:{property.Name}-[Value]:{property.Value}}}");
                    }
                }
            }
        }
示例#24
0
        /// <summary>
        /// Loads <paramref name="post"/>, follows the iframe found in its body, follows the
        /// iframe found in that page as well, and returns the HTML-decoded inner text of the
        /// "postViewArea" element of the final page.
        /// </summary>
        /// <param name="post">Address of the outer page.</param>
        /// <returns>The decoded text of the post view area.</returns>
        public string GetLyrics(string post)
        {
            const string postViewAreaPath = "//body/div[@id='head-skin']/div[@id='body']/div[@id='whole-border']/div[@id='whole-body']/div[@id='wrapper']/div[@id='twocols']/div[@id='content-area']/div[@id='post-area']/div[@id='postListBody']/div[1]/div[@class='post-back']/table[@id='printPost1']/tr[1]/td[2]/div[@id='postViewArea']";

            var client = new HtmlAgilityPack.HtmlWeb();

            // Open the iframe content: the outer page's body holds an iframe pointing at the next page.
            HtmlAgilityPack.HtmlDocument page = client.Load(post);
            string frameSource = page.DocumentNode.SelectSingleNode("//body/iframe").Attributes["src"].Value;
            Console.WriteLine("iframe " + frameSource);

            // That page holds a second iframe; its src is appended to the blog host below.
            page = client.Load(frameSource);
            frameSource = page.DocumentNode.SelectSingleNode("//body/iframe").Attributes["src"].Value;
            Console.WriteLine("iframe2 " + frameSource);

            page = client.Load("http://blog.naver.com" + frameSource);
            HtmlAgilityPack.HtmlNode postViewArea = page.DocumentNode.SelectSingleNode(postViewAreaPath);

            // Alternative selector noted alongside the original code: //*[contains(@class, 'se_textarea')]
            return HttpUtility.HtmlDecode(postViewArea.InnerText);
        }
示例#25
0
        /// <summary>
        /// Downloads the Kentucky Lottery Powerball past-winning page and hands the parsed
        /// document to <c>getArray</c>.
        /// </summary>
        public static void getHtml()
        {
            const string pastWinningUrl = "https://www.kylottery.com/apps/draw_games/powerball/powerball_pastwinning.html";

            var client = new HtmlAgilityPack.HtmlWeb();
            getArray(client.Load(pastWinningUrl));
        }
示例#26
0
        /// <summary>
        /// Requests the resource (the remote URI, or its local file path when
        /// <paramref name="fromLocal"/> is set), records its content type and optionally its
        /// date, then parses it as HTML when the content type contains "text" or the source is local.
        /// </summary>
        /// <param name="encoding">Encoding to force on the HTML load; null enables auto-detection.</param>
        /// <param name="inquiryLastModified">When true, <c>SavedDate</c> is refreshed.</param>
        /// <param name="fromLocal">When true, the local copy is read instead of the remote URI.</param>
        /// <param name="localPathInfo">Maps the URI to its local file path.</param>
        public void Fetch(Encoding encoding, bool inquiryLastModified, bool fromLocal, PathInfo localPathInfo)
        {
            string url;
            if (fromLocal)
            {
                url = localPathInfo.LocalFilePathFor(Uri);
            }
            else
            {
                url = Uri.AbsoluteUri;
            }

            using (WebResponse response = WebRequest.Create(url).GetResponse())
            {
                this.ContentType = response.ContentType;

                if (inquiryLastModified)
                {
                    // Local copies use the file's write time; remote ones the response's LastModified.
                    this.SavedDate = fromLocal
                        ? new FileInfo(url).LastWriteTime
                        : (response as HttpWebResponse).LastModified;
                }
            }

            // Nothing to parse unless the resource is textual or was read from disk.
            if (!this.ContentType.Contains("text") && !fromLocal)
            {
                return;
            }

            var loader = new HtmlAgilityPack.HtmlWeb();

            // Assigned before loading, so a failed load leaves an empty document in the field.
            this.htmlDocument = new HtmlAgilityPack.HtmlDocument();

            bool autoDetect = encoding == null;
            loader.AutoDetectEncoding = autoDetect;
            if (!autoDetect)
            {
                loader.OverrideEncoding = encoding;
            }
            htmlDocument = loader.Load(url);

            this.TitleElement = this.GetTitleElement(this.htmlDocument);
            this.H1Element    = this.GetH1Element(this.htmlDocument);
        }
示例#27
0
        /// <summary>
        /// Loads the seek.co.nz listing page built from <paramref name="searchJob"/> and
        /// <paramref name="location"/> and counts the job-title links on it.
        /// </summary>
        /// <param name="searchJob">Job keyword, inserted into the URL path as given.</param>
        /// <param name="location">Location, inserted into the URL path as given.</param>
        /// <returns>Number of job-title links found; 0 when the page has none.</returns>
        public static int jobLength(String searchJob, String location)
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load("https://www.seek.co.nz/" + searchJob + "-jobs/in-" + location);

            // SelectNodes yields null rather than an empty collection when nothing matches,
            // so an empty result page must be handled explicitly.
            var titles = doc.DocumentNode.SelectNodes("//a[@data-automation='jobTitle']");

            return titles == null ? 0 : titles.Count;
        }
        /// <summary>
        /// Loads <paramref name="page"/> and returns the inner text of the first node
        /// matching <paramref name="xpath"/>.
        /// </summary>
        /// <param name="page">Address of the page to load.</param>
        /// <param name="xpath">XPath expression selecting the node.</param>
        /// <returns>Inner text of the selected node.</returns>
        public string Get(string page, string xpath)
        {
            var loader = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlNode match = loader.Load(page).DocumentNode.SelectSingleNode(xpath);

            return match.InnerText;
        }
示例#29
0
        /// <summary>
        /// Loads <paramref name="wikipediaUrl"/> and returns, for every "tr" descendant of the
        /// table body selected by a fixed absolute XPath, that row's child nodes.
        /// </summary>
        /// <param name="wikipediaUrl">Address of the page to load.</param>
        /// <returns>One child-node collection per table row.</returns>
        private static List <HtmlAgilityPack.HtmlNodeCollection> GetNames(string wikipediaUrl)
        {
            HtmlAgilityPack.HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load(wikipediaUrl);
            var tableBody = page.DocumentNode.SelectNodes("/html/body/div[3]/div[3]/div[4]/div/table[2]/tbody");

            var rowChildren = new List <HtmlAgilityPack.HtmlNodeCollection>();
            foreach (var descendant in tableBody.Descendants())
            {
                if (descendant.Name == "tr")
                {
                    rowChildren.Add(descendant.ChildNodes);
                }
            }

            return rowChildren;
        }
示例#30
0
        /// <summary>
        /// Stores <paramref name="profile"/>, builds the profile page URL from its platform,
        /// region and user name ('#' replaced by '-'), and loads that page.
        /// </summary>
        /// <param name="profile">Profile whose main page is loaded.</param>
        public MainSiteAccessPoint(Common.Data.General.Profile profile)
        {
            _userProfile = profile;

            var userSlug = profile.UserName.Replace('#', '-');
            var platform = PlatformEnumToString(profile.Platform);
            var region   = RegionEnumToString(profile.Region);

            _mainProfileURL = String.Format(WebsiteStringURL.MainWebsiteURL, platform, region, userSlug);
            _doc            = new HtmlAgilityPack.HtmlWeb().Load(_mainProfileURL);
        }
 /// <summary>
 /// Creates a getter that loads a URL, selects every element whose class attribute contains
 /// <paramref name="cssClassName"/>, and returns the <paramref name="propertyName"/>
 /// attribute of each element's first child (empty string when the attribute is absent).
 /// </summary>
 /// <param name="cssClassName">Text looked for inside the class attribute.</param>
 /// <param name="propertyName">Attribute read from each first child.</param>
 /// <returns>A getter producing one attribute value per matched element.</returns>
 public IUrlDataGetter <string[]> CreateCssClassFirstChildPropertyArrayGetter(string cssClassName, string propertyName)
 {
     return new UrlDataGetter <string[]>(url =>
     {
         HtmlAgilityPack.HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load(url);
         var matches = page.DocumentNode.SelectNodes($"//*[contains(@class,'{cssClassName}')]");

         return matches
                .Select(match => match.FirstChild.GetAttributeValue(propertyName, string.Empty))
                .ToArray();
     });
 }
示例#32
0
        /// <summary>
        /// Loads the archive page under <c>XKCD_URL</c> and counts the links directly inside
        /// the element with id "middleContainer".
        /// </summary>
        /// <returns>The number of archive links, used as the latest comic ID.</returns>
        private uint GetLastestComicID()
        {
            var loader      = new HtmlAgilityPack.HtmlWeb();
            var archivePage = loader.Load(XKCD_URL + "archive/");
            var comicLinks  = archivePage.DocumentNode.SelectNodes("//*[@id='middleContainer']/a");

            // Last comic ID = comic count
            return (uint)comicLinks.Count;
        }
示例#33
0
        /// <summary>
        /// Loads the live Rockland playlist page, parses it with <c>RocklandParser</c>, and
        /// checks that at least one song is returned and every timestamp text ends with "Uhr".
        /// </summary>
        public void ParsePlaylistSongs()
        {
            var playlistPage = new HtmlAgilityPack.HtmlWeb().Load("http://www.rockland.fm/start.php?playlist");
            var parsedSongs  = new RocklandParser().GetSongs(playlistPage);

            Assert.IsTrue(parsedSongs.Count > 0);

            foreach (var parsedSong in parsedSongs)
            {
                string timestamp = parsedSong.TimestampText;
                Assert.IsTrue(timestamp.EndsWith("Uhr"));
            }
        }
示例#34
0
        /// <summary>
        /// Loads the listing page at <paramref name="url"/> and builds one
        /// <c>NewsInfoForJson</c> per teaser block, filling image, link, time and title from
        /// the teaser and the remaining fields via <c>GetContentText</c>.
        /// </summary>
        /// <param name="url">Address of the listing page.</param>
        /// <returns>The collected news items; empty when the page has no teaser blocks.</returns>
        public static List<NewsInfoForJson> GetNewsInfoList(string url)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            List<NewsInfoForJson> result = new List<NewsInfoForJson>();

            HtmlAgilityPack.HtmlNodeCollection teasers = doc.DocumentNode.SelectNodes("//div[@class=\"grid_item visor-article-teaser list_default\"]");
            if (teasers == null)
            {
                return result;
            }

            // One generator for the whole listing instead of a fresh one per item:
            // time-seeded generators created in quick succession may repeat values.
            Random random = new Random();

            foreach (var teaser in teasers)
            {
                NewsInfoForJson news_t = new NewsInfoForJson();

                HtmlAgilityPack.HtmlNode imageNode = teaser.SelectSingleNode(".//img");
                if (imageNode != null)
                {
                    string image_t = imageNode.GetAttributeValue("src", "");
                    news_t.Titlepic = image_t.StartsWith("http") ? image_t : HOST + image_t.TrimStart('/');
                }

                HtmlAgilityPack.HtmlNode urlNode = teaser.SelectSingleNode(".//a[@class='grid_img']");
                if (urlNode != null)
                {
                    string url_t = urlNode.GetAttributeValue("href", "");
                    news_t.befrom = url_t.StartsWith("http") ? url_t : HOST + url_t.TrimStart('/');
                }

                HtmlAgilityPack.HtmlNode timeNode = teaser.SelectSingleNode(".//span[@class='grid_time']");
                if (timeNode != null)
                {
                    news_t.NewsTime = timeNode.InnerText;
                }

                HtmlAgilityPack.HtmlNode titleNode = teaser.SelectSingleNode(".//*[@class='grid_title']");
                if (titleNode != null)
                {
                    news_t.Title = HttpUtility.HtmlDecode(titleNode.InnerText);
                }

                news_t.NewsForm = "news";

                // Without a link there is no article page to load; keep the teaser data only
                // rather than passing an empty address to the loader.
                if (!string.IsNullOrEmpty(news_t.befrom))
                {
                    GetContentText(news_t.befrom, ref news_t);
                }

                news_t.Onclick = random.Next(100, 2000).ToString();

                result.Add(news_t);
            }

            return result;
        }
示例#35
0
 /// <summary>
 /// Loads <paramref name="AUrl"/> with encoding auto-detection, retrying a limited number
 /// of times when the load throws.
 /// </summary>
 /// <param name="AUrl">Address of the page to load.</param>
 /// <returns>
 /// The loaded document, or an empty document when every attempt failed (callers never
 /// received an exception from this method, so failures still do not throw).
 /// </returns>
 private HtmlAgilityPack.HtmlDocument LoadPage(string AUrl)
 {
     // Bounded retry: the previous recursive retry had no limit and discarded the
     // retried document, so a first failure always produced an empty result.
     const int maxAttempts = 3;

     var web = new HtmlAgilityPack.HtmlWeb();
     web.AutoDetectEncoding = true;

     for (int attempt = 1; attempt <= maxAttempts; attempt++)
     {
         try
         {
             return web.Load(AUrl);
         }
         catch
         {
             // Best-effort load: fall through and try again until attempts run out.
         }
     }

     return new HtmlAgilityPack.HtmlDocument();
 }
示例#36
0
        /// <summary>
        /// Loads the article at <paramref name="url"/> and fills <paramref name="newsModel_t"/>
        /// with the article body HTML, the article time (reformatted as
        /// "yyyy-MM-dd HH:mm:ss") and, for Microsoft store links, the decoded target address.
        /// </summary>
        /// <param name="url">Address of the article page.</param>
        /// <param name="newsModel_t">Model updated in place.</param>
        public static void GetContentText(string url, ref NewsInfoForJson newsModel_t)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            HtmlAgilityPack.HtmlNode contentNode = doc.DocumentNode.SelectSingleNode("//div[@class=\"field field-name-body field-type-text-with-summary field-label-hidden\"]/div[@class=\"field-items\"]/div[@class=\"field-item even\"]");
            if (contentNode != null)
            {
                newsModel_t.newstext = contentNode.OuterHtml;
            }

            // Guard on the time node itself; it was previously guarded by the content node,
            // which dereferenced null whenever the body was present but the time was not.
            HtmlAgilityPack.HtmlNode articleTimeNode = doc.DocumentNode.SelectSingleNode("//time[@class=\"article-time\"]");
            if (articleTimeNode != null)
            {
                newsModel_t.NewsTime = articleTimeNode.InnerText.Replace(" at ", " ").Replace(" EDT","");
                newsModel_t.NewsTime = DateTime.Parse(newsModel_t.NewsTime).ToString("yyyy-MM-dd HH:mm:ss");
            }

            HtmlAgilityPack.HtmlNodeCollection nodeCollection = doc.DocumentNode.SelectNodes("//a[@class=\"cta large\"]");
            if (nodeCollection == null)
            {
                return;
            }

            foreach (var node in nodeCollection)
            {
                string url_t = HttpUtility.HtmlDecode(HttpUtility.UrlDecode(node.GetAttributeValue("href", "")));
                bool isStoreLink = url_t.Contains("www.microsoft.com") && url_t.Contains("store") && url_t.Contains("apps");
                if (!isStoreLink)
                {
                    continue;
                }

                // Without the "&url=https" marker there is no embedded target to extract.
                int markerIndex = url_t.IndexOf("&url=https", StringComparison.Ordinal);
                if (markerIndex < 0)
                {
                    continue;
                }

                // Skip past "&url=" (5 characters) so the value starts at "https".
                int index_0 = markerIndex + 5;
                int index_1 = url_t.LastIndexOf("&token=", StringComparison.Ordinal);

                // Cut at "&token=" only when it follows the target; otherwise take the rest.
                string fileName_t = HttpUtility.UrlDecode(
                    index_1 > index_0 ? url_t.Substring(index_0, index_1 - index_0) : url_t.Substring(index_0));

                int index_2 = fileName_t.LastIndexOf("&ourl=http", StringComparison.Ordinal);
                newsModel_t.Filename = index_2 > 0 ? fileName_t.Substring(0, index_2) : fileName_t;
                newsModel_t.NewsForm = "pingce";

                Console.WriteLine("url:" + newsModel_t.Filename);
            }
        }
        /// <summary>
        /// Flushes <c>file</c>, loads the schedule search page for the given subject and
        /// course number, and adds to <c>listBox2</c> the text after the last '=' of the first
        /// attribute of each "ddlabel" header's first child.
        /// </summary>
        /// <param name="subject">Subject code placed in the query string as given.</param>
        /// <param name="cnbr">Course number placed in the query string as given.</param>
        public void getCrn(String subject,String cnbr)
        {
            file.Flush();
            HtmlAgilityPack.HtmlWeb getHtml = new HtmlAgilityPack.HtmlWeb();
            try
            {
                HtmlAgilityPack.HtmlDocument doc = getHtml.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_schedule?term=CURRENT&subject=" + subject + "&campus=PWL&levl=UG&cnbr=" + cnbr);

                // SelectNodes yields null when nothing matches; treat that as "no sections".
                HtmlAgilityPack.HtmlNodeCollection labels = doc.DocumentNode.SelectNodes("//th[@class=\"ddlabel\"]");
                if (labels == null)
                {
                    return;
                }

                foreach (HtmlAgilityPack.HtmlNode link in labels)
                {
                    String[] s = link.FirstChild.Attributes.First().Value.Split('=');
                    this.listBox2.Items.Add(s[s.Length-1]);
                }
            }
            catch (Exception e)
            {
                // Still best-effort (nothing is rethrown), but leave a trace of the failure.
                System.Diagnostics.Trace.WriteLine("getCrn failed: " + e);
            }
        }
示例#38
0
 /// <summary>
 /// For every item of the selected source that has not been listened to since the start
 /// of today, resolves its mobile page address, downloads that page, extracts price and
 /// image address with a regular expression, and stores a price item when both are usable.
 /// All changes are committed once at the end.
 /// </summary>
 /// <param name="context">Job execution context; not read by this method.</param>
 public virtual void Execute(IJobExecutionContext context)
 {
     const string mobileUrlPattern = @"name=""h5""\scontent='(?<mUrl>[^']+)'";
     const string pricePattern = @"class=""swipeSlide_detail"">[\s\S]+?<img[\s]src=""(?<imgUrl>[^""]+)[\s\S]+?id=""current_price""[\s\S]+?class=""pd_product-price-num"">(?<price>[^<]+)";

     // Midnight at the start of the current day.
     var todayStart = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd"));
     var pending = _ppismItemRepository.GetAll(x => (x.ItemSource == PPism.Model.Enum.DictPPItemSource.一号店) && (!x.LastListenTime.HasValue || x.LastListenTime.Value < todayStart)).ToList();
     var itemLogic = new PPismJob.Common.PPismItemBll();

     foreach (var item in pending)
     {
         string mUrl = item.ListenUrl;
         if (!itemLogic.CheckIsMUrl(item.ListenUrl))
         {
             // Not a mobile address yet: read it from the page's "h5" meta content.
             var pageLoader = new HtmlAgilityPack.HtmlWeb();
             var pageHtml = pageLoader.Load(item.ListenUrl).DocumentNode.InnerHtml.ToString();
             mUrl = Regex.Match(pageHtml, mobileUrlPattern).Groups["mUrl"].Value.Trim();
         }

         if (string.IsNullOrEmpty(mUrl))
         {
             continue;
         }

         using (var req = new xNet.Net.HttpRequest())
         {
             req.UserAgent = xNet.Net.HttpHelper.FirefoxUserAgent();
             //http://p.3.cn/prices/get?callback=cnp&type=1&area=1_72_2799&pdtk=&pduid=2002986638&pdpin=&pdbp=0&skuid=J_540462
             req.CharacterSet = System.Text.Encoding.GetEncoding("utf-8");

             var mobileHtml = req.Get(mUrl).ToString();
             var captured = Regex.Match(mobileHtml, pricePattern).Groups;
             var price = captured["price"].Value.Trim().ToDecimal(0);
             var imgUrl = captured["imgUrl"].Value.Trim();

             bool usable = price > 0 && !string.IsNullOrEmpty(imgUrl);
             if (usable)
             {
                 var priceItem = itemLogic.GetPriceItem(item, price, imgUrl);
                 _priceItemRepository.Add(priceItem);
                 _ppismItemRepository.Update(item);
             }
         }
     }

     _repositoryContext.Commit();
 }
        /// <summary>
        /// Loads the catalog search page for <paramref name="subject"/> and adds the text of
        /// each "nttitle" cell's first child to <c>listBox1</c>.
        /// </summary>
        /// <param name="subject">Subject code placed in the query string as given.</param>
        public void getCourses(String subject)
        {
            HtmlAgilityPack.HtmlWeb getHtml = new HtmlAgilityPack.HtmlWeb();
            try
            {
                HtmlAgilityPack.HtmlDocument doc = getHtml.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_catalog?term=CURRENT&subject=" + subject);

                // SelectNodes yields null when nothing matches; treat that as "no courses".
                HtmlAgilityPack.HtmlNodeCollection titles = doc.DocumentNode.SelectNodes("//td[@class=\"nttitle\"]");
                if (titles == null)
                {
                    return;
                }

                foreach (HtmlAgilityPack.HtmlNode link in titles)
                {
                    this.listBox1.Items.Add(link.FirstChild.InnerText);
                }
            }
            catch (Exception e)
            {
                // Still best-effort (nothing is rethrown), but leave a trace of the failure.
                System.Diagnostics.Trace.WriteLine("getCourses failed: " + e);
            }
        }
        /// <summary>
        /// Loads the schedule detail page for <paramref name="crn"/>, advances
        /// <c>progressBar1</c>, and copies the serialized first child of the 2nd, 3rd and 4th
        /// "dddefault" cells into <c>textBox1</c>, <c>textBox2</c> and <c>textBox3</c>.
        /// </summary>
        /// <param name="crn">Course reference number placed in the query string.</param>
        public void getSeatRemain(int crn)
        {
            HtmlAgilityPack.HtmlWeb getHtml = new HtmlAgilityPack.HtmlWeb();
            try
            {
                HtmlAgilityPack.HtmlDocument doc = getHtml.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_schedule_detail?term=CURRENT&crn=" + crn.ToString());

                // Fixed progress jump once the page has been downloaded.
                for (int i = 0; i < 50; i++)
                {
                    this.progressBar1.PerformStep();
                }

                // SelectNodes yields null when nothing matches; leave the text boxes untouched.
                HtmlAgilityPack.HtmlNodeCollection cells = doc.DocumentNode.SelectNodes("//td[@class=\"dddefault\"]");
                if (cells == null)
                {
                    return;
                }

                int count = 0;
                foreach (HtmlAgilityPack.HtmlNode link in cells)
                {
                    this.progressBar1.PerformStep();
                    count++;

                    switch (count)
                    {
                        case 2:
                            this.textBox1.Text = link.FirstChild.WriteTo();
                            break;
                        case 3:
                            this.textBox2.Text = link.FirstChild.WriteTo();
                            break;
                        case 4:
                            this.textBox3.Text = link.FirstChild.WriteTo();
                            break;
                    }
                }
            }
            catch (Exception e)
            {
                // Still best-effort (nothing is rethrown), but leave a trace of the failure.
                System.Diagnostics.Trace.WriteLine("getSeatRemain failed: " + e);
            }
        }
 /// <summary>
 /// Loads the English Wikipedia page for <paramref name="ArticleName"/> and stores the
 /// inner text of its "mw-content-text" element in <c>wikipediaContent</c>.
 /// </summary>
 /// <param name="ArticleName">Article name appended to the wiki base address.</param>
 public WikipediaContentProvider(string ArticleName)
 {
     string articleUrl = string.Format("http://en.wikipedia.org/wiki/{0}", ArticleName);

     HtmlAgilityPack.HtmlDocument articlePage = new HtmlAgilityPack.HtmlWeb().Load(articleUrl);
     HtmlAgilityPack.HtmlNode contentNode = articlePage.DocumentNode.SelectSingleNode("//div[@id=\"mw-content-text\"]");

     wikipediaContent = contentNode.InnerText;
 }
示例#42
0
        /// <summary>
        /// Loads <paramref name="url"/> and returns the inner HTML of the whole document.
        /// Despite the method name, no XPath expression is applied.
        /// </summary>
        /// <param name="encoding">Encoding to force; null leaves the loader's defaults in place.</param>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The document's inner HTML, or an empty string when no document was produced.</returns>
        private static string GetHtmlByXPath(System.Text.Encoding encoding, string url)
        {
            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
            if (encoding != null)
            {
                htmlWeb.AutoDetectEncoding = false;
                htmlWeb.OverrideEncoding = encoding;
            }

            // Load failures propagate to the caller as before; the former catch-and-"throw ex"
            // only reset the exception's stack trace, so it has been removed.
            HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

            return doc == null ? "" : doc.DocumentNode.InnerHtml;
        }
示例#43
0
        /// <summary>
        /// Downloads the page named by the "url" request value and writes a JSON object with
        /// its visible text, absolute image addresses, title and meta description to the response.
        /// </summary>
        /// <exception cref="Exception">The "url" request value is missing or blank.</exception>
        private void parseWebPageHtmlAsContent()
        {
            string url = Provider.Request["url"];
            if (string.IsNullOrWhiteSpace(url))
            {
                throw new Exception(Provider.TR("Url belirtiniz"));
            }

            // Prepend a scheme only when none is present; https addresses must be left alone
            // (they previously became "http://https://...").
            bool hasScheme = url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                          || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (!hasScheme)
            {
                url = "http://" + url;
            }

            // First attempt assumes windows-1254.
            HtmlAgilityPack.HtmlDocument doc = loadPageWithoutScriptsAndStyles(url, Encoding.GetEncoding("windows-1254"));
            string metin = collectBodyText(doc);

            // An 'Ä' in the decoded text is taken as the sign of a wrong guess; reload as UTF-8.
            if (metin.Contains('Ä'))
            {
                doc = loadPageWithoutScriptsAndStyles(url, Encoding.UTF8);
                metin = collectBodyText(doc);
            }

            // Drop carriage returns and blank lines, trim each remaining line, and separate
            // lines with one empty line. (Filtering blank lines makes collapsing repeated
            // newlines beforehand unnecessary.)
            metin = metin.Replace("\r", "")
                         .Split('\n')
                         .Where(l => !string.IsNullOrWhiteSpace(l))
                         .Select(l => l.Trim())
                         .ToList()
                         .StringJoin("\n\n");

            // Ordinal, case-insensitive matching: culture-sensitive ToLower() can mis-map
            // letters such as 'I' under some cultures (e.g. Turkish).
            string title = (from x in doc.DocumentNode.Descendants()
                            where string.Equals(x.Name, "title", StringComparison.OrdinalIgnoreCase)
                            select x.InnerText).FirstOrDefault();

            string desc = (from x in doc.DocumentNode.Descendants()
                           where string.Equals(x.Name, "meta", StringComparison.OrdinalIgnoreCase)
                           && x.Attributes["name"] != null
                           && string.Equals(x.Attributes["name"].Value, "description", StringComparison.OrdinalIgnoreCase)
                           && x.Attributes["content"] != null
                           select x.Attributes["content"].Value).FirstOrDefault();

            // Image sources are resolved against the page address.
            List<string> imgs = (from x in doc.DocumentNode.Descendants()
                                 where string.Equals(x.Name, "img", StringComparison.OrdinalIgnoreCase)
                                 && x.Attributes["src"] != null
                                 && x.Attributes["src"].Value != null
                                 select (new Uri(new Uri(url), x.Attributes["src"].Value)).ToString()).ToList<String>();

            context.Response.ContentType = "application/json";
            context.Response.Write(JsonConvert.SerializeObject(new { text = Provider.Server.HtmlDecode(metin), imgs = imgs, title = Provider.Server.HtmlDecode(title), desc = Provider.Server.HtmlDecode(desc) }));
        }

        // Loads the page with the given encoding forced and removes every script and style element.
        private static HtmlAgilityPack.HtmlDocument loadPageWithoutScriptsAndStyles(string url, Encoding encoding)
        {
            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            web.OverrideEncoding = encoding;
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            doc.DocumentNode.Descendants()
                .Where(n => n.Name == "script" || n.Name == "style")
                .ToList()
                .ForEach(n => n.Remove());

            return doc;
        }

        // Concatenates the text nodes under body; empty when the document has none.
        private static string collectBodyText(HtmlAgilityPack.HtmlDocument doc)
        {
            var textNodes = doc.DocumentNode.SelectNodes("//body//text()");
            if (textNodes == null)
            {
                return "";
            }

            return string.Concat(textNodes.Select(node => node.InnerText));
        }
        /// <summary>
        /// Loads the page at the base address plus <paramref name="path"/> and converts the
        /// rows of its first table into a <see cref="System.Data.DataTable"/>.
        /// </summary>
        /// <remarks>
        /// A row holding a link inside a three-column header cell sets the current "Rubrique";
        /// other header rows add any columns not yet present; data rows become table rows
        /// whose cells fill the columns after "url" and "Rubrique" in order.
        /// </remarks>
        /// <param name="path">Path appended to the base address; also stored in the "url" column.</param>
        /// <returns>The filled table; it has only the two fixed columns when no rows are found.</returns>
        private System.Data.DataTable GetData(string path)
        {
            System.Data.DataTable dt = new System.Data.DataTable();

            string baseURL = "http://benchmarksgame.alioth.debian.org/";
            string URL = baseURL + path;

            dt.Columns.Add("url", typeof(string));
            dt.Columns.Add("Rubrique", typeof(string));

            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(URL);

            // SelectNodes yields null when nothing matches; return the empty table then.
            HtmlAgilityPack.HtmlNodeCollection rows = doc.DocumentNode.SelectNodes("//table[1]//tr");
            if (rows == null)
            {
                return dt;
            }

            char[] padding = new char[] { ' ', '\t', '\r', '\n' };
            string rubrique = null;

            foreach (HtmlAgilityPack.HtmlNode link in rows)
            {
                // Section title row: remember it for the data rows that follow.
                HtmlAgilityPack.HtmlNode a = link.SelectSingleNode("./th[@colspan=\"3\"]//a");
                if (a != null)
                {
                    rubrique = a.InnerText;
                    continue;
                } // End if (a != null)

                // Header row: add columns that are not in the table yet. Every header row
                // is processed; repeated names are skipped by the Contains check.
                var tableHeaders = link.SelectNodes("./th");
                if (tableHeaders != null)
                {
                    int position = 0;
                    foreach (HtmlAgilityPack.HtmlNode th in tableHeaders)
                    {
                        position++;
                        string colname = th.InnerText.Trim(padding);

                        if (string.IsNullOrEmpty(colname))
                        {
                            colname = "COLUMN_" + position.ToString();
                        }

                        if (!dt.Columns.Contains(colname))
                        {
                            dt.Columns.Add(colname, typeof(string));
                        }
                    } // Next th

                    continue;
                } // End if (tableHeaders != null)

                var tableData = link.SelectNodes("./td");
                if (tableData == null)
                {
                    continue;
                }

                // Data row: a row object is created only here, where it is actually added.
                System.Data.DataRow dr = dt.NewRow();
                dr["url"] = path;
                dr["Rubrique"] = rubrique;

                // Cells start at index 2, right after the "url" and "Rubrique" columns.
                int columnIndex = 2;
                foreach (HtmlAgilityPack.HtmlNode td in tableData)
                {
                    string val = td.InnerText.Trim(padding);
                    val = System.Web.HttpUtility.HtmlDecode(val);
                    val = val.Replace(",", "");

                    dr[columnIndex] = val;
                    columnIndex++;
                } // Next td

                dt.Rows.Add(dr);
            } // Next link

            return dt;
        }
示例#45
0
 /// <summary>
 /// Loads the live Rockland playlist page and checks that the text of its "html" node
 /// contains the playlist headline.
 /// </summary>
 public void PlaylistWebsiteShouldContainPlaylistText()
 {
     var playlistPage = new HtmlAgilityPack.HtmlWeb().Load("http://www.rockland.fm/start.php?playlist");
     string pageText = playlistPage.DocumentNode.ChildNodes["html"].InnerText;

     bool headlineFound = pageText.Contains("Playlist - Was lief wann auf ROCKLAND?");
     Assert.IsTrue(headlineFound);
 }
 /// <summary>
 /// Loads the catalog search page and adds to <c>comboBox1</c> the text before the first
 /// '-' of every child of the "sel_subj" select element, skipping empty results.
 /// </summary>
 public void getSubject()
 {
     var loader = new HtmlAgilityPack.HtmlWeb();
     var catalogPage = loader.Load("https://selfservice.mypurdue.purdue.edu/prod/bzwsrch.p_search_catalog?term=CURRENT");
     HtmlAgilityPack.HtmlNode subjectSelect = catalogPage.DocumentNode.SelectSingleNode("//select[@name=\"sel_subj\"]");

     foreach (HtmlAgilityPack.HtmlNode option in subjectSelect.ChildNodes)
     {
         String code = option.InnerText.Split('-')[0];
         if (code.Equals(""))
         {
             continue;
         }

         this.comboBox1.Items.Add(code);
     }
 }
示例#47
0
        /// <summary>
        /// Click handler: loads the OpenXcom git-builds page (through the configured proxy
        /// when required) and fills <c>advTree1</c> with a "Latest" group holding the first
        /// build entry and an "Archive" group holding the rest.
        /// </summary>
        /// <remarks>
        /// Each entry's Tag is a string list: the build link first, followed by the inner
        /// HTML of any later paragraphs whose first child is a "strong" element.
        /// </remarks>
        /// <param name="sender">Event source; not read.</param>
        /// <param name="e">Event data; not read.</param>
        private void buttonX2_Click(object sender, EventArgs e)
        {
            const string buildsUrl = "http://openxcom.org/git-builds/";

            HtmlAgilityPack.HtmlWeb website = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument rootDocument = null;

            if (Properties.Settings.Default.requiresProxy)
            {
                rootDocument = website.Load(buildsUrl, Properties.Settings.Default.proxyUrl, Properties.Settings.Default.proxyPort, Properties.Settings.Default.proxyUser, Properties.Settings.Default.proxyPwd);
            }
            else
            {
                rootDocument = website.Load(buildsUrl);
            }

            int iCount = 0;

            ElementStyle groupStyle = new ElementStyle();
            groupStyle.TextColor = Color.Navy;
            groupStyle.Font =  new Font(this.advTree1.Font.FontFamily, 9.5F);
            groupStyle.Name = "groupstyle";
            advTree1.Styles.Add(groupStyle);

            // Define sub-item style, simply to make text gray
            ElementStyle subItemStyle = new ElementStyle();
            subItemStyle.TextColor = Color.Gray;
            subItemStyle.Name = "subitemstyle";
            advTree1.Styles.Add(subItemStyle);

            Node gnLatest = new Node("Latest", groupStyle);
            gnLatest.Expanded = true;
            advTree1.Nodes.Add(gnLatest);

            Node gnArchive = new Node("Archive", groupStyle);
            gnArchive.Expanded = false;
            advTree1.Nodes.Add(gnArchive);

            // SelectNodes yields null when nothing matches; leave the two empty groups then.
            HtmlAgilityPack.HtmlNodeCollection sections = rootDocument.DocumentNode.SelectNodes("//div[@class='text']");
            if (sections == null)
            {
                return;
            }

            Node item = null;
            List<String> items = null;
            foreach (HtmlAgilityPack.HtmlNode link in sections)
            {
                foreach (HtmlAgilityPack.HtmlNode p in link.ChildNodes)
                {
                    // Childless nodes (e.g. text between paragraphs) carry no entry.
                    if (p.FirstChild == null)
                    {
                        continue;
                    }

                    if (p.FirstChild.Name.Equals("strong"))
                    {
                        // Extra paragraph: attach it to the most recent build entry, if any.
                        if (items != null)
                        {
                            items.Add(p.InnerHtml);
                        }
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode buildLink = p.ChildNodes[0];
                    if (buildLink.Attributes.Count == 0)
                    {
                        // No attribute to read the address from; not a usable build entry.
                        continue;
                    }

                    String display = buildLink.InnerHtml.Replace("openxcom_git_master_","");
                    display = display.Replace(".zip", "");

                    // The description after the link is optional.
                    String subText = p.ChildNodes.Count > 1
                        ? p.ChildNodes[1].InnerHtml.Replace("- ","")
                        : "";

                    String url = buildLink.Attributes[0].Value;
                    items = new List<String>();
                    items.Add(url);

                    item = createChildNode(display, subText, (Image)Properties.Resources.ResourceManager.GetObject("openxcom24"), subItemStyle);

                    // The first entry found is the latest build; all others go to the archive.
                    if (iCount == 0)
                    {
                        gnLatest.Nodes.Add(item);
                    }
                    else
                    {
                        gnArchive.Nodes.Add(item);
                    }

                    iCount++;
                    item.Tag = items;
                }
            }
        }
示例#48
0
 /// <summary>
 /// Loads the page at <paramref name="url"/> and passes the parsed document to the
 /// document-based <c>GetSongs</c> overload.
 /// </summary>
 /// <param name="url">Address of the page to load.</param>
 /// <returns>The songs returned by the document-based overload.</returns>
 public List<Song> GetSongs(Uri url)
 {
     var loader = new HtmlAgilityPack.HtmlWeb();
     var page = loader.Load(url.ToString());

     return GetSongs(page);
 }