Exemple #1
0
        /// <summary>
        /// Loads the HTML file at <paramref name="path"/> and returns the inner text of the
        /// <c>div</c> whose id is <c>content</c>.
        /// </summary>
        /// <param name="path">Path of the HTML file to load.</param>
        /// <returns>The inner text of the matched element, or <c>null</c> when no such element exists.</returns>
        public static string getHtmlContent(string path)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.Load(path);

            // SelectSingleNode yields null when nothing matches, so the whole chain must be
            // null-conditional; previously a missing div caused a NullReferenceException.
            return doc.DocumentNode?.SelectSingleNode("//div[@id='content']")?.InnerText;
        }
        /// <summary>
        /// Parses the HTML supplied by <paramref name="stream"/>, serializes the parsed document
        /// as XML text and passes that text on to <c>loadXMLInternal</c>.
        /// </summary>
        /// <param name="stream">Reader that supplies the HTML markup.</param>
        /// <param name="filename">Forwarded to <c>CheckHtmlErrors</c> together with the parsed document.</param>
        /// <returns>The value returned by <c>loadXMLInternal</c> for the generated XML.</returns>
        private object loadHTML(TextReader stream, string filename)
        {
            // Parser configuration. Original-case output stays disabled on purpose:
            // lower-cased names are needed because of XPath queries.
            var parsed = new HtmlAgilityPack.HtmlDocument
            {
                OptionOutputAsXml     = true,
                OptionCheckSyntax     = false,
                OptionWriteEmptyNodes = true
            };

            parsed.Load(stream);
            CheckHtmlErrors(parsed, filename);

            // Serialize the document as XML into an in-memory buffer.
            string xmlText;
            using (var buffer = new StringWriter())
            {
                parsed.Save(buffer);
                xmlText = buffer.ToString();
            }

            // Re-load the serialized form through the XML path.
            return this.loadXMLInternal(xmlText, 0, true);
        }
        /// <summary>
        /// Posts one request per day to <c>Url</c>, covering today through today + EndDate days,
        /// and parses each response as HTML.
        /// </summary>
        /// <param name="xmlParameters">
        /// XML text; optional <c>StartDate</c> and <c>EndDate</c> elements carry integer day
        /// offsets (defaults -1 and 3 when the element is absent).
        /// </param>
        /// <param name="logger">Receives one informational entry per requested date.</param>
        /// <returns>
        /// The show list. NOTE(review): nothing in this method ever adds to it, so it is always
        /// empty; the values read from each list item are discarded. Confirm what is missing.
        /// </returns>
        public override List<Show> Grab(string xmlParameters, ILogger logger)
        {
            var shows = new List<Show>();
            var doc = XDocument.Parse(xmlParameters);

            // XElement.Value is never null, so testing the element itself is sufficient.
            var sdElement = doc.Descendants("StartDate").FirstOrDefault();
            // NOTE(review): parsed (which validates the number) but not used below.
            var startDateDiff = sdElement != null ? Convert.ToInt32(sdElement.Value) : -1;
            var edElement = doc.Descendants("EndDate").FirstOrDefault();
            var endDateDays = edElement != null ? Convert.ToInt32(edElement.Value) : 3;

            for (int i = 0; i <= endDateDays; i++)
            {
                var date = DateTime.Now.Date.AddDays(i);
                logger.WriteEntry(string.Format("Grabbing reshet.tv for date {0}", date.ToString("d")), LogType.Info);

                var wr = WebRequest.Create(Url);
                wr.Method = "POST";
                wr.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
                using (var sw = new StreamWriter(wr.GetRequestStream()))
                {
                    // Form body is the date as dd/MM/yyyy with URL-encoded slashes.
                    sw.Write(string.Format("Values={0}%2F{1}%2F{2}", date.Day.ToString("00"), date.Month.ToString("00"), date.Year));
                }

                var html = new HtmlAgilityPack.HtmlDocument();
                // Dispose the response and its stream once the document is loaded; previously
                // one connection per requested day was left open.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var body = res.GetResponseStream())
                {
                    html.Load(body, Encoding.UTF8);
                }

                foreach (var li in html.DocumentNode.Descendants("li"))
                {
                    // First() throws when a list item has no span / p child (unchanged behaviour).
                    var time = li.Descendants("span").First().InnerText;
                    var text = li.Descendants("p").First().InnerText;
                }
            }
            return shows;
        }
        /// <summary>
        /// Reads the file at <paramref name="configPath"/> with the HTML parser and fills a
        /// <c>ProgramOptions</c> instance from the nodes below <c>/programoptions</c>.
        /// </summary>
        /// <param name="configPath">Path of the configuration file to load.</param>
        /// <returns>The populated options object.</returns>
        public ProgramOptions Get(string configPath)
        {
            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load(configPath);
            var root = document.DocumentNode;

            // String settings fall back to "", the proxy port to 0 and the proxy switch to false.
            return new ProgramOptions
            {
                // Blog
                BlogUrl = XmlParse.GetStringNodeValue(root, "/programoptions/blogurl", "", true),
                BlogUser = XmlParse.GetStringNodeValue(root, "/programoptions/bloguser", "", true),
                BlogPassword = XmlParse.GetStringNodeValue(root, "/programoptions/blogpassword", "", true),

                // Database
                DatabaseUrl = XmlParse.GetStringNodeValue(root, "/programoptions/databaseurl", "", true),
                DatabaseName = XmlParse.GetStringNodeValue(root, "/programoptions/databasename", "", true),
                DatabaseUser = XmlParse.GetStringNodeValue(root, "/programoptions/databaseuser", "", true),
                DatabasePassword = XmlParse.GetStringNodeValue(root, "/programoptions/databasepassword", "", true),

                // FTP
                FtpUrl = XmlParse.GetStringNodeValue(root, "/programoptions/ftpurl", "", true),
                FtpUser = XmlParse.GetStringNodeValue(root, "/programoptions/ftpuser", "", true),
                FtpPassword = XmlParse.GetStringNodeValue(root, "/programoptions/ftppassword", "", true),

                // Proxy
                ProxyAddress = XmlParse.GetStringNodeValue(root, "/programoptions/proxyaddress", "", true),
                ProxyPort = XmlParse.GetIntegerNodeValue(root, "/programoptions/proxyport", 0),
                UseProxy = XmlParse.GetBooleanNodeValue(root, "/programoptions/useproxy", false),

                // YouTube
                YoutubeClient = XmlParse.GetStringNodeValue(root, "/programoptions/youtubeclient", "", true),
                YoutubeClientSecret = XmlParse.GetStringNodeValue(root, "/programoptions/youtubeclientsecret", "", true)
            };
        }
        /// <summary>
        /// Downloads the schedule page for every <c>Channel</c> enum value and converts each
        /// table row with class <c>redtr_off</c> into a <c>Show</c>.
        /// </summary>
        /// <param name="xmlParameters">
        /// XML text; optional <c>StartDate</c> and <c>EndDate</c> elements carry integer day
        /// offsets relative to today (defaults -1 and 3 when the element is absent).
        /// </param>
        /// <param name="logger">Receives one informational entry per channel.</param>
        /// <returns>All shows collected across the channels.</returns>
        public override List <Show> Grab(string xmlParameters, ILogger logger)
        {
            var shows = new List <Show>();
            var doc   = XDocument.Parse(xmlParameters);

            // XElement.Value is never null, so testing the element itself is sufficient.
            var sdElement     = doc.Descendants("StartDate").FirstOrDefault();
            var startDateDiff = sdElement != null ? Convert.ToInt32(sdElement.Value) : -1;
            var edElement     = doc.Descendants("EndDate").FirstOrDefault();
            var endDateDays   = edElement != null ? Convert.ToInt32(edElement.Value) : 3;

            // The requested date range is the same for every channel, so format it once.
            var today    = DateTime.Now.Date;
            var fromDate = today.AddDays(startDateDiff).ToString(DateFormat);
            var toDate   = today.AddDays(endDateDays).ToString(DateFormat);

            foreach (Channel c in Enum.GetValues(typeof(Channel)))
            {
                var wr = WebRequest.Create(string.Format(url, (int)c, fromDate, toDate));
                wr.Timeout = 30000;
                logger.WriteEntry(string.Format("Grabbing hot.net.il channel {0} ", c.ToString()), LogType.Info);

                var html = new HtmlAgilityPack.HtmlDocument();
                // Dispose the response and its stream after loading; previously one
                // connection per channel was left open.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var body = res.GetResponseStream())
                {
                    html.Load(body, Encoding.UTF8);
                }

                foreach (var tr in html.DocumentNode.Descendants("tr").Where(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "redtr_off"))
                {
                    var tds = tr.Descendants("td").ToList();
                    if (tds.Count < 6)
                    {
                        // Cells 2, 4 and 5 are read below; a shorter row would throw on the index.
                        continue;
                    }

                    var show = new Show();
                    show.Title     = tds[2].InnerText;
                    // The cell text is parsed as a local time and converted to UTC.
                    show.StartTime = DateTime.SpecifyKind(Convert.ToDateTime(tds[4].InnerText), DateTimeKind.Unspecified);
                    show.StartTime = TimeZoneInfo.ConvertTime(show.StartTime, TimeZoneInfo.Local, TimeZoneInfo.Utc);
                    // The time-of-day part of cell 5 is added to the start time as an offset.
                    show.EndTime   = show.StartTime.Add(Convert.ToDateTime(tds[5].InnerText).TimeOfDay);
                    show.Channel   = c.ToString();
                    shows.Add(show);
                }
            }
            return shows;
        }
Exemple #6
0
        /// <summary>
        /// Downloads https://covid19.who.int/ and concatenates the inner text of every
        /// <c>div</c> whose class attribute equals the hard-coded class string.
        /// </summary>
        /// <returns>
        /// The concatenated text; an empty string when nothing matched, or whatever was collected
        /// before a failure (failures are logged to the debug output, not thrown).
        /// </returns>
        private string solution2()
        {
            var text = new System.Text.StringBuilder();

            try
            {
                String         url     = "https://covid19.who.int/";
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

                // using guarantees the response and reader are released even when loading fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (StreamReader sr = new StreamReader(response.GetResponseStream()))
                {
                    doc.Load(sr);
                }

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var aTags = doc.DocumentNode.SelectNodes("//div[@class='sc-AxjAm sc-AxiKw sc-qYiqT bQthTu']");
                if (aTags != null)
                {
                    foreach (var aTag in aTags)
                    {
                        text.Append(aTag.InnerText);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort, but the failure is no longer swallowed without a trace.
                System.Diagnostics.Debug.WriteLine("solution2: failed to retrieve page content: " + ex);
            }
            return text.ToString();
        }
Exemple #7
0
        /// <summary>
        /// Loads the HTML file at <c>Strings.HTML_PATH</c> and derives the login result from the
        /// element with id <c>ogameframe</c>.
        /// </summary>
        /// <returns>
        /// A <c>LoginInfo</c> whose <c>LoginSuccess</c> is true only when that element exists and
        /// its first attribute is named <c>rows</c>.
        /// </returns>
        internal static LoginInfo AnalyzHtml()
        {
            LoginInfo li = new LoginInfo();

            HtmlAgilityPack.HtmlDocument h = new HtmlAgilityPack.HtmlDocument();
            h.Load(Strings.HTML_PATH);
            HtmlAgilityPack.HtmlNode hn_login = h.DocumentNode.SelectSingleNode("//*[@id=\"ogameframe\"]");

            // The Count check prevents an out-of-range access on Attributes[0] when the
            // element carries no attributes at all.
            li.LoginSuccess = hn_login != null
                              && hn_login.Attributes.Count > 0
                              && "rows".Equals(hn_login.Attributes[0].Name);

            return li;
        }
        /// <summary>
        /// Fills <paramref name="group"/> from an RSS document: title, subtitle and description
        /// come from the channel element, and one <c>DataItem</c> is added per <c>item</c>.
        /// </summary>
        /// <param name="xmlString">RSS document text.</param>
        /// <param name="group">Group to populate; also the return value.</param>
        /// <param name="takeNum">
        /// When non-negative, items stop being added once <c>group.Items.Count</c> equals this
        /// value; a negative value means no limit.
        /// </param>
        /// <returns><paramref name="group"/>, unchanged when the document has no rss/channel element.</returns>
        public static async Task<DataGroup> NewsGoVnGroup_Parse(string xmlString, DataModel.DataGroup group, int takeNum)
        {
            XDocument _xdoc = XDocument.Parse(xmlString);

            // Guard both levels: a document whose root is not <rss> used to throw here.
            var rssElement = _xdoc.Element("rss");
            var channelElement = rssElement != null ? rssElement.Element("channel") : null;
            if (channelElement == null)
            {
                return group;
            }

            group.Title = channelElement.Element("title").Value;
            group.Subtitle = channelElement.Element("title").Value;
            group.Description = channelElement.Element("description").Value;

            foreach (var item in channelElement.Elements("item"))
            {
                if (group.Items.Count == takeNum && takeNum >= 0) break;

                string descriptionHtml = item.Element("description").Value;

                DataItem dataItem = new DataItem();
                dataItem.Title = item.Element("title").Value;
                dataItem.Description = StripHTML(descriptionHtml);
                dataItem.Link = new Uri(item.Element("link").Value, UriKind.Absolute);
                dataItem.PubDate = item.Element("pubDate").Value;

                // The description is itself HTML; its first <img> supplies the thumbnail.
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                using (var reader = new StringReader(descriptionHtml))
                {
                    htmlDoc.Load(reader);
                }

                HtmlAgilityPack.HtmlNode imageLink = getFirstNode("img", htmlDoc.DocumentNode);
                if (imageLink != null)
                {
                    // Replace the thumbnail size suffix in the file name before building the URI.
                    string src = imageLink.GetAttributeValue("src", string.Empty).Replace("96.62.jpg", "240.155.jpg");
                    Uri imageUri;
                    // TryCreate avoids the UriFormatException a missing or relative src used to cause;
                    // in that case the item simply keeps no image.
                    if (Uri.TryCreate(src, UriKind.Absolute, out imageUri))
                    {
                        dataItem.ImageUri = imageUri;
                    }
                }

                dataItem.Group = group;
                group.Items.Add(dataItem);
            }

            return group;
        }
Exemple #9
0
        /// <summary>
        /// Navigates <c>webBrowser1</c> to <paramref name="url"/>, waits for the page to load and
        /// returns the rendered document's HTML after a round-trip through the HTML parser.
        /// </summary>
        /// <param name="url">Address to navigate to.</param>
        /// <returns>Outer HTML of the parsed document.</returns>
        private static string GetContent(string url)
        {
            webBrowser1.Navigate(url);

            // Time how long the browser needs to finish loading.
            var sw = new Stopwatch();

            sw.Start();
            Debug.WriteLine("Getting stats from: " + url);

            WaitTillLoad(webBrowser1);

            sw.Stop();

            Debug.WriteLine($"{url} loaded in {sw.ElapsedMilliseconds} mls");

            // Read the markup from the browser's live DOM.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            var documentAsIHtmlDocument3 = (mshtml.IHTMLDocument3)webBrowser1.Document.DomDocument;
            using (StringReader sr = new StringReader(documentAsIHtmlDocument3.documentElement.outerHTML))
            {
                doc.Load(sr);
            }

            // The former debug dump to a hard-coded path on one user's desktop was removed:
            // it threw on any machine without that directory.
            return doc.DocumentNode.OuterHtml;
        }
        /// <summary>
        /// Downloads a fixed YouTube watch page and prints its Open Graph title, Open Graph
        /// description and the content of the element with id <c>eow-description</c>.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            Uri            targetUri  = new Uri("https://www.youtube.com/watch?v=KB7HyDBy97U");
            // Direct casts instead of 'as': a wrong type should fail here, not later as a null dereference.
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(targetUri);

            using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
            {
                using (Stream webResponseStream = webResponse.GetResponseStream())
                {
                    Encoding targetEncoding        = Encoding.UTF8;
                    HtmlAgilityPack.HtmlDocument s = new HtmlAgilityPack.HtmlDocument();
                    s.Load(webResponseStream, targetEncoding, true);
                    IXPathNavigable nav = s;
                    var root = nav.CreateNavigator();

                    // The XPath expressions previously contained typographic quotes and the id and
                    // regex patterns contained stray spaces (formatting damage), so none could match.
                    var titleNode       = root.SelectSingleNode("/html/head/meta[@property='og:title']/@content");
                    var descriptionNode = root.SelectSingleNode("/html/head/meta[@property='og:description']/@content");
                    var fullNode        = s.GetElementbyId("eow-description");

                    // Each lookup can come back empty; fall back to an empty string instead of throwing.
                    string title           = WebUtility.HtmlDecode(titleNode != null ? titleNode.Value : String.Empty);
                    string description     = WebUtility.HtmlDecode(descriptionNode != null ? descriptionNode.Value : String.Empty);
                    string fullDescription = WebUtility.HtmlDecode(fullNode != null ? fullNode.InnerHtml : String.Empty);

                    // Line-breaking tags become newlines; every remaining tag is stripped.
                    fullDescription = Regex.Replace(fullDescription, @"<(br|hr)[^>]*>", Environment.NewLine);
                    fullDescription = Regex.Replace(fullDescription, @"<[^>]*>", String.Empty).Trim();

                    Console.WriteLine(title);
                    Console.WriteLine(description);
                    Console.WriteLine(fullDescription);
                }
            }
        }
        /// <summary>
        /// Replaces <c>_html</c> with a new document and loads it, as UTF-8, from the
        /// <c>Dizin</c> value of a freshly created <c>SozlockIndir</c>.
        /// </summary>
        private void HtmlYukle()
        {
            var downloader = new SozlockIndir();

            // The field is assigned before loading, so it holds the new document even if Load throws.
            _html = new HtmlDocument();

            var source = downloader.Dizin;
            _html.Load(source, Encoding.UTF8);
        }
        /// <summary>
        /// Downloads the page at <c>URL</c>, turns every entry of the <c>program_list</c> list
        /// into a <c>Show</c> for today, and duplicates each entry for the following day.
        /// </summary>
        /// <param name="xmlParameters">Not used by this grabber.</param>
        /// <param name="logger">Receives informational entries.</param>
        /// <returns>
        /// Shows for today and tomorrow after <c>FixShowsEndTimeByStartTime</c> has run; an empty
        /// list when the page contains no <c>program_list</c> element.
        /// </returns>
        public override List<Show> Grab(string xmlParameters, ILogger logger)
        {
            logger.WriteEntry("Grabbing jn1 schedule", LogType.Info);
            var wr = WebRequest.Create(URL);
            var doc = new HtmlAgilityPack.HtmlDocument();
            // Dispose the response and its stream after loading; previously the connection leaked.
            using (var res = (HttpWebResponse)wr.GetResponse())
            using (var body = res.GetResponseStream())
            {
                doc.Load(body);
            }

            var lst = new List<Show>();
            var ul = doc.DocumentNode.Descendants("ul").FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "program_list");
            if (ul == null)
            {
                // FirstOrDefault returns null when the list is absent; previously this crashed below.
                logger.WriteEntry("jn1 schedule list (ul.program_list) not found", LogType.Info);
                return lst;
            }

            // Start times on the page are interpreted in this zone and converted to UTC.
            var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

            foreach (var li in ul.Descendants("li"))
            {
                // Materialize once: spans 0, 1 and 2 hold start time, title and description.
                var spans = li.Descendants("span").ToList();
                if (spans.Count < 3)
                {
                    continue;
                }

                var show = new Show();
                show.Channel = "Jewish News One";
                var startTime = spans[0].InnerText.Replace("::", ":");
                show.StartTime = DateTime.SpecifyKind(DateTime.Now.Date + Convert.ToDateTime(startTime).TimeOfDay, DateTimeKind.Unspecified);
                show.StartTime = TimeZoneInfo.ConvertTime(show.StartTime, sourceZone, TimeZoneInfo.Utc);

                // Lower-case the title, then capitalize its first character; an empty title
                // is kept as-is (First() used to throw on it).
                var title = spans[1].InnerText.Trim().ToLower();
                if (title.Length > 0)
                {
                    title = title.Substring(0, 1).ToUpper() + title.Substring(1);
                }
                show.Title = title;
                show.Description = spans[2].InnerText.Trim();

                lst.Add(show);
            }

            // The schedule repeats daily, so duplicate every entry with tomorrow's date.
            var secondList = new List<Show>();
            foreach (var show in lst)
            {
                var s = show.Clone();
                s.StartTime = show.StartTime.AddDays(1);
                secondList.Add(s);
            }
            lst.AddRange(secondList);
            FixShowsEndTimeByStartTime(lst);
            return lst;
        }
Exemple #13
0
        /// <summary>
        /// Fetches the login page and extracts <c>enc</c>, <c>uuid</c> and the QR code image
        /// address from the elements with ids <c>enc</c>, <c>uuid</c> and <c>quickCode</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the request succeeded and all three values were found;
        /// otherwise <c>false</c>.
        /// </returns>
        public async Task <bool> GetNewQRCode()
        {
            // The client and the response are both disposable; release them when done.
            using (HttpClient httpClient = new HttpClient())
            using (var loginPage = await httpClient.GetAsync(RequestApis.LoginUrl))
            {
                if (!loginPage.IsSuccessStatusCode)
                {
                    return(false);
                }

                using (var htmlContent = await loginPage.Content.ReadAsStreamAsync())
                {
                    HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
                    html.Load(htmlContent);
                    enc  = html.GetElementbyId("enc")?.Attributes["value"]?.Value;
                    uuid = html.GetElementbyId("uuid")?.Attributes["value"]?.Value;

                    // Prefix the host only when a src was found. Concatenating unconditionally
                    // always produced a non-null string, which made the null check below useless.
                    string quickCodeSrc = html.GetElementbyId("quickCode")?.Attributes["src"]?.Value;
                    quickCode = quickCodeSrc == null ? null : RequestApis.HTTPHost + quickCodeSrc;
                }

                return(quickCode != null && enc != null && uuid != null);
            }
        }
Exemple #14
0
        /// <summary>
        /// Fetches exchange rates for the named bank. For "bank asya" the USD/EUR buying and
        /// selling rates are read from the bank's XML feed and saved.
        /// </summary>
        /// <param name="bankId">Identifier stored on the saved <c>BankExchange</c>.</param>
        /// <param name="bankName">Bank name, compared case-insensitively.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="bankName"/> is null or not one of the handled banks.
        /// </exception>
        public static void FetchRates(int bankId, string bankName)
        {
            var bankExchange = new BankExchange();

            bankExchange.BankId = bankId;

            // string.Equals tolerates a null bankName, which then falls through to the
            // ArgumentException below instead of a NullReferenceException.
            if (string.Equals(bankName, "bank asya", StringComparison.OrdinalIgnoreCase))
            {
                using (var ds = new DataSet("fxPrices"))
                {
                    ds.ReadXml("http://www.bankasya.com.tr/xml/kur_list.xml");

                    // Rows 0-3 of the second table: USD buying, USD selling, EUR buying, EUR selling.
                    var rows = ds.Tables[1].Rows;
                    bankExchange.USDBuying  = ParseKur(rows[0]);
                    bankExchange.USDSelling = ParseKur(rows[1]);
                    bankExchange.EURBuying  = ParseKur(rows[2]);
                    bankExchange.EURSelling = ParseKur(rows[3]);
                }

                bankExchange.Save();
            }
            else if (string.Equals(bankName, "finansbank", StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): the page is loaded but nothing is extracted or saved for this bank yet.
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.Load("http://www.finansbank.com.tr/bankacilik/alternatif-dagitim-kanallari/internet-bankaciligi/doviz_kurlari.aspx?IntSbMO_FB_Mevduatoranlari_PU".DownloadPage());
            }
            else
            {
                // ArgumentException derives from Exception, so existing catch blocks keep working.
                throw new ArgumentException("Banka adı bunlardan biri olabilir: Bank Asya");
            }
        }

        /// <summary>
        /// Reads the "Kur" column of <paramref name="row"/>, drops the '.' characters and parses
        /// the remaining digits as an integer, independent of the current culture.
        /// </summary>
        private static int ParseKur(DataRow row)
        {
            return Int32.Parse(row["Kur"].ToString().Replace(".", ""), System.Globalization.CultureInfo.InvariantCulture);
        }
Exemple #15
0
        /// <summary>
        /// Imports the content at <paramref name="uri"/> and extracts one value from it, using
        /// <paramref name="xpath"/> when given and <paramref name="regex"/> otherwise.
        /// </summary>
        /// <param name="uri">Location handed to <c>Importer.Import</c>.</param>
        /// <param name="xpath">XPath selecting a node whose trimmed inner text is returned; takes precedence over <paramref name="regex"/>.</param>
        /// <param name="regex">Pattern whose first capture group is returned; used only when <paramref name="xpath"/> is null or empty.</param>
        /// <returns>The extracted value, or <c>null</c> when nothing matched or neither selector was supplied.</returns>
        private static string GetSingleValue(Uri uri, string xpath, string regex)
        {
            // The reader was never closed before; using releases it on every path.
            using (TextReader reader = Importer.Import(uri))
            {
                if (!String.IsNullOrEmpty(xpath))
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.Load(reader);

                    HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);
                    if (node != null)
                    {
                        return(node.InnerText.Trim());
                    }
                }
                else if (!String.IsNullOrEmpty(regex))
                {
                    String wholeText = reader.ReadToEnd();

                    // Regex.Match never returns null, so only Success needs checking.
                    Match m = Regex.Match(wholeText, regex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    if (m.Success)
                    {
                        return(m.Groups[1].Value);
                    }
                }
            }

            return(null);
        }
 /// <summary>
 /// Downloads the schedule page of the channel named in <paramref name="p"/> and builds one
 /// <c>Show</c> per list item found inside the div whose id equals a <c>Day</c> enum name.
 /// </summary>
 /// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read the channel.</param>
 /// <param name="logger">Receives one informational entry for the channel.</param>
 /// <returns>The shows found; list items lacking a <c>p</c> or <c>a</c> child are skipped.</returns>
 List<Show> Grab(GrabParametersBase p, ILogger logger)
 {
     var pp = (GrabParameters)p;
     // Enum names cannot contain '-', '&' or '+', so they are encoded as _, AANNDD and PPLLUUSS.
     var url = string.Format(URL, pp.Channel.ToString().Replace("_", "-").Replace("AANNDD", "%26").Replace("PPLLUUSS", "%2B"));
     var wr = WebRequest.Create(url);
     var doc = new HtmlAgilityPack.HtmlDocument();
     logger.WriteEntry(string.Format("Grabbing Channel {0}", pp.Channel), LogType.Info);
     // Dispose the response and its stream after loading; previously the connection leaked.
     using (var res = (HttpWebResponse)wr.GetResponse())
     using (var body = res.GetResponseStream())
     {
         doc.Load(body);
     }

     // Looked up once instead of once per list item.
     var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

     var shows = new List<Show>();
     foreach (Day d in Enum.GetValues(typeof(Day)))
     {
         var dayOfWeek = (DayOfWeek)d;
         var div = doc.DocumentNode.Descendants("div").FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value == d.ToString());
         if (div == null)
         {
             continue;
         }

         var date = NextDateOfDayOfWeek(dayOfWeek);
         foreach (var ul in div.Descendants("ul"))
         {
             foreach (var li in ul.Descendants("li"))
             {
                 // <p> carries the start time, <a> the title; First() used to throw when either was missing.
                 var par = li.Descendants("p").FirstOrDefault();
                 var a = li.Descendants("a").FirstOrDefault();
                 if (par == null || a == null)
                 {
                     continue;
                 }

                 var show = new Show();
                 show.Channel = pp.Channel.ToString();
                 show.Title = a.InnerText.Trim();
                 show.StartTime = DateTime.SpecifyKind(date + Convert.ToDateTime(par.InnerText.Trim()).TimeOfDay, DateTimeKind.Unspecified);
                 show.StartTime = TimeZoneInfo.ConvertTime(show.StartTime, sourceZone, TimeZoneInfo.Utc);
                 shows.Add(show);
             }
         }
     }
     return shows;
 }
Exemple #17
0
        /// <summary>
        /// Walks every sub-directory of <c>site-users-userlist</c>, reads the HTML entries listed
        /// in its <c>index.cdx</c> and appends the <c>src</c> of the first image inside the first
        /// <c>icon-container</c> div of each page to <c>output.txt</c>.
        /// </summary>
        public static async Task MainAsyncUsers()
        {
            foreach (var directory in Directory.EnumerateDirectories("site-users-userlist"))
            {
                Console.WriteLine(directory);

                // Only index entries whose content type mentions text/html are processed.
                var htmlEntries = WarcItem.ReadIndex($"{directory}/index.cdx")
                                          .Where(x => x.ContentType.Contains("text/html"))
                                          .ToList();

                foreach (var entry in htmlEntries)
                {
                    using (var content = entry.OpenStream())
                    {
                        var page = new HtmlAgilityPack.HtmlDocument();
                        page.Load(content, System.Text.Encoding.UTF8);

                        var iconContainer = page.DocumentNode
                                                .Descendants("div")
                                                .FirstOrDefault(n => n.GetAttributeValue("class", "") == "icon-container");

                        // Pages without the container, or without an image inside it, write nothing.
                        var avatar = iconContainer == null
                            ? null
                            : iconContainer.Descendants("img").FirstOrDefault();

                        if (avatar != null)
                        {
                            File.AppendAllText("output.txt", avatar.GetAttributeValue("src", "") + System.Environment.NewLine);
                        }
                    }
                }
            }
        }
Exemple #18
0
        /// <summary>
        /// Parses the tag content of <paramref name="page"/> as HTML and returns the text produced
        /// by running <c>Iterate(true)</c> over an XPath navigator for the parsed document.
        /// </summary>
        /// <remarks>
        /// Possible future direction: accept the AvalonEdit editor (or its document) so the
        /// result can be written to the editor natively.
        /// </remarks>
        /// <param name="page">Post whose <c>TagContent()</c> is parsed.</param>
        /// <param name="justText">
        /// NOTE(review): not read anywhere in this method; <c>Iterate</c> is always called with
        /// <c>true</c>. Confirm whether this flag was meant to be passed through.
        /// </param>
        /// <returns>The text produced by <c>Iterate</c>.</returns>
        static public string HtmlArtifacts(this Post page, bool justText)
        {
            // Parser set-up: UTF-8 input, no syntax checking, XML-style output with empty nodes written.
            var parsed = new HtmlAgilityPack.HtmlDocument
            {
                OptionDefaultStreamEncoding = System.Text.Encoding.UTF8,
                OptionCheckSyntax           = false,
                OptionOutputAsXml           = true,
                OptionWriteEmptyNodes       = true
            };

            // The content is handed to the parser as a UTF-8 byte stream.
            byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(page.TagContent());

            using (var input = new MemoryStream(utf8Bytes, 0, utf8Bytes.Length, true))
            {
                parsed.Load(input);

                XPathNavigator walker = parsed.CreateNavigator();
                walker.MoveToFirst();

                // Appending to an empty string, exactly as the accumulator did.
                return string.Empty + walker.Iterate(true);
            }
        }
		/// <summary>
		/// Downloads the listing page at <paramref name="url"/>, adds one <c>RowItem</c> to
		/// <c>Items</c> for every product whose title mentions a lumen count, then sets
		/// <c>Title</c> to the number of items.
		/// </summary>
		/// <param name="url">Address of the listing page.</param>
		private async Task ParseFromAddress(string url)
		{
			var page = new HtmlAgilityPack.HtmlDocument();
			// Dispose the client and the response stream once the page is parsed.
			using (var client = new System.Net.Http.HttpClient())
			using (var response = await client.GetStreamAsync(url))
			{
				page.Load(response);
			}

			// SelectNodes returns null (not an empty collection) when nothing matches.
			var itemNodes = page.DocumentNode.SelectNodes("//form[@id='list-form']/div[@class='list-items']/div[@class='percent-wrap']");
			if (itemNodes != null)
			{
				// 3-4 digits, optionally followed by '-' or whitespace, then "lm" or "lumen".
				var lumenPattern = new Regex(@"\d{3,4}[-\s]?(lm|lumen)", RegexOptions.IgnoreCase);

				foreach (var item in itemNodes)
				{
					var titleNode = item.SelectSingleNode(".//p[@class='title']/a");
					var priceNode = item.SelectSingleNode(".//p[@class='price']");
					if (titleNode == null || priceNode == null)
						continue;

					string link = titleNode.GetAttributeValue("href", string.Empty);
					string title = titleNode.GetAttributeValue("title", string.Empty);

					// Prices use '.' as the decimal separator; parse with the invariant culture
					// instead of rewriting the separator for whatever the current culture is.
					// A price that cannot be parsed stays 0, as before.
					double price;
					double.TryParse(
						priceNode.InnerText.Trim().Replace("US$", ""),
						System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
						System.Globalization.CultureInfo.InvariantCulture,
						out price);

					var lumenMatch = lumenPattern.Match(title);
					if (!lumenMatch.Success)
						continue;

					// Keep only the digits of the matched text.
					string lumenCountString = Regex.Replace(lumenMatch.Value, "[^0-9]", "");
					int lumenCount;
					if (!int.TryParse(lumenCountString, out lumenCount))
						continue;

					Items.Add(new RowItem
						{
							LumenCount = lumenCount,
							Price = price,
							Link = "http://dx.com" + link
						});
				}
			}

			this.Title = Items.Count.ToString();
		}
            /// <summary>
            /// Gets school info (name and department names) from a locally saved HTML page of
            /// the Polish Wikipedia.
            /// </summary>
            /// <param name="filePath">Path to the locally saved HTML file.</param>
            /// <returns>
            /// A <c>School</c> with its name and department list filled in. The object is left
            /// unpopulated when the parser reports any parse error, and individual parts stay
            /// unset when the corresponding page element is missing.
            /// </returns>
            public static School GetSchool(string filePath)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();

                htmlDoc.OptionFixNestedTags = true;
                htmlDoc.Load(filePath, Encoding.UTF8);

                var school = new School();

                // Unchanged policy: a document with parse errors yields an empty School.
                if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
                {
                    return(school);
                }

                if (htmlDoc.DocumentNode == null)
                {
                    return(school);
                }

                // Materialize once; the node sequence is searched twice below.
                var nodes = htmlDoc.DocumentNode.Descendants().ToList();

                // FirstOrDefault instead of First: a page without these ids no longer throws.
                var heading = nodes.FirstOrDefault(x => x.Id == "firstHeading");
                if (heading != null)
                {
                    school.Name = heading.InnerText;
                }

                var contentRoot = nodes.FirstOrDefault(x => x.Id == "mw-content-text");
                if (contentRoot != null)
                {
                    // Departments are the list items whose text contains "Wydział".
                    foreach (var item in contentRoot.Descendants().Where(x => x.Name == "li"))
                    {
                        if (item.InnerText.Contains("Wydział"))
                        {
                            school.Departments.Add(item.InnerText);
                        }
                    }
                }

                return(school);
            }
        /// <summary>
        /// HTML-decodes <paramref name="contents"/>, rewrites the <c>src</c> of every
        /// <c>img</c> element to the value returned by <c>GetAndSaveImage</c>, and returns the
        /// re-serialized markup.
        /// </summary>
        /// <param name="contents">HTML-encoded markup to process.</param>
        /// <param name="dirPath">Directory argument forwarded to <c>GetAndSaveImage</c>.</param>
        /// <returns>The markup with image sources replaced.</returns>
        private static String ParseSaveAndFixImages(string contents, string dirPath)
        {
            contents = System.Web.HttpUtility.HtmlDecode(contents);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (var reader = new StringReader(contents))
            {
                doc.Load(reader);
            }

            // SelectNodes returns null (not an empty collection) when there are no images.
            HtmlAgilityPack.HtmlNodeCollection imgTags = doc.DocumentNode.SelectNodes("//img");
            if (imgTags != null)
            {
                foreach (HtmlAgilityPack.HtmlNode tag in imgTags)
                {
                    // Images without a src attribute are left untouched.
                    var src = tag.Attributes["src"];
                    if (src != null)
                    {
                        src.Value = GetAndSaveImage(src.Value, dirPath);
                    }
                }
            }

            using (StringWriter sw = new StringWriter())
            {
                doc.Save(sw);
                return(sw.ToString());
            }
        }
Exemple #22
0
        /// <summary>
        /// For every Tmall/Taobao item not yet checked today, runs PhantomJS to save the item
        /// page to disk, extracts price and image URL from the saved HTML with a regular
        /// expression, and stores a new price record. All changes are committed at the end.
        /// </summary>
        /// <param name="context">Job execution context; not read by this method.</param>
        /// <exception cref="InvalidOperationException">The PhantomJS process could not be started.</exception>
        public virtual void Execute(IJobExecutionContext context)
        {
            // Midnight today. DateTime.Today replaces a culture-sensitive format/parse round-trip.
            var dateTimeMin = DateTime.Today;
            var list = _ppismItemRepository.GetAll(x => (x.ItemSource == PPism.Model.Enum.DictPPItemSource.天猫 || x.ItemSource == PPism.Model.Enum.DictPPItemSource.淘宝) && (!x.LastListenTime.HasValue || x.LastListenTime.Value < dateTimeMin)).ToList();

            foreach (var item in list)
            {
                // using disposes the process on every path. The former catch block only did
                // 'throw ex;', which reset the stack trace, so exceptions now propagate untouched.
                using (Process p = new Process())
                {
                    var environment = Environment.CurrentDirectory;

                    p.StartInfo.FileName = environment + "\\phantomjs\\bin\\phantomjs.exe";

                    p.StartInfo.WorkingDirectory = environment + "\\phantomjs\\bin\\";

                    // Arguments: script path, quoted page URL, quoted output path.
                    string strArg = @"{0}\phantomjs\bin\tmallsavehtml.js ""{1}"" ""{0}\{2}""";
                    p.StartInfo.Arguments = string.Format(strArg, environment, item.ListenUrl, "\\phantomjs\\htmltmall\\" + item.Id.ToString());

                    p.StartInfo.CreateNoWindow = true;
                    p.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                    if (!p.Start())
                        throw new InvalidOperationException("无法Headless浏览器.");

                    Thread.Sleep(2 * 1000);
                    string fileUrl = string.Format(@"{0}\phantomjs\htmltmall\{1}.html", environment, item.Id.ToString());
                    var fileInfo = new FileInfo(fileUrl);
                    // If the output is still missing or stale, wait another 5 seconds.
                    if (!fileInfo.Exists || fileInfo.LastWriteTime < DateTime.Now.AddHours(-1))
                        Thread.Sleep(5 * 1000);

                    if (File.Exists(fileUrl))
                    {
                        var htmldocument = new HtmlAgilityPack.HtmlDocument();
                        htmldocument.Load(fileUrl);
                        var strHtml = htmldocument.DocumentNode.InnerHtml.ToString();

                        string reg = @"id=""J_PromoPrice""[\s\S]+?class=""tm-price"">(?<price>[^<]+)[\s\S]+?<img\s+id=""J_ImgBooth""[\s\S]+?src=""(?<imgUrl>[^""]+)";
                        // Taobao pages need a different pattern from Tmall pages.
                        if (item.ItemSource == PPism.Model.Enum.DictPPItemSource.淘宝)
                            reg = @"id=""J_PromoPriceNum""[\s\S]+?class=""tb-rmb-num"">(?<price>[^<]+)[\s\S]+?<img\s+id=""J_ImgBooth""[\s\S]+?src=""(?<imgUrl>[^""]+)";

                        var groups = Regex.Match(strHtml, reg).Groups;
                        var price = groups["price"].Value.Trim().ToDecimal(0);
                        var imgUrl = groups["imgUrl"].Value.Trim();

                        // Record only when both a positive price and an image URL were extracted.
                        if (price > 0 && !string.IsNullOrEmpty(imgUrl))
                        {
                            var ppismItemBll = new PPismJob.Common.PPismItemBll();
                            var priceItem = ppismItemBll.GetPriceItem(item, price, imgUrl);
                            _priceItemRepository.Add(priceItem);
                            _ppismItemRepository.Update(item);
                        }
                    }
                }
            }
            _repositoryContext.Commit();
        }
        /// <summary>
        /// Extracts the story choices from an HTML-encoded fragment. Each anchor whose
        /// <c>title</c> attribute holds two comma-separated values becomes one <c>Choice</c>.
        /// </summary>
        /// <param name="choicesHtml">HTML-encoded markup containing the choice links.</param>
        /// <returns>
        /// The parsed choices; empty when the fragment has no anchors. Anchors without a usable
        /// <c>title</c> attribute are skipped.
        /// </returns>
        private static List <Choice> ParseChoices(string choicesHtml)
        {
            choicesHtml = System.Web.HttpUtility.HtmlDecode(choicesHtml);
            var choices = new List <Choice>();

            /*
             * Expected input shape:
             *
             * <div id="room-choices"><h2>You have 4 choices:</h2>
             * <ul class="choices">
             * <li><a href="/story/choice.php?id=184898" title="184898,94419">Years later&hellip;</a></li>
             * <li><a href="/story/choice.php?id=184899" title="184899,94416">The Empire (Lies)</a></li>
             * <li><a href="/story/choice.php?id=184900" title="184900,94417">The Empire (Truth)</a></li>
             * <li><a href="/story/choice.php?id=184901" title="184901,94418">The Eternal Program (Hope)</a></li>
             * </ul>
             * </div>
             */

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (var reader = new StringReader(choicesHtml))
            {
                doc.Load(reader);
            }

            // SelectNodes returns null (not an empty collection) when there are no anchors.
            var links = doc.DocumentNode.SelectNodes("//a");
            if (links == null)
            {
                return(choices);
            }

            foreach (HtmlAgilityPack.HtmlNode link in links)
            {
                string titleValue = link.GetAttributeValue("title", null);
                if (titleValue == null)
                {
                    continue;
                }

                // The title holds two comma-separated values, passed on in that order.
                String[] titleSplit = titleValue.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                if (titleSplit.Length < 2)
                {
                    continue;
                }

                choices.Add(new Choice(titleSplit[0], titleSplit[1], link.InnerHtml));
            }

            return(choices);
        }
Exemple #24
0
        /// <summary>
        /// Parses the HTML read from <paramref name="stream"/>, serializes it back out as XML
        /// and hands that XML to <c>loadXMLInternal</c>.
        /// </summary>
        /// <remarks>
        /// <paramref name="options"/> is not consulted here; <c>0</c> is always forwarded
        /// to <c>loadXMLInternal</c>.
        /// </remarks>
        private bool loadHTML(Context ctx, TextReader stream, string filename, int options = 0)
        {
            // Parser configuration. Element names are left lower-cased (OptionOutputOriginalCase
            // stays off) because of XPath queries; OptionFixNestedTags is intentionally not set.
            var parser = new HtmlAgilityPack.HtmlDocument
            {
                OptionOutputAsXml     = true,
                OptionCheckSyntax     = false,
                OptionUseIdAttribute  = false,  // only needed when XPath navigator is used on the document
                OptionWriteEmptyNodes = true,
            };

            // read the markup and report whatever the parser complained about
            parser.Load(stream);
            CheckHtmlErrors(ctx, parser, filename);

            // round-trip through a string: HTML in, XML text out
            string xmlText;
            using (var buffer = new StringWriter())
            {
                parser.Save(buffer);
                xmlText = buffer.ToString();
            }

            return(loadXMLInternal(ctx, xmlText, 0, true));
        }
Exemple #25
0
        /// <summary>
        /// Form load handler: loads the chart SVG, rewrites its axis labels and the
        /// "basic" and "good" bars relative to <c>bar_current</c>, saves the result to a
        /// uniquely named temp file and shows that file in the embedded browser.
        /// </summary>
        private void frmSettings_Load(object sender, EventArgs e)
        {
            Thread.CurrentThread.CurrentUICulture = new CultureInfo("en");

            var svgPath = @"C:\Users\Pc\Documents\TBI Chart.svg";

            HtmlAgilityPack.HtmlDocument svg = new HtmlAgilityPack.HtmlDocument();
            svg.Load(svgPath);

            // Materialize once; ElementAt on the lazy sequence re-walks the document on every call.
            var text = svg.DocumentNode.Descendants("text").ToList();

            var max   = 66.2;
            var basic = 24.2;
            var good  = 45.2;

            // NOTE(review): the int overload is used, so fractional height/y values
            // presumably fall back to 0 - confirm the template only uses whole numbers.
            var    currentBar = svg.GetElementbyId("bar_current");
            double height     = currentBar.GetAttributeValue("height", 0);
            double top        = currentBar.GetAttributeValue("y", 0);

            text[6].InnerHtml = text[0].InnerHtml = $"{max}".ToNumber();
            text[5].InnerHtml = text[1].InnerHtml = $"{max/2}".ToNumber();

            var basicBar = svg.GetElementbyId("bar_basic_as");
            basicBar.SetAttributeValue("height", $"{basic * height / max}".ToNumber());
            basicBar.SetAttributeValue("y", $"{(top + height) - (basic * height / max)}".ToNumber());

            var goodBar = svg.GetElementbyId("bar_good_as");
            goodBar.SetAttributeValue("height", $"{good * height / max}".ToNumber());
            goodBar.SetAttributeValue("y", $"{(top + height) - (good * height / max)}".ToNumber());

            var savePath = $@"C:\Users\Pc\Documents\temp\{Guid.NewGuid()}.svg";

            // Save fails if the temp folder is missing; creating an existing folder is a no-op.
            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(savePath));
            svg.Save(savePath);

            // Show the file that was just written; the original template is unchanged on disk.
            webBrowser1.Navigate(savePath);
        }
        /// <summary>
        /// Downloads the programme listing of every <c>Channel</c> value and converts the
        /// listing rows (table rows with class <c>redtr_off</c>) into <c>Show</c> entries.
        /// </summary>
        /// <param name="xmlParameters">
        /// XML with optional <c>StartDate</c> / <c>EndDate</c> elements holding day offsets
        /// from today; they default to -1 and 3.
        /// </param>
        /// <param name="logger">Receives one entry per channel and one per skipped row.</param>
        /// <returns>All shows found across the channels.</returns>
        public override List<Show> Grab(string xmlParameters, ILogger logger)
        {
            var shows = new List<Show>();
            var doc = XDocument.Parse(xmlParameters);

            // XElement.Value is never null, so only the element itself needs checking.
            var sdElement = doc.Descendants("StartDate").FirstOrDefault();
            var startDateDiff = sdElement != null ? Convert.ToInt32(sdElement.Value) : -1;
            var edElement = doc.Descendants("EndDate").FirstOrDefault();
            var endDateDays = edElement != null ? Convert.ToInt32(edElement.Value) : 3;

            foreach (Channel c in Enum.GetValues(typeof(Channel)))
            {
                var wr = WebRequest.Create(string.Format(url, (int)c, DateTime.Now.Date.AddDays(startDateDiff).ToString(DateFormat), DateTime.Now.Date.AddDays(endDateDays).ToString(DateFormat)));
                wr.Timeout = 30000;
                logger.WriteEntry(string.Format("Grabbing hot.net.il channel {0} ", c.ToString()), LogType.Info);

                var html = new HtmlAgilityPack.HtmlDocument();

                // Dispose the response per channel; undisposed responses keep their connections open.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var body = res.GetResponseStream())
                {
                    html.Load(body, Encoding.UTF8);
                }

                foreach (var tr in html.DocumentNode.Descendants("tr").Where(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "redtr_off"))
                {
                    var tds = tr.Descendants("td").ToList();
                    if (tds.Count < 6)
                    {
                        // Cells 2, 4 and 5 are read below; a shorter row cannot be a listing row.
                        logger.WriteEntry(string.Format("Skipping malformed row for channel {0}", c.ToString()), LogType.Info);
                        continue;
                    }

                    var show = new Show();
                    show.Title = tds[2].InnerText;

                    // The page time is treated as local wall-clock time and converted to UTC.
                    show.StartTime = DateTime.SpecifyKind(Convert.ToDateTime(tds[4].InnerText), DateTimeKind.Unspecified);
                    show.StartTime = TimeZoneInfo.ConvertTime(show.StartTime, TimeZoneInfo.Local, TimeZoneInfo.Utc);

                    // Only the time-of-day part of cell 5 is added to the start time.
                    show.EndTime = show.StartTime.Add(Convert.ToDateTime(tds[5].InnerText).TimeOfDay);
                    show.Channel = c.ToString();
                    shows.Add(show);
                }
            }
            return shows;
        }
Exemple #27
0
        /// <summary>
        /// Maps an invoice onto an HTML template.
        /// </summary>
        /// <param name="plantilla">Path of the HTML template to use.</param>
        /// <param name="factura">Invoice to process.</param>
        /// <param name="html">
        /// Path prefix for the generated invoice; the invoice number (or "No valido")
        /// and ".html" are appended to it.
        /// </param>
        /// <param name="documentHTML">
        /// Optional document to fill in; when null, the invoice file is loaded from disk.
        /// </param>
        /// <exception cref="Exception">
        /// <paramref name="factura"/> is null, or the template file does not exist.
        /// </exception>
        /// <exception cref="NullReferenceException">
        /// One of the element ids written below is not present in the document.
        /// </exception>
        public static void ParsearHtml(string plantilla, Factura factura, string html, HtmlAgilityPack.HtmlDocument documentHTML = null)
        {
            // No try/catch here: the previous "throw ex;" handlers only reset the stack
            // trace. Exceptions now propagate with their original trace and type.
            if (factura == null)
            {
                throw new Exception("La factura es nula");
            }

            if (!File.Exists(plantilla))
            {
                throw new Exception($"El archivo {plantilla} no fue encontrado");
            }

            string nameFacturaHtml = !string.IsNullOrEmpty(factura.NumFactura) ? factura.NumFactura : "No valido";
            string facturaHtml     = html + nameFacturaHtml + ".html";

            // First run for this invoice: start from a copy of the template.
            if (!File.Exists(facturaHtml))
            {
                File.Copy(plantilla, facturaHtml);
            }

            if (documentHTML == null)
            {
                documentHTML = new HtmlAgilityPack.HtmlDocument();
                documentHTML.Load(facturaHtml);
            }

            // Every value falls back to "" so that null never reaches InnerHtml.
            string proveedor     = factura.Proveedor?.Nombre ?? "";
            string nit           = factura.Proveedor?.Nit ?? "";
            string direccion     = factura.Proveedor?.Direccion ?? "";
            string cufe          = factura.CUFE ?? "";
            string nfactura      = factura.NumFactura ?? "";
            string observaciones = factura.Observaciones ?? "";
            string nitcliente    = factura.Cliente?.Nit ?? "";
            string nombrecliente = factura.Cliente?.Nombre ?? "";
            string notas         = factura.Notas ?? "";

            // NOTE(review): values are written as raw markup; if invoice fields can hold
            // untrusted text they should be HTML-encoded first - confirm with the data owner.
            documentHTML.GetElementbyId("proveedor").InnerHtml     = proveedor;
            documentHTML.GetElementbyId("nit").InnerHtml           = nit;
            documentHTML.GetElementbyId("direccion").InnerHtml     = direccion;
            documentHTML.GetElementbyId("cufe").InnerHtml          = cufe;
            documentHTML.GetElementbyId("nfactura").InnerHtml      = nfactura;
            documentHTML.GetElementbyId("observaciones").InnerHtml = observaciones;
            documentHTML.GetElementbyId("notas").InnerHtml         = notas;
            documentHTML.GetElementbyId("nitcliente").InnerHtml    = nitcliente;
            documentHTML.GetElementbyId("nombreCliente").InnerHtml = nombrecliente;
            documentHTML.Save(facturaHtml);
        }
            /// <summary>
            /// Gets school info (name and department names) from a locally saved
            /// Polish Wikipedia HTML page.
            /// </summary>
            /// <param name="filePath">Path to the locally saved HTML file.</param>
            /// <returns>
            /// A School with its name and the text of every list item under the page content
            /// that contains "Wydział". If the parser reported any error, an unfilled School.
            /// </returns>
            /// <exception cref="InvalidOperationException">
            /// The page has no element with id <c>firstHeading</c> or <c>mw-content-text</c>.
            /// </exception>
            public static School GetSchool(string filePath)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();

                htmlDoc.OptionFixNestedTags = true;
                htmlDoc.Load(filePath, Encoding.UTF8);

                var school = new School();

                // A document with parse errors is not inspected at all.
                if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
                {
                    return school;
                }

                if (htmlDoc.DocumentNode == null)
                {
                    return school;
                }

                var nodes = htmlDoc.DocumentNode.Descendants();
                school.Name = nodes.First(x => x.Id == "firstHeading").InnerText;

                // Department entries are the list items whose text mentions "Wydział".
                var departmentNames = nodes.First(x => x.Id == "mw-content-text")
                                           .Descendants()
                                           .Where(x => x.Name == "li")
                                           .Select(x => x.InnerText)
                                           .Where(itemText => itemText.Contains("Wydział"));

                foreach (var departmentName in departmentNames)
                {
                    school.Departments.Add(departmentName);
                }

                return school;
            }
        /// <summary>
        /// Logs in to the site: fetches the login page to read its <c>__VIEWSTATE</c>,
        /// then posts the login form and returns the cookies set by the response.
        /// </summary>
        /// <returns>The cookie container attached to the login POST request.</returns>
        public CookieContainer DoLogin()
        {
            const string formUrl = "http://dbunet.dbu.dk";

            // Step 1: GET the form so the hidden view-state value can be echoed back.
            var req = (HttpWebRequest)WebRequest.Create(formUrl);
            req.Method = "GET";

            var loginPage = new HtmlDocument();
            using (var resp = (HttpWebResponse)req.GetResponse())
            using (var body = resp.GetResponseStream())
            {
                loginPage.Load(body);
            }

            // A missing input falls back to the same "---" placeholder used for a missing attribute.
            var viewstateNode = loginPage.DocumentNode.SelectSingleNode("//input[@name='__VIEWSTATE']");
            var viewstate = viewstateNode != null ? viewstateNode.GetAttributeValue("value", "---") : "---";

            string proxy = null;

            // Values are percent-encoded: the view state is base64 ('+', '/', '=') and
            // credentials may hold reserved or non-ASCII characters.
            // NOTE(review): the body starts with '?', which becomes part of the first
            // field name - kept as-is, confirm whether the server relies on it.
            var formParams = string.Format("?__VIEWSTATE={0}&_ctl2:sys_txtUsername={1}&_ctl2:sys_txtPassword={2}&_ctl2:cbRememberMe=true&_ctl2:ibtnSignin=Continue&_ctl2:ibtnSignin.x=33&_ctl2:ibtnSignin.y=3&TopMenu_ClientState=",
                Uri.EscapeDataString(viewstate ?? string.Empty),
                Uri.EscapeDataString(Convert.ToString(UserName) ?? string.Empty),
                Uri.EscapeDataString(Convert.ToString(Password) ?? string.Empty));

            // Step 2: POST the form. Redirects are not followed.
            req = (HttpWebRequest)WebRequest.Create(formUrl);
            req.ContentType = "application/x-www-form-urlencoded";
            req.Method = "POST";
            req.KeepAlive = true;
            req.AllowAutoRedirect = false;
            req.Proxy = new WebProxy(proxy, true); // ignore for local addresses
            req.CookieContainer = new CookieContainer(); // enable cookies
            byte[] bytes = Encoding.ASCII.GetBytes(formParams);
            req.ContentLength = bytes.Length;
            using (var os = req.GetRequestStream())
            {
                os.Write(bytes, 0, bytes.Length);
            }

            // The response body is not needed, but the request must complete before
            // the cookie container is returned; dispose the response right away.
            using (req.GetResponse())
            {
            }

            return req.CookieContainer;
        }
        /// <summary>
        /// Downloads the listing page of one channel, converts it to XML, lets
        /// <c>FillShows</c> extract the shows and then derives each show's end time.
        /// </summary>
        /// <param name="p">Grab parameters; must be a <c>GrabParameters</c> instance.</param>
        /// <returns>
        /// The shows found. Any exception is logged and the shows collected so far are returned.
        /// </returns>
        List<Show> Grab(GrabParametersBase p)
        {
            var shows = new List<Show>();
            try
            {
                var param = (GrabParameters)p;
                var wr = WebRequest.Create(string.Format(urlFormat, (int)param.ChannelId));
                _logger.WriteEntry(string.Format("Grabbing Channel {0} ...", param.ChannelId), LogType.Info);

                var doc = new HtmlAgilityPack.HtmlDocument();

                // Release the connection as soon as the markup has been read.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var body = res.GetResponseStream())
                {
                    doc.Load(body);
                }

                // Only affects Save below: the document is written out as XML.
                doc.OptionOutputAsXml = true;

                XDocument xml;
                using (var writer = new StringWriter())
                {
                    doc.Save(writer);
                    using (var reader = new StringReader(writer.ToString()))
                    {
                        xml = XDocument.Load(reader);
                    }
                }

                FillShows(xml, shows);

                // Each show ends when the next one starts; the last one has no successor.
                for (int i = shows.Count - 1; i >= 0; i--)
                {
                    var show = shows[i];
                    show.Channel = param.ChannelId.ToString();
                    if (i == shows.Count - 1)
                        show.EndTime = show.StartTime.AddHours(12);// usually 3-4 days from now , not that important
                    else
                        show.EndTime = shows[i + 1].StartTime;
                }
            }
            catch (Exception ex)
            {
                // Best effort by design: a failing channel is logged, not propagated.
                _logger.WriteEntry(ex.Message, LogType.Error);
            }
            _logger.WriteEntry(string.Format("Found {0} Shows", shows.Count), LogType.Info);
            return shows;
        }
Exemple #31
0
        /// <summary>
        /// Requests the reshet.tv schedule for today and each of the following
        /// <c>EndDate</c> days and reads the time and text of every list item.
        /// </summary>
        /// <param name="xmlParameters">
        /// XML with optional <c>StartDate</c> / <c>EndDate</c> elements holding day offsets;
        /// they default to -1 and 3.
        /// </param>
        /// <param name="logger">Receives one entry per requested date.</param>
        /// <returns>The list of shows.</returns>
        /// <remarks>
        /// NOTE(review): the extracted time/text values are never turned into <c>Show</c>
        /// entries, so the returned list is always empty; <c>StartDate</c> is parsed but
        /// not used. This looks unfinished - confirm intended behavior.
        /// </remarks>
        public override List <Show> Grab(string xmlParameters, ILogger logger)
        {
            var shows = new List <Show>();
            var doc   = XDocument.Parse(xmlParameters);

            // XElement.Value is never null, so only the element itself needs checking.
            var sdElement     = doc.Descendants("StartDate").FirstOrDefault();
            var startDateDiff = sdElement != null ? Convert.ToInt32(sdElement.Value) : -1;

            var edElement   = doc.Descendants("EndDate").FirstOrDefault();
            var endDateDays = edElement != null ? Convert.ToInt32(edElement.Value) : 3;

            for (int i = 0; i <= endDateDays; i++)
            {
                var date = DateTime.Now.Date.AddDays(i);
                logger.WriteEntry(string.Format("Grabbing reshet.tv for date {0}", date.ToString("d")), LogType.Info);
                var wr = WebRequest.Create(Url);
                wr.Method      = "POST";
                wr.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
                using (var sw = new StreamWriter(wr.GetRequestStream()))
                {
                    // Date is sent as dd/MM/yyyy with the slashes percent-encoded.
                    sw.Write(string.Format("Values={0}%2F{1}%2F{2}", date.Day.ToString("00"), date.Month.ToString("00"), date.Year));
                }

                var html = new HtmlAgilityPack.HtmlDocument();

                // One request per day: dispose each response so connections are released.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var body = res.GetResponseStream())
                {
                    html.Load(body, Encoding.UTF8);
                }

                foreach (var li in html.DocumentNode.Descendants("li"))
                {
                    // First() throws if a list item has no <span> or <p> child.
                    var time = li.Descendants("span").First().InnerText;
                    var text = li.Descendants("p").First().InnerText;
                }
            }
            return(shows);
        }
Exemple #32
0
        /// <summary>
        /// Downloads the next listing page (advancing <c>_currentPageIndex</c>), walks its
        /// movie links and updates <c>IsFinished</c> from the pager text.
        /// </summary>
        /// <returns>
        /// Always <c>null</c>.
        /// NOTE(review): no movie is collected from the links yet; the null return is kept
        /// because callers may depend on it - confirm before returning a collection.
        /// </returns>
        public async Task <ICollection <Commons.MovieInfo> > GetMoviesAsync()
        {
            // The former "for (page = 1; ; page++)" wrapper always returned on its first
            // iteration, so exactly one page is processed per call.
            var httpRequest = WebRequest.CreateHttp(string.Format(PageFormat, ++_currentPageIndex));
            if (ProxyFactoryDelegate != null)
            {
                httpRequest.Proxy = ProxyFactoryDelegate();
            }

            var html = new HtmlAgilityPack.HtmlDocument();

            // Dispose the response so the connection is returned to the pool.
            using (var response = await httpRequest.GetResponseAsync())
            using (var body = response.GetResponseStream())
            {
                html.Load(body);
            }

            // SelectNodes yields null, not an empty collection, when nothing matches.
            var films = html.DocumentNode.SelectNodes("//div[@class='moviefilm']/a");
            if (films != null)
            {
                foreach (var film in films)
                {
                    var link = film.GetAttributeValue("href", null);
                }
            }

            var pages = html.DocumentNode.SelectSingleNode("//div[@class='wp-pagenavi']/span/text()");
            if (pages == null)
            {
                // No pager on the page: there is nothing further to request. Marking the
                // run finished keeps a "loop until IsFinished" caller from paging forever.
                this.IsFinished = true;
                return(null);
            }

            var match = _pageMatchRegex.Match(pages.InnerText);
            if (match.Success)
            {
                var currentPage = int.Parse(match.Groups[1].Value, CultureInfo.InvariantCulture);
                var maxPage     = int.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
                this.IsFinished = currentPage == maxPage;
            }

            return(null);
        }
Exemple #33
0
        /// <summary>
        /// Fetches the exchange rates published by the given bank and saves them.
        /// </summary>
        /// <param name="bankId">Id stored on the saved <c>BankExchange</c>.</param>
        /// <param name="bankName">Bank name, compared case-insensitively ("bank asya" or "finansbank").</param>
        /// <exception cref="Exception">The bank name is null or not one of the known names.</exception>
        public static void FetchRates(int bankId, string bankName)
        {
            var bankExchange = new BankExchange();
            bankExchange.BankId = bankId;

            // Ordinal, case-insensitive comparison; a null name falls through to the
            // "unknown bank" error instead of throwing NullReferenceException.
            if (string.Equals(bankName, "bank asya", StringComparison.OrdinalIgnoreCase))
            {
                var ds = new DataSet("fxPrices");
                ds.ReadXml("http://www.bankasya.com.tr/xml/kur_list.xml");

                // Rows 0-3 of the second table are read as USD buy, USD sell, EUR buy, EUR sell.
                var rateRows = ds.Tables[1].Rows;
                bankExchange.USDBuying  = ParseDottedRate(rateRows[0]["Kur"]);
                bankExchange.USDSelling = ParseDottedRate(rateRows[1]["Kur"]);
                bankExchange.EURBuying  = ParseDottedRate(rateRows[2]["Kur"]);
                bankExchange.EURSelling = ParseDottedRate(rateRows[3]["Kur"]);

                bankExchange.Save();
            }
            else if (string.Equals(bankName, "finansbank", StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): HtmlDocument.Load(string) takes a file path. If DownloadPage()
                // returns markup rather than a path, this should be LoadHtml - confirm.
                // Nothing is extracted or saved for this bank yet.
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.Load("http://www.finansbank.com.tr/bankacilik/alternatif-dagitim-kanallari/internet-bankaciligi/doviz_kurlari.aspx?IntSbMO_FB_Mevduatoranlari_PU".DownloadPage());

            }
            else
            {
                throw new Exception("Banka adı bunlardan biri olabilir: Bank Asya");
            }
        }

        /// <summary>
        /// Converts a rate cell to an integer by removing the dots from its text.
        /// </summary>
        private static int ParseDottedRate(object rateCell)
        {
            // Invariant culture: the feed is machine data, not user-formatted text.
            return Int32.Parse(rateCell.ToString().Replace(".", ""), System.Globalization.CultureInfo.InvariantCulture);
        }
 /// <summary>
 /// Loads the saved ROCKLAND page from the TestData folder (two levels above the
 /// executing assembly) and checks that its html element text contains "ROCKLAND".
 /// </summary>
 public void RocklandWebsiteShouldContainROCKLAND_offline()
 {
     // locate the saved page relative to the test assembly
     var assemblyDir = System.IO.Path.GetDirectoryName(
         System.Reflection.Assembly.GetExecutingAssembly().Location);
     var testDataDir = assemblyDir + @"\..\..\TestData";
     var pagePath    = testDataDir + @"\ROCKLAND - Mach an und laut!.html";

     var page = new HtmlAgilityPack.HtmlDocument();
     page.Load(pagePath);

     var pageText = page.DocumentNode.ChildNodes["html"].InnerText;
     Assert.IsTrue(pageText.Contains("ROCKLAND"));
 }
        /// <summary>
        /// Performs an HTTP GET request and returns the response parsed as an HTML document.
        /// </summary>
        /// <param name="url">URL to request.</param>
        /// <returns>The response body, read as UTF-8, loaded into an HTML document.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        public static HtmlAgilityPack.HtmlDocument Get(string url)
        {
            HttpWebRequest  request  = null;
            HttpWebResponse response = null;

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

            // No catch block: failures propagate with their original type and stack trace,
            // so a failed request surfaces as the documented WebException. The previous
            // wrapper re-created a bare Exception from the message alone.
            try
            {
                // Maximum number of concurrent connections (process-wide setting).
                ServicePointManager.DefaultConnectionLimit = 200;

                // Certificate validation callback for https URLs (process-wide setting).
                if (url.StartsWith("https", StringComparison.OrdinalIgnoreCase))
                {
                    ServicePointManager.ServerCertificateValidationCallback =
                        new RemoteCertificateValidationCallback(ValidateServerCertificate);
                }

                request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "GET";

                // Get the server response and parse its body.
                response = (HttpWebResponse)request.GetResponse();
                using (StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
                {
                    doc.Load(sr);
                }
            }
            finally
            {
                // Close the connection and the stream.
                if (response != null)
                {
                    response.Close();
                }
                if (request != null)
                {
                    request.Abort();
                }
            }
            return(doc);
        }
        /// <summary>
        /// Loads the HTML file at <c>FilePath</c> and returns the text of its body element.
        /// </summary>
        /// <returns>
        /// The inner text of the first <c>body</c> node, or an empty string when the
        /// document has no such node.
        /// </returns>
        public override string GetText()
        {
            var html = new HtmlAgilityPack.HtmlDocument();
            html.Load(FilePath);

            var body = html.DocumentNode.SelectSingleNode("//body");
            if (body == null)
            {
                return(string.Empty);
            }

            return(body.InnerText);
        }
        /// <summary>
        /// Downloads the calendar page for the given date and extracts the shows listed
        /// on it (title, season number, episode number and image URL).
        /// </summary>
        /// <param name="date">Day whose listing is requested.</param>
        /// <returns>
        /// One <c>Shows</c> entry per listed show; an empty list when the page contains
        /// none of the expected nodes.
        /// </returns>
        /// <exception cref="FormatException">
        /// A season/episode span does not contain a numeric "Season N" / "Episode N" pair.
        /// </exception>
        public static List<Shows> Parse(DateTime date) // Downloads the show titles for a specific date
        {
            // The date goes into a URL, so format it culture-independently.
            string Day = date.ToString("dd-MM-yyyy", System.Globalization.CultureInfo.InvariantCulture);
            string result;
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.pogdesign.co.uk/cat/day/" + Day);
            request.Method = "GET";

            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            using (var reader = new StreamReader(stream, Encoding.UTF8))
            {
                result = reader.ReadToEnd();
            }

            List<string> Titles = new List<string>();
            List<int> Seasons = new List<int>();
            List<int> Episodes = new List<int>();
            List<Shows> TVShow = new List<Shows>();
            List<string> Images = new List<string>();
            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load(new StringReader(result));
            var titleNodes = document.DocumentNode.SelectNodes("//h4"); // Show titles are in <h4>
            var SENodes = document.DocumentNode.SelectNodes("//h5/a/span"); // Season & episode nodes
            var imageNodes = document.DocumentNode.SelectNodes("//div[contains(@class,'contbox ovbox')]");

            // SelectNodes yields null when nothing matches; without all three node sets
            // no complete show can be built.
            if (titleNodes == null || SENodes == null || imageNodes == null)
            {
                return TVShow;
            }

            foreach (var title in titleNodes)
            {
                var wSummary = title.InnerText.Replace("Summary", "");

                // Drop the two trailing (whitespace) characters; shorter text is kept as-is.
                var resultTitle = wSummary.Length >= 2 ? wSummary.Remove(wSummary.Length - 2) : wSummary;
                Titles.Add(resultTitle);
            }

            const string seasonPattern = "(?<=Season )(\\w+)";   // word following "Season "
            const string episodePattern = "(?<=Episode )(\\w+)"; // word following "Episode "
            foreach (var SE in SENodes)
            {
                var textFromSpan = SE.InnerText;

                Match seasonMatch = Regex.Match(textFromSpan, seasonPattern);
                Match episodeMatch = Regex.Match(textFromSpan, episodePattern);

                // Deliberately strict: entries are paired with titles by position, so
                // skipping an unparsable span would shift every later show.
                Seasons.Add(Int32.Parse(seasonMatch.Value));
                Episodes.Add(Int32.Parse(episodeMatch.Value));
            }

            foreach (var image in imageNodes)
            {
                // The image path is taken from the url(...) part of the inline style.
                var url = Regex.Match(image.GetAttributeValue("style", ""), @"(?<=url\()(.*)(?=\))").Groups[1].Value;
                Images.Add("http://www.pogdesign.co.uk/" + url);
            }

            // The three node sets are matched by index; stop at the shortest so that a
            // count mismatch cannot index past the end of a list.
            int showCount = Math.Min(Math.Min(Titles.Count, Seasons.Count), Math.Min(Episodes.Count, Images.Count));
            for (int i = 0; i < showCount; i++)
            {
                TVShow.Add(new Shows(Titles[i], Seasons[i], Episodes[i], Images[i]));
            }

            return TVShow;
        }
        /// <summary>
        /// Loads the HTML file and returns the XPath of the first anchor that has both a
        /// <c>rel</c> and a <c>title</c> attribute and whose <c>rel</c> or <c>title</c>
        /// value equals the corresponding entry of <paramref name="attributes"/>.
        /// </summary>
        /// <param name="filePatch">Path of the HTML file to load.</param>
        /// <param name="attributes">Expected values, looked up under the keys "rel" and "title".</param>
        /// <returns>XPath of the matching anchor.</returns>
        /// <exception cref="Exception">No anchor matches.</exception>
        public string GetXPathToElement(string filePatch, Dictionary <string, string> attributes)
        {
            _document.Load(filePatch);

            var target = _document.DocumentNode.CssSelect("a")
                         .Where(anchor => anchor.Attributes.Contains("rel"))
                         .Where(anchor => anchor.Attributes.Contains("title"))
                         .FirstOrDefault(anchor =>
                                         anchor.Attributes["rel"].Value == attributes["rel"] ||
                                         anchor.Attributes["title"].Value == attributes["title"]);

            if (target != null)
            {
                return(target.XPath);
            }

            throw new Exception("Not found target element");
        }
Exemple #39
0
        // Click handler that publishes an item through the Meli client: it authorizes
        // with the code typed into textBox1, posts a listing to "/items", copies the
        // "permalink" field of the JSON response to the clipboard and shows a message box.
        //
        // NOTE(review): this snippet is corrupted. The Authorize line below was truncated
        // by a redaction ("*****"), which swallowed the end of that call and everything
        // up to the XPath selector - including the inner "try" and the declarations of
        // doc, nombre and testDivSelector. It does not compile as shown.
        private void Publicar_Click(object sender, EventArgs e)
        {
            // NOTE(review): the second argument looks like an application secret
            // hard-coded in source - presumably it should come from configuration; confirm.
            Meli m = new Meli(754014355650430, "jR9v7lRr06CfzhSOdppHyrNSMhxYKCKb");

            try
            {
                string cambiarvida = textBox1.Text;
                m.Authorize(cambiarvida, "http://*****:*****@class=\"content csgo-thumbnail\"]";
                    // The selected node's inner HTML is split on single quotes and the
                    // fourth piece is used as the picture source.
                    var      node            = doc.DocumentNode.SelectSingleNode(testDivSelector).InnerHtml;
                    string[] words           = node.Split('\'');
                    string   nodo1           = words[3];
                    object   tamanho         = new { source = nodo1 };
                    object[] attr            = { tamanho };
                    var      picturess       = new List <Parameter>(); //End of the MercadoLibre part
                    var      p  = new Parameter();
                    var      ps = new List <Parameter>();
                    p.Name  = "access_token";
                    p.Value = m.AccessToken;
                    ps.Add(p);
                    // "Cuchi" is appended to the title when the name contains a star (U+2605).
                    string cuchi;
                    if (nombre.Contains("\u2605"))
                    {
                        cuchi = "Cuchi";
                    }
                    else
                    {
                        cuchi = "";
                    }
                    nombre = nombre.Replace("\u2605", "");//Strips the stars that make the listing look bad
                    nombre = nombre.Replace("\u2122", "");
                    // Posts the listing; the description is one inline HTML string with the
                    // item name inserted. The price is parsed from textBox4.
                    IRestResponse response = m.Post("/items", ps, new { title = "CSGO " + nombre + " SKIN " + cuchi, category_id = "MLA374211", price = Convert.ToInt32(textBox4.Text), listing_type_id = "free", currency_id = "ARS", available_quantity = 1, buying_mode = "buy_it_now", condition = "used", description = "<div id=\"body\" ms.pgarea=\"body\" class=\"\"> <div><span style=\"text-decoration: underline; color: #0000ff;\"><span style=\"font-size: xx-large;\"><strong>¡Venta de skins CSGO!<br></strong></span></span><p></p></div><div><span style=\"text-decoration: underline; color: #ff0000;\"><span style=\"font-size: xx-large;\"></span></span></div><div><span style=\"text-decoration: underline; color: #ff0000;\"><span style=\"font-size: xx-large;\"><strong><br></strong></span></span></div><div></div><img class=\"\" src=\"http://www.csgopools.com/wp-content/uploads/2015/03/cs-go-skincollection.png\" data-src=\"http://www.csgopools.com/wp-content/uploads/2015/03/cs-go-skincollection.png\"><noscript>&amp;amp;amp;amp;lt;img src=\"https://mla-s2-p.mlstatic.com/103421-MLA20770097984_062016-C.jpg\" /&amp;amp;amp;amp;gt;</noscript><h2>Requisitos:</h2><ul> <li>Tener en \"publico\" el inventario</li> <li>Tener\"Steam Guard Mobile Authenticator\" activo</li> </ul><h2>Descripción: </h2> <p></p><p></p><p><strong><span style=\"font-size: large;\">Arma:</span> </strong><span style=\"font-size: large;\"><span style=\"color: #ff0000;\"><strong>" + nombre + "</strong></span><br></span></p> <p><strong><span style=\"font-size: large;\">El intercambio se hace a través del intercambio de Steam</span></strong>&nbsp; <span style=\"font-size: large; color: #ff0000;\"></span></p><p></p><p></p> <p><span style=\"font-size: large;color: #067935;\"><strong>Se posee otros estados de esta misma arma , consulte.</strong></span></p> <p><span style=\"font-size: large;color: #da00ff;\"><strong>Tenemos todo tipos de skins!</strong></span></p><span style=\"font-size: x-large;color: 
#731616;\"><u><strong>Importante: Antes de ofertar consultar stock!</strong></u></span> </div>", video_id = "", warranty = "", pictures = attr });
                    // The listing URL is read from the "permalink" field of the response JSON.
                    var           hola     = JObject.Parse(response.Content);
                    var           HOLA     = (JValue)hola["permalink"];
                    Clipboard.SetText(Convert.ToString(HOLA));
                    MessageBox.Show("Arma publicada con exito y link copiado al clipboard!");
                }


                // Any failure in the (truncated) inner try is reported as an invalid item.
                catch
                {
                    MessageBox.Show("Arma invalida, revise el texto ingresado");
                }
            }
            // Any failure in the outer try is reported as a missing authorization code.
            catch
            {
                MessageBox.Show("Pone el codigo en la otra ventana!!");
            }
        }
Exemple #40
0
        /// <summary>
        /// Converts a saved HSBC statement HTML page into a <c>FineAntsCore.Statement</c>.
        /// </summary>
        /// <remarks>
        /// The HTML is re-saved as XML next to the source file (suffix ".fixed.xml") so that it can be
        /// queried with namespace-aware XPath. The temporary copy is deleted as soon as it has been
        /// loaded, also when saving or loading fails. A page that lacks one of the expected nodes
        /// still fails with a <c>NullReferenceException</c> when the node's text is read.
        /// </remarks>
        /// <param name="fileInfo">The HSBC HTML statement file to convert.</param>
        /// <returns>The statement built from the transaction rows, the date range and the closing balance.</returns>
        static FineAntsCore.Statement ConvertHSBCHTMLFileToFineAnts(FileInfo fileInfo)
        {
            // Every query starts at the same statement container, so the path is spelled out only once.
            const string statementPath = "/span/d:html/d:body/d:div[@id='top']/d:div[@id='innerPage']/d:div[@id='wrapper']/d:div[@id='main']/d:div[@id='content']/d:div[@class='containerMain']/d:div[@class='hsbcMainContent hsbcCol']/d:div[@class='extContentHighlightPib hsbcCol']";
            const string tableBodyPath = statementPath + "/d:table/d:tbody";

            HtmlAgilityPack.HtmlDocument brokenDocument = new HtmlAgilityPack.HtmlDocument();
            brokenDocument.Load(fileInfo.FullName);
            brokenDocument.OptionOutputAsXml = true;

            string fixedXmlFileName = fileInfo.FullName + ".fixed.xml";
            XmlDocument document = new XmlDocument();
            try
            {
                brokenDocument.Save(fixedXmlFileName);
                document.Load(fixedXmlFileName);
            }
            finally
            {
                // The XML is fully in memory (or unusable) at this point; never leave the temporary file behind.
                System.IO.File.Delete(fixedXmlFileName);
            }

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(document.NameTable);
            namespaceManager.AddNamespace("d", "http://www.w3.org/1999/xhtml");

            // The last table row carries the closing balance (column 6) and its sign marker (column 7).
            XmlNode closingBalanceNode = document.SelectSingleNode(tableBodyPath + "/d:tr[last()]/d:td[6]/d:p", namespaceManager);
            XmlNode closingBalanceSignNode = document.SelectSingleNode(tableBodyPath + "/d:tr[last()]/d:td[7]/d:p", namespaceManager);
            int closingBalance = moneyInPenceFromString(closingBalanceNode.InnerText.Trim());
            if (closingBalanceSignNode.InnerText.Trim() == "D") closingBalance = -closingBalance;

            XmlNode endDateNode = document.SelectSingleNode(statementPath + "/d:div[@class='extPibRow hsbcRow']/d:div[@class='hsbcPadding']/d:div[@class='hsbcTextRight']", namespaceManager);
            string endDateString = HtmlAgilityPack.HtmlEntity.DeEntitize(endDateNode.InnerText).Trim();

            System.Globalization.CultureInfo provider = System.Globalization.CultureInfo.InvariantCulture;
            DateTime endDate = DateTime.ParseExact(endDateString, "dd MMM yyyy", provider);

            // The first table row holds the date the statement starts from; the start date is the day after it.
            XmlNode startDateNode = document.SelectSingleNode(tableBodyPath + "/d:tr[1]/d:td[1]/d:p", namespaceManager);
            string startDateString = HtmlAgilityPack.HtmlEntity.DeEntitize(startDateNode.InnerText).Trim();

            DateTime startDate = dateFromDateStringFixedUsingUpperBoundDate(startDateString, endDate.AddDays(-1)).AddDays(1);

            List<FineAntsCore.Transaction> transactions = new List<FineAntsCore.Transaction>();

            // Everything between the first and the last row is a transaction.
            XmlNodeList transactionNodes = document.SelectNodes(tableBodyPath + "/d:tr[position()>1 and position()<last()]", namespaceManager);
            foreach (XmlNode node in transactionNodes)
            {
                XmlNode dateNode = node.SelectSingleNode("d:td[1]/d:p", namespaceManager);
                XmlNode nameNode = node.SelectSingleNode("d:td[3]/d:p", namespaceManager);
                XmlNode moneyOutNode = node.SelectSingleNode("d:td[4]/d:p", namespaceManager);
                XmlNode moneyInNode = node.SelectSingleNode("d:td[5]/d:p", namespaceManager);

                string date = HtmlAgilityPack.HtmlEntity.DeEntitize(dateNode.InnerText).Trim();
                string name = HtmlAgilityPack.HtmlEntity.DeEntitize(getInnerTextIgnoringLinks(nameNode));
                string moneyIn = HtmlAgilityPack.HtmlEntity.DeEntitize(moneyInNode.InnerText).Trim();
                string moneyOut = HtmlAgilityPack.HtmlEntity.DeEntitize(moneyOutNode.InnerText).Trim();

                // An empty "money in" cell means the row is a payment out, stored as a negative amount.
                int money = moneyIn == "" ? -moneyInPenceFromString(moneyOut) : moneyInPenceFromString(moneyIn);

                transactions.Add(new FineAntsCore.Transaction(money, dateFromDateStringFixedUsingUpperBoundDate(date, endDate), name, ""));
            }

            FineAntsCore.Statement statement = new FineAntsCore.Statement(transactions, startDate, endDate, closingBalance);

            return statement;
        }
        /// <summary>
        /// Loads Test.html and checks that <c>ToClearText</c> yields the expected plain text.
        /// </summary>
        public void ToClearTextTest()
        {
            const string expectedText = "Test \r\n    Some text\r\n     \r\n    Some more text\r\n     \r\nText here and there ";

            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load("Test.html");

            string actualText = document.DocumentNode.ToClearText();

            Assert.AreEqual(expectedText, actualText);
        }
Exemple #42
0
        /// <summary>
        /// Downloads the root profile page of a user and copies the values found there into <c>Profile</c>.
        /// </summary>
        /// <remarks>
        /// Parsing is best effort: a failure while downloading or reading the page is written to
        /// <c>System.Diagnostics.Trace</c> and does not propagate, so <c>Profile</c> may be left
        /// partially filled.
        /// </remarks>
        /// <param name="userName">Profile page to parse.</param>
        private void ParseProfilePage(String userName)
        {
            String profileURL = String.Format(@"https://myspace.com/{0}", userName);
            var    doc        = new HtmlAgilityPack.HtmlDocument();

            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            try
            {
                ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
                var webRequest = (HttpWebRequest)HttpWebRequest.Create(profileURL);
                webRequest.UserAgent = CrawlUtil.GetUserAgent();

                // Dispose response and stream even when loading the document throws.
                using (var response = webRequest.GetResponse())
                using (Stream stream = response.GetResponseStream())
                {
                    doc.Load(stream);
                }

                var root = doc.DocumentNode;

                Profile.URL      = profileURL;
                Profile.UserName = userName;
                Profile.ProfileThumbnailImageURL = root.SelectSingleNode(@"//a[@id='profileImage']//img")?.Attributes["src"]?.Value;
                Profile.ProfileImageURL          = !String.IsNullOrEmpty(Profile.ProfileThumbnailImageURL) ? CrawlUtil.ModifyUriFileName(Profile.ProfileThumbnailImageURL, x => "600x600") : null;

                // The connect button carries id, privacy flag and display name as data attributes; look it up once.
                var connectButton = root.SelectSingleNode(@"//div[@class='connectButton notReversed tooltips']");
                Profile.ProfileID = connectButton?.Attributes["data-id"]?.Value;
                String privateFlag = connectButton?.Attributes["data-is-private"]?.Value;
                Profile.IsPrivate           = String.Equals(privateFlag, "true", StringComparison.OrdinalIgnoreCase);
                Profile.PersonalName        = connectButton?.Attributes["data-title"]?.Value;
                Profile.LocationDescription = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='location_white location ']")?.Attributes["data-display-text"]?.Value;
                Profile.Website             = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='ribbon_white website ']//a")?.InnerText;
                Profile.OutConnectionTotal  = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/out']//span", Profile.UserName))?.InnerText;
                Profile.InConnectionTotal   = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/in']//span", Profile.UserName))?.InnerText;

                if (!Profile.IsPrivate)
                {
                    // SelectNodes returns null when the page shows no friends.
                    var top8FriendsNode = root.SelectNodes(@"//div[@class='friendsWrapper']//ul//li//a");
                    if (top8FriendsNode != null)
                    {
                        foreach (var friendNode in top8FriendsNode)
                        {
                            Top8FriendEntry friendEntry = new Top8FriendEntry();
                            friendEntry.UserURL = friendNode?.Attributes["href"]?.Value;
                            // Site-relative links are turned into absolute ones.
                            if (!String.IsNullOrEmpty(friendEntry.UserURL) && friendEntry.UserURL.StartsWith("/"))
                            {
                                friendEntry.UserURL = string.Format(@"https://myspace.com{0}", friendEntry.UserURL);
                            }
                            friendEntry.ProfileID    = friendNode?.Attributes["data-profileid"]?.Value;
                            friendEntry.ThumbnailURL = friendNode?.Attributes["data-image-url"]?.Value;
                            friendEntry.UserName     = friendNode?.Attributes["data-title"]?.Value;
                            Profile.Top8Friends.Add(friendEntry);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Still best effort, but leave a trace instead of hiding the failure completely.
                System.Diagnostics.Trace.TraceWarning("Failed to parse profile page {0}: {1}", profileURL, e);
            }
        }
Exemple #43
0
        /// <summary>
        /// Runs a search on google.com.vn and returns the inner text of every node of the
        /// result page that matches one XPath expression.
        /// </summary>
        /// <param name="keywordsearch">Search keyword.</param>
        /// <param name="xpath">XPath expression evaluated against the result page.</param>
        /// <param name="page">Zero-based result page; page n is requested with start = n * 10.</param>
        /// <returns>
        /// The matched texts (empty when nothing matched or the response was not HTML),
        /// or <c>null</c> when the request or the parsing failed.
        /// </returns>
        public static List <string> GetListStringFromGoogle(string keywordsearch, string xpath, int page)
        {
            string encodedKeyword = System.Web.HttpUtility.UrlEncode(keywordsearch);
            string url;

            if (page == 0)
            {
                // The first page needs the keyword too; without it the query would be empty.
                url = "https://www.google.com.vn/search?q=" + encodedKeyword;
            }
            else
            {
                url = string.Format("https://www.google.com.vn/search?q={0}&btnG=T%C3%ACm+v%E1%BB%9Bi+Google&start={1}", encodedKeyword, (page * 10));
            }

            List <string> listkeywords = new List <string>();

            try
            {
                Uri            urlRoot = new Uri(url, UriKind.RelativeOrAbsolute);
                HttpWebRequest oReq    = (HttpWebRequest)WebRequest.Create(urlRoot);
                oReq.AllowAutoRedirect = true; // redirect responses are followed automatically via the Location header
                oReq.UserAgent         = @"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36";
                oReq.Timeout           = 3000;

                // Dispose the response (and its stream) even when parsing throws.
                using (HttpWebResponse resp = (HttpWebResponse)oReq.GetResponse())
                {
                    // Encoding.GetEncoding throws on a missing charset; fall back to UTF-8 in that case.
                    var encoding = string.IsNullOrEmpty(resp.CharacterSet) ? Encoding.UTF8 : Encoding.GetEncoding(resp.CharacterSet);
                    if (resp.ContentType.StartsWith("text/html", StringComparison.InvariantCultureIgnoreCase))
                    {
                        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                        using (var resultStream = resp.GetResponseStream())
                        {
                            doc.Load(resultStream, encoding);
                        }

                        // SelectNodes returns null when the expression matches nothing.
                        HtmlAgilityPack.HtmlNodeCollection node = doc.DocumentNode.SelectNodes(xpath);
                        if (node != null)
                        {
                            foreach (HtmlAgilityPack.HtmlNode item in node)
                            {
                                listkeywords.Add(item.InnerText);
                            }
                        }
                    }
                }
            }
            catch (Exception ex) {
                Log.Error("Error: ", ex);
                // NOTE(review): blocks the calling thread for 20 minutes after any failure;
                // presumably a back-off against being rate-limited — confirm before changing.
                Thread.Sleep(1200000);
                return(null);
            }
            return(listkeywords);
        }
Exemple #44
0
        /// <summary>
        /// Searches dlsite.com for a keyword and returns the works found on the first result page as JSON.
        /// </summary>
        /// <param name="q">Search keyword; a blank value yields a JSON <c>null</c>.</param>
        /// <returns>
        /// <c>{ success = true, entries = [...] }</c> when the result list was parsed (possibly empty),
        /// <c>{ success = true }</c> when the page has no result list element, and
        /// <c>{ success = false }</c> when the request or the parsing failed.
        /// </returns>
        public JsonResult Dlsite([FromBody] string q)
        {
            if (string.IsNullOrWhiteSpace(q))
            {
                return(Json(null));
            }
            var            url        = string.Format(@"http://www.dlsite.com/maniax/fsr/=/language/jp/sex_category%5B0%5D/male/keyword/{0}/per_page/30/show_type/1", WebUtility.UrlEncode(q));
            HttpWebRequest webRequest = WebRequest.CreateHttp(url);

            // The request is sent with an "adultchecked" cookie and the caller's own User-Agent / Accept headers.
            webRequest.CookieContainer = new CookieContainer();
            webRequest.CookieContainer.Add(new System.Uri(url), new Cookie("adultchecked", "1", "/"));
            webRequest.UserAgent = Request.Headers[HeaderNames.UserAgent];
            webRequest.Accept    = Request.Headers[HeaderNames.Accept];
            try
            {
                var doc = new HtmlAgilityPack.HtmlDocument();
                // Dispose response and stream once the document is in memory.
                using (var response = webRequest.GetResponse())
                using (var stream = response.GetResponseStream())
                {
                    doc.Load(stream, System.Text.Encoding.UTF8);
                }

                var node = doc.GetElementbyId("search_result_list");
                if (node == null)
                {
                    return(Json(new { success = true }));
                }

                var entryList = new List <DLsite>();
                // SelectNodes returns null (not an empty collection) when nothing matches;
                // an empty result list is still a successful search.
                var works = node.SelectNodes(".//dl[@class='work_1col']");
                if (works != null)
                {
                    foreach (var dl in works)
                    {
                        var entry     = new DLsite();
                        var titleLink = dl.SelectSingleNode("dt/a");
                        if (titleLink != null)
                        {
                            entry.Title = titleLink.InnerText;
                            entry.Url   = titleLink.GetAttributeValue("href", "");
                            // The code is the text between "product_id/" (11 characters) and ".html" in the link.
                            int spos = entry.Url.LastIndexOf("product_id/") + 11;
                            int epos = entry.Url.LastIndexOf(".html");
                            if (spos > 11 && epos > spos)
                            {
                                entry.RjCode = entry.Url.Substring(spos, epos - spos);
                            }
                        }
                        var circleLink = dl.SelectSingleNode("dd[@class='maker_name']/a");
                        if (circleLink != null)
                        {
                            entry.Circle    = circleLink.InnerText;
                            entry.CircleUrl = circleLink.GetAttributeValue("href", "");
                        }
                        entry.Description = dl.SelectSingleNode("dd[@class='work_text']")?.InnerText;
                        entryList.Add(entry);
                    }
                }
                return(Json(new { success = true, entries = entryList }));
            }
            catch
            {
                return(Json(new { success = false }));
            }
        }
 /// <summary>
 /// Loads the HTML file at <paramref name="sData"/> and writes the inner HTML of every
 /// anchor element to the console. A page without anchors produces no output.
 /// </summary>
 /// <param name="sData">Path of the HTML file to load.</param>
 public static void readDoc(string sData)
 {
     HtmlAgilityPack.HtmlDocument loadedPage = new HtmlAgilityPack.HtmlDocument();
     loadedPage.Load(sData);

     // SelectNodes returns null when the page contains no <a> element.
     HtmlAgilityPack.HtmlNodeCollection anchors = loadedPage.DocumentNode.SelectNodes("//a");
     if (anchors == null)
     {
         return;
     }

     foreach (HtmlAgilityPack.HtmlNode item in anchors)
     {
         Console.WriteLine(item.InnerHtml);
     }
 }
Exemple #46
0
        /// <summary>
        /// Builds a substitute <c>ILoadedHtmlDocumentProvider</c> whose <c>GetLoadedHtmlDocument</c>
        /// returns the document loaded from <paramref name="htmlFileName"/>.
        /// </summary>
        /// <param name="htmlFileName">Path of the HTML file to load.</param>
        /// <returns>The configured substitute.</returns>
        ILoadedHtmlDocumentProvider GetHtmlDocumentProvider(string htmlFileName)
        {
            var loadedDocument = new HtmlAgilityPack.HtmlDocument();
            loadedDocument.Load(htmlFileName);

            var stub = Substitute.For <ILoadedHtmlDocumentProvider>();
            stub.GetLoadedHtmlDocument().Returns(loadedDocument);
            return(stub);
        }
Exemple #47
0
 /// <summary>
 /// Parses the stored playlist page and checks that sorting moves the first song
 /// of the parsed list ("Russ Ballard") to the end.
 /// </summary>
 public void SongsCanBeSortedByTimestamp()
 {
     string assemblyDirectory = System.IO.Path.GetDirectoryName(
         System.Reflection.Assembly.GetExecutingAssembly().Location);
     string testDataDirectory = assemblyDirectory + @"\..\..\TestData";

     var playlistPage = new HtmlAgilityPack.HtmlDocument();
     playlistPage.Load(testDataDirectory + @"\ROCKLAND - Mach an und laut!.html");

     var playlist = new RocklandParser().GetSongs(playlistPage);

     Assert.AreEqual("Russ Ballard", playlist.First().Artist);
     playlist.Sort();
     Assert.AreEqual("Russ Ballard", playlist.Last().Artist);
 }
        /// <summary>
        /// Downloads <paramref name="url"/> and parses the response body as UTF-8 HTML.
        /// </summary>
        /// <remarks>
        /// A failed first attempt is repeated once after a one-second pause; a failure of the
        /// second attempt propagates to the caller.
        /// </remarks>
        /// <param name="url">Address passed to <c>CreateRequest</c>.</param>
        /// <returns>The loaded document.</returns>
        public HtmlAgilityPack.HtmlDocument LoadDocument(string url)
        {
            var document = new HtmlAgilityPack.HtmlDocument();
            bool alreadyRetried = false;

            while (true)
            {
                try
                {
                    using (var body = CreateRequest(url).GetResponse().GetResponseStream())
                    {
                        document.Load(body, Encoding.UTF8);
                    }
                    return document;
                }
                catch (Exception)
                {
                    if (alreadyRetried)
                    {
                        throw;
                    }

                    // just do a second try
                    alreadyRetried = true;
                    Thread.Sleep(1000);
                }
            }
        }
 /// <summary>
 /// Downloads the show-selection page of pogdesign.co.uk and returns the titles listed on it.
 /// </summary>
 /// <returns>The show titles; empty when the page contains no matching element.</returns>
 public static List<string> GetFullShowSelection() // Getting a list of all shows in calendar
 {
     var Titles = new List<string>();

     string page;
     using (var client = new WebClient())
     {
         page = client.DownloadString("http://www.pogdesign.co.uk/cat/showselect.php");
     }

     var document = new HtmlAgilityPack.HtmlDocument();
     using (var reader = new StringReader(page))
     {
         document.Load(reader);
     }

     // Show titles are the <strong> children of <label class="label_check">.
     var showNodes = document.DocumentNode.SelectNodes("//label[@class=\"label_check\"]/strong");
     if (showNodes == null)
     {
         // SelectNodes returns null when nothing matches.
         return Titles;
     }

     foreach (var node in showNodes)
     {
         Titles.Add(node.InnerText);
     }
     return Titles;
 }
        /// <summary>
        /// Loads Test.html and checks that <c>ParseLinks</c> returns the two expected wiki links, in order.
        /// </summary>
        public void ParseLinksTest()
        {
            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load("Test.html");

            var expectedLinks = new List<string> { "/wiki/page", "/wiki/page2"};
            List<string> actualLinks = document.DocumentNode.ParseLinks().ToList();

            Assert.AreEqual(expectedLinks.Count, actualLinks.Count);

            int position = 0;
            foreach (string actualLink in actualLinks)
            {
                Assert.AreEqual(expectedLinks[position], actualLink);
                position++;
            }
        }
        /// <summary>
        /// Parses HTML read from a stream and wraps the result in a ZPT document.
        /// </summary>
        /// <returns>The document.</returns>
        /// <param name="sourceStream">Stream the HTML is read from; must not be <c>null</c>.</param>
        /// <param name="sourceInfo">Source information handed on to the document.</param>
        /// <param name="encoding">Encoding used to read the stream; must not be <c>null</c>.</param>
        public IZptDocument CreateDocument(Stream sourceStream, ISourceInfo sourceInfo, Encoding encoding)
        {
            if(sourceStream == null)
            {
                throw new ArgumentNullException(nameof(sourceStream));
            }
            if(encoding == null)
            {
                throw new ArgumentNullException(nameof(encoding));
            }

            var htmlDocument = new HtmlAgilityPack.HtmlDocument();
            htmlDocument.Load(sourceStream, encoding);

            return CreateDocument(htmlDocument, sourceInfo);
        }
		/// <summary>
		/// Downloads a product listing page and adds one <c>RowItem</c> to <c>Items</c> for every
		/// product whose title states a lumen count; afterwards <c>Title</c> shows the item count.
		/// </summary>
		/// <remarks>
		/// Products without a title link or a price element are skipped. <c>Stars</c> and
		/// <c>ReviewCount</c> stay at -1 when the review information is missing or not numeric.
		/// </remarks>
		/// <param name="url">Address of the listing page.</param>
		private async Task Populate(string url)
		{
			var page = new HtmlAgilityPack.HtmlDocument();
			// Dispose client and stream once the page is in memory.
			using (var client = new System.Net.Http.HttpClient())
			using (var response = await client.GetStreamAsync(url))
			{
				page.Load(response);
			}

			// Three or four digits followed by "lm" / "lumen", e.g. "1200-lumen" or "900lm".
			var lumenPattern = new Regex(@"\d{3,4}[-\s]?(lm|lumen)", RegexOptions.IgnoreCase);

			// SelectNodes returns null when nothing matches.
			var itemNodes = page.DocumentNode.SelectNodes("//ul[@class='productList subList']/li");
			if (itemNodes != null)
			{
				foreach (var item in itemNodes)
				{
					var titleNode = item.SelectSingleNode(".//p[@class='title']/a");
					var priceNode = item.SelectSingleNode(".//p[@class='price']");
					var reviewNode = item.SelectSingleNode(".//p[@class='review']/a");
					if (titleNode == null || priceNode == null)
					{
						continue;
					}

					string link = titleNode.GetAttributeValue("href", null);
					string title = titleNode.GetAttributeValue("title", null);
					if (link == null || title == null)
					{
						continue;
					}

					// With three child nodes the third one is used as the price text.
					if (priceNode.ChildNodes.Count == 3)
					{
						priceNode = priceNode.ChildNodes[2];
					}

					// Prices use '.' as decimal separator; parse culture-independently. Unparsable text yields 0.
					double price;
					string priceText = priceNode.InnerText.Trim().Replace("US$", "");
					double.TryParse(priceText, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out price);

					var lumenMatch = lumenPattern.Match(title);
					if (!lumenMatch.Success)
					{
						continue;
					}

					string lumenCountString = Regex.Replace(lumenMatch.Value, "[^0-9]", "");
					int lumenCount;
					if (!int.TryParse(lumenCountString, out lumenCount))
					{
						continue;
					}

					int starCount = -1;
					int reviewCount = -1;
					if (reviewNode != null)
					{
						int parsed;

						// NOTE(review): the suffix text " out of 5 starts" is kept as found; confirm it matches the site.
						var starsString = reviewNode.GetAttributeValue("title", "").Replace(" out of 5 starts", "");
						if (int.TryParse(starsString, out parsed))
						{
							starCount = parsed;
						}

						// The review count is taken from the first attribute-less <span>.
						var spans = reviewNode.SelectNodes(".//span");
						var reviewString = spans == null
							? null
							: spans.Where(a => a.Attributes.Count == 0).Select(a => a.InnerText).FirstOrDefault();
						if (reviewString != null && int.TryParse(reviewString.Replace("Reviews", "").Trim(), out parsed))
						{
							reviewCount = parsed;
						}
					}

					Items.Add(new RowItem
						{
							LumenCount =  lumenCount,
							Price = price,
							Link = "http://dx.com" + link,
							Stars = starCount,
							ReviewCount = reviewCount
						});
				}
			}
			this.Title = Items.Count.ToString();
		}
Exemple #53
0
 /// <summary>
 /// Builds a copy of the sixth parsed song and checks that the list still holds
 /// eight songs after the "add only if not contained" step.
 /// </summary>
 public void UniqueSongCanOnlyBeAddedOnce()
 {
     string assemblyDirectory = System.IO.Path.GetDirectoryName(
         System.Reflection.Assembly.GetExecutingAssembly().Location);
     string testDataDirectory = assemblyDirectory + @"\..\..\TestData";

     var playlistPage = new HtmlAgilityPack.HtmlDocument();
     playlistPage.Load(testDataDirectory + @"\ROCKLAND - Mach an und laut!.html");

     var songs = new RocklandParser().GetSongs(playlistPage);

     var template = songs[5];
     var songDuplicate = new Song
     {
         TimestampText = template.TimestampText,
         Timestamp = template.Timestamp,
         Artist = template.Artist,
         Title = template.Title
     };

     bool alreadyPresent = songs.Contains(songDuplicate);
     if (!alreadyPresent)
     {
         songs.Add(songDuplicate);
     }

     Assert.AreEqual(8, songs.Count);
 }
 /// <summary>
 /// Reads the request body on a background task: "text/html" content is parsed into an
 /// HtmlAgilityPack document, anything else is loaded as an <c>XDocument</c>.
 /// </summary>
 /// <param name="type">Requested target type (not used).</param>
 /// <param name="stream">Stream holding the body.</param>
 /// <param name="contentHeaders">Content headers; a missing Content-Type is treated as non-HTML.</param>
 /// <param name="formatterLogger">Formatter logger (not used).</param>
 /// <returns>A task producing the parsed HTML or XML document.</returns>
 public override Task<object> ReadFromStreamAsync(Type type, Stream stream, HttpContentHeaders contentHeaders, IFormatterLogger formatterLogger)
 {
     return new TaskFactory<Object>().StartNew(() =>
                                           {
                                               // The Content-Type header may be absent, and media type names are case-insensitive.
                                               string mediaType = contentHeaders != null && contentHeaders.ContentType != null
                                                   ? contentHeaders.ContentType.MediaType
                                                   : null;

                                               if (string.Equals(mediaType, "text/html", StringComparison.OrdinalIgnoreCase))
                                               {
                                                   var html = new HtmlAgilityPack.HtmlDocument();
                                                   html.Load(stream);
                                                   return html;
                                               }
                                               else
                                               {
                                                   var doc = XDocument.Load(stream);
                                                   return doc;
                                               }
                                           });
 }
		/// <summary>
		/// Loads <paramref name="url"/> in a <c>WebBrowser</c> control, waits for the first
		/// <c>DocumentCompleted</c> event and returns the browser's document stream parsed as HTML.
		/// </summary>
		/// <param name="url">Address to load; a tick-count parameter is appended to avoid cached copies.</param>
		/// <returns>The parsed document.</returns>
		public static async Task<HtmlDocument> GetHtmlDocJs(string url)
		{
			using(var wb = new WebBrowser())
			{
				var done = false;
				var doc = new HtmlDocument();
				wb.ScriptErrorsSuppressed = true;

				// Subscribe before navigating so the completion event cannot be missed.
				wb.DocumentCompleted += (sender, args) => done = true;

				// avoid cache; keep an existing query string intact by appending with '&'
				var separator = url.IndexOf('?') >= 0 ? "&" : "?";
				wb.Navigate(url + separator + DateTime.Now.Ticks);

				// Poll without blocking until the handler above has fired.
				while(!done)
					await Task.Delay(50);
				doc.Load(wb.DocumentStream);
				return doc;
			}
		}
		/// <summary>
		/// Downloads <paramref name="url"/> as a UTF-8 string through a <c>GzipWebClient</c>
		/// and parses the text as HTML.
		/// </summary>
		/// <param name="url">Address to download.</param>
		/// <returns>The parsed document.</returns>
		public static async Task<HtmlDocument> GetHtmlDocGzip(string url)
		{
			using(var client = new GzipWebClient())
			{
				client.Encoding = Encoding.UTF8;
				// add an user-agent to stop some 403's
				client.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0");

				var target = new Uri(url);
				var markup = await client.DownloadStringTaskAsync(target);

				var parsed = new HtmlDocument();
				using(var markupReader = new StringReader(markup))
				{
					parsed.Load(markupReader);
				}
				return parsed;
			}
		}
        /// <summary>
        /// Parses the HTML of a source file and wraps the result in a ZPT document.
        /// </summary>
        /// <returns>The document.</returns>
        /// <param name="sourceFile">File to read; must not be <c>null</c>.</param>
        /// <param name="encoding">Encoding used to read the file; must not be <c>null</c>.</param>
        public IZptDocument CreateDocument(FileInfo sourceFile, Encoding encoding)
        {
            if(sourceFile == null)
            {
                throw new ArgumentNullException(nameof(sourceFile));
            }
            if(encoding == null)
            {
                throw new ArgumentNullException(nameof(encoding));
            }

            var fileSourceInfo = new SourceFileInfo(sourceFile);

            var htmlDocument = new HtmlAgilityPack.HtmlDocument();
            htmlDocument.Load(sourceFile.FullName, encoding);

            return CreateDocument(htmlDocument, fileSourceInfo);
        }
        /// <summary>
        /// Grabs the "RT DOC" schedule for a range of days around today.
        /// </summary>
        /// <param name="xmlParameters">
        /// XML whose optional <c>StartDate</c> and <c>EndDate</c> elements hold day offsets
        /// relative to today (defaults: -1 and 3).
        /// </param>
        /// <param name="logger">Receives one info entry per requested day.</param>
        /// <returns>All shows found, with start times converted to UTC.</returns>
        public override List<Show> Grab(string xmlParameters, ILogger logger)
        {
            var shows = new List<Show>();

            var doc = XDocument.Parse(xmlParameters);
            var sdElement = doc.Descendants("StartDate").FirstOrDefault();
            var startDateDiff = sdElement != null && sdElement.Value != null ? Convert.ToInt32(sdElement.Value) : -1;
            var edElement = doc.Descendants("EndDate").FirstOrDefault();
            var endDateDays = edElement != null && edElement.Value != null ? Convert.ToInt32(edElement.Value) : 3;

            // Read "today" once so a run that crosses midnight does not shift the remaining days.
            var today = DateTime.Now.Date;
            // Resolved on first use and then shared by all shows.
            TimeZoneInfo sourceZone = null;

            for (int i = startDateDiff; i <= endDateDays; i++)
            {
                var date = today.AddDays(i);
                var wr = WebRequest.Create(string.Format(url, date.ToString(DateFormat)));
                logger.WriteEntry(string.Format("Grabbing rtd date {0} ...", date.ToString(DateFormat)), LogType.Info);

                var html = new HtmlAgilityPack.HtmlDocument();
                // Release the connection as soon as the page is in memory.
                using (var res = (HttpWebResponse)wr.GetResponse())
                using (var stream = res.GetResponseStream())
                {
                    html.Load(stream);
                }

                var divs = html.DocumentNode.Descendants("div").Where(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.EndsWith("b-lenta-film_one"));
                foreach (var div in divs)
                {
                    var show = new Show();
                    var titleA = div.Descendants("a").First(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "b-film_heading");
                    show.Title = titleA.InnerText;
                    show.Channel = "RT DOC";

                    // When a group is present, the group name becomes the title and the film title the description.
                    var groupA = div.Descendants("div").FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "b-film_group");
                    if (groupA != null)
                    {
                        var groupText = groupA.Descendants("a").First().InnerText;
                        show.Description = show.Title;
                        show.Title = groupText;
                    }

                    // The second class token, minus its 5-character prefix, is added to the date as minutes.
                    var minutes = Convert.ToInt32(div.Attributes["class"].Value.Split(' ')[1].Substring(5));
                    show.StartTime = DateTime.SpecifyKind(date.AddMinutes(minutes), DateTimeKind.Unspecified);

                    if (sourceZone == null)
                    {
                        sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Russian Standard Time");
                    }
                    show.StartTime = TimeZoneInfo.ConvertTime(show.StartTime, sourceZone, TimeZoneInfo.Utc);
                    shows.Add(show);
                }
            }
            FixShowsEndTimeByStartTime(shows);
            return shows;
        }
        /// <summary>
        /// Searches thepiratebay.se for the current show and looks for a torrent whose name
        /// contains the "SxxEyy" code built from <c>Season</c> and <c>Episode</c>.
        /// </summary>
        /// <param name="magnet">Receives the magnet link of the first matching torrent; left unchanged when none is found.</param>
        /// <returns><c>true</c> when a matching torrent with a magnet link was found; otherwise <c>false</c>.</returns>
        public bool CheckTorrent(ref string magnet) //Checking if torrent file for a show is available
        {
            const string magnetLinkTest = "a[contains(@title,'Download this torrent using magnet')]";

            string result;
            using (var client = new WebClient())
            {
                result = client.DownloadString("https://thepiratebay.se/search/" + GetShowData() );
            }

            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load(new StringReader(result));
            var torrentNodes = document.DocumentNode.SelectNodes("//a[@class=\"detLink\"]"); //Getting torrent nodes
            if (torrentNodes == null) return false;

            // Search code "S<season>E<episode>", each number padded with a zero when it is 9 or less.
            string data = "S" + (Season > 9 ? "" : "0") + Season.ToString()
                        + "E" + (Episode > 9 ? "" : "0") + Episode.ToString();

            foreach (var node in torrentNodes)
            {
                if (!node.InnerText.Contains(data))
                {
                    continue;
                }

                // Prefer the magnet link in the same table row as the matched torrent
                // (assumes each result is one <tr> — TODO confirm against the live page);
                // otherwise fall back to the first magnet link of the page, as before.
                var magnetNode = node.SelectSingleNode("ancestor::tr[1]//" + magnetLinkTest)
                                 ?? document.DocumentNode.SelectSingleNode("//" + magnetLinkTest);
                if (magnetNode == null)
                {
                    continue;
                }

                // Use the href; fall back to the first attribute, which is what was read previously.
                magnet = magnetNode.GetAttributeValue("href", magnetNode.Attributes[0].Value);
                return true;
            }

            return false;
        }
        /// <summary>
        /// Parses the stored playlist page and checks the number of songs, the artist of the
        /// third song and the text form of every song.
        /// </summary>
        public void ParsePlaylistSongs_offline()
        {
            // Expected text of each song, in playlist order (trailing spaces are intentional).
            string[] expectedSongs =
            {
                "11:32 Uhr Russ Ballard - Voices",
                "11:26 Uhr Neil Young - Heart Of Gold",
                "11:21 Uhr Metallica - I Disappear",
                "11:18 Uhr Lenny Kravitz - Rock'n'Roll Is Dead",
                "11:15 Uhr Hooters - Johnny B.",
                "11:11 Uhr Steve Harley &amp; Cockney Rebel - Make Me Smile ",
                "11:08 Uhr Foreigner - When It Comes To Love",
                "11:04 Uhr Glenn Frey - The Heat Is On "
            };

            string assemblyDirectory = System.IO.Path.GetDirectoryName(
                System.Reflection.Assembly.GetExecutingAssembly().Location);
            string testDataDirectory = assemblyDirectory + @"\..\..\TestData";

            var playlistPage = new HtmlAgilityPack.HtmlDocument();
            playlistPage.Load(testDataDirectory + @"\ROCKLAND - Mach an und laut!.html");

            var songs = new RocklandParser().GetSongs(playlistPage);

            Assert.AreEqual(8, songs.Count);
            Assert.AreEqual("Metallica", songs[2].Artist);
            for (int position = 0; position < expectedSongs.Length; position++)
            {
                Assert.AreEqual(expectedSongs[position], songs[position].ToString());
            }
        }