/// <summary>
/// Verifies that <c>CreateTextNode(string)</c> produces a text node that carries the supplied text.
/// </summary>
public void CreateTextNodeWithText()
{
    var doc = new HtmlDocument();

    var a = doc.CreateTextNode("something");

    Assert.AreEqual("something", a.InnerText);
    // Expected value first, actual second, so a failure message is reported the right way round.
    Assert.AreEqual(HtmlNodeType.Text, a.NodeType);
}
/// <summary>
/// Checks that the <c>Head</c> property of a freshly constructed document is a <c>Head</c> instance.
/// </summary>
public void Head_ReturnsHeadObject()
{
    var document = new HtmlDocument();

    var actual = document.Head;

    Assert.IsInstanceOf<Head>(actual);
}
/// <summary>
/// Loads <c>..\..\mshome.htm</c>, switches the document to XML output and saves it as <c>mshome.xml</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    const string inputPath = @"..\..\mshome.htm";
    const string outputPath = "mshome.xml";

    var document = new HtmlDocument();
    document.Load(inputPath);

    // Request XML output for the Save call that follows.
    document.OptionOutputAsXml = true;
    document.Save(outputPath);
}
/// <summary>
/// Extracts link targets from the anchors of an HTML string.
/// </summary>
/// <remarks>
/// Anchors carrying a <c>ref</c> attribute take precedence: if at least one exists, their
/// <c>ref</c> values are returned; otherwise the <c>href</c> values of all anchors that have
/// one are returned.
/// </remarks>
/// <param name="htmlSource">HTML markup to scan.</param>
/// <returns>
/// The attribute values in the order the XPath query returns them, or <c>null</c> when the
/// markup is <c>null</c>, cannot be processed, or contains no matching anchor.
/// </returns>
public static List<string> GetLinksFromWebsite(string htmlSource)
{
    if (htmlSource == null)
    {
        return null;
    }

    try
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(htmlSource);

        var root = doc.DocumentNode;
        if (root.InnerHtml == null)
        {
            return null;
        }

        // NOTE(review): "ref" is an unusual anchor attribute and may be a typo — kept as-is; confirm.
        var attributeName = "ref";
        var anchors = root.SelectNodes("//a[@ref]");
        if (anchors == null)
        {
            attributeName = "href";
            anchors = root.SelectNodes("//a[@href]");
        }

        // SelectNodes yields null when nothing matches; report that as "no result"
        // explicitly instead of relying on a swallowed NullReferenceException.
        if (anchors == null)
        {
            return null;
        }

        return anchors
            .Select(node => node.Attributes[attributeName].Value)
            .ToList();
    }
    catch (Exception)
    {
        // Deliberately best-effort: any parsing failure is reported as "no result".
        return null;
    }
}
/// <summary>
/// Launches <c>example2.htm</c>, searches for a hyperlink with inner text "Hello" using the
/// <c>VisibleOnly</c> search configuration, asserts that two controls match, then checks that
/// the located control has a non-empty bounding rectangle before clicking it.
/// </summary>
public void Mouse_click_will_fail_because_hidden_link_is_matched_instead_of_visible_link()
{
    // Playback.PlaybackSettings.SmartMatchOptions = SmartMatchOptions.None; // tried Sudhish Mathuria's suggestion
    var pagePath = Path.Combine(TestContext.TestDeploymentDir, "example2.htm");
    var browser = BrowserWindow.Launch(pagePath);

    var page = new HtmlDocument(browser);
    page.FilterProperties[HtmlDocument.PropertyNames.Title] = "2ad6de55-75f1-403f-8fcb-1d5defac9786";

    var link = new HtmlHyperlink(page);
    link.SearchProperties[HtmlHyperlink.PropertyNames.InnerText] = "Hello";

    // Restrict the search to visible controls only.
    link.SearchConfigurations.Add(SearchConfiguration.VisibleOnly);

    var matches = link.FindMatchingControls();
    Assert.AreEqual(2, matches.Count, "Should be two matching.");

    // The control bound here is expected to be the visible one.
    link.Find();

    Debug.WriteLine("BoundingRectangle: " + link.BoundingRectangle);
    Assert.IsTrue(link.BoundingRectangle.Width > 0, "Width should positive.");
    Assert.IsTrue(link.BoundingRectangle.Height > 0, "Height should positive.");

    Mouse.Click(link);
}
/// <summary>
/// Verifies that <c>CreateElement("a")</c> produces an element node named "a".
/// </summary>
public void CreateElement()
{
    var doc = new HtmlDocument();

    var a = doc.CreateElement("a");

    Assert.AreEqual("a", a.Name);
    // Expected value first, actual second, so a failure message is reported the right way round.
    Assert.AreEqual(HtmlNodeType.Element, a.NodeType);
}
/// <summary>
/// Verifies that <c>CreateAttribute(name, value)</c> returns an attribute exposing both values unchanged.
/// </summary>
public void CreateAttributeWithText()
{
    const string expectedName = "href";
    const string expectedValue = "http://something.com";
    var document = new HtmlDocument();

    var attribute = document.CreateAttribute(expectedName, expectedValue);

    Assert.AreEqual(expectedName, attribute.Name);
    Assert.AreEqual(expectedValue, attribute.Value);
}
/// <summary>
/// Assigns <c>null</c> to <c>Head</c> twice (once in the initializer, once afterwards) and
/// asserts that the document stays empty and <c>Head</c> remains <c>null</c>.
/// </summary>
public void Head_SetNonExisting_Null_RemovesElement()
{
    var document = new HtmlDocument { Head = null };

    document.Head = null;

    Assert.True(document.IsEmpty);
    Assert.Null(document.Head);
}
/// <summary>
/// Collects the CSS of a document into one string and removes the originating tags.
/// </summary>
/// <remarks>
/// Inline <c>style</c> tags contribute their child elements joined by a space. <c>link</c> tags
/// whose <c>type</c> is "text/css" or whose <c>rel</c> is "stylesheet" (both compared
/// case-insensitively) are downloaded. A relative <c>href</c> is resolved against the current
/// HTTP request URL when one is available, otherwise the link is skipped. All
/// <c>style</c>/<c>link</c> tags found are removed from the document afterwards, including
/// links that were skipped.
/// </remarks>
/// <param name="doc">Document to read from; it is modified by this call.</param>
/// <returns>The concatenated CSS text.</returns>
private static string ExtractCss(HtmlDocument doc)
{
    var styleTags = doc.FindTagsByName("style", "link").ToArray();
    var css = new StringBuilder();

    // A single client serves every download and is disposed deterministically.
    using (var client = new WebClient())
    {
        foreach (var tag in styleTags)
        {
            if (tag.IsType("style"))
            {
                css.AppendLine(tag.ChildElements.ToSeparatedString(" "));
                continue;
            }

            var isStylesheetLink = tag.IsType("link")
                && (string.Compare(tag.Attributes["type"], "text/css", StringComparison.OrdinalIgnoreCase) == 0
                    || string.Compare(tag.Attributes["rel"], "stylesheet", StringComparison.OrdinalIgnoreCase) == 0);
            if (!isStylesheetLink)
            {
                continue;
            }

            var src = tag.Attributes["href"];
            if (string.IsNullOrEmpty(src))
            {
                continue;
            }

            var uri = new Uri(src, UriKind.RelativeOrAbsolute);
            if (!uri.IsAbsoluteUri && HttpContext.Current != null && HttpContext.Current.Request != null)
            {
                uri = new Uri(HttpContext.Current.Request.Url, uri);
            }

            // Still relative: there is no request to resolve against, so it cannot be fetched.
            if (!uri.IsAbsoluteUri)
            {
                continue;
            }

            css.Append(client.DownloadString(uri));
        }
    }

    foreach (var tag in styleTags)
    {
        tag.Remove();
    }

    return css.ToString();
}
/// <summary>
/// Verifies that the parameterless <c>CreateTextNode()</c> yields a node with the standard
/// text-node name and the <c>Text</c> node type.
/// </summary>
public void CreateTextNode()
{
    var doc = new HtmlDocument();

    var a = doc.CreateTextNode();

    Assert.AreEqual(HtmlNode.HtmlNodeTypeNameText, a.Name);
    // Expected value first, actual second, so a failure message is reported the right way round.
    Assert.AreEqual(HtmlNodeType.Text, a.NodeType);
}
/// <summary>
/// Loads the HTML file named by the first argument and saves it as XML to the path named by the second.
/// </summary>
/// <param name="args"><c>args[0]</c> is the input path, <c>args[1]</c> the output path.</param>
static void Main (string [] args)
{
    var document = new HtmlDocument();

    document.Load(args[0]);

    // Request XML output for the Save call that follows.
    document.OptionOutputAsXml = true;
    document.Save(args[1]);
}
/// <summary>
/// Verifies that <c>CreateComment(string)</c> yields a comment node with the standard comment
/// name and the supplied text.
/// </summary>
public void CreateCommentWithText()
{
    var doc = new HtmlDocument();

    var a = doc.CreateComment("something");

    Assert.AreEqual(HtmlNode.HtmlNodeTypeNameComment, a.Name);
    Assert.AreEqual("something", a.InnerText);
    // Expected value first, actual second, so a failure message is reported the right way round.
    Assert.AreEqual(HtmlNodeType.Comment, a.NodeType);
}
/// <summary>
/// Accesses the <c>Closed</c> member through a <c>dynamic</c> view of the document node and
/// asserts that the value is a <see cref="bool"/>.
/// </summary>
public void TestCallingExistingMember()
{
    const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
    var document = new HtmlDocument();
    document.LoadHtml(markup);

    dynamic root = document.DocumentNode;
    var closed = root.Closed;

    Assert.IsInstanceOf<bool>(closed);
}
/// <summary>
/// Calls <c>Descendants()</c> through a <c>dynamic</c> view of the document node and asserts
/// that the result is an <c>IEnumerable&lt;HtmlNode&gt;</c>.
/// </summary>
public void TestCallingExistingFunction()
{
    const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
    var document = new HtmlDocument();
    document.LoadHtml(markup);

    dynamic root = document.DocumentNode;
    var descendants = root.Descendants();

    Assert.IsInstanceOf<IEnumerable<HtmlNode>>(descendants);
}
/// <summary>
/// Verifies that <c>ToString()</c> on a value-less attribute (<c>&lt;a href&gt;</c>) returns an empty string.
/// </summary>
public void Test_Attribute_ToString_Returns_Empty_String_When_Value_Is_Not_Defined()
{
    var document = new HtmlDocument("<html><head></head><body><a href>yo</a></body></html>");
    var anchor = document.DocumentNode.SelectSingleNode("//a");

    var rendered = anchor.Attributes["href"].ToString();

    Assert.AreEqual(string.Empty, rendered);
}
/// <summary>
/// Checks that the <c>Body</c> property of a freshly constructed document is a <c>Body</c> instance.
/// </summary>
public void Body_ReturnsBodyObject()
{
    var document = new HtmlDocument();

    Body actual = document.Body;

    Assert.IsInstanceOf<Body>(actual);
}
/// <summary>
/// Replaces an existing head with a new one and asserts that the document then contains only
/// the replacement, which is also what <c>Head</c> returns.
/// </summary>
public void Head_SetExisting_NonNull_RemovesElement()
{
    HtmlElement replacement = Tag.Head.WithClass("class");
    var document = new HtmlDocument { Head = Tag.Head };

    document.Head = replacement;

    Assert.Equal(new HtmlElement[] { replacement }, document.Elements());
    Assert.Equal(replacement, document.Head);
}
/// <summary>
/// Verifies that <c>HasAttribute("href")</c> is true for an anchor that declares an <c>href</c>.
/// </summary>
public void Test_HasAttribute()
{
    var document = new HtmlDocument("<html><head></head><body><a href=\"http://flo.se\">yo</a></body></html>");
    var anchor = document.DocumentNode.SelectSingleNode("//a");

    var hasHref = anchor.HasAttribute("href");

    Assert.IsTrue(hasHref);
}
/// <summary>
/// Verifies that <c>ToString()</c> on an attribute returns the attribute's value.
/// </summary>
public void Test_Attribute_ToString_Returns_Value()
{
    var document = new HtmlDocument("<html><head></head><body><a href=\"http://flo.se\">yo</a></body></html>");
    var anchor = document.DocumentNode.SelectSingleNode("//a");

    var rendered = anchor.Attributes["href"].ToString();

    Assert.AreEqual("http://flo.se", rendered);
}
/// <summary>
/// Downloads every page of section 8 of market.karelia.pro, extracts link, price and title of
/// each listed item and writes them to <c>result.xml</c>.
/// </summary>
/// <remarks>
/// The number of pages is read from the <c>page</c> query parameter of the ninth paginator
/// entry on the first page. All pages are then requested concurrently.
/// </remarks>
/// <returns>A task that completes once <c>result.xml</c> has been saved.</returns>
public static async Task MainAsync()
{
    const string sectionUrl = "http://market.karelia.pro/section/8/";

    string[] pages;

    // One HttpClient serves all requests instead of one instance per page.
    using (var client = new HttpClient())
    {
        var html = await client.GetStringAsync(sectionUrl);

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var lastPageHref = doc.DocumentNode
            .SelectSingleNode(".//*[@id='paginator']/li[9]/a")
            .GetAttributeValue("href", "");
        var pageCount = int.Parse(
            HttpUtility.ParseQueryString(new Uri(lastPageHref).Query).Get("page"));

        var downloads = Enumerable.Range(1, pageCount)
            .Select(page => client.GetStringAsync($"http://market.karelia.pro/section/8/?page={page}"))
            .ToList();

        pages = await Task.WhenAll(downloads);
    }

    var items = pages.SelectMany(page =>
    {
        var innerDoc = new HtmlDocument();
        innerDoc.LoadHtml(page);

        // SelectNodes returns null when nothing matches; such a page contributes no items.
        var listItems = innerDoc.DocumentNode.SelectNodes(".//*[@id='alist']/li");
        if (listItems == null)
        {
            return Enumerable.Empty<XElement>();
        }

        // The leading "." keeps each query relative to the current <li>. A bare "//" would
        // search the whole document and return the first item's data for every row.
        return listItems.Select(li => new XElement("item",
            new XElement("link", li.SelectSingleNode(".//div[@class='name']/a").Attributes["href"].Value),
            new XElement("price", li.SelectSingleNode(".//div[@class='price']/strong/span").InnerText),
            new XElement("title", li.SelectSingleNode(".//div[@class='name']/a/span[@class='title']").InnerText)));
    }).ToList();

    new XDocument(
        new XDeclaration("1.0", null, null),
        new XElement("root", items))
        .Save("result.xml");
}
/// <summary>
/// Verifies the markup rendered by a freshly constructed document.
/// </summary>
public void TestInitial()
{
    const string expected = @"<!DOCTYPE html><html><head></head><body></body></html>";
    var document = new HtmlDocument();

    string actual = document.ToString();

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Reads <c>Html.Body._Class</c> through a <c>dynamic</c> view of the document node and asserts
/// that the result is a non-null <c>HtmlAttribute</c>.
/// </summary>
public void TestGetAttribute()
{
    const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
    var document = new HtmlDocument();
    document.LoadHtml(markup);

    dynamic root = document.DocumentNode;
    var classAttribute = root.Html.Body._Class;

    Assert.IsNotNull(classAttribute);
    Assert.IsInstanceOf<HtmlAttribute>(classAttribute);
}
/// <summary>
/// Verifies that a class added to the body shows up in the rendered markup.
/// </summary>
public void BodyChanged_GetString_ReturnsCorrectHtml()
{
    const string expected = @"<!DOCTYPE html><html><head></head><body class=""one""></body></html>";
    var document = new HtmlDocument();
    document.Body.AddClass("one");

    string actual = document.ToString();

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Reads <c>Html.Body</c> through a <c>dynamic</c> view of the document node and asserts that
/// the result is a non-null <c>HtmlNode</c>.
/// </summary>
public void TestGetMember()
{
    const string markup = "<html><body><p>asdf asdf sdf</p></body></html>";
    var document = new HtmlDocument();
    document.LoadHtml(markup);

    dynamic root = document.DocumentNode;
    var body = root.Html.Body;

    Assert.IsNotNull(body);
    Assert.IsInstanceOf<HtmlNode>(body);
}
/// <summary>
/// Loads a remote page into an <c>HtmlDocument</c> and saves it to an in-memory stream.
/// The test passes when both operations complete without throwing.
/// </summary>
/// <remarks>Requires network access to the hard-coded URL.</remarks>
public void StackOverflow()
{
    var url = "http://rewarding.me/active-tel-domains/index.php/index.php?rescan=amour.tel&w=A&url=&by=us&limits=0";
    var request = WebRequest.Create(url);
    var htmlDocument = new HtmlDocument();

    // Release the connection and the response stream as soon as the document is loaded.
    using (var response = request.GetResponse())
    using (var responseStream = response.GetResponseStream())
    {
        htmlDocument.Load(responseStream);
    }

    using (Stream memoryStream = new MemoryStream())
    {
        htmlDocument.Save(memoryStream);
    }
}
/// <summary>
/// Creates a <c>DocumentWithLinks</c> around the given document and immediately collects its
/// links and references by calling <c>GetLinks</c> and <c>GetReferences</c>.
/// </summary>
/// <param name="doc">The input HTML document. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
public DocumentWithLinks(HtmlDocument doc)
{
    // Guard first so no state is touched for an invalid argument.
    if (doc == null)
    {
        throw new ArgumentNullException("doc");
    }

    _doc = doc;

    GetLinks();
    GetReferences();
}
/// <summary>
/// Verifies the initial state of a new document: tag "html", not void, empty, and no head or body.
/// </summary>
public void Ctor()
{
    var document = new HtmlDocument();

    Assert.Equal("html", document.Tag);
    Assert.False(document.IsVoid);
    Assert.True(document.IsEmpty);
    Assert.Null(document.Head);
    Assert.Null(document.Body);
}
/// <summary>
/// Creates an <c>img</c> element with the given source and a fixed 100% height/width style,
/// and appends it to <paramref name="new_node"/>.
/// </summary>
/// <param name="htmlDoc">Document used to create the element and its attributes.</param>
/// <param name="new_node">Node that receives the image element as a child.</param>
/// <param name="image_source">Value of the image's <c>src</c> attribute.</param>
public void AddImageNode(HtmlDocument htmlDoc,HtmlNode new_node,string image_source)
{
    // Target shape (from the original note): <img src="{1}" style="height:100%;width:100%;"/>
    HtmlNode img = htmlDoc.CreateElement("img");

    img.Attributes.Append(htmlDoc.CreateAttribute("src", image_source));
    img.Attributes.Append(htmlDoc.CreateAttribute("style", "height:100%;width:100%;"));

    new_node.AppendChild(img);
}
/// <summary>
/// Loads the HTML file at <paramref name="path"/> and returns the text that <c>ConvertTo</c>
/// writes for its document node.
/// </summary>
/// <param name="path">Path of the HTML file to load.</param>
/// <returns>Everything <c>ConvertTo</c> wrote to the writer.</returns>
public string Convert(string path)
{
    var document = new HtmlDocument();
    document.Load(path);

    var output = new StringWriter();
    ConvertTo(document.DocumentNode, output);
    output.Flush();

    return output.ToString();
}
/// <summary>
/// Finds the first <c>http://</c> URL in a piece of text, downloads that page and builds a
/// short description from its title, description meta tag and first image.
/// </summary>
/// <param name="str">Free text that may contain a URL.</param>
/// <returns>
/// <c>null</c> when <paramref name="str"/> is null/blank or contains no <c>http://</c> URL;
/// an empty <c>pageDesc</c> when the extracted URL is rejected by <c>IsAbsoluteUrl</c>;
/// otherwise a <c>pageDesc</c> filled from the downloaded page.
/// </returns>
public pageDesc getUrlDesc(string str)
{
    if (string.IsNullOrWhiteSpace(str))
    {
        return null;
    }

    str = str.Trim();

    int schemeIndex = str.IndexOf("http://", StringComparison.Ordinal);
    if (schemeIndex < 0)
    {
        return null;
    }

    // The URL runs from the scheme up to the next space (or the end of the text).
    string url = str.Substring(schemeIndex);
    int spaceIndex = url.IndexOf(' ');
    if (spaceIndex >= 0)
    {
        url = url.Substring(0, spaceIndex);
    }

    pageDesc pgDesc = new pageDesc();
    if (!IsAbsoluteUrl(url))
    {
        return pgDesc;
    }

    HtmlDocument doc = new HtmlDocument();
    using (var wc = new System.Net.WebClient())
    using (var stream = wc.OpenRead(url))
    {
        doc.Load(stream, true);
    }

    var titleNode = doc.DocumentNode.SelectSingleNode("//title");
    if (titleNode != null)
    {
        pgDesc.pageTitle = titleNode.InnerText;
    }

    // SelectNodes returns null when the page has no <meta> tags at all.
    var metaTags = doc.DocumentNode.SelectNodes("//meta");
    if (metaTags != null)
    {
        foreach (var tag in metaTags)
        {
            var nameAttribute = tag.Attributes["name"];
            if (nameAttribute == null || nameAttribute.Value != "description")
            {
                continue;
            }

            var contentAttribute = tag.Attributes["content"];
            if (contentAttribute != null)
            {
                pgDesc.pageContent = contentAttribute.Value;
            }
        }
    }

    var firstImage = doc.DocumentNode.SelectSingleNode("//img");
    string src = firstImage != null ? firstImage.GetAttributeValue("src", "") : "";
    if (src.Length == 0)
    {
        pgDesc.pageImg = "";
    }
    else if (src.Contains("http://"))
    {
        pgDesc.pageImg = src;
    }
    else
    {
        // Site-relative image path: prefix it with the page's host.
        string hostPrefix = "http://" + new Uri(url).Host;
        pgDesc.pageImg = src.StartsWith("/", StringComparison.Ordinal)
            ? hostPrefix + src
            : hostPrefix + "/" + src;
    }

    pgDesc.pageLink = url;
    return pgDesc;
}
/// <summary>
/// Performs an automatic Pixiv login when the stored cookie contains neither "pixiv" nor
/// "token=": fetches the login page to read <c>post_key</c>, posts the credentials, then stores
/// the resulting cookie prefixed with "pixiv;".
/// </summary>
/// <param name="proxy">Proxy passed through to both HTTP requests.</param>
/// <remarks>
/// Failures are reported through <c>SiteManager.echoErrLog</c>; no exception leaves this method.
/// </remarks>
private void Login(IWebProxy proxy) { if (!cookie.Contains("pixiv") && !cookie.Contains("token=")) { try { HtmlDocument hdoc = new HtmlDocument(); cookie = ""; string data = "", post_key = "", loginpost = "https://accounts.pixiv.net/api/login?lang=zh", loginurl = "https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref="; 
// Pick one of the configured accounts at random.
int index = rand.Next(0, user.Length); shc.Referer = Referer; shc.Remove("X-Requested-With"); shc.Remove("Accept-Ranges"); shc.ContentType = SessionHeadersValue.AcceptTextHtml; 
// Request 1: fetch the login page and read post_key from it.
data = Sweb.Get(loginurl, proxy, shc); hdoc.LoadHtml(data); post_key = hdoc.DocumentNode.SelectSingleNode("//input[@name='post_key']").Attributes["value"].Value; 
// A short post_key is only logged; the login attempt continues regardless.
if (post_key.Length < 9) { SiteManager.echoErrLog(SiteName, "自动登录失败 "); } 
// Request 2: POST the credentials to obtain the login cookie.
// NOTE(review): the password part of the expression below appears redacted ("******") in this
// copy and does not compile as written — restore it from the original source.
shc.ContentType = SessionHeadersValue.ContentTypeFormUrlencoded; data = "pixiv_id=" + user[index] + "&captcha=&g_recaptcha_response=" + "&password="******"&post_key=" + post_key + "&source=pc&ref=&return_to=https%3A%2F%2Fwww.pixiv.net%2F"; data = Sweb.Post(loginpost, data, proxy, shc); cookie = Sweb.GetURLCookies(SiteUrl); if (!data.Contains("success")) { SiteManager.echoErrLog(SiteName, "自动登录失败 " + data); } 
// NOTE(review): this branch is reached only when the response contains "success", and the
// exception thrown here is swallowed at once by the empty catch, so it has no visible effect.
else if (data.Contains("locked")) { try { throw new Exception("登录Pixiv时IP被封锁,剩余时间:" + Regex.Match(data, "lockout_time_by_ip\":\"(\\d+)\"").Groups[1].Value); } catch { } } else if (cookie.Length < 9) { SiteManager.echoErrLog(SiteName, "自动登录失败 "); } else { 
// Mark the cookie so the check at the top of this method skips the login next time.
cookie = "pixiv;" + cookie; } } catch (Exception e) { SiteManager.echoErrLog(SiteName, e, "可能无法连接到服务器"); } } }
/// <summary>
/// Uploads a solution bundle to the device's solution installer page and reports the outcome.
/// </summary>
/// <param name="sessionId">Session id sent as the <c>sessionId</c> cookie.</param>
/// <param name="bundleFileName">Path of the bundle file to upload.</param>
/// <returns>
/// <c>Failed</c> when the installer page is not reachable, the upload request fails, the
/// summary element carries the warning class, or no solutions table is present in the
/// response; otherwise <c>Passed</c> with the summary text.
/// </returns>
public PluginExecutionResult InstallSolution(string sessionId, string bundleFileName)
{
    Handler = new HttpClientHandler { AllowAutoRedirect = true, UseCookies = false };
    using (var client = new HttpClient(Handler))
    {
        client.DefaultRequestHeaders.ExpectContinue = false;
        client.DefaultRequestHeaders.Add("Cookie", $"sessionId={sessionId}");
        client.Timeout = TimeSpan.FromMinutes(10);

        // Find out whether the device supports the bundle installer.
        using (var probe = client.GetAsync(string.Format(CultureInfo.CurrentCulture, OmniDeviceSolutionUrl, Device.Address)).Result)
        {
            if (!probe.IsSuccessStatusCode)
            {
                return(new PluginExecutionResult(PluginResult.Failed, "Solution Installer not available"));
            }
        }

        using (var formData = new MultipartFormDataContent())
        {
            formData.Add(new StringContent(CsrfToken), "\"CSRFToken\"");

            var fileContent = new StreamContent(File.OpenRead(bundleFileName));
            fileContent.Headers.Add("Content-Type", "application/octet-stream");
            fileContent.Headers.Add("Content-Disposition", "form-data; name=\"bundleFile\"; filename=\"" + Path.GetFileName(bundleFileName) + "\"");
            formData.Add(fileContent, "bundleFile", Path.GetFileName(bundleFileName));

            formData.Add(new StringContent("Install"), "\"InstallButton\"");
            formData.Add(new StringContent("SolutionInstallViewSectionId"), "\"StepBackAnchor\"");
            formData.Add(new StringContent("SolutionInstallViewSectionId"), "\"jsAnchor\"");

            using (var response = client.PostAsync(string.Format(CultureInfo.CurrentCulture, OmniDeviceSolutionSaveUrl, Device.Address), formData).Result)
            {
                if (!response.IsSuccessStatusCode)
                {
                    // The request completed, so there is no task exception to report;
                    // surface the HTTP status instead of a null exception.
                    return(new PluginExecutionResult(PluginResult.Failed, $"Solution install request failed: {(int)response.StatusCode} {response.ReasonPhrase}"));
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(response.Content.ReadAsStringAsync().Result);

                // The summary element may be missing; treat that as an empty summary.
                var summaryNode = doc.DocumentNode.SelectSingleNode("//div[@id='Summary']");
                string summaryText = summaryNode?.InnerText ?? string.Empty;
                string summaryClass = summaryNode?.GetAttributeValue("class", string.Empty) ?? string.Empty;

                if (summaryClass == "message message-warning")
                {
                    return(new PluginExecutionResult(PluginResult.Failed, $"Solution not installed: {summaryText}"));
                }

                HtmlNode table = doc.DocumentNode.SelectSingleNode("//table[@id='SolutionsTable']");
                if (table == null)
                {
                    return(new PluginExecutionResult(PluginResult.Failed, $"Solution not installed: {summaryText}"));
                }

                return(new PluginExecutionResult(PluginResult.Passed, summaryText));
            }
        }
    }
}
/// <summary>
/// Parses the result pages of one query and returns the products found.
/// </summary>
/// <param name="url">Query URL; it is normalized through <c>SetRequest</c>/<c>SetRequestUrl</c>.</param>
/// <returns>
/// The collected products. Collection stops as soon as <c>PositionsCount</c> items have been
/// gathered (when that value is positive), or after <c>PageCount</c> pages (a single page when
/// <c>PageCount</c> is 0).
/// </returns>
/// <remarks>
/// A page without any product card is requested again with the error flag set.
/// NOTE(review): that retry has no upper bound — confirm <c>NetworkService.LoadPage</c>
/// eventually yields a usable page or throws.
/// </remarks>
public List <ProductModel> ParseOneQuery(string url)
{
    RequestModel rModel = SetRequest(url);
    url = SetRequestUrl(rModel);
    List<ProductModel> items = new List<ProductModel>();

    // Iterate over the pages.
    bool error = false;
    int pageNum = 0;
    while (true)
    {
        string pageContent = new NetworkService().LoadPage(url, error);
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(pageContent);

        HtmlNodeCollection cards = document.DocumentNode.SelectNodes("//div[@data-id]");
        if (cards == null)
        {
            // No product cards on this page: reload the same URL, flagging the failure.
            error = true;
            continue;
        }

        error = false;

        // These queries are document-wide ("//"), so they are evaluated once per page.
        // (A description column, 'n-snippet-card2__content', was disabled in the original.)
        HtmlNodeCollection titles = document.DocumentNode.SelectNodes("//div[contains(@class, 'n-snippet-card2__title')]");
        HtmlNodeCollection prices = document.DocumentNode.SelectNodes("//div[contains(@class, 'n-snippet-card2__main-price-wrapper')]");

        // Iterate over the products, never past the shortest of the three collections.
        int count = 0;
        if (titles != null && prices != null)
        {
            count = cards.Count;
            if (titles.Count < count)
            {
                count = titles.Count;
            }
            if (prices.Count < count)
            {
                count = prices.Count;
            }
        }

        for (int i = 0; i < count; i++)
        {
            string price = "";
            if (prices[i].InnerText != "")
            {
                price = prices[i].InnerText.Replace(" ", "");
                // Drop the two trailing characters, guarding against very short values.
                if (price.Length >= 2)
                {
                    price = price.Substring(0, price.Length - 2);
                }
            }

            // The href is taken raw from the markup, so decode the ampersand entity.
            // NOTE(review): the original replaced "&" with "&" (a no-op), presumably an
            // "&amp;" decode lost in transit — confirm.
            HtmlNode anchor = titles[i].FirstChild;
            string link = anchor != null
                ? anchor.GetAttributeValue("href", "").Replace("&amp;", "&")
                : "";

            if (link.Contains("market-click2"))
            {
                // string.Replace returns a new string; the result must be assigned.
                link = ("http:" + link).Replace(@"\", "/");
            }
            else
            {
                link = "https://market.yandex.ru" + link;
            }

            items.Add(new ProductModel
            {
                Name = titles[i].InnerText,
                Link = link,
                Price = price
            });

            // When a number of positions is requested, the page count is not taken into
            // account. The limit is checked against the total collected so far, so it also
            // holds across page boundaries.
            if (rModel.PositionsCount > 0 && items.Count >= rModel.PositionsCount)
            {
                return(items);
            }
        }

        // Change the URL for the next request.
        pageNum++;
        if (rModel.PageCount == 0 || rModel.PageCount == pageNum)
        {
            return(items);
        }
        url = UpdatePageNumInRequestUrl(url, pageNum + 1);
    }
}
/// <summary>
/// Builds the blog output: copies the CSS file, populates one template per blog post, writes
/// the root index page from the first post, then wires up previous/next navigation and saves
/// every post page.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No root index template could be created (there were no posts), or a template carries a
/// navigation value that is not valid for its position.
/// </exception>
/// <remarks>
/// Any exception raised while populating a post is logged to the error writer and rethrown.
/// </remarks>
public override void Compose()
{
    #region Copy CSS File to Output directory
    string cssFile = OptionsContext.Current.Options.BlogCSSFile;
    string cssFolder = Path.Combine(OptionsContext.Current.Options.OutputDirectory, "css");
    IOContext.Current.CreateDirectory(cssFolder);
    IOContext.Current.FileCopy(cssFile, Path.Combine(cssFolder, cssFile));
    #endregion

    IReadOnlyCollection <IBasePage> blogPosts = _blogProvider.Pages;
    int index = 1;
    int posts = blogPosts.Count;
    IList<PopulatedTemplate> populatedTemplates = new List<PopulatedTemplate>();
    PopulatedTemplate rootIndexTemplate = null;

    foreach (IBlogPost blogPost in blogPosts)
    {
        // ASSUMPTION: First blog post in the list is the newest, should have been sorted by date in the BlogPostProvider.
        // Only run this code once.
        if (rootIndexTemplate == null)
        {
            rootIndexTemplate = createRootIndexPopulatedTemplate(blogPost, posts);
        }

        try
        {
            HtmlDocument template = CopyOfTemplate;
            addBlogCSS(template, blogPost);
            replaceBlogDiv(template, blogPost);
            replaceAllTitles(template, blogPost);
            replaceAllDates(template, blogPost);
            replaceAllFragments(template);

            // The first post only links to the previous one, the last post only to the
            // next one, and a single post has no navigation at all.
            NavigationButtons buttonsNeeded;
            if (posts == 1)
            {
                buttonsNeeded = NavigationButtons.None;
            }
            else
            {
                if (index == 1)
                {
                    buttonsNeeded = NavigationButtons.PreviousOnly;
                }
                else if (index == posts)
                {
                    buttonsNeeded = NavigationButtons.NextOnly;
                }
                else
                {
                    buttonsNeeded = NavigationButtons.Both;
                }
            }

            populatedTemplates.Add(new PopulatedTemplate(blogPost, template, buttonsNeeded, OptionsContext.Current.Options.OutputDirectory));
        }
        catch (Exception e)
        {
            ErrorWriterContext.Current.WriteLine(Invariant($"Error creating blog post with title {blogPost.Metadata.Title}."));
            ErrorWriterContext.Current.WriteLine(e.ToString());
            // Rethrow without resetting the stack trace ("throw e;" would discard it).
            throw;
        }

        index++;
    }

    #region Root Index page creation
    // Create the main page root index file first
    if (rootIndexTemplate == null)
    {
        throw new InvalidOperationException("Stopped because the main page wasn't going to be created");
    }

    // Only two possible cases:
    // 1. There is only one blog post
    // 2. This is the first blog post of many
    // The first blog post should not need next or both navigation buttons.
    switch (rootIndexTemplate.ButtonsNeeded)
    {
        case NavigationButtons.None:
        {
            hideNext(rootIndexTemplate);
            hidePrevious(rootIndexTemplate);
        }
        break;

        case NavigationButtons.PreviousOnly:
        {
            // ASSUMPTION: The first index will be the previous page, based on date sorting
            PopulatedTemplate previousTemplate = populatedTemplates[1];
            replacePrevious(rootIndexTemplate, previousTemplate.RootRelativePath);
            hideNext(rootIndexTemplate);
        }
        break;

        default:
            throw new InvalidOperationException(Invariant($"Enum value {rootIndexTemplate.ButtonsNeeded} is not valid for the first blog post"));
    }

    rootIndexTemplate.SaveAsRootIndex();
    #endregion

    // Now create the rest of the blog pages.
    // The list is ordered newest first, so "previous" is at i + 1 and "next" at i - 1.
    for (int i = 0; i < populatedTemplates.Count; i++)
    {
        PopulatedTemplate currentTemplate = populatedTemplates[i];
        switch (currentTemplate.ButtonsNeeded)
        {
            case NavigationButtons.None:
            {
                hideNext(currentTemplate);
                hidePrevious(currentTemplate);
            }
            break;

            case NavigationButtons.NextOnly:
            {
                PopulatedTemplate nextTemplate = populatedTemplates[i - 1];
                replaceNext(currentTemplate, nextTemplate.RelativePath);
                hidePrevious(currentTemplate);
            }
            break;

            case NavigationButtons.PreviousOnly:
            {
                PopulatedTemplate previousTemplate = populatedTemplates[i + 1];
                replacePrevious(currentTemplate, previousTemplate.RelativePath);
                hideNext(currentTemplate);
            }
            break;

            case NavigationButtons.Both:
            {
                PopulatedTemplate previousTemplate = populatedTemplates[i + 1];
                replacePrevious(currentTemplate, previousTemplate.RelativePath);
                PopulatedTemplate nextTemplate = populatedTemplates[i - 1];
                replaceNext(currentTemplate, nextTemplate.RelativePath);
            }
            break;

            default:
                throw new InvalidOperationException(Invariant($"Enum value {currentTemplate.ButtonsNeeded} not supported"));
        }

        currentTemplate.Save();
    }
}
/// <summary>
/// Returns the largest <c>Depth()</c> among the nodes returned by <c>GetNodes()</c>.
/// </summary>
/// <param name="doc">Document whose nodes are inspected.</param>
/// <returns>The maximum depth, or 0 when the document yields no nodes.</returns>
public static int MaxDepth(this HtmlDocument doc)
{
    // A single pass replaces sorting the whole sequence just to read its first element.
    // DefaultIfEmpty keeps the "0 for no nodes" result that FirstOrDefault provided.
    return doc.GetNodes()
        .Select(node => node.Depth())
        .DefaultIfEmpty()
        .Max();
}
/// <summary>
/// Downloads the NEU HDTV channel page and turns each channel table cell into a <c>ProgramSource</c>.
/// </summary>
/// <returns>
/// The channels parsed so far. The list is empty or partial when the download or the parsing
/// fails; failures are logged rather than propagated.
/// </returns>
protected override async Task <IEnumerable <ProgramSource> > GetNewChannelList()
{
    var result = new List<ProgramSource>();
    try
    {
        HtmlDocument doc = new HtmlDocument();

        // Dispose the client and the response stream once the document is loaded.
        using (HttpClient client = new HttpClient())
        using (var buffer = await client.GetStreamAsync("http://hdtv.neu6.edu.cn/"))
        {
            doc.Load(buffer);
        }

        // SelectNodes returns null when the expected table is not present.
        var channelNodes = doc.DocumentNode.SelectNodes("//div[@class='entry-content']/table/tr/td");
        if (channelNodes == null)
        {
            LoggingService.Debug("Television", "[NEU] no channel cells found", Windows.Foundation.Diagnostics.LoggingLevel.Error);
            return(result);
        }

        foreach (var chNode in channelNodes)
        {
            if (chNode.ChildNodes.Count <= 2)
            {
                continue;
            }

            // A cell without a link cannot be mapped to a channel code.
            var linkNode = chNode.ChildNodes["a"];
            if (linkNode == null)
            {
                continue;
            }

            string chName = chNode.FirstChild.InnerText.Trim();
            string chLink = linkNode.GetAttributeValue("href", string.Empty);
            string chCode = chLink.Substring(chLink.LastIndexOf('=') + 1);
            string chCodeUnified = chCode;

            if (chCode.StartsWith("jlu", StringComparison.Ordinal))
            {
                // NOTE(review): skips four characters, i.e. presumably "jlu" plus a separator — confirm.
                chCodeUnified = chCode.Substring(4);
            }
            else if (chCode.StartsWith("hls", StringComparison.Ordinal))
            {
                // Compute the channel id from the display name.
                string editName = chName.Replace("+", "").Replace("-", "");
                if (chName.EndsWith("卫视高清", StringComparison.Ordinal))
                {
                    editName = editName.Replace("卫视高清", "hd");
                }
                else
                {
                    editName = editName.Replace("卫视", "tv");
                    editName = editName.Replace("高清", "hd");
                }
                chCodeUnified = SuperEncoding.GetSpellCode(editName, false);
            }

            Channel channel = Channel.GetChannel(chCodeUnified, chName);
            result.Add(new ProgramSource()
            {
                IsThumbAvaliable = true,
                // Small thumbnail variant: http://hdtv.neu6.edu.cn/wall/img/{chCode}_s.png
                ThumbImage = new Uri($"http://hdtv.neu6.edu.cn/wall/img/{chCode}.png"),
                MediaSource = new Uri($"http://media2.neu6.edu.cn/hls/{chCode}.m3u8"),
                MediaSourceTag = chCode,
                IsMediaAvaliable = true,
                SourceStation = this,
                ProgramInfo = new Program() { Name = chName, Channel = channel }
            });

            // ",-10" pads to a column width; the former ":10" is a format specifier that
            // strings ignore, so it had no effect.
            LoggingService.Debug("Television", $"[NEU]{chName,-10} : {chCodeUnified,-10}");
        }
    }
    catch (Exception e)
    {
        LoggingService.Debug("Television", e.Message, Windows.Foundation.Diagnostics.LoggingLevel.Error);
        System.Diagnostics.Debugger.Break();
    }

    return(result);
}
/// <summary>
/// Initializes a comment node by forwarding to the base constructor with
/// <c>HtmlNodeType.Comment</c>.
/// </summary>
/// <param name="ownerdocument">Passed through to the base constructor.</param>
/// <param name="index">Passed through to the base constructor.</param>
internal HtmlCommentNode(HtmlDocument ownerdocument, int index)
    : base(HtmlNodeType.Comment, ownerdocument, index)
{
}
/// <summary>
/// Returns the HTML-decoded text of the document's first <c>title</c> element.
/// </summary>
/// <param name="htmldocument">Document to search.</param>
/// <returns>
/// The decoded title text, or an empty string when the document has no <c>title</c> element.
/// </returns>
public static string SearchPageTitle(this HtmlDocument htmldocument)
{
    var titleNode = htmldocument.DocumentNode.SelectSingleNode("//title");

    // SelectSingleNode returns null when nothing matches; a page without a title is not an error.
    if (titleNode == null)
    {
        return string.Empty;
    }

    return HttpUtility.HtmlDecode(titleNode.InnerText);
}
/// <summary>
/// Extracts the category hierarchy from a page: the first-level category, the second-level
/// category, and the links listed under the third-level container.
/// </summary>
/// <param name="strHtml">HTML of the category page.</param>
/// <returns>
/// The categories found, in the order level 1, level 2, then one entry per link (each with
/// <c>CategoryLevel</c> 3 and the second-level code as parent). Parts missing from the page
/// are skipped and missing attributes are reported as empty strings.
/// </returns>
public List <Category> GetAllCategoryList(string strHtml)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(strHtml);

    List<Category> category = new List<Category>();

    // Level-1 menu.
    HtmlNode oneNode = doc.DocumentNode.SelectSingleNode("//span[@id='category-first']");
    string oneCode = oneNode != null ? oneNode.GetAttributeValue("data-code", "") : "";
    if (oneNode != null)
    {
        category.Add(new Category()
        {
            CategoryLevel = 1,
            Code = oneCode,
            Name = oneNode.InnerText,
            ParentCode = "",
            Url = "",
            State = 0
        });
    }

    // Level-2 menu. The nested queries start with "." so they stay inside this node;
    // a bare "//" would search the whole document.
    HtmlNode twoNode = doc.DocumentNode.SelectSingleNode("//dl[@id='category-second']");
    string twoCode = "";
    if (twoNode != null)
    {
        HtmlNode codeNode = twoNode.SelectSingleNode(".//dd");
        HtmlNode nameNode = twoNode.SelectSingleNode(".//dt");
        twoCode = codeNode != null ? codeNode.GetAttributeValue("modelid", "") : "";

        category.Add(new Category()
        {
            CategoryLevel = 2,
            Code = twoCode,
            Name = nameNode != null ? nameNode.InnerText : "",
            ParentCode = oneCode,
            Url = "",
            State = 0
        });
    }

    // Entries under the third-level container. The container itself is not emitted as a
    // category (that code was disabled in the original).
    HtmlNode threeNode = doc.DocumentNode.SelectSingleNode("//*[@id='category-third']");
    if (threeNode != null)
    {
        // SelectNodes returns null when the container has no links.
        var linkNodes = threeNode.SelectNodes(".//dd/a");
        if (linkNodes != null)
        {
            foreach (var item in linkNodes)
            {
                category.Add(new Category()
                {
                    CategoryLevel = 3,
                    Code = item.GetAttributeValue("data-code", ""),
                    Name = item.InnerText,
                    ParentCode = twoCode,
                    Url = "http:" + item.GetAttributeValue("href", ""),
                    State = 0
                });
            }
        }
    }

    return(category);
}
// Execution part.
/// <summary>
/// Runs the 1688.com search for every keyword in <c>base.keywordInfList</c>: opens the home
/// page (up to three attempts), requests the search page for the keyword, reads the item
/// count and then pages through the results via <c>GetData</c>/<c>GetData1</c>.
/// </summary>
/// <remarks>
/// Progress is reported through <c>base.updateTextBox</c>. Any exception is written to the log
/// and ends the run; it is not rethrown.
/// </remarks>
public override void Excute()
{
    try
    {
        // Taken once, before the loop: the "elapsed" value reported per keyword is therefore
        // measured from the start of the whole run.
        DateTime begin = DateTime.Now;
        string url = string.Empty;
        string refererUrl = string.Empty;
        string text = string.Empty;
        string s = string.Empty;
        int num = 0; // number of keywords counted as "no data retrieved"
        base.IniDataTable();
        // 0x4e20 == 20000. NOTE(review): presumably a timeout in milliseconds — confirm.
        base.http = new DoNet4.Utilities.HttpClientHelper(0x4e20);
        for (int i = 0; i < base.keywordInfList.Count; i++)
        {
            // Iterate the keyword list.
            base.keywordInf = base.keywordInfList[i];
            if (string.IsNullOrEmpty(base.keywordInf.keyword))
            {
                continue;
            }
            base.updateTextBox(base.keywordInf.keyword + " 开始查询", true);
            url = "http://1688.com";
            byte retry = 0;
            // Open the home page; jump to the search once its title text is found,
            // give up after three failed attempts.
            while (true)
            {
                if (base.http.Get(url).IndexOf("阿里巴巴1688.com - 全球领先的采购批发平台") != -1)
                {
                    goto Label_search_taobao;
                }
                retry = (byte)(retry + 1);
                if (retry >= 3)
                {
                    break;
                }
                base.updateTextBox("阿里巴巴主页打开失败,重试: " + retry.ToString(), true);
            }
            // Home page never opened: count the keyword as failed and move on.
            num++;
            continue;
Label_search_taobao:
            Thread.Sleep(0x7d0); // 0x7d0 == 2000 ms
            System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
            // The keyword is URL-encoded as GB2312 and the escapes are upper-cased.
            url = "http://s.1688.com/selloffer/offer_search.htm?keywords=" + HttpUtility.UrlEncode(base.keywordInf.keyword, gb2312).ToUpper();
            text = base.http.Get(url);
            refererUrl = url;
            if (text.Contains("没找到与"))
            {
                num++;
                base.updateTextBox(base.keywordInf.keyword + " 没有找到相关商品", true);
                Thread.Sleep(200);
            }
            else if (text.Contains("淘宝会员(仅限会员名)请在此登录"))
            {
                base.updateTextBox(base.keywordInf.keyword + " 阿里巴巴要求登录", true);
                Thread.Sleep(200);
            }
            else
            {
                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(text);
                HtmlNode node = null;
                int result = 0;
                node = document.DocumentNode.SelectSingleNode("//span[@class='sm-widget-offer']");
                if (node == null)
                {
                    base.updateTextBox(base.keywordInf.keyword + " node为空(null)!", true);
                }
                else
                {
                    s = StrUnit.MidStrEx(node.InnerHtml, "<em>", "</em>");
                    // NOTE(review): a failure here increments num, and the second check below
                    // can increment it again for the same keyword — confirm that is intended.
                    if (!int.TryParse(s, out result))
                    {
                        base.updateTextBox(base.keywordInf.keyword + " 获取商品数量错误! - 2", true);
                        num++;
                    }
                }
                // The count is read again straight from the page text; this value decides
                // whether crawling starts.
                s = StrUnit.MidStrEx(text, "共<em>", "</em>件");
                if (!int.TryParse(s, out result))
                {
                    base.updateTextBox(base.keywordInf.keyword + " 获取商品数量错误! - 2", true);
                    num++;
                }
                else
                {
                    // Got a usable count: iterate through the result pages.
                    base.updateTextBox("共检索到" + s + " 件相关商品, 开始爬取", true);
                    // The first page is handled by GetData, every following page by GetData1;
                    // each returns the next URL to fetch, or an empty string to stop.
                    for (string str6 = this.GetData(text); !string.IsNullOrEmpty(str6); str6 = this.GetData1(text))
                    {
                        Thread.Sleep(0x7d0); // 2000 ms between page requests
                        text = base.http.Get(str6, refererUrl);
                    }
                    TimeSpan span = (TimeSpan)(DateTime.Now - begin);
                    base.updateTextBox(string.Concat(new object[] { base.keywordInf.keyword, " 获取完毕,耗时:", span.TotalSeconds, "秒" }), true);
                }
            }
        }
        // NOTE(review): base.keywordInf is whatever the loop assigned last; if the list is
        // empty it may be unset here — confirm.
        base.updateTextBox(base.keywordInf.keyword + " 已到达规定页数,结束", true);
        base.updateTextBox("共 " + base.keywordInfList.Count.ToString() + " 件商品查询完毕,其中 " + num.ToString() + "件未检索到数据", true);
        // NOTE(review): these two statements are skipped when an exception is thrown above.
        base.http.Free();
        base.Stoped = true;
    }
    catch (Exception e)
    {
        // Exception handling: log message and stack trace.
        Log.WriteLog("taobaoTh Excute Err:" + e.Message + " ,Err Stack:" + e.StackTrace);
    }
}
/// <summary>
/// Parses one search-result page, appends a row per offer to <c>base.OutDataTable</c>, and
/// returns the URL of the next result page.
/// </summary>
/// <param name="text">HTML of the result page.</param>
/// <returns>
/// The next page URL, or an empty string when <c>page</c> has reached
/// <c>base.keywordInf.endPage</c> or when parsing failed.
/// </returns>
/// <remarks>
/// Exceptions raised inside are reported via <c>base.updateTextBox</c> and swallowed.
/// The <c>page</c> field is incremented on each successful call.
/// </remarks>
private string GetData1(string text)
{
    string str = string.Empty;
    innerHtml = string.Empty; // reset the innerHtml field
    string source = string.Empty;
    try
    {
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(text);
        HtmlNode node = null;
        node = document.DocumentNode.SelectSingleNode("//ul[@id='sm-offer-list']");
        if (node == null)
        {
            base.updateTextBox("获取商品列表失败", true);
            // Caught by the catch block below, which reports it and returns the empty string.
            throw new Exception("获取商品列表失败");
        }
        // The offer list is split on the start of each "<li t..." item; only fragments
        // containing "-rank" are processed.
        foreach (string str2 in node.InnerHtml.Split(new string[] { "<li t" }, StringSplitOptions.RemoveEmptyEntries))
        {
            if (str2.IndexOf("-rank") != -1)
            {
                source = StrUnit.MidStrEx(str2, "rank", "</li>");
                DataRow row = base.OutDataTable.NewRow();
                row["Platform"] = "1688.com";
                row["Keyword"] = base.keywordInf.keyword;
                row["ItemID"] = StrUnit.MidStrEx(source, "t-offer-id=\"", "\"");
                row["SellerId"] = StrUnit.MidStrEx(source, "t-member-id=\"", "\"");
                string goodInfo = string.Empty;
                goodInfo = StrUnit.MidStrEx(source, "sm-offer-photo sw-dpl-offer-photo", "</div>");
                row["Url"] = StrUnit.MidStrEx(goodInfo, "href=\"", "\"");
                row["Title"] = StrUnit.MidStrEx(goodInfo, "title=\"", "\"");
                string priceInfo = string.Empty;
                priceInfo = StrUnit.MidStrEx(source, "<div class=\"s-widget-offershopwindowprice sm-offer-price sw-dpl-offer-price\">", "</div>");
                row["Price"] = StrUnit.MidStrEx(priceInfo, "title=\"¥", "\"");
                string companyInfo = string.Empty;
                companyInfo = StrUnit.MidStrEx(source, "<div class=\"s-widget-offershopwindowcompanyinfo sm-offer-company sw-dpl-offer-company\">", "</div>");
                row["StoreName"] = StrUnit.MidStrEx(companyInfo, "title=\"", "\"");
                row["StoreUrl"] = StrUnit.MidStrEx(companyInfo, "href=\"", "\"");
                base.OutDataTable.Rows.Add(row);
                Thread.Sleep(50);
                // NOTE(review): the row has already been added when this check runs, so a row
                // with an empty ItemID stays in the table even though parsing is aborted.
                if (string.IsNullOrEmpty(row["ItemID"].ToString()))
                {
                    throw new Exception("获取商品列表数据失败! - 1");
                }
            }
        } // end of info retrieving

        // A disabled earlier variant derived the next page number from the pager markup
        // (div#fui_widget_4 / "fui-next" / data-page); the page counter below replaced it.
        base.updateTextBox(base.keywordInf.keyword + " 第 " + (page) + " 页获取完成", true);
        if (page >= base.keywordInf.endPage)
        {
            str = string.Empty;
            return str;
        }
        ++page;
        System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
        str = "https://s.1688.com/selloffer/offer_search.htm?keywords=" + HttpUtility.UrlEncode(base.keywordInf.keyword, gb2312).ToUpper() + "&beginPage=" + page;
        return str;
    }
    catch (Exception e)
    {
        base.updateTextBox("GetData Err:" + e.Message, true);
    }
    return str;
}
/// <summary>
/// Visits every configured category page, finds links to PDF files, derives a meeting date
/// for each from the link text (or the parent node's text) and hands qualifying files to
/// <c>ExtractADoc</c>.
/// </summary>
/// <remarks>
/// Each entry of <c>docUrls</c> has the form <c>category*url</c>. A file is skipped when its
/// link text is empty, when no date can be found or parsed, or when the date is earlier than
/// <c>dtStartFrom</c>.
/// </remarks>
public void DownloadCouncilPdfFiles()
{
    var docs = this.LoadDocumentsDoneSQL();
    var queries = this.LoadQueriesDoneSQL();
    HtmlWeb web = new HtmlWeb();

    // Two independent patterns. The original overwrote the first with the second on first
    // fallback, so numeric dates were no longer recognized for any later file.
    Regex numericDateReg = new Regex("[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}");
    Regex longDateReg = new Regex("[a-zA-Z]+[\\s]{0,2}[0-9]{1,2},[\\s]{0,2}[0-9]{4}");

    List<string> years = new List<string>();
    for (int i = this.dtStartFrom.Year; i <= DateTime.Now.Year; i++)
    {
        years.Add(i.ToString());
    }
    string yearRegText = string.Format("({0})", string.Join("|", years));
    Regex yearReg = new Regex(yearRegText);

    using (WebClient c = new WebClient())
    {
        foreach (string url in this.docUrls)
        {
            string[] parts = url.Split('*');
            string category = parts[0];
            string categoryUrl = parts[1];

            HtmlDocument doc = web.Load(categoryUrl);
            HtmlNodeCollection fileNodes = doc.DocumentNode.SelectNodes("//a[contains(@href,'.pdf')]");
            if (fileNodes == null)
            {
                continue;
            }

            foreach (HtmlNode fileNode in fileNodes)
            {
                string fileUrl = fileNode.Attributes["href"].Value;
                fileUrl = fileUrl.StartsWith("http") ? fileUrl : this.cityEntity.CityUrl + fileUrl;

                // Trim line breaks, tabs, spaces and non-breaking spaces.
                string nodeText = System.Web.HttpUtility.HtmlDecode(fileNode.InnerText).Trim('\r', '\n', '\t', (char)32, (char)160);
                if (string.IsNullOrEmpty(nodeText))
                {
                    continue;
                }

                string meetingDateText = numericDateReg.Match(nodeText).ToString();

                // Fall back to a "Month d, yyyy" date in the parent's text, but only when
                // that text mentions a year in the accepted range.
                if (string.IsNullOrEmpty(meetingDateText) && yearReg.IsMatch(fileNode.ParentNode.InnerText))
                {
                    meetingDateText = longDateReg.Match(fileNode.ParentNode.InnerText).ToString();
                }

                if (string.IsNullOrEmpty(meetingDateText))
                {
                    continue;
                }

                // Text that matches a pattern but is not a real date skips this file instead
                // of aborting the whole run.
                DateTime meetingDate;
                if (!DateTime.TryParse(meetingDateText, out meetingDate))
                {
                    Console.WriteLine("Unparseable date, skip...");
                    continue;
                }

                if (meetingDate < this.dtStartFrom)
                {
                    Console.WriteLine("Too early, skip...");
                    continue;
                }

                this.ExtractADoc(c, fileUrl, category, "pdf", meetingDate, ref docs, ref queries);
            }
        }
    }
}
/// <summary>
/// Gets all anchors in the HTML document as a sequence of <see cref="Anchor"/>.
/// </summary>
/// <param name="html">The HTML document whose anchors are returned.</param>
/// <param name="response">The response from which the HTML originates.
/// The response information is used to convert relative links to absolute links.
/// When it is <c>null</c>, a <c>null</c> base URI is passed on.</param>
/// <returns>The result of the <c>Anchors</c> overload that takes the base URI.</returns>
public static IEnumerable<Anchor> Anchors(this HtmlDocument html, Response response)
{
    var baseUri = response?.BaseUri;
    return Anchors(html, baseUri);
}
/// <summary>
/// Returns the HTML-decoded inner HTML of the first anchor whose <c>href</c> contains "/genre/".
/// </summary>
/// <param name="htmlDoc">The parsed document to search.</param>
/// <returns>The decoded inner HTML of the first matching anchor.</returns>
/// <exception cref="InvalidOperationException">The document has no anchor whose <c>href</c> contains "/genre/".</exception>
private static string GetGenre(HtmlDocument htmlDoc)
{
    var genreAnchor = htmlDoc.DocumentNode
        .Descendants("a")
        .First(anchor => anchor.GetAttributeValue("href", "").Contains("/genre/"));

    return WebUtility.HtmlDecode(genreAnchor.InnerHtml);
}
/// <summary>
/// Loads the page at <paramref name="url"/> and appends what it finds to the shared lists
/// <c>descriptions</c>, <c>dates</c>, <c>names</c>, <c>maxTemperatures</c> and <c>minTemperatures</c>.
/// A message is written to the console for every kind of node that cannot be found.
/// </summary>
/// <param name="url">Address of the page to load.</param>
static void GetSpecificData(string url)
{
    Console.WriteLine($"обрабатываю {url}");

    HtmlDocument doc = new HtmlWeb().Load(url);
    var root = doc.DocumentNode;

    // description
    var descriptionNodes = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]");
    if (descriptionNodes == null)
    {
        Console.WriteLine("Ошибка: пустая коллекция DescriptionNodes");
    }
    else
    {
        foreach (var cell in descriptionNodes)
        {
            descriptions.Add(cell.Attributes["data-text"].Value);
        }
    }

    // date
    var dateNodes = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]/div[1]/div[1]");
    if (dateNodes == null)
    {
        Console.WriteLine("Ошибка: пустая коллекция DateNodes");
    }
    else
    {
        // A cell with exactly two space-separated parts supplies its second part as the
        // month; cells with any other shape get the most recently seen month appended.
        string lastMonth = "SomeMonth";
        foreach (var dateNode in dateNodes)
        {
            string label = dateNode.InnerText.Trim();
            var parts = label.Split(" ");
            if (parts.Length == 2)
            {
                lastMonth = parts[1];
                dates.Add(label);
            }
            else
            {
                dates.Add(label + " " + lastMonth);
            }
        }

        // city name: the same name is added once per date cell
        var nameNode = root.SelectSingleNode("//div[@class='pageinfo_title index-h1']//h1");
        if (nameNode == null)
        {
            Console.WriteLine("Ошибка: NameNode не обнаружен");
        }
        else
        {
            for (int added = 0; added < dateNodes.Count; added++)
            {
                names.Add(nameNode.InnerText.Replace(" на месяц", ""));
            }
        }
    }

    // max temperature (the typographic minus sign is replaced by an ASCII hyphen)
    var maxNodes = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]//div[@class='temp_max js_meas_container']//span[@class='value unit unit_temperature_c']");
    if (maxNodes == null)
    {
        Console.WriteLine("Ошибка: пустая коллекция MaxTemperatureNodes");
    }
    else
    {
        foreach (var valueNode in maxNodes)
        {
            maxTemperatures.Add(valueNode.InnerText.Trim().Replace("−", "-"));
        }
    }

    // min temperature
    var minNodes = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]//div[@class='temp_min js_meas_container']//span[@class='value unit unit_temperature_c']");
    if (minNodes == null)
    {
        Console.WriteLine("Ошибка: пустая коллекция MinTemperatureNodes");
    }
    else
    {
        foreach (var valueNode in minNodes)
        {
            minTemperatures.Add(valueNode.InnerText.Trim().Replace("−", "-"));
        }
    }

    // Short pause after each page.
    Thread.Sleep(100);
}
/// <summary>
/// Handles the browser's DocumentCompleted event by replacing the inner HTML of the loaded
/// document's body with <c>LicenceText</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    this.webBrowser.Document.Body.InnerHtml = this.LicenceText;
}
/// <summary>
/// Scrapes the IMDb "coming soon" page and returns the movies found there as JSON.
/// </summary>
/// <remarks>
/// <c>SelectNodes</c> returns <c>null</c> rather than an empty collection when nothing matches,
/// so every node list is checked before it is enumerated:
/// an entry without a name link is skipped (NOTE(review): assumed not to be a movie entry — confirm);
/// an entry without any actor is skipped, as before; missing directors or genres yield empty lists;
/// a missing poster or description leaves the corresponding property <c>null</c>.
/// </remarks>
/// <returns>A JSON result holding the list of scraped movies (empty when the list is not found).</returns>
public IHttpActionResult Get()
{
    List<Movie> movies = new List<Movie>();

    var url = "http://www.imdb.com/movies-coming-soon/?ref_=nv_mv_cs_4";
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = web.Load(url);

    var liste = htmlDoc.DocumentNode.SelectNodes("//div[@class='list detail']/div");
    if (liste == null)
    {
        return Json(movies);
    }

    foreach (var item in liste)
    {
        // Each entry is re-parsed as its own document so that the absolute "//" XPath
        // expressions below only see this entry.
        var htmlSubDoc = new HtmlDocument();
        htmlSubDoc.LoadHtml(item.InnerHtml);
        var root = htmlSubDoc.DocumentNode;

        var nameNode = root.SelectSingleNode("//h4[@itemprop='name']/a");
        if (nameNode == null)
        {
            continue;
        }

        var starNodes = root.SelectNodes("//div[@class='txt-block']/span[@itemprop='actors']/span/a");
        if (starNodes == null)
        {
            // Same outcome as the former try/catch: entries without actors are left out.
            continue;
        }

        var posterNode = root.SelectSingleNode("//img[@class='poster shadowed']");
        var descriptionNode = root.SelectSingleNode("//div[@itemprop='description']");
        var durationNode = root.SelectSingleNode("//time");

        Movie movie = new Movie();
        movie.MovieName = nameNode.InnerHtml;
        movie.ImageUrl = posterNode != null ? posterNode.GetAttributeValue("src", null) : null;
        movie.Description = descriptionNode != null ? descriptionNode.InnerHtml : null;
        movie.Duration = durationNode != null ? durationNode.InnerHtml : ":(";

        #region Stars
        List<Star> stars = new List<Star>();
        foreach (var str in starNodes)
        {
            stars.Add(new Star { FullName = str.InnerHtml });
        }

        movie.Stars = stars;
        #endregion

        #region Directors
        List<Director> directors = new List<Director>();
        var directorNodes = root.SelectNodes("//div[@class='txt-block']/span[@itemprop='director']/span/a");
        if (directorNodes != null)
        {
            foreach (var drc in directorNodes)
            {
                directors.Add(new Director { FullName = drc.InnerHtml });
            }
        }

        movie.Directors = directors;
        #endregion

        #region Genre
        List<Genre> genres = new List<Genre>();
        var genreNodes = root.SelectNodes("//span[@itemprop='genre']");
        if (genreNodes != null)
        {
            foreach (var gnr in genreNodes)
            {
                genres.Add(new Genre { Name = gnr.InnerHtml });
            }
        }

        movie.Genre = genres;
        #endregion

        movies.Add(movie);
    }

    return Json(movies);
}
/// <summary>
/// Builds the tooltip element for <paramref name="periode"/>: an outer "tt" div that holds a
/// title panel, a body panel filled through <c>GenerateRpNode</c> and a closing "te" div.
/// </summary>
/// <param name="doc">Document used to create the new nodes.</param>
/// <param name="periode">The period the tooltip describes; its position drives the tooltip offset.</param>
/// <param name="options">Rendering options (<c>DisplayByYear</c>, <c>CircleWidth</c>).</param>
/// <returns>The assembled tooltip node.</returns>
private HtmlNode GenerateTooltip(HtmlDocument doc, Periode periode, Options options)
{
    // A different offset is used once the period lies beyond x = 400 or y = 300.
    var offsetLeft = periode.Position.X > 400 ? -245 : -50;
    var offsetTop = periode.Position.Y > 300
        ? -(180 + (options.CircleWidth + borderWidth) / 2)
        : 10;

    string placement = string.Format(
        "left:{0}px;top:{1}px;",
        periode.Position.X + offsetLeft,
        periode.Position.Y + offsetTop);

    var tooltip = CreateDiv(doc, "tt", placement);
    var panelTitle = CreateDiv(doc, "p-t b-b", string.Empty);

    var periodeTitle = doc.CreateElement("span");
    periodeTitle.AddClass("lieu b-b");
    periodeTitle.AppendChild(doc.CreateTextNode(periode.Lieu));

    var panelBody = CreateDiv(doc, "p-b-w", string.Empty);

    if (!options.DisplayByYear)
    {
        foreach (var rp in periode.Rps)
        {
            GenerateRpNode(doc, panelBody, rp, periode.Rps.Count == 1);
        }
    }
    else
    {
        foreach (var subPeriode in periode.SubPeriodes)
        {
            // A sub-period heading is only emitted when there is more than one sub-period.
            if (periode.SubPeriodes.Count > 1)
            {
                panelBody.AppendChild(CreatePeriodePeriode(doc, subPeriode));
            }

            foreach (var rp in subPeriode.Rps)
            {
                bool single = periode.SubPeriodes.Count == 1 && subPeriode.Rps.Count == 1;
                GenerateRpNode(doc, panelBody, rp, single);
            }
        }
    }

    panelTitle.AppendChild(periodeTitle);

    // The period itself goes into the title unless it is already shown per sub-period.
    bool periodInTitle = !options.DisplayByYear || periode.SubPeriodes.Count == 1;
    if (periodInTitle)
    {
        periodeTitle.AppendChild(doc.CreateElement("br"));
        panelTitle.AppendChild(CreatePeriodePeriode(doc, periode));
    }

    tooltip.AppendChild(panelTitle);
    tooltip.AppendChild(panelBody);
    tooltip.AppendChild(CreateDiv(doc, "te", string.Empty));

    return tooltip;
}
/*********
** Private methods
*********/
/// <summary>Get metadata about a mod by scraping the Nexus website.</summary>
/// <param name="id">The Nexus mod ID.</param>
/// <returns>Returns the mod info if found, else <c>null</c>. When the page shows a warning notice other than "not found", the returned mod carries the error text and status instead of mod data.</returns>
private async Task<NexusMod> GetModFromWebsiteAsync(uint id)
{
    // fetch HTML
    string html;
    try
    {
        html = await this.WebClient
            .GetAsync(string.Format(this.WebModScrapeUrlFormat, id))
            .AsString();
    }
    catch (ApiException ex) when (ex.Status == HttpStatusCode.NotFound)
    {
        return null;
    }

    // parse HTML
    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // handle Nexus error message
    HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'site-notice')][contains(@class, 'warning')]");
    if (node != null)
    {
        // first line is the error code, the remainder (if any) is the error text
        string[] errorParts = node.InnerText.Trim().Split(new[] { '\n' }, 2, System.StringSplitOptions.RemoveEmptyEntries);
        string errorCode = errorParts[0];
        string errorText = errorParts.Length > 1 ? errorParts[1] : null;

        // invariant lowercasing: the comparison must not depend on the machine's culture
        switch (errorCode.Trim().ToLowerInvariant())
        {
            case "not found":
                return null;

            default:
                return new NexusMod { Error = $"Nexus error: {errorCode} ({errorText}).", Status = this.GetWebStatus(errorCode) };
        }
    }

    // extract mod info
    string url = this.GetModUrl(id);
    string name = doc.DocumentNode.SelectSingleNode("//div[@id='pagetitle']//h1")?.InnerText.Trim();
    string version = doc.DocumentNode.SelectSingleNode("//ul[contains(@class, 'stats')]//li[@class='stat-version']//div[@class='stat']")?.InnerText.Trim();
    SemanticVersion.TryParse(version, out ISemanticVersion parsedVersion);

    // extract files
    var downloads = new List<IModDownload>();
    var fileSections = doc.DocumentNode.SelectNodes("//div[contains(@class, 'files-tabs')]");
    if (fileSections != null) // SelectNodes returns null when the page has no file tabs
    {
        foreach (var fileSection in fileSections)
        {
            // a section without a heading is treated like any other unlisted section
            string sectionName = fileSection.Descendants("h2").FirstOrDefault()?.InnerText;
            if (sectionName != "Main files" && sectionName != "Optional files")
            {
                continue;
            }

            foreach (var container in fileSection.Descendants("dt"))
            {
                string fileName = container.GetDataAttribute("name")?.Value;
                if (fileName == null)
                {
                    continue; // not a file entry
                }

                string fileVersion = container.GetDataAttribute("version")?.Value;

                // get text of next <dd> tag; derived from https://stackoverflow.com/a/25535623/262123
                // (the null check belongs on the node lookup: the lookup is what can come back empty)
                string description = container.SelectSingleNode("following-sibling::*[1][self::dd]//div")?.InnerText?.Trim();

                downloads.Add(
                    new GenericModDownload(fileName, description, fileVersion)
                );
            }
        }
    }

    // yield info
    return new NexusMod
    {
        Name = name,
        Version = parsedVersion?.ToString() ?? version,
        Url = url,
        Downloads = downloads.ToArray()
    };
}
/// <summary>
/// Crawls the county (or, when the page lists none, town) rows of a page and adds one child
/// model per row to <paramref name="parent"/>.
/// </summary>
/// <remarks>
/// On any exception the children collected so far are discarded, the failure is logged and,
/// after a five second pause, the whole page is crawled again. The retry is a loop rather than
/// a recursive call so that repeated failures do not keep deepening the call stack.
/// The number of retries is not limited.
/// </remarks>
/// <param name="parent">The area that receives the crawled children.</param>
/// <param name="url">Page address, relative to <c>BaseUrl</c>.</param>
/// <param name="provinceCode">Province code, passed through to <c>CrawlingTown</c>.</param>
private void CrawlingCounty(AreaCrawlingModel parent, string url, string provinceCode)
{
    while (true)
    {
        try
        {
            var isTown = false;

            // NOTE(review): blocks on the asynchronous request; kept because this method is synchronous.
            var html = GetResponse(BaseUrl + url).Result;
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='countytr']");
            if (nodeList == null)
            {
                // No county rows: fall back to town rows.
                nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
                isTown = true;
            }

            if (nodeList == null)
            {
                _logger.LogDebug("没有数据");
                return;
            }

            foreach (var node in nodeList)
            {
                // Code and name are usually links; rows without links carry them as plain cells.
                var codeNode = node.SelectSingleNode("td[1]/a");
                var nameNode = node.SelectSingleNode("td[2]/a");
                if (codeNode == null)
                {
                    codeNode = node.SelectSingleNode("td[1]");
                    nameNode = node.SelectSingleNode("td[2]");
                }

                if (codeNode == null || nameNode == null || nameNode.InnerText == "市辖区")
                {
                    continue;
                }

                var model = new AreaCrawlingModel
                {
                    Code = codeNode.InnerText,
                    Name = nameNode.InnerText,
                    FullName = parent.FullName + nameNode.InnerText
                };

                SetPinyin(model);

                // NOTE(review): the returned awaitable is not awaited, so this call is not
                // waited for here — confirm that is intended.
                CrawlingCoord(model).ConfigureAwait(false);

                if (!isTown)
                {
                    var hrefAttribute = codeNode.Attributes["href"];
                    if (hrefAttribute != null)
                    {
                        CrawlingTown(model, hrefAttribute.Value, provinceCode);
                    }
                }

                parent.Children.Add(model);
            }

            return;
        }
        catch (Exception ex)
        {
            Thread.Sleep(5000);
            parent.Children = new List<AreaCrawlingModel>();
            _logger.LogError($"爬取{parent.Name}下的区县失败");
            _logger.LogError(ex.Message);
        }
    }
}
/// <summary>
/// Collects the addresses of the images shown on the web page at the given URL
/// and returns them as a list.
/// </summary>
/// <param name="url">Address of the web page.</param>
/// <returns>The collected image addresses, without duplicates from the first pass;
/// thumbnails that link to an image are replaced by the link target.</returns>
public List<string> DoGetImages(string url)
{
    List<string> imageAdresses = new List<string>();

    UnDisplayedBrowser browser = new UnDisplayedBrowser();
    browser.NavigateAndWait(url);
    HtmlDocument page = browser.Document;

    // Pass 1: images displayed on the page
    foreach (HtmlElement imgElement in page.GetElementsByTagName("IMG"))
    {
        string source = imgElement.GetAttribute("src");
        if (!imgExtensions.Contains(Path.GetExtension(source)))
        {
            continue;
        }

        // When filter expressions are configured, addresses containing any of them are left out
        bool excluded = false;
        if (filter_ != null)
        {
            foreach (var pattern in filter_)
            {
                if (source.Contains(pattern))
                {
                    excluded = true;
                    break;
                }
            }
        }

        if (excluded || imageAdresses.Contains(source))
        {
            continue;
        }

        imageAdresses.Add(source);
    }

    // Pass 2: replace thumbnails by the image their surrounding link points to
    foreach (HtmlElement linkElement in page.GetElementsByTagName("A"))
    {
        string target = linkElement.GetAttribute("href");
        if (!imgExtensions.Contains(Path.GetExtension(target)))
        {
            continue;
        }

        foreach (HtmlElement thumbnail in linkElement.GetElementsByTagName("IMG"))
        {
            // Remove reports whether the thumbnail address was in the list
            if (imageAdresses.Remove(thumbnail.GetAttribute("src")))
            {
                imageAdresses.Add(target);
            }
        }
    }

    return imageAdresses;
}
/// <summary>
/// Downloads one page of a story's chapter list and extracts the chapters on it together
/// with the highest page number found in the paging links.
/// </summary>
/// <param name="truyen">The story whose chapter page is requested.</param>
/// <param name="page">The page number to request.</param>
/// <param name="allowGetNoidung">When <c>true</c>, the chapter content is read as well and
/// rows without a content node are skipped.</param>
/// <returns>The chapters and paging data, or <c>null</c> when the request fails or the page
/// contains no chapter rows.</returns>
public async Task<ChapterLoadData> GetPageChapTer(Truyen truyen, int page, bool allowGetNoidung = true)
{
    var pageUrl = string.Format(TRUYEN_URL, truyen.TruyenUrl, page);
    var response = await WebUtils.DoRequestSimpleGet(pageUrl, null, "", HOME_PAGE);
    if (!response.Status)
    {
        return null;
    }

    var dom = new HtmlDocument();
    dom.LoadHtml(response.Data);

    var chapterRows = dom.DocumentNode.SelectNodes(DANH_SACH_CHUONG_XPATH);
    if (chapterRows == null)
    {
        return null;
    }

    var chapters = new List<Chapter>();
    var nextIndex = 0;

    // The first two rows are never chapters.
    foreach (var row in chapterRows.Skip(2))
    {
        var chapter = new Chapter
        {
            TruyenUrl = truyen.TruyenUrl,
            PageOfChapter = page
        };

        var numberNode = row.SelectSingleNode(CHUONGNUMBER_XPATH);
        if (numberNode != null)
        {
            chapter.SoThuTu = numberNode.InnerText.Trim();
        }

        var titleNode = row.SelectSingleNode(TENCHUONG_XPATH);
        if (titleNode == null)
        {
            continue;
        }

        chapter.TenChuong = titleNode.InnerText.Trim();

        var sourceNode = row.SelectSingleNode(SOURCE_XPATH);
        if (sourceNode != null)
        {
            chapter.Nguon = sourceNode.InnerText.Trim();
        }

        if (allowGetNoidung)
        {
            var contentNode = row.SelectSingleNode(NOIDUNG_XPATH);
            if (contentNode == null)
            {
                continue;
            }

            // Direct <span> children are removed before the text is read.
            foreach (var span in contentNode.ChildNodes.Where(child => child.Name == "span").ToList())
            {
                contentNode.RemoveChild(span);
            }

            chapter.NoiDung = contentNode.InnerText.Trim();
        }

        chapter.IndexNumberPageOfChapter = nextIndex++;
        chapters.Add(chapter);
    }

    var maxPage = 1;
    var pagerNodes = dom.DocumentNode.SelectNodes(MAX_PAGE_TRUYEN_XPATH);
    if (pagerNodes != null)
    {
        foreach (var pagerNode in pagerNodes)
        {
            var link = pagerNode.SelectSingleNode("./a");
            if (link == null)
            {
                continue;
            }

            // The page number is the text between the first '=' and the first '&' of the link.
            var href = link.GetAttributeValue("href", "");
            var equalsAt = href.IndexOf("=");
            var ampersandAt = href.IndexOf("&");
            if (equalsAt != -1 && ampersandAt != -1)
            {
                try
                {
                    href = href.Substring(equalsAt + 1, ampersandAt - equalsAt - 1);
                }
                catch (Exception)
                {
                    // ignored: the href is then parsed as it is
                }
            }

            int pageNumber;
            int.TryParse(href, out pageNumber);
            maxPage = Math.Max(maxPage, pageNumber);
        }
    }

    return new ChapterLoadData
    {
        ListChapter = chapters,
        MaxPageIndex = maxPage,
        IndexStartOfChapter = 0
    };
}
/// <summary>
/// Sets the inner HTML of the node selected by <c>BlogXPath</c> in <paramref name="template"/>
/// to the HTML of <paramref name="blogPost"/>.
/// </summary>
/// <param name="template">The template document that is modified in place.</param>
/// <param name="blogPost">The post whose HTML is inserted.</param>
private static void replaceBlogDiv(HtmlDocument template, IBlogPost blogPost)
{
    template.DocumentNode.SelectSingleNode(BlogXPath).InnerHtml = blogPost.HTML;
}
/// <summary>
/// Parses the HTML of a story listing page into the list of stories and the highest page
/// number found in the paging links.
/// </summary>
/// <param name="htmlData">The HTML of the listing page.</param>
/// <returns>The parsed data; when the listing rows are not found, an instance whose list and
/// page index were never assigned.</returns>
private static SiteTruyenData ReadSiteTruyenHtmlData(string htmlData)
{
    var siteTruyen = new SiteTruyenData();
    var dom = new HtmlDocument();
    dom.LoadHtml(htmlData);

    var tdom = dom.DocumentNode.SelectNodes(DANHSACHTOPTRUYEN_XPATH);
    if (tdom == null)
    {
        return siteTruyen;
    }

    var listTruyen = new List<Truyen>();
    var i = 0;
    foreach (var idom in tdom)
    {
        i++;

        // skip the first two nodes
        if (i < 3)
        {
            continue;
        }

        var truyen = new Truyen(SiteTruyen.LuongSonBac);
        try
        {
            var cdom = idom.SelectSingleNode("./td[2]");
            if (cdom == null)
            {
                continue;
            }

            truyen.Category = WebUtility.HtmlDecode(cdom.InnerText.Trim());

            cdom = idom.SelectSingleNode("./td[3]");
            if (cdom != null)
            {
                truyen.Title = cdom.InnerText.Trim();
            }

            cdom = idom.SelectSingleNode("./td[4]");
            if (cdom != null)
            {
                truyen.Author = cdom.InnerText.Trim();
            }

            cdom = idom.SelectSingleNode("./td[6]");
            if (cdom != null)
            {
                truyen.NumberChaper = int.Parse(cdom.InnerText.Trim());
            }

            cdom = idom.SelectSingleNode("./td[7]");
            if (cdom != null)
            {
                // dots are removed before parsing the view count
                truyen.NumberView = int.Parse(cdom.InnerText.Trim().Replace(".", ""));
            }

            cdom = idom.SelectSingleNode("./td[3]/a[1]");
            if (cdom == null)
            {
                continue;
            }

            truyen.TruyenUrl = cdom.GetAttributeValue("href", "");
        }
        catch (Exception)
        {
            // a row whose numbers cannot be parsed is left out
            continue;
        }

        listTruyen.Add(truyen);
    }

    // find the number of pages: the digits that directly follow the last '-' of each paging link
    var maxPage = 1;
    tdom = dom.DocumentNode.SelectNodes(MAXPAGEINDEX_XPATH);
    if (tdom != null)
    {
        foreach (var idom in tdom)
        {
            var href = idom.GetAttributeValue("href", "");
            var lastDash = href.LastIndexOf('-');
            if (lastDash == -1)
            {
                continue;
            }

            // Start after the dash (so the number is not read as negative) and stop at the
            // first non-digit, so a suffix such as a file extension does not defeat parsing.
            var tail = href.Substring(lastDash + 1);
            var digitCount = 0;
            while (digitCount < tail.Length && char.IsDigit(tail[digitCount]))
            {
                digitCount++;
            }

            int n;
            if (int.TryParse(tail.Substring(0, digitCount), out n) && n > maxPage)
            {
                maxPage = n;
            }
        }
    }

    siteTruyen.ListTruyen = listTruyen;
    siteTruyen.MaxPageIndex = maxPage;
    return siteTruyen;
}
/// <summary>
/// Scrapes search result pages for <paramref name="searchText"/>, page after page, and
/// collects one <c>Item</c> per product card.
/// </summary>
/// <remarks>
/// Scraping stops when <paramref name="itemsCount"/> items have been collected or when the
/// page offers no link to the next page number. <c>StartPageNum</c> is advanced as pages are
/// consumed. Missing pieces of a card are stored as "\n".
/// NOTE(review): an item is only collected while the number collected so far is below
/// <paramref name="itemsCount"/>, so the default of 0 returns on the first card — confirm
/// whether 0 was meant as "no limit".
/// </remarks>
/// <param name="searchText">The search phrase; spaces are replaced by "%20".</param>
/// <param name="itemsCount">Maximum number of items to collect.</param>
/// <param name="minPrice">Lower price bound; the price filter is used when either bound is non-zero.</param>
/// <param name="maxPrice">Upper price bound.</param>
/// <returns>The collected items, or <c>null</c> when no document could be obtained for a page.</returns>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    int ItemsCount = 0;
    List<Item> SearchResults = new List<Item>();

    while (true)
    {
        string Url;
        if (minPrice != 0 || maxPrice != 0)
        {
            // The minimum goes in front of the ';' and the maximum in front of the '&' of the filter fragment.
            string pr1 = FilterChanger.Insert(FilterChanger.IndexOf(";"), minPrice.ToString());
            string prices = pr1.Insert(pr1.IndexOf("&"), maxPrice.ToString());
            Url = SimpleUrl + searchText.Replace(" ", "%20");
            Url = Url.Insert(Url.IndexOf("search="), prices) + PageChanger + StartPageNum;
        }
        else
        {
            Url = SimpleUrl + searchText.Replace(" ", "%20") + PageChanger + StartPageNum;
        }

        HtmlDocument htmlDoc = await GetHtmlDocument(Url);

        // Explicit check instead of catching NullReferenceException.
        if (htmlDoc?.DocumentNode == null)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> ProductsList = htmlDoc.DocumentNode.Descendants("div")
            .Where(node => node.GetAttributeValue("class", "").Contains("catalog_main_table j-products-container"))
            .ToList()
            .SelectMany(container => container.Descendants("div")
                .Where(node => node.GetAttributeValue("class", "").Contains("j-card-item")))
            .ToList();

        Console.WriteLine($"LENGTH IS: {ProductsList.Count}");

        foreach (var item in ProductsList)
        {
            Console.WriteLine($"Item: {ItemsCount}");
            if (ItemsCount >= itemsCount)
            {
                return SearchResults;
            }

            HtmlNode nameTag = item.Descendants("span")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("goods-name"));
            string name = nameTag == null ? "\n" : nameTag.InnerText;

            // The price is looked for in an <ins class="lower-price"> first, then in a <span> of the same class.
            HtmlNode priceTag = item.Descendants("ins")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lower-price"))
                ?? item.Descendants("span")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lower-price"));
            string price = priceTag == null ? "\n" : priceTag.InnerText;

            HtmlNode hrefTag = item.Descendants("a")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("ref_goods_n_p"));
            string realLink = hrefTag == null ? "\n" : hrefTag.GetAttributeValue("href", "");

            HtmlNode imgTag = item.Descendants("img")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("thumbnail"));

            // "data-original" is preferred over "src" when both are present.
            string imgLink;
            if (imgTag != null && imgTag.Attributes.Contains("data-original"))
            {
                imgLink = imgTag.GetAttributeValue("data-original", "");
            }
            else if (imgTag != null && imgTag.Attributes.Contains("src"))
            {
                imgLink = imgTag.GetAttributeValue("src", "");
            }
            else
            {
                imgLink = "\n";
            }

            SearchResults.Add(new Item(realLink, imgLink, name, price, SiteName));
            Console.WriteLine();
            ItemsCount++;
        }

        // Continue only when the pager links to the next page number.
        HtmlNode PageSpan = htmlDoc.DocumentNode.Descendants("div")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("pageToInsert"));
        if (PageSpan == null)
        {
            return SearchResults;
        }

        HtmlNode PageHref = PageSpan.Descendants("a")
            .FirstOrDefault(node => node.InnerText == (StartPageNum + 1).ToString());
        if (PageHref == null)
        {
            return SearchResults;
        }

        StartPageNum++;
    }
}
/// <summary>
/// Walks every configured list page, downloads the linked documents that are not yet known,
/// extracts their text and query results, and saves the results after each list page.
/// </summary>
/// <remarks>
/// The meeting date is parsed from the file name of the link (the last path segment without
/// query string and ".pdf"); links whose file name is not a date are reported and skipped.
/// A failed download is reported on the console; processing of that document continues as
/// before. NOTE(review): <c>ReadText</c> is then called for a file that may not exist —
/// confirm it tolerates that.
/// </remarks>
public void DownloadCouncilPdfFiles()
{
    List<Documents> docs = this.LoadDocumentsDoneSQL();
    List<QueryResult> queries = this.LoadQueriesDoneSQL();
    HtmlWeb web = new HtmlWeb();

    using (WebClient c = new WebClient())
    {
        foreach (string url in this.docUrls)
        {
            HtmlDocument listDoc = web.Load(url);
            HtmlNodeCollection docNodeList = listDoc.DocumentNode.SelectNodes("//div[contains(@id,'vid')]//ul/li/a[@href]");

            // SelectNodes returns null when nothing matches.
            if (docNodeList != null)
            {
                Console.WriteLine("{0} files...", docNodeList.Count);

                foreach (HtmlNode docNode in docNodeList)
                {
                    // (char)32 is a plain space, (char)160 a non-breaking space.
                    string pdfName = docNode.InnerText.Trim('\t', '\r', '\n', (char)32, (char)160);
                    string tag = pdfName.ToLower().Contains("agenda") ? "agenda" : "minute";

                    string pdfUrl = docNode.Attributes["href"].Value;
                    pdfUrl = pdfUrl.StartsWith("http") ? pdfUrl : "http://www.saginaw-mi.com" + pdfUrl;

                    // Last path segment of the URL, query string removed.
                    string fileName = pdfUrl.Split('?').FirstOrDefault().Split('/').LastOrDefault();

                    DateTime meetingDate;
                    if (!DateTime.TryParse(fileName.Replace(".pdf", string.Empty), out meetingDate))
                    {
                        Console.WriteLine("Cannot read a meeting date from {0}. Skip...", pdfUrl);
                        continue;
                    }

                    if (meetingDate < this.dtStartFrom)
                    {
                        Console.WriteLine("{0} earlier than {1}. Skip...", meetingDate, dtStartFrom);
                        continue;
                    }

                    Documents localdoc = docs.FirstOrDefault(t => t.DocSource == pdfUrl);
                    if (localdoc == null)
                    {
                        string category = "Council";
                        localdoc = new Documents();
                        localdoc.DocId = Guid.NewGuid().ToString();
                        localdoc.DocType = category;
                        localdoc.CityId = this.cityEntity.CityId;
                        localdoc.DocSource = pdfUrl;

                        string localPath = string.Format("{0}\\{1}_{2}", this.localDirectory, tag, fileName);
                        localPath = HttpUtility.UrlDecode(localPath);
                        localdoc.DocLocalPath = localPath;

                        try
                        {
                            c.DownloadFile(pdfUrl, localPath);
                        }
                        catch (Exception ex)
                        {
                            // Still best effort, but the failure is no longer invisible.
                            Console.WriteLine("Failed to download {0}: {1}", pdfUrl, ex.Message);
                        }

                        docs.Add(localdoc);
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine("This document already downloaded...");
                        Console.ResetColor();
                    }

                    this.ReadText(false, localdoc.DocLocalPath, ref localdoc);

                    QueryResult qr = queries.FirstOrDefault(t => t.DocId == localdoc.DocId);
                    if (qr == null)
                    {
                        qr = new QueryResult();
                        qr.DocId = localdoc.DocId;
                        qr.CityId = localdoc.CityId;
                        qr.MeetingDate = meetingDate;
                        qr.SearchTime = DateTime.Now;
                        queries.Add(qr);
                    }

                    this.ExtractQueriesFromDoc(localdoc, ref qr);
                    Console.WriteLine("{0} documents saved...", docs.Count);
                    Console.WriteLine("{0} query results saved...", queries.Count);
                }
            }

            this.SaveMeetingResultsToSQL(docs, queries);
        }
    }
}
/// <summary>
/// Parses one badge page. Games with card drops remaining are added to <c>GamesToFarm</c>;
/// games that report no drops but are listed in <c>UntrustedAppIDs</c> are handed to
/// <c>CheckGame</c> for an individual check.
/// </summary>
/// <param name="htmlDocument">The badge page; <c>null</c> is logged and ignored.</param>
/// <returns>A task that completes after all rows were parsed and every started
/// <c>CheckGame</c> task has finished.</returns>
private async Task CheckPage(HtmlDocument htmlDocument)
{
    if (htmlDocument == null)
    {
        Bot.ArchiLogger.LogNullError(nameof(htmlDocument));
        return;
    }

    HtmlNodeCollection htmlNodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='badge_row_inner']");
    if (htmlNodes == null)
    {
        // The page has no badge rows at all
        return;
    }

    List<Task> backgroundTasks = null;

    foreach (HtmlNode htmlNode in htmlNodes)
    {
        HtmlNode statsNode = htmlNode.SelectSingleNode(".//div[@class='badge_title_stats_content']");
        HtmlNode appIDNode = statsNode?.SelectSingleNode(".//div[@class='card_drop_info_dialog']");
        if (appIDNode == null)
        {
            // A plain badge without card drop info
            continue;
        }

        // AppID: the fifth '_'-separated part of the dialog's id attribute
        string appIDText = appIDNode.GetAttributeValue("id", null);
        if (string.IsNullOrEmpty(appIDText))
        {
            Bot.ArchiLogger.LogNullError(nameof(appIDText));
            continue;
        }

        string[] appIDSplitted = appIDText.Split('_');
        if (appIDSplitted.Length < 5)
        {
            Bot.ArchiLogger.LogNullError(nameof(appIDSplitted));
            continue;
        }

        if (!uint.TryParse(appIDSplitted[4], out uint appID) || (appID == 0))
        {
            Bot.ArchiLogger.LogNullError(nameof(appID));
            continue;
        }

        if (SalesBlacklist.Contains(appID) || Program.GlobalConfig.Blacklist.Contains(appID) || Bot.IsBlacklistedFromIdling(appID) || (Bot.BotConfig.IdlePriorityQueueOnly && !Bot.IsPriorityIdling(appID)))
        {
            // Configuration says this appID must not be considered
            continue;
        }

        if (IgnoredAppIDs.TryGetValue(appID, out DateTime ignoredUntil))
        {
            if (ignoredUntil >= DateTime.UtcNow)
            {
                // Still inside its ignore period
                continue;
            }

            // The ignore period has expired
            IgnoredAppIDs.TryRemove(appID, out _);
        }

        // Cards
        HtmlNode progressNode = statsNode.SelectSingleNode(".//span[@class='progress_info_bold']");
        if (progressNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(progressNode));
            continue;
        }

        string progressText = progressNode.InnerText;
        if (string.IsNullOrEmpty(progressText))
        {
            Bot.ArchiLogger.LogNullError(nameof(progressText));
            continue;
        }

        // No number is printed when no drops remain, so a failed match is expected and
        // leaves cardsRemaining at 0; a matched number must parse and be non-zero.
        ushort cardsRemaining = 0;
        Match progressMatch = Regex.Match(progressText, @"\d+");
        if (progressMatch.Success && (!ushort.TryParse(progressMatch.Value, out cardsRemaining) || (cardsRemaining == 0)))
        {
            Bot.ArchiLogger.LogNullError(nameof(cardsRemaining));
            continue;
        }

        if (cardsRemaining == 0)
        {
            // The badge page is normally trusted here. For the appIDs listed as untrusted
            // the game page may disagree with the badge page, so only those continue.
            if (!UntrustedAppIDs.Contains(appID))
            {
                continue;
            }

            // Cheap pre-check: the number of cards earned so far
            HtmlNode cardsEarnedNode = statsNode.SelectSingleNode(".//div[@class='card_drop_info_header']");
            if (cardsEarnedNode == null)
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedNode));
                continue;
            }

            string cardsEarnedText = cardsEarnedNode.InnerText;
            if (string.IsNullOrEmpty(cardsEarnedText))
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedText));
                continue;
            }

            Match cardsEarnedMatch = Regex.Match(cardsEarnedText, @"\d+");
            if (!cardsEarnedMatch.Success)
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedMatch));
                continue;
            }

            if (!ushort.TryParse(cardsEarnedMatch.Value, out ushort cardsEarned))
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarned));
                continue;
            }

            if (cardsEarned > 0)
            {
                // Cards were already earned and none remain: the game is considered done
                continue;
            }

            // Neither remaining nor earned cards: either a game that is not owned (or an
            // F2P game without purchases) with cards in the inventory, or wrong data from
            // Steam. Because of the latter, processing continues below.
        }

        // Hours
        HtmlNode timeNode = statsNode.SelectSingleNode(".//div[@class='badge_title_stats_playtime']");
        if (timeNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(timeNode));
            continue;
        }

        string hoursText = timeNode.InnerText;
        if (string.IsNullOrEmpty(hoursText))
        {
            Bot.ArchiLogger.LogNullError(nameof(hoursText));
            continue;
        }

        // Exactly 0.0 hours is not printed, so a failed match is expected and keeps hours at 0
        float hours = 0.0F;
        Match hoursMatch = Regex.Match(hoursText, @"[0-9\.,]+");
        if (hoursMatch.Success && (!float.TryParse(hoursMatch.Value, NumberStyles.Number, CultureInfo.InvariantCulture, out hours) || (hours <= 0.0F)))
        {
            Bot.ArchiLogger.LogNullError(nameof(hours));
            continue;
        }

        // Names
        HtmlNode nameNode = statsNode.SelectSingleNode("(.//div[@class='card_drop_info_body'])[last()]");
        if (nameNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(nameNode));
            continue;
        }

        string name = nameNode.InnerText;
        if (string.IsNullOrEmpty(name))
        {
            Bot.ArchiLogger.LogNullError(nameof(name));
            continue;
        }

        // The name follows one of two phrases: the regular one, or the one shown when no
        // drops remain. The added constants are the lengths of those phrases (12 and 44).
        int nameStartIndex = name.IndexOf(" by playing ", StringComparison.Ordinal);
        if (nameStartIndex > 0)
        {
            nameStartIndex += 12;
        }
        else
        {
            nameStartIndex = name.IndexOf("You don't have any more drops remaining for ", StringComparison.Ordinal);
            if (nameStartIndex <= 0)
            {
                Bot.ArchiLogger.LogNullError(nameof(nameStartIndex));
                continue;
            }

            nameStartIndex += 44;
        }

        int nameEndIndex = name.LastIndexOf('.');
        if (nameEndIndex <= nameStartIndex)
        {
            Bot.ArchiLogger.LogNullError(nameof(nameEndIndex));
            continue;
        }

        name = WebUtility.HtmlDecode(name.Substring(nameStartIndex, nameEndIndex - nameStartIndex));

        // Levels: there is no level node for a badge that was not crafted yet (level 0)
        byte badgeLevel = 0;
        HtmlNode levelNode = htmlNode.SelectSingleNode(".//div[@class='badge_info_description']/div[2]");
        if (levelNode != null)
        {
            string levelText = levelNode.InnerText;
            if (string.IsNullOrEmpty(levelText))
            {
                Bot.ArchiLogger.LogNullError(nameof(levelText));
                continue;
            }

            int levelIndex = levelText.IndexOf("Level ", StringComparison.OrdinalIgnoreCase);
            if (levelIndex < 0)
            {
                Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                continue;
            }

            // Move past "Level " to the single character that holds the level
            levelIndex += 6;
            if (levelText.Length <= levelIndex)
            {
                Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                continue;
            }

            levelText = levelText.Substring(levelIndex, 1);
            if (!byte.TryParse(levelText, out badgeLevel) || (badgeLevel == 0) || (badgeLevel > 5))
            {
                Bot.ArchiLogger.LogNullError(nameof(badgeLevel));
                continue;
            }
        }

        // Parsing is done. Either all data is known (cardsRemaining > 0), or the badge
        // page is not trusted for this game and it is checked individually.
        if (cardsRemaining > 0)
        {
            GamesToFarm.Add(new Game(appID, name, hours, cardsRemaining, badgeLevel));
            continue;
        }

        Task task = CheckGame(appID, name, hours, badgeLevel);
        if (Program.GlobalConfig.OptimizationMode == GlobalConfig.EOptimizationMode.MinMemoryUsage)
        {
            await task.ConfigureAwait(false);
        }
        else
        {
            if (backgroundTasks == null)
            {
                backgroundTasks = new List<Task>();
            }

            backgroundTasks.Add(task);
        }
    }

    // Wait for the individual checks that were left running
    if ((backgroundTasks != null) && (backgroundTasks.Count > 0))
    {
        await Task.WhenAll(backgroundTasks).ConfigureAwait(false);
    }
}
/// <summary>
/// Reads all badge pages, rebuilds <c>GamesToFarm</c> and reports whether it ended up non-empty.
/// </summary>
/// <returns><c>true</c> when at least one game was found (the games are sorted before returning),
/// <c>false</c> when none was found, <c>null</c> when the first badge page could not be
/// obtained or its page count could not be read.</returns>
private async Task<bool?> IsAnythingToFarm()
{
    // The first page also tells how many pages there are
    Bot.ArchiLogger.LogGenericInfo(Strings.CheckingFirstBadgePage);

    HtmlDocument htmlDocument = await Bot.ArchiWebHandler.GetBadgePage(1).ConfigureAwait(false);
    if (htmlDocument == null)
    {
        Bot.ArchiLogger.LogGenericWarning(Strings.WarningCouldNotCheckBadges);
        return null;
    }

    byte maxPages = 1;

    // Without a page link there is only a single page
    HtmlNode htmlNode = htmlDocument.DocumentNode.SelectSingleNode("(//a[@class='pagelink'])[last()]");
    if (htmlNode != null)
    {
        string lastPage = htmlNode.InnerText;
        if (string.IsNullOrEmpty(lastPage))
        {
            Bot.ArchiLogger.LogNullError(nameof(lastPage));
            return null;
        }

        if (!byte.TryParse(lastPage, out maxPages) || (maxPages == 0))
        {
            Bot.ArchiLogger.LogNullError(nameof(maxPages));
            return null;
        }
    }

    GamesToFarm.Clear();

    Task mainTask = CheckPage(htmlDocument);

    if (Program.GlobalConfig.OptimizationMode == GlobalConfig.EOptimizationMode.MinMemoryUsage)
    {
        // One page at a time
        await mainTask.ConfigureAwait(false);

        if (maxPages > 1)
        {
            Bot.ArchiLogger.LogGenericInfo(Strings.CheckingOtherBadgePages);

            for (byte page = 2; page <= maxPages; page++)
            {
                await CheckPage(page).ConfigureAwait(false);
            }
        }
    }
    else
    {
        // All pages are started first and awaited together
        List<Task> tasks = new List<Task>(maxPages) { mainTask };

        if (maxPages > 1)
        {
            Bot.ArchiLogger.LogGenericInfo(Strings.CheckingOtherBadgePages);

            for (byte page = 2; page <= maxPages; page++)
            {
                // The page number is passed by value, so each call gets its own copy
                tasks.Add(CheckPage(page));
            }
        }

        await Task.WhenAll(tasks).ConfigureAwait(false);
    }

    bool anythingToFarm = GamesToFarm.Count != 0;
    ShouldResumeFarming = anythingToFarm;
    if (!anythingToFarm)
    {
        return false;
    }

    await SortGamesToFarm().ConfigureAwait(false);
    return true;
}
/// <summary>
/// Gets the lessons of the specified teacher.
/// </summary>
/// <param name="siteTeacherId">ID of the teacher on the site.</param>
/// <returns>The lessons of the specified teacher.</returns>
/// <exception cref="FlurlHttpException">Thrown when the site does not return a successful HTTP status code.</exception>
/// <exception cref="NullReferenceException">Thrown when no timetable blocks are found on the page.</exception>
public async Task<IEnumerable<Lesson>> GetLessons(int siteTeacherId)
{
    var doc = new HtmlDocument();

    // Release the HTTP response and its content stream as soon as the page has been parsed.
    using (var response = await _client
        .SetQueryParam("timetable")
        .SetQueryParam("lecturer", siteTeacherId)
        .GetAsync())
    using (var stream = await response.Content.ReadAsStreamAsync())
    {
        doc.Load(stream);
    }

    var timetableNode = doc.DocumentNode.SelectNodes("//div[contains(@class, 'timetable_sheet')]");
    if (timetableNode is null)
    {
        // The exception type is kept unchanged so callers that already catch it keep working.
        throw new NullReferenceException("Can not find lessons on the page");
    }

    var teacher = doc.DocumentNode.SelectSingleNode("//a[@class='navbar-brand']/span[2]")
        .GetNormalizedInnerText();

    var lessons = new List<Lesson>();
    foreach (var lessonNode in timetableNode)
    {
        if (lessonNode.ChildNodes.Count <= 3) // an empty lesson frame, nothing to read
        {
            continue;
        }

        // The day header is "<something>,<date>"; only the part after the first comma is used.
        var date = lessonNode.ParentNode.SelectSingleNode(".//div[contains(@class,'dayofweek')]")
            .GetNormalizedInnerText()
            .Split(new[] { ',' }, 2)[1]
            .Trim();

        // Auditorium text is "<auditory>,<address>".
        var adr = lessonNode.SelectSingleNode(".//span[contains(@class,'auditorium')]")
            .GetNormalizedInnerText()
            .Split(new[] { ',' }, 2)
            .Select(x => x.Trim())
            .ToArray();

        // Time text is "<start>–<end>"; parts are trimmed so stray spaces around the dash
        // do not break the exact-format parse below.
        var time = lessonNode.SelectSingleNode(".//span[contains(@class,'time_para')]")
            .GetNormalizedInnerText()
            .Split(new[] { '–' }, 2)
            .Select(x => x.Trim())
            .ToArray();

        var groups = lessonNode.SelectSingleNode(".//span[contains(@class,'group')]").GetNormalizedInnerText();
        var number = byte.Parse(lessonNode.SelectSingleNode(".//span[contains(@class,'num_para')]")
            .GetNormalizedInnerText());
        var lessonName = lessonNode.SelectSingleNode(".//span[contains(@class,'discipline')]").GetNormalizedInnerText();
        var lessonType = lessonNode.SelectSingleNode(".//span[contains(@class,'kindOfWork')]").GetNormalizedInnerText();

        lessons.Add(new Lesson
        {
            Address = adr[1],
            Auditory = adr[0],
            Number = number,
            Groups = groups,
            Name = lessonName,
            Type = lessonType,
            Teacher = teacher,
            // Invariant culture: ':' in a custom format is the culture's time separator,
            // so parsing with the current culture can fail on machines where it is not ':'.
            StartTime = DateTime.ParseExact($"{date} {time[0]}", "dd.MM.yyyy HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal),
            EndTime = DateTime.ParseExact($"{date} {time[1]}", "dd.MM.yyyy HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal)
        });
    }

    return lessons;
}
/// <summary>
/// Reads the label from the given document: selects the anchor at a fixed XPath below the
/// element with id <c>page_content</c> and returns its HTML-decoded inner text.
/// </summary>
/// <param name="htmlDoc">The parsed HTML document to read the label from.</param>
/// <returns>The HTML-decoded inner text of the selected anchor.</returns>
private static string GetLabel(HtmlDocument htmlDoc)
{
    const string labelXPath = "//*[@id=\"page_content\"]/div[1]/div[3]/div[2]/a";

    var labelNode = htmlDoc.DocumentNode.SelectSingleNode(labelXPath);

    // No null check here: a document without the expected anchor fails on the next line.
    var encodedLabel = labelNode.InnerText.ToString();

    return WebUtility.HtmlDecode(encodedLabel);
}