/// <summary>
/// Posts the values of the page's named <c>input</c> elements to the URL in the form's
/// <c>action</c> attribute and returns the response body.
/// </summary>
/// <param name="form">The form node whose <c>action</c> attribute is the POST target.</param>
/// <param name="encoding">Encoding used to turn the request body into bytes and to decode the response.</param>
/// <returns>The response bytes decoded with <paramref name="encoding"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="form"/> or <paramref name="encoding"/> is <c>null</c>.
/// </exception>
public static string Submit(this HtmlAgilityPack.HtmlNode form, Encoding encoding)
{
    if (form == null)
    {
        throw new ArgumentNullException(nameof(form));
    }

    if (encoding == null)
    {
        throw new ArgumentNullException(nameof(encoding));
    }

    // NOTE(review): "//input" is rooted at the document, so inputs outside this form are
    // included as well. It is kept as-is because, depending on the HtmlAgilityPack version and
    // HtmlNode.ElementsFlags, inputs may not be parsed as descendants of the <form> node and
    // ".//input" could then return nothing — confirm before narrowing the query.
    var inputs = form.SelectNodes("//input");

    // SelectNodes yields null (not an empty collection) when nothing matches.
    string postDataStr = (inputs == null ? Enumerable.Empty<HtmlAgilityPack.HtmlNode>() : inputs)
        .Select(input => new
        {
            Name = input.GetAttributeValue("name", string.Empty),
            Value = input.GetAttributeValue("value", string.Empty)
        })
        // Controls without a name are not part of a form submission.
        .Where(field => field.Name.Length > 0)
        // A dictionary cannot hold repeated names (radio groups, checkbox + hidden pairs);
        // the first occurrence wins instead of ToDictionary throwing ArgumentException.
        .GroupBy(field => field.Name)
        .ToDictionary(
            group => group.Key,
            group => HttpUtility.UrlEncode(group.First().Value))
        .SerializeData <string>();

    byte[] responseData = HttpRequestUtil.Post(
        form.GetAttributeValue("action", null),
        encoding.GetBytes(postDataStr));

    return encoding.GetString(responseData);
}
/// <summary>
/// Collects the <c>data-id</c> attribute of the <c>./div/button</c> element of every child of
/// the "ui key list" container whose markup contains the given key name.
/// </summary>
/// <param name="htmlContent">HTML of the page that lists the keys.</param>
/// <param name="keyName">
/// Text that must occur in a row's outer HTML; defaults to "AltinnStudioAppKey" when <c>null</c>.
/// </param>
/// <returns>
/// The <c>data-id</c> values found (an empty string for a button without that attribute);
/// an empty list when the container is not present.
/// </returns>
private List <string> FindAllAppKeysId(string htmlContent, string keyName = null)
{
    List <string> htmlValues = new List <string>();
    string wantedKeyName = keyName ?? "AltinnStudioAppKey";

    HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(htmlContent);

    HtmlAgilityPack.HtmlNode keyList = htmlDocument.DocumentNode.SelectSingleNode("//div[contains(@class, 'ui key list')]");
    if (keyList == null)
    {
        // SelectSingleNode returns null when the container is absent: nothing to report.
        return htmlValues;
    }

    foreach (HtmlAgilityPack.HtmlNode keyNode in keyList.ChildNodes)
    {
        if (!keyNode.OuterHtml.Contains(wantedKeyName))
        {
            continue;
        }

        // The button node that carries the data-id attribute.
        HtmlAgilityPack.HtmlNode deleteButtonNode = keyNode.SelectSingleNode("./div/button");
        if (deleteButtonNode == null)
        {
            // Text/whitespace children or rows without a button cannot provide an id.
            continue;
        }

        htmlValues.Add(deleteButtonNode.GetAttributeValue("data-id", string.Empty));
    }

    return htmlValues;
}
/// <summary>
/// Builds an <see cref="XRefDetails"/> from an <c>xref</c> element.
/// </summary>
/// <remarks>
/// When <c>href</c> is present it takes precedence over <c>uid</c> and is split as
/// <c>uid[?query][#anchor]</c>; query-string values act as fallbacks for the matching attributes.
/// </remarks>
/// <param name="node">The <c>xref</c> node to read.</param>
/// <returns>The populated details object.</returns>
/// <exception cref="NotSupportedException">The node is not an <c>xref</c> element.</exception>
public static XRefDetails From(HtmlAgilityPack.HtmlNode node)
{
    if (node.Name != "xref")
    {
        throw new NotSupportedException("Only xref node is supported!");
    }

    var xref = new XRefDetails();
    var uid = node.GetAttributeValue("uid", null);
    var rawHref = node.GetAttributeValue("href", null);
    NameValueCollection queryString = null;

    if (string.IsNullOrEmpty(rawHref))
    {
        xref.Uid = uid;
    }
    else
    {
        if (!string.IsNullOrEmpty(uid))
        {
            Logger.LogWarning($"Both href and uid attribute are defined for {node.OuterHtml}, use href instead of uid.");
        }

        // Char overloads search ordinally; the string overloads of IndexOf are culture-sensitive.
        string others;
        var anchorIndex = rawHref.IndexOf('#');
        if (anchorIndex == -1)
        {
            xref.Anchor = string.Empty;
            others = rawHref;
        }
        else
        {
            // The anchor keeps its leading '#'.
            xref.Anchor = rawHref.Substring(anchorIndex);
            others = rawHref.Remove(anchorIndex);
        }

        var queryIndex = others.IndexOf('?');
        if (queryIndex == -1)
        {
            xref.Uid = HttpUtility.UrlDecode(others);
        }
        else
        {
            xref.Uid = HttpUtility.UrlDecode(others.Remove(queryIndex));
            queryString = HttpUtility.ParseQueryString(others.Substring(queryIndex));
        }
    }

    xref.InnerHtml = node.InnerHtml;

    // Attribute values win; query-string values (then fixed defaults) are the fallback.
    xref.DisplayProperty = node.GetAttributeValue("displayProperty", queryString?.Get("displayProperty") ?? XRefSpec.NameKey);
    xref.AltProperty = node.GetAttributeValue("altProperty", queryString?.Get("altProperty") ?? "fullName");
    xref.Text = node.GetAttributeValue("text", node.GetAttributeValue("name", StringHelper.HtmlEncode(queryString?.Get("text"))));
    xref.Alt = node.GetAttributeValue("alt", node.GetAttributeValue("fullname", StringHelper.HtmlEncode(queryString?.Get("alt"))));
    xref.Title = node.GetAttributeValue("title", queryString?.Get("title"));
    xref.SourceFile = node.GetAttributeValue("sourceFile", null);
    xref.SourceStartLineNumber = node.GetAttributeValue("sourceStartLineNumber", 0);
    xref.SourceEndLineNumber = node.GetAttributeValue("sourceEndLineNumber", 0);

    // Both `data-raw-html` and `data-raw-source` are html encoded. Use `data-raw-html` with higher priority.
    // `data-raw-html` will be decoded then displayed, while `data-raw-source` will be displayed directly.
    xref.RawSource = node.GetAttributeValue("data-raw-source", null);
    var raw = node.GetAttributeValue("data-raw-html", null);
    xref.Raw = string.IsNullOrEmpty(raw) ? xref.RawSource : StringHelper.HtmlDecode(raw);

    xref.ThrowIfNotResolved = node.GetAttributeValue("data-throw-if-not-resolved", false);
    return xref;
}
/// <summary>
/// Extracts version, title, publish time, description lines and downloadable files from a
/// release page.
/// </summary>
/// <param name="doc">Node handed to <c>GetReleaseMain</c> to locate the release markup.</param>
/// <param name="NewVersion">
/// Version already known to the caller. When <c>null</c>, the version is taken from the last
/// path segment of the header link (with leading 'v' characters removed).
/// </param>
/// <returns>
/// The collected information; lists are never <c>null</c> and stay empty when nothing is found.
/// </returns>
static UpdateInfo GetUpdateInfo(HtmlAgilityPack.HtmlNode doc, string NewVersion = null)
{
    UpdateInfo updateInfo = new UpdateInfo
    {
        verson = NewVersion,
        files = new List <UpdateFileInfo>(),
        desc = new List <string>()
    };

    //label-latest
    HtmlAgilityPack.HtmlNode releaseMain = GetReleaseMain(doc);
    if (releaseMain == null)
    {
        // No release markup located: return the empty result instead of failing on a null node.
        return updateInfo;
    }

    // NOTE(review): these expressions start with "//", which XPath evaluates from the document
    // root rather than from releaseMain. Left unchanged because the matched elements may live
    // outside releaseMain — confirm whether ".//" was intended.
    HtmlAgilityPack.HtmlNode Pre = releaseMain.SelectSingleNode("//div[contains(@class,'flex-self-start')]");
    HtmlAgilityPack.HtmlNode header = releaseMain.SelectSingleNode("//div[contains(@class,'release-header')]");
    HtmlAgilityPack.HtmlNode body = releaseMain.SelectSingleNode("//div[contains(@class,'markdown-body')]");

    if (Pre != null && Pre.InnerText.Contains("Pre"))
    {
        updateInfo.pre = true;
    }

    if (header != null)
    {
        if (NewVersion != null)
        {
            HtmlAgilityPack.HtmlNode headers = header.SelectSingleNode("div[1]/div[1]");
            if (headers != null)
            {
                updateInfo.title = headers.InnerText.Replace("\n", "").Trim();
            }
        }
        else
        {
            HtmlAgilityPack.HtmlNode headers = header.SelectSingleNode("div[1]/div[1]/a[1]");
            if (headers != null)
            {
                string _Newversion = headers.GetAttributeValue("href", null);
                if (_Newversion != null)
                {
                    // Ordinal char search: the string overload of LastIndexOf is culture-sensitive.
                    updateInfo.verson = _Newversion.Substring(_Newversion.LastIndexOf('/') + 1).TrimStart('v');
                }

                updateInfo.title = headers.InnerText.Replace("\n", "").Trim();
            }
        }

        HtmlAgilityPack.HtmlNode headersTime = header.SelectSingleNode("p[1]/relative-time[1]");
        if (headersTime != null)
        {
            string time = headersTime.GetAttributeValue("datetime", null);//2020-07-07T07:58:07Z
            updateInfo.time = time.ToDateTime(DateTime.Now);
        }
    }

    if (body != null)
    {
        HtmlAgilityPack.HtmlNodeCollection nodes = body.SelectNodes("p");
        if (nodes != null && nodes.Count > 0)
        {
            foreach (HtmlAgilityPack.HtmlNode item in nodes)
            {
                HtmlAgilityPack.HtmlNodeCollection Anodes = item.ChildNodes;
                if (Anodes == null || Anodes.Count == 0)
                {
                    updateInfo.desc.Add(item.InnerText.Trim());
                    continue;
                }

                foreach (HtmlAgilityPack.HtmlNode Aitem in Anodes)
                {
                    switch (Aitem.Name)
                    {
                        case "a":
                            // Links inside a paragraph are recorded as files.
                            string url = Aitem.GetAttributeValue("href", null);
                            if (url != null)
                            {
                                updateInfo.files.Add(new UpdateFileInfo { name = Aitem.InnerText.Trim(), url = url });
                            }
                            break;

                        case "#text":
                            updateInfo.desc.Add(Aitem.InnerText.Replace("\n", "").Trim());
                            break;

                        default:
                            Debug.WriteLine(Aitem.Name);
                            break;
                    }
                }
            }
        }
        else
        {
            // No <p> children: scan the raw markup for anchors whose href starts with "/files/".
            var reg = new System.Text.RegularExpressions.Regex(@"(?is)<a(?:(?!href=).)*href=(['""]?)(?<url>[^""\s>]*)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>");
            var mc = reg.Matches(body.InnerHtml);
            foreach (System.Text.RegularExpressions.Match m in mc)
            {
                string url = m.Groups["url"].Value;
                if (url.StartsWith("/files/", StringComparison.Ordinal))
                {
                    updateInfo.files.Add(new UpdateFileInfo { name = m.Groups["text"].Value, url = url });
                }
            }
        }
    }

    if (updateInfo.files.Count == 0)
    {
        // Fallback: read the links from the 'Box--condensed' block.
        HtmlAgilityPack.HtmlNode filebody = releaseMain.SelectSingleNode("//div[contains(@class,'Box--condensed')]");
        HtmlAgilityPack.HtmlNodeCollection files = filebody == null ? null : filebody.SelectNodes("div[1]/div");
        if (files != null)
        {
            foreach (HtmlAgilityPack.HtmlNode file in files)
            {
                HtmlAgilityPack.HtmlNode Afiles = file.SelectSingleNode("a[1]");
                if (Afiles == null)
                {
                    continue;
                }

                string url = Afiles.GetAttributeValue("href", null);
                if (url == null)
                {
                    continue;
                }

                if (url.StartsWith("/", StringComparison.Ordinal))
                {
                    // Site-relative link -> absolute, e.g.
                    // https://github.com/giant-app/LiveWallpaper/archive/v1.4.67.zip
                    url = "https://github.com" + url;
                }

                updateInfo.files.Add(new UpdateFileInfo { name = Afiles.InnerText.Trim(), url = url });
            }
        }
    }

    return updateInfo;
}
/// <summary>
/// Replaces an <c>xref</c> node with an <c>&lt;a class="xref"&gt;</c> link when its key
/// resolves to an href, or with an <c>&lt;span class="xref"&gt;</c> when it does not.
/// </summary>
/// <param name="xref">The node to replace; its <c>href</c> attribute is the lookup key.</param>
/// <param name="internalXRefMap">Specs consulted first; their href is passed through <paramref name="updater"/>.</param>
/// <param name="externalXRefMap">Specs consulted second; used only when the spec has a non-empty href.</param>
/// <param name="updater">Rewrites an internal spec href.</param>
/// <param name="language">Language passed to <c>GetLanguageSpecificAttribute</c> when choosing the display name.</param>
private static void UpdateXref(HtmlAgilityPack.HtmlNode xref, Dictionary <string, XRefSpec> internalXRefMap, Dictionary <string, XRefSpec> externalXRefMap, Func <string, string> updater, string language)
{
    var key = xref.GetAttributeValue("href", null);

    // If name | fullName exists, use the one from xref because spec name is different from name for generic types
    // e.g. return type: IEnumerable<T>, spec name should be IEnumerable
    var name = xref.GetAttributeValue("name", null);
    var fullName = xref.GetAttributeValue("fullName", null);

    string href = null;
    XRefSpec spec = null;

    // Dictionary.TryGetValue throws ArgumentNullException for a null key, so a node without
    // an href attribute is treated as unresolved instead of aborting.
    if (key != null)
    {
        if (internalXRefMap.TryGetValue(key, out spec))
        {
            href = updater(spec.Href);
            if (href.IndexOf('#') == -1)
            {
                // TODO: What if href is not html?
                href = href + "#" + GetHtmlId(key);
            }
        }
        else if (externalXRefMap.TryGetValue(key, out spec) && !string.IsNullOrEmpty(spec.Href))
        {
            href = spec.Href;
        }
    }

    string displayName;
    string replacementHtml;
    if (href != null)
    {
        // Resolved: prefer the short name.
        if (!string.IsNullOrEmpty(name))
        {
            displayName = name;
        }
        else
        {
            displayName = string.IsNullOrEmpty(fullName) ? key : fullName;
            if (spec != null)
            {
                displayName = StringHelper.HtmlEncode(GetLanguageSpecificAttribute(spec, language, displayName, "name"));
            }
        }

        replacementHtml = $"<a class=\"xref\" href=\"{href}\">{displayName}</a>";
    }
    else
    {
        // Unresolved: prefer the full name.
        if (!string.IsNullOrEmpty(fullName))
        {
            displayName = fullName;
        }
        else
        {
            displayName = string.IsNullOrEmpty(name) ? key : name;
            if (spec != null)
            {
                displayName = StringHelper.HtmlEncode(GetLanguageSpecificAttribute(spec, language, displayName, "fullName", "name"));
            }
        }

        replacementHtml = $"<span class=\"xref\">{displayName}</span>";
    }

    xref.ParentNode.ReplaceChild(HtmlAgilityPack.HtmlNode.CreateNode(replacementHtml), xref);
}
/// <summary>
/// Extracts stream/playlist links from an HTML node.
/// </summary>
/// <remarks>
/// <c>a</c>, <c>meta</c>, <c>object</c> and <c>script</c> elements are inspected directly and
/// yield at most one link; any other element is searched recursively through its children.
/// </remarks>
/// <param name="htmlNode">Node to inspect; may be <c>null</c>.</param>
/// <returns>
/// The links found, or <c>null</c> when the node is null, unnamed, a '#'-prefixed node
/// (text, comment, ...), childless, or one of the directly inspected elements without a link.
/// For other elements with children the array may be empty.
/// </returns>
private string[] ProcessNode(HtmlAgilityPack.HtmlNode htmlNode)
{
    if (htmlNode == null || string.IsNullOrWhiteSpace(htmlNode.Name))
    {
        return null;
    }

    // Invariant lower-casing keeps tag matching independent of the current culture.
    string nodeName = htmlNode.Name.ToLowerInvariant().Trim();
    if (nodeName.StartsWith("#", StringComparison.Ordinal))
    {
        return null;
    }

    switch (nodeName)
    {
        case "a":
            return ExtractAnchorLink(htmlNode);

        case "meta":
            return ExtractMetaRefreshLink(htmlNode);

        case "object":
            return ExtractMediaPlayerFileName(htmlNode);

        case "script":
            return ExtractInlineScriptLink(htmlNode);

        default:
            return CollectChildLinks(htmlNode);
    }
}

/// <summary>Wraps a non-blank link in a one-element array; blank links become <c>null</c>.</summary>
private static string[] SingleLinkOrNull(string link)
{
    return string.IsNullOrWhiteSpace(link) ? null : new string[] { link };
}

/// <summary>Reads the link of an anchor: its href, or (LiveATC only) the quoted value inside a "mydirectstream" onClick handler.</summary>
private string[] ExtractAnchorLink(HtmlAgilityPack.HtmlNode htmlNode)
{
    string href = htmlNode.GetAttributeValue("href", string.Empty);
    if (!string.IsNullOrWhiteSpace(href))
    {
        return SingleLinkOrNull(ProcessUrl(href, false));
    }

    if (!_isLiveATC)
    {
        return null;
    }

    string onClick = htmlNode.GetAttributeValue("onClick", string.Empty);
    if (string.IsNullOrWhiteSpace(onClick) || onClick.IndexOf("mydirectstream", StringComparison.OrdinalIgnoreCase) < 0)
    {
        return null;
    }

    // The value is the text between the first pair of single quotes.
    int openQuote = onClick.IndexOf('\'');
    if (openQuote <= 0)
    {
        return null;
    }

    string afterQuote = onClick.Substring(openQuote + 1);
    if (string.IsNullOrWhiteSpace(afterQuote))
    {
        return null;
    }

    int closeQuote = afterQuote.IndexOf('\'');
    if (closeQuote <= 0)
    {
        return null;
    }

    string quotedValue = afterQuote.Substring(0, closeQuote);
    return SingleLinkOrNull(ProcessUrl(string.Format("http://d.liveatc.net/{0}.m3u", quotedValue), false));
}

/// <summary>Reads the target of a <c>&lt;meta http-equiv="refresh"&gt;</c> element.</summary>
private string[] ExtractMetaRefreshLink(HtmlAgilityPack.HtmlNode htmlNode)
{
    string metaType = htmlNode.GetAttributeValue("http-equiv", string.Empty);
    if (!string.Equals(metaType, "refresh", StringComparison.InvariantCultureIgnoreCase))
    {
        return null;
    }

    string metaContent = htmlNode.GetAttributeValue("content", string.Empty);
    if (string.IsNullOrWhiteSpace(metaContent))
    {
        return null;
    }

    // Search the original string case-insensitively, so the index always refers to the
    // string that is subsequently cut (a lower-cased copy is not needed).
    int indUrl = metaContent.IndexOf("url=", StringComparison.OrdinalIgnoreCase);
    if (indUrl >= 0)
    {
        metaContent = metaContent.Substring(indUrl + 4);
    }

    return SingleLinkOrNull(ProcessUrl(metaContent, false));
}

/// <summary>Reads the "filename" param of a media player <c>object</c> element (matched by class id).</summary>
private static string[] ExtractMediaPlayerFileName(HtmlAgilityPack.HtmlNode htmlNode)
{
    string classid = htmlNode.GetAttributeValue("classid", string.Empty);
    if (!string.Equals(classid, "CLSID:22d6f312-b0f6-11d0-94ab-0080c74c7e95", StringComparison.InvariantCultureIgnoreCase))
    {
        return null;
    }

    //this is a media player object...
    foreach (HtmlAgilityPack.HtmlNode subNode in htmlNode.ChildNodes)
    {
        string subNodeName = subNode.Name.ToLowerInvariant().Trim();
        bool isFileNameParam =
            string.Equals(subNodeName, "param", StringComparison.InvariantCultureIgnoreCase) &&
            string.Equals(subNode.GetAttributeValue("name", string.Empty), "filename", StringComparison.InvariantCultureIgnoreCase);
        if (!isFileNameParam)
        {
            continue;
        }

        string strFileName = subNode.GetAttributeValue("value", string.Empty);
        if (!string.IsNullOrWhiteSpace(strFileName))
        {
            return new string[] { strFileName };
        }
    }

    return null;
}

/// <summary>Scans an inline script (one without a src attribute) line by line for the first usable link.</summary>
private string[] ExtractInlineScriptLink(HtmlAgilityPack.HtmlNode htmlNode)
{
    if (!string.IsNullOrWhiteSpace(htmlNode.GetAttributeValue("src", string.Empty)) || string.IsNullOrWhiteSpace(htmlNode.InnerText))
    {
        return null;
    }

    //process script...
    using (System.IO.StringReader rdr = new System.IO.StringReader(htmlNode.InnerText))
    {
        // The line read from the reader is kept separate from the working copy: reusing one
        // variable let a null helper result end the scan before the script was exhausted.
        string rawLine;
        while ((rawLine = rdr.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(rawLine))
            {
                continue;
            }

            string link = ExtractLinkFromScriptLine(rawLine.ToLowerInvariant());
            if (link != null)
            {
                return new string[] { link };
            }
        }
    }

    return null;
}

/// <summary>Extracts a link from one lower-cased script line (".playlist", "ipadurl" or ".m3u" forms); returns <c>null</c> when there is none.</summary>
private string ExtractLinkFromScriptLine(string inLine)
{
    int indx = inLine.IndexOf(".playlist", StringComparison.Ordinal);
    if (indx >= 0)
    {
        // Value between the first pair of single quotes after ".playlist" (9 characters).
        string rest = inLine.Substring(indx + 9);
        indx = rest.IndexOf('\'');
        if (indx < 0)
        {
            return null;
        }

        rest = rest.Substring(indx + 1);
        indx = rest.IndexOf('\'');
        if (indx < 0)
        {
            return null;
        }

        string feedUrl = GetFeedUrlFromValue(rest.Substring(0, indx), true);
        return string.IsNullOrWhiteSpace(feedUrl) ? null : feedUrl;
    }

    indx = inLine.IndexOf("ipadurl", StringComparison.Ordinal);
    if (indx >= 0)
    {
        // Value between the first quote after "ipadurl" (7 characters) and the last quote;
        // double quotes are preferred, single quotes are the fallback.
        string rest = inLine.Substring(indx + 7);
        indx = rest.IndexOf('"');
        if (indx < 0)
        {
            indx = rest.IndexOf('\'');
        }

        if (indx < 0)
        {
            return null;
        }

        rest = rest.Substring(indx + 1);
        indx = rest.LastIndexOf('"');
        if (indx < 0)
        {
            indx = rest.LastIndexOf('\'');
        }

        if (indx < 0)
        {
            return null;
        }

        string feedUrl = GetFeedUrlFromValue(rest.Substring(0, indx), true);
        return string.IsNullOrWhiteSpace(feedUrl) ? null : feedUrl;
    }

    if (inLine.IndexOf(".m3u", StringComparison.Ordinal) >= 0)
    {
        return ExtractBroadcastifyPlaylistLink(inLine);
    }

    return null;
}

/// <summary>
/// Handles a script line that mentions ".m3u". On Broadcastify pages it either records the
/// playlist prefix/suffix (no feed id set) or builds the playlist link for the feed id.
/// </summary>
private string ExtractBroadcastifyPlaylistLink(string inLine)
{
    int indxStart = inLine.IndexOf('"');
    int indxEnd = inLine.LastIndexOf('"');

    if (indxEnd > 0)
    {
        inLine = inLine.Substring(0, indxEnd);
    }

    // With a single double quote, start == end and the truncated string ends before
    // start + 1; the bounds check avoids the ArgumentOutOfRangeException in that case.
    if (indxStart >= 0 && indxStart < inLine.Length)
    {
        inLine = inLine.Substring(indxStart + 1);
    }

    if (!_isBroadcastify || inLine.IndexOf('"') < 0)
    {
        return null;
    }

    // What remains looks like  prefix" + id + "suffix : keep the outer pieces.
    indxStart = inLine.IndexOf('"');
    indxEnd = inLine.LastIndexOf('"');
    string tmpStart = inLine.Substring(0, indxStart);
    string tmpEnd = inLine.Substring(indxEnd + 1);

    if (string.IsNullOrWhiteSpace(_broadcastifyFeedId))
    {
        _broadcastifyListPrefix = tmpStart;
        _broadcastifyListSuffix = tmpEnd;
        return null;
    }

    string link = tmpStart + _broadcastifyFeedId + tmpEnd;
    link = link.StartsWith("/", StringComparison.Ordinal)
        ? _broadcastifyPrefix + link
        : _broadcastifyPrefix + "/" + link;

    return string.IsNullOrWhiteSpace(link) ? null : link;
}

/// <summary>Recursively gathers the distinct links of all child nodes, in discovery order.</summary>
private string[] CollectChildLinks(HtmlAgilityPack.HtmlNode htmlNode)
{
    if (!htmlNode.HasChildNodes)
    {
        return null;
    }

    List <string> subLinks = new List <string>();
    HashSet <string> seenLinks = new HashSet <string>();
    foreach (HtmlAgilityPack.HtmlNode subNode in htmlNode.ChildNodes)
    {
        string[] links = ProcessNode(subNode);
        if (links == null)
        {
            continue;
        }

        foreach (string strLink in links)
        {
            // HashSet.Add is false for duplicates; the list preserves first-seen order.
            if (seenLinks.Add(strLink))
            {
                subLinks.Add(strLink);
            }
        }
    }

    PostProcessBroadcastifyFeeds(subLinks);
    return subLinks.ToArray();
}
/// <summary>
/// Converts a paragraph node, choosing the output by the node's <c>class</c> attribute.
/// </summary>
/// <remarks>
/// No class: a "2.2 Story Text" paragraph, written only when the node has non-blank text.
/// "article-paragraph": a "2.2 Story Text" paragraph. "nomargin": a
/// "2.25 Story Text - No Spacing" paragraph. "exhibitnumber", "exhibittitle" and "source" are
/// skipped here. "question", "answer" and "CompanyName" are delegated to their node converters.
/// Any other class writes nothing. Class names are matched case-insensitively.
/// </remarks>
/// <param name="node">The paragraph node.</param>
/// <param name="writer">Destination XML writer.</param>
public void Convert(HtmlAgilityPack.HtmlNode node, System.Xml.XmlWriter writer)
{
    var classOfParagraph = node.GetAttributeValue("class", null);

    if (string.IsNullOrEmpty(classOfParagraph))
    {
        if (!string.IsNullOrEmpty(node.InnerText.Trim()))
        {
            WriteParagraph(node, writer, "2.2 Story Text");
        }

        return;
    }

    // Class names are fixed identifiers, so they are compared ordinally; a culture-sensitive
    // comparison can fail to match e.g. "QUESTION" under Turkish casing rules.
    if (IsClass(classOfParagraph, "article-paragraph"))
    {
        WriteParagraph(node, writer, "2.2 Story Text");
    }
    else if (IsClass(classOfParagraph, "nomargin"))
    {
        WriteParagraph(node, writer, "2.25 Story Text - No Spacing");
    }
    else if (IsClass(classOfParagraph, "exhibitnumber")
             || IsClass(classOfParagraph, "exhibittitle")
             || IsClass(classOfParagraph, "source"))
    {
        // exhibit numbers, exhibit titles and sources are handled by the image/table.
        return;
    }
    else if (IsClass(classOfParagraph, "question"))
    {
        var interviewQuestion = new InterviewQuestionNode();
        interviewQuestion.Convert(node, writer);
    }
    else if (IsClass(classOfParagraph, "answer"))
    {
        var interviewAnswer = new InterviewAnswerNode();
        interviewAnswer.Convert(node, writer);
    }
    else if (IsClass(classOfParagraph, "CompanyName"))
    {
        var companyName = new CompanyNameNode();
        companyName.Convert(node, writer);
    }
}

/// <summary>Case-insensitive, culture-independent comparison of a class attribute with a known class name.</summary>
private static bool IsClass(string actualClass, string expectedClass)
{
    return string.Equals(actualClass, expectedClass, StringComparison.OrdinalIgnoreCase);
}

/// <summary>Writes a <c>p</c> element with the given content-type attribute and the node's inner text content.</summary>
private static void WriteParagraph(HtmlAgilityPack.HtmlNode node, System.Xml.XmlWriter writer, string contentType)
{
    using (writer.StartElement("p"))
    {
        writer.WriteAttributeString("content-type", contentType);
        var innerContent = new InnerTextNode();
        innerContent.Convert(node, writer);
    }
}
/// <summary>
/// Queries the site's <c>/search</c> page and parses the result items together with their
/// subtitle rows.
/// </summary>
/// <param name="search">Search keywords, sent as the <c>q</c> parameter.</param>
/// <returns>
/// One entry per result item that has at least one parsable subtitle row; an empty list when
/// the page is empty or contains no result items. Items or rows with missing markup are skipped.
/// </returns>
public List <SubtitleWebList> Search(string search)
{
    List <SubtitleWebList> zimu1s = new List <SubtitleWebList>();
    List <HttpLib.Val> _conmand = new List <HttpLib.Val> { new HttpLib.Val("q", search) };
    List <HttpLib.Val> _header = new List <HttpLib.Val>
    {
        new HttpLib.Val("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"),
        new HttpLib.Val("Accept-Encoding", "gzip, deflate"),
        new HttpLib.Val("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"),
        new HttpLib.Val("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 Edg/83.0.478.37"),
    };

    string html = HttpLib.Http.Get(urlbase + "/search").header(_header).data(_conmand).redirect(true).request();
    if (string.IsNullOrEmpty(html))
    {
        return zimu1s;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    var sd = doc.DocumentNode.SelectNodes("//div[@class='box clearfix']/div[@class='item prel clearfix']");
    if (sd == null)
    {
        return zimu1s;
    }

    foreach (HtmlAgilityPack.HtmlNode item in sd)
    {
        try
        {
            SubtitleWebList zimu1 = ParseSearchItem(item);
            if (zimu1 != null && zimu1.data.Count > 0)
            {
                zimu1s.Add(zimu1);
            }
        }
        catch (System.Exception ex)
        {
            // Best effort: one malformed item must not abort the search. Expected gaps are
            // handled by null checks; anything reaching here is traced instead of being lost.
            System.Diagnostics.Debug.WriteLine("Search: skipped a result item: " + ex.Message);
        }
    }

    return zimu1s;
}

/// <summary>Parses one result item; returns <c>null</c> when required markup is missing.</summary>
private SubtitleWebList ParseSearchItem(HtmlAgilityPack.HtmlNode item)
{
    HtmlAgilityPack.HtmlNode main1 = item.SelectSingleNode("div[1]/a");
    HtmlAgilityPack.HtmlNode main2 = item.SelectSingleNode("div[2]");
    if (main1 == null || main2 == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode main1_img = main1.SelectSingleNode("img");
    HtmlAgilityPack.HtmlNode titleNode = main2.SelectSingleNode("p[1]/a");
    HtmlAgilityPack.HtmlNode introNode = main2.SelectSingleNode("p[2]/a");
    var main2_table = main2.SelectNodes("div/table/tbody/tr");
    if (main1_img == null || titleNode == null || introNode == null || main2_table == null)
    {
        return null;
    }

    SubtitleWebList zimu1 = new SubtitleWebList
    {
        title = titleNode.InnerText,
        intro = introNode.InnerText,
        img = "http:" + main1_img.GetAttributeValue("data-original", null),
        url = urlbase + main1.GetAttributeValue("href", null),
        data = new List <SubtitleWebItem>(),
    };

    foreach (HtmlAgilityPack.HtmlNode table in main2_table)
    {
        try
        {
            SubtitleWebItem zimu2 = ParseSubtitleRow(table);
            if (zimu2 != null)
            {
                zimu1.data.Add(zimu2);
            }
        }
        catch (System.Exception ex)
        {
            // Best effort per row, see Search.
            System.Diagnostics.Debug.WriteLine("Search: skipped a subtitle row: " + ex.Message);
        }
    }

    return zimu1;
}

/// <summary>Parses one subtitle table row; returns <c>null</c> when required markup is missing.</summary>
private SubtitleWebItem ParseSubtitleRow(HtmlAgilityPack.HtmlNode table)
{
    const string qualityPrefix = "字幕质量:";

    HtmlAgilityPack.HtmlNode td1 = table.SelectSingleNode("td[1]");
    if (td1 == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode td2 = table.SelectSingleNode("td[2]/div/i");
    HtmlAgilityPack.HtmlNode td11 = td1.SelectSingleNode("img");
    if (td2 == null || td11 == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode td1a = td1.SelectSingleNode("a");
    HtmlAgilityPack.HtmlNode downNode = table.SelectSingleNode("td[3]");
    string quality = td2.GetAttributeValue("title", null);
    string alt = td11.GetAttributeValue("alt", null);
    if (td1a == null || downNode == null || quality == null || alt == null)
    {
        return null;
    }

    string type = quality.TrimEnd('分');
    SubtitleWebItem zimu2 = new SubtitleWebItem
    {
        // Non-breaking spaces (entity or U+00A0) become plain spaces before trimming.
        imgalt = alt.Replace("&nbsp;", " ").Replace('\u00A0', ' ').Trim(),
        imgsrc = urlbase + td11.GetAttributeValue("src", null),
        title = td1a.InnerText,
        url = urlbase + td1a.GetAttributeValue("href", null),
        down = downNode.InnerText,
    };

    // The title attribute is expected to be the prefix followed by a number.
    if (type.StartsWith(qualityPrefix, System.StringComparison.Ordinal))
    {
        type = type.Remove(0, qualityPrefix.Length).Trim();
        int _type;
        if (int.TryParse(type, out _type))
        {
            zimu2.type = _type;
        }
    }

    return zimu2;
}
/// <summary>
/// Tells whether the node's <c>class</c> attribute contains every one of the given class names.
/// </summary>
/// <param name="node">Node whose <c>class</c> attribute is inspected.</param>
/// <param name="arrClassName">Class names that must all be present (case-sensitive).</param>
/// <returns>
/// <c>true</c> when all names are present, or when no names are given; otherwise <c>false</c>.
/// </returns>
public static bool HasClass(HtmlAgilityPack.HtmlNode node, params string[] arrClassName)
{
    if (arrClassName.Length == 0)
    {
        // Vacuously true, without touching the node.
        return true;
    }

    // Split once (not once per requested name). Class tokens may be separated by any ASCII
    // whitespace, not just a single space.
    string[] nodeClasses = node
        .GetAttributeValue("class", "")
        .Split(new[] { ' ', '\t', '\r', '\n', '\f' });

    return arrClassName.All(x => nodeClasses.Contains(x));
}
/// <summary>
/// Scrapes user information.
/// </summary>
/// <remarks>
/// Steps:
/// 1. Load the user's name-card URL and read the encrypted user id from the name card
///    (e.g. b89034eecee41f9b87b1eac2a0faac43). If it cannot be read — the request may have
///    been redirected to the user's shop home page — read the other form of encrypted id
///    instead (e.g. UvGx0OmvyvGIu).
/// 2. Using the id from step 1, go to the user's personal credit page and extract the data.
/// On success the populated buyer model is placed in <c>msgModel.CustomObject</c> and
/// <c>msgModel.IsSuccess</c> is set to true; when no user id is found, <c>msgModel.IsSuccess</c>
/// is set to false with a message and the method returns early.
/// </remarks>
/// <param name="keyModel">Search key; its <c>KeyName</c> is used as the nickname and is passed to <c>keyBLL.Add</c> at the end.</param>
/// <param name="action_ShowProcess">Callback that receives progress messages.</param>
/// <param name="msgModel">Receives the outcome (message, success flag, result object).</param>
private void GrabeBuyerInfo(XCLShouCang.Model.TB_SearchKey keyModel, Action <string> action_ShowProcess, XCLNetTools.Message.MessageModel msgModel)
{
    action_ShowProcess("正准备查询账号信息...");
    string searchUrl = string.Empty;
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode tempHTMLNode = null;
    HtmlAgilityPack.HtmlNodeCollection tempHTMLNodeCon = null;
    string pageHtml = string.Empty;
    XCLShouCang.Model.TB_BuyerInfo buyerModel = new XCLShouCang.Model.TB_BuyerInfo();
    // Matches an optionally signed integer or decimal number.
    Regex regNumber = new Regex(@"[-\+]?\d+(\.\d+)?");
    #region 基本信息
    // Region: basic information (name card page).
    action_ShowProcess("正在查询基本信息...");
    Regex regUserID = new Regex("(user_id=)(.*)");
    Regex regArea = new Regex("(:)(.*)");
    Regex regReg = new Regex(@"(\d{4}.\d{2}.\d{2})");
    // The nickname is URL-encoded as GB2312 before being substituted into the profile URL.
    searchUrl = Web.Common.CommonHelper.WebInfo.TB_UserProfileURL.Replace("{UID}", System.Web.HttpUtility.UrlEncode(keyModel.KeyName, System.Text.Encoding.GetEncoding("GB2312")));
    pageHtml = Web.Common.CommonHelper.DownLoadHTMLString(searchUrl);
    doc.LoadHtml(pageHtml);
    buyerModel.NickName = (keyModel.KeyName ?? "").Trim();
    // Name card
    var nameCardNode = doc.DocumentNode.SelectSingleNode("//div[@id='NameCard']");
    if (null != nameCardNode)
    {
        // NOTE(review): the XPath expressions below start with "//", which XPath evaluates
        // from the document root rather than from nameCardNode — confirm this is intended.
        // Shop
        var shopNode = nameCardNode.SelectSingleNode("//li[contains(text(),'淘宝店铺')]/a");
        if (null != shopNode)
        {
            buyerModel.ShopName = (shopNode.InnerText ?? "").Trim();
            buyerModel.ShopURL = shopNode.GetAttributeValue("href", string.Empty);
        }
        // UID: everything after "user_id=" in the shop URL.
        // NOTE(review): ShopURL is only assigned above when a shop link exists; if it is still
        // null here (depends on TB_BuyerInfo's defaults), Regex.Match throws
        // ArgumentNullException — confirm.
        buyerModel.UserID = regUserID.Match(buyerModel.ShopURL).Groups[2].Value;
        // Registration time
        tempHTMLNode = nameCardNode.SelectSingleNode("//li[contains(text(),'注册时间')]");
        if (null != tempHTMLNode)
        {
            buyerModel.RegTime = XCLNetTools.StringHander.Common.GetDateTimeNullable(regReg.Match(tempHTMLNode.InnerText.Trim()).Groups[1].Value);
        }
        // Certification
        tempHTMLNode = nameCardNode.SelectSingleNode("//li[contains(text(),'认证情况')]/a/img");
        if (null != tempHTMLNode)
        {
            buyerModel.CertificateLinkImg = tempHTMLNode.GetAttributeValue("src", string.Empty);
            buyerModel.CertificateType = (tempHTMLNode.GetAttributeValue("title", string.Empty) ?? "").Trim();
        }
        // Location: the text after the first ':'.
        tempHTMLNode = nameCardNode.SelectSingleNode("//li[contains(text(),'所 在 地')]");
        if (null != tempHTMLNode)
        {
            buyerModel.Area = (regArea.Match(tempHTMLNode.InnerText.Trim()).Groups[2].Value ?? "").Trim();
        }
    }
    else
    {
        // The request may have been redirected to the shop home page.
        Match userIDMT = new Regex(@"user-rate-([A-Za-z0-9]+)\.htm").Match(pageHtml);//UvGx0OmvyvGIu
        if (userIDMT.Success)
        {
            buyerModel.UserID = (userIDMT.Groups[1].Value ?? "").Trim();
            // Shop name = page title with the listed words removed.
            // NOTE(review): the //title node is dereferenced without a null check.
            buyerModel.ShopName = new Regex("首页|-|淘宝网").Replace(doc.DocumentNode.SelectSingleNode("//title").InnerText, "").Trim();
        }
    }
    if (string.IsNullOrEmpty(buyerModel.UserID))
    {
        msgModel.Message = "未找到该用户信息!";
        msgModel.IsSuccess = false;
        return;
    }
    #endregion
    #region 信用首页获取其它信息
    // Region: further information from the credit (rating) home page.
    action_ShowProcess("正在查询信用信息...");
    pageHtml = Web.Common.CommonHelper.DownLoadHTMLString(Web.Common.CommonHelper.WebInfo.TB_UserRateHomeURL.Replace("{UID}", buyerModel.UserID.ToString()));
    doc.LoadHtml(pageHtml);
    var docNode = doc.DocumentNode;
    // Buyer  http://rate.taobao.com/user-rate-49095164e83c97b0a713aa536ea1cbc5.htm
    // Seller http://rate.taobao.com/user-rate-b89034eecee41f9b87b1eac2a0faac43.htm
    // NOTE(review): personNode is not used anywhere below.
    var personNode = docNode.SelectSingleNode("//div[contains(@class,'personal-info-box')]");
    // NOTE(review): the J_showShopStartDate input is dereferenced without a null check.
    buyerModel.ShopCreateTime = XCLNetTools.StringHander.Common.GetDateTimeNullable(docNode.SelectSingleNode("//input[@id='J_showShopStartDate']").GetAttributeValue("value", string.Empty));
    // A user counts as a seller when a shop creation time could be read.
    buyerModel.IsSeller = null != buyerModel.ShopCreateTime;
    if (buyerModel.IsSeller)
    {
        // Current main business category.
        tempHTMLNode = docNode.SelectSingleNode("//li[contains(text(),'当前主营')]");
        if (null != tempHTMLNode)
        {
            buyerModel.ShopMainType = (tempHTMLNode.SelectSingleNode("a").InnerText ?? "").Trim();
        }
        tempHTMLNode = docNode.SelectSingleNode("//input[@id='J_ShopIdHidden']");
        if (null != tempHTMLNode)
        {
            buyerModel.ShopID = XCLNetTools.StringHander.Common.GetLong(tempHTMLNode.GetAttributeValue("value", string.Empty));
        }
        // NOTE(review): NickName is inserted into the XPath string unescaped; a nickname
        // containing a single quote would produce an invalid expression — confirm.
        tempHTMLNode = docNode.SelectSingleNode(string.Format("//a[contains(text(),'{0}')]", buyerModel.NickName));
        if (null != tempHTMLNode)
        {
            buyerModel.ShopURL = (tempHTMLNode.GetAttributeValue("href", string.Empty) ?? "").Trim();
        }
        // Seller credit: first number in the item's text, plus the rank image when > 0.
        tempHTMLNode = docNode.SelectSingleNode("//li[contains(text(),'卖家信用')]");
        if (null != tempHTMLNode)
        {
            buyerModel.SellerCreditValue = XCLNetTools.StringHander.Common.GetInt(regNumber.Match(tempHTMLNode.InnerText).Value);
            if (buyerModel.SellerCreditValue > 0)
            {
                var sellerCreditImgNode = tempHTMLNode.SelectSingleNode("child::a/img");
                if (null != sellerCreditImgNode)
                {
                    buyerModel.SellerCreditImg = (sellerCreditImgNode.GetAttributeValue("src", string.Empty) ?? "").Trim();
                }
            }
        }
        // Buyer credit (seller page layout).
        // NOTE(review): buyerCreditNode is dereferenced without a null check.
        var buyerCreditNode = docNode.SelectSingleNode("//li[contains(text(),'买家信用')]");
        buyerModel.BuyerCreditValue = XCLNetTools.StringHander.Common.GetInt(regNumber.Match(buyerCreditNode.InnerText).Value);
        if (buyerModel.BuyerCreditValue > 0)
        {
            tempHTMLNode = buyerCreditNode.SelectSingleNode("child::a/img");
            if (null != tempHTMLNode)
            {
                buyerModel.BuyerCreditImg = (tempHTMLNode.GetAttributeValue("src", string.Empty) ?? "").Trim();
            }
        }
    }
    else
    {
        // Buyer credit (buyer page layout): the value is the text of the J_BuyerRate link.
        // NOTE(review): buyerCreditNode, the J_BuyerRate link and the img node are all
        // dereferenced without null checks.
        var buyerCreditNode = docNode.SelectSingleNode("//li[contains(text(),'买家信用')]");
        buyerModel.BuyerCreditValue = XCLNetTools.StringHander.Common.GetInt(buyerCreditNode.SelectSingleNode("child::a[@id='J_BuyerRate']").InnerText.Trim());
        if (buyerModel.BuyerCreditValue > 0)
        {
            buyerModel.BuyerCreditImg = (buyerCreditNode.SelectSingleNode(".//img").GetAttributeValue("src", string.Empty) ?? "").Trim();
        }
    }
    // Certification information (only when the name card did not provide it).
    if (string.IsNullOrEmpty(buyerModel.CertificateType))
    {
        tempHTMLNodeCon = docNode.SelectNodes("//li[contains(text(),'认证信息')]/a/img | //dt[contains(text(),'认证信息')]");
        if (null != tempHTMLNodeCon && tempHTMLNodeCon.Count > 0)
        {
            foreach (var m in tempHTMLNodeCon)
            {
                buyerModel.CertificateLinkImg = (m.GetAttributeValue("src", string.Empty) ?? "").Trim();
                buyerModel.CertificateType = (m.GetAttributeValue("title", string.Empty) ?? "").Trim();
                // For a <dt> match the image sits in the following <dd>.
                if (string.IsNullOrEmpty(buyerModel.CertificateType) && string.Equals(m.Name, "dt", StringComparison.OrdinalIgnoreCase))
                {
                    var cerImgNode = m.SelectSingleNode("following-sibling::dd/a/img");
                    if (null != cerImgNode)
                    {
                        buyerModel.CertificateLinkImg = (cerImgNode.GetAttributeValue("src", string.Empty) ?? "").Trim();
                        buyerModel.CertificateType = (cerImgNode.GetAttributeValue("title", string.Empty) ?? "").Trim();
                    }
                }
                // Stop at the first node that yields a certificate type.
                if (!string.IsNullOrEmpty(buyerModel.CertificateType))
                {
                    break;
                }
            }
        }
    }
    // Location (only when the name card did not provide it).
    if (string.IsNullOrEmpty(buyerModel.Area))
    {
        tempHTMLNodeCon = docNode.SelectNodes("//li[contains(text(),'所在地区')] | //dt[contains(text(),'所在地区')]");
        if (null != tempHTMLNodeCon && tempHTMLNodeCon.Count > 0)
        {
            foreach (var m in tempHTMLNodeCon)
            {
                // Strip the label and all whitespace from the node text.
                buyerModel.Area = (new Regex(@"所在地区:|\s+").Replace(m.InnerText, "") ?? "").Trim();
                // For a <dt> match the value sits in the following <dd>.
                // NOTE(review): the following <dd> is dereferenced without a null check.
                if (string.IsNullOrEmpty(buyerModel.Area) && string.Equals(m.Name, "dt", StringComparison.OrdinalIgnoreCase))
                {
                    buyerModel.Area = m.SelectSingleNode("following-sibling::dd").InnerText.Trim();
                }
                if (!string.IsNullOrEmpty(buyerModel.Area))
                {
                    break;
                }
            }
        }
    }
    // Rating information node.
    // NOTE(review): appraiseNode is dereferenced below without a null check, and the queries
    // issued on it start with "//", i.e. they are evaluated from the document root.
    var appraiseNode = docNode.SelectSingleNode("//div[contains(@class,'personal-rating')]//div[contains(@class,'rate-box')]");
    // Positive rating rate.
    var goodRateNode = appraiseNode.SelectSingleNode("//em[contains(text(),'好评率')]");
    if (null != goodRateNode)
    {
        buyerModel.GoodAppraiseRate = XCLNetTools.StringHander.Common.GetDecimal(regNumber.Match(goodRateNode.InnerText).Value);
    }
    // Rating statistics nodes (one list item per time range).
    var appraiseInfoNode = appraiseNode.SelectNodes("//div[@id='J_menu_list']//ul//li");
    if (null != appraiseInfoNode && appraiseInfoNode.Count > 0)
    {
        var appraiseCountList = new List <XCLShouCang.Model.TB_AppraiseCount>();
        // Captures the count that opens each "[count," group of the rel attribute.
        var regGetCount = new Regex(@"\[(\d+),");
        foreach (var m in appraiseInfoNode)
        {
            // Example rel value:
            //[[5,{'rater':'1','direction':'0','result':'1','timeLine':'-7'}],[0,{'rater':'1','direction':'0','result':'0','timeLine':'-7'}],[0,{'rater':'1','direction':'0','result':'-1','timeLine':'-7'}]]
            string relValue = m.GetAttributeValue("rel", string.Empty);
            string mText = m.InnerText.Trim();
            var matchs = regGetCount.Matches(relValue);
            // NOTE(review): matchs[0..2] are indexed without checking matchs.Count; a rel value
            // with fewer than three groups would throw ArgumentOutOfRangeException.
            // Matches are read as good / middle / bad counts, in that order.
            if (mText.Contains("最近一周"))
            {
                // Last week.
                appraiseCountList.Add(new XCLShouCang.Model.TB_AppraiseCount()
                {
                    AppraiseTimeType = Web.Common.CommonHelper.AppraiseTimeTypeEnum.最近一周.ToString(),
                    GoodCount = XCLNetTools.StringHander.Common.GetInt(matchs[0].Groups[1].Value),
                    MiddleCount = XCLNetTools.StringHander.Common.GetInt(matchs[1].Groups[1].Value),
                    BadCount = XCLNetTools.StringHander.Common.GetInt(matchs[2].Groups[1].Value)
                });
            }
            else if (mText.Contains("最近一月"))
            {
                // Last month.
                appraiseCountList.Add(new XCLShouCang.Model.TB_AppraiseCount()
                {
                    AppraiseTimeType = Web.Common.CommonHelper.AppraiseTimeTypeEnum.最近一月.ToString(),
                    GoodCount = XCLNetTools.StringHander.Common.GetInt(matchs[0].Groups[1].Value),
                    MiddleCount = XCLNetTools.StringHander.Common.GetInt(matchs[1].Groups[1].Value),
                    BadCount = XCLNetTools.StringHander.Common.GetInt(matchs[2].Groups[1].Value)
                });
            }
            else if (mText.Contains("最近半年"))
            {
                // Last half year.
                appraiseCountList.Add(new XCLShouCang.Model.TB_AppraiseCount()
                {
                    AppraiseTimeType = Web.Common.CommonHelper.AppraiseTimeTypeEnum.最近半年.ToString(),
                    GoodCount = XCLNetTools.StringHander.Common.GetInt(matchs[0].Groups[1].Value),
                    MiddleCount = XCLNetTools.StringHander.Common.GetInt(matchs[1].Groups[1].Value),
                    BadCount = XCLNetTools.StringHander.Common.GetInt(matchs[2].Groups[1].Value)
                });
            }
            else if (mText.Contains("半年以前"))
            {
                // More than half a year ago.
                if (buyerModel.IsSeller)
                {
                    // Sellers: the counts are the texts of exactly three matching links.
                    var tmpRate = m.SelectNodes("//a[@data-point-val][contains(@href,'timeLine|-211')]");
                    if (null != tmpRate && tmpRate.Count == 3)
                    {
                        appraiseCountList.Add(new XCLShouCang.Model.TB_AppraiseCount()
                        {
                            AppraiseTimeType = Web.Common.CommonHelper.AppraiseTimeTypeEnum.半年以前.ToString(),
                            GoodCount = XCLNetTools.StringHander.Common.GetInt(tmpRate[0].InnerText.Trim()),
                            MiddleCount = XCLNetTools.StringHander.Common.GetInt(tmpRate[1].InnerText.Trim()),
                            BadCount = XCLNetTools.StringHander.Common.GetInt(tmpRate[2].InnerText.Trim())
                        });
                    }
                }
                else
                {
                    // Example rel value:
                    //[[260,'http://ratehis.taobao.com/user-rate-UvFkbMmQyMGHL--isarchive|true--detailed|1--goodNeutralOrBad|1--timeLine|-211--receivedOrPosted|0--buyerOrSeller|1.htm#RateType'],[0,'http://ratehis.taobao.com/user-rate-UvFkbMmQyMGHL--isarchive|true--detailed|1--goodNeutralOrBad|0--timeLine|-211--receivedOrPosted|0--buyerOrSeller|1.htm#RateType'],[0,'http://ratehis.taobao.com/user-rate-UvFkbMmQyMGHL--isarchive|true--detailed|1--goodNeutralOrBad|-1--timeLine|-211--receivedOrPosted|0--buyerOrSeller|1.htm#RateType']]
                    appraiseCountList.Add(new XCLShouCang.Model.TB_AppraiseCount()
                    {
                        AppraiseTimeType = Web.Common.CommonHelper.AppraiseTimeTypeEnum.半年以前.ToString(),
                        GoodCount = XCLNetTools.StringHander.Common.GetInt(matchs[0].Groups[1].Value),
                        MiddleCount = XCLNetTools.StringHander.Common.GetInt(matchs[1].Groups[1].Value),
                        BadCount = XCLNetTools.StringHander.Common.GetInt(matchs[2].Groups[1].Value)
                    });
                }
            }
        }
        // The same statistics list is stored on the seller or the buyer side of the model.
        if (buyerModel.IsSeller)
        {
            buyerModel.SellerAppraiseCountList = appraiseCountList;
        }
        else
        {
            buyerModel.BuyerAppraiseCountList = appraiseCountList;
        }
    }
    #endregion
    #region 向数据库中保存数据
    // Region: save data to the database.
    // NOTE(review): only keyModel is passed to keyBLL.Add here; buyerModel is returned to the
    // caller through msgModel and is not saved in this method — confirm that is intended.
    XCLShouCang.BLL.TB_SearchKey keyBLL = new XCLShouCang.BLL.TB_SearchKey();
    keyBLL.Add(keyModel);
    #endregion
    msgModel.CustomObject = buyerModel;
    msgModel.IsSuccess = true;
    return;
}
/// <summary>
/// Builds an <see cref="XRefDetails"/> from an <c>xref</c> element.
/// </summary>
/// <remarks>
/// The raw uid returned by <c>GetRawUid</c> is split as <c>uid[?query][#anchor]</c>;
/// query-string values act as fallbacks for the matching attributes.
/// </remarks>
/// <param name="node">The <c>xref</c> node to read.</param>
/// <returns>The populated details object.</returns>
/// <exception cref="NotSupportedException">The node is not an <c>xref</c> element.</exception>
public static XRefDetails From(HtmlAgilityPack.HtmlNode node)
{
    if (node.Name != "xref")
    {
        throw new NotSupportedException("Only xref node is supported!");
    }

    var xref = new XRefDetails();
    var rawUid = GetRawUid(node);
    NameValueCollection queryString = null;

    if (!string.IsNullOrEmpty(rawUid))
    {
        // Char overloads search ordinally; the string overloads of IndexOf are culture-sensitive.
        string others;
        var anchorIndex = rawUid.IndexOf('#');
        if (anchorIndex == -1)
        {
            xref.Anchor = string.Empty;
            others = rawUid;
        }
        else
        {
            // The anchor keeps its leading '#'.
            xref.Anchor = rawUid.Substring(anchorIndex);
            others = rawUid.Remove(anchorIndex);
        }

        var queryIndex = others.IndexOf('?');
        if (queryIndex == -1)
        {
            xref.Uid = HttpUtility.UrlDecode(others);
        }
        else
        {
            xref.Uid = HttpUtility.UrlDecode(others.Remove(queryIndex));
            queryString = HttpUtility.ParseQueryString(others.Substring(queryIndex));
        }
    }

    xref.InnerHtml = node.InnerHtml;

    // Attribute values win; query-string values (then fixed defaults) are the fallback.
    xref.DisplayProperty = node.GetAttributeValue("displayProperty", queryString?.Get("displayProperty") ?? XRefSpec.NameKey);
    xref.AltProperty = node.GetAttributeValue("altProperty", queryString?.Get("altProperty") ?? "fullName");
    xref.Text = node.GetAttributeValue("text", node.GetAttributeValue("name", StringHelper.HtmlEncode(queryString?.Get("text"))));
    xref.Alt = node.GetAttributeValue("alt", node.GetAttributeValue("fullname", StringHelper.HtmlEncode(queryString?.Get("alt"))));
    xref.Title = node.GetAttributeValue("title", queryString?.Get("title"));

    // Both `data-raw-html` and `data-raw` are html encoded. Use `data-raw-html` with higher priority.
    // `data-raw-html` will be decoded then displayed, while `data-raw` will be displayed directly.
    var raw = node.GetAttributeValue("data-raw-html", null);
    xref.Raw = string.IsNullOrEmpty(raw)
        ? node.GetAttributeValue("data-raw", null)
        : StringHelper.HtmlDecode(raw);

    xref.ThrowIfNotResolved = node.GetAttributeValue("data-throw-if-not-resolved", false);
    return xref;
}
/// <summary>
/// Recursively copies <paramref name="input"/> beneath <paramref name="output"/>, keeping only
/// text nodes and elements whose original name is contained in <c>availableTags</c>.
/// </summary>
/// <remarks>
/// <c>img</c> elements are cloned deeply as they are. Every other accepted element is cloned
/// shallowly and stripped of all attributes, after which only <c>href</c> and the bold, italic
/// and underline style declarations are put back. Nodes that are neither document, element
/// nor text are dropped together with their children.
/// </remarks>
/// <param name="input">Source node to copy from.</param>
/// <param name="output">Node that receives the filtered copy.</param>
private void PrepareHtml(HtmlAgilityPack.HtmlNode input, HtmlAgilityPack.HtmlNode output)
{
    HtmlAgilityPack.HtmlNodeType kind = input.NodeType;

    if (kind == HtmlAgilityPack.HtmlNodeType.Text)
    {
        output.AppendChild(input.CloneNode(true));
        return;
    }

    // Children go directly under the output node unless an element clone is created below.
    HtmlAgilityPack.HtmlNode target = output;

    if (kind == HtmlAgilityPack.HtmlNodeType.Element)
    {
        string tagName = input.OriginalName;

        if (!availableTags.Contains(tagName))
        {
            return;
        }

        if (tagName.Equals("img"))
        {
            output.AppendChild(input.CloneNode(true));
            return;
        }

        target = output.AppendChild(input.CloneNode(false));

        string styleValue = input.GetAttributeValue("style", string.Empty);
        string hrefValue = input.GetAttributeValue("href", string.Empty);

        // Start from a clean element; only the attributes re-added below survive.
        target.Attributes.RemoveAll();

        string keptStyle = string.Empty;
        if (styleValue != string.Empty)
        {
            foreach (string declaration in styleValue.Split(';'))
            {
                bool isBold = declaration.Contains("font-weight") && declaration.Contains("bold");
                bool isItalic = declaration.Contains("font-style") && declaration.Contains("italic");
                bool isUnderline = declaration.Contains("text-decoration") && declaration.Contains("underline");

                if (isBold || isItalic || isUnderline)
                {
                    keptStyle += declaration + ";";
                }
            }
        }

        if (keptStyle != string.Empty)
        {
            target.SetAttributeValue("style", keptStyle);
        }

        if (hrefValue != string.Empty)
        {
            target.SetAttributeValue("href", hrefValue);
        }
    }
    else if (kind != HtmlAgilityPack.HtmlNodeType.Document)
    {
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode childNode in input.ChildNodes)
    {
        PrepareHtml(childNode, target);
    }
}
/// <summary>
/// Produces a Markdown task-list prefix from the node's <c>checked</c> attribute.
/// </summary>
/// <param name="node">Node whose <c>checked</c> attribute is inspected.</param>
/// <returns><c>"- [x] "</c> when the node counts as checked, otherwise <c>"- [ ] "</c>.</returns>
public string Convert(HtmlAgilityPack.HtmlNode node)
{
    // `checked` is an HTML boolean attribute: `<input checked>` and `checked="checked"`
    // both mean "checked". The bool overload of GetAttributeValue returns the default when
    // the value does not parse as a boolean, so presence is tested first and the default
    // is true. An explicit `checked="false"` still renders as unchecked, as it did before.
    var isChecked = node.Attributes["checked"] != null
        && node.GetAttributeValue("checked", true);

    var x = isChecked ? "x" : " ";
    return $"- [{x}] ";
}
/// <summary>
/// Reads the <c>href</c> attribute of <paramref name="htmlNode"/>.
/// </summary>
/// <param name="htmlNode">Node whose <c>href</c> attribute is read.</param>
/// <param name="defaultValue">Fallback handed to <c>GetAttributeValue</c>; null when omitted.</param>
/// <returns>The result of <c>GetAttributeValue("href", defaultValue)</c> on the node.</returns>
public static string GetHref(this HtmlAgilityPack.HtmlNode htmlNode, string defaultValue = null)
    => htmlNode.GetAttributeValue("href", defaultValue);
/// <summary>
/// Tells whether the node's <c>class</c> attribute is exactly <c>last_packages</c>.
/// </summary>
/// <param name="node">Node to inspect.</param>
/// <returns>
/// True when the <c>class</c> attribute value equals <c>last_packages</c>; false otherwise,
/// including when the empty-string fallback is used for the attribute.
/// </returns>
public bool RecognitionPattern(HtmlAgilityPack.HtmlNode node)
{
    string cssClass = node.GetAttributeValue("class", string.Empty);
    return cssClass.Equals("last_packages");
}
/// <summary>
/// Parses a Google login page and prepares the data string and target URI for the sign-in POST.
/// </summary>
/// <param name="strEmail">Value passed to <c>BuildDataString</c> under the name <c>Email</c>.</param>
/// <param name="strPassword">Value passed to <c>BuildDataString</c> under the name <c>Passwd</c>.</param>
/// <param name="strHTMLLoginPage">HTML of the login page to parse.</param>
/// <param name="strReferer">Not used by this method.</param>
/// <param name="strData">
/// Receives the data string built by the successive <c>BuildDataString</c> calls; stays empty
/// when the form with id <c>gaia_loginform</c> is not found.
/// </param>
/// <param name="strPostUri">
/// Receives the form's <c>action</c> attribute; stays empty when the form is not found.
/// </param>
/// <returns>
/// Always <c>false</c>, including when the form was found and both out values were filled.
/// NOTE(review): confirm whether callers expect <c>true</c> on success.
/// </returns>
public static bool ParseGoogleLoginPage(string strEmail, string strPassword, string strHTMLLoginPage, string strReferer, out string strData, out string strPostUri)
{
    // Sample of the expected strData, taken from the original note (long values elided):
    //   GALX=K2kN1B3RhzI
    //   &continue=https%3A%2F%2Faccounts.google.com%2Fo%2Foauth2%2Fauth%3Fscope%3Demail...
    //   &service=lso
    //   <mpl=popup        (sic; possibly "&ltmpl=popup" garbled by HTML decoding - TODO confirm)
    //   &shdf=...
    //   &scc=1
    //   &sarp=1
    //   &_utf8=%E2%98%83
    //   &bgresponse=...
    //   &pstMsg=1
    //   &dnConn=
    //   &checkConnection=youtube%3A3988%3A1
    //   &checkedDomains=youtube
    //   &Email=<strEmail>
    //   &Passwd=<strPassword>
    //   &signIn=Sign+in
    //   &PersistentCookie=yes
    strData = "";
    strPostUri = "";

    HtmlAgilityPack.HtmlDocument loginDocument = new HtmlAgilityPack.HtmlDocument();
    loginDocument.LoadHtml(strHTMLLoginPage);

    HtmlAgilityPack.HtmlNode rootNode = loginDocument.DocumentNode;
    if (rootNode == null)
    {
        return false;
    }

    HtmlAgilityPack.HtmlNode loginForm = rootNode.SelectSingleNode("//form[@id=\"gaia_loginform\"]");
    if (loginForm == null)
    {
        return false;
    }

    strPostUri = loginForm.GetAttributeValue("action", "");

    // The order of the calls below is significant: each one receives strData by reference.
    // NOTE(review): the boolean argument presumably marks the first field (no separator) - confirm
    // against BuildDataString.
    BuildDataString(loginForm, "Page", ref strData, true);

    bool nothingBeforeGalx = string.IsNullOrEmpty(strData);
    BuildDataString(loginForm, "GALX", ref strData, nothingBeforeGalx);

    bool nothingBeforeGxf = string.IsNullOrEmpty(strData);
    BuildDataString(loginForm, "gxf", ref strData, nothingBeforeGxf);

    BuildDataString(GetValueByName(loginForm, "input", "continue"), "continue", ref strData);
    BuildDataString(GetValueByName(loginForm, "input", "followup"), "followup", ref strData);
    BuildDataString(loginForm, "service", ref strData);
    BuildDataString(loginForm, "ProfileInformation", ref strData);
    BuildDataString(GetValueByName(loginForm, "input", "_utf8"), "_utf8", ref strData);

    string[] formFieldNames =
    {
        "bgresponse",
        "pstMsg",
        "dnConn",
        "checkConnection",
        "checkedDomains",
        "identifiertoken",
        "identifiertoken_audio"
    };
    foreach (string fieldName in formFieldNames)
    {
        BuildDataString(loginForm, fieldName, ref strData);
    }

    BuildDataString(strEmail, "Email", ref strData);
    BuildDataString(strPassword, "Passwd", ref strData);
    BuildDataString(loginForm, "rmShown", ref strData);

    return false;
}