/// <summary>
/// Parses a locally saved search-result list page and appends one entry for every
/// result item whose <c>class</c> attribute contains "people".
/// </summary>
/// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
/// <param name="listPageUrl">URL of the list page; used to resolve the local file and in the error message.</param>
/// <param name="allPersonPageUrlInfos">
/// Accumulator that receives one dictionary per person with the keys
/// <c>"personUrl"</c> and <c>"personName"</c>.
/// </param>
/// <exception cref="Exception">
/// Any failure is logged through <c>RunPage.InvokeAppendLogText</c> and rethrown,
/// wrapped in an exception that carries the list page URL.
/// </exception>
private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos)
{
    try
    {
        string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
        HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
        HtmlNodeCollection allLiNodes = pageHtmlDoc.DocumentNode.SelectNodes("//ol[@class=\"search-results\"]/li");

        // SelectNodes yields null (not an empty collection) when the XPath matches
        // nothing; a page without results simply contributes no entries.
        if (allLiNodes == null)
        {
            return;
        }

        foreach (HtmlNode liNode in allLiNodes)
        {
            if (!liNode.GetAttributeValue("class", "").Contains("people"))
            {
                continue;
            }

            HtmlNode personLinkNode = liNode.SelectSingleNode("./div[@class=\"bd\"]/h3/a");
            if (personLinkNode == null)
            {
                // Fail with a message that names the missing element instead of an
                // opaque NullReferenceException; the outer catch logs and wraps it.
                throw new InvalidOperationException("未找到个人链接节点: ./div[@class=\"bd\"]/h3/a");
            }

            string personUrl = CommonUtil.UrlDecodeSymbolAnd(personLinkNode.GetAttributeValue("href", ""));
            string personName = personLinkNode.InnerText.Trim();

            Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
            personPageUrlInfo.Add("personUrl", personUrl);
            personPageUrlInfo.Add("personName", personName);
            allPersonPageUrlInfos.Add(personPageUrlInfo);
        }
    }
    catch (Exception ex)
    {
        this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
        throw new Exception("解析列表页出错, listPageUrl = " + listPageUrl, ex);
    }
}
/// <summary>
/// Parses a locally saved Baidu result list page and appends one entry for every
/// result whose displayed link contains ".linkedin.com/in/" and whose text
/// contains <paramref name="keyWords"/> (case-insensitive).
/// </summary>
/// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
/// <param name="listPageUrl">URL of the list page; used to resolve the local file and in the error message.</param>
/// <param name="allPersonPageUrlInfos">
/// Accumulator that receives one dictionary per person with the keys
/// <c>"personUrl"</c> and <c>"personName"</c>.
/// </param>
/// <param name="keyWords">Text that must occur in the result's inner text for the result to be kept.</param>
/// <exception cref="Exception">
/// Any failure is logged through <c>RunPage.InvokeAppendLogText</c> and rethrown,
/// wrapped in an exception that carries the list page URL.
/// </exception>
private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos, string keyWords)
{
    try
    {
        string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
        HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
        HtmlNodeCollection allDivNodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"result c-container \"]");

        // SelectNodes yields null (not an empty collection) when the XPath matches
        // nothing; a page without results simply contributes no entries.
        if (allDivNodes == null)
        {
            return;
        }

        foreach (HtmlNode divNode in allDivNodes)
        {
            // The displayed link sits at one of two depths; try the shallow one first.
            string linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div[@class=\"f13\"]/a", true, true, null, null);
            if (linkedinUrlPart == null)
            {
                linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div/div[@class=\"f13\"]/a", true, true, null, null);
            }

            string abstractText = HtmlDocumentHelper.TryGetNodeInnerText(divNode, true, true, null, null);

            // Ordinal, case-insensitive search: no per-item lower-cased copies and
            // no dependence on the current culture's casing rules.
            if (linkedinUrlPart != null
                && linkedinUrlPart.Contains(".linkedin.com/in/")
                && abstractText != null
                && abstractText.IndexOf(keyWords, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                try
                {
                    string personName = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./h3/a", true, true, null, null);
                    string personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(divNode, "./h3/a", "href", true, true, null, null);

                    // Strip each configured title suffix from the name.
                    foreach (string postfix in this.BaiduLinkedinItemPostfix)
                    {
                        personName = personName.Replace(postfix, "").Trim();
                    }

                    Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
                    personPageUrlInfo.Add("personUrl", personUrl);
                    personPageUrlInfo.Add("personName", personName.Trim());
                    allPersonPageUrlInfos.Add(personPageUrlInfo);
                }
                catch (Exception ex)
                {
                    throw new Exception("获取个人网页地址时出错", ex);
                }
            }
        }
    }
    catch (Exception ex)
    {
        this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
        throw new Exception("解析Baidu列表页出错, listPageUrl = " + listPageUrl, ex);
    }
}
/// <summary>
/// Parses a locally saved Google result list page and appends one entry for every
/// result anchor whose target URL contains ".linkedin.com/in/".
/// </summary>
/// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
/// <param name="listPageUrl">URL of the list page; used to resolve the local file and in the error message.</param>
/// <param name="allPersonPageUrlInfos">
/// Accumulator that receives one dictionary per person with the keys
/// <c>"personUrl"</c> (URL-decoded) and <c>"personName"</c>.
/// </param>
/// <exception cref="Exception">
/// Any failure is logged through <c>RunPage.InvokeAppendLogText</c> and rethrown,
/// wrapped in an exception that carries the list page URL.
/// </exception>
private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos)
{
    try
    {
        string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
        HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
        HtmlNodeCollection allANodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"rc\"]/h3/a");

        // SelectNodes yields null (not an empty collection) when the XPath matches
        // nothing; a page without results simply contributes no entries.
        if (allANodes == null)
        {
            return;
        }

        foreach (HtmlNode aNode in allANodes)
        {
            // Prefer data-href and fall back to the plain href.
            string personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "data-href", true, true, null, null);
            if (personUrl == null)
            {
                personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "href", true, true, null, null);
            }

            // An anchor with neither attribute cannot be a profile link; skip it
            // rather than dereferencing null.
            if (personUrl == null || !personUrl.Contains(".linkedin.com/in/"))
            {
                continue;
            }

            try
            {
                string personName = aNode.InnerText.Trim();

                // Strip each configured title suffix from the name.
                foreach (string postfix in this.GoogleLinkedinItemPostfix)
                {
                    personName = personName.Replace(postfix, "");
                }

                Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
                personUrl = CommonUtil.UrlDecode(personUrl);
                personPageUrlInfo.Add("personUrl", personUrl);
                personPageUrlInfo.Add("personName", personName.Trim());
                allPersonPageUrlInfos.Add(personPageUrlInfo);
            }
            catch (Exception ex)
            {
                throw new Exception("获取个人网页地址时出错", ex);
            }
        }
    }
    catch (Exception ex)
    {
        this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
        throw new Exception("解析Google列表页出错, listPageUrl = " + listPageUrl, ex);
    }
}
/// <summary>
/// Synchronizes the exam sessions (exam times).
/// </summary>
/// <returns>
/// One <c>DictionaryModel</c> for each <c>a</c> node under <c>AppHelper.XPathTimes</c>
/// that carries an <c>attrval</c> attribute; an empty list when the response has no
/// content, the document cannot be loaded, or no nodes are found.
/// </returns>
private List<DictionaryModel> SysTimes()
{
    List<DictionaryModel> datas = new List<DictionaryModel>();

    // Fetch the apply page with the current session cookies.
    var requestOptions = new HttpClientOptions
    {
        URL = AppHelper.UrlApplyPage,
        Method = "GET",
        CookieCollection = CurrentCookies
    };
    var response = new HttpWebClientUtility().Request(requestOptions);
    if (VerifyHelper.IsEmpty(response.Content))
    {
        return datas;
    }

    var document = HtmlDocumentHelper.Load(response.Content);
    if (document == null)
    {
        return datas;
    }

    var candidateNodes = HtmlDocumentHelper.FindChildNodes(document, AppHelper.XPathTimes);
    if (candidateNodes == null)
    {
        return datas;
    }

    // Only anchor elements describe a session; its value lives in "attrval".
    var anchorNodes = candidateNodes.Where(node => node.OriginalName == "a");
    foreach (var anchor in anchorNodes)
    {
        var attrval = anchor.Attributes["attrval"];
        if (attrval == null)
        {
            continue;
        }

        var entry = new DictionaryModel()
        {
            Id = Guid.NewGuid(),
            Genre = AppConst.DictionaryTimes,
            Name = StringHelper.Get(anchor.InnerText),
            Value = StringHelper.Get(attrval.Value),
            Parent = "",
            Sort = 0
        };
        datas.Add(entry);
    }

    return datas;
}