Ejemplo n.º 1
0
 /// <summary>
 /// Parses a locally saved search-result list page and appends one entry for every
 /// result item whose <c>class</c> attribute contains "people".
 /// </summary>
 /// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
 /// <param name="listPageUrl">URL of the list page; also included in the error message on failure.</param>
 /// <param name="allPersonPageUrlInfos">
 /// Accumulator that receives one dictionary per person, with the keys
 /// "personUrl" and "personName".
 /// </param>
 /// <exception cref="Exception">
 /// Thrown (after the original message has been logged) when loading or parsing the page fails;
 /// the original exception is attached as the inner exception.
 /// </exception>
 private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos)
 {
     try
     {
         string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
         HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
         HtmlNodeCollection allLiNodes = pageHtmlDoc.DocumentNode.SelectNodes("//ol[@class=\"search-results\"]/li");
         if (allLiNodes == null)
         {
             // HtmlAgilityPack returns null (not an empty collection) when the XPath
             // matches nothing, so a page without results simply contributes no entries.
             return;
         }

         foreach (HtmlNode liNode in allLiNodes)
         {
             // Only result items marked as "people" carry a person link.
             if (!liNode.GetAttributeValue("class", "").Contains("people"))
             {
                 continue;
             }

             HtmlNode personLinkNode = liNode.SelectSingleNode("./div[@class=\"bd\"]/h3/a");
             string personUrl = CommonUtil.UrlDecodeSymbolAnd(personLinkNode.GetAttributeValue("href", ""));
             string personName = personLinkNode.InnerText.Trim();

             Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
             personPageUrlInfo.Add("personUrl", personUrl);
             personPageUrlInfo.Add("personName", personName);
             allPersonPageUrlInfos.Add(personPageUrlInfo);
         }
     }
     catch (Exception ex)
     {
         this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
         // The original message contained a stray '+' before the URL.
         throw new Exception("解析列表页出错, listPageUrl = " + listPageUrl, ex);
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses a locally saved Baidu result list page and appends one entry for every result
        /// whose displayed link contains ".linkedin.com/in/" and whose text contains
        /// <paramref name="keyWords"/> (case-insensitive).
        /// </summary>
        /// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
        /// <param name="listPageUrl">URL of the list page; also included in the error message on failure.</param>
        /// <param name="allPersonPageUrlInfos">
        /// Accumulator that receives one dictionary per person, with the keys
        /// "personUrl" and "personName".
        /// </param>
        /// <param name="keyWords">Text that must occur in a result's inner text for it to be accepted.</param>
        /// <exception cref="Exception">
        /// Thrown (after the original message has been logged) when loading or parsing the page fails;
        /// the original exception is attached as the inner exception.
        /// </exception>
        private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos, string keyWords)
        {
            try
            {
                string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
                HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
                HtmlNodeCollection allDivNodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"result c-container \"]");
                if (allDivNodes == null)
                {
                    // HtmlAgilityPack returns null (not an empty collection) when the XPath
                    // matches nothing, so a page without results simply contributes no entries.
                    return;
                }

                foreach (HtmlNode divNode in allDivNodes)
                {
                    // The displayed link sits at one of two nesting depths; try the shallow one first.
                    string linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div[@class=\"f13\"]/a", true, true, null, null);
                    if (linkedinUrlPart == null)
                    {
                        linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div/div[@class=\"f13\"]/a", true, true, null, null);
                    }

                    string abstractText = HtmlDocumentHelper.TryGetNodeInnerText(divNode, true, true, null, null);

                    // Ordinal, case-insensitive search instead of ToLower() on both sides:
                    // avoids culture-dependent casing and two throw-away strings per result.
                    bool isMatchingProfile = linkedinUrlPart != null
                        && linkedinUrlPart.Contains(".linkedin.com/in/")
                        && abstractText != null
                        && abstractText.IndexOf(keyWords, StringComparison.OrdinalIgnoreCase) >= 0;
                    if (!isMatchingProfile)
                    {
                        continue;
                    }

                    try
                    {
                        string personName = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./h3/a", true, true, null, null);
                        string personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(divNode, "./h3/a", "href", true, true, null, null);

                        // Remove every configured postfix from the result title to leave the bare name.
                        foreach (string postfix in this.BaiduLinkedinItemPostfix)
                        {
                            personName = personName.Replace(postfix, "").Trim();
                        }

                        Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
                        personPageUrlInfo.Add("personUrl", personUrl);
                        personPageUrlInfo.Add("personName", personName.Trim());
                        allPersonPageUrlInfos.Add(personPageUrlInfo);
                    }
                    catch (Exception ex)
                    {
                        throw new Exception("获取个人网页地址时出错", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
                // The original message contained a stray '+' before the URL.
                throw new Exception("解析Baidu列表页出错, listPageUrl = " + listPageUrl, ex);
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Parses a locally saved Google result list page and appends one entry for every result
 /// link whose URL contains ".linkedin.com/in/".
 /// </summary>
 /// <param name="localDir">Directory handed to <c>RunPage.GetFilePath</c> to resolve the saved page.</param>
 /// <param name="listPageUrl">URL of the list page; also included in the error message on failure.</param>
 /// <param name="allPersonPageUrlInfos">
 /// Accumulator that receives one dictionary per person, with the keys
 /// "personUrl" and "personName".
 /// </param>
 /// <exception cref="Exception">
 /// Thrown (after the original message has been logged) when loading or parsing the page fails;
 /// the original exception is attached as the inner exception.
 /// </exception>
 private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos)
 {
     try
     {
         string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
         HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
         HtmlNodeCollection allANodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"rc\"]/h3/a");
         if (allANodes == null)
         {
             // HtmlAgilityPack returns null (not an empty collection) when the XPath
             // matches nothing, so a page without results simply contributes no entries.
             return;
         }

         foreach (HtmlNode aNode in allANodes)
         {
             // Prefer "data-href" and fall back to the plain "href" attribute.
             string personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "data-href", true, true, null, null);
             if (personUrl == null)
             {
                 personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "href", true, true, null, null);
             }

             // The fallback can also yield null; an anchor without a URL cannot be a
             // profile link, so skip it instead of dereferencing null.
             if (personUrl == null || !personUrl.Contains(".linkedin.com/in/"))
             {
                 continue;
             }

             try
             {
                 string personName = aNode.InnerText.Trim();

                 // Remove every configured postfix from the result title to leave the bare name.
                 foreach (string postfix in this.GoogleLinkedinItemPostfix)
                 {
                     personName = personName.Replace(postfix, "");
                 }

                 Dictionary<string, string> personPageUrlInfo = new Dictionary<string, string>();
                 personUrl = CommonUtil.UrlDecode(personUrl);
                 personPageUrlInfo.Add("personUrl", personUrl);
                 personPageUrlInfo.Add("personName", personName.Trim());
                 allPersonPageUrlInfos.Add(personPageUrlInfo);
             }
             catch (Exception ex)
             {
                 throw new Exception("获取个人网页地址时出错", ex);
             }
         }
     }
     catch (Exception ex)
     {
         this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
         // The original message contained a stray '+' before the URL.
         throw new Exception("解析Google列表页出错, listPageUrl = " + listPageUrl, ex);
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Synchronizes the exam sessions (exam times).
        /// </summary>
        /// <returns>
        /// One <c>DictionaryModel</c> per <c>a</c> node found under <c>AppHelper.XPathTimes</c>
        /// that carries an <c>attrval</c> attribute; an empty list when the response has no
        /// content, the document cannot be loaded, or no nodes are found.
        /// </returns>
        private List<DictionaryModel> SysTimes()
        {
            var sessions = new List<DictionaryModel>();

            // Fetch the apply page with the current session cookies.
            var request = new HttpClientOptions
            {
                URL              = AppHelper.UrlApplyPage,
                Method           = "GET",
                CookieCollection = CurrentCookies
            };
            var response = new HttpWebClientUtility().Request(request);
            if (VerifyHelper.IsEmpty(response.Content))
            {
                return sessions;
            }

            var document = HtmlDocumentHelper.Load(response.Content);
            if (document == null)
            {
                return sessions;
            }

            var candidateNodes = HtmlDocumentHelper.FindChildNodes(document, AppHelper.XPathTimes);
            if (candidateNodes == null)
            {
                return sessions;
            }

            foreach (var node in candidateNodes)
            {
                // Only anchor elements describe an exam session.
                if (node.OriginalName != "a")
                {
                    continue;
                }

                // The session value is stored in the custom "attrval" attribute.
                var sessionValue = node.Attributes["attrval"];
                if (sessionValue == null)
                {
                    continue;
                }

                var entry = new DictionaryModel()
                {
                    Id     = Guid.NewGuid(),
                    Genre  = AppConst.DictionaryTimes,
                    Name   = StringHelper.Get(node.InnerText),
                    Value  = StringHelper.Get(sessionValue.Value),
                    Parent = "",
                    Sort   = 0
                };
                sessions.Add(entry);
            }

            return sessions;
        }