예제 #1
0
        /// <summary>
        /// Parses a locally stored Baidu result list page and appends one entry to
        /// <paramref name="allPersonPageUrlInfos"/> for every result block that links to a
        /// LinkedIn profile and whose text contains <paramref name="keyWords"/>.
        /// </summary>
        /// <param name="localDir">Directory passed to <c>RunPage.GetFilePath</c> to resolve the local copy of the list page.</param>
        /// <param name="listPageUrl">URL of the list page; used to resolve the local file and in error messages.</param>
        /// <param name="allPersonPageUrlInfos">
        /// Output list. Each added dictionary has the keys <c>"personUrl"</c> (href of the
        /// result's <c>./h3/a</c> link) and <c>"personName"</c> (inner text of that link with
        /// every entry of <c>BaiduLinkedinItemPostfix</c> removed and trimmed).
        /// </param>
        /// <param name="keyWords">Text that must occur (case-insensitively) in the result block's inner text.</param>
        /// <exception cref="Exception">
        /// Thrown when the page cannot be loaded or parsed; the failure is also written to the
        /// run page's log, and the original exception is kept as the inner exception.
        /// </exception>
        private void GetPersonPageUrls(string localDir, string listPageUrl, List <Dictionary <string, string> > allPersonPageUrlInfos, string keyWords)
        {
            try
            {
                string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
                HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
                HtmlNodeCollection           allDivNodes = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"result c-container \"]");

                // HtmlAgilityPack returns null (not an empty collection) when the XPath
                // matches nothing; a page without result blocks simply contributes no entries.
                if (allDivNodes == null)
                {
                    return;
                }

                foreach (HtmlNode divNode in allDivNodes)
                {
                    // The displayed-URL link sits either directly under the result div or one level deeper.
                    string linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div[@class=\"f13\"]/a", true, true, null, null);
                    if (linkedinUrlPart == null)
                    {
                        linkedinUrlPart = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./div/div[@class=\"f13\"]/a", true, true, null, null);
                    }

                    string abstractText = HtmlDocumentHelper.TryGetNodeInnerText(divNode, true, true, null, null);

                    // Skip results that are not LinkedIn profile links.
                    if (linkedinUrlPart == null || !linkedinUrlPart.Contains(".linkedin.com/in/"))
                    {
                        continue;
                    }

                    // Skip results whose text does not mention the keywords. The comparison is
                    // ordinal and case-insensitive so it does not depend on the current culture.
                    if (abstractText == null || abstractText.IndexOf(keyWords, StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        continue;
                    }

                    try
                    {
                        string personName = HtmlDocumentHelper.TryGetNodeInnerText(divNode, "./h3/a", true, true, null, null);
                        string personUrl  = HtmlDocumentHelper.TryGetNodeAttributeValue(divNode, "./h3/a", "href", true, true, null, null);

                        // Strip the known title postfixes from the link text.
                        foreach (string postfix in this.BaiduLinkedinItemPostfix)
                        {
                            personName = personName.Replace(postfix, "").Trim();
                        }

                        Dictionary <string, string> personPageUrlInfo = new Dictionary <string, string>();
                        personPageUrlInfo.Add("personUrl", personUrl);
                        personPageUrlInfo.Add("personName", personName.Trim());
                        allPersonPageUrlInfos.Add(personPageUrlInfo);
                    }
                    catch (Exception ex)
                    {
                        throw new Exception("获取个人网页地址时出错", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
                throw new Exception("解析Baidu列表页出错, listPageUrl = " + listPageUrl, ex);
            }
        }