/// <summary>
/// Parses a lawyer search-result page and builds one <see cref="LaywerModel"/> per result card,
/// downloading each lawyer's profile page to fill in the detail fields.
/// </summary>
/// <param name="html">
/// Markup of the search-result page. When null or empty, the saved page at
/// C:\IIS\test\log.txt is read instead.
/// </param>
/// <returns>The lawyers collected so far; never null, possibly empty.</returns>
/// <remarks>
/// Exceptions are not propagated: they are appended to C:\IIS\test\error.txt and the
/// lawyers gathered before the failure are returned.
/// </remarks>
public List<LaywerModel> GetLaywerList(string html)
{
    var laywerList = new List<LaywerModel>();
    try
    {
        // Previously the argument was always overwritten by the fixture file, so the
        // caller's markup was ignored. The file is now only a fallback.
        if (string.IsNullOrEmpty(html))
        {
            html = File.ReadAllText(@"C:\IIS\test\log.txt", Encoding.UTF8);
        }

        html = html.Replace(@"\\\", "");
        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var lawyerDivs = doc.DocumentNode.SelectNodes("//div[@class='row1 cloudlaw-profile user-menu-container square biz-card clearfix']");
        if (lawyerDivs == null)
        {
            // SelectNodes returns null (not an empty collection) when nothing matches.
            return laywerList;
        }

        foreach (HtmlNode lawyerDiv in lawyerDivs)
        {
            var laywerDetail = lawyerDiv.SelectSingleNode(".//div[@class='col-md-7 user-details']");
            if (laywerDetail == null)
            {
                continue;
            }

            HtmlNode profileLink = laywerDetail.SelectSingleNode(".//a[@zb-role='profile-link']");
            var href = profileLink == null ? null : profileLink.Attributes["href"];
            if (href == null)
            {
                // Without a profile URL there is nothing to fetch; skip this card
                // instead of letting a NullReferenceException abort the whole list.
                continue;
            }

            var laywer = new LaywerModel();

            var h41 = laywerDetail.SelectSingleNode(".//h4");
            if (h41 != null)
            {
                laywer.Name = h41.InnerText.Trim();
            }

            var userIdAttribute = profileLink.Attributes["userid"];
            if (userIdAttribute != null)
            {
                laywer.userId = userIdAttribute.Value;
            }

            var url = "https://www.zeekbeek.com" + href.Value;
            HtmlDocument detailDoc = GetPage(url);
            if (!ReadDetailData(detailDoc, laywer))
            {
                // Retry one time with a fresh cookie container.
                Thread.Sleep(2000);
                _cookies = new CookieContainer();
                GetHomePageCookie();
                detailDoc = GetPage(url);

                // The re-fetched page was previously never parsed, which made the retry
                // pointless. The lawyer is still added even if this second parse fails.
                ReadDetailData(detailDoc, laywer);
            }

            laywerList.Add(laywer);
        }
    }
    catch (Exception ex)
    {
        File.AppendAllText(@"C:\IIS\test\error.txt", "GetLaywerList exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
        File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
    }

    return laywerList;
}
/// <summary>
/// Copies the profile details found in a lawyer's detail page onto <paramref name="laywer"/>.
/// Only fields whose markup is present are assigned; the others are left untouched.
/// </summary>
/// <param name="doc">Parsed profile page; may be null when the download failed.</param>
/// <param name="laywer">Model that receives the extracted values.</param>
/// <returns>
/// True when at least one "list-group" list was found and processed without error;
/// false when <paramref name="doc"/> is null, no such list exists, or an exception occurred.
/// </returns>
/// <remarks>
/// Exceptions are not propagated: they are appended to C:\IIS\test\error.txt.
/// </remarks>
public bool ReadDetailData(HtmlDocument doc, LaywerModel laywer)
{
    var result = true;
    try
    {
        // A null document used to throw, get logged, and still return true.
        var uls = doc == null ? null : doc.DocumentNode.SelectNodes("//ul[@class='list-group']");
        if (uls == null)
        {
            return false;
        }

        foreach (HtmlNode ul in uls)
        {
            ReadDetailHeaderFields(ul, laywer);

            // SelectNodes returns null when the list has no matching items.
            var lis = ul.SelectNodes(".//li[@class='list-group-item']");
            if (lis == null)
            {
                continue;
            }

            foreach (HtmlNode li in lis)
            {
                ReadDetailListItem(li, laywer);
            }
        }
    }
    catch (Exception ex)
    {
        // Report the failure to the caller; returning true here hid the error.
        result = false;
        File.AppendAllText(@"C:\IIS\test\error.txt", "GetuserDetail exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
        File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
    }

    return result;
}

// Returns the trimmed inner text of the first node matching xpath under scope, or null when none matches.
private static string ReadDetailNodeText(HtmlNode scope, string xpath)
{
    HtmlNode node = scope.SelectSingleNode(xpath);
    return node == null ? null : node.InnerText.Trim();
}

// Reads organisation, job title, postal address, web link and phone link from one list.
private static void ReadDetailHeaderFields(HtmlNode ul, LaywerModel laywer)
{
    string org = ReadDetailNodeText(ul, ".//span[@itemtype='http://schema.org/Place']");
    if (org != null)
    {
        laywer.Org = org;
    }

    string title = ReadDetailNodeText(ul, ".//div[@itemprop='jobTitle']");
    if (title != null)
    {
        laywer.Title = title;
    }

    HtmlNode addressLink = ul.SelectSingleNode(".//div[@itemprop='address']");
    if (addressLink != null)
    {
        string street = ReadDetailNodeText(addressLink, ".//div[@class='btn-block']");
        if (street != null)
        {
            laywer.Street = street;
        }

        string addressLocality = ReadDetailNodeText(addressLink, ".//span[@itemprop='addressLocality']");
        if (addressLocality != null)
        {
            laywer.AddressLocality = addressLocality;
        }

        string region = ReadDetailNodeText(addressLink, ".//span[@itemprop='addressRegion']");
        if (region != null)
        {
            laywer.Region = region;
        }

        string postalCode = ReadDetailNodeText(addressLink, ".//span[@itemprop='postalCode']");
        if (postalCode != null)
        {
            laywer.PostalCode = postalCode;
        }
    }

    string web = ReadDetailNodeText(ul, ".//a[@zb-role='web-vpl-link']");
    if (web != null)
    {
        laywer.WebUrl = web;
    }

    string telphone = ReadDetailNodeText(ul, ".//a[@zb-role='phone-vpl-link']");
    if (telphone != null)
    {
        laywer.Telphone = telphone;
    }
}

// Reads the label-prefixed values (county, country, phone, fax, e-mail, bar admission) from one list item.
private void ReadDetailListItem(HtmlNode li, LaywerModel laywer)
{
    string text = li.InnerText;
    string markup = li.InnerHtml;

    // The checks are deliberately independent: one item may carry several labels.
    if (text.Contains("County"))
    {
        laywer.County = text.Replace("County:", "").Trim();
    }

    if (text.Contains("Country"))
    {
        laywer.Country = text.Replace("Country:", "").Trim();
    }

    if (text.Contains("(C)") || text.Contains("(T)"))
    {
        laywer.Cellphone = text.Replace("(C)", "").Replace("(T)", "").Trim();
    }

    if (text.Contains("(F)"))
    {
        laywer.Fax = text.Replace("(F)", "").Trim();
    }

    if (markup.Contains("JavaScript"))
    {
        laywer.Email = ExtractEmails(markup);
    }

    // Ordinal comparison: ToLower() is culture-sensitive and mis-handles 'I' under e.g. Turkish.
    bool hasAdmissionInfo =
        text.IndexOf("registration", StringComparison.OrdinalIgnoreCase) >= 0 ||
        text.IndexOf("admission", StringComparison.OrdinalIgnoreCase) >= 0;
    if (!hasAdmissionInfo)
    {
        return;
    }

    // Entries are separated by line-break tags; accept the self-closing and upper-case
    // spellings as well as the plain lower-case form.
    foreach (var str in Regex.Split(markup, @"<br\s*/?>", RegexOptions.IgnoreCase))
    {
        if (str.Contains("State of Admission"))
        {
            laywer.LicensedIn = str.Replace("State of Admission:", "").Trim();
        }

        if (str.Contains("Registration"))
        {
            laywer.Registration = str.Replace("Registration #:", "").Trim();
        }

        if (str.Contains("Date of Admission"))
        {
            laywer.DateOfAdmission = str.Replace("Date of Admission:", "").Trim();
        }
    }
}