예제 #1
0
        /// <summary>
        /// Builds the list of lawyers found in the search-result markup. For every lawyer card the
        /// name, profile URL and user id are read from the card, then the profile page is fetched
        /// with <c>GetPage</c> and parsed with <c>ReadDetailData</c>.
        /// </summary>
        /// <param name="html">
        /// Search-result markup. NOTE(review): the value passed in is currently discarded and replaced
        /// by the contents of C:\IIS\test\log.txt; this looks like debugging residue, but the callers are
        /// not visible here, so the behaviour is preserved. Confirm before removing.
        /// </param>
        /// <returns>
        /// The lawyers collected; never null. If an exception occurs it is appended to
        /// C:\IIS\test\error.txt and the entries gathered up to that point are returned.
        /// </returns>
        public List<LaywerModel> GetLaywerList(string html)
        {
            var laywerList = new List<LaywerModel>();

            try
            {
                html = File.ReadAllText(@"C:\IIS\test\log.txt", Encoding.UTF8);
                // Remove every run of three consecutive backslashes from the markup.
                html = html.Replace(@"\\\", "");
                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                var lawyerDivs = doc.DocumentNode.SelectNodes("//div[@class='row1 cloudlaw-profile user-menu-container square biz-card clearfix']");
                if (lawyerDivs == null)
                {
                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    return laywerList;
                }

                foreach (HtmlNode lawyerDiv in lawyerDivs)
                {
                    var laywerDetail = lawyerDiv.SelectSingleNode(".//div[@class='col-md-7 user-details']");
                    if (laywerDetail == null)
                    {
                        // Card without a details section: skip it instead of aborting the whole run.
                        continue;
                    }

                    var      h41         = laywerDetail.SelectSingleNode(".//h4");
                    HtmlNode profileLink = laywerDetail.SelectSingleNode(".//a[@zb-role='profile-link']");
                    if (h41 == null || profileLink == null)
                    {
                        continue;
                    }

                    var hrefAttribute = profileLink.Attributes["href"];
                    if (hrefAttribute == null)
                    {
                        // Without a link there is no profile page to fetch.
                        continue;
                    }

                    var laywer = new LaywerModel();
                    laywer.Name = h41.InnerText.Trim();

                    var userIdAttribute = profileLink.Attributes["userid"];
                    if (userIdAttribute != null)
                    {
                        laywer.userId = userIdAttribute.Value;
                    }

                    var          url       = "https://www.zeekbeek.com" + hrefAttribute.Value;
                    HtmlDocument detailDoc = GetPage(url);
                    if (!ReadDetailData(detailDoc, laywer))
                    {
                        // Retry one time with a fresh cookie container.
                        Thread.Sleep(2000);
                        _cookies = new CookieContainer();
                        GetHomePageCookie();
                        detailDoc = GetPage(url);
                        // Parse the re-fetched page; previously the retry result was never read.
                        ReadDetailData(detailDoc, laywer);
                    }

                    // The lawyer is kept even when the details could not be read, so the
                    // name and user id from the result card are not lost.
                    laywerList.Add(laywer);
                }
            }
            catch (Exception ex)
            {
                File.AppendAllText(@"C:\IIS\test\error.txt", "GetLaywerList exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
                File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
            }
            return laywerList;
        }
예제 #2
0
        /// <summary>
        /// Copies the profile fields found in a lawyer detail page onto <paramref name="laywer"/>.
        /// Every <c>ul</c> with class <c>list-group</c> is scanned; a field is only assigned when
        /// its element or marker text is present, so missing fields keep their previous value.
        /// </summary>
        /// <param name="doc">Parsed detail page. A null document is reported as a failure.</param>
        /// <param name="laywer">Model whose properties are filled in from the page.</param>
        /// <returns>
        /// <c>true</c> when the page contained at least one <c>list-group</c> list and it was processed
        /// without an exception; <c>false</c> when <paramref name="doc"/> is null, no such list exists,
        /// or an exception occurred (the exception is appended to C:\IIS\test\error.txt).
        /// </returns>
        public bool ReadDetailData(HtmlDocument doc, LaywerModel laywer)
        {
            if (doc == null)
            {
                // Nothing to parse; let the caller decide whether to retry.
                return false;
            }

            var result = true;

            try
            {
                var uls = doc.DocumentNode.SelectNodes("//ul[@class='list-group']");
                if (uls == null)
                {
                    // SelectNodes yields null when nothing matches.
                    return false;
                }

                foreach (HtmlNode ul in uls)
                {
                    HtmlNode org = ul.SelectSingleNode(".//span[@itemtype='http://schema.org/Place']");
                    if (org != null)
                    {
                        laywer.Org = org.InnerText.Trim();
                    }

                    HtmlNode title = ul.SelectSingleNode(".//div[@itemprop='jobTitle']");
                    if (title != null)
                    {
                        laywer.Title = title.InnerText.Trim();
                    }

                    HtmlNode addressLink = ul.SelectSingleNode(".//div[@itemprop='address']");
                    if (addressLink != null)
                    {
                        HtmlNode street = addressLink.SelectSingleNode(".//div[@class='btn-block']");
                        if (street != null)
                        {
                            laywer.Street = street.InnerText.Trim();
                        }
                        HtmlNode addressLocality = addressLink.SelectSingleNode(".//span[@itemprop='addressLocality']");
                        if (addressLocality != null)
                        {
                            laywer.AddressLocality = addressLocality.InnerText.Trim();
                        }
                        HtmlNode region = addressLink.SelectSingleNode(".//span[@itemprop='addressRegion']");
                        if (region != null)
                        {
                            laywer.Region = region.InnerText.Trim();
                        }
                        HtmlNode postalCode = addressLink.SelectSingleNode(".//span[@itemprop='postalCode']");
                        if (postalCode != null)
                        {
                            laywer.PostalCode = postalCode.InnerText.Trim();
                        }
                    }

                    HtmlNode web = ul.SelectSingleNode(".//a[@zb-role='web-vpl-link']");
                    if (web != null)
                    {
                        laywer.WebUrl = web.InnerText.Trim();
                    }

                    HtmlNode telphone = ul.SelectSingleNode(".//a[@zb-role='phone-vpl-link']");
                    if (telphone != null)
                    {
                        laywer.Telphone = telphone.InnerText.Trim();
                    }

                    var lis = ul.SelectNodes(".//li[@class='list-group-item']");
                    if (lis == null)
                    {
                        // A list without matching items must not abort the remaining lists.
                        continue;
                    }

                    foreach (HtmlNode li in lis)
                    {
                        // Read the text and markup once per item instead of once per check.
                        var liText = li.InnerText;
                        var liHtml = li.InnerHtml;

                        if (liText.Contains("County"))
                        {
                            laywer.County = liText.Replace("County:", "").Trim();
                        }
                        if (liText.Contains("Country"))
                        {
                            laywer.Country = liText.Replace("Country:", "").Trim();
                        }

                        // Items tagged "(C)" or "(T)" are both stored as the cell phone.
                        if (liText.Contains("(C)") || liText.Contains("(T)"))
                        {
                            laywer.Cellphone = liText.Replace("(C)", "").Replace("(T)", "").Trim();
                        }
                        if (liText.Contains("(F)"))
                        {
                            laywer.Fax = liText.Replace("(F)", "").Trim();
                        }
                        // Presumably the e-mail address is embedded in a script snippet; the
                        // extraction itself is done by ExtractEmails, defined elsewhere.
                        if (liHtml.Contains("JavaScript"))
                        {
                            laywer.Email = ExtractEmails(liHtml);
                        }

                        // Ordinal, case-insensitive search: ToLower() depends on the current
                        // culture and misses "REGISTRATION" under Turkish casing rules.
                        var hasAdmissionInfo =
                            liText.IndexOf("registration", StringComparison.OrdinalIgnoreCase) >= 0 ||
                            liText.IndexOf("admission", StringComparison.OrdinalIgnoreCase) >= 0;
                        if (hasAdmissionInfo)
                        {
                            // Accept <br>, <br/>, <br /> and upper-case variants as separators.
                            foreach (var str in Regex.Split(liHtml, @"<br\s*/?>", RegexOptions.IgnoreCase))
                            {
                                if (str.Contains("State of Admission"))
                                {
                                    laywer.LicensedIn = str.Replace("State of Admission:", "").Trim();
                                }
                                if (str.Contains("Registration"))
                                {
                                    laywer.Registration = str.Replace("Registration #:", "").Trim();
                                }
                                if (str.Contains("Date of Admission"))
                                {
                                    laywer.DateOfAdmission = str.Replace("Date of Admission:", "").Trim();
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                File.AppendAllText(@"C:\IIS\test\error.txt", "GetuserDetail exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
                File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
                // A page that failed to parse is not a success; previously this still returned true.
                result = false;
            }
            return result;
        }