/// <summary>
/// Parses a lawyer listing page and builds one <see cref="LaywerModel"/> per profile card.
/// </summary>
/// <param name="html">Listing-page markup. Every run of three backslashes is removed before parsing.</param>
/// <returns>
/// The lawyers collected so far; never null. If an exception is thrown while parsing, it is appended to
/// C:\IIS\test\error.txt and whatever was collected up to that point is returned.
/// </returns>
public List<LaywerModel> GetLaywerList(string html)
{
    var laywerList = new List<LaywerModel>();
    try
    {
        // Strip every run of three backslashes before handing the markup to the parser.
        html = html.Replace(@"\\\", "");

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var lawyerDivs = doc.DocumentNode.SelectNodes("//div[@class='row1 cloudlaw-profile user-menu-container square biz-card clearfix']");
        if (lawyerDivs == null)
        {
            // No profile cards on this page.
            return laywerList;
        }

        foreach (HtmlNode lawyerDiv in lawyerDivs)
        {
            var laywer = new LaywerModel();
            laywer.userId = lawyerDiv.Attributes["userid"]?.Value;

            // A card without a details column must not abort the remaining cards,
            // so the label parsing is skipped for it instead of throwing.
            var laywerDetail = lawyerDiv.SelectSingleNode(".//div[@class='col-md-7 user-details']");
            if (laywerDetail != null)
            {
                var h41 = laywerDetail.SelectSingleNode(".//h4");
                laywer.Name = h41?.InnerText.Trim();

                var ps = laywerDetail.SelectNodes(".//p");
                if (ps != null)
                {
                    foreach (HtmlNode p in ps)
                    {
                        var str = p.InnerText;
                        if (str.Contains("Licensed"))
                        {
                            laywer.LicensedIn = str.Replace("Licensed In:", "").Trim();
                        }
                        if (str.Contains("Reg #"))
                        {
                            laywer.Registration = str.Replace("Reg #:", "").Trim();
                        }
                        if (str.Contains("Date of Admission"))
                        {
                            laywer.DateOfAdmission = str.Replace("Date of Admission:", "").Trim();
                        }
                        if (str.Contains("County:"))
                        {
                            laywer.County = str.Replace("County:", "").Trim();
                        }
                    }
                }
            }

            // Fill in the remaining fields from the lawyer's vCard.
            ReadFromVcard(laywer, laywer.userId);
            laywerList.Add(laywer);
        }
    }
    catch (Exception ex)
    {
        File.AppendAllText(@"C:\IIS\test\error.txt", "GetLaywerList exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
        // ToString() carries the exception type and message as well as the stack trace.
        File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
    }
    return laywerList;
}
/// <summary>
/// Walks every lawyer card in a listing page, downloads the linked profile page and
/// feeds it to <c>ReadDetailData</c>.
/// </summary>
/// <param name="html">Listing-page markup.</param>
/// <remarks>
/// NOTE(review): the collected list is neither returned nor stored, so callers cannot observe the
/// parsed lawyers. The signature is kept as-is for existing callers.
/// </remarks>
public void ReadData(string html)
{
    var laywerList = new List<LaywerModel>();

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var lawyerDivs = doc.DocumentNode.SelectNodes("//div[@class='row1 cloudlaw-profile user-menu-container square biz-card clearfix']");
    if (lawyerDivs == null)
    {
        // No profile cards on this page.
        return;
    }

    foreach (HtmlNode lawyerDiv in lawyerDivs)
    {
        var laywer = new LaywerModel();

        // The leading "." keeps the query inside this card. A bare "//" is evaluated from the
        // document root, which made every card resolve to the first lawyer on the page.
        var laywerDetail = lawyerDiv.SelectSingleNode(".//div[@class='col-md-7 user-details']");
        if (laywerDetail == null)
        {
            continue;
        }

        var h41 = laywerDetail.SelectSingleNode(".//h4");
        laywer.Name = h41?.InnerText.Trim();

        // Searched from the card root so the link is found whether or not it sits inside the details column.
        HtmlNode profileLink = lawyerDiv.SelectSingleNode(".//a[@zb-role='profile-link']");
        var url = profileLink?.Attributes["href"]?.Value;
        if (!string.IsNullOrEmpty(url))
        {
            // "web" is a member declared outside this block (presumably an HtmlWeb — confirm).
            HtmlDocument detailDoc = web.Load(url);
            ReadDetailData(detailDoc, laywer);
        }

        laywerList.Add(laywer);
    }
}
/// <summary>
/// Downloads the vCard for <paramref name="userId"/> from zeekbeek.com and copies the recognised
/// fields onto <paramref name="laywer"/>.
/// </summary>
/// <param name="laywer">Model to populate; only fields present in the vCard are overwritten.</param>
/// <param name="userId">Profile id placed in the <c>userId</c> query parameter. Nothing is requested when it is null or empty.</param>
/// <remarks>
/// The request URL and the raw response are appended to C:\IIS\test\normalLog.txt. Exceptions are
/// not propagated; they are appended to C:\IIS\test\error.txt.
/// </remarks>
public void ReadFromVcard(LaywerModel laywer, string userId)
{
    try
    {
        if (string.IsNullOrEmpty(userId))
        {
            // Without an id there is no vCard to ask for.
            return;
        }

        // The id comes from scraped markup, so escape it before putting it in the query string.
        var url = "http://www.zeekbeek.com/vcard.ashx?userId=" + Uri.EscapeDataString(userId);
        File.AppendAllText(@"C:\IIS\test\normalLog.txt", url + "\r\n");

        string html;
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "GET";
        using (var response = (HttpWebResponse)request.GetResponse())
        using (var reader = new StreamReader(response.GetResponseStream()))
        {
            html = reader.ReadToEnd();
        }
        File.AppendAllText(@"C:\IIS\test\normalLog.txt", html + "\r\n");

        // Accept any line-ending style; "\r\n" is listed first so it is consumed as one separator.
        var lines = html.Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.None);
        foreach (var line in lines)
        {
            // Split at the first colon only: everything before it is the property name plus
            // parameters, everything after it is the value. This strips just the leading prefix,
            // so a value that happens to contain the prefix text is no longer altered.
            int colon = line.IndexOf(':');
            if (colon < 0)
            {
                continue;
            }
            string key = line.Substring(0, colon);
            string value = line.Substring(colon + 1);

            switch (key)
            {
                case "N":
                    var name = value.Split(';');
                    if (name.Length >= 3)
                    {
                        laywer.Surname = name[0];
                        laywer.GivenName = name[1];
                        laywer.MiddleName = name[2];
                    }
                    break;
                case "FN":
                    laywer.Name = value;
                    break;
                case "ORG":
                    laywer.Org = value;
                    break;
                case "TITLE":
                    laywer.Title = value;
                    break;
                case "EMAIL;PREF;INTERNET":
                    laywer.Email = value;
                    break;
                case "URL;WORK":
                    laywer.WebUrl = value;
                    break;
                case "TEL;WORK;FAX":
                    laywer.Fax = value;
                    break;
                case "TEL;WORK;VOICE":
                    laywer.Telphone = value;
                    break;
                case "TEL;CELL;VOICE":
                    laywer.Cellphone = value;
                    break;
                case "ADR;PREF;WORK;PARCEL;ENCODING=QUOTED-PRINTABLE":
                    // Components are stored as received; quoted-printable escapes are not decoded here.
                    var address = value.Split(';');
                    if (address.Length >= 7)
                    {
                        laywer.Street = address[2];
                        laywer.AddressLocality = address[3];
                        laywer.Region = address[4];
                        laywer.PostalCode = address[5];
                        laywer.Country = address[6];
                    }
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        File.AppendAllText(@"C:\IIS\test\error.txt", "ReadFromVcard exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
        File.AppendAllText(@"C:\IIS\test\error.txt", ex.ToString() + "\r\n");
    }
}
/// <summary>
/// Extracts organisation, address, web, phone/fax and admission details from a profile page
/// and writes them onto <paramref name="laywer"/>.
/// </summary>
/// <param name="html">
/// Profile-page markup. When null or empty, the debug fixture C:\IIS\test\detail.txt is read instead;
/// previously the fixture always replaced this argument, so the caller's markup was never parsed.
/// </param>
/// <param name="laywer">Model to populate; only fields found in the page are overwritten.</param>
public void ReadDetailDataNew(string html, LaywerModel laywer)
{
    if (string.IsNullOrEmpty(html))
    {
        // Legacy debugging fallback, kept for callers that pass no markup.
        html = File.ReadAllText(@"C:\IIS\test\detail.txt", Encoding.UTF8);
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var uls = doc.DocumentNode.SelectNodes("//ul[@class='list-group']");
    if (uls == null)
    {
        // Page has no detail lists; nothing to extract.
        return;
    }

    foreach (HtmlNode ul in uls)
    {
        HtmlNode org = ul.SelectSingleNode(".//span[@itemtype='http://schema.org/Place']");
        if (org != null)
        {
            laywer.Org = org.InnerText.Trim();
        }

        HtmlNode addressLink = ul.SelectSingleNode(".//div[@itemprop='address']");
        if (addressLink != null)
        {
            HtmlNode street = addressLink.SelectSingleNode(".//div[@class='btn-block']");
            if (street != null)
            {
                laywer.Street = street.InnerText.Trim();
            }
            HtmlNode addressLocality = addressLink.SelectSingleNode(".//span[@itemprop='addressLocality']");
            if (addressLocality != null)
            {
                laywer.AddressLocality = addressLocality.InnerText.Trim();
            }
            HtmlNode region = addressLink.SelectSingleNode(".//span[@itemprop='addressRegion']");
            if (region != null)
            {
                laywer.Region = region.InnerText.Trim();
            }
            HtmlNode postalCode = addressLink.SelectSingleNode(".//span[@itemprop='postalCode']");
            if (postalCode != null)
            {
                laywer.PostalCode = postalCode.InnerText.Trim();
            }
        }

        HtmlNode webLink = ul.SelectSingleNode(".//a[@zb-role='web-vpl-link']");
        if (webLink != null)
        {
            laywer.WebUrl = webLink.InnerText.Trim();
        }

        var lis = ul.SelectNodes(".//li[@class='list-group-item']");
        if (lis == null)
        {
            continue;
        }

        foreach (HtmlNode li in lis)
        {
            string liText = li.InnerText;

            if (liText.Contains("County"))
            {
                laywer.County = liText.Replace("County:", "").Trim();
            }
            if (liText.Contains("Country"))
            {
                laywer.Country = liText.Replace("Country:", "").Trim();
            }
            if (liText.Contains("(T)"))
            {
                laywer.Telphone = liText.Replace("(T)", "").Trim();
            }
            if (liText.Contains("(F)"))
            {
                laywer.Fax = liText.Replace("(F)", "").Trim();
            }

            // Ordinal, case-insensitive match: ToLower() depends on the current culture
            // (e.g. Turkish dotless i) and could miss "REGISTRATION".
            bool hasAdmissionInfo =
                liText.IndexOf("registration", StringComparison.OrdinalIgnoreCase) >= 0 ||
                liText.IndexOf("admission", StringComparison.OrdinalIgnoreCase) >= 0;
            if (!hasAdmissionInfo)
            {
                continue;
            }

            // The item holds several "Label: value" entries separated by line-break tags;
            // accept every spelling of the tag (with or without slash/space, any case).
            foreach (var str in Regex.Split(li.InnerHtml, @"<br\s*/?>", RegexOptions.IgnoreCase))
            {
                if (str.Contains("State of Admission") || str.Contains("Licensed"))
                {
                    // Strip both labels this branch matches, so the "Licensed In:" label is removed too.
                    laywer.LicensedIn = str.Replace("State of Admission:", "").Replace("Licensed In:", "").Trim();
                }
                if (str.Contains("Registration"))
                {
                    laywer.Registration = str.Replace("Registration #:", "").Trim();
                }
                if (str.Contains("Date of Admission"))
                {
                    laywer.DateOfAdmission = str.Replace("Date of Admission:", "").Trim();
                }
            }
        }
    }
}
/// <summary>
/// Extracts contact, address and admission details from an already-loaded profile page
/// and writes them onto <paramref name="laywer"/>.
/// </summary>
/// <param name="doc">Parsed profile page.</param>
/// <param name="laywer">Model to populate; only fields found in the page are overwritten.</param>
/// <remarks>
/// Email, fax and telephone are taken from the profile module's meta tags first. A phone number
/// in the list items is used only when no telephone has been set yet.
/// </remarks>
public void ReadDetailData(HtmlDocument doc, LaywerModel laywer)
{
    var uls = doc.DocumentNode.SelectNodes("//ul[@class='list-group']");

    var metaDiv = doc.DocumentNode.SelectSingleNode("//div[@class='DnnModule DnnModule-ZBUserProfileViewer DnnModule-3386']");
    if (metaDiv != null)
    {
        // A meta tag without a content attribute is ignored instead of throwing.
        var email = metaDiv.SelectSingleNode(".//meta[@itemprop='email']")?.Attributes["content"]?.Value;
        if (email != null)
        {
            laywer.Email = email;
        }
        var fax = metaDiv.SelectSingleNode(".//meta[@itemprop='faxNumber']")?.Attributes["content"]?.Value;
        if (fax != null)
        {
            laywer.Fax = fax;
        }
        var telephone = metaDiv.SelectSingleNode(".//meta[@itemprop='telephone']")?.Attributes["content"]?.Value;
        if (telephone != null)
        {
            laywer.Telphone = telephone;
        }
    }

    if (uls == null)
    {
        // Page has no detail lists; only the meta tags (if any) were available.
        return;
    }

    foreach (HtmlNode ul in uls)
    {
        HtmlNode org = ul.SelectSingleNode(".//span[@itemtype='http://schema.org/Place']");
        if (org != null)
        {
            laywer.Org = org.InnerText.Trim();
        }

        HtmlNode addressLink = ul.SelectSingleNode(".//div[@itemprop='address']");
        if (addressLink != null)
        {
            HtmlNode street = addressLink.SelectSingleNode(".//div[@class='btn-block']");
            if (street != null)
            {
                laywer.Street = street.InnerText.Trim();
            }
            HtmlNode addressLocality = addressLink.SelectSingleNode(".//span[@itemprop='addressLocality']");
            if (addressLocality != null)
            {
                laywer.AddressLocality = addressLocality.InnerText.Trim();
            }
            HtmlNode region = addressLink.SelectSingleNode(".//span[@itemprop='addressRegion']");
            if (region != null)
            {
                laywer.Region = region.InnerText.Trim();
            }
            HtmlNode postalCode = addressLink.SelectSingleNode(".//span[@itemprop='postalCode']");
            if (postalCode != null)
            {
                laywer.PostalCode = postalCode.InnerText.Trim();
            }
        }

        HtmlNode webLink = ul.SelectSingleNode(".//a[@zb-role='web-vpl-link']");
        if (webLink != null)
        {
            laywer.WebUrl = webLink.InnerText.Trim();
        }

        var lis = ul.SelectNodes(".//li[@class='list-group-item']");
        if (lis == null)
        {
            continue;
        }

        foreach (HtmlNode li in lis)
        {
            string liText = li.InnerText;

            if (liText.Contains("County"))
            {
                laywer.County = liText.Replace("County:", "").Trim();
            }
            if (liText.Contains("Country"))
            {
                laywer.Country = liText.Replace("Country:", "").Trim();
            }

            // The list-item phone is only a fallback for a missing telephone.
            if (string.IsNullOrEmpty(laywer.Telphone) && (liText.Contains("(C)") || liText.Contains("(T)")))
            {
                laywer.Telphone = liText.Replace("(C)", "").Replace("(T)", "").Trim();
            }

            // Ordinal, case-insensitive match: ToLower() depends on the current culture
            // (e.g. Turkish dotless i) and could miss "REGISTRATION".
            bool hasAdmissionInfo =
                liText.IndexOf("registration", StringComparison.OrdinalIgnoreCase) >= 0 ||
                liText.IndexOf("admission", StringComparison.OrdinalIgnoreCase) >= 0;
            if (!hasAdmissionInfo)
            {
                continue;
            }

            // The item holds several "Label: value" entries separated by line-break tags;
            // accept every spelling of the tag (with or without slash/space, any case).
            foreach (var str in Regex.Split(li.InnerHtml, @"<br\s*/?>", RegexOptions.IgnoreCase))
            {
                if (str.Contains("State of Admission") || str.Contains("Licensed"))
                {
                    // Strip both labels this branch matches, so the "Licensed In:" label is removed too.
                    laywer.LicensedIn = str.Replace("State of Admission:", "").Replace("Licensed In:", "").Trim();
                }
                if (str.Contains("Registration"))
                {
                    laywer.Registration = str.Replace("Registration #:", "").Trim();
                }
                if (str.Contains("Date of Admission"))
                {
                    laywer.DateOfAdmission = str.Replace("Date of Admission:", "").Trim();
                }
            }
        }
    }
}
/// <summary>
/// Reads a vCard file from disk and copies the recognised fields onto <paramref name="laywer"/>.
/// </summary>
/// <param name="laywer">Model to populate; only fields present in the vCard are overwritten.</param>
/// <param name="file">Path of the .vcf file to read.</param>
/// <remarks>
/// The FN (formatted name) property is not applied, so <c>Name</c> is left untouched by this method.
/// Exceptions are not propagated; they are appended to C:\IIS\zeekbeek\error.txt.
/// </remarks>
public void ReadFromVcard(LaywerModel laywer, string file)
{
    try
    {
        foreach (var line in File.ReadAllLines(file))
        {
            // Split at the first colon only: everything before it is the property name plus
            // parameters, everything after it is the value. This strips just the leading prefix,
            // so a value that happens to contain the prefix text is no longer altered.
            int colon = line.IndexOf(':');
            if (colon < 0)
            {
                continue;
            }
            string key = line.Substring(0, colon);
            string value = line.Substring(colon + 1);

            switch (key)
            {
                case "N":
                    var name = value.Split(';');
                    if (name.Length >= 3)
                    {
                        laywer.Surname = name[0];
                        laywer.GivenName = name[1];
                        laywer.MiddleName = name[2];
                    }
                    break;
                case "ORG":
                    // Component separators are dropped so the parts run together.
                    laywer.Org = value.Replace(";", "");
                    break;
                case "TITLE":
                    laywer.Title = value;
                    break;
                case "EMAIL;PREF;INTERNET":
                    laywer.Email = value;
                    break;
                case "URL;WORK":
                    laywer.WebUrl = value;
                    break;
                case "URL;PREF;WORK":
                    laywer.ZeekBeekUrl = value;
                    break;
                case "TEL;WORK;FAX":
                    laywer.Fax = value;
                    break;
                case "TEL;WORK;VOICE":
                    laywer.Telphone = value;
                    break;
                case "TEL;CELL;VOICE":
                    laywer.Cellphone = value;
                    break;
                case "ADR;PREF;WORK;PARCEL;ENCODING=QUOTED-PRINTABLE":
                    // Components are stored as read; quoted-printable escapes are not decoded here.
                    var address = value.Split(';');
                    if (address.Length >= 7)
                    {
                        laywer.Street = address[2];
                        laywer.AddressLocality = address[3];
                        laywer.Region = address[4];
                        laywer.PostalCode = address[5];
                        laywer.Country = address[6];
                    }
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        File.AppendAllText(@"C:\IIS\zeekbeek\error.txt", "ReadFromVcard exception:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\r\n");
        File.AppendAllText(@"C:\IIS\zeekbeek\error.txt", ex.ToString() + "\r\n");
    }
}