/// <summary>
/// Loads rows from the BuyerInfo table and maps each one to a <see cref="BuyerInfo"/>.
/// </summary>
/// <param name="query">
/// Paging options. When <c>IsPager</c> is true, <c>RecordCount</c> is filled from
/// <c>dbHelper.GetItemCount</c> for the unpaged statement and the statement is extended with
/// <c>order by Id asc limit Start, PageSize</c>; otherwise every row is returned.
/// </param>
/// <returns>One <see cref="BuyerInfo"/> per returned row; empty when there are no rows.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
public List<BuyerInfo> GetBuyerInforList(QueryObject<BuyerInfo> query)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    string sql = "SELECT Id, Type, CompanyName, CompanyInfo, Category, Mail,BuyerName, ContactInfo, Url, UrlTitle, Status FROM BuyerInfo";
    if (query.IsPager)
    {
        // The total is computed from the unpaged statement before the limit clause is appended.
        query.RecordCount = dbHelper.GetItemCount(sql, null);
        // NOTE(review): Start/PageSize are concatenated into the SQL text; presumably numeric — confirm.
        sql = sql + " order by Id asc limit " + query.Start + ", " + query.PageSize;
    }

    DataTable dt = dbHelper.ExecuteDataTable(sql, null);
    List<BuyerInfo> list = new List<BuyerInfo>(dt.Rows.Count);
    foreach (DataRow row in dt.Rows)
    {
        BuyerInfo info = new BuyerInfo();
        info.Id = Convert.ToInt32(row["Id"]);
        info.Type = ReadBuyerTextColumn(row, "Type");
        info.CompanyName = ReadBuyerTextColumn(row, "CompanyName");
        info.CompanyInfo = ReadBuyerTextColumn(row, "CompanyInfo");
        info.Category = ReadBuyerTextColumn(row, "Category");
        info.Mail = ReadBuyerTextColumn(row, "Mail");
        info.BuyerName = ReadBuyerTextColumn(row, "BuyerName");
        info.ContactInfo = ReadBuyerTextColumn(row, "ContactInfo");
        info.Url = ReadBuyerTextColumn(row, "Url");
        info.UrlTitle = ReadBuyerTextColumn(row, "UrlTitle");

        // The insert statement in this class does not write Status, so the column may be NULL;
        // Convert.ToInt32 would throw on DBNull, so the property keeps its default in that case.
        if (!row.IsNull("Status"))
        {
            info.Status = Convert.ToInt32(row["Status"]);
        }

        list.Add(info);
    }

    return list;
}

/// <summary>
/// Reads a text column from a row, returning null for a database NULL instead of
/// failing on the DBNull-to-string cast.
/// </summary>
private static string ReadBuyerTextColumn(DataRow row, string columnName)
{
    return row.IsNull(columnName) ? null : (string)row[columnName];
}
/// <summary>
/// Inserts one BuyerInfo row per e-mail address in <c>item.Emails</c>, skipping
/// addresses for which <c>ExistEmail</c> returns true.
/// </summary>
/// <param name="item">
/// Source record. All inserted rows share its fields; only the Mail column differs.
/// An empty Category is stored as "-". Nothing is inserted when <c>Emails</c> is null or empty.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
public void Insert(BuyerInfo item)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    // Emails is only assigned by the scraper when the expected page nodes were found, so it
    // may never have been set; treat that the same as "no addresses" instead of dereferencing null.
    if (item.Emails == null || item.Emails.Count == 0)
    {
        return;
    }

    string insertSql = @"Insert into BuyerInfo(Type, CompanyName, CompanyInfo, Category, Mail,BuyerName, ContactInfo, Url, UrlTitle) "
        + "values(@Type, @CompanyName, @CompanyInfo, @Category, @Mail, @BuyerName, @ContactInfo, @Url, @UrlTitle)";

    // Loop-invariant: the stored category does not depend on the address being inserted.
    string category = string.IsNullOrEmpty(item.Category) ? "-" : item.Category;

    foreach (string mail in item.Emails)
    {
        if (ExistEmail(mail))
        {
            continue;
        }

        SQLiteParameter[] parameters = new SQLiteParameter[]
        {
            new SQLiteParameter("@Type", item.Type),
            new SQLiteParameter("@CompanyName", item.CompanyName),
            new SQLiteParameter("@CompanyInfo", item.CompanyInfo),
            new SQLiteParameter("@Category", category),
            new SQLiteParameter("@Mail", mail),
            new SQLiteParameter("@BuyerName", item.BuyerName),
            new SQLiteParameter("@ContactInfo", item.ContactInfo),
            new SQLiteParameter("@Url", item.Url),
            new SQLiteParameter("@UrlTitle", item.UrlTitle)
        };
        dbHelper.ExecuteNonQuery(insertSql, parameters);
    }
}
// Label patterns used to pull single-line values out of the scraped contact text.
// Each captures the text between the label and the next line feed.
private static readonly Regex BilingualContactPersonPattern = new Regex(@"Contact Person \(联系人\):(.*?)\n");
private static readonly Regex ContactPersonPattern = new Regex(@"Contact Person:(.*?)\n");
private static readonly Regex CompanyLabelPattern = new Regex(@"Company:(.*?)\n");
private static readonly Regex CompanyShortLabelPattern = new Regex(@"Co:(.*?)\n");

// XPath of the second table cell of each row inside the "information" fieldset.
private const string InformationCellXPath = "//fieldset[@class='fieldgroup group-information']/table/tbody/tr/td[2]";

/// <summary>
/// Builds a <see cref="BuyerInfo"/> from a downloaded page, choosing the extraction
/// rules from the URL.
/// </summary>
/// <remarks>
/// URLs containing "/company/" are handled as "Buyer" pages when they start with
/// "http://buyer.waimaoba.com" and as "Company" pages otherwise. URLs containing
/// "/inquiry/" or "/content/" are handled as "Inquiry" pages. The two checks are
/// independent, so a URL matching both is processed twice and the inquiry values win.
/// URL tests are ordinal (culture-independent).
/// </remarks>
/// <param name="url">Address the page was loaded from; stored in <c>Url</c>.</param>
/// <param name="document">Parsed page to read from.</param>
/// <returns>
/// A new <see cref="BuyerInfo"/>. Only <c>Url</c> and <c>UrlTitle</c> are assigned when the
/// URL matches none of the known page kinds.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> or <paramref name="document"/> is null.</exception>
public BuyerInfo GetBuyerInfo(string url, HtmlDocument document)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    BuyerInfo buyerInfo = new BuyerInfo();
    buyerInfo.Url = url;
    buyerInfo.UrlTitle = HtmlUtils.GetHtmlNodeText(document.DocumentNode.SelectSingleNode("/html/head/title"));

    if (url.IndexOf("/company/", StringComparison.Ordinal) > 0)
    {
        if (url.StartsWith("http://buyer.waimaoba.com", StringComparison.Ordinal))
        {
            FillFromBuyerPage(buyerInfo, document);
        }
        else
        {
            FillFromCompanyPage(buyerInfo, document);
        }
    }

    if (url.IndexOf("/inquiry/", StringComparison.Ordinal) > 0
        || url.IndexOf("/content/", StringComparison.Ordinal) > 0)
    {
        FillFromInquiryPage(buyerInfo, document);
    }

    return buyerInfo;
}

/// <summary>Extracts the fields of a "Buyer" page (company page on the buyer host).</summary>
private void FillFromBuyerPage(BuyerInfo buyerInfo, HtmlDocument document)
{
    buyerInfo.Type = "Buyer";

    HtmlNodeCollection blocks = document.DocumentNode.SelectNodes(InformationCellXPath + "/div");
    if (blocks != null && blocks.Count > 1)
    {
        StoreCompanyInfo(buyerInfo, blocks[0]);
        // The company name is the second div inside the first information block.
        StoreCompanyNameFromNode(buyerInfo, blocks[0].SelectSingleNode("div[2]"));

        string contactInfo = StoreContactInfo(buyerInfo, blocks[1]);
        buyerInfo.Emails = HtmlUtils.getEmails(contactInfo);

        // Prefer the bilingual label; fall back to the plain one when it gave no name.
        StoreBuyerName(buyerInfo, BilingualContactPersonPattern, contactInfo);
        if (string.IsNullOrEmpty(buyerInfo.BuyerName))
        {
            StoreBuyerName(buyerInfo, ContactPersonPattern, contactInfo);
        }
    }

    StoreCategory(buyerInfo, document, "//fieldset[@class='fieldgroup group-industry']/table/tbody/tr/td/div/div[2]");
}

/// <summary>Extracts the fields of a "Company" page (company page on any other host).</summary>
private void FillFromCompanyPage(BuyerInfo buyerInfo, HtmlDocument document)
{
    buyerInfo.Type = "Company";

    HtmlNodeCollection cells = document.DocumentNode.SelectNodes(InformationCellXPath);
    if (cells != null && cells.Count > 1)
    {
        StoreCompanyInfo(buyerInfo, cells[0]);
        StoreCompanyNameFromNode(buyerInfo, cells[0].SelectSingleNode("div/div[2]"));

        // Contact method block.
        string contactInfo = StoreContactInfo(buyerInfo, cells[1].SelectSingleNode("div/div[2]"));
        StoreBuyerName(buyerInfo, ContactPersonPattern, contactInfo);
        buyerInfo.Emails = HtmlUtils.getEmails(contactInfo);
    }

    // This page kind nests the category one div deeper than the other two.
    StoreCategory(buyerInfo, document, "//fieldset[@class='fieldgroup group-industry']/table/tbody/tr/td/div/div/div[2]");
}

/// <summary>Extracts the fields of an "Inquiry" page.</summary>
private void FillFromInquiryPage(BuyerInfo buyerInfo, HtmlDocument document)
{
    buyerInfo.Type = "Inquiry";

    HtmlNodeCollection cells = document.DocumentNode.SelectNodes(InformationCellXPath);
    if (cells != null && cells.Count > 1)
    {
        StoreCompanyInfo(buyerInfo, cells[0]);

        // Contact method block; on inquiry pages it also carries the company name.
        string contactInfo = StoreContactInfo(buyerInfo, cells[1].SelectSingleNode("div/div[2]"));
        StoreBuyerName(buyerInfo, ContactPersonPattern, contactInfo);

        StoreCompanyNameFromLabel(buyerInfo, CompanyLabelPattern, contactInfo);
        if (string.IsNullOrEmpty(buyerInfo.CompanyName))
        {
            StoreCompanyNameFromLabel(buyerInfo, CompanyShortLabelPattern, contactInfo);
        }

        buyerInfo.Emails = HtmlUtils.getEmails(contactInfo);
    }

    StoreCategory(buyerInfo, document, "//fieldset[@class='fieldgroup group-industry']/table/tbody/tr/td/div/div[2]");
}

/// <summary>Reads, logs and stores the company description taken from <paramref name="node"/>.</summary>
private void StoreCompanyInfo(BuyerInfo buyerInfo, HtmlNode node)
{
    string companyInfo = HtmlUtils.GetHtmlNodeText(node);
    HtmlUtils.Log("companyInfo==========================================\r\n" + companyInfo);
    buyerInfo.CompanyInfo = companyInfo;
}

/// <summary>Reads, logs and stores the company name taken from <paramref name="node"/>.</summary>
private void StoreCompanyNameFromNode(BuyerInfo buyerInfo, HtmlNode node)
{
    string companyName = HtmlUtils.GetHtmlNodeText(node);
    HtmlUtils.Log("companyName==========================================\r\n" + companyName);
    buyerInfo.CompanyName = companyName;
}

/// <summary>Reads, logs and stores the contact text taken from <paramref name="node"/>, and returns it.</summary>
private string StoreContactInfo(BuyerInfo buyerInfo, HtmlNode node)
{
    string contactInfo = HtmlUtils.GetHtmlNodeText(node);
    HtmlUtils.Log("contactInfo==========================================\r\n" + contactInfo);
    buyerInfo.ContactInfo = contactInfo;
    return contactInfo;
}

/// <summary>Sets <c>BuyerName</c> from the labelled value in the contact text, when the label is present.</summary>
private void StoreBuyerName(BuyerInfo buyerInfo, Regex labelPattern, string contactInfo)
{
    string person;
    if (TryReadLabelledValue(labelPattern, contactInfo, out person))
    {
        HtmlUtils.Log("person==========================================\r\n" + person);
        buyerInfo.BuyerName = person;
    }
}

/// <summary>Sets <c>CompanyName</c> from the labelled value in the contact text, when the label is present.</summary>
private void StoreCompanyNameFromLabel(BuyerInfo buyerInfo, Regex labelPattern, string contactInfo)
{
    string company;
    if (TryReadLabelledValue(labelPattern, contactInfo, out company))
    {
        HtmlUtils.Log("Company:==========================================\r\n" + company);
        buyerInfo.CompanyName = company;
    }
}

/// <summary>Stores the text of the node selected by <paramref name="xpath"/> as the category.</summary>
private void StoreCategory(BuyerInfo buyerInfo, HtmlDocument document, string xpath)
{
    HtmlNode categoryNode = document.DocumentNode.SelectSingleNode(xpath);
    buyerInfo.Category = HtmlUtils.GetHtmlNodeText(categoryNode);
}

/// <summary>
/// Applies <paramref name="labelPattern"/> to <paramref name="text"/> and returns the trimmed
/// first capture group. Null text counts as "no match" rather than letting Regex.Match throw.
/// </summary>
/// <returns>True when the pattern matched; the value may still be an empty string.</returns>
private static bool TryReadLabelledValue(Regex labelPattern, string text, out string value)
{
    value = null;
    if (text == null)
    {
        return false;
    }

    Match match = labelPattern.Match(text);
    if (!match.Success)
    {
        return false;
    }

    value = match.Groups[1].Value.Trim();
    return true;
}