/// <summary>
/// Reads the company details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        // Labelled values are located via their label span: the XPath steps up to the
        // span's parent and takes the td that follows it.
        var profile = new BuildUppProfile
        {
            NameOfCompany = this.GetTextByXPath("//*[@id='mainContainer']/h2"),
            OfficeAddress = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Address:']/../following-sibling::td"),
            PhoneNumber = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Tel:']/../following-sibling::td"),
            FaxNumber = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Fax:']/../following-sibling::td"),
            EmailAddress = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Email:']/../following-sibling::td"),
            WebsiteAddress = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Web:']/../following-sibling::td"),
            // Every "See also" link is joined into one comma-separated category string.
            Category = this.GroupAllTextByXPath("//*[@id='companyDetails']//span[text()='See also:']/../following-sibling::td/a", ", "),
            PageURL = driver.Url,
            WebsiteScraped = WebsiteName
        };

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads the consultant details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
protected override void ExtractItemFromPage()
{
    // Labels that prefix the individual lines of the second details cell.
    const string telephoneLabel = "Telephone: ";
    const string faxLabel = "Fax: ";
    const string mobileLabel = "Mobile: ";
    const string contactLabel = "Contact: ";

    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile
        {
            NameOfCompany = this.GetTextByXPath("//*[@id='consultant-header']/h1"),
            OfficeAddress = this.GetTextByXPath("//*[@id='consultant-tabs']/div[2]/table/tbody/tr[1]/td[1]/p"),
            EmailAddress = this.GetTextByXPath("//*[@id='consultant-tabs']//table//tr//p[starts-with(text(), 'Email:')]//a"),
            WebsiteAddress = this.GetTextByXPath("//*[@id='consultant-tabs']//table//tr//p[starts-with(text(), 'Website:')]//a"),
            // Paragraphs following the tabs element are joined with blank lines between them.
            AboutUs = this.GroupAllTextByXPath("//*[@id='consultant-tabs']/following-sibling::p", "\r\n\r\n")
        };

        // The remaining values share one paragraph: pick the line carrying each label,
        // then strip the label from it.
        string contactBlock = this.GetTextByXPath("//*[@id='consultant-tabs']//table/tbody/tr[1]/td[2]/p");

        string telephoneLine = this.GetLineContaining(contactBlock, telephoneLabel);
        profile.PhoneNumber = telephoneLine.Replace(telephoneLabel, "");

        string faxLine = this.GetLineContaining(contactBlock, faxLabel);
        profile.FaxNumber = faxLine.Replace(faxLabel, "");

        string mobileLine = this.GetLineContaining(contactBlock, mobileLabel);
        profile.MobileNumber = mobileLine.Replace(mobileLabel, "");

        string contactLine = this.GetLineContaining(contactBlock, contactLabel);
        profile.ContactName = contactLine.Replace(contactLabel, "");

        profile.PageURL = driver.Url;
        profile.WebsiteScraped = WebsiteName;

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads one entry of the member listing on the page currently loaded in
/// <c>driver</c>, copies it into a new <c>BuildUppProfile</c> and saves it through
/// a <c>ScrapedItem</c> context. The entry is selected by <c>CurrentResultOnPage</c>.
/// </summary>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        // The li[...] predicate is built from CurrentResultOnPage plus one.
        var listPosition = this.CurrentResultOnPage + 1;

        var profile = new BuildUppProfile
        {
            NameOfCompany = this.GetTextByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[1]/h3", listPosition)),

            // The address is taken from the link's href, with the "mailto:" scheme removed.
            EmailAddress = this.GetAttributeValueByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[3]/ul/li/a[contains(.,'Email us')]", listPosition),
                "href")?.Replace("mailto:", ""),

            PhoneNumber = this.GetTextByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[3]/p/span/strong", listPosition)),

            OfficeAddress = this.GetTextByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[2]/p[@class='location']", listPosition)),

            ServicesProvided = this.GetTextByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[1]/p[2]/strong", listPosition)),

            WebsiteAddress = this.GetAttributeValueByXPath(
                string.Format("//*[@id='member-listing']/li[{0}]/div[3]/ul/li/a[contains(.,'Visit Website')]", listPosition),
                "href"),

            PageURL = driver.Url,
            WebsiteScraped = WebsiteName
        };

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads the business details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
/// <remarks>
/// The office address is assembled from the address, county, postcode and country
/// cells; parts that are null or empty are left out.
/// </remarks>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile();
        profile.NameOfCompany = this.GetTextByXPath("//*[@id='njh_container']/div[3]/div/div/div[1]/h2");

        string addressLine = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Address: ']/following-sibling::td");
        string county = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='County: ']/following-sibling::td");
        string postcode = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Postcode: ']/following-sibling::td");
        string country = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Country: ']/following-sibling::td");

        // Each part is added from its own value. The previous code added addressLine
        // again in place of country, which duplicated the address line, dropped the
        // country, and dereferenced a possibly-null addressLine.
        var address = new List<string>();
        foreach (string part in new[] { addressLine, county, postcode, country })
        {
            if (!string.IsNullOrEmpty(part))
            {
                // Strip surrounding whitespace and commas so the join below does not double them.
                address.Add(part.Trim().Trim(','));
            }
        }

        profile.OfficeAddress = string.Join(", ", address);

        profile.PhoneNumber = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Telephone: ']/following-sibling::td");
        profile.BusinessType = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Type: ']/following-sibling::td");
        profile.AreasServed = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Search District: ']/following-sibling::td");
        profile.EmailAddress = this.GetTextByXPath("//*[@id='OrderingForm']/table//span[@class='email_l']");
        profile.WebsiteAddress = this.GetTextByXPath("//*[@id='OrderingForm']/table//span[@class='website_l']");
        profile.AboutUs = this.GroupAllTextByXPath("//*[@id='OrderingForm']/table/tbody//div[@class='description']/following-sibling::*");

        profile.PageURL = driver.Url;
        profile.WebsiteScraped = WebsiteName;

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads the member details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile
        {
            NameOfCompany = this.GetTextByXPath("/html/body/div/section[1]/div/div/div/div/h2"),
            EmailAddress = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Email']/following-sibling::td/a"),
            PhoneNumber = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Telephone']/following-sibling::td"),
            WebsiteAddress = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Website']/following-sibling::td/a")
        };

        // The address is read from fixed rows of the member-info table.
        string[] addressRowXPaths =
        {
            "//*[@id='member-info']/tbody/tr[5]/td",
            "//*[@id='member-info']/tbody/tr[7]/td",
            "//*[@id='member-info']/tbody/tr[9]/td",
            "//*[@id='member-info']/tbody/tr[11]/td",
            "//*[@id='member-info']/tbody/tr[13]/td",
            "//*[@id='member-info']/tbody/tr[15]/td"
        };

        // Keep only the rows that contain something other than whitespace.
        var usableRows = new List<string>();
        foreach (string rowXPath in addressRowXPaths)
        {
            string rowText = this.GroupAllTextByXPath(rowXPath);
            if (string.IsNullOrEmpty(rowText?.Trim()))
            {
                continue;
            }

            usableRows.Add(rowText);
        }

        profile.OfficeAddress = string.Join(", ", usableRows);
        profile.PageURL = driver.Url;
        profile.WebsiteScraped = WebsiteName;

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Saves a new <c>BuildUppProfile</c> through a <c>ScrapedItem</c> context, holding
/// the text found by the <c>//h1</c> XPath as the company name, the current page
/// URL and the website name.
/// </summary>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile
        {
            NameOfCompany = this.GetTextByXPath("//h1"),
            PageURL = driver.Url,
            WebsiteScraped = WebsiteName
        };

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads the profile details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
/// <remarks>
/// Filling in the page URL and the website name stays best-effort: a failure there
/// is logged as a warning instead of being silently discarded, and the profile is
/// still added and saved without that value.
/// </remarks>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile();
        profile.NameOfCompany = this.GetTextByXPath("//*[@id='page_container']/header/div[3]/div/h1/span");
        profile.WebsiteAddress = this.GetAttributeValueByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[1]/a", "href");
        profile.OfficeAddress = this.GetTextByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[2]");
        profile.AreasServed = this.GroupAllTextByXPath("//*[@id='page_container']/section/div/div/div[3]/div[2]/ul/li", ", ");
        profile.AboutUs = this.GroupAllTextByXPath("//*[@id='page_container']/section/div/div/div[3]/div[1]/p");

        // The whole heading is read first; removing the company name (the heading's
        // span) from it leaves the contact name. The heading text is checked as well,
        // so a missing heading no longer causes a NullReferenceException here.
        profile.ContactName = this.GetTextByXPath("//*[@id='page_container']/header/div[3]/div/h1");
        if (!string.IsNullOrEmpty(profile.NameOfCompany) && !string.IsNullOrEmpty(profile.ContactName))
        {
            profile.ContactName = profile.ContactName.Replace(profile.NameOfCompany, "").Trim();
        }

        // NOTE(review): the line is searched with "Tel:" but the prefix removed is
        // "Tel: " (with a trailing space) - confirm the page always has that space.
        string infoBox = this.GetTextByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[1]");
        profile.PhoneNumber = this.GetLineContaining(infoBox, "Tel:").Replace("Tel: ", "");

        try
        {
            profile.PageURL = driver.Url;
        }
        catch (Exception e)
        {
            Trace.TraceWarning("Could not read the page URL for the scraped profile: " + e.Message);
        }

        try
        {
            profile.WebsiteScraped = WebsiteName;
        }
        catch (Exception e)
        {
            Trace.TraceWarning("Could not read the website name for the scraped profile: " + e.Message);
        }

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}
/// <summary>
/// Reads the company details from the page currently loaded in <c>driver</c>,
/// copies them into a new <c>BuildUppProfile</c> and saves it through a
/// <c>ScrapedItem</c> context.
/// </summary>
/// <remarks>
/// The office address is assembled from the street, supplemental address, city,
/// province, postcode and country spans; parts that are null or empty are left out.
/// </remarks>
protected override void ExtractItemFromPage()
{
    Trace.TraceInformation("Extracting infomation from " + driver.Url);

    using (var ctx = new ScrapedItem())
    {
        var profile = new BuildUppProfile();
        profile.NameOfCompany = this.GetTextByXPath("//*[@id='block-system-main']/div/div/table/tbody/tr/td/div[1]/h1");
        profile.ContactName = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Contact Name: ']/following-sibling::span");
        profile.PhoneNumber = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Telephone: ']/following-sibling::span");
        profile.EmailAddress = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Email Address: ']/following-sibling::span");
        profile.WebsiteAddress = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Website: ']/following-sibling::span");

        // OfficeAddress is set only once, from the address parts further down. The
        // earlier assignment from a 'Contact Name: ' XPath was always overwritten, so
        // that extra page lookup has been removed.

        // The text is taken from the element two levels above the label, so the label
        // itself is part of it and has to be removed.
        profile.AboutUs = this.GetTextByXPath("//span[contains(@class, 'field-content')]/strong[text()='Company Information:']/../..");
        profile.AboutUs = string.IsNullOrEmpty(profile.AboutUs)
            ? null
            : profile.AboutUs.Replace("Company Information:", "").Trim();

        profile.ServicesProvided = this.GroupAllTextByXPath("//*[@id='buyersGuideCompany_productsAndServices']/li");

        string streetAddress = this.GetTextByXPath("//span[contains(@class, 'views-field-street-address')]/span");
        string addressLine1 = this.GetTextByXPath("//span[contains(@class, 'views-field-supplemental-address-1')]/span");
        string city = this.GetTextByXPath("//span[contains(@class, 'views-field-city')]/span");
        string province = this.GetTextByXPath("//span[contains(@class, 'views-field-state-province')]/span");
        string postcode = this.GetTextByXPath("//span[contains(@class, 'views-field-postal-code')]/span");
        string country = this.GetTextByXPath("//span[contains(@class, 'views-field-country')]/span");

        var address = new List<string>();
        foreach (string part in new[] { streetAddress, addressLine1, city, province, postcode, country })
        {
            if (!string.IsNullOrEmpty(part))
            {
                // Strip surrounding whitespace and commas so the join below does not double them.
                address.Add(part.Trim().Trim(','));
            }
        }

        profile.OfficeAddress = string.Join(", ", address);
        profile.PageURL = driver.Url;
        profile.WebsiteScraped = WebsiteName;

        ctx.BuildUppProfiles.Add(profile);
        ctx.SaveChanges();

        Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
    }
}