Esempio n. 1
0
        /// <summary>
        /// Reads the company details shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation(string.Concat("Extracting infomation from ", driver.Url));

            using (var context = new ScrapedItem())
            {
                // Each detail is located by its label span inside #companyDetails;
                // the value is taken from the table cell following the label's parent.
                var company = new BuildUppProfile
                {
                    NameOfCompany  = this.GetTextByXPath("//*[@id='mainContainer']/h2"),
                    OfficeAddress  = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Address:']/../following-sibling::td"),
                    PhoneNumber    = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Tel:']/../following-sibling::td"),
                    FaxNumber      = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Fax:']/../following-sibling::td"),
                    EmailAddress   = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Email:']/../following-sibling::td"),
                    WebsiteAddress = this.GetTextByXPath("//*[@id='companyDetails']//span[text()='Web:']/../following-sibling::td"),

                    // All "See also" links are grouped into one comma separated value.
                    Category       = this.GroupAllTextByXPath("//*[@id='companyDetails']//span[text()='See also:']/../following-sibling::td/a", ", "),

                    // Provenance of the record.
                    PageURL        = driver.Url,
                    WebsiteScraped = WebsiteName
                };

                context.BuildUppProfiles.Add(company);
                context.SaveChanges();

                Trace.TraceInformation(string.Concat("Completed scraping infomation from ", driver.Url));
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Reads the consultant details shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        /// <remarks>
        /// Phone, fax, mobile and contact name are all taken from one free-text
        /// paragraph, one labelled line each; the label is stripped from the line.
        /// </remarks>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation("Extracting infomation from " + driver.Url);
            using (var ctx = new ScrapedItem())
            {
                var profile = new BuildUppProfile();

                profile.NameOfCompany  = this.GetTextByXPath("//*[@id='consultant-header']/h1");
                profile.OfficeAddress  = this.GetTextByXPath("//*[@id='consultant-tabs']/div[2]/table/tbody/tr[1]/td[1]/p");
                profile.EmailAddress   = this.GetTextByXPath("//*[@id='consultant-tabs']//table//tr//p[starts-with(text(), 'Email:')]//a");
                profile.WebsiteAddress = this.GetTextByXPath("//*[@id='consultant-tabs']//table//tr//p[starts-with(text(), 'Website:')]//a");
                profile.AboutUs        = this.GroupAllTextByXPath("//*[@id='consultant-tabs']/following-sibling::p", "\r\n\r\n");
                string otherDetails = this.GetTextByXPath("//*[@id='consultant-tabs']//table/tbody/tr[1]/td[2]/p");

                // Take the labelled line from the paragraph, then strip the label.
                // NOTE(review): assumes GetLineContaining may return null when the
                // paragraph is missing or has no matching line - confirm against the
                // helper. The null-conditional keeps an absent optional field from
                // aborting the whole record with a NullReferenceException.
                profile.PhoneNumber  = this.GetLineContaining(otherDetails, "Telephone: ")?.Replace("Telephone: ", "");
                profile.FaxNumber    = this.GetLineContaining(otherDetails, "Fax: ")?.Replace("Fax: ", "");
                profile.MobileNumber = this.GetLineContaining(otherDetails, "Mobile: ")?.Replace("Mobile: ", "");
                profile.ContactName  = this.GetLineContaining(otherDetails, "Contact: ")?.Replace("Contact: ", "");

                // Provenance of the record.
                profile.PageURL        = driver.Url;
                profile.WebsiteScraped = WebsiteName;

                ctx.BuildUppProfiles.Add(profile);
                ctx.SaveChanges();

                Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reads one entry of the member listing on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation(string.Concat("Extracting infomation from ", driver.Url));

            using (var context = new ScrapedItem())
            {
                // Position of the listing entry used in every XPath below.
                // NOTE(review): presumably CurrentResultOnPage is zero-based and the
                // +1 converts it to XPath's one-based position - confirm.
                int listPosition = this.CurrentResultOnPage + 1;

                string nameXPath     = string.Format("//*[@id='member-listing']/li[{0}]/div[1]/h3", listPosition);
                string emailXPath    = string.Format("//*[@id='member-listing']/li[{0}]/div[3]/ul/li/a[contains(.,'Email us')]", listPosition);
                string phoneXPath    = string.Format("//*[@id='member-listing']/li[{0}]/div[3]/p/span/strong", listPosition);
                string addressXPath  = string.Format("//*[@id='member-listing']/li[{0}]/div[2]/p[@class='location']", listPosition);
                string servicesXPath = string.Format("//*[@id='member-listing']/li[{0}]/div[1]/p[2]/strong", listPosition);
                string websiteXPath  = string.Format("//*[@id='member-listing']/li[{0}]/div[3]/ul/li/a[contains(.,'Visit Website')]", listPosition);

                var member = new BuildUppProfile
                {
                    NameOfCompany    = this.GetTextByXPath(nameXPath),

                    // The e-mail link is a mailto: href; keep only the address part.
                    EmailAddress     = this.GetAttributeValueByXPath(emailXPath, "href")?.Replace("mailto:", ""),
                    PhoneNumber      = this.GetTextByXPath(phoneXPath),
                    OfficeAddress    = this.GetTextByXPath(addressXPath),
                    ServicesProvided = this.GetTextByXPath(servicesXPath),
                    WebsiteAddress   = this.GetAttributeValueByXPath(websiteXPath, "href"),

                    // Provenance of the record.
                    PageURL          = driver.Url,
                    WebsiteScraped   = WebsiteName
                };

                context.BuildUppProfiles.Add(member);
                context.SaveChanges();

                Trace.TraceInformation(string.Concat("Completed scraping infomation from ", driver.Url));
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the company details shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        /// <remarks>
        /// The office address is assembled from the address, county, postcode and
        /// country cells; empty parts are skipped and the rest joined with ", ".
        /// </remarks>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation("Extracting infomation from " + driver.Url);
            using (var ctx = new ScrapedItem())
            {
                var profile = new BuildUppProfile();

                profile.NameOfCompany = this.GetTextByXPath("//*[@id='njh_container']/div[3]/div/div/div[1]/h2");

                string addressLine = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Address: ']/following-sibling::td");
                string county      = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='County: ']/following-sibling::td");
                string postcode    = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Postcode: ']/following-sibling::td");
                string country     = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Country: ']/following-sibling::td");

                // Collect the non-empty parts in display order. Handling all parts in
                // one loop fixes the earlier copy-paste slip where the address line
                // was appended a second time in place of the country.
                List<string> address = new List<string>();
                foreach (string part in new[] { addressLine, county, postcode, country })
                {
                    if (!string.IsNullOrEmpty(part))
                    {
                        // Strip surrounding whitespace first, then any leading/trailing
                        // commas, so the join below does not double up separators.
                        address.Add(part.Trim().Trim(','));
                    }
                }

                profile.OfficeAddress = string.Join(", ", address);

                profile.PhoneNumber  = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Telephone: ']/following-sibling::td");
                profile.BusinessType = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Type: ']/following-sibling::td");
                profile.AreasServed  = this.GetTextByXPath("//*[@id='OrderingForm']/table//tr//td//table//td[text()='Search District: ']/following-sibling::td");

                profile.EmailAddress   = this.GetTextByXPath("//*[@id='OrderingForm']/table//span[@class='email_l']");
                profile.WebsiteAddress = this.GetTextByXPath("//*[@id='OrderingForm']/table//span[@class='website_l']");

                profile.AboutUs = this.GroupAllTextByXPath("//*[@id='OrderingForm']/table/tbody//div[@class='description']/following-sibling::*");

                // Provenance of the record.
                profile.PageURL        = driver.Url;
                profile.WebsiteScraped = WebsiteName;

                ctx.BuildUppProfiles.Add(profile);
                ctx.SaveChanges();

                Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Reads the member details shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        /// <remarks>
        /// The office address is built from six fixed rows of the member-info table;
        /// rows that are null or whitespace-only are left out.
        /// </remarks>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation(string.Concat("Extracting infomation from ", driver.Url));

            using (var context = new ScrapedItem())
            {
                var member = new BuildUppProfile
                {
                    NameOfCompany  = this.GetTextByXPath("/html/body/div/section[1]/div/div/div/div/h2"),
                    EmailAddress   = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Email']/following-sibling::td/a"),
                    PhoneNumber    = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Telephone']/following-sibling::td"),
                    WebsiteAddress = this.GetTextByXPath("//*[@id='member-info']/tbody/tr/th[text()='Website']/following-sibling::td/a")
                };

                // Table rows that may hold one address line each, in display order.
                string[] addressRowXPaths =
                {
                    "//*[@id='member-info']/tbody/tr[5]/td",
                    "//*[@id='member-info']/tbody/tr[7]/td",
                    "//*[@id='member-info']/tbody/tr[9]/td",
                    "//*[@id='member-info']/tbody/tr[11]/td",
                    "//*[@id='member-info']/tbody/tr[13]/td",
                    "//*[@id='member-info']/tbody/tr[15]/td"
                };

                var usableLines = new List<string>();
                foreach (string rowXPath in addressRowXPaths)
                {
                    string rowText = this.GroupAllTextByXPath(rowXPath);

                    // Skip rows with nothing but whitespace; the text that is kept
                    // is stored exactly as read (not trimmed).
                    if (rowText == null || rowText.Trim().Length == 0)
                    {
                        continue;
                    }

                    usableLines.Add(rowText);
                }

                member.OfficeAddress = string.Join(", ", usableLines);

                // Provenance of the record.
                member.PageURL        = driver.Url;
                member.WebsiteScraped = WebsiteName;

                context.BuildUppProfiles.Add(member);
                context.SaveChanges();

                Trace.TraceInformation(string.Concat("Completed scraping infomation from ", driver.Url));
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Stores the page's first-level heading as the company name in a new
        /// <c>BuildUppProfile</c>, together with the page URL and website name,
        /// and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation(string.Concat("Extracting infomation from ", driver.Url));

            using (var context = new ScrapedItem())
            {
                var company = new BuildUppProfile
                {
                    NameOfCompany  = this.GetTextByXPath("//h1"),

                    // Provenance of the record.
                    PageURL        = driver.Url,
                    WebsiteScraped = WebsiteName
                };

                context.BuildUppProfiles.Add(company);
                context.SaveChanges();

                Trace.TraceInformation(string.Concat("Completed scraping infomation from ", driver.Url));
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Reads the profile shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        /// <remarks>
        /// The contact name is whatever remains of the page heading once the
        /// company name (the heading's inner span) has been removed from it.
        /// Reading the page URL and website name stays best-effort, but a failure
        /// is now written to the trace log rather than discarded.
        /// </remarks>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation("Extracting infomation from " + driver.Url);
            using (var ctx = new ScrapedItem())
            {
                var profile = new BuildUppProfile();

                profile.NameOfCompany  = this.GetTextByXPath("//*[@id='page_container']/header/div[3]/div/h1/span");
                profile.WebsiteAddress = this.GetAttributeValueByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[1]/a", "href");
                profile.OfficeAddress  = this.GetTextByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[2]");
                profile.AreasServed    = this.GroupAllTextByXPath("//*[@id='page_container']/section/div/div/div[3]/div[2]/ul/li", ", ");
                profile.AboutUs        = this.GroupAllTextByXPath("//*[@id='page_container']/section/div/div/div[3]/div[1]/p");
                profile.ContactName    = this.GetTextByXPath("//*[@id='page_container']/header/div[3]/div/h1");

                // Only strip the company name when both texts were actually found;
                // calling Replace on a missing heading would throw.
                if (!string.IsNullOrEmpty(profile.NameOfCompany) && !string.IsNullOrEmpty(profile.ContactName))
                {
                    profile.ContactName = profile.ContactName.Replace(profile.NameOfCompany, "").Trim();
                }

                string infoBox = this.GetTextByXPath("//*[@id='page_container']/section/div/div/div[2]/div[2]/div/div/p[1]");

                // Strip the same label that was searched for ("Tel:"), then trim, so
                // both "Tel: 0123" and "Tel:0123" yield the bare number.
                // NOTE(review): assumes GetLineContaining may return null when no
                // line matches - confirm against the helper.
                profile.PhoneNumber = this.GetLineContaining(infoBox, "Tel:")?.Replace("Tel:", "").Trim();

                try
                {
                    profile.PageURL = driver.Url;
                }
                catch (Exception e)
                {
                    // Best-effort: keep the record even without its URL, but leave a trace.
                    Trace.TraceWarning("Could not read the page URL: " + e.Message);
                }

                try
                {
                    profile.WebsiteScraped = WebsiteName;
                }
                catch (Exception e)
                {
                    // Best-effort: keep the record even without the website name.
                    Trace.TraceWarning("Could not read the website name: " + e.Message);
                }

                ctx.BuildUppProfiles.Add(profile);
                ctx.SaveChanges();

                Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Reads the company details shown on the current page into a new
        /// <c>BuildUppProfile</c> and saves it through a <c>ScrapedItem</c> context.
        /// </summary>
        /// <remarks>
        /// The office address is assembled from the street, supplemental line, city,
        /// province, postcode and country fields; empty parts are skipped and the
        /// rest joined with ", ".
        /// </remarks>
        protected override void ExtractItemFromPage()
        {
            Trace.TraceInformation("Extracting infomation from " + driver.Url);
            using (var ctx = new ScrapedItem())
            {
                var profile = new BuildUppProfile();

                profile.NameOfCompany    = this.GetTextByXPath("//*[@id='block-system-main']/div/div/table/tbody/tr/td/div[1]/h1");
                profile.ContactName      = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Contact Name: ']/following-sibling::span");
                profile.PhoneNumber      = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Telephone: ']/following-sibling::span");
                profile.EmailAddress     = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Email Address: ']/following-sibling::span");
                profile.WebsiteAddress   = this.GetTextByXPath("//div[contains(@class,'views-field')]/strong[text()='Website: ']/following-sibling::span");

                // The earlier lookup that set OfficeAddress from a "Contact Name" XPath
                // was removed: its result was always overwritten by the assembled
                // address below, so it only cost an extra page query.

                // The matched element includes its own label; drop it and store null
                // when there is no text at all.
                profile.AboutUs          = this.GetTextByXPath("//span[contains(@class, 'field-content')]/strong[text()='Company Information:']/../..");
                profile.AboutUs          = string.IsNullOrEmpty(profile.AboutUs) ? null : profile.AboutUs.Replace("Company Information:", "").Trim();
                profile.ServicesProvided = this.GroupAllTextByXPath("//*[@id='buyersGuideCompany_productsAndServices']/li");

                string streetAddress = this.GetTextByXPath("//span[contains(@class, 'views-field-street-address')]/span");
                string addressLine1  = this.GetTextByXPath("//span[contains(@class, 'views-field-supplemental-address-1')]/span");
                string city          = this.GetTextByXPath("//span[contains(@class, 'views-field-city')]/span");
                string province      = this.GetTextByXPath("//span[contains(@class, 'views-field-state-province')]/span");
                string postcode      = this.GetTextByXPath("//span[contains(@class, 'views-field-postal-code')]/span");
                string country       = this.GetTextByXPath("//span[contains(@class, 'views-field-country')]/span");

                // Collect the non-empty parts in display order.
                List<string> address = new List<string>();
                foreach (string part in new[] { streetAddress, addressLine1, city, province, postcode, country })
                {
                    if (!string.IsNullOrEmpty(part))
                    {
                        // Strip surrounding whitespace first, then any leading/trailing
                        // commas, so the join below does not double up separators.
                        address.Add(part.Trim().Trim(','));
                    }
                }

                profile.OfficeAddress = string.Join(", ", address);

                // Provenance of the record.
                profile.PageURL        = driver.Url;
                profile.WebsiteScraped = WebsiteName;

                ctx.BuildUppProfiles.Add(profile);
                ctx.SaveChanges();

                Trace.TraceInformation("Completed scraping infomation from " + driver.Url);
            }
        }