/// <summary>
/// Opens a browser session against healthlibrary.epnet.com, submits the
/// <c>sUserName</c>/<c>sPassword</c> form to the authenticate page, runs
/// <c>condtionsearch</c> for <paramref name="id"/>, and passes the response of the
/// follow-up post to <c>CondtionResults</c>.
/// </summary>
/// <param name="id">Search key forwarded to <c>condtionsearch</c> and <c>CondtionResults</c>.</param>
/// <param name="db">Database context forwarded to <c>CondtionResults</c>.</param>
public static void Scrapeit(string id, ApportiswebscrapperContext db)
{
    const string authenticateUrl = "http://healthlibrary.epnet.com/authenticate.aspx?token=0EF30C79-8A3A-42F4-BE9F-2A90807D3B47&ReturnUrl=http%3a%2f%2fhealthlibrary.epnet.com%2fGetContent.aspx%3faccount%3dhlhosp";
    const string contentUrl = "http://healthlibrary.epnet.com/GetContent.aspx";

    // No try/catch here: the previous `catch (Exception ex) { throw ex; }` only
    // reset the stack trace. Exceptions now propagate with their original trace.
    BrowserSession b = new BrowserSession();
    b.Get(authenticateUrl);
    b.FormElements["sUserName"] = "******";
    b.FormElements["sPassword"] = "******";
    b.Post(authenticateUrl);

    condtionsearch(b, id);

    HtmlNode response = b.Post(contentUrl);
    CondtionResults(b, response, id, db);
}
/// <summary>
/// Opens a browser session against healthlibrary.epnet.com, submits the
/// <c>sUserName</c>/<c>sPassword</c> form to the authenticate page, runs
/// <c>essentialsearch</c> for <paramref name="search"/>, and passes the response of the
/// follow-up post to <c>DefaultResults</c>.
/// </summary>
/// <param name="search">Search keyword forwarded to <c>essentialsearch</c> and <c>DefaultResults</c>.</param>
/// <param name="db">Database context forwarded to <c>DefaultResults</c>.</param>
/// <returns>
/// The string <c>"true"</c> when the run completes. Failures are reported by exception,
/// never by a <c>"false"</c> return value.
/// </returns>
public static string Scrapeit(string search, ApportiswebscrapperContext db)
{
    const string authenticateUrl = "http://healthlibrary.epnet.com/authenticate.aspx?token=0EF30C79-8A3A-42F4-BE9F-2A90807D3B47&ReturnUrl=http%3a%2f%2fhealthlibrary.epnet.com%2fGetContent.aspx%3faccount%3dhlhosp";

    // No try/catch here: the previous handler did `throw ex;` (which reset the stack
    // trace) followed by an unreachable `return "false"`. Exceptions still propagate
    // to the caller, now with their original trace.
    BrowserSession b = new BrowserSession();
    b.Get(authenticateUrl);
    b.FormElements["sUserName"] = "******";
    b.FormElements["sPassword"] = "******";
    b.Post(authenticateUrl);

    essentialsearch(b, search);

    // NOTE(review): the results are fetched by posting to the authenticate URL again
    // rather than to GetContent.aspx - kept as-is; confirm this is intentional.
    HtmlNode response = b.Post(authenticateUrl);
    DefaultResults(b, response, db, search);

    return "true";
}
/// <summary>
/// Follows every link found in the <c>ep_searchResultTable</c> table of
/// <paramref name="resulttable"/>, reads the section divs of each linked page and
/// saves one <c>Essentials</c> row per link.
/// </summary>
/// <param name="b">Browser session used to download each linked page.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
/// <param name="search">Keyword stored in <c>Searchkeyword</c> on every row.</param>
private static void DefaultResults(BrowserSession b, HtmlNode resulttable, ApportiswebscrapperContext db, string search)
{
    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so a search without results must be handled explicitly.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int articleNo = 0;
    foreach (var row in listoflinks)
    {
        articleNo++;

        var href = row.Attributes["href"].Value;
        HtmlNode page = b.Get(HttpUtility.HtmlDecode(href));

        Essentials obj = new Essentials();

        // Each section is optional; a missing div leaves the property unset.
        var definition = page.SelectSingleNode("//div[@id='definition']");
        if (definition != null)
        {
            obj.definition = definition.InnerText;
        }

        var causes = page.SelectSingleNode("//div[@id='causes']");
        if (causes != null)
        {
            obj.causes = causes.InnerText;
        }

        var risk = page.SelectSingleNode("//div[@id='risk']");
        if (risk != null)
        {
            obj.risk = risk.InnerText;
        }

        var symptoms = page.SelectSingleNode("//div[@id='symptoms']");
        if (symptoms != null)
        {
            obj.symptoms = symptoms.InnerText;
        }

        var treatment = page.SelectSingleNode("//div[@id='treatment']");
        if (treatment != null)
        {
            obj.treatment = treatment.InnerText;
        }

        var prevention = page.SelectSingleNode("//div[@id='prevention']");
        if (prevention != null)
        {
            obj.prevention = prevention.InnerText;
        }

        obj.Articleno = articleNo;
        obj.Searchkeyword = search;

        // Save failures propagate unchanged; the former catch block only rethrew.
        db.Essentials.Add(obj);
        db.SaveChanges();
    }
}
/// <summary>
/// Follows at most the first three links found in the <c>ep_searchResultTable</c> table
/// of <paramref name="resulttable"/>, reads the section divs of each linked page and
/// saves one <c>Procedures</c> row per link.
/// </summary>
/// <param name="b">Browser session used to download each linked page.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="id">Not used by this method.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
private static void ProcedureResults(BrowserSession b, HtmlNode resulttable, string id, ApportiswebscrapperContext db)
{
    const int maxArticles = 3;

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so a search without results must be handled explicitly.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int count = 0;
    foreach (var row in listoflinks)
    {
        if (count == maxArticles)
        {
            break;
        }
        count++;

        var href = row.Attributes["href"].Value;
        HtmlNode page = b.Get(HttpUtility.HtmlDecode(href));

        Procedures obj = new Procedures();

        // Each section is optional; a missing div leaves the property unset.
        var section = page.SelectSingleNode("//div[@id='definition']");
        if (section != null)
        {
            obj.definition = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='reasons']");
        if (section != null)
        {
            obj.reasons = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='risk']");
        if (section != null)
        {
            obj.risk = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='expect']");
        if (section != null)
        {
            obj.expect = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='call']");
        if (section != null)
        {
            obj.call = section.InnerText;
        }

        // Save failures propagate with their original stack trace; the former
        // `catch (Exception ex) { throw ex; }` discarded it.
        db.Procedures.Add(obj);
        db.SaveChanges();
    }
}
/// <summary>
/// Follows at most three links found in the <c>ep_searchResultTable</c> table of
/// <paramref name="resulttable"/>. For each linked article it walks the links in the
/// <c>ep_groupNavigation</c> menu, downloads every menu page, copies the text of the
/// section matching the menu label into a <c>Conditions</c> row and saves that row.
/// </summary>
/// <param name="b">Browser session used to download the article and menu pages.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="id">Value stored in <c>searchkey</c> on every row.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
private static void CondtionResults(BrowserSession b, HtmlNode resulttable, string id, ApportiswebscrapperContext db)
{
    const int maxArticles = 3;
    const string siteRoot = "http://healthlibrary.epnet.com";

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int count = 0;
    foreach (var row in listoflinks)
    {
        // Checked before downloading anything: previously the limit was tested only
        // after the next article page had already been fetched and parsed.
        if (count == maxArticles)
        {
            break;
        }

        var href = row.Attributes["href"].Value;
        HtmlNode articlePage = b.Get(HttpUtility.HtmlDecode(href));

        var menuDoc = new HtmlDocument();
        menuDoc.LoadHtml(articlePage.InnerHtml);
        var menu = menuDoc.DocumentNode.SelectSingleNode("//div[@class='ep_groupNavigation']");
        if (menu == null)
        {
            // Article without a navigation menu: nothing to walk, so it is skipped
            // and does not count towards the limit.
            continue;
        }

        // Reload with only the menu markup so "//a[@href]" returns menu links only.
        menuDoc.LoadHtml(menu.InnerHtml);
        var menuLinks = menuDoc.DocumentNode.SelectNodes("//a[@href]");
        if (menuLinks == null)
        {
            continue;
        }

        Conditions obj = new Conditions();
        count++;

        foreach (var link in menuLinks)
        {
            // Menu hrefs are appended to the site root to form the page URL.
            string relative = HttpUtility.HtmlDecode(link.Attributes["href"].Value);
            HtmlNode html = b.Get(String.Concat(siteRoot, relative));

            var sectionDoc = new HtmlDocument();
            sectionDoc.LoadHtml(html.InnerHtml);

            // The menu label decides which section div is read and which property is filled.
            HtmlNode section;
            switch (link.InnerHtml)
            {
                case "Main Page":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDintroduction']");
                    if (section != null)
                    {
                        obj.CIDintroduction = section.InnerText;
                    }
                    break;
                case "Risk Factors":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDrisk']");
                    if (section != null)
                    {
                        obj.CIDrisk = section.InnerText;
                    }
                    break;
                case "Symptoms":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDsymptoms']");
                    if (section != null)
                    {
                        obj.CIDsymptoms = section.InnerText;
                    }
                    break;
                case "Treatment":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDtreatment']");
                    if (section != null)
                    {
                        obj.CIDtreatment = section.InnerText;
                    }
                    break;
                case "Screening":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDscreening']");
                    if (section != null)
                    {
                        obj.CIDscreening = section.InnerText;
                    }
                    break;
                case "Reducing Your Risk":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDriskreduction']");
                    if (section != null)
                    {
                        obj.CIDriskreduction = section.InnerText;
                    }
                    break;
                case "Talking to Your Doctor":
                    section = sectionDoc.DocumentNode.SelectSingleNode("//div[@id='CIDtalking']");
                    if (section != null)
                    {
                        obj.CIDtalking = section.InnerText;
                    }
                    break;
                default:
                    // "Resource Guide" and any other label: nothing is stored.
                    break;
            }
        }

        // Failures propagate with their original stack trace; the former nested
        // `catch (Exception e) { throw e; }` blocks discarded it.
        obj.searchkey = id;
        db.Conditions.Add(obj);
        db.SaveChanges();
    }
}
/// <summary>
/// Follows at most the first three links found in the <c>ep_searchResultTable</c> table
/// of <paramref name="resulttable"/> and saves one <c>Healthnews</c> row per link,
/// holding the text of the page's <c>ep_documentBody</c> div when present.
/// </summary>
/// <param name="browser">Browser session used to download each linked page.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="id">Value stored in <c>searchkey</c> on every row.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
private static void Healthnewsresults(BrowserSession browser, HtmlNode resulttable, string id, ApportiswebscrapperContext db)
{
    const int maxArticles = 3;

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so a search without results must be handled explicitly.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int count = 0;
    foreach (var row in listoflinks)
    {
        if (count == maxArticles)
        {
            break;
        }
        count++;

        var href = row.Attributes["href"].Value;
        HtmlNode page = browser.Get(HttpUtility.HtmlDecode(href));

        var pageDoc = new HtmlDocument();
        pageDoc.LoadHtml(page.InnerHtml);

        Healthnews obj = new Healthnews();
        var body = pageDoc.DocumentNode.SelectSingleNode("//div[@id='ep_documentBody']");
        if (body != null)
        {
            obj.ep_documentBody = body.InnerText;
        }
        obj.Articleno = count;
        obj.searchkey = id;

        // Save failures propagate with their original stack trace; the former
        // `catch (Exception e) { throw e; }` discarded it.
        db.Healthnews.Add(obj);
        db.SaveChanges();
    }
}
/// <summary>
/// Follows at most the first three links found in the <c>ep_searchResultTable</c> table
/// of <paramref name="resulttable"/> and saves one <c>Wellness</c> row per link,
/// holding the text of the page's <c>ep_documentBody</c> div when present.
/// </summary>
/// <param name="browser">Browser session used to download each linked page.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="searchkey">Value stored in <c>searchkey</c> on every row.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
private static void Wellnesssearchesults(BrowserSession browser, HtmlNode resulttable, string searchkey, ApportiswebscrapperContext db)
{
    const int maxArticles = 3;

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so a search without results must be handled explicitly.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int count = 0;
    foreach (var row in listoflinks)
    {
        if (count == maxArticles)
        {
            break;
        }
        count++;

        var href = row.Attributes["href"].Value;
        HtmlNode page = browser.Get(HttpUtility.HtmlDecode(href));

        var pageDoc = new HtmlDocument();
        pageDoc.LoadHtml(page.InnerHtml);

        Wellness obj = new Wellness();
        var body = pageDoc.DocumentNode.SelectSingleNode("//div[@id='ep_documentBody']");
        if (body != null)
        {
            obj.ep_documentBody = body.InnerText;
        }

        // Set on every row, including those without a body: the row is saved either
        // way, and previously such rows were stored without article number or search key.
        obj.Articleno = count;
        obj.searchkey = searchkey;

        // Save failures propagate with their original stack trace; the former
        // `catch (DbUpdateConcurrencyException ex) { throw ex; }` discarded it.
        db.Wellnesses.Add(obj);
        db.SaveChanges();
    }
}
/// <summary>
/// Follows at most the first three links found in the <c>ep_searchResultTable</c> table
/// of <paramref name="resulttable"/>, reads the drug sections of each linked page and
/// saves one <c>Drugcs</c> row per link.
/// </summary>
/// <param name="browser">Browser session used to download each linked page.</param>
/// <param name="resulttable">Search response node that is expected to contain the result table.</param>
/// <param name="id">Value stored in <c>searchkey</c> on every row.</param>
/// <param name="db">Database context the rows are added to; saved after every row.</param>
private static void Drugdatabase(BrowserSession browser, HtmlNode resulttable, string id, ApportiswebscrapperContext db)
{
    const int maxArticles = 3;

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so a search without results must be handled explicitly.
    HtmlNode table = resulttable.SelectSingleNode("//table[@class='ep_searchResultTable']");
    if (table == null)
    {
        return;
    }

    // An XPath starting with "//" searches the whole document, so the table markup is
    // loaded into its own document to restrict the link search to the table.
    var doc = new HtmlDocument();
    doc.LoadHtml(table.InnerHtml);
    var listoflinks = doc.DocumentNode.SelectNodes("//a[@href]");
    if (listoflinks == null)
    {
        return;
    }

    int count = 0;
    foreach (var row in listoflinks)
    {
        if (count == maxArticles)
        {
            break;
        }
        count++;

        var href = row.Attributes["href"].Value;
        HtmlNode page = browser.Get(HttpUtility.HtmlDecode(href));

        Drugcs obj = new Drugcs();

        // Each section is optional; a missing node leaves the property unset.
        var section = page.SelectSingleNode("//div[@id='drgAboutYourTreatment']");
        if (section != null)
        {
            obj.drgAboutYourTreatment = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='drgAdministeringYourMed']");
        if (section != null)
        {
            // Previously this text was written to drgAboutYourTreatment, overwriting
            // the value read just above.
            // NOTE(review): assumes Drugcs exposes drgAdministeringYourMed, following
            // the id-named properties used for the other sections - confirm against the model.
            obj.drgAdministeringYourMed = section.InnerText;
        }

        section = page.SelectSingleNode("//div[@id='drgStorage']");
        if (section != null)
        {
            obj.drgStorage = section.InnerText;
        }

        section = page.SelectSingleNode("//a[@name='Whatllow?']");
        if (section != null)
        {
            obj.Whatllow = section.InnerText;
        }

        obj.Articleno = count;
        obj.searchkey = id;

        // Save failures propagate with their original stack trace; the former
        // `catch (Exception ex) { throw ex; }` discarded it.
        db.Drugcs.Add(obj);
        db.SaveChanges();
    }
}