/// <summary>
/// For each incoming title/ISBN-13 pair, downloads the matching Scholastic
/// teacher book page, extracts the interest range, grade equivalent, DRA and
/// guided-reading values, and passes the result to <c>loadschdata</c>.
/// </summary>
/// <param name="TitleIsbn13s">Title/ISBN-13 pairs to look up. Must not be null.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="TitleIsbn13s"/> is null.
/// </exception>
/// <remarks>
/// Download errors raised by <c>WebClient.DownloadString</c> are not caught here
/// and therefore end the batch. Values that are missing from the page or cannot
/// be parsed are left at whatever a new <c>schdata</c> instance holds, and the
/// record is still handed to <c>loadschdata</c>.
/// </remarks>
public static void getschdata(List<TitleIsbn13> TitleIsbn13s)
{
    if (TitleIsbn13s == null)
    {
        throw new System.ArgumentNullException("TitleIsbn13s");
    }

    // Header value only: the header name is supplied separately when it is set.
    const string useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    using (WebClient client = new WebClient())
    {
        // Loop over each incoming isbn/title pair.
        foreach (TitleIsbn13 ti in TitleIsbn13s)
        {
            schdata record = new schdata();
            record.isbn13 = ti.isbn13;
            record.title = ti.title;

            // The page URL is keyed by the cleaned title, not by the ISBN.
            string urltitle = lib.cleantitle(ti.title);
            string url = "http://www.scholastic.com/teachers/book/" + urltitle;

            // Assign rather than Add so that a value still present from the
            // previous request is replaced instead of being appended to.
            client.Headers["user-agent"] = useragent;
            string html = client.DownloadString(url);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            HtmlAgilityPack.HtmlNode detailnode = doc.DocumentNode.SelectSingleNode("//div[@id='bookdetail_attrib']");
            if (detailnode != null)
            {
                // Leading "." keeps the search inside the detail block; a bare
                // "//" would be evaluated against the whole document.
                HtmlAgilityPack.HtmlNode interestnode = detailnode.SelectSingleNode(".//div[@class='col_1']//span[1]");
                if (interestnode != null)
                {
                    applyinterestrange(record, interestnode.InnerHtml);
                }

                applyreadinglevels(record, doc.DocumentNode);
            }

            loadschdata(record);
        }
    }
}

// Sets interestlowgrade / interesthighgrade on target from the interest-range text.
private static void applyinterestrange(schdata target, string interestrange)
{
    MatchCollection numbers = Regex.Matches(interestrange, "(\\d+)");

    if (Regex.IsMatch(interestrange, "Grade", RegexOptions.IgnoreCase))
    {
        // Grade ranges may start at kindergarten, which appears as "K" rather than a number.
        bool kindergarten = Regex.IsMatch(interestrange, "K");
        if (numbers.Count == 1 && kindergarten)
        {
            target.interestlowgrade = "K";
            target.interesthighgrade = numbers[0].Value;
        }
        else if (numbers.Count > 1)
        {
            target.interestlowgrade = numbers[0].Value;
            target.interesthighgrade = numbers[1].Value;
        }
        return;
    }

    if (numbers.Count == 1)
    {
        // A single number below 10 is stored as the upper bound, otherwise as the lower bound.
        int age;
        if (int.TryParse(numbers[0].Value, out age))
        {
            if (age < 10)
            {
                target.interesthighgrade = numbers[0].Value;
            }
            else
            {
                target.interestlowgrade = numbers[0].Value;
            }
        }
    }
    else if (numbers.Count > 1)
    {
        target.interestlowgrade = numbers[0].Value;
        target.interesthighgrade = numbers[1].Value;
    }
}

// Sets gradeequiv, dra and guidedreading on target from the col_2 section; absent values are left untouched.
private static void applyreadinglevels(schdata target, HtmlAgilityPack.HtmlNode root)
{
    HtmlAgilityPack.HtmlNode grade = root.SelectSingleNode("//div[@id='bookdetail_attrib']//div[@class='col_2']//span[1]");
    if (grade != null)
    {
        decimal gradeequiv;
        if (decimal.TryParse(grade.InnerHtml, out gradeequiv))
        {
            target.gradeequiv = gradeequiv;
        }
    }

    string dra = lastchildhtml(root, "//div[@id='bookdetail_attrib']//div[@class='col_2']//p[4]");
    if (dra != null)
    {
        target.dra = dra;
    }

    string guidedreading = lastchildhtml(root, "//div[@id='bookdetail_attrib']//div[@class='col_2']//p[5]");
    if (guidedreading != null)
    {
        target.guidedreading = guidedreading;
    }
}

// Returns the InnerHtml of the last child of the first node matching xpath, or null when either is missing.
private static string lastchildhtml(HtmlAgilityPack.HtmlNode root, string xpath)
{
    HtmlAgilityPack.HtmlNode match = root.SelectSingleNode(xpath);
    if (match == null || match.LastChild == null)
    {
        return null;
    }
    return match.LastChild.InnerHtml;
}
/// <summary>
/// Inserts or updates the <c>Scholastic</c> row whose <c>Isbn13</c> matches the
/// given data, then saves the change.
/// </summary>
/// <param name="schdata">Scraped values to persist. Must not be null.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="schdata"/> is null.
/// </exception>
public static void loadschdata(schdata schdata)
{
    if (schdata == null)
    {
        throw new System.ArgumentNullException("schdata");
    }

    using (LexileTitlesEntities lr = new LexileTitlesEntities())
    {
        // Only the first matching row is ever used, so ask for just that one.
        Scholastic sch = lr.Scholastics.FirstOrDefault(s => s.Isbn13 == schdata.isbn13);

        if (sch == null)
        {
            // No row for this ISBN yet: create one and register it for insert.
            sch = new Scholastic();
            sch.Isbn13 = schdata.isbn13;
            lr.Scholastics.Add(sch);
        }

        // Same field copy for both the new and the existing row.
        sch.InterestLowGrade = schdata.interestlowgrade;
        sch.InterestHighGrade = schdata.interesthighgrade;
        sch.GradeEquiv = schdata.gradeequiv;
        sch.DRA = schdata.dra;
        sch.GuidedReading = schdata.guidedreading;
        // genre TBD

        lr.SaveChanges();
    }
}