/// <summary>
/// Downloads the barnesandnoble.com search page for each ISBN, extracts the average
/// rating and the age range when present, and then posts and/or stores the data.
/// </summary>
/// <param name="isbns">ISBNs to look up. Each value is used both as the URL path segment and as the keyword.</param>
/// <param name="action">
/// "post" sends the data through <c>BookCaveSvc.postBarnesNoble</c>, "loaddb" passes it to
/// <c>loadbndata</c>, "both" does both. Any other value does neither and yields a blank result.
/// </param>
/// <returns>One <c>StandardResult</c> per ISBN, in input order.</returns>
/// <remarks>
/// Scraping is best-effort: a missing node or an unparsable number leaves the matching
/// field untouched. Download failures are not handled here and propagate to the caller.
/// </remarks>
public static List<StandardResult> getbndata(List<string> isbns, string action = "post")
{
    // Header value kept exactly as it was.
    // NOTE(review): the value itself starts with "User-Agent: " - looks unintended; confirm before changing.
    const string userAgent = "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    List<StandardResult> resultlist = new List<StandardResult>();

    using (WebClient client = new WebClient())
    {
        foreach (string isbn in isbns)
        {
            BarnesDto bndata = new BarnesDto();
            bndata.Isbn13 = isbn;

            // Escape the ISBN so stray characters cannot change the shape of the URL.
            string escapedIsbn = System.Uri.EscapeDataString(isbn ?? string.Empty);
            string url = "http://www.barnesandnoble.com/s/" + escapedIsbn + "?store=book&keyword=" + escapedIsbn;

            // Assign (rather than Add) before every request: the header is neither
            // duplicated if it persists nor missing if the client dropped it.
            client.Headers["user-agent"] = userAgent;
            string html = client.DownloadString(url);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // ---- average rating ----
            HtmlAgilityPack.HtmlNode ratingnode = doc.DocumentNode.SelectSingleNode("//span[@class='starDisplay']");
            if (ratingnode != null)
            {
                // NOTE(review): the leading "//" presumably makes this search the whole document
                // rather than only below ratingnode; left as-is because the page markup is not visible here.
                HtmlAgilityPack.HtmlNode starnode = ratingnode.SelectSingleNode("//span/span[1]");
                if (starnode != null)
                {
                    // Expected form: "Average rating of 4.33333 out of 5 stars".
                    // Other values seen: "attribute not found", "No rating data available yet".
                    string ratingstring = starnode.GetAttributeValue("title", "attribute not found");
                    if (ratingstring != "attribute not found" && ratingstring != "No rating data available yet")
                    {
                        Match rMatch = Regex.Match(ratingstring, "(\\d+(?:\\.\\d*)?|\\.\\d+)");
                        double rating;
                        // Invariant culture: the page always uses '.' as the decimal separator,
                        // whatever the culture of the machine running this code.
                        if (rMatch.Success
                            && double.TryParse(rMatch.Value,
                                               System.Globalization.NumberStyles.Float,
                                               System.Globalization.CultureInfo.InvariantCulture,
                                               out rating))
                        {
                            // NOTE(review): scale factor .02 kept from the original; confirm it is intended.
                            bndata.BarnesAvg = rating * .02;
                        }
                    }
                }
            }

            // ---- age range, taken from the Product Details list ----
            HtmlAgilityPack.HtmlNodeCollection detailsnode = doc.DocumentNode.SelectNodes("//div[@class='product-details box']/ul/li");
            if (detailsnode != null)
            {
                foreach (HtmlAgilityPack.HtmlNode detailnode in detailsnode)
                {
                    string detailHtml = detailnode.InnerHtml;
                    if (!Regex.IsMatch(detailHtml, "Age range"))
                    {
                        continue;
                    }

                    // First number is the lower bound, second (if any) the upper bound.
                    MatchCollection arMatches = Regex.Matches(detailHtml, "(\\d+)");
                    byte age;
                    if (arMatches.Count > 0 && byte.TryParse(arMatches[0].Value, out age))
                    {
                        bndata.BarnesAgeYoung = age;
                    }
                    if (arMatches.Count > 1 && byte.TryParse(arMatches[1].Value, out age))
                    {
                        bndata.BarnesAgeOld = age;
                    }
                }
            }

            // ---- deliver ----
            StandardResult result = new StandardResult();
            if (action == "post" || action == "both")
            {
                result = BookCaveSvc.postBarnesNoble(bndata);
            }
            if (action == "loaddb" || action == "both")
            {
                string msg = loadbndata(bndata);
                if (result.isbn13 == null)
                {
                    // No usable post result: report the database message on its own.
                    result.isbn13 = bndata.Isbn13;
                    result.message = msg;
                }
                else
                {
                    result.message = result.message + " " + msg;
                }
            }
            resultlist.Add(result);
        }
    }

    return resultlist;
}
/////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Searches commonsensemedia.org for each title, and when the first search hit matches the
/// title (compared through <c>lib.cleantitle</c>), scrapes the review page and posts and/or
/// stores the extracted data.
/// </summary>
/// <param name="TitleIsbn13s">Title/ISBN pairs to look up.</param>
/// <param name="action">
/// "post" sends the data through <c>BookCaveSvc.postCommonSenseMedia</c>, "loaddb" passes it to
/// <c>loadcsmdata</c>, "both" does both. Any other value does neither and yields a blank result.
/// </param>
/// <returns>One <c>StandardResult</c> per title that produced a matching hit; titles without a hit add nothing.</returns>
/// <remarks>
/// Scraping is best-effort: a value that is missing or unparsable leaves its field untouched.
/// Download failures are not handled here and propagate to the caller.
/// </remarks>
public static List<StandardResult> getcsmdata(List<TitleIsbn13> TitleIsbn13s, string action = "post")
{
    // Header value kept exactly as it was.
    // NOTE(review): the value itself starts with "User-Agent: " - looks unintended; confirm before changing.
    const string userAgent = "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    List<StandardResult> resultlist = new List<StandardResult>();

    using (WebClient client = new WebClient())
    {
        foreach (TitleIsbn13 titleisbn in TitleIsbn13s)
        {
            // EscapeDataString also encodes '?', '#', '&' and '/', which would otherwise
            // cut the title short or split the path when it is placed in the URL.
            string urititle = Uri.EscapeDataString(titleisbn.title);
            string searchurl = "http://www.commonsensemedia.org/search/" + urititle + "?filters=ss_nr_csm_review2field_entertainment_product_type%3Acsm_book";

            // Assigned before every request so both downloads in this iteration carry it.
            client.Headers["user-agent"] = userAgent;
            string html = client.DownloadString(searchurl);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // Only the first search hit is considered, and only if its title matches.
            HtmlAgilityPack.HtmlNode n = doc.DocumentNode.SelectSingleNode("//div[@class='csm-search-result-title']/h4/a");
            if (n == null || lib.cleantitle(n.InnerText) != lib.cleantitle(titleisbn.title))
            {
                continue;
            }

            string href = n.GetAttributeValue("href", null);
            if (string.IsNullOrEmpty(href))
            {
                // A hit without a link cannot be followed; skip it instead of failing the batch.
                continue;
            }

            // The download needs an absolute address; resolve site-relative links against the search URL.
            string hiturl = href;
            Uri resolved;
            if (!Uri.IsWellFormedUriString(href, UriKind.Absolute)
                && Uri.TryCreate(new Uri(searchurl), href, out resolved))
            {
                hiturl = resolved.AbsoluteUri;
            }

            client.Headers["user-agent"] = userAgent;
            string rhtml = client.DownloadString(hiturl);

            HtmlAgilityPack.HtmlDocument rdoc = new HtmlAgilityPack.HtmlDocument();
            rdoc.LoadHtml(rhtml);

            csmdata csmdata = new csmdata();
            csmdata.isbn13 = titleisbn.isbn13;
            csmdata.url = hiturl;

            // Genre is stored as the href of the last subtitle link.
            HtmlAgilityPack.HtmlNode g = rdoc.DocumentNode.SelectSingleNode("//div[@id='csm-review-product-subtitle']/div/ul/li[@class='last']/a");
            if (g != null)
            {
                HtmlAgilityPack.HtmlAttribute genreHref = g.Attributes["href"];
                if (genreHref != null)
                {
                    csmdata.genre = genreHref.Value;
                }
            }

            // The age values are read out of the page's inline scripts, so concatenate them all.
            System.Text.StringBuilder scriptText = new System.Text.StringBuilder();
            HtmlAgilityPack.HtmlNodeCollection scripts = rdoc.DocumentNode.SelectNodes("//script");
            if (scripts != null)
            {
                foreach (HtmlAgilityPack.HtmlNode script in scripts)
                {
                    scriptText.Append(script.InnerText);
                }
            }
            string scriptAll = scriptText.ToString();

            Match notforkids = Regex.Match(scriptAll, @"not_for_kids\s*:\s*\w+");
            csmdata.notforkids = notforkids.Success
                && notforkids.Value.EndsWith("true", StringComparison.Ordinal);

            int parsedAge;

            Match redendsage = Regex.Match(scriptAll, "red_ends_age\"\\s*:\\s*\\d+");
            if (redendsage.Success
                && int.TryParse(Regex.Match(redendsage.Value, "\\d+").Value, out parsedAge))
            {
                csmdata.offage = parsedAge;
            }

            Match greenbeginsage = Regex.Match(scriptAll, "green_begins_age\"\\s*:\\s*\\d+");
            if (greenbeginsage.Success
                && int.TryParse(Regex.Match(greenbeginsage.Value, "\\d+").Value, out parsedAge))
            {
                csmdata.onage = parsedAge;
            }

            // ---- deliver ----
            StandardResult result = new StandardResult();
            if (action == "post" || action == "both")
            {
                result = BookCaveSvc.postCommonSenseMedia(csmdata);
            }
            if (action == "loaddb" || action == "both")
            {
                string msg = loadcsmdata(csmdata);
                if (result.isbn13 == null)
                {
                    // No usable post result: report the database message on its own.
                    result.isbn13 = csmdata.isbn13;
                    result.message = msg;
                }
                else
                {
                    result.message = result.message + " " + msg;
                }
            }
            resultlist.Add(result);
        }

        return resultlist;
    }
}
/// <summary>
/// Downloads the scholastic.com teacher book page for each title and extracts the interest
/// range, grade level equivalent, DRA and guided reading values, then posts and/or stores them.
/// </summary>
/// <param name="TitleIsbn13s">Title/ISBN pairs to look up; the page address is built from <c>lib.cleantitle(title)</c>.</param>
/// <param name="action">
/// "post" sends the data through <c>BookCaveSvc.postScholastic</c>, "loaddb" passes it to
/// <c>loadschdata</c>, "both" does both. Any other value does neither and yields a blank result.
/// </param>
/// <returns>One <c>StandardResult</c> per input pair, in input order.</returns>
/// <remarks>
/// Scraping is best-effort: a missing node or an unparsable number leaves the matching field
/// untouched. Download failures are not handled here and propagate to the caller.
/// </remarks>
public static List<StandardResult> getschdata(List<TitleIsbn13> TitleIsbn13s, string action = "post")
{
    // Header value kept exactly as it was.
    // NOTE(review): the value itself starts with "User-Agent: " - looks unintended; confirm before changing.
    const string userAgent = "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    List<StandardResult> resultlist = new List<StandardResult>();

    using (WebClient client = new WebClient())
    {
        foreach (TitleIsbn13 ti in TitleIsbn13s)
        {
            ScholasticDto schdata = new ScholasticDto();
            schdata.Isbn13 = ti.isbn13;

            string urltitle = lib.cleantitle(ti.title);
            string url = "http://www.scholastic.com/teachers/book/" + urltitle;

            // Assign (rather than Add) before every request: the header is neither
            // duplicated if it persists nor missing if the client dropped it.
            client.Headers["user-agent"] = userAgent;
            string html = client.DownloadString(url);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // ---- interest range (grades or ages) ----
            HtmlAgilityPack.HtmlNode detailnode = doc.DocumentNode.SelectSingleNode("//div[@id='bookdetail_attrib']");
            HtmlAgilityPack.HtmlNode interestnode = detailnode == null
                ? null
                : detailnode.SelectSingleNode("//div[@class='col_1']//span[1]");

            if (interestnode != null)
            {
                string interestrange = interestnode.InnerHtml;
                MatchCollection intMatches = Regex.Matches(interestrange, "(\\d+)");
                bool isgrade = Regex.IsMatch(interestrange, "Grade", RegexOptions.IgnoreCase);
                byte upper;

                if (intMatches.Count > 1)
                {
                    // Two numbers: a lower and an upper bound, for grades and ages alike.
                    schdata.ScholasticGradeLower = intMatches[0].Value;
                    if (byte.TryParse(intMatches[1].Value, out upper))
                    {
                        schdata.ScholasticGradeHigher = upper;
                    }
                }
                else if (intMatches.Count == 1)
                {
                    string only = intMatches[0].Value;
                    if (isgrade)
                    {
                        // A single number in a grade range only counts when paired with "K".
                        if (Regex.IsMatch(interestrange, "K"))
                        {
                            schdata.ScholasticGradeLower = "K";
                            if (byte.TryParse(only, out upper))
                            {
                                schdata.ScholasticGradeHigher = upper;
                            }
                        }
                    }
                    else
                    {
                        // A single number without "Grade": values below 10 are stored as the
                        // upper bound, anything else as the lower bound.
                        int age;
                        if (int.TryParse(only, out age))
                        {
                            if (age < 10)
                            {
                                if (byte.TryParse(only, out upper))
                                {
                                    schdata.ScholasticGradeHigher = upper;
                                }
                            }
                            else
                            {
                                schdata.ScholasticGradeLower = only;
                            }
                        }
                    }
                }
            }

            // ---- grade level equivalent ----
            HtmlAgilityPack.HtmlNode grade = doc.DocumentNode.SelectSingleNode("//div[@id='bookdetail_attrib']//div[@class='col_2']//span[1]");
            double gradevalue;
            // Invariant culture: parse '.' as the decimal separator whatever the
            // culture of the machine running this code.
            if (grade != null
                && double.TryParse(grade.InnerHtml,
                                   System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                   System.Globalization.CultureInfo.InvariantCulture,
                                   out gradevalue))
            {
                schdata.ScholasticGrade = gradevalue;
            }

            // ---- DRA: last child of the 4th paragraph in col_2 ----
            HtmlAgilityPack.HtmlNode paradra = doc.DocumentNode.SelectSingleNode("//div[@id='bookdetail_attrib']//div[@class='col_2']//p[4]");
            if (paradra != null && paradra.LastChild != null)
            {
                schdata.Dra = paradra.LastChild.InnerHtml;
            }

            // ---- guided reading: last child of the 5th paragraph in col_2 ----
            HtmlAgilityPack.HtmlNode paragr = doc.DocumentNode.SelectSingleNode("//div[@id='bookdetail_attrib']//div[@class='col_2']//p[5]");
            if (paragr != null && paragr.LastChild != null)
            {
                schdata.GuidedReading = paragr.LastChild.InnerHtml;
            }

            // ---- deliver ----
            StandardResult result = new StandardResult();
            if (action == "post" || action == "both")
            {
                result = BookCaveSvc.postScholastic(schdata);
            }
            if (action == "loaddb" || action == "both")
            {
                string msg = loadschdata(schdata);
                if (result.isbn13 == null)
                {
                    // No usable post result: report the database message on its own.
                    result.isbn13 = schdata.Isbn13;
                    result.message = msg;
                }
                else
                {
                    result.message = result.message + " " + msg;
                }
            }
            resultlist.Add(result);
        }
    }

    return resultlist;
}
/// <summary>
/// Builds a <c>StandardResult</c> from a raw byte payload and an ISBN.
/// </summary>
/// <param name="result">Bytes that are decoded as UTF-8 to form the message.</param>
/// <param name="isbn">Value stored in the result's <c>isbn13</c> member.</param>
/// <returns>A new <c>StandardResult</c> holding the decoded message and the ISBN.</returns>
public static StandardResult standardResult(byte[] result, string isbn)
{
    return new StandardResult
    {
        message = new UTF8Encoding().GetString(result),
        isbn13 = isbn
    };
}