/// <summary>
/// Inserts or updates the CommonSenseMedia row whose Isbn13 equals <c>csmdata.isbn13</c>.
/// </summary>
/// <param name="csmdata">Scraped Common Sense Media values to persist; isbn13 selects the row.</param>
/// <returns>
/// "(Loaded new CSM to DB)" after an insert, "(Updated existing CSM in DB)" after an update,
/// or "(DB LOADING ERROR)" when anything inside the database work throws.
/// </returns>
public static string loadcsmdata(csmdata csmdata)
{
    string msg = "(DB LOADING ERROR)";

    try
    {
        using (LexileTitlesEntities lr = new LexileTitlesEntities())
        {
            // Only one row is ever written, so ask for a single match instead of
            // materializing every row that shares the ISBN.
            CommonSenseMedia row = (from c in lr.CommonSenseMedias
                                    where c.Isbn13 == csmdata.isbn13
                                    select c).FirstOrDefault();

            bool isNew = row == null;
            if (isNew)
            {
                row = new CommonSenseMedia();
                row.Isbn13 = csmdata.isbn13;
            }

            // The same field mapping serves both the insert and the update path.
            row.url = csmdata.url;
            row.notforkids = csmdata.notforkids;
            row.offage = csmdata.offage;
            row.onage = csmdata.onage;
            row.review = csmdata.review;
            row.parentreview = csmdata.parentreview;
            row.kidreview = csmdata.kidreview;
            row.genre = csmdata.genre;

            if (isNew)
            {
                lr.CommonSenseMedias.Add(row);
            }

            lr.SaveChanges();
            msg = isNew ? "(Loaded new CSM to DB)" : "(Updated existing CSM in DB)";
        }
    }
    catch (Exception ex)
    {
        // Still best effort: the caller only receives the generic error text, but the
        // cause is written to the trace listeners instead of being discarded.
        System.Diagnostics.Trace.TraceError("loadcsmdata failed: " + ex);
    }

    return msg;
}
/////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Searches commonsensemedia.org for each title and, when the first search hit matches the
/// title (compared through lib.cleantitle), scrapes the review page into a csmdata and
/// posts and/or stores it according to <paramref name="action"/>.
/// </summary>
/// <param name="TitleIsbn13s">Title / ISBN-13 pairs to look up.</param>
/// <param name="action">
/// "post" sends the data through BookCaveSvc.postCommonSenseMedia, "loaddb" stores it with
/// loadcsmdata, "both" does both. Any other value still scrapes but adds an untouched
/// StandardResult.
/// </param>
/// <returns>
/// One StandardResult per title whose first search hit matched; titles without a usable
/// hit contribute nothing.
/// </returns>
public static List<StandardResult> getcsmdata(List<TitleIsbn13> TitleIsbn13s, string action = "post")
{
    // Header value only: the header name must not be repeated inside the value.
    const string userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    List<StandardResult> resultlist = new List<StandardResult>();

    using (WebClient client = new WebClient())
    {
        foreach (TitleIsbn13 titleisbn in TitleIsbn13s)
        {
            // The title is a single path segment, so reserved characters such as
            // '?', '#', '&' and '/' must be escaped as well.
            string urititle = Uri.EscapeDataString(titleisbn.title);
            string searchurl = "http://www.commonsensemedia.org/search/" + urititle + "?filters=ss_nr_csm_review2field_entertainment_product_type%3Acsm_book";

            // Assigned (not Add-ed) before every request so each download carries exactly one value.
            client.Headers["user-agent"] = userAgent;
            string html = client.DownloadString(searchurl);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // First hit from the result list; skip the title when there is no hit,
            // the hit has no link, or its text is not a match for the requested title.
            HtmlAgilityPack.HtmlNode n = doc.DocumentNode.SelectSingleNode("//div[@class='csm-search-result-title']/h4/a");
            if (n == null
                || n.Attributes["href"] == null
                || lib.cleantitle(n.InnerText) != lib.cleantitle(titleisbn.title))
            {
                continue;
            }

            string hiturl = n.Attributes["href"].Value;
            client.Headers["user-agent"] = userAgent;
            string rhtml = client.DownloadString(hiturl);

            HtmlAgilityPack.HtmlDocument rdoc = new HtmlAgilityPack.HtmlDocument();
            rdoc.LoadHtml(rhtml);

            csmdata scraped = new csmdata();
            scraped.isbn13 = titleisbn.isbn13;
            scraped.url = hiturl;

            // Genre is the href of the last subtitle link; left unset when the link is absent.
            HtmlAgilityPack.HtmlNode g = rdoc.DocumentNode.SelectSingleNode("//div[@id='csm-review-product-subtitle']/div/ul/li[@class='last']/a");
            if (g != null && g.Attributes["href"] != null)
            {
                scraped.genre = g.Attributes["href"].Value;
            }

            // The age values are read out of the concatenated text of every <script> element.
            // SelectNodes yields null rather than an empty collection when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection scripts = rdoc.DocumentNode.SelectNodes("//script");
            List<string> scriptarr = new List<string>();
            if (scripts != null)
            {
                foreach (HtmlAgilityPack.HtmlNode h in scripts)
                {
                    scriptarr.Add(h.InnerText);
                }
            }
            string scriptAll = string.Join("", scriptarr);

            // NOTE(review): unlike the two age patterns this one has no '"' after the key,
            // so it only matches an unquoted key - confirm against the live page.
            scraped.notforkids = Regex.Match(scriptAll, @"not_for_kids\s*:\s*\w+").Value.EndsWith("true", StringComparison.Ordinal);

            int offage;
            if (TryReadCsmNumber(scriptAll, "red_ends_age\"\\s*:\\s*\\d+", out offage))
            {
                scraped.offage = offage;
            }

            int onage;
            if (TryReadCsmNumber(scriptAll, "green_begins_age\"\\s*:\\s*\\d+", out onage))
            {
                scraped.onage = onage;
            }

            StandardResult result = new StandardResult();
            if (action == "post" || action == "both")
            {
                // post new csmdata to BookCaveSvc
                result = BookCaveSvc.postCommonSenseMedia(scraped);
            }
            if (action == "loaddb" || action == "both")
            {
                string msg = loadcsmdata(scraped);
                if (result.isbn13 == null)
                {
                    // Nothing was posted (or the post carried no ISBN): report the DB outcome alone.
                    result.isbn13 = scraped.isbn13;
                    result.message = msg;
                }
                else
                {
                    result.message = result.message + " " + msg;
                }
            }
            resultlist.Add(result);
        }

        return resultlist;
    }
}

/// <summary>
/// Finds the first match of <paramref name="pattern"/> in <paramref name="text"/> and parses
/// the first run of digits inside that match.
/// </summary>
/// <returns>True when a number was parsed; false when there is no match or it does not fit an int.</returns>
private static bool TryReadCsmNumber(string text, string pattern, out int value)
{
    string field = Regex.Match(text, pattern).Value;
    return int.TryParse(Regex.Match(field, "\\d+").Value, out value);
}
/// <summary>
/// Searches commonsensemedia.org for <paramref name="title"/> and, when the first search hit
/// matches the title (compared through lib.cleantitle), scrapes the review page.
/// </summary>
/// <param name="title">Book title to search for.</param>
/// <param name="isbn13">ISBN-13 copied onto the returned csmdata; defaults to an empty string.</param>
/// <returns>
/// A csmdata carrying isbn13, url and whichever of genre, notforkids, offage and onage could
/// be read from the page, or null when there is no usable first hit.
/// </returns>
public static csmdata getcsmdata_old(string title, string isbn13 = "")
{
    // Header value only: the header name must not be repeated inside the value.
    const string userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    // The title is a single path segment, so reserved characters such as
    // '?', '#', '&' and '/' must be escaped as well.
    string urititle = Uri.EscapeDataString(title);
    string searchurl = "http://www.commonsensemedia.org/search/" + urititle + "?filters=ss_nr_csm_review2field_entertainment_product_type%3Acsm_book";

    using (WebClient client = new WebClient())
    {
        // Assigned (not Add-ed) before every request so each download carries exactly one value.
        client.Headers["user-agent"] = userAgent;
        string html = client.DownloadString(searchurl);

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);

        // First hit from the result list; give up when there is no hit, the hit has
        // no link, or its text is not a match for the requested title.
        HtmlAgilityPack.HtmlNode n = doc.DocumentNode.SelectSingleNode("//div[@class='csm-search-result-title']/h4/a");
        if (n == null
            || n.Attributes["href"] == null
            || lib.cleantitle(n.InnerText) != lib.cleantitle(title))
        {
            return null;
        }

        string hiturl = n.Attributes["href"].Value;
        client.Headers["user-agent"] = userAgent;
        string rhtml = client.DownloadString(hiturl);

        HtmlAgilityPack.HtmlDocument rdoc = new HtmlAgilityPack.HtmlDocument();
        rdoc.LoadHtml(rhtml);

        csmdata scraped = new csmdata();
        scraped.isbn13 = isbn13;
        scraped.url = hiturl;

        // Genre is the href of the last subtitle link; left unset when the link is absent.
        HtmlAgilityPack.HtmlNode g = rdoc.DocumentNode.SelectSingleNode("//div[@id='csm-review-product-subtitle']/div/ul/li[@class='last']/a");
        if (g != null && g.Attributes["href"] != null)
        {
            scraped.genre = g.Attributes["href"].Value;
        }

        // The age values are read out of the concatenated text of every <script> element.
        // SelectNodes yields null rather than an empty collection when nothing matches.
        HtmlAgilityPack.HtmlNodeCollection scripts = rdoc.DocumentNode.SelectNodes("//script");
        List<string> scriptarr = new List<string>();
        if (scripts != null)
        {
            foreach (HtmlAgilityPack.HtmlNode h in scripts)
            {
                scriptarr.Add(h.InnerText);
            }
        }
        string scriptAll = string.Join("", scriptarr);

        // NOTE(review): unlike the two age patterns this one has no '"' after the key,
        // so it only matches an unquoted key - confirm against the live page.
        scraped.notforkids = Regex.Match(scriptAll, @"not_for_kids\s*:\s*\w+").Value.EndsWith("true", StringComparison.Ordinal);

        // Each age is the first run of digits inside its "key": value match; a missing
        // or unparsable value leaves the field at its default.
        string redends = Regex.Match(scriptAll, "red_ends_age\"\\s*:\\s*\\d+").Value;
        int offage;
        if (int.TryParse(Regex.Match(redends, "\\d+").Value, out offage))
        {
            scraped.offage = offage;
        }

        string greenbegins = Regex.Match(scriptAll, "green_begins_age\"\\s*:\\s*\\d+").Value;
        int onage;
        if (int.TryParse(Regex.Match(greenbegins, "\\d+").Value, out onage))
        {
            scraped.onage = onage;
        }

        // Selectors noted by the original author for the age slider, not used by this code:
        //   //div[@id='mini-panel-review_product_overview']/div[1]/div[5]/div[1]/div[1]
        //   //*[@id='sliderContainer']
        //   //*[@id="slider-1250625"]

        return scraped;
    }
}
/// <summary>
/// Stores scraped Common Sense Media data: adds a CommonSenseMedia row when no row has the
/// same Isbn13, otherwise overwrites the fields of the first matching row.
/// </summary>
/// <param name="csmdata">Values to store; isbn13 identifies the row.</param>
public static void loadcsmdata(csmdata csmdata)
{
    using (LexileTitlesEntities db = new LexileTitlesEntities())
    {
        List<CommonSenseMedia> matches = (from c in db.CommonSenseMedias
                                          where c.Isbn13 == csmdata.isbn13
                                          select c).ToList();

        // Pick the row to write: a fresh one keyed by the ISBN, or the first existing match.
        bool isNew = matches.Count == 0;
        CommonSenseMedia target;
        if (isNew)
        {
            target = new CommonSenseMedia();
            target.Isbn13 = csmdata.isbn13;
        }
        else
        {
            target = matches[0];
        }

        target.url = csmdata.url;
        target.notforkids = csmdata.notforkids;
        target.offage = csmdata.offage;
        target.onage = csmdata.onage;
        target.review = csmdata.review;
        target.parentreview = csmdata.parentreview;
        target.kidreview = csmdata.kidreview;
        target.genre = csmdata.genre;

        if (isNew)
        {
            db.CommonSenseMedias.Add(target);
        }

        db.SaveChanges();
    }
}
/// <summary>
/// Maps the scraped data onto a CommonSenseDto and hands it to postcsm.
/// </summary>
/// <remarks>
/// Only Isbn13, CommonSenseOn, CommonSensePause and CommonSenseNoKids are populated. The
/// url, genre, review, parentreview and kidreview values are not sent.
/// NOTE(review): CommonSensePause is offage - 1 cast to byte. Presumably offage is an int,
/// in which case an unset (0) offage wraps to 255 in an unchecked context - confirm that
/// this is intended.
/// </remarks>
/// <param name="csm">Scraped Common Sense Media values.</param>
/// <returns>The StandardResult returned by postcsm.</returns>
public static StandardResult postCommonSenseMedia(csmdata csm)
{
    CommonSenseDto dto = new CommonSenseDto
    {
        Isbn13 = csm.isbn13,
        CommonSenseOn = (byte)csm.onage,
        CommonSensePause = (byte)(csm.offage - 1),
        CommonSenseNoKids = csm.notforkids
    };

    return postcsm(dto);
}