/// <summary>
/// Finds every case-insensitive match of <paramref name="regex"/> in <paramref name="str"/>,
/// passes the text of each match through <c>HtmlDecoder.CutString</c> together with
/// <paramref name="begin"/> and <paramref name="end"/>, and joins the results with "/".
/// </summary>
/// <param name="str">Source string to search. A null value is treated as having no matches.</param>
/// <param name="regex">Regular expression pattern; compiled with <c>RegexOptions.IgnoreCase</c>.</param>
/// <param name="begin">Second argument forwarded to <c>HtmlDecoder.CutString</c> (presumably the marker that precedes the wanted fragment; confirm against CutString).</param>
/// <param name="end">Third argument forwarded to <c>HtmlDecoder.CutString</c> (presumably the marker that follows the wanted fragment; confirm against CutString).</param>
/// <returns>The "/"-separated fragments, or an empty string when nothing matched.</returns>
public static string SplitRegex(string str, string regex, string begin, string end)
{
    // Regex.Matches rejects a null input; treat it as "no matches" instead.
    if (str == null)
    {
        return string.Empty;
    }

    Regex rWriters = new Regex(regex, RegexOptions.IgnoreCase);
    MatchCollection mcWriters = rWriters.Matches(str);

    System.Text.StringBuilder temp = new System.Text.StringBuilder();
    foreach (Match item in mcWriters)
    {
        temp.Append(HtmlDecoder.CutString(item.Value, begin, end)).Append("/");
    }

    // With zero matches the builder is empty; removing the trailing "/" would
    // then be called with a start index of -1 and throw.
    if (temp.Length == 0)
    {
        return string.Empty;
    }

    // Drop the trailing separator added by the last iteration.
    temp.Length--;
    return temp.ToString();
}
/// <summary>
/// Builds a <c>tbl_Movie</c> from a Douban movie JSON object, saves the poster image to disk,
/// and fills in directors, writers, cast, languages, release dates, duration and IMDb id
/// from the HTML that <c>HtmlDecoder.GetHtmlCode</c> returns for the movie's Douban subject URL.
/// </summary>
/// <param name="json">Douban movie JSON. The keys title, original_title, aka, summary, id, year,
/// genres, countries, rating.average, ratings_count and images.large are read.</param>
/// <param name="mappath">Prefix the poster file name is appended to. No separator is inserted,
/// so it should already end with a directory separator.</param>
/// <returns>The populated movie entity.</returns>
public static tbl_Movie JsonToMovie(JObject json, string mappath)
{
    tbl_Movie movie = new tbl_Movie();

    // Title and original (foreign-language) title
    movie.movie_Title = json["title"].ToString();
    movie.movie_TitleEn = json["original_title"].ToString();

    // Alternative titles, joined with "/"
    System.Text.StringBuilder temp = new System.Text.StringBuilder();
    var aka = json["aka"].Children().Values();
    foreach (var item in aka)
    {
        temp.Append(item.ToString()).Append("/");
    }
    if (temp.Length > 0)
    {
        movie.movie_Aka = temp.Remove(temp.Length - 1, 1).ToString();
    }

    movie.movie_Summary = json["summary"].ToString();   // plot summary
    movie.movie_DoubanID = json["id"].ToString();       // Douban id
    movie.movie_Year = json["year"].ToString();         // year

    // Genres, joined with "/" and translated by Translator.GenreToId
    temp.Clear();
    var genre = json["genres"].Children().Values();
    foreach (var item in genre)
    {
        temp.Append(item.ToString()).Append("/");
    }
    if (temp.Length > 0)
    {
        movie.movie_Genres = Translator.GenreToId(temp.Remove(temp.Length - 1, 1).ToString());
    }

    // Producing countries/regions, joined with "/" and translated by Translator.CountryToId
    temp.Clear();
    var country = json["countries"].Children().Values();
    foreach (var item in country)
    {
        temp.Append(item.ToString()).Append("/");
    }
    if (temp.Length > 0)
    {
        movie.movie_Countries = Translator.CountryToId(temp.Remove(temp.Length - 1, 1).ToString());
    }

    // Rating: a single-character value such as "7" is padded to "7.0"
    temp.Clear();
    temp.Append(json["rating"]["average"].ToString());
    if (temp.Length == 1)
    {
        temp.Append(".0");
    }
    movie.movie_Rating = temp.ToString();
    movie.movie_RatingCount = json["ratings_count"].ToString();   // number of ratings

    // Poster: download, save under the last URL segment's name, and record that name.
    // The stream must stay open for as long as the Image is alive, so the Image is
    // disposed first (inner using) and the stream second (outer using).
    string imgurl = json["images"]["large"].ToString();
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream(HtmlDecoder.GetImage(imgurl)))
    using (System.Drawing.Image img = System.Drawing.Image.FromStream(ms))
    {
        Uri uri = new Uri(imgurl);
        string fileName = uri.Segments[uri.Segments.Length - 1];
        img.Save(mappath + fileName);
        movie.movie_Avatar = fileName;
    }

    // The remaining fields come from the <div id="info"> section of the subject page.
    string sHtmlCode = HtmlDecoder.GetHtmlCode(string.Format("{0}{1}{2}", "http://movie.douban.com/subject/", movie.movie_DoubanID, "/"));
    string sDivInfo = HtmlDecoder.CutString(sHtmlCode, "<div id=\"info\">", "</div>");

    // Directors
    if (sDivInfo.Contains(">导演"))
    {
        // Cut the section once and reuse it for both the names and the ids.
        string directorsHtml = HtmlDecoder.CutString(sDivInfo, "<span class='pl'>导演</span>", "</span></span><br/>");
        movie.movie_Directors = HtmlDecoder.SplitRegex(directorsHtml, "<a[^>]+>([^<]+)</a>");
        movie.movie_DirectorsId = MapDoubanIdsToCelebrityIds(
            HtmlDecoder.SplitRegex(directorsHtml, "<a[^>]+>([^<]+)</a>", "href=\"/celebrity/", "/\""));
    }

    // Writers
    if (sDivInfo.Contains(">编剧"))
    {
        string writersHtml = HtmlDecoder.CutString(sDivInfo, "<span class='pl'>编剧</span>: <span class='attrs'>", "</span></span><br/>");
        movie.movie_Writers = HtmlDecoder.SplitRegex(writersHtml, "<a[^>]+>([^<]+)</a>");
        movie.movie_WritersId = MapDoubanIdsToCelebrityIds(
            HtmlDecoder.SplitRegex(writersHtml, "<a[^>]+>([^<]+)</a>", "href=\"/celebrity/", "/\""));
    }

    // Leading cast
    if (sDivInfo.Contains(">主演"))
    {
        string castsHtml = HtmlDecoder.CutString(sDivInfo, "<span class=\"actor\"><span class='pl'>主演</span>: <span class='attrs'>", "</span></span><br/>");
        movie.movie_Casts = HtmlDecoder.SplitRegex(castsHtml, "<a[^>]+>([^<]+)</a>");
        movie.movie_CastsId = MapDoubanIdsToCelebrityIds(
            HtmlDecoder.SplitRegex(castsHtml, "<a[^>]+>([^<]+)</a>", "href=\"/celebrity/", "/\""));
    }

    // Languages
    if (sDivInfo.Contains(">语言"))
    {
        string sLangs = HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">语言:</span>", "<br/>");
        movie.movie_Languages = Translator.LangToId(HtmlDecoder.SplitSlash(sLangs));
    }

    // Release dates
    if (sDivInfo.Contains(">上映日期"))
    {
        movie.movie_Pubdates = HtmlDecoder.SplitRegex(HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">上映日期:</span>", "<br/>"), "<span[^>]+>([^<]+)</span>");
    }

    // Running time
    if (sDivInfo.Contains(">片长"))
    {
        movie.movie_Durations = HtmlDecoder.CutString(HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">片长:</span>", "<br/>"), "\">", "</span>").Trim();
    }

    // IMDb link
    if (sDivInfo.Contains(">IMDb链接"))
    {
        movie.movie_IMDbID = HtmlDecoder.CutString(HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">IMDb链接:</span>", "<br>"), "\">", "</a>").Trim();
    }

    return movie;
}

/// <summary>
/// Rewrites a "/"-separated list of Douban celebrity ids: each id that has a row in
/// <c>tbl_Celebrity</c> (matched on <c>celeb_DoubanID</c>) is replaced by that row's
/// <c>celeb_Id</c>; ids without a row are kept, trimmed.
/// </summary>
/// <param name="doubanIds">"/"-separated Douban celebrity ids.</param>
/// <returns>The "/"-separated list after substitution.</returns>
private static string MapDoubanIdsToCelebrityIds(string doubanIds)
{
    System.Text.StringBuilder mapped = new System.Text.StringBuilder();

    // NOTE(review): assumes MRDataEntities is IDisposable (as EF contexts are), so that it is released after the lookups - confirm.
    using (MRDataEntities _db = new MRDataEntities())
    {
        foreach (var item in doubanIds.Split('/'))
        {
            var celebrity = _db.tbl_Celebrity.SingleOrDefault(c => c.celeb_DoubanID != null && c.celeb_DoubanID == item);
            if (celebrity == null)
            {
                mapped.Append(item.Trim()).Append("/");
            }
            else
            {
                mapped.Append(celebrity.celeb_Id).Append("/");
            }
        }
    }

    // Split always yields at least one element, so there is always a trailing "/" to drop.
    mapped.Length--;
    return mapped.ToString();
}
/// <summary>
/// Builds a <c>tbl_Celebrity</c> from a Douban celebrity JSON object, saves the portrait image
/// to disk, and fills in birth/death dates, professions, family members, IMDb id and biography
/// from the HTML that <c>HtmlDecoder.GetHtmlCode</c> returns for the celebrity's Douban URL.
/// </summary>
/// <param name="json">Douban celebrity JSON. The keys name, name_en, gender, id, born_place,
/// aka, aka_en and avatars.large are read.</param>
/// <param name="mappath">Prefix the portrait file name is appended to. No separator is inserted,
/// so it should already end with a directory separator.</param>
/// <returns>The populated celebrity entity.</returns>
public static tbl_Celebrity JsonToCeleb(JObject json, string mappath)
{
    tbl_Celebrity celeb = new tbl_Celebrity();
    celeb.celeb_Name = json["name"].ToString();                             // Chinese name
    celeb.celeb_NameEn = json["name_en"].ToString();                        // foreign-language name
    celeb.celeb_Gender = Translator.GenderToId(json["gender"].ToString());  // gender
    celeb.celeb_DoubanID = json["id"].ToString();                           // Douban id
    celeb.celeb_BornPlace = json["born_place"].ToString();                  // birthplace

    // Additional Chinese names, joined with "/"
    System.Text.StringBuilder temp = new System.Text.StringBuilder();
    var aka = json["aka"].Children().Values();
    foreach (var item in aka)
    {
        temp.Append(item.ToString()).Append("/");
    }
    if (temp.Length > 0)
    {
        celeb.celeb_Aka = temp.Remove(temp.Length - 1, 1).ToString();
    }

    // Additional foreign-language names, joined with "/"
    temp.Clear();
    var akaen = json["aka_en"].Children().Values();
    foreach (var item in akaen)
    {
        temp.Append(item.ToString()).Append("/");
    }
    if (temp.Length > 0)
    {
        celeb.celeb_AkaEn = temp.Remove(temp.Length - 1, 1).ToString();
    }

    // Portrait: download, save under the last URL segment's name, and record that name.
    // The stream must stay open for as long as the Image is alive, so the Image is
    // disposed first (inner using) and the stream second (outer using).
    string imgurl = json["avatars"]["large"].ToString();
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream(HtmlDecoder.GetImage(imgurl)))
    using (System.Drawing.Image img = System.Drawing.Image.FromStream(ms))
    {
        Uri uri = new Uri(imgurl);
        string fileName = uri.Segments[uri.Segments.Length - 1];
        img.Save(mappath + fileName);
        celeb.celeb_Avatar = fileName;
    }

    // Profile details come from the "info" div inside the page's headline section.
    string sHtmlCode = HtmlDecoder.GetHtmlCode(string.Format("{0}{1}{2}", "https://movie.douban.com/celebrity/", celeb.celeb_DoubanID, "/"));
    string sDivInfo = HtmlDecoder.CutString(sHtmlCode, "<div id=\"headline\" class=\"item\">", "<div id=\"opt-bar\" class=\"mod\">");
    sDivInfo = HtmlDecoder.CutString(sDivInfo, "<div class=\"info\">", "</div>");

    // Birth date only, or a birth/death pair separated by '至'
    if (sDivInfo.Contains("<span>出生日期"))
    {
        celeb.celeb_Birthday = HtmlDecoder.CutString(sDivInfo, "<span>出生日期</span>:", "</li>");
    }
    else if (sDivInfo.Contains("<span>生卒日期"))
    {
        string[] lifespan = HtmlDecoder.CutString(sDivInfo, "<span>生卒日期</span>:", "</li>").Split('至');
        celeb.celeb_Birthday = lifespan[0].Trim();
        // Guard against text without the '至' separator instead of indexing past the array.
        if (lifespan.Length > 1)
        {
            celeb.celeb_Deathday = lifespan[1].Trim();
        }
    }

    // Professions
    if (sDivInfo.Contains("<span>职业"))
    {
        celeb.celeb_Pro = TrimSlashSeparatedItems(HtmlDecoder.CutString(sDivInfo, "<span>职业</span>:", "</li>"));
    }

    // Family members
    if (sDivInfo.Contains("<span>家庭成员"))
    {
        celeb.celeb_Family = TrimSlashSeparatedItems(HtmlDecoder.CutString(sDivInfo, "<span>家庭成员</span>:", "</li>"));
    }

    // IMDb id, e.g. http://www.imdb.com/name/nm0000701
    if (sDivInfo.Contains("<span>imdb编号"))
    {
        celeb.celeb_IMDbID = HtmlDecoder.CutString(HtmlDecoder.CutString(sDivInfo, "<span>imdb编号</span>:", "</li>"), "target=\"_self\">", "</a>");
    }

    // Biography: prefer the full hidden text when present, otherwise the visible body.
    sDivInfo = HtmlDecoder.CutString(sHtmlCode, "<div id=\"intro\" class=\"mod\">", "<div class=\"mod\">");
    string rawSummary;
    if (sDivInfo.Contains("<span class=\"all hidden\">"))
    {
        rawSummary = HtmlDecoder.CutString(sDivInfo, "<span class=\"all hidden\">", "</span>");
    }
    else
    {
        rawSummary = HtmlDecoder.CutString(sDivInfo, "<div class=\"bd\">", "</div>");
    }

    // Split on the <br/> tags directly rather than via a '$' placeholder, so a literal
    // '$' inside the biography no longer breaks a paragraph in two.
    temp.Clear();
    foreach (var item in rawSummary.Split(new string[] { "<br/>" }, StringSplitOptions.None))
    {
        if (string.IsNullOrWhiteSpace(item))
        {
            continue;
        }
        temp.Append(" ").Append(item.Trim()).Append('\n');
    }
    celeb.celeb_Summary = temp.ToString();

    return celeb;
}

/// <summary>
/// Trims every item of a "/"-separated list and joins the items back together with "/".
/// </summary>
/// <param name="list">"/"-separated text.</param>
/// <returns>The same items, each trimmed, separated by "/".</returns>
private static string TrimSlashSeparatedItems(string list)
{
    System.Text.StringBuilder joined = new System.Text.StringBuilder();
    foreach (var item in list.Split('/'))
    {
        joined.Append(item.Trim()).Append("/");
    }

    // Split always yields at least one element, so there is always a trailing "/" to drop.
    joined.Length--;
    return joined.ToString();
}