예제 #1
0
        /// <summary>
        /// Finds every match of a regular expression in a string, extracts a fragment from each
        /// match with <c>HtmlDecoder.CutString</c>, and joins the fragments with "/".
        /// </summary>
        /// <param name="str">Source string to search.</param>
        /// <param name="regex">Regular expression pattern; matching is case-insensitive.</param>
        /// <param name="begin">Start delimiter handed to <c>HtmlDecoder.CutString</c> for each match.</param>
        /// <param name="end">End delimiter handed to <c>HtmlDecoder.CutString</c> for each match.</param>
        /// <returns>
        /// The extracted fragments separated by "/", or an empty string when the pattern
        /// does not match anything.
        /// </returns>
        public static string SplitRegex(string str, string regex, string begin, string end)
        {
            MatchCollection matches = Regex.Matches(str, regex, RegexOptions.IgnoreCase);
            System.Text.StringBuilder joined = new System.Text.StringBuilder();

            foreach (Match match in matches)
            {
                joined.Append(HtmlDecoder.CutString(match.Value, begin, end)).Append("/");
            }

            // Without any match there is no trailing "/" to strip; removing at index -1
            // would throw ArgumentOutOfRangeException.
            if (joined.Length == 0)
            {
                return(string.Empty);
            }

            // Drop the trailing separator appended after the last fragment.
            joined.Length -= 1;
            return(joined.ToString());
        }
예제 #2
0
        /// <summary>
        /// Builds a <c>tbl_Movie</c> from a Douban movie JSON object. Besides copying JSON fields,
        /// it downloads the poster and saves it to disk, then fetches the movie's Douban subject
        /// page and scrapes credits, languages, release dates, duration and the IMDb id from it.
        /// </summary>
        /// <param name="json">Douban movie JSON.</param>
        /// <param name="mappath">
        /// Path prefix for the saved poster; the poster file name is appended to it as-is
        /// (no path separator is inserted).
        /// </param>
        /// <returns>The populated movie record.</returns>
        public static tbl_Movie JsonToMovie(JObject json, string mappath)
        {
            tbl_Movie movie = new tbl_Movie();

            // Title and original (foreign-language) title.
            movie.movie_Title   = json["title"].ToString();
            movie.movie_TitleEn = json["original_title"].ToString();

            // Alternative titles; left unset when the array is empty.
            string akaJoined = JoinJsonArrayValues(json["aka"]);
            if (akaJoined != null)
            {
                movie.movie_Aka = akaJoined;
            }

            movie.movie_Summary  = json["summary"].ToString(); // plot summary
            movie.movie_DoubanID = json["id"].ToString();      // Douban id
            movie.movie_Year     = json["year"].ToString();    // year

            // Genres, passed through Translator.GenreToId; left unset when the array is empty.
            string genresJoined = JoinJsonArrayValues(json["genres"]);
            if (genresJoined != null)
            {
                movie.movie_Genres = Translator.GenreToId(genresJoined);
            }

            // Producing countries/regions, passed through Translator.CountryToId.
            string countriesJoined = JoinJsonArrayValues(json["countries"]);
            if (countriesJoined != null)
            {
                movie.movie_Countries = Translator.CountryToId(countriesJoined);
            }

            // Rating: a single-character value such as "8" is padded to "8.0".
            string rating = json["rating"]["average"].ToString();
            if (rating.Length == 1)
            {
                rating += ".0";
            }
            movie.movie_Rating      = rating;
            movie.movie_RatingCount = json["ratings_count"].ToString(); // number of ratings

            // Poster: download, save under the last URL segment, remember the file name.
            string imgurl = json["images"]["large"].ToString();
            string posterFileName;

            // The stream has to stay open while the Image is alive, so the image is the inner
            // using and is disposed first.
            using (System.IO.MemoryStream posterStream = new System.IO.MemoryStream(HtmlDecoder.GetImage(imgurl)))
            using (System.Drawing.Image poster = System.Drawing.Image.FromStream(posterStream))
            {
                Uri posterUri = new Uri(imgurl);
                posterFileName = posterUri.Segments[posterUri.Segments.Length - 1];
                poster.Save(mappath + posterFileName);
            }
            movie.movie_Avatar = posterFileName;

            // Everything below is scraped from the info box of the movie's Douban page.
            string sHtmlCode = HtmlDecoder.GetHtmlCode("http://movie.douban.com/subject/" + movie.movie_DoubanID + "/");
            string sDivInfo  = HtmlDecoder.CutString(sHtmlCode, "<div id=\"info\">", "</div>");

            const string anchorPattern = "<a[^>]+>([^<]+)</a>";
            const string creditsEnd    = "</span></span><br/>";
            const string idBegin       = "href=\"/celebrity/";
            const string idEnd         = "/\"";

            // Directors
            if (sDivInfo.Contains(">导演"))
            {
                string directorsHtml = HtmlDecoder.CutString(sDivInfo, "<span class='pl'>导演</span>", creditsEnd);
                movie.movie_Directors   = HtmlDecoder.SplitRegex(directorsHtml, anchorPattern);
                movie.movie_DirectorsId = ResolveCelebrityIds(HtmlDecoder.SplitRegex(directorsHtml, anchorPattern, idBegin, idEnd));
            }
            // Writers
            if (sDivInfo.Contains(">编剧"))
            {
                string writersHtml = HtmlDecoder.CutString(sDivInfo, "<span class='pl'>编剧</span>: <span class='attrs'>", creditsEnd);
                movie.movie_Writers   = HtmlDecoder.SplitRegex(writersHtml, anchorPattern);
                movie.movie_WritersId = ResolveCelebrityIds(HtmlDecoder.SplitRegex(writersHtml, anchorPattern, idBegin, idEnd));
            }
            // Cast
            if (sDivInfo.Contains(">主演"))
            {
                string castsHtml = HtmlDecoder.CutString(sDivInfo, "<span class=\"actor\"><span class='pl'>主演</span>: <span class='attrs'>", creditsEnd);
                movie.movie_Casts   = HtmlDecoder.SplitRegex(castsHtml, anchorPattern);
                movie.movie_CastsId = ResolveCelebrityIds(HtmlDecoder.SplitRegex(castsHtml, anchorPattern, idBegin, idEnd));
            }
            // Languages
            if (sDivInfo.Contains(">语言"))
            {
                string sLangs = HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">语言:</span>", "<br/>");
                movie.movie_Languages = Translator.LangToId(HtmlDecoder.SplitSlash(sLangs));
            }
            // Release dates
            if (sDivInfo.Contains(">上映日期"))
            {
                string pubdatesHtml = HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">上映日期:</span>", "<br/>");
                movie.movie_Pubdates = HtmlDecoder.SplitRegex(pubdatesHtml, "<span[^>]+>([^<]+)</span>");
            }
            // Duration
            if (sDivInfo.Contains(">片长"))
            {
                string durationHtml = HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">片长:</span>", "<br/>");
                movie.movie_Durations = HtmlDecoder.CutString(durationHtml, "\">", "</span>").Trim();
            }
            // IMDb link
            if (sDivInfo.Contains(">IMDb链接"))
            {
                string imdbHtml = HtmlDecoder.CutString(sDivInfo, "<span class=\"pl\">IMDb链接:</span>", "<br>");
                movie.movie_IMDbID = HtmlDecoder.CutString(imdbHtml, "\">", "</a>").Trim();
            }

            return(movie);
        }

        /// <summary>
        /// Joins the string form of every value under a JSON token with "/".
        /// </summary>
        /// <param name="array">JSON token whose child values are joined.</param>
        /// <returns>The joined values, or <c>null</c> when the token yields no values.</returns>
        private static string JoinJsonArrayValues(JToken array)
        {
            System.Text.StringBuilder joined = new System.Text.StringBuilder();

            foreach (var value in array.Children().Values())
            {
                joined.Append(value.ToString()).Append("/");
            }
            if (joined.Length == 0)
            {
                return(null);
            }
            // Drop the trailing separator.
            return(joined.Remove(joined.Length - 1, 1).ToString());
        }

        /// <summary>
        /// Maps a "/"-separated list of Douban celebrity ids to local ids: each id found in
        /// <c>tbl_Celebrity</c> (by <c>celeb_DoubanID</c>) is replaced with its <c>celeb_Id</c>;
        /// ids without a matching row are kept, trimmed.
        /// </summary>
        /// <param name="doubanIds">Douban celebrity ids separated by "/".</param>
        /// <returns>The mapped ids separated by "/".</returns>
        private static string ResolveCelebrityIds(string doubanIds)
        {
            System.Text.StringBuilder resolved = new System.Text.StringBuilder();

            // Dispose the context once the lookups are done instead of leaking it.
            using (MRDataEntities db = new MRDataEntities())
            {
                foreach (var doubanId in doubanIds.Split('/'))
                {
                    var celebrity = db.tbl_Celebrity.SingleOrDefault(c => c.celeb_DoubanID != null && c.celeb_DoubanID == doubanId);
                    if (celebrity == null)
                    {
                        resolved.Append(doubanId.Trim());
                    }
                    else
                    {
                        resolved.Append(celebrity.celeb_Id);
                    }
                    resolved.Append("/");
                }
            }

            // Split always yields at least one element, so there is a trailing "/" to drop.
            return(resolved.Remove(resolved.Length - 1, 1).ToString());
        }
예제 #3
0
        /// <summary>
        /// Builds a <c>tbl_Celebrity</c> from a Douban celebrity JSON object. Besides copying JSON
        /// fields, it downloads the avatar and saves it to disk, then fetches the celebrity's
        /// Douban page and scrapes dates, professions, family members, IMDb id and biography.
        /// </summary>
        /// <param name="json">Douban celebrity JSON.</param>
        /// <param name="mappath">
        /// Path prefix for the saved avatar; the avatar file name is appended to it as-is
        /// (no path separator is inserted).
        /// </param>
        /// <returns>The populated celebrity record.</returns>
        public static tbl_Celebrity JsonToCeleb(JObject json, string mappath)
        {
            tbl_Celebrity celeb = new tbl_Celebrity();

            celeb.celeb_Name      = json["name"].ToString();                          // Chinese name
            celeb.celeb_NameEn    = json["name_en"].ToString();                       // foreign-language name
            celeb.celeb_Gender    = Translator.GenderToId(json["gender"].ToString()); // gender
            celeb.celeb_DoubanID  = json["id"].ToString();                            // Douban id
            celeb.celeb_BornPlace = json["born_place"].ToString();                    // place of birth

            // Additional Chinese names; left unset when the array is empty.
            System.Text.StringBuilder temp = new System.Text.StringBuilder();
            foreach (var alias in json["aka"].Children().Values())
            {
                temp.Append(alias.ToString()).Append("/");
            }
            if (temp.Length > 0)
            {
                celeb.celeb_Aka = temp.Remove(temp.Length - 1, 1).ToString();
            }

            // Additional foreign-language names; left unset when the array is empty.
            temp.Clear();
            foreach (var alias in json["aka_en"].Children().Values())
            {
                temp.Append(alias.ToString()).Append("/");
            }
            if (temp.Length > 0)
            {
                celeb.celeb_AkaEn = temp.Remove(temp.Length - 1, 1).ToString();
            }

            // Avatar: download, save under the last URL segment, remember the file name.
            string imgurl = json["avatars"]["large"].ToString();
            string avatarFileName;

            // The stream has to stay open while the Image is alive, so the image is the inner
            // using and is disposed first.
            using (System.IO.MemoryStream avatarStream = new System.IO.MemoryStream(HtmlDecoder.GetImage(imgurl)))
            using (System.Drawing.Image avatar = System.Drawing.Image.FromStream(avatarStream))
            {
                Uri avatarUri = new Uri(imgurl);
                avatarFileName = avatarUri.Segments[avatarUri.Segments.Length - 1];
                avatar.Save(mappath + avatarFileName);
            }
            celeb.celeb_Avatar = avatarFileName;

            // Everything below is scraped from the celebrity's Douban page.
            string sHtmlCode = HtmlDecoder.GetHtmlCode("https://movie.douban.com/celebrity/" + celeb.celeb_DoubanID + "/");
            string sDivInfo  = HtmlDecoder.CutString(sHtmlCode, "<div id=\"headline\" class=\"item\">", "<div id=\"opt-bar\" class=\"mod\">");

            sDivInfo = HtmlDecoder.CutString(sDivInfo, "<div class=\"info\">", "</div>");

            // Birth date, or birth and death dates separated by '至'.
            if (sDivInfo.Contains("<span>出生日期"))
            {
                celeb.celeb_Birthday = HtmlDecoder.CutString(sDivInfo, "<span>出生日期</span>:", "</li>");
            }
            else if (sDivInfo.Contains("<span>生卒日期"))
            {
                string[] lifeSpan = HtmlDecoder.CutString(sDivInfo, "<span>生卒日期</span>:", "</li>").Split('至');
                celeb.celeb_Birthday = lifeSpan[0].Trim();
                // Only set the death date when the separator was actually present, instead of
                // indexing past the end of the array.
                if (lifeSpan.Length > 1)
                {
                    celeb.celeb_Deathday = lifeSpan[1].Trim();
                }
            }

            // Professions: trim each "/"-separated entry and join them back with "/".
            if (sDivInfo.Contains("<span>职业"))
            {
                string pros = HtmlDecoder.CutString(sDivInfo, "<span>职业</span>:", "</li>");
                celeb.celeb_Pro = string.Join("/", Array.ConvertAll(pros.Split('/'), part => part.Trim()));
            }

            // Family members: same normalisation as professions.
            if (sDivInfo.Contains("<span>家庭成员"))
            {
                string families = HtmlDecoder.CutString(sDivInfo, "<span>家庭成员</span>:", "</li>");
                celeb.celeb_Family = string.Join("/", Array.ConvertAll(families.Split('/'), part => part.Trim()));
            }

            // IMDb id, e.g. http://www.imdb.com/name/nm0000701
            if (sDivInfo.Contains("<span>imdb编号"))
            {
                string imdbHtml = HtmlDecoder.CutString(sDivInfo, "<span>imdb编号</span>:", "</li>");
                celeb.celeb_IMDbID = HtmlDecoder.CutString(imdbHtml, "target=\"_self\">", "</a>");
            }

            // Biography: prefer the "all hidden" span when present, otherwise the "bd" div.
            sDivInfo = HtmlDecoder.CutString(sHtmlCode, "<div id=\"intro\" class=\"mod\">", "<div class=\"mod\">");
            string rawSummary;
            if (sDivInfo.Contains("<span class=\"all hidden\">"))
            {
                rawSummary = HtmlDecoder.CutString(sDivInfo, "<span class=\"all hidden\">", "</span>");
            }
            else
            {
                rawSummary = HtmlDecoder.CutString(sDivInfo, "<div class=\"bd\">", "</div>");
            }

            // "<br/>" is turned into "$" and used as the paragraph separator; every non-blank
            // paragraph is emitted trimmed, indented by two spaces and followed by a newline.
            temp.Clear();
            foreach (var paragraph in rawSummary.Replace("<br/>", "$").Trim().Split('$'))
            {
                if (string.IsNullOrWhiteSpace(paragraph))
                {
                    continue;
                }
                temp.Append("  ").Append(paragraph.Trim()).Append('\n');
            }
            celeb.celeb_Summary = temp.ToString();

            return(celeb);
        }