예제 #1
0
 /// <summary>
 /// Loads the job page at <paramref name="url"/> and extracts its title, author, budget,
 /// two entries of the "#p-other" list and a tag-free description into a <c>sys_job</c>.
 /// </summary>
 /// <param name="url">Address of the page to load; the document is decoded as UTF-8.</param>
 /// <returns>
 /// The populated job. An empty <c>sys_job</c> is returned when the page does not contain
 /// "product-info-summary", when one of the expected elements is missing, or when any
 /// exception is thrown while loading or parsing (the failure is logged in the last two cases).
 /// </returns>
 public sys_job GetJobInfo(string url)
 {
     try
     {
         IHtmlDocument html = new JumonyParser().LoadDocument(url, Encoding.UTF8);

         // Without the summary marker there is nothing to extract.
         if (html.InnerHtml().IndexOf("product-info-summary", StringComparison.Ordinal) < 0)
         {
             return(new sys_job());
         }

         var titleNode  = html.Find(".product-info-summary .row h4").FirstOrDefault();
         var authorNode = html.Find(".product-info-summary .row small").FirstOrDefault();
         var priceNode  = html.Find(".product-info-summary .row .p-desc").FirstOrDefault();
         var rqNode     = html.Find("#p-other ul li:first-child").FirstOrDefault();
         // SingleOrDefault is kept from the original: more than one match raises an
         // exception, which the catch below turns into an empty result.
         var tagNode    = html.Find("#p-other ul li:nth-child(3)").SingleOrDefault();
         var wrapNode   = html.Find("#wrap").SingleOrDefault();

         // Check explicitly instead of relying on a NullReferenceException being caught,
         // so the log states what actually went wrong.
         if (titleNode == null || authorNode == null || priceNode == null ||
             rqNode == null || tagNode == null || wrapNode == null)
         {
             log.Error(url + "----" + "expected job element not found");
             return(new sys_job());
         }

         string jobtitle = titleNode.InnerText();
         string author   = authorNode.InnerText().Replace("发布者:", "");
         string price    = priceNode.InnerText().Replace(" 预算: ", "");
         string rq       = rqNode.InnerText();
         string xqh      = tagNode.InnerText();

         // Reduce the "#wrap" markup to plain text: drop script blocks first so their
         // code does not survive as text, then strip the remaining tags and line breaks.
         string scriptPattern = @"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>";
         string describe      = wrapNode.InnerHtml();
         describe = Regex.Replace(describe, scriptPattern, "", RegexOptions.IgnoreCase);
         // Singleline lets '.' cross line breaks, so tags split over several lines are removed too.
         describe = Regex.Replace(describe, "<.*?>", "", RegexOptions.Singleline)
                         .Replace("\t", "").Replace("\r", "").Replace("\n", "");

         return(new sys_job
         {
             title = jobtitle,
             author = author,
             desc = describe,
             rq = rq,
             tag = xqh,
             price_min = price
         });
     }
     catch (Exception e)
     {
         // Best-effort scraping: any load/parse failure is logged and yields an empty job.
         log.Error(url + "----" + e.Message);
         return(new sys_job());
     }
 }
예제 #2
0
        /// <summary>
        /// Loads the film page at <paramref name="url"/> and returns one <c>sys_film</c> for each
        /// link inside "#Zoom" whose href contains "magnet:" or "ftp:" and for which
        /// <c>db.SetAdd("filmlink", link)</c> returns true.
        /// </summary>
        /// <param name="url">Address of the film page to load.</param>
        /// <returns>
        /// The collected entries. The list is empty when the page does not contain "Zoom", when the
        /// title or description element is missing, or when an exception occurs; in the last two
        /// cases the failure is logged and the URL is pushed onto the "error_infourl" list.
        /// </returns>
        public List <sys_film> Get_FilmInfo(string url)
        {
            try
            {
                List <sys_film> filmlist = new List <sys_film>();
                IHtmlDocument   source   = new JumonyParser().LoadDocument(url);

                // Without the "Zoom" marker there is nothing to extract.
                if (source.InnerHtml().IndexOf("Zoom", StringComparison.Ordinal) < 0)
                {
                    return(filmlist);
                }

                var titleNode = source.Find(".title_all h1 font").FirstOrDefault();
                var descNode  = source.Find("#Zoom span").FirstOrDefault();

                // Check explicitly instead of relying on a NullReferenceException being caught;
                // the URL is still recorded exactly as for any other failure.
                if (titleNode == null || descNode == null)
                {
                    log.Error(url + "----" + "title or description element not found");
                    this.db.ListLeftPush("error_infourl", url);
                    return(filmlist);
                }

                string title_all = titleNode.InnerText();
                string desc      = descNode.InnerHtml();

                // Extract the ratings: the text between each label and the following "<br />".
                // When a label is absent the group is empty and the rating becomes "".
                Match  imdbMatch   = Regex.Match(desc, "(?<imdb>IMDb评分.*?<br />)");
                Match  doubanMatch = Regex.Match(desc, "(?<douban>豆瓣评分.*?<br />)");
                string imdb        = imdbMatch.Groups["imdb"].Value.Replace("IMDb评分", "").Replace("<br />", "").Trim();
                string douban      = doubanMatch.Groups["douban"].Value.Replace("豆瓣评分", "").Replace("<br />", "").Trim();

                foreach (var item in source.Find("#Zoom a"))
                {
                    string filmlink = item.Attribute("href").Value();
                    if (filmlink == null)
                    {
                        continue;
                    }
                    if (!filmlink.Contains("magnet:") && !filmlink.Contains("ftp:"))
                    {
                        continue;
                    }

                    // Only links SetAdd reports as added are returned — presumably this
                    // de-duplicates against links stored earlier (TODO confirm against db).
                    if (db.SetAdd("filmlink", filmlink))
                    {
                        filmlist.Add(new sys_film()
                        {
                            link = filmlink, title = title_all, txt = desc, fromurl = url, imdb = imdb, douban = douban
                        });
                    }
                }

                return(filmlist);
            }
            catch (Exception e)
            {
                // Any load/parse failure is logged and the URL is recorded in the error list.
                log.Error(url + "----" + e.Message);
                this.db.ListLeftPush("error_infourl", url);
                return(new List <sys_film>());
            }
        }