Beispiel #1
0
        /// <summary>
        /// Downloads the HTML page at <paramref name="link"/> and builds one
        /// <see cref="LinkGrap"/> per <c>tr</c> of the table that sits directly inside
        /// the first <c>td</c> whose <c>class</c> attribute equals <paramref name="_class"/>.
        /// </summary>
        /// <param name="link">Absolute HTTP URL of the listing page.</param>
        /// <param name="_class">
        /// Value of the <c>class</c> attribute that identifies the container cell. It is
        /// inserted verbatim into an XPath string literal, so it must not contain a single quote.
        /// </param>
        /// <returns>
        /// The collected items (link = <c>href</c> of the row's anchor, title = the row's
        /// inner text). Empty when the expected markup is not present; never <c>null</c>.
        /// </returns>
        /// <exception cref="WebException">The HTTP request failed.</exception>
        public static List<LinkGrap> getLinkLists(string link, string _class)
        {
            List<LinkGrap> links = new List<LinkGrap>();

            HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
            wrq.Credentials = CredentialCache.DefaultCredentials;
            wrq.Method = "GET";
            wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
            // A GET has no request body; GetResponse is documented to throw
            // ProtocolViolationException when SendChunked is true for GET/HEAD.
            wrq.SendChunked = false;

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // Disposing the response also closes its stream and releases the connection.
            using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
            {
                doc.Load(wrp.GetResponseStream(), Encoding.UTF8);
            }

            // SelectSingleNode / SelectNodes return null when nothing matches.
            HtmlNode container = doc.DocumentNode.SelectSingleNode(@"//td[@class='" + _class + "']");
            if (container == null)
            {
                return links;
            }

            HtmlNode table = container.SelectSingleNode(@"table");
            if (table == null)
            {
                return links;
            }

            HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                return links;
            }

            foreach (HtmlNode row in rows)
            {
                // The anchor is reached by taking child #1 three levels deep below "td/table"
                // (same path as ChildNodes[1].ChildNodes[1].ChildNodes[1]); rows that do not
                // have that shape are skipped instead of throwing.
                HtmlNode anchor = row.SelectSingleNode("td/table");
                for (int depth = 0; depth < 3 && anchor != null; depth++)
                {
                    anchor = anchor.ChildNodes.Count > 1 ? anchor.ChildNodes[1] : null;
                }

                if (anchor == null || anchor.Attributes["href"] == null)
                {
                    continue;
                }

                LinkGrap item = new LinkGrap();
                item.Link = anchor.Attributes["href"].Value;
                item.Title = row.InnerText;
                links.Add(item);
            }

            return links;
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the RSS/XML document at <paramref name="link"/> and converts every
        /// <c>item</c> element into a <see cref="LinkGrap"/> (title, description, link).
        /// </summary>
        /// <param name="link">Absolute HTTP URL of the feed.</param>
        /// <returns>
        /// One entry per <c>item</c> element. Empty when the request fails with a
        /// <see cref="WebException"/> or the body is not well-formed XML; never <c>null</c>.
        /// </returns>
        public static List<LinkGrap> GetLinkFromRss(string link)
        {
            // Some feeds emit the site prefix twice in front of the real URL.
            const string duplicatedPrefix = "http://nld.com.vn";
            const string duplicatedMarker = "http://nld.com.vnhttp://worldcup";

            List<LinkGrap> items = new List<LinkGrap>();

            HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
            wrq.Credentials = CredentialCache.DefaultCredentials;
            wrq.Method = "GET";
            wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
            wrq.SendChunked = false;
            if (link.IndexOf("zing.vn", StringComparison.Ordinal) != -1)
            {
                wrq.Referer = "http://mp3.zing.vn";
            }

            try
            {
                using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
                {
                    XmlDocument doc = new XmlDocument();
                    try
                    {
                        doc.Load(wrp.GetResponseStream());

                        foreach (XmlNode n in doc.SelectNodes("//item"))
                        {
                            LinkGrap item = new LinkGrap();

                            XmlNode titleNode = n.SelectSingleNode("title");
                            if (titleNode != null)
                            {
                                item.Title = titleNode.InnerText;
                            }

                            XmlNode descriptionNode = n.SelectSingleNode("description");
                            if (descriptionNode != null)
                            {
                                item.Description = descriptionNode.InnerText;
                            }

                            XmlNode linkNode = n.SelectSingleNode("link | LINK");
                            if (linkNode != null)
                            {
                                string itemlink = linkNode.InnerText.Trim();
                                int markerAt = itemlink.IndexOf(duplicatedMarker, StringComparison.Ordinal);
                                if (markerAt != -1)
                                {
                                    // Keep everything after the duplicated prefix, wherever it starts.
                                    itemlink = itemlink.Substring(markerAt + duplicatedPrefix.Length);
                                }
                                item.Link = itemlink;
                            }

                            items.Add(item);
                        }
                    }
                    catch (XmlException)
                    {
                        // Deliberately best-effort: a malformed feed yields an empty result.
                    }
                }
            }
            catch (WebException)
            {
                // Deliberately best-effort: an unreachable feed yields an empty result.
            }

            return items;
        }
Beispiel #3
0
        /// <summary>
        /// Fetches <paramref name="link"/> and fills this instance from the response.
        /// For an HTML response it sets the title, meta description, raw HTML/text, the
        /// extracted content (via <c>Wrapper</c>), keywords, and the file names of the
        /// resized copies of the first sufficiently large image found in the content.
        /// For an image response it records the link itself as title, description and image.
        /// A snapshot of the result is stored in <see cref="HttpRuntime.Cache"/>.
        /// </summary>
        /// <param name="link">Absolute HTTP URL to fetch.</param>
        /// <param name="extractLink">Not read by this constructor.</param>
        /// <remarks>
        /// The constructor returns early, without caching anything, when no content could
        /// be extracted or when the content has images but none could be processed.
        /// When the request fails with a <see cref="WebException"/>, <c>Title</c> is set to "0"
        /// and the cache entry for the link is removed.
        /// </remarks>
        public LinkGrap(string link, bool extractLink)
        {
            // NOTE(review): hard-coded deployment directory kept as-is; confirm it should
            // not come from configuration or HostingEnvironment.MapPath.
            string uploadDir = @"C:\inetpub\wwwroot\choNongNghiep\web\lib\up\tintuc\rss\";
            string key = string.Format(cacheKey, link);

            // The cache is intentionally NOT consulted here: the original code read the
            // entry and immediately discarded it, so every call re-fetches the link.
            HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
            string host = new Uri(link).Host;
            wrq.Credentials = CredentialCache.DefaultCredentials;
            wrq.Method = "GET";
            wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
            wrq.SendChunked = false;
            if (link.IndexOf("zing.vn") != -1)
            {
                wrq.Referer = "http://mp3.zing.vn";
            }

            LinkGrap Item;
            try
            {
                // Disposing the response releases the connection on every exit path,
                // including the early returns below.
                using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
                {
                    contentType = wrp.ContentType;
                    if (contentType.ToLower().IndexOf("html") != -1)
                    {
                        HtmlDocument doc = new HtmlDocument();
                        doc.Load(wrp.GetResponseStream(), Encoding.UTF8);
                        string domain = "http://" + host;

                        // Title
                        HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//title | //TITLE");
                        if (titleNodes != null)
                        {
                            Title = titleNodes[0].InnerText;
                        }

                        // Description (a meta tag without a content attribute is ignored)
                        HtmlNodeCollection descriptionNodes = doc.DocumentNode.SelectNodes("//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                        if (descriptionNodes != null && descriptionNodes[0].Attributes["content"] != null)
                        {
                            Description = descriptionNodes[0].Attributes["content"].Value;
                        }

                        // Content
                        ContentRawHtml = doc.DocumentNode.InnerHtml;
                        ContentRawText = doc.DocumentNode.InnerText;
                        Content = Wrapper(host, link, doc);
                        if (string.IsNullOrEmpty(Content))
                        {
                            return;
                        }

                        string contentTokeyword = Giga.Common.Lib._string.getHTML(Content);
                        if (!string.IsNullOrEmpty(contentTokeyword))
                        {
                            using (LinkKeyword _linkKeyword = new LinkKeyword(contentTokeyword))
                            {
                                if (_linkKeyword.ListKeyWord != null)
                                {
                                    KeyWords = _linkKeyword.ListKeyWord;
                                }
                            }
                        }

                        // Images: only the extracted content is searched, not the whole page.
                        HtmlDocument contentDoc = new HtmlDocument();
                        contentDoc.LoadHtml(Content);
                        HtmlNodeCollection imageNodes = contentDoc.DocumentNode.SelectNodes("//img | //IMG");
                        if (imageNodes != null)
                        {
                            // Crop sizes applied in order; each suffix is part of the saved file name.
                            int[,] cropSizes = { { 430, 300 }, { 100, 100 }, { 50, 50 } };
                            string[] cropSuffixes = { "430x300", "100x100", "50x50" };

                            List<string> _list = new List<string>();
                            foreach (HtmlNode _img in imageNodes)
                            {
                                if (_img.Attributes["src"] == null)
                                {
                                    continue;
                                }

                                string src = _img.Attributes["src"].Value;
                                bool isAbsolute = src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                    || src.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                                if (!isAbsolute)
                                {
                                    // Relative sources are treated as root-relative to the page's host.
                                    if (src.IndexOf("/") != 0)
                                    {
                                        src = "/" + src;
                                    }
                                    src = domain + src;
                                }

                                try
                                {
                                    ImageProcess gimg = new ImageProcess(new Uri(src), src);
                                    if (gimg.Width > 250 && gimg.Heigth > 200)
                                    {
                                        string gimg_t = Guid.NewGuid().ToString().Replace("-", "");

                                        // The un-cropped name is listed first even though only
                                        // the cropped variants are written to disk.
                                        _list.Add(gimg_t + gimg.Ext);

                                        for (int i = 0; i < cropSuffixes.Length; i++)
                                        {
                                            string fileName = gimg_t + cropSuffixes[i] + gimg.Ext;
                                            gimg.Crop(cropSizes[i, 0], cropSizes[i, 1]);
                                            gimg.Save(Path.Combine(uploadDir, fileName));
                                            _list.Add(fileName);
                                        }

                                        // Only the first qualifying image is used.
                                        break;
                                    }
                                }
                                catch (Exception)
                                {
                                    // Deliberately best-effort: an image that cannot be
                                    // downloaded or processed is skipped and the next one is tried.
                                }
                            }

                            if (_list.Count == 0)
                            {
                                return;
                            }
                            Images = _list;
                        }
                    }
                    else if (contentType.IndexOf("image") != -1)
                    {
                        Title = link;
                        Description = link;
                        List<string> _list = new List<string>();
                        ImageProcess gimg = new ImageProcess(new Uri(link), link);
                        if (gimg.Width > 100 && gimg.Heigth > 75)
                        {
                            _list.Add(link);
                        }
                        Images = _list;
                    }
                }

                // Snapshot stored in the cache.
                // NOTE(review): KeyWords is not copied into the snapshot — confirm whether
                // KeyWordsIEnum (read back below) depends on it.
                LinkGrap _obj = new LinkGrap();
                _obj.Title = Title;
                _obj.Description = Description;
                _obj.Images = Images;
                _obj.contentType = contentType;
                _obj.InLink = InLink;
                _obj.OutLink = OutLink;
                // Previously assigned to contentType, which clobbered the real content type
                // (and, through the copy-back below, this.contentType as well).
                _obj.Content = Content;
                _obj.ContentRawText = ContentRawText;
                _obj.ContentRawHtml = ContentRawHtml;
                HttpRuntime.Cache.Insert(key, _obj);
                Item = _obj;
            }
            catch (WebException)
            {
                // A title of "0" marks a link that could not be fetched.
                LinkGrap _obj2 = new LinkGrap();
                _obj2.Title = "0";
                HttpRuntime.Cache.Remove(key);
                Item = _obj2;
            }

            Title = Item.Title;
            Description = Item.Description;
            Images = Item.Images;
            contentType = Item.contentType;
            KeyWordsIEnum = Item.KeyWordsIEnum;
        }
Beispiel #4
0
        /// <summary>
        /// Populates this instance for <paramref name="link"/>. A previously cached
        /// instance for the same link is copied; otherwise the link is fetched and, for an
        /// HTML response, the title, meta description, raw HTML/text, extracted content
        /// (via <c>Wrapper</c>) and the page's internal/external links are filled in and
        /// the instance is cached.
        /// </summary>
        /// <param name="link">Absolute HTTP URL to fetch.</param>
        /// <remarks>
        /// When the request fails with a <see cref="WebException"/> the cache entry for the
        /// link is removed and the instance keeps whatever was set before the failure.
        /// NOTE(review): the sibling constructor marks failures with <c>Title = "0"</c>;
        /// confirm whether this overload should do the same.
        /// </remarks>
        public LinkGrap(string link)
        {
            string key = string.Format(cacheKey, link);

            // HttpRuntime.Cache is used instead of HttpContext.Current.Cache so the
            // constructor also works when there is no current request.
            LinkGrap cached = (LinkGrap)HttpRuntime.Cache[key];
            if (cached != null)
            {
                // A cache hit previously left the new instance empty.
                Title = cached.Title;
                Description = cached.Description;
                contentType = cached.contentType;
                ContentRawHtml = cached.ContentRawHtml;
                ContentRawText = cached.ContentRawText;
                Content = cached.Content;
                InLink = cached.InLink;
                OutLink = cached.OutLink;
                return;
            }

            HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
            Uri pageUri = new Uri(link);
            string host = pageUri.Host;
            wrq.Credentials = CredentialCache.DefaultCredentials;
            wrq.Method = "GET";
            wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
            // A GET has no request body; GetResponse is documented to throw
            // ProtocolViolationException when SendChunked is true for GET/HEAD.
            wrq.SendChunked = false;
            if (link.IndexOf("zing.vn") != -1)
            {
                wrq.Referer = "http://mp3.zing.vn";
            }

            try
            {
                using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
                {
                    contentType = wrp.ContentType;
                    if (contentType.ToLower().IndexOf("html") != -1)
                    {
                        HtmlDocument doc = new HtmlDocument();
                        doc.Load(wrp.GetResponseStream(), Encoding.UTF8);

                        // Title
                        HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//title | //TITLE");
                        if (titleNodes != null)
                        {
                            Title = titleNodes[0].InnerText;
                        }

                        // Description (a meta tag without a content attribute is ignored)
                        HtmlNodeCollection descriptionNodes = doc.DocumentNode.SelectNodes("//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                        if (descriptionNodes != null && descriptionNodes[0].Attributes["content"] != null)
                        {
                            Description = descriptionNodes[0].Attributes["content"].Value;
                        }

                        // Content
                        ContentRawHtml = doc.DocumentNode.InnerHtml;
                        ContentRawText = doc.DocumentNode.InnerText;
                        Content = Wrapper(host, link, doc);

                        // Links
                        List<string> _inLink = new List<string>();
                        List<string> _outLink = new List<string>();

                        // SelectNodes returns null when the page has no anchors.
                        HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a|//A");
                        if (anchors != null)
                        {
                            foreach (HtmlNode a in anchors)
                            {
                                string href = string.Empty;
                                if (a.Attributes["href"] != null)
                                {
                                    href = a.Attributes["href"].Value;
                                }
                                else if (a.Attributes["HREF"] != null)
                                {
                                    href = a.Attributes["HREF"].Value;
                                }

                                if (string.IsNullOrEmpty(href))
                                {
                                    continue;
                                }

                                // Script pseudo-links and in-page fragments are not navigable links.
                                if (href.StartsWith("javascrip", StringComparison.OrdinalIgnoreCase)
                                    || href.StartsWith("#", StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                // Resolve against the page URL so that "a/b.html", "../x" and
                                // https links are handled correctly; malformed hrefs are skipped.
                                Uri resolved;
                                if (!Uri.TryCreate(pageUri, href, out resolved))
                                {
                                    continue;
                                }
                                if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps)
                                {
                                    continue;
                                }

                                // Absolute http:// hrefs are stored exactly as written in the page.
                                string target = href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                    ? href
                                    : resolved.AbsoluteUri;

                                // Same substring test on the host name as before: sub-domains
                                // of the page's host count as internal.
                                if (resolved.Host.IndexOf(host, StringComparison.OrdinalIgnoreCase) != -1)
                                {
                                    _inLink.Add(target);
                                }
                                else
                                {
                                    _outLink.Add(target);
                                }
                            }
                        }

                        InLink = _inLink;
                        OutLink = _outLink;
                    }
                }

                HttpRuntime.Cache.Insert(key, this);
            }
            catch (WebException)
            {
                HttpRuntime.Cache.Remove(key);
            }
        }
Beispiel #5
0
 /// <summary>
 /// Downloads the HTML page at <paramref name="link"/> and returns every anchor whose
 /// text and <c>href</c> both contain "rss".
 /// </summary>
 /// <param name="link">Absolute HTTP URL of the page to scan.</param>
 /// <returns>
 /// One <see cref="LinkGrap"/> per matching anchor (link = <c>href</c>, title = anchor
 /// text). Empty when nothing matches or the request fails with a
 /// <see cref="WebException"/>; never <c>null</c>.
 /// </returns>
 public static List<LinkGrap> GetRss(string link)
 {
     List<LinkGrap> feeds = new List<LinkGrap>();

     HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
     wrq.Credentials = CredentialCache.DefaultCredentials;
     wrq.Method = "GET";
     wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
     // A GET has no request body; GetResponse is documented to throw
     // ProtocolViolationException when SendChunked is true for GET/HEAD.
     wrq.SendChunked = false;
     if (link.IndexOf("zing.vn") != -1)
     {
         wrq.Referer = "http://mp3.zing.vn";
     }

     try
     {
         using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
         {
             HtmlDocument doc = new HtmlDocument();
             doc.Load(wrp.GetResponseStream());

             // SelectNodes returns null (not an empty collection) when nothing matches.
             HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes(@"//a[contains(text(),'rss') and contains(@href,'rss')]");
             if (anchors != null)
             {
                 foreach (HtmlNode a in anchors)
                 {
                     LinkGrap item = new LinkGrap();
                     item.Link = a.Attributes["href"].Value;
                     item.Title = a.InnerText;
                     feeds.Add(item);
                 }
             }
         }
     }
     catch (WebException)
     {
         // Deliberately best-effort: an unreachable page yields an empty result.
     }

     return feeds;
 }