/// <summary>
/// Downloads the HTML page at <paramref name="link"/> and builds one <see cref="LinkGrap"/> for each
/// <c>tr</c> of the table nested directly inside the first <c>td</c> whose <c>class</c> attribute
/// equals <paramref name="_class"/>.
/// </summary>
/// <param name="link">Absolute URL of the listing page.</param>
/// <param name="_class">
/// Exact value of the <c>class</c> attribute of the wrapping <c>td</c>. It is concatenated into the
/// XPath expression unescaped, so it must not contain a single quote.
/// </param>
/// <returns>
/// Items whose <c>Link</c> is the row anchor's <c>href</c> and whose <c>Title</c> is the row's inner text.
/// Rows (or a whole page) that do not have the expected markup are skipped, so the list may be empty.
/// </returns>
/// <exception cref="WebException">The HTTP request failed; this method does not catch it.</exception>
public static List<LinkGrap> getLinkLists(string link, string _class)
{
    List<LinkGrap> result = new List<LinkGrap>();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(link);
    request.Credentials = CredentialCache.DefaultCredentials;
    request.Method = "GET";
    request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    // A GET carries no body. The HttpWebRequest.GetResponse documentation lists GET combined with
    // SendChunked = true as a protocol violation, so chunked upload is switched off here.
    request.SendChunked = false;

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    // Disposing the response also closes its stream and releases the connection.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        doc.Load(response.GetResponseStream(), Encoding.UTF8);
    }

    // The HtmlAgilityPack selectors return null (not an empty result) when nothing matches.
    HtmlNode container = doc.DocumentNode.SelectSingleNode(@"//td[@class='" + _class + "']");
    HtmlNode table = container == null ? null : container.SelectSingleNode(@"table");
    HtmlNodeCollection rows = table == null ? null : table.SelectNodes("tr");
    if (rows == null)
    {
        return result;
    }

    foreach (HtmlNode row in rows)
    {
        // The anchor is reached by taking child index 1 three levels below the row's inner table.
        // NOTE(review): index 1 presumably skips a leading whitespace text node - confirm against the page markup.
        HtmlNode anchor = row.SelectSingleNode("td/table");
        for (int depth = 0; depth < 3 && anchor != null; depth++)
        {
            anchor = anchor.ChildNodes.Count > 1 ? anchor.ChildNodes[1] : null;
        }

        if (anchor == null || anchor.Attributes["href"] == null)
        {
            // Row does not follow the expected layout; skip it instead of failing the whole page.
            continue;
        }

        LinkGrap item = new LinkGrap();
        item.Link = anchor.Attributes["href"].Value;
        item.Title = row.InnerText;
        result.Add(item);
    }

    return result;
}
/// <summary>
/// Downloads the XML feed at <paramref name="link"/> and converts every <c>item</c> element into a
/// <see cref="LinkGrap"/> carrying the element's <c>title</c>, <c>description</c> and <c>link</c> text.
/// </summary>
/// <param name="link">Absolute URL of the feed.</param>
/// <returns>
/// One entry per <c>item</c> element. The method is best-effort: when the request fails or the
/// payload is not well-formed XML, the failure is traced and whatever was collected so far
/// (normally an empty list) is returned.
/// </returns>
public static List<LinkGrap> GetLinkFromRss(string link)
{
    List<LinkGrap> result = new List<LinkGrap>();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(link);
    request.Credentials = CredentialCache.DefaultCredentials;
    request.Method = "GET";
    request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    request.SendChunked = false;
    if (link.IndexOf("zing.vn", StringComparison.Ordinal) != -1)
    {
        request.Referer = "http://mp3.zing.vn";
    }

    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): the feed is remote content and XmlDocument may resolve external DTDs/entities
            // through its XmlResolver - consider setting XmlResolver = null if no feed depends on a DTD.
            XmlDocument doc = new XmlDocument();
            doc.Load(response.GetResponseStream());

            XmlNodeList itemNodes = doc.SelectNodes("//item");
            if (itemNodes != null)
            {
                foreach (XmlNode itemNode in itemNodes)
                {
                    LinkGrap item = new LinkGrap();

                    XmlNode titleNode = itemNode.SelectSingleNode("title");
                    if (titleNode != null)
                    {
                        item.Title = titleNode.InnerText;
                    }

                    XmlNode descriptionNode = itemNode.SelectSingleNode("description");
                    if (descriptionNode != null)
                    {
                        item.Description = descriptionNode.InnerText;
                    }

                    XmlNode linkNode = itemNode.SelectSingleNode("link | LINK");
                    if (linkNode != null)
                    {
                        string itemLink = linkNode.InnerText.Trim();
                        // Some links arrive with the site prefix duplicated in front of the real URL.
                        // Strip the prefix only when it really is at the start, otherwise the
                        // fixed-length Substring would cut unrelated characters.
                        if (itemLink.StartsWith("http://nld.com.vnhttp://worldcup", StringComparison.Ordinal))
                        {
                            itemLink = itemLink.Substring("http://nld.com.vn".Length);
                        }
                        item.Link = itemLink;
                    }

                    result.Add(item);
                }
            }
        }
    }
    catch (XmlException ex)
    {
        // Malformed feed: keep the best-effort contract but leave a trace instead of hiding it.
        System.Diagnostics.Trace.TraceWarning("GetLinkFromRss: invalid XML from '{0}': {1}", link, ex.Message);
    }
    catch (WebException ex)
    {
        System.Diagnostics.Trace.TraceWarning("GetLinkFromRss: request to '{0}' failed: {1}", link, ex.Message);
    }

    return result;
}
/// <summary>
/// Downloads <paramref name="link"/> and populates this instance from the response.
/// For an HTML response it reads the title and meta description, stores the raw HTML/text, extracts the
/// main content through <c>Wrapper</c>, derives keywords from it and saves cropped copies of the first
/// sufficiently large image found in the content. For an image response the link itself is used as
/// title, description and (when large enough) the only image.
/// </summary>
/// <param name="link">Absolute URL to download.</param>
/// <param name="extractLink">Not read by this constructor; it only distinguishes this overload.</param>
/// <remarks>
/// The constructor returns early, leaving the instance partially populated and nothing cached, when the
/// extracted content is empty or when the content has image tags but none of them could be saved.
/// A <see cref="WebException"/> is handled by setting <c>Title</c> to "0"; other exceptions propagate.
/// </remarks>
public LinkGrap(string link, bool extractLink)
{
    // NOTE(review): thumbnails are written to this hard-coded absolute directory - confirm it matches the deployment.
    string uploadDir = @"C:\inetpub\wwwroot\choNongNghiep\web\lib\up\tintuc\rss\";
    string cacheEntryKey = string.Format(cacheKey, link);

    // The previous code looked the entry up in the cache and immediately discarded the result, so the
    // page has always been downloaded on every call. That behaviour is kept; only the dead lookup is gone.
    LinkGrap Item = null;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(link);
    string host = new Uri(link).Host;
    request.Credentials = CredentialCache.DefaultCredentials;
    request.Method = "GET";
    request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    request.SendChunked = false;
    if (link.IndexOf("zing.vn", StringComparison.Ordinal) != -1)
    {
        request.Referer = "http://mp3.zing.vn";
    }

    try
    {
        // 'using' releases the connection on every exit path, including the early returns below.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            contentType = response.ContentType;
            if (contentType.ToLower().IndexOf("html") != -1)
            {
                HtmlDocument doc = new HtmlDocument();
                doc.Load(response.GetResponseStream(), Encoding.UTF8);
                string domain = "http://" + host;

                // Title: first <title> element, if any (SelectNodes returns null when nothing matches).
                HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//title | //TITLE");
                if (titleNodes != null)
                {
                    Title = titleNodes[0].InnerText;
                }

                // Description: content attribute of the first description meta tag, if it has one.
                HtmlNodeCollection descriptionNodes = doc.DocumentNode.SelectNodes("//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                if (descriptionNodes != null && descriptionNodes[0].Attributes["content"] != null)
                {
                    Description = descriptionNodes[0].Attributes["content"].Value;
                }

                // Content and keywords.
                ContentRawHtml = doc.DocumentNode.InnerHtml;
                ContentRawText = doc.DocumentNode.InnerText;
                Content = Wrapper(host, link, doc);
                if (string.IsNullOrEmpty(Content))
                {
                    return;
                }

                string keywordSource = Giga.Common.Lib._string.getHTML(Content);
                if (!string.IsNullOrEmpty(keywordSource))
                {
                    using (LinkKeyword keywordExtractor = new LinkKeyword(keywordSource))
                    {
                        if (keywordExtractor.ListKeyWord != null)
                        {
                            KeyWords = keywordExtractor.ListKeyWord;
                        }
                    }
                }

                // Images: only the extracted content is scanned, and only the first image that is
                // larger than 250x200 is kept.
                HtmlDocument contentDoc = new HtmlDocument();
                contentDoc.LoadHtml(Content);
                HtmlNodeCollection imageNodes = contentDoc.DocumentNode.SelectNodes("//img | //IMG");
                if (imageNodes != null)
                {
                    int[] cropWidths = { 430, 100, 50 };
                    int[] cropHeights = { 300, 100, 50 };
                    List<string> savedImages = new List<string>();

                    foreach (HtmlNode imageNode in imageNodes)
                    {
                        if (imageNode.Attributes["src"] == null)
                        {
                            continue;
                        }

                        string src = imageNode.Attributes["src"].Value;
                        // https sources are absolute too; previously they had the page domain glued in front.
                        bool isAbsolute = src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                            || src.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                        if (!isAbsolute)
                        {
                            if (!src.StartsWith("/", StringComparison.Ordinal))
                            {
                                src = "/" + src;
                            }
                            src = domain + src;
                        }

                        try
                        {
                            ImageProcess gimg = new ImageProcess(new Uri(src), src);
                            if (gimg.Width > 250 && gimg.Heigth > 200)
                            {
                                string baseName = Guid.NewGuid().ToString("N");

                                // NOTE(review): the un-cropped file name is listed although no file is saved
                                // under that name (the save was already disabled) - confirm consumers expect this.
                                savedImages.Add(baseName + gimg.Ext);

                                // Crops are applied one after another to the same image, largest first.
                                for (int i = 0; i < cropWidths.Length; i++)
                                {
                                    string fileName = baseName + cropWidths[i] + "x" + cropHeights[i] + gimg.Ext;
                                    gimg.Crop(cropWidths[i], cropHeights[i]);
                                    gimg.Save(Path.Combine(uploadDir, fileName));
                                    savedImages.Add(fileName);
                                }
                                break;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort per image: a broken image must not abort the page, but leave a trace.
                            System.Diagnostics.Trace.TraceWarning("LinkGrap: image '{0}' skipped: {1}", src, ex.Message);
                        }
                    }

                    if (savedImages.Count == 0)
                    {
                        return;
                    }
                    Images = savedImages;
                }
            }
            else if (contentType.IndexOf("image") != -1)
            {
                Title = link;
                Description = link;
                List<string> directImages = new List<string>();
                ImageProcess gimg = new ImageProcess(new Uri(link), link);
                if (gimg.Width > 100 && gimg.Heigth > 75)
                {
                    directImages.Add(link);
                }
                Images = directImages;
            }

            // Snapshot of the extracted values that is stored in the cache.
            LinkGrap snapshot = new LinkGrap();
            snapshot.Title = Title;
            snapshot.Description = Description;
            snapshot.Images = Images;
            snapshot.contentType = contentType;
            snapshot.InLink = InLink;
            snapshot.OutLink = OutLink;
            // Previously the extracted content was assigned to contentType here, which overwrote the
            // MIME type (on the snapshot and, through the copy below, on this instance) with the HTML.
            snapshot.Content = Content;
            snapshot.ContentRawText = ContentRawText;
            snapshot.ContentRawHtml = ContentRawHtml;
            HttpRuntime.Cache.Insert(cacheEntryKey, snapshot);
            Item = snapshot;
        }
    }
    catch (WebException ex)
    {
        System.Diagnostics.Trace.TraceWarning("LinkGrap: request to '{0}' failed: {1}", link, ex.Message);

        // Failure marker: callers receive an instance whose Title is "0".
        LinkGrap failed = new LinkGrap();
        failed.Title = "0";
        HttpRuntime.Cache.Remove(cacheEntryKey);
        Item = failed;
    }

    Title = Item.Title;
    Description = Item.Description;
    Images = Item.Images;
    contentType = Item.contentType;
    // NOTE(review): KeyWordsIEnum is never assigned on the snapshot in this constructor, so this copies
    // whatever a freshly constructed LinkGrap exposes - confirm that is intended.
    KeyWordsIEnum = Item.KeyWordsIEnum;
}
/// <summary>
/// Populates this instance from the page at <paramref name="link"/>: title, meta description, raw
/// HTML/text, the content extracted by <c>Wrapper</c>, and the page's hyperlinks split into links whose
/// host contains the page's host (<c>InLink</c>) and all others (<c>OutLink</c>).
/// The populated instance is stored in the ASP.NET cache and later constructions for the same link
/// copy their values from the cached instance instead of downloading again.
/// </summary>
/// <param name="link">Absolute URL to download.</param>
/// <remarks>
/// A <see cref="WebException"/> is handled by removing the cache entry and leaving the instance
/// unpopulated; other exceptions propagate.
/// </remarks>
public LinkGrap(string link)
{
    string cacheEntryKey = string.Format(cacheKey, link);

    // HttpRuntime.Cache is the same cache the other constructor uses and, unlike
    // HttpContext.Current.Cache, does not require a current request.
    LinkGrap cached = (LinkGrap)HttpRuntime.Cache[cacheEntryKey];
    if (cached != null)
    {
        // Previously a cache hit returned an empty instance because nothing was copied over.
        Title = cached.Title;
        Description = cached.Description;
        Images = cached.Images;
        contentType = cached.contentType;
        Content = cached.Content;
        ContentRawHtml = cached.ContentRawHtml;
        ContentRawText = cached.ContentRawText;
        InLink = cached.InLink;
        OutLink = cached.OutLink;
        return;
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(link);
    Uri pageUri = new Uri(link);
    string host = pageUri.Host;
    request.Credentials = CredentialCache.DefaultCredentials;
    request.Method = "GET";
    request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    // A GET carries no body. The HttpWebRequest.GetResponse documentation lists GET combined with
    // SendChunked = true as a protocol violation, so chunked upload is switched off here.
    request.SendChunked = false;
    if (link.IndexOf("zing.vn", StringComparison.Ordinal) != -1)
    {
        request.Referer = "http://mp3.zing.vn";
    }

    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            contentType = response.ContentType;
            if (contentType.ToLower().IndexOf("html") != -1)
            {
                HtmlDocument doc = new HtmlDocument();
                doc.Load(response.GetResponseStream(), Encoding.UTF8);

                // Title: first <title> element, if any (SelectNodes returns null when nothing matches).
                HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//title | //TITLE");
                if (titleNodes != null)
                {
                    Title = titleNodes[0].InnerText;
                }

                // Description: content attribute of the first description meta tag, if it has one.
                HtmlNodeCollection descriptionNodes = doc.DocumentNode.SelectNodes("//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                if (descriptionNodes != null && descriptionNodes[0].Attributes["content"] != null)
                {
                    Description = descriptionNodes[0].Attributes["content"].Value;
                }

                ContentRawHtml = doc.DocumentNode.InnerHtml;
                ContentRawText = doc.DocumentNode.InnerText;
                Content = Wrapper(host, link, doc);

                // Hyperlinks, classified by whether the target host contains the page host.
                List<string> inLinks = new List<string>();
                List<string> outLinks = new List<string>();
                HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a|//A");
                if (anchors != null)
                {
                    foreach (HtmlNode anchor in anchors)
                    {
                        string href = string.Empty;
                        if (anchor.Attributes["href"] != null)
                        {
                            href = anchor.Attributes["href"].Value;
                        }
                        else if (anchor.Attributes["HREF"] != null)
                        {
                            href = anchor.Attributes["HREF"].Value;
                        }

                        if (string.IsNullOrEmpty(href)
                            || href.StartsWith("javascrip", StringComparison.OrdinalIgnoreCase)
                            || href.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        // Resolve against the page URL. This handles relative paths ("a/b.html", "../x"),
                        // keeps https links intact and skips malformed hrefs instead of throwing.
                        Uri target;
                        if (!Uri.TryCreate(pageUri, href, out target))
                        {
                            continue;
                        }
                        if (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps)
                        {
                            // mailto:, tel:, javascript: and similar are not page links.
                            continue;
                        }

                        // Absolute hrefs are stored as written; relative ones in their resolved form.
                        bool wasAbsolute = href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                            || href.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                        string stored = wasAbsolute ? href : target.AbsoluteUri;

                        if (target.Host.IndexOf(host, StringComparison.OrdinalIgnoreCase) != -1)
                        {
                            inLinks.Add(stored);
                        }
                        else
                        {
                            outLinks.Add(stored);
                        }
                    }
                }

                // Assigned once, after the scan, so a page without anchors still gets empty lists.
                InLink = inLinks;
                OutLink = outLinks;
            }
        }

        HttpRuntime.Cache.Insert(cacheEntryKey, this);
    }
    catch (WebException ex)
    {
        // NOTE(review): the previous handler built a throw-away LinkGrap with Title "0" that was never
        // used; the other constructor exposes Title "0" on failure - confirm whether this one should too.
        System.Diagnostics.Trace.TraceWarning("LinkGrap: request to '{0}' failed: {1}", link, ex.Message);
        HttpRuntime.Cache.Remove(cacheEntryKey);
    }
}
/// <summary>
/// Downloads the HTML page at <paramref name="link"/> and collects every anchor whose text and
/// <c>href</c> both contain the lowercase substring "rss".
/// </summary>
/// <param name="link">Absolute URL of the page to scan.</param>
/// <returns>
/// One <see cref="LinkGrap"/> per matching anchor, with <c>Link</c> set to the anchor's <c>href</c> and
/// <c>Title</c> to its inner text. Empty when nothing matches or when the request fails with a
/// <see cref="WebException"/>, which is traced rather than thrown.
/// </returns>
public static List<LinkGrap> GetRss(string link)
{
    List<LinkGrap> result = new List<LinkGrap>();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(link);
    request.Credentials = CredentialCache.DefaultCredentials;
    request.Method = "GET";
    request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    // A GET carries no body. The HttpWebRequest.GetResponse documentation lists GET combined with
    // SendChunked = true as a protocol violation, so chunked upload is switched off here.
    request.SendChunked = false;
    if (link.IndexOf("zing.vn", StringComparison.Ordinal) != -1)
    {
        request.Referer = "http://mp3.zing.vn";
    }

    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            HtmlDocument doc = new HtmlDocument();
            doc.Load(response.GetResponseStream());

            // SelectNodes returns null, not an empty collection, when no anchor matches.
            HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes(@"//a[contains(text(),'rss') and contains(@href,'rss')]");
            if (anchors != null)
            {
                foreach (HtmlNode anchor in anchors)
                {
                    if (anchor.Attributes["href"] == null)
                    {
                        continue;
                    }

                    LinkGrap item = new LinkGrap();
                    item.Link = anchor.Attributes["href"].Value;
                    item.Title = anchor.InnerText;
                    result.Add(item);
                }
            }
        }
    }
    catch (WebException ex)
    {
        // Best effort: an unreachable page yields an empty list, but the failure is traced.
        System.Diagnostics.Trace.TraceWarning("GetRss: request to '{0}' failed: {1}", link, ex.Message);
    }

    return result;
}