/// <summary>
/// Scrapes the page at <paramref name="url"/>: loads it, applies the rules that
/// <c>QuyTacDal.SelectByHost</c> returns for the page's host to fill <c>Ten</c>,
/// <c>MoTa</c> and <c>NoiDung</c>, then derives <c>KeyWords</c> and a lead image
/// URL (<c>Anh</c>) from the extracted content.
/// </summary>
/// <param name="url">Absolute URL of the page to scrape.</param>
public RegularTin(string url)
{
    Uri pageUri = new Uri(url);
    string _host = pageUri.Host.ToLower();
    HtmlAgilityPack.HtmlDocument doc = loadDoc(url);
    List<QuyTac> rules = QuyTacDal.SelectByHost(_host).ToList();
    WrapperClean(_host, url, doc);

    // --- Extract Ten (title), MoTa (description) and NoiDung (content) ---
    foreach (QuyTac item in rules)
    {
        HtmlNode _c = doc.DocumentNode.SelectSingleNode(item.Xpath);
        switch (item.Loai)
        {
            case 0: // replace the working document with the matched node's inner HTML
                if (_c != null)
                {
                    doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(_c.InnerHtml);
                }
                break;

            case 1: // title
                if (_c == null)
                {
                    // SelectSingleNode yields null when nothing matches; the previous
                    // SelectNodes(...)[0] threw NullReferenceException in that case.
                    _c = doc.DocumentNode.SelectSingleNode(@"//title | //TITLE");
                }
                if (_c != null)
                {
                    Ten = _c.InnerText;
                }
                break;

            case 2: // description
                if (_c != null)
                {
                    MoTa = _c.InnerHtml;
                }
                else
                {
                    HtmlNode meta = doc.DocumentNode.SelectSingleNode(
                        @"//meta[@name='description'] | //meta[@name='DESCRIPTION'] | //meta[@name='Description']");
                    // A <meta> element carries its text in the "content" attribute, not in its body.
                    if (meta != null && meta.Attributes["content"] != null)
                    {
                        MoTa = meta.Attributes["content"].Value;
                    }
                }
                break;

            case 3: // content: either strip the node (Xoa) or take it as the article body
                if (_c != null)
                {
                    if (item.Xoa)
                    {
                        _c.ParentNode.RemoveChild(_c, false);
                    }
                    else
                    {
                        NoiDung = _c.InnerHtml;
                    }
                }
                break;

            default:
                break;
        }
    }

    // --- Keywords and lead image are derived from the extracted content only ---
    if (string.IsNullOrEmpty(NoiDung))
    {
        return;
    }

    HtmlAgilityPack.HtmlDocument contentDoc = new HtmlAgilityPack.HtmlDocument();
    contentDoc.LoadHtml(NoiDung);

    using (LinkKeyword _linkKeyword = new LinkKeyword(contentDoc.DocumentNode.InnerText))
    {
        if (_linkKeyword.ListKeyWord != null)
        {
            KeyWords = _linkKeyword.ListKeyWord;
        }
    }

    var imgNodes = contentDoc.DocumentNode.SelectNodes("//img | //IMG");
    if (imgNodes == null)
    {
        return;
    }

    string domain = "http://" + _host;
    foreach (HtmlNode _img in imgNodes)
    {
        if (_img.Attributes["src"] == null)
        {
            continue;
        }

        string src = _img.Attributes["src"].Value;
        if (string.IsNullOrEmpty(src))
        {
            continue;
        }

        if (src.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative URL: borrow the page's scheme.
            src = pageUri.Scheme + ":" + src;
        }
        else if (!src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            && !src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            // Anything else is treated as a path on the page's host (rooted at "/").
            if (!src.StartsWith("/", StringComparison.Ordinal))
            {
                src = "/" + src;
            }
            src = domain + src;
        }

        try
        {
            var gimg = new ImageProcess(new Uri(src), src);
            // The first image larger than 250x200 becomes the lead image.
            if (gimg.Width > 250 && gimg.Heigth > 200)
            {
                Anh = src;
                break;
            }
        }
        catch (Exception ex)
        {
            // Best effort: an image that cannot be fetched or decoded is skipped, not fatal.
            System.Diagnostics.Trace.TraceWarning("RegularTin: skipping image '{0}': {1}", src, ex.Message);
        }
    }
}
/// <summary>
/// Fetches <paramref name="link"/> and fills this instance from the response.
/// </summary>
/// <remarks>
/// For an HTML response: reads Title and Description, stores the raw HTML/text,
/// obtains Content from <c>Wrapper</c>, computes KeyWords, and saves cropped copies
/// (430x300, 100x100, 50x50) of the first content image larger than 250x200.
/// For an image response: Title and Description are set to the link and the link is
/// listed in Images when the image is larger than 100x75.
/// A snapshot of the result is inserted into <c>HttpRuntime.Cache</c>.
/// On <see cref="WebException"/> the cache entry is removed and Title becomes "0".
/// The constructor returns early, without caching, when no content was extracted
/// or when the content has &lt;img&gt; tags but none of them could be saved.
/// </remarks>
/// <param name="link">Absolute URL to fetch.</param>
/// <param name="extractLink">Not used by this constructor.</param>
public LinkGrap(string link, bool extractLink)
{
    // Cropped images are written to this fixed directory.
    string uploadDir = @"C:\inetpub\wwwroot\choNongNghiep\web\lib\up\tintuc\rss\";

    // The cache is written below but not consulted: the previous code looked the entry
    // up and discarded it immediately, so every call re-fetches. That is kept as is.
    LinkGrap Item;

    HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
    Uri linkUri = new Uri(link);
    string host = linkUri.Host;

    // NOTE(review): DefaultCredentials offers the process identity to any server that
    // challenges for it; confirm this is intended for arbitrary external links.
    wrq.Credentials = CredentialCache.DefaultCredentials;
    wrq.Method = "GET";
    wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
    wrq.SendChunked = false;
    if (link.IndexOf("zing.vn") != -1)
    {
        wrq.Referer = "http://mp3.zing.vn";
    }

    try
    {
        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
        {
            contentType = wrp.ContentType;
            if (contentType.ToLower().IndexOf("html") != -1)
            {
                HtmlDocument doc = new HtmlDocument();
                doc.Load(wrp.GetResponseStream(), Encoding.UTF8);
                string domain = "http://" + host;

                // --- Title ---
                HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//title | //TITLE");
                if (titleNode != null)
                {
                    Title = titleNode.InnerText;
                }

                // --- Description ---
                HtmlNode descNode = doc.DocumentNode.SelectSingleNode(
                    "//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                // A description meta tag without a "content" attribute is ignored rather than crashing.
                if (descNode != null && descNode.Attributes["content"] != null)
                {
                    Description = descNode.Attributes["content"].Value;
                }

                // --- Content and keywords ---
                ContentRawHtml = doc.DocumentNode.InnerHtml;
                ContentRawText = doc.DocumentNode.InnerText;
                Content = Wrapper(host, link, doc);
                if (string.IsNullOrEmpty(Content))
                {
                    return;
                }

                string contentTokeyword = Giga.Common.Lib._string.getHTML(Content);
                if (!string.IsNullOrEmpty(contentTokeyword))
                {
                    using (LinkKeyword _linkKeyword = new LinkKeyword(contentTokeyword))
                    {
                        if (_linkKeyword.ListKeyWord != null)
                        {
                            KeyWords = _linkKeyword.ListKeyWord;
                        }
                    }
                }

                // --- Images ---
                HtmlDocument _doc1 = new HtmlDocument();
                _doc1.LoadHtml(Content);
                var imgNodes = _doc1.DocumentNode.SelectNodes("//img | //IMG");
                if (imgNodes != null)
                {
                    List<string> _list = new List<string>();
                    int[][] cropSizes =
                    {
                        new[] { 430, 300 },
                        new[] { 100, 100 },
                        new[] { 50, 50 }
                    };

                    foreach (HtmlNode _img in imgNodes)
                    {
                        if (_img.Attributes["src"] == null)
                        {
                            continue;
                        }

                        string src = _img.Attributes["src"].Value;
                        if (string.IsNullOrEmpty(src))
                        {
                            continue;
                        }

                        if (src.StartsWith("//", StringComparison.Ordinal))
                        {
                            // Protocol-relative URL: borrow the link's scheme.
                            src = linkUri.Scheme + ":" + src;
                        }
                        else if (!src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                            && !src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                        {
                            // Anything else is treated as a path on the link's host (rooted at "/").
                            if (!src.StartsWith("/", StringComparison.Ordinal))
                            {
                                src = "/" + src;
                            }
                            src = domain + src;
                        }

                        try
                        {
                            ImageProcess gimg = new ImageProcess(new Uri(src), src);
                            if (gimg.Width > 250 && gimg.Heigth > 200)
                            {
                                string gimg_t = Guid.NewGuid().ToString().Replace("-", "");
                                List<string> names = new List<string>();

                                // The un-suffixed name is listed first but no file is written for it
                                // (the full-size save was already disabled in the previous code).
                                names.Add(gimg_t + gimg.Ext);

                                foreach (int[] size in cropSizes)
                                {
                                    string fileName = gimg_t + size[0] + "x" + size[1] + gimg.Ext;
                                    gimg.Crop(size[0], size[1]);
                                    gimg.Save(Path.Combine(uploadDir, fileName));
                                    names.Add(fileName);
                                }

                                // Commit only after every crop was saved, so a failure midway
                                // does not leave names of missing files in the result.
                                _list = names;
                                break;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: an image that cannot be fetched, decoded or saved is skipped.
                            System.Diagnostics.Trace.TraceWarning("LinkGrap: skipping image '{0}': {1}", src, ex.Message);
                        }
                    }

                    if (_list.Count == 0)
                    {
                        return;
                    }
                    Images = _list;
                }
            }
            else if (contentType.IndexOf("image") != -1)
            {
                Title = link;
                Description = link;
                List<string> _list = new List<string>();
                ImageProcess gimg = new ImageProcess(new Uri(link), link);
                if (gimg.Width > 100 && gimg.Heigth > 75)
                {
                    _list.Add(link);
                }
                Images = _list;
            }
        }

        // Snapshot of the result for the cache.
        LinkGrap _obj = new LinkGrap();
        _obj.Title = Title;
        _obj.Description = Description;
        _obj.Images = Images;
        _obj.contentType = contentType;
        _obj.InLink = InLink;
        _obj.OutLink = OutLink;
        // Previously assigned to contentType by mistake, clobbering the MIME type.
        _obj.Content = Content;
        _obj.ContentRawText = ContentRawText;
        _obj.ContentRawHtml = ContentRawHtml;
        HttpRuntime.Cache.Insert(string.Format(cacheKey, link), _obj);
        Item = _obj;
    }
    catch (WebException)
    {
        // Request failed: drop any stale cache entry and flag the failure through Title = "0".
        LinkGrap _obj2 = new LinkGrap();
        _obj2.Title = "0";
        HttpRuntime.Cache.Remove(string.Format(cacheKey, link));
        Item = _obj2;
    }

    Title = Item.Title;
    Description = Item.Description;
    Images = Item.Images;
    contentType = Item.contentType;
    KeyWordsIEnum = Item.KeyWordsIEnum;
}
/// <summary>
/// Scrapes the page at <paramref name="url"/> like the single-argument overload, and
/// additionally saves cropped copies of the first sufficiently large content image
/// into <paramref name="uploadDir"/>, one file per entry in <paramref name="images"/>.
/// </summary>
/// <param name="url">Absolute URL of the page to scrape.</param>
/// <param name="uploadDir">Directory the cropped image files are written to.</param>
/// <param name="images">
/// Sizes to produce. An entry with <c>DefaultImage</c> set gets no size suffix in its
/// file name; the others are suffixed with "{Width}x{Height}". When null or empty no
/// image is downloaded and <c>Anh</c> is left unset.
/// </param>
public RegularTin(string url, string uploadDir, List<ImageSize> images)
{
    var pageUri = new Uri(url);
    var host = pageUri.Host.ToLower();
    var doc = loadDoc(url);
    var list = QuyTacDal.SelectByHost(host).ToList();
    WrapperClean(host, url, doc);

    // --- Extract Ten (title), MoTa (description) and NoiDung (content) ---
    foreach (var item in list)
    {
        var c = doc.DocumentNode.SelectSingleNode(item.Xpath);
        switch (item.Loai)
        {
            case 0: // replace the working document with the matched node's inner HTML
                if (c != null)
                {
                    doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(c.InnerHtml);
                }
                break;

            case 1: // title
                if (c == null)
                {
                    // SelectSingleNode yields null when nothing matches; the previous
                    // SelectNodes(...)[0] threw NullReferenceException in that case.
                    c = doc.DocumentNode.SelectSingleNode(@"//title | //TITLE");
                }
                if (c != null)
                {
                    Ten = c.InnerText;
                }
                break;

            case 2: // description
                if (c != null)
                {
                    MoTa = c.InnerHtml;
                }
                else
                {
                    var meta = doc.DocumentNode.SelectSingleNode(
                        @"//meta[@name='description'] | //meta[@name='DESCRIPTION'] | //meta[@name='Description']");
                    // A <meta> element carries its text in the "content" attribute, not in its body.
                    if (meta != null && meta.Attributes["content"] != null)
                    {
                        MoTa = meta.Attributes["content"].Value;
                    }
                }
                break;

            case 3: // content: either strip the node (Xoa) or take it as the article body
                if (c != null)
                {
                    if (item.Xoa)
                    {
                        c.ParentNode.RemoveChild(c, false);
                    }
                    else
                    {
                        NoiDung = c.InnerHtml;
                    }
                }
                break;

            default:
                break;
        }
    }

    // --- Keywords and images are derived from the extracted content only ---
    if (string.IsNullOrEmpty(NoiDung))
    {
        return;
    }

    var doc1 = new HtmlAgilityPack.HtmlDocument();
    doc1.LoadHtml(NoiDung);

    using (var linkKeyword = new LinkKeyword(doc1.DocumentNode.InnerText))
    {
        if (linkKeyword.ListKeyWord != null)
        {
            KeyWords = linkKeyword.ListKeyWord;
        }
    }

    // Nothing to produce without target sizes; avoids downloading images for no result.
    if (images == null || images.Count == 0)
    {
        return;
    }

    var imgNodes = doc1.DocumentNode.SelectNodes("//img | //IMG");
    if (imgNodes == null)
    {
        return;
    }

    var domain = "http://" + host;
    foreach (var img in imgNodes)
    {
        if (img.Attributes["src"] == null)
        {
            continue;
        }

        string src = img.Attributes["src"].Value;
        if (string.IsNullOrEmpty(src))
        {
            continue;
        }

        if (src.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative URL: borrow the page's scheme.
            src = pageUri.Scheme + ":" + src;
        }
        else if (!src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            && !src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            // Anything else is treated as a path on the page's host (rooted at "/").
            if (!src.StartsWith("/", StringComparison.Ordinal))
            {
                src = "/" + src;
            }
            src = domain + src;
        }

        try
        {
            var gimg = new ImageProcess(new Uri(src), src);
            if (gimg.Width > 250 && gimg.Heigth > 200)
            {
                var imgTen = Guid.NewGuid().ToString().Replace("-", "");
                var savedNames = new List<string>();

                foreach (var imageSize in images)
                {
                    gimg.Crop(imageSize.Width, imageSize.Height);
                    var fileName = string.Format("{0}{1}.{2}", imgTen, imageSize.DefaultImage ? "" : string.Format("{0}x{1}", imageSize.Width, imageSize.Height), gimg.Ext);
                    gimg.Save(Path.Combine(uploadDir, fileName));
                    savedNames.Add(fileName);
                }

                // The previous code never recorded anything, so Anh stayed unset even after
                // the files were written. NOTE(review): the first saved file name is used
                // here; confirm callers expect a file name rather than the source URL.
                Anh = savedNames[0];
                break;
            }
        }
        catch (Exception ex)
        {
            // Best effort: an image that cannot be fetched, decoded or saved is skipped.
            System.Diagnostics.Trace.TraceWarning("RegularTin: skipping image '{0}': {1}", src, ex.Message);
        }
    }
}
/// <summary>
/// Builds longer keys from the entries of <paramref name="inputDic"/> and merges in
/// the result of doing the same recursively on the keys it just built.
/// </summary>
/// <remarks>
/// For each input entry whose count exceeds 1, and for each position in its index
/// list, the string at <c>position + loai</c> in <paramref name="IndexDic"/> is looked
/// up. When that string is a key of <paramref name="DuplicateDic"/>, the combined key
/// "entry.key + space + string" is counted. The recursion stops when a level produces
/// no combined key. While merging, a recursive entry with loai and count above 1 marks
/// the entry for its key minus the last word as <c>kill</c>.
/// NOTE(review): IndexDic presumably maps word position to word and loai is the
/// number of words in a key - confirm against the caller.
/// </remarks>
/// <param name="inputDic">Entries to extend; not modified.</param>
/// <param name="IndexDic">Lookup from integer position to string; keys below Count are assumed present.</param>
/// <param name="DuplicateDic">Only its key set is consulted.</param>
/// <returns>The combined keys of this level together with those of all deeper levels.</returns>
public Dictionary<string, LinkKeyword> GetDic(Dictionary<string, LinkKeyword> inputDic
    , Dictionary<int, string> IndexDic
    , Dictionary<string, LinkKeyword> DuplicateDic)
{
    Dictionary<string, LinkKeyword> extended = new Dictionary<string, LinkKeyword>();

    // Nothing to extend: this is also what ends the recursion.
    if (inputDic.Count <= 0)
    {
        return extended;
    }

    // Pass 1: append the following string to every occurrence of each repeated entry.
    List<LinkKeyword> candidates = new List<LinkKeyword>(inputDic.Values);
    foreach (LinkKeyword candidate in candidates)
    {
        if (candidate.count <= 1)
        {
            continue;
        }

        int width = candidate.loai;
        foreach (int position in candidate.index)
        {
            int followingPosition = position + width;
            if (followingPosition >= IndexDic.Count)
            {
                continue;
            }

            string following = IndexDic[followingPosition];
            if (!DuplicateDic.ContainsKey(following))
            {
                continue;
            }

            string combinedKey = candidate.key + " " + following;
            LinkKeyword seen;
            if (extended.TryGetValue(combinedKey, out seen))
            {
                // Already produced by an earlier occurrence: record this position and bump the count.
                List<int> positions = seen.index;
                positions.Add(position);
                extended[combinedKey] = new LinkKeyword(seen.key, seen.count + 1, positions, width + 1, false);
            }
            else
            {
                List<int> positions = new List<int>();
                positions.Add(position);
                extended.Add(combinedKey, new LinkKeyword(combinedKey, 1, positions, width + 1, false));
            }
        }
    }

    // Pass 2: extend this level's keys in turn and fold the deeper results back in.
    Dictionary<string, LinkKeyword> deeper = GetDic(extended, IndexDic, DuplicateDic);
    foreach (KeyValuePair<string, LinkKeyword> pair in deeper)
    {
        LinkKeyword longer = pair.Value;

        if (longer.loai > 1 && longer.count > 1)
        {
            // A repeated longer key marks the key without its last word as killed.
            string shorterKey = longer.key.Substring(0, longer.key.LastIndexOf(" "));
            LinkKeyword shorter;
            if (extended.TryGetValue(shorterKey, out shorter))
            {
                shorter.kill = true;
                extended[shorterKey] = shorter;
            }
        }

        LinkKeyword current;
        if (extended.TryGetValue(pair.Key, out current))
        {
            extended[pair.Key] = new LinkKeyword(current.key, current.count + 1, current.index, longer.loai, current.kill);
        }
        else
        {
            extended.Add(pair.Key, longer);
        }
    }

    return extended;
}