Ejemplo n.º 1
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and fills Ten (title), MoTa (description),
        /// NoiDung (content HTML), KeyWords and Anh (lead image URL) by applying the
        /// extraction rules that QuyTacDal returns for the page's host.
        /// </summary>
        /// <param name="url">Absolute URL of the page to parse.</param>
        /// <remarks>
        /// Rule kinds (item.Loai): 0 replaces the working document with the matched node's
        /// inner HTML, 1 reads the title, 2 reads the description, 3 either removes the
        /// matched node (item.Xoa) or takes its inner HTML as the content.
        /// </remarks>
        public RegularTin(string url)
        {
            Uri pageUri = new Uri(url);
            string _host = pageUri.Host.ToLower();
            HtmlAgilityPack.HtmlDocument doc = loadDoc(url);
            List<QuyTac> rules = QuyTacDal.SelectByHost(_host).ToList();
            WrapperClean(_host, url, doc);

            // Extract title, description and content.
            foreach (QuyTac item in rules)
            {
                // A rule without an XPath cannot match; treat it as "no node" so the
                // title/description fallbacks below still get a chance to run.
                HtmlNode _c = string.IsNullOrEmpty(item.Xpath)
                    ? null
                    : doc.DocumentNode.SelectSingleNode(item.Xpath);
                switch (item.Loai)
                {
                    case 0: // Narrow the working document to the matched node.
                        if (_c != null)
                        {
                            doc = new HtmlAgilityPack.HtmlDocument();
                            doc.LoadHtml(_c.InnerHtml);
                        }
                        break;
                    case 1: // Title
                        if (_c == null)
                        {
                            // SelectSingleNode yields null when nothing matches, whereas
                            // SelectNodes(...)[0] would dereference a null collection.
                            _c = doc.DocumentNode.SelectSingleNode("//title | //TITLE");
                        }
                        if (_c != null)
                        {
                            Ten = _c.InnerText;
                        }
                        break;
                    case 2: // Description
                        if (_c != null)
                        {
                            MoTa = _c.InnerHtml;
                        }
                        else
                        {
                            HtmlNode meta = doc.DocumentNode.SelectSingleNode(
                                "//meta[@name='description'] | //meta[@name='DESCRIPTION'] | //meta[@name='Description']");
                            if (meta != null)
                            {
                                // The description text of a <meta> tag lives in its
                                // "content" attribute; the element itself has no inner text.
                                var contentAttr = meta.Attributes["content"];
                                MoTa = contentAttr != null ? contentAttr.Value : meta.InnerText;
                            }
                        }
                        break;
                    case 3: // Content, or removal of an unwanted node
                        if (_c != null)
                        {
                            if (item.Xoa)
                            {
                                if (_c.ParentNode != null)
                                {
                                    _c.ParentNode.RemoveChild(_c, false);
                                }
                            }
                            else
                            {
                                NoiDung = _c.InnerHtml;
                            }
                        }
                        break;
                    default:
                        break;
                }
            }

            if (string.IsNullOrEmpty(NoiDung))
            {
                return;
            }

            HtmlAgilityPack.HtmlDocument _doc1 = new HtmlAgilityPack.HtmlDocument();
            _doc1.LoadHtml(NoiDung);

            // Keywords are computed from the plain text of the extracted content.
            using (LinkKeyword _linkKeyword = new LinkKeyword(_doc1.DocumentNode.InnerText))
            {
                if (_linkKeyword.ListKeyWord != null)
                {
                    KeyWords = _linkKeyword.ListKeyWord;
                }
            }

            // Lead image: the first image in the content larger than 250x200.
            var imgNodes = _doc1.DocumentNode.SelectNodes("//img | //IMG");
            if (imgNodes == null)
            {
                return;
            }
            foreach (HtmlNode _img in imgNodes)
            {
                var srcAttr = _img.Attributes["src"];
                if (srcAttr == null)
                {
                    continue;
                }

                // Resolve against the page URL so that https://, protocol-relative,
                // root-relative and page-relative sources all become absolute URLs.
                Uri imageUri;
                if (!Uri.TryCreate(pageUri, srcAttr.Value.Trim(), out imageUri))
                {
                    continue;
                }

                try
                {
                    var gimg = new ImageProcess(imageUri, imageUri.AbsoluteUri);
                    if (gimg.Width > 250 && gimg.Heigth > 200)
                    {
                        Anh = imageUri.AbsoluteUri;
                        break;
                    }
                }
                catch (Exception)
                {
                    // Best effort: an image that cannot be processed is skipped and the
                    // next candidate is tried.
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads <paramref name="link"/> and fills this instance from the response.
        /// For HTML responses it sets Title, Description, ContentRawHtml, ContentRawText,
        /// Content (via Wrapper), KeyWords and Images (file names of the thumbnails saved
        /// for the first content image larger than 250x200). For image responses it sets
        /// Title and Description to the link and lists the link in Images when the image is
        /// larger than 100x75.
        /// </summary>
        /// <param name="link">Absolute URL to fetch.</param>
        /// <param name="extractLink">Not used by this constructor.</param>
        /// <remarks>
        /// A WebException during the request yields Title "0" and removes any cache entry
        /// for the link. The constructor returns early, without caching, when the wrapped
        /// content is empty or when the content has images but none qualifies.
        /// </remarks>
        public LinkGrap(string link, bool extractLink)
        {
            LinkGrap Item = null;
            // NOTE(review): hard-coded deployment path; presumably this should come from
            // configuration - confirm before changing.
            string uploadDir = @"C:\inetpub\wwwroot\choNongNghiep\web\lib\up\tintuc\rss\";
            Item = (LinkGrap)HttpRuntime.Cache[string.Format(cacheKey, link)];
            // NOTE(review): the cached value is discarded right away, so every call
            // re-downloads the link. Kept as-is because re-enabling the cache changes
            // behaviour; confirm whether the bypass is intentional.
            Item = null;
            if (Item == null)
            {
                HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(link);
                Uri pageUri = new Uri(link);
                string host = pageUri.Host;
                wrq.Credentials = CredentialCache.DefaultCredentials;
                wrq.Method = "GET";
                wrq.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; vi; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
                wrq.SendChunked = false;
                if (link.IndexOf("zing.vn") != -1)
                {
                    wrq.Referer = "http://mp3.zing.vn";
                }
                try
                {
                    // Dispose the response so the underlying connection is released even
                    // on the early-return paths below.
                    using (HttpWebResponse wrp = (HttpWebResponse)wrq.GetResponse())
                    {
                        contentType = wrp.ContentType;
                        if (contentType.IndexOf("html", StringComparison.OrdinalIgnoreCase) != -1)
                        {
                            HtmlDocument doc = new HtmlDocument();
                            doc.Load(wrp.GetResponseStream(), Encoding.UTF8);

                            // Title
                            HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//title | //TITLE");
                            if (titleNode != null)
                            {
                                Title = titleNode.InnerText;
                            }

                            // Description: a <meta> tag without a content attribute is ignored
                            // instead of raising a NullReferenceException.
                            HtmlNode descNode = doc.DocumentNode.SelectSingleNode(
                                "//meta[@name='description'] | //meta[@name='DESCRIPTION']");
                            if (descNode != null && descNode.Attributes["content"] != null)
                            {
                                Description = descNode.Attributes["content"].Value;
                            }

                            // Content
                            ContentRawHtml = doc.DocumentNode.InnerHtml;
                            ContentRawText = doc.DocumentNode.InnerText;
                            Content = Wrapper(host, link, doc);
                            if (string.IsNullOrEmpty(Content))
                            {
                                return;
                            }
                            string contentTokeyword = Giga.Common.Lib._string.getHTML(Content);
                            if (!string.IsNullOrEmpty(contentTokeyword))
                            {
                                using (LinkKeyword _linkKeyword = new LinkKeyword(contentTokeyword))
                                {
                                    if (_linkKeyword.ListKeyWord != null)
                                    {
                                        KeyWords = _linkKeyword.ListKeyWord;
                                    }
                                }
                            }

                            // Images
                            HtmlDocument _doc1 = new HtmlDocument();
                            _doc1.LoadHtml(Content);
                            var imgNodes = _doc1.DocumentNode.SelectNodes("//img | //IMG");
                            if (imgNodes != null)
                            {
                                // Thumbnail sizes (width, height) in the order they are cropped.
                                int[][] thumbnailSizes =
                                {
                                    new[] { 430, 300 },
                                    new[] { 100, 100 },
                                    new[] { 50, 50 }
                                };
                                List<string> _list = new List<string>();
                                foreach (HtmlNode _img in imgNodes)
                                {
                                    var srcAttr = _img.Attributes["src"];
                                    if (srcAttr == null)
                                    {
                                        continue;
                                    }

                                    // Resolve against the page URL so that https://,
                                    // protocol-relative and relative sources are handled.
                                    Uri imageUri;
                                    if (!Uri.TryCreate(pageUri, srcAttr.Value.Trim(), out imageUri))
                                    {
                                        continue;
                                    }

                                    try
                                    {
                                        ImageProcess gimg = new ImageProcess(imageUri, imageUri.AbsoluteUri);
                                        if (gimg.Width > 250 && gimg.Heigth > 200)
                                        {
                                            string gimg_t = Guid.NewGuid().ToString("N");

                                            // Collect names separately so a failed save does
                                            // not leave partial entries in the result list.
                                            List<string> saved = new List<string>();

                                            // NOTE(review): the base name is listed first although
                                            // the un-cropped image itself is not written to disk.
                                            saved.Add(gimg_t + gimg.Ext);
                                            foreach (int[] size in thumbnailSizes)
                                            {
                                                string fileName = gimg_t + size[0] + "x" + size[1] + gimg.Ext;
                                                gimg.Crop(size[0], size[1]);
                                                gimg.Save(Path.Combine(uploadDir, fileName));
                                                saved.Add(fileName);
                                            }
                                            _list.AddRange(saved);
                                            break;
                                        }
                                    }
                                    catch (Exception)
                                    {
                                        // Best effort: an image that cannot be fetched or saved
                                        // is skipped and the next candidate is tried.
                                    }
                                }
                                if (_list.Count == 0)
                                {
                                    return;
                                }
                                Images = _list;
                            }
                        }
                        else if (contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) != -1)
                        {
                            Title = link;
                            Description = link;
                            List<string> _list = new List<string>();
                            ImageProcess gimg = new ImageProcess(pageUri, link);
                            if (gimg.Width > 100 && gimg.Heigth > 75)
                            {
                                _list.Add(link);
                            }
                            Images = _list;
                        }
                    }

                    LinkGrap _obj = new LinkGrap();
                    _obj.Title = Title;
                    _obj.Description = Description;
                    _obj.Images = Images;
                    _obj.contentType = contentType;
                    _obj.InLink = InLink;
                    _obj.OutLink = OutLink;
                    // Store the extracted content in Content; assigning it to contentType
                    // would overwrite the MIME type captured above.
                    _obj.Content = Content;
                    _obj.ContentRawText = ContentRawText;
                    _obj.ContentRawHtml = ContentRawHtml;
                    HttpRuntime.Cache.Insert(string.Format(cacheKey, link), _obj);
                    Item = _obj;
                }
                catch (WebException)
                {
                    // The link could not be fetched: mark the result with Title "0" and
                    // make sure no stale entry stays cached.
                    LinkGrap _obj2 = new LinkGrap();
                    _obj2.Title = "0";
                    HttpRuntime.Cache.Remove(string.Format(cacheKey, link));
                    Item = _obj2;
                }
            }

            Title = Item.Title;
            Description = Item.Description;
            Images = Item.Images;
            contentType = Item.contentType;
            KeyWordsIEnum = Item.KeyWordsIEnum;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and fills Ten (title), MoTa (description),
        /// NoiDung (content HTML) and KeyWords by applying the extraction rules that QuyTacDal
        /// returns for the page's host. The first content image larger than 250x200 is
        /// cropped to every requested size and saved under <paramref name="uploadDir"/>;
        /// Anh is then set to the base file name of that image.
        /// </summary>
        /// <param name="url">Absolute URL of the page to parse.</param>
        /// <param name="uploadDir">Directory the cropped images are written to.</param>
        /// <param name="images">Sizes to produce; null is treated as "no sizes".</param>
        /// <remarks>
        /// Rule kinds (item.Loai): 0 replaces the working document with the matched node's
        /// inner HTML, 1 reads the title, 2 reads the description, 3 either removes the
        /// matched node (item.Xoa) or takes its inner HTML as the content.
        /// </remarks>
        public RegularTin(string url, string uploadDir, List<ImageSize> images)
        {
            var pageUri = new Uri(url);
            var host = pageUri.Host.ToLower();
            var doc = loadDoc(url);
            var list = QuyTacDal.SelectByHost(host).ToList();
            WrapperClean(host, url, doc);

            // Extract title, description and content.
            foreach (var item in list)
            {
                // A rule without an XPath cannot match; treat it as "no node" so the
                // title/description fallbacks below still get a chance to run.
                var c = string.IsNullOrEmpty(item.Xpath)
                    ? null
                    : doc.DocumentNode.SelectSingleNode(item.Xpath);
                switch (item.Loai)
                {
                    case 0: // Narrow the working document to the matched node.
                        if (c != null)
                        {
                            doc = new HtmlAgilityPack.HtmlDocument();
                            doc.LoadHtml(c.InnerHtml);
                        }
                        break;
                    case 1: // Title
                        if (c == null)
                        {
                            // SelectSingleNode yields null when nothing matches, whereas
                            // SelectNodes(...)[0] would dereference a null collection.
                            c = doc.DocumentNode.SelectSingleNode("//title | //TITLE");
                        }
                        if (c != null)
                        {
                            Ten = c.InnerText;
                        }
                        break;
                    case 2: // Description
                        if (c != null)
                        {
                            MoTa = c.InnerHtml;
                        }
                        else
                        {
                            var meta = doc.DocumentNode.SelectSingleNode(
                                "//meta[@name='description'] | //meta[@name='DESCRIPTION'] | //meta[@name='Description']");
                            if (meta != null)
                            {
                                // The description text of a <meta> tag lives in its
                                // "content" attribute; the element itself has no inner text.
                                var contentAttr = meta.Attributes["content"];
                                MoTa = contentAttr != null ? contentAttr.Value : meta.InnerText;
                            }
                        }
                        break;
                    case 3: // Content, or removal of an unwanted node
                        if (c != null)
                        {
                            if (item.Xoa)
                            {
                                if (c.ParentNode != null)
                                {
                                    c.ParentNode.RemoveChild(c, false);
                                }
                            }
                            else
                            {
                                NoiDung = c.InnerHtml;
                            }
                        }
                        break;
                    default:
                        break;
                }
            }

            if (string.IsNullOrEmpty(NoiDung))
            {
                return;
            }
            var doc1 = new HtmlAgilityPack.HtmlDocument();
            doc1.LoadHtml(NoiDung);

            // Keywords are computed from the plain text of the extracted content.
            using (var linkKeyword = new LinkKeyword(doc1.DocumentNode.InnerText))
            {
                if (linkKeyword.ListKeyWord != null)
                {
                    KeyWords = linkKeyword.ListKeyWord;
                }
            }

            var imgNodes = doc1.DocumentNode.SelectNodes("//img | //IMG");
            if (imgNodes == null)
            {
                return;
            }

            // A null size list would otherwise raise (and silently swallow) a
            // NullReferenceException for every single image.
            var sizes = images ?? new List<ImageSize>();

            foreach (var img in imgNodes)
            {
                var srcAttr = img.Attributes["src"];
                if (srcAttr == null)
                {
                    continue;
                }

                // Resolve against the page URL so that https://, protocol-relative,
                // root-relative and page-relative sources all become absolute URLs.
                Uri imageUri;
                if (!Uri.TryCreate(pageUri, srcAttr.Value.Trim(), out imageUri))
                {
                    continue;
                }

                try
                {
                    var gimg = new ImageProcess(imageUri, imageUri.AbsoluteUri);
                    if (gimg.Width > 250 && gimg.Heigth > 200)
                    {
                        var imgTen = Guid.NewGuid().ToString("N");
                        var savedCount = 0;
                        foreach (var imageSize in sizes)
                        {
                            // The default size keeps the bare name; other sizes get a
                            // "WxH" suffix before the extension.
                            var suffix = imageSize.DefaultImage
                                ? ""
                                : string.Format("{0}x{1}", imageSize.Width, imageSize.Height);
                            gimg.Crop(imageSize.Width, imageSize.Height);
                            // NOTE(review): this format inserts a dot before gimg.Ext, so
                            // Ext is assumed not to include one - confirm.
                            gimg.Save(Path.Combine(uploadDir,
                                                   string.Format("{0}{1}.{2}", imgTen, suffix, gimg.Ext)));
                            savedCount++;
                        }
                        if (savedCount > 0)
                        {
                            // Without this assignment Anh could never be set, because
                            // nothing was ever recorded for the saved image.
                            // NOTE(review): the base file name is assumed to be what
                            // consumers of Anh expect - confirm.
                            Anh = string.Format("{0}.{1}", imgTen, gimg.Ext);
                        }
                        break;
                    }
                }
                catch (Exception)
                {
                    // Best effort: an image that cannot be fetched, cropped or saved is
                    // skipped and the next candidate is tried.
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds longer keys from the entries of <paramref name="inputDic"/> whose count
        /// is greater than 1: for each recorded index the string found in
        /// <paramref name="IndexDic"/> at (index + loai) is appended to the key, provided
        /// that string is a key of <paramref name="DuplicateDic"/>. The method then
        /// recurses on the newly built dictionary and merges the recursive result back in.
        /// </summary>
        /// <param name="inputDic">Entries to extend; an empty dictionary ends the recursion.</param>
        /// <param name="IndexDic">Lookup from position to string; read at (index + loai).</param>
        /// <param name="DuplicateDic">Only strings that are keys of this dictionary are appended.</param>
        /// <returns>
        /// A new dictionary holding the extended keys of this level plus everything the
        /// recursion produced; empty when <paramref name="inputDic"/> is empty.
        /// </returns>
        public Dictionary<string, LinkKeyword> GetDic(Dictionary<string, LinkKeyword> inputDic
        , Dictionary<int, string> IndexDic
        , Dictionary<string, LinkKeyword> DuplicateDic)
        {
            Dictionary<string, LinkKeyword> result = new Dictionary<string, LinkKeyword>();
            if (inputDic.Count <= 0)
            {
                return result;
            }

            // Pass 1: extend every entry seen more than once by the following string.
            foreach (LinkKeyword phrase in new List<LinkKeyword>(inputDic.Values))
            {
                if (phrase.count <= 1)
                {
                    continue;
                }

                int size = phrase.loai;
                foreach (int start in phrase.index)
                {
                    int nextPos = start + size;
                    if (nextPos >= IndexDic.Count)
                    {
                        continue;
                    }

                    string nextWord = IndexDic[nextPos];
                    if (!DuplicateDic.ContainsKey(nextWord))
                    {
                        continue;
                    }

                    string extendedKey = phrase.key + " " + nextWord;
                    LinkKeyword existing;
                    if (result.TryGetValue(extendedKey, out existing))
                    {
                        // Seen before: record this position and bump the count.
                        List<int> positions = existing.index;
                        positions.Add(start);
                        result[extendedKey] = new LinkKeyword(existing.key, existing.count + 1, positions, size + 1, false);
                    }
                    else
                    {
                        List<int> positions = new List<int>();
                        positions.Add(start);
                        result.Add(extendedKey, new LinkKeyword(extendedKey, 1, positions, size + 1, false));
                    }
                }
            }

            // Pass 2: recurse on the extended entries, then merge the outcome back in.
            Dictionary<string, LinkKeyword> deeper = GetDic(result, IndexDic, DuplicateDic);
            foreach (KeyValuePair<string, LinkKeyword> entry in deeper)
            {
                LinkKeyword longer = entry.Value;
                if (longer.loai > 1 && longer.count > 1)
                {
                    // Flag the entry whose key is this key minus its last word.
                    string prefix = longer.key.Substring(0, longer.key.LastIndexOf(" "));
                    LinkKeyword shorter;
                    if (result.TryGetValue(prefix, out shorter))
                    {
                        shorter.kill = true;
                        result[prefix] = shorter;
                    }
                }

                LinkKeyword current;
                if (result.TryGetValue(entry.Key, out current))
                {
                    result[entry.Key] = new LinkKeyword(current.key, current.count + 1, current.index, longer.loai, current.kill);
                }
                else
                {
                    result.Add(entry.Key, longer);
                }
            }

            return result;
        }