Example #1
0
        /// <summary>
        /// Builds link-preview data (title, description, candidate images) from the HTML
        /// returned by <c>GetPageFromURL</c> for the given address.
        /// </summary>
        /// <param name="URL">
        /// Address of the page to inspect. It is handed to <c>GetPageFromURL</c> by reference,
        /// so the value stored in the result is whatever that helper leaves in it.
        /// </param>
        /// <returns>
        /// A <c>LinkInfo</c>. When no page content is obtained, a newly constructed instance is
        /// returned untouched. Otherwise <c>URL</c> and <c>Images</c> are set, and <c>Title</c> /
        /// <c>Description</c> are set when matching markup is found. If the page address cannot
        /// be parsed as an absolute URI, the <c>&lt;img&gt;</c> scan is skipped and the data
        /// collected so far is returned.
        /// </returns>
        static internal LinkInfo GetLinkData(string URL) {
            // Image count at which the <img> scan stops (og:image entries count towards it).
            const int MaxImages = 10;
            // Flags must be combined with '|'. The previous '&' of two distinct flags evaluated
            // to RegexOptions.None, which made the <title>/<meta> scans case-sensitive.
            const RegexOptions TagOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
            const RegexOptions AttributeOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline;

            string sPage = GetPageFromURL(ref URL, string.Empty, string.Empty);
            LinkInfo link = new LinkInfo();
            if (string.IsNullOrEmpty(sPage)) {
                return link;
            }

            link.URL = URL;
            link.Images = new List<ImageInfo>();

            Match m = Regex.Match(sPage, "<(title)[^>]*?>((?:.|\\n)*?)</\\s*\\1\\s*>", TagOptions);
            if (m.Success) {
                link.Title = m.Groups[2].ToString().Trim();
            }

            // Both expressions are loop-invariant, so they are built once up front.
            Regex regExp = new Regex("<meta\\s*(?:(?:\\b(\\w|-)+\\b\\s*(?:=\\s*(?:\"[^\"]*\"|'[^']*'|[^\"'<> ]+)\\s*)?)*)/?\\s*>", TagOptions);
            Regex subReg = new Regex("<meta[\\s]+[^>]*?(((name|property)*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(name*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", AttributeOptions);
            int i = 0;
            foreach (Match match in regExp.Matches(sPage)) {
                foreach (Match subM in subReg.Matches(match.Groups[0].Value)) {
                    // Group 4 is read as the name/property value, group 9 as the content value.
                    string sName = subM.Groups[4].Value;
                    string sContent = subM.Groups[9].Value;

                    if (string.Equals(sName, "og:description", StringComparison.OrdinalIgnoreCase)
                        || string.Equals(sName, "description", StringComparison.OrdinalIgnoreCase)) {
                        link.Description = sContent;
                    }
                    if (string.Equals(sName, "og:title", StringComparison.OrdinalIgnoreCase)) {
                        // An og:title overrides whatever the <title> element supplied.
                        link.Title = sContent;
                    }
                    if (string.Equals(sName, "og:image", StringComparison.OrdinalIgnoreCase)) {
                        // og:image entries are accepted as-is (no download or size check).
                        ImageInfo ogImg = new ImageInfo();
                        ogImg.URL = sContent;
                        link.Images.Add(ogImg);
                        i += 1;
                    }
                }
            }

            if (!string.IsNullOrEmpty(link.Description)) {
                link.Description = HttpUtility.HtmlDecode(link.Description);
                link.Description = HttpUtility.UrlDecode(link.Description);
                link.Description = RemoveHTML(link.Description);
            }
            if (!string.IsNullOrEmpty(link.Title)) {
                link.Title = link.Title.Replace("&amp;", "&");
            }

            // Only a leading scheme counts; "http" elsewhere in the address (e.g. in a path)
            // must not suppress the default prefix.
            if (!URL.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                && !URL.StartsWith("https://", StringComparison.OrdinalIgnoreCase)) {
                URL = "http://" + URL;
            }
            Uri uri;
            if (!Uri.TryCreate(URL, UriKind.Absolute, out uri)) {
                // Without a usable base address relative image paths cannot be resolved.
                return link;
            }

            regExp = new Regex("<img[\\s]+[^>]*?((alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", AttributeOptions);

            // Exact-match set of image addresses already examined. The previous substring
            // search over a concatenated string could wrongly reject a URL that merely
            // appears inside another one.
            HashSet<string> seenImages = new HashSet<string>(StringComparer.Ordinal);
            foreach (Match match in regExp.Matches(sPage)) {
                // '>=' rather than '==': og:image entries alone may already exceed the cap.
                if (i >= MaxImages) {
                    break;
                }

                // The src value is read from group 5, falling back to group 8.
                string sImg = match.Groups[5].Value;
                if (string.IsNullOrEmpty(sImg)) {
                    sImg = match.Groups[8].Value;
                }
                if (string.IsNullOrEmpty(sImg)) {
                    continue;
                }

                bool isAbsoluteHttp = sImg.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || sImg.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsoluteHttp) {
                    // Resolve against the page address so "img/a.png", "/img/a.png" and
                    // "//host/a.png" all produce valid URLs (scheme and port preserved).
                    Uri resolved;
                    if (!Uri.TryCreate(uri, sImg, out resolved)) {
                        continue;
                    }
                    // Skip sources such as data: URIs that are not fetched over HTTP(S).
                    if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps) {
                        continue;
                    }
                    sImg = resolved.AbsoluteUri;
                }

                if (!seenImages.Add(sImg)) {
                    continue;
                }

                // The bitmap is needed only for its dimensions; dispose it right away.
                // NOTE(review): assumes GetImageFromURL returns an instance owned by the caller - confirm.
                using (Bitmap bmp = Utilities.GetImageFromURL(sImg)) {
                    if (bmp != null
                        && bmp.Height > 25 && bmp.Height < 500
                        && bmp.Width > 25 && bmp.Width < 500) {
                        ImageInfo img = new ImageInfo();
                        img.URL = sImg;
                        link.Images.Add(img);
                        i += 1;
                    }
                }
            }
            return link;
        }
Example #2
0
        /// <summary>
        /// Builds link-preview data (title, description, candidate images) from the HTML
        /// returned by <c>GetPageFromURL</c> for the given address.
        /// </summary>
        /// <param name="URL">
        /// Address of the page to inspect. It is handed to <c>GetPageFromURL</c> by reference,
        /// so the value stored in the result is whatever that helper leaves in it.
        /// </param>
        /// <returns>
        /// A <c>LinkInfo</c>. When no page content is obtained, a newly constructed instance is
        /// returned untouched. Otherwise <c>URL</c> and <c>Images</c> are set, and <c>Title</c> /
        /// <c>Description</c> are set when matching markup is found. If the page address cannot
        /// be parsed as an absolute URI, the <c>&lt;img&gt;</c> scan is skipped and the data
        /// collected so far is returned.
        /// </returns>
        static internal LinkInfo GetLinkData(string URL)
        {
            // Image count at which the <img> scan stops (og:image entries count towards it).
            const int MaxImages = 10;
            // Flags must be combined with '|'. The previous '&' of two distinct flags evaluated
            // to RegexOptions.None, which made the <title>/<meta> scans case-sensitive.
            const RegexOptions TagOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
            const RegexOptions AttributeOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline;

            string   sPage = GetPageFromURL(ref URL, string.Empty, string.Empty);
            LinkInfo link  = new LinkInfo();

            if (string.IsNullOrEmpty(sPage))
            {
                return(link);
            }

            link.URL    = URL;
            link.Images = new List <ImageInfo>();
            Match m = Regex.Match(sPage, "<(title)[^>]*?>((?:.|\\n)*?)</\\s*\\1\\s*>", TagOptions);

            if (m.Success)
            {
                link.Title = m.Groups[2].ToString().Trim();
            }

            // Both expressions are loop-invariant, so they are built once up front.
            Regex regExp = new Regex("<meta\\s*(?:(?:\\b(\\w|-)+\\b\\s*(?:=\\s*(?:\"[^\"]*\"|'[^']*'|[^\"'<> ]+)\\s*)?)*)/?\\s*>", TagOptions);
            Regex subReg = new Regex("<meta[\\s]+[^>]*?(((name|property)*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(name*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", AttributeOptions);
            int   i      = 0;

            foreach (Match match in regExp.Matches(sPage))
            {
                foreach (Match subM in subReg.Matches(match.Groups[0].Value))
                {
                    // Group 4 is read as the name/property value, group 9 as the content value.
                    string sName    = subM.Groups[4].Value;
                    string sContent = subM.Groups[9].Value;

                    if (string.Equals(sName, "og:description", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(sName, "description", StringComparison.OrdinalIgnoreCase))
                    {
                        link.Description = sContent;
                    }
                    if (string.Equals(sName, "og:title", StringComparison.OrdinalIgnoreCase))
                    {
                        // An og:title overrides whatever the <title> element supplied.
                        link.Title = sContent;
                    }
                    if (string.Equals(sName, "og:image", StringComparison.OrdinalIgnoreCase))
                    {
                        // og:image entries are accepted as-is (no download or size check).
                        ImageInfo ogImg = new ImageInfo();
                        ogImg.URL = sContent;
                        link.Images.Add(ogImg);
                        i += 1;
                    }
                }
            }

            if (!string.IsNullOrEmpty(link.Description))
            {
                link.Description = HttpUtility.HtmlDecode(link.Description);
                link.Description = HttpUtility.UrlDecode(link.Description);
                link.Description = RemoveHTML(link.Description);
            }
            if (!string.IsNullOrEmpty(link.Title))
            {
                link.Title = link.Title.Replace("&amp;", "&");
            }

            // Only a leading scheme counts; "http" elsewhere in the address (e.g. in a path)
            // must not suppress the default prefix.
            if (!URL.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                !URL.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                URL = "http://" + URL;
            }
            Uri uri;

            if (!Uri.TryCreate(URL, UriKind.Absolute, out uri))
            {
                // Without a usable base address relative image paths cannot be resolved.
                return(link);
            }

            regExp = new Regex("<img[\\s]+[^>]*?((alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", AttributeOptions);

            // Exact-match set of image addresses already examined. The previous substring
            // search over a concatenated string could wrongly reject a URL that merely
            // appears inside another one.
            HashSet <string> seenImages = new HashSet <string>(StringComparer.Ordinal);

            foreach (Match match in regExp.Matches(sPage))
            {
                // '>=' rather than '==': og:image entries alone may already exceed the cap.
                if (i >= MaxImages)
                {
                    break;
                }

                // The src value is read from group 5, falling back to group 8.
                string sImg = match.Groups[5].Value;
                if (string.IsNullOrEmpty(sImg))
                {
                    sImg = match.Groups[8].Value;
                }
                if (string.IsNullOrEmpty(sImg))
                {
                    continue;
                }

                bool isAbsoluteHttp = sImg.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                                      sImg.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsoluteHttp)
                {
                    // Resolve against the page address so "img/a.png", "/img/a.png" and
                    // "//host/a.png" all produce valid URLs (scheme and port preserved).
                    Uri resolved;
                    if (!Uri.TryCreate(uri, sImg, out resolved))
                    {
                        continue;
                    }
                    // Skip sources such as data: URIs that are not fetched over HTTP(S).
                    if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps)
                    {
                        continue;
                    }
                    sImg = resolved.AbsoluteUri;
                }

                if (!seenImages.Add(sImg))
                {
                    continue;
                }

                // The bitmap is needed only for its dimensions; dispose it right away.
                // NOTE(review): assumes GetImageFromURL returns an instance owned by the caller - confirm.
                using (Bitmap bmp = Utilities.GetImageFromURL(sImg))
                {
                    if (bmp != null &&
                        bmp.Height > 25 && bmp.Height < 500 &&
                        bmp.Width > 25 && bmp.Width < 500)
                    {
                        ImageInfo img = new ImageInfo();
                        img.URL = sImg;
                        link.Images.Add(img);
                        i += 1;
                    }
                }
            }
            return(link);
        }