/// <summary>
/// Fetches the page behind <paramref name="URL"/> and builds link-preview data from it:
/// a title, a description and a list of candidate preview images.
/// </summary>
/// <param name="URL">
/// Address of the page to inspect. It is handed to <c>GetPageFromURL</c> by reference, so the
/// value stored in <c>LinkInfo.URL</c> is whatever that helper leaves in it.
/// </param>
/// <returns>
/// A <c>LinkInfo</c>. When the page could not be fetched (null or empty content) the instance is
/// returned untouched, i.e. without URL, title, description or image list.
/// </returns>
internal static LinkInfo GetLinkData(string URL)
{
    // Upper bound on collected images, and the exclusive size window (in pixels)
    // an <img> must fall into to be considered a usable preview image.
    const int maxImages = 10;
    const int minImageSide = 25;
    const int maxImageSide = 500;

    string sPage = GetPageFromURL(ref URL, string.Empty, string.Empty);
    LinkInfo link = new LinkInfo();
    if (string.IsNullOrEmpty(sPage))
    {
        return link;
    }

    link.URL = URL;
    link.Images = new List<ImageInfo>();

    // Default title comes from PageRegex (group 2); an og:title meta tag overrides it below.
    Match titleMatch = PageRegex.Match(sPage);
    if (titleMatch.Success)
    {
        link.Title = titleMatch.Groups[2].Value.Trim();
    }

    // Walk every meta tag. MetaSubRegex exposes the tag's key in group 4 and its
    // content in group 9 (as consumed here; the pattern itself is defined elsewhere).
    int imageCount = 0;
    foreach (Match metaTag in MetaRegex.Matches(sPage))
    {
        foreach (Match attribute in MetaSubRegex.Matches(metaTag.Value))
        {
            string key = attribute.Groups[4].Value;
            string content = attribute.Groups[9].Value;

            if (key.Equals("OG:DESCRIPTION", StringComparison.OrdinalIgnoreCase)
                || key.Equals("DESCRIPTION", StringComparison.OrdinalIgnoreCase))
            {
                link.Description = content;
            }
            else if (key.Equals("OG:TITLE", StringComparison.OrdinalIgnoreCase))
            {
                link.Title = content;
            }
            else if (key.Equals("OG:IMAGE", StringComparison.OrdinalIgnoreCase))
            {
                // og:image entries are trusted as-is: no download and no size check.
                ImageInfo ogImage = new ImageInfo();
                ogImage.URL = content;
                link.Images.Add(ogImage);
                imageCount += 1;
            }
        }
    }

    if (!string.IsNullOrEmpty(link.Description))
    {
        link.Description = HttpUtility.HtmlDecode(link.Description);
        link.Description = HttpUtility.UrlDecode(link.Description);
        link.Description = RemoveHTML(link.Description);
    }

    if (!string.IsNullOrEmpty(link.Title))
    {
        // Was Replace("&", "&"), which is a no-op; the intent is to unescape the ampersand entity.
        link.Title = link.Title.Replace("&amp;", "&");
    }

    // Build an absolute base address for resolving <img> sources. Test for a real scheme
    // prefix: a mere "contains http" check misfires on URLs such as "example.com/http-guide".
    string pageAddress = URL;
    bool hasHttpScheme = pageAddress.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || pageAddress.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasHttpScheme)
    {
        pageAddress = "http://" + pageAddress;
    }

    Uri pageUri;
    if (!Uri.TryCreate(pageAddress, UriKind.Absolute, out pageUri))
    {
        // Malformed address: keep what was gathered from the meta tags instead of throwing.
        return link;
    }

    // Exact-match set; the previous concatenated-string check treated any URL that was a
    // substring of an earlier one as a duplicate.
    HashSet<string> seenImages = new HashSet<string>(StringComparer.Ordinal);

    foreach (Match imgTag in MetaSubRegex2.Matches(sPage))
    {
        // ">=" rather than "==": og:image tags alone may already have pushed the count past the cap.
        if (imageCount >= maxImages)
        {
            break;
        }

        // The source address is read from group 5, falling back to group 8.
        string source = imgTag.Groups[5].Value;
        if (string.IsNullOrEmpty(source))
        {
            source = imgTag.Groups[8].Value;
        }

        if (string.IsNullOrEmpty(source))
        {
            continue;
        }

        // Resolve against the page address so "images/a.png", "/a.png" and "//cdn/a.png"
        // all become valid absolute URLs (plain host + path concatenation broke the
        // first and last forms and dropped any port).
        Uri imageUri;
        if (!Uri.TryCreate(pageUri, source, out imageUri))
        {
            continue;
        }

        if (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps)
        {
            // e.g. data: URIs - nothing to download.
            continue;
        }

        string imageUrl = imageUri.AbsoluteUri;
        if (!seenImages.Add(imageUrl))
        {
            continue;
        }

        // NOTE(review): assumes GetImageFromURL returns a bitmap owned by the caller
        // (not a shared/cached instance), so it is disposed here - confirm.
        using (Bitmap bmp = Utilities.GetImageFromURL(imageUrl))
        {
            if (bmp == null)
            {
                continue;
            }

            bool heightOk = bmp.Height > minImageSide && bmp.Height < maxImageSide;
            bool widthOk = bmp.Width > minImageSide && bmp.Width < maxImageSide;
            if (heightOk && widthOk)
            {
                ImageInfo img = new ImageInfo();
                img.URL = imageUrl;
                link.Images.Add(img);
                imageCount += 1;
            }
        }
    }

    return link;
}
/// <summary>
/// Fetches the page behind <paramref name="URL"/> and builds link-preview data from it:
/// the title (from the title element, overridden by an og:title meta tag), the description
/// (from a description / og:description meta tag) and up to ten candidate preview images
/// (og:image meta tags plus suitably sized img elements).
/// </summary>
/// <param name="URL">
/// Address of the page to inspect. It is handed to <c>GetPageFromURL</c> by reference, so the
/// value stored in <c>LinkInfo.URL</c> is whatever that helper leaves in it.
/// </param>
/// <returns>
/// A <c>LinkInfo</c>. When the page could not be fetched (null or empty content) the instance is
/// returned untouched, i.e. without URL, title, description or image list.
/// </returns>
internal static LinkInfo GetLinkData(string URL)
{
    // Upper bound on collected images, and the exclusive size window (in pixels)
    // an <img> must fall into to be considered a usable preview image.
    const int maxImages = 10;
    const int minImageSide = 25;
    const int maxImageSide = 500;

    string sPage = GetPageFromURL(ref URL, string.Empty, string.Empty);
    LinkInfo link = new LinkInfo();
    if (string.IsNullOrEmpty(sPage))
    {
        return link;
    }

    link.URL = URL;
    link.Images = new List<ImageInfo>();

    // Flags must be combined with "|". The previous "IgnoreCase & Multiline" evaluates to
    // RegexOptions.None, so upper-case <TITLE> / <META> tags were silently skipped.
    RegexOptions tagOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
    RegexOptions attributeOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline;

    // Group 2 captures the text between the opening and closing title tags.
    Match titleMatch = Regex.Match(sPage, "<(title)[^>]*?>((?:.|\\n)*?)</\\s*\\1\\s*>", tagOptions);
    if (titleMatch.Success)
    {
        link.Title = titleMatch.Groups[2].Value.Trim();
    }

    Regex metaTagRegex = new Regex("<meta\\s*(?:(?:\\b(\\w|-)+\\b\\s*(?:=\\s*(?:\"[^\"]*\"|'[^']*'|[^\"'<> ]+)\\s*)?)*)/?\\s*>", tagOptions);

    // Built once instead of once per meta tag. Group 4 is the name/property value and
    // group 9 the content value when the name/property attribute precedes content.
    Regex metaAttributeRegex = new Regex("<meta[\\s]+[^>]*?(((name|property)*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((content*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(name*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", attributeOptions);

    int imageCount = 0;
    foreach (Match metaTag in metaTagRegex.Matches(sPage))
    {
        foreach (Match attribute in metaAttributeRegex.Matches(metaTag.Value))
        {
            string key = attribute.Groups[4].Value;
            string content = attribute.Groups[9].Value;

            if (string.Equals(key, "OG:DESCRIPTION", StringComparison.OrdinalIgnoreCase)
                || string.Equals(key, "DESCRIPTION", StringComparison.OrdinalIgnoreCase))
            {
                link.Description = content;
            }
            else if (string.Equals(key, "OG:TITLE", StringComparison.OrdinalIgnoreCase))
            {
                link.Title = content;
            }
            else if (string.Equals(key, "OG:IMAGE", StringComparison.OrdinalIgnoreCase))
            {
                // og:image entries are trusted as-is: no download and no size check.
                ImageInfo ogImage = new ImageInfo();
                ogImage.URL = content;
                link.Images.Add(ogImage);
                imageCount += 1;
            }
        }
    }

    if (!string.IsNullOrEmpty(link.Description))
    {
        link.Description = HttpUtility.HtmlDecode(link.Description);
        link.Description = HttpUtility.UrlDecode(link.Description);
        link.Description = RemoveHTML(link.Description);
    }

    if (!string.IsNullOrEmpty(link.Title))
    {
        // Was Replace("&", "&"), which is a no-op; the intent is to unescape the ampersand entity.
        link.Title = link.Title.Replace("&amp;", "&");
    }

    // Build an absolute base address for resolving <img> sources. Test for a real scheme
    // prefix: a mere "contains http" check misfires on URLs such as "example.com/http-guide".
    string pageAddress = URL;
    bool hasHttpScheme = pageAddress.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || pageAddress.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasHttpScheme)
    {
        pageAddress = "http://" + pageAddress;
    }

    Uri pageUri;
    if (!Uri.TryCreate(pageAddress, UriKind.Absolute, out pageUri))
    {
        // Malformed address: keep what was gathered from the meta tags instead of throwing.
        return link;
    }

    // In this pattern groups 5 and 8 are the two alternative positions of the src value
    // (src after alt, or src first); groups 3 and 10 hold the alt text and are not used.
    Regex imgTagRegex = new Regex("<img[\\s]+[^>]*?((alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?)|(src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?))((src*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|(alt*?[\\s]?=[\\s\\x27\\x22]+(.*?)[\\x27\\x22]+.*?>)|>)", attributeOptions);

    // Exact-match set; the previous concatenated-string check treated any URL that was a
    // substring of an earlier one as a duplicate.
    HashSet<string> seenImages = new HashSet<string>(StringComparer.Ordinal);

    foreach (Match imgTag in imgTagRegex.Matches(sPage))
    {
        // ">=" rather than "==": og:image tags alone may already have pushed the count past the cap.
        if (imageCount >= maxImages)
        {
            break;
        }

        string source = imgTag.Groups[5].Value;
        if (string.IsNullOrEmpty(source))
        {
            source = imgTag.Groups[8].Value;
        }

        if (string.IsNullOrEmpty(source))
        {
            continue;
        }

        // Resolve against the page address so "images/a.png", "/a.png" and "//cdn/a.png"
        // all become valid absolute URLs (plain host + path concatenation broke the
        // first and last forms and dropped any port).
        Uri imageUri;
        if (!Uri.TryCreate(pageUri, source, out imageUri))
        {
            continue;
        }

        if (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps)
        {
            // e.g. data: URIs - nothing to download.
            continue;
        }

        string imageUrl = imageUri.AbsoluteUri;
        if (!seenImages.Add(imageUrl))
        {
            continue;
        }

        // NOTE(review): assumes GetImageFromURL returns a bitmap owned by the caller
        // (not a shared/cached instance), so it is disposed here - confirm.
        using (Bitmap bmp = Utilities.GetImageFromURL(imageUrl))
        {
            if (bmp == null)
            {
                continue;
            }

            bool heightOk = bmp.Height > minImageSide && bmp.Height < maxImageSide;
            bool widthOk = bmp.Width > minImageSide && bmp.Width < maxImageSide;
            if (heightOk && widthOk)
            {
                ImageInfo img = new ImageInfo();
                img.URL = imageUrl;
                link.Images.Add(img);
                imageCount += 1;
            }
        }
    }

    return link;
}