/// <summary>
/// Extracts image candidates from the message body of a single post: one entry per
/// link (<c>&lt;a&gt;</c>) reported by <c>LinkFinder.ListAllLinks</c>, followed by one entry per
/// image reported by <c>LinkFinder.ListAllImages</c>.
/// </summary>
/// <param name="htmlDump">
/// The html Dump.
/// </param>
/// <param name="postId">
/// The Post Id, or a post url containing a <c>#post{id}</c> anchor.
/// </param>
/// <returns>
/// The extracted images. The list is empty when <paramref name="htmlDump"/> is null/empty
/// or when the post's message block cannot be located in it.
/// </returns>
public static List<ImageInfo> ExtractImagesLinksHtml(string htmlDump, string postId)
{
    if (string.IsNullOrEmpty(htmlDump))
    {
        return new List<ImageInfo>();
    }

    // The caller may hand over a complete post url; only the id behind "#post" is needed.
    // Strip only when the anchor is really present (same rule as ExtractAttachmentImagesHtml).
    if (!string.IsNullOrEmpty(postId))
    {
        var anchorIndex = postId.IndexOf("#post", System.StringComparison.Ordinal);

        if (anchorIndex >= 0)
        {
            postId = postId.Substring(anchorIndex + 5);
        }
    }

    // Decode escaped ampersands so that query strings inside the urls are usable.
    htmlDump = htmlDump.Replace("&amp;", "&");

    // use only message content
    var messageStart = $"<div id=\"post_message_{postId}\">";
    const string MessageEnd = "</blockquote>";

    var startIndex = htmlDump.IndexOf(messageStart, System.StringComparison.Ordinal);

    if (startIndex < 0)
    {
        // The post is not part of this dump, there is nothing to extract.
        return new List<ImageInfo>();
    }

    startIndex += messageStart.Length;

    var endIndex = htmlDump.IndexOf(MessageEnd, startIndex, System.StringComparison.Ordinal);

    // Best effort on truncated html: without a closing marker use everything after the start.
    htmlDump = endIndex < 0
                   ? htmlDump.Substring(startIndex)
                   : htmlDump.Substring(startIndex, endIndex - startIndex);

    ///////////////////////////////////////////////

    // Parse all Links <a>; entries whose image or thumbnail url is rejected by
    // Utility.IsImageNoneSense are dropped.
    var images = LinkFinder.ListAllLinks(htmlDump)
        .Select(
            link => new ImageInfo
                        {
                            ImageUrl = RemoveRedirectLink(Utility.ReplaceHexWithAscii(link.Href)),
                            ThumbnailUrl = Utility.ReplaceHexWithAscii(link.Text)
                        })
        .Where(
            image => !Utility.IsImageNoneSense(image.ImageUrl)
                     && !Utility.IsImageNoneSense(image.ThumbnailUrl))
        .ToList();

    // Parse all Images <img>
    // NOTE(review): unlike the links above these entries are not filtered through
    // Utility.IsImageNoneSense - confirm that this is intended.
    images.AddRange(
        LinkFinder.ListAllImages(htmlDump).Select(
            link => new ImageInfo
                        {
                            ImageUrl = RemoveRedirectLink(Utility.ReplaceHexWithAscii(link.Href)),
                            ThumbnailUrl = Utility.ReplaceHexWithAscii(link.Text)
                        }));

    return images;
}
/// <summary>
/// Extracts the links found in the attachments section of a post. When the dump has no
/// attachments section, the message body of the post identified by
/// <paramref name="postId"/> is scanned instead.
/// </summary>
/// <param name="htmlDump">The html Dump.</param>
/// <param name="postId">
/// The post identifier, or a post url containing a <c>#post{id}</c> anchor. Only used
/// when no attachments section is present.
/// </param>
/// <returns>
/// The extracted attachment images (thumbnail url is always empty). The list is empty when
/// <paramref name="htmlDump"/> is null/empty or when neither the attachments section nor
/// the post's message block can be located.
/// </returns>
public static List<ImageInfo> ExtractAttachmentImagesHtml(string htmlDump, string postId)
{
    var rtnList = new List<ImageInfo>();

    if (string.IsNullOrEmpty(htmlDump))
    {
        return rtnList;
    }

    // Decode escaped ampersands so that query strings inside the urls are usable.
    htmlDump = htmlDump.Replace("&amp;", "&");

    const string AttachmentsStart = "<div class=\"attachments\">";
    const string AttachmentsEnd = "<!-- / attachments -->";

    var iStart = htmlDump.IndexOf(AttachmentsStart, System.StringComparison.Ordinal);

    if (iStart >= 0)
    {
        // use only the attachments section
        iStart += AttachmentsStart.Length;

        // Search behind the start marker, an earlier occurrence of the end marker
        // would otherwise produce a negative length.
        var iEnd = htmlDump.IndexOf(AttachmentsEnd, iStart, System.StringComparison.Ordinal);

        // Without an end marker the complete dump is scanned (best effort).
        if (iEnd >= 0)
        {
            htmlDump = htmlDump.Substring(iStart, iEnd - iStart);
        }
    }
    else
    {
        // fix post id
        postId = postId ?? string.Empty;

        var anchorIndex = postId.IndexOf("#post", System.StringComparison.Ordinal);

        if (anchorIndex >= 0)
        {
            postId = postId.Substring(anchorIndex + 5);
        }

        // use only message content
        var messageStart = $"<div id=\"post_message_{postId}\">";
        const string MessageEnd = "</blockquote>";

        iStart = htmlDump.IndexOf(messageStart, System.StringComparison.Ordinal);

        if (iStart < 0)
        {
            // Return Empty List
            return rtnList;
        }

        iStart += messageStart.Length;

        var iEnd = htmlDump.IndexOf(MessageEnd, iStart, System.StringComparison.Ordinal);

        // ">= 0": an empty message (end marker directly behind the start marker) is a
        // valid match and must not fall back to scanning the complete dump.
        if (iEnd >= 0)
        {
            htmlDump = htmlDump.Substring(iStart, iEnd - iStart);
        }
    }

    ///////////////////////////////////////////////
    rtnList.AddRange(
        LinkFinder.ListAllLinks(htmlDump)
            .Select(
                link => new ImageInfo
                            {
                                // Absolute links (http and https) are taken as they are, everything
                                // else is prefixed with the current forum url.
                                ImageUrl =
                                    link.Href.StartsWith("http://", System.StringComparison.Ordinal)
                                    || link.Href.StartsWith("https://", System.StringComparison.Ordinal)
                                        ? link.Href
                                        : CacheController.Instance().UserSettings.CurrentForumUrl
                                          + Utility.ReplaceHexWithAscii(link.Href),
                                ThumbnailUrl = string.Empty
                            })
            .Where(newPicPoolItem => !Utility.IsImageNoneSense(newPicPoolItem.ImageUrl)));

    return rtnList;
}
/// <summary>
/// Reads the <c>Image</c> rows of an xml dump and returns one entry per distinct image url.
/// Urls rejected by <c>Utility.IsImageNoneSense</c> are skipped, known redirect prefixes are
/// removed and <c>/out/out.php?x=</c> links are resolved with a web request.
/// </summary>
/// <param name="strDump">
/// The xml dump; it is loaded into a <see cref="DataSet"/>, the rows of the table named
/// <c>Image</c> are expected to have a <c>main_url</c> and optionally a <c>thumb_url</c> column.
/// </param>
/// <returns>
/// The extracted images. Any exception raised while processing is shown in a message box
/// and the images collected up to that point are returned.
/// </returns>
public static List<ImageInfo> ExtractImages(string strDump)
{
    var rtnList = new List<ImageInfo>();

    // Urls that were already added, used to skip duplicates.
    var knownUrls = new HashSet<string>();

    try
    {
        using (var reader = new StringReader(strDump))
        using (var ds = new DataSet())
        {
            ds.ReadXml(reader);

            var imageTable = ds.Tables["Image"];

            if (imageTable == null)
            {
                // The dump contains no images at all.
                return rtnList;
            }

            // The thumbnail column is optional, check once instead of catching per row.
            var hasThumbnail = imageTable.Columns.Contains("thumb_url");

            foreach (DataRow row in imageTable.Rows)
            {
                var imageUrl = row["main_url"].ToString().Replace("\"", string.Empty);

                //////////////////////////////////////////////////////////////////////////
                if (Utility.IsImageNoneSense(imageUrl))
                {
                    continue;
                }

                imageUrl = Utility.ReplaceHexWithAscii(imageUrl);

                // Remove anonym.to from Link if exists
                if (imageUrl.Contains("anonym.to"))
                {
                    imageUrl = imageUrl.Replace("http://www.anonym.to/?", string.Empty);
                }

                // Remove redirect
                if (imageUrl.Contains("redirect-to"))
                {
                    imageUrl = imageUrl.Replace(
                        $"{CacheController.Instance().UserSettings.ForumURL}redirect-to/?redirect=",
                        string.Empty);
                }

                // Get Real Url
                if (imageUrl.Contains("/out/out.php?x="))
                {
                    var req = (HttpWebRequest)WebRequest.Create(imageUrl);

                    req.Referer = imageUrl;
                    req.Timeout = 20000;

                    // Dispose the response even when reading the response uri fails.
                    using (var res = (HttpWebResponse)req.GetResponse())
                    {
                        imageUrl = res.ResponseUri.ToString();
                    }
                }

                // Add returns false when the url is already known.
                if (!knownUrls.Add(imageUrl))
                {
                    continue;
                }

                rtnList.Add(
                    new ImageInfo
                        {
                            ImageUrl = imageUrl,
                            ThumbnailUrl = hasThumbnail ? row["thumb_url"].ToString() : string.Empty
                        });
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show($"{ex.Message}\n{ex.StackTrace}");
    }

    return rtnList;
}