示例#1
0
        /// <summary>
        /// Extracts the images of a single post from the html of a forum page: every link
        /// returned by <c>LinkFinder.ListAllLinks</c> (filtered through
        /// <c>Utility.IsImageNoneSense</c>) followed by every entry returned by
        /// <c>LinkFinder.ListAllImages</c>, both limited to the post's message block.
        /// </summary>
        /// <param name="htmlDump">
        /// The html of the page that contains the post.
        /// </param>
        /// <param name="postId">
        /// The post id, or a post url carrying the id after a <c>#post</c> fragment.
        /// </param>
        /// <returns>
        /// The images found in the post message. The list is empty when
        /// <paramref name="htmlDump"/> is null or empty, or when the message block
        /// of the post cannot be located.
        /// </returns>
        public static List<ImageInfo> ExtractImagesLinksHtml(string htmlDump, string postId)
        {
            var rtnList = new List<ImageInfo>();

            if (string.IsNullOrEmpty(htmlDump))
            {
                return rtnList;
            }

            // Reduce a post url to the bare id, but only when the "#post" fragment is really
            // there (a blind IndexOf + 5 would cut the string at a meaningless position).
            if (!string.IsNullOrEmpty(postId))
            {
                const string PostAnchor = "#post";

                var anchorIndex = postId.IndexOf(PostAnchor, System.StringComparison.Ordinal);

                if (anchorIndex >= 0)
                {
                    postId = postId.Substring(anchorIndex + PostAnchor.Length);
                }
            }

            htmlDump = htmlDump.Replace("&amp;", "&");

            // use only message content
            var messageStart = string.Format("<div id=\"post_message_{0}\">", postId);
            const string MessageEnd = "</blockquote>";

            var startIndex = htmlDump.IndexOf(messageStart, System.StringComparison.Ordinal);

            if (startIndex < 0)
            {
                // The post is not part of this page, nothing to extract
                return rtnList;
            }

            startIndex += messageStart.Length;

            var endIndex = htmlDump.IndexOf(MessageEnd, startIndex, System.StringComparison.Ordinal);

            // Without a closing marker fall back to everything after the start marker
            htmlDump = endIndex < 0
                           ? htmlDump.Substring(startIndex)
                           : htmlDump.Substring(startIndex, endIndex - startIndex);

            ///////////////////////////////////////////////

            // Parse all links <a>, dropping entries whose url or thumbnail is rejected
            rtnList.AddRange(
                LinkFinder.ListAllLinks(htmlDump)
                    .Select(
                        link => new ImageInfo
                        {
                            ImageUrl = RemoveRedirectLink(Utility.ReplaceHexWithAscii(link.Href)),
                            ThumbnailUrl = Utility.ReplaceHexWithAscii(link.Text)
                        })
                    .Where(
                        image => !Utility.IsImageNoneSense(image.ImageUrl)
                                 && !Utility.IsImageNoneSense(image.ThumbnailUrl)));

            // Parse all images (these are added unfiltered)
            rtnList.AddRange(
                LinkFinder.ListAllImages(htmlDump)
                    .Select(
                        link => new ImageInfo
                        {
                            ImageUrl = RemoveRedirectLink(Utility.ReplaceHexWithAscii(link.Href)),
                            ThumbnailUrl = Utility.ReplaceHexWithAscii(link.Text)
                        }));

            return rtnList;
        }
示例#2
0
        /// <summary>
        /// Extracts the attachment links of a post from the html of a forum page. The links
        /// are taken from the <c>&lt;div class="attachments"&gt;</c> section when the page has
        /// one, otherwise from the message block of the given post.
        /// </summary>
        /// <param name="htmlDump">The html of the page that contains the post.</param>
        /// <param name="postId">
        /// The post id, or a post url carrying the id after a <c>#post</c> fragment. Only used
        /// when the page has no attachments section.
        /// </param>
        /// <returns>
        /// One entry per accepted link, with an empty thumbnail url. Links that do not start
        /// with <c>http://</c> or <c>https://</c> are prefixed with the current forum url.
        /// The list is empty when <paramref name="htmlDump"/> is null or empty, or when
        /// neither section can be located.
        /// </returns>
        public static List<ImageInfo> ExtractAttachmentImagesHtml(string htmlDump, string postId)
        {
            var rtnList = new List<ImageInfo>();

            if (string.IsNullOrEmpty(htmlDump))
            {
                return rtnList;
            }

            htmlDump = htmlDump.Replace("&amp;", "&");

            var start = "<div class=\"attachments\">";
            var end = "<!-- / attachments -->";

            var iStart = htmlDump.IndexOf(start, System.StringComparison.Ordinal);

            if (iStart < 0)
            {
                // No attachments section: fall back to the message block of the post.
                // fix post id
                if (!string.IsNullOrEmpty(postId))
                {
                    const string PostAnchor = "#post";

                    var anchorIndex = postId.IndexOf(PostAnchor, System.StringComparison.Ordinal);

                    if (anchorIndex >= 0)
                    {
                        postId = postId.Substring(anchorIndex + PostAnchor.Length);
                    }
                }

                start = string.Format("<div id=\"post_message_{0}\">", postId);
                end = "</blockquote>";

                iStart = htmlDump.IndexOf(start, System.StringComparison.Ordinal);

                if (iStart < 0)
                {
                    // Return Empty List
                    return rtnList;
                }
            }

            iStart += start.Length;

            // Look for the end marker only behind the start marker; an earlier occurrence
            // would otherwise produce a negative length.
            var iEnd = htmlDump.IndexOf(end, iStart, System.StringComparison.Ordinal);

            // Without an end marker keep everything after the start marker instead of
            // scanning the whole page.
            htmlDump = iEnd < 0
                           ? htmlDump.Substring(iStart)
                           : htmlDump.Substring(iStart, iEnd - iStart);

            ///////////////////////////////////////////////
            rtnList.AddRange(
                LinkFinder.ListAllLinks(htmlDump)
                    .Select(
                        link => new ImageInfo
                        {
                            // Absolute links (http or https) are used as they are,
                            // relative ones are resolved against the current forum url.
                            ImageUrl =
                                link.Href.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase)
                                || link.Href.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase)
                                    ? link.Href
                                    : CacheController.Instance().UserSettings.CurrentForumUrl
                                      + Utility.ReplaceHexWithAscii(link.Href),
                            ThumbnailUrl = string.Empty
                        })
                    .Where(image => !Utility.IsImageNoneSense(image.ImageUrl)));

            return rtnList;
        }
示例#3
0
        /// <summary>
        /// Extracts the images listed in an xml dump. Every row of the <c>Image</c> table
        /// supplies a <c>main_url</c> and, optionally, a <c>thumb_url</c>. Redirect wrappers
        /// (anonym.to, the forum's redirect-to page) are stripped from the main url, and
        /// <c>/out/out.php?x=</c> links are resolved with a blocking web request
        /// (20 second timeout). Duplicate urls are returned only once.
        /// </summary>
        /// <param name="strDump">
        /// The xml dump to read the <c>Image</c> table from.
        /// </param>
        /// <returns>
        /// The extracted images. When an error occurs it is shown in a message box and the
        /// images collected up to that point are returned.
        /// </returns>
        public static List<ImageInfo> ExtractImages(string strDump)
        {
            var rtnList = new List<ImageInfo>();
            var knownUrls = new HashSet<string>();

            try
            {
                using (var reader = new StringReader(strDump))
                using (var ds = new DataSet())
                {
                    ds.ReadXml(reader);

                    var imageTable = ds.Tables["Image"];

                    if (imageTable == null)
                    {
                        // A dump without images is not an error
                        return rtnList;
                    }

                    // The thumbnail column is optional, check once instead of catching per row
                    var hasThumbUrl = imageTable.Columns.Contains("thumb_url");

                    foreach (DataRow row in imageTable.Rows)
                    {
                        var thumbUrl = hasThumbUrl ? row["thumb_url"].ToString() : string.Empty;

                        // Strip quotes that surround the url
                        var imageUrl = row["main_url"].ToString().Replace("\"", string.Empty);

                        //////////////////////////////////////////////////////////////////////////
                        if (Utility.IsImageNoneSense(imageUrl))
                        {
                            continue;
                        }

                        imageUrl = Utility.ReplaceHexWithAscii(imageUrl);

                        // Remove anonym.to from Link if exists
                        if (imageUrl.Contains("anonym.to"))
                        {
                            imageUrl = imageUrl.Replace("http://www.anonym.to/?", string.Empty);
                        }

                        // Remove redirect
                        if (imageUrl.Contains("redirect-to"))
                        {
                            imageUrl = imageUrl.Replace(
                                $"{CacheController.Instance().UserSettings.ForumURL}redirect-to/?redirect=",
                                string.Empty);
                        }

                        // Get Real Url
                        if (imageUrl.Contains("/out/out.php?x="))
                        {
                            var req = (HttpWebRequest)WebRequest.Create(imageUrl);

                            req.Referer = imageUrl;
                            req.Timeout = 20000;

                            // Dispose the response even if reading the final uri throws
                            using (var res = (HttpWebResponse)req.GetResponse())
                            {
                                imageUrl = res.ResponseUri.ToString();
                            }
                        }

                        // Add returns false for an url that was already collected
                        if (!knownUrls.Add(imageUrl))
                        {
                            continue;
                        }

                        rtnList.Add(
                            new ImageInfo
                            {
                                ImageUrl = imageUrl,
                                ThumbnailUrl = thumbUrl
                            });
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show($"{ex.Message}\n{ex.StackTrace}");
            }

            return rtnList;
        }