Example #1
0
        /// <summary>
        /// Searches Giphy for GIFs that match the query in <paramref name="search"/>.
        /// </summary>
        /// <param name="search">
        /// The search options. The query is required; the rating and the format are only
        /// sent when they are set.
        /// </param>
        /// <returns>The response body deserialized into a <see cref="GiphySearchResults"/>.</returns>
        /// <exception cref="FormatException">Thrown when the query is null or empty.</exception>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<GiphySearchResults> SearchGif(GiphySearch search)
        {
            if (string.IsNullOrEmpty(search.Query))
            {
                throw new FormatException("You must place a query/term to search for.");
            }

            var collection = new NameValueCollection
            {
                { "api_key", _key },
                { "q", search.Query },
                { "limit", search.Limit.ToString() },
                // The offset is a paging position. It used to be filled with the rating string,
                // which is sent separately below.
                // NOTE(review): assumes GiphySearch exposes an Offset property next to Limit - confirm.
                { "offset", search.Offset.ToString() }
            };

            if (search.Rating != GiphyRatings.Empty)
            {
                collection.Add("rating", search.Rating.ToRatingString());
            }

            if (!string.IsNullOrEmpty(search.Format))
            {
                collection.Add("fmt", search.Format);
            }

            var url = await _site.GetUri(new Uri($"{BaseUri}/search{ConvertUri.ToUrlString(collection)}"));

            if (!url.IsSuccess)
            {
                throw new WebException("The GIFs you called for failed to work.");
            }

            return JsonConvert.DeserializeObject<GiphySearchResults>(url.JsonData);
        }
Example #2
0
        /// <summary>
        /// Requests user information from the Osu! API.
        /// </summary>
        /// <param name="parameters">
        /// The request parameters. The user is always sent; the mode, type and event days
        /// are only sent when they are not null.
        /// </param>
        /// <returns>The response deserialized into an <see cref="OsuUserResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<OsuUserResult> GetUser(OsuUserParameters parameters)
        {
            var collection = new NameValueCollection
            {
                { "k", $"{_key}" },
                { "u", $"{parameters.User}" }
            };

            // Optional parameters are only added to the query when the caller supplied them
            if (parameters.Mode != null)
            {
                collection.Add("m", $"{parameters.Mode}");
            }
            if (parameters.Type != null)
            {
                collection.Add("type", $"{parameters.Type}");
            }
            if (parameters.EventDays != null)
            {
                collection.Add("event_days", $"{parameters.EventDays}");
            }

            var uri = new Uri($"{baseUrl}{user}{ConvertUri.ToUrlString(collection)}");
            var url = await _site.GetUri(uri);

            if (!url.IsSuccess)
            {
                // Report the endpoint that was actually called (this used to name the beatmap endpoint)
                throw new WebException($"Osu!API encountered an error while: {user.ToUpper()}");
            }

            // The response body is placed under a "users" property so it can be
            // deserialized into the result type
            var jsonData = "{\n\"users\":" + url.JsonData + "}";

            return JsonConvert.DeserializeObject<OsuUserResult>(jsonData);
        }
Example #3
0
        /// <summary>
        /// Requests a random GIF from the Giphy "random" endpoint.
        /// </summary>
        /// <param name="random">
        /// Optional filters. The rating, format and tag are only sent when they are set.
        /// </param>
        /// <returns>The response body deserialized into a <see cref="GiphyRandomResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<GiphyRandomResult> GetGifRandomly(GiphyRandom random)
        {
            var query = new NameValueCollection();
            query.Add("api_key", _key);

            // Only send the filters the caller actually filled in
            if (random.Rating != GiphyRatings.Empty)
            {
                query.Add("rating", random.Rating.ToRatingString());
            }

            if (!string.IsNullOrEmpty(random.Format))
            {
                query.Add("fmt", random.Format);
            }

            if (!string.IsNullOrEmpty(random.Tag))
            {
                query.Add("tag", random.Tag);
            }

            var requestUri = new Uri($"{BaseUri}/random{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                return JsonConvert.DeserializeObject<GiphyRandomResult>(response.JsonData);
            }

            throw new WebException("The GIFs you called for failed to work.");
        }
Example #4
0
        /// <summary>
        /// Sends a phrase to the Giphy "translate" endpoint and returns the resulting GIF.
        /// </summary>
        /// <param name="translate">
        /// The translation options. The phrase is required; the rating and the format are
        /// only sent when they are set.
        /// </param>
        /// <returns>The response body deserialized into a <see cref="GiphyIdResult"/>.</returns>
        /// <exception cref="FormatException">Thrown when the phrase is null or empty.</exception>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<GiphyIdResult> TranslateGif(GiphyTranslation translate)
        {
            var phraseMissing = string.IsNullOrEmpty(translate.Phrase);

            if (phraseMissing)
            {
                throw new FormatException("You must place a query/term to translate.");
            }

            var query = new NameValueCollection();
            query.Add("api_key", _key);
            query.Add("s", translate.Phrase);

            // Only send the filters the caller actually filled in
            if (translate.Rating != GiphyRatings.Empty)
            {
                query.Add("rating", translate.Rating.ToRatingString());
            }

            if (!string.IsNullOrEmpty(translate.Format))
            {
                query.Add("fmt", translate.Format);
            }

            var requestUri = new Uri($"{BaseUri}/translate{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                return JsonConvert.DeserializeObject<GiphyIdResult>(response.JsonData);
            }

            throw new WebException("The GIFs you called for failed to work.");
        }
Example #5
0
        /// <summary>
        /// Requests GIFs from the Giphy "trending" endpoint.
        /// </summary>
        /// <param name="trend">
        /// The trending options. The limit is always sent; the rating and the format are
        /// only sent when they are set.
        /// </param>
        /// <returns>The response body deserialized into a <see cref="GiphySearchResults"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<GiphySearchResults> GetGifByTrendingTypes(GiphyTrending trend)
        {
            var query = new NameValueCollection();
            query.Add("api_key", _key);
            query.Add("limit", trend.Limit.ToString());

            // Only send the filters the caller actually filled in
            if (trend.Rating != GiphyRatings.Empty)
            {
                query.Add("rating", trend.Rating.ToRatingString());
            }

            if (!string.IsNullOrEmpty(trend.Format))
            {
                query.Add("fmt", trend.Format);
            }

            var requestUri = new Uri($"{BaseUri}/trending{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                return JsonConvert.DeserializeObject<GiphySearchResults>(response.JsonData);
            }

            throw new WebException("The GIFs you called for failed to work.");
        }
Example #6
0
        /// <summary>
        /// Requests beatmap information from the Osu! API.
        /// </summary>
        /// <param name="parameters">
        /// The request filters. Every filter is optional and is only added to the query
        /// when it is not null.
        /// </param>
        /// <returns>The response deserialized into an <see cref="OsuBeatmapResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<OsuBeatmapResult> GetBeatmaps(OsuBeatmapParameters parameters)
        {
            var query = new NameValueCollection();
            query.Add("k", $"{_key}");

            // Each filter below maps one optional parameter onto its query string key;
            // the order in which they are added is kept stable.
            if (parameters.Since != null)
            {
                query.Add("since", $"{parameters.Since}");
            }

            if (parameters.BeatmapSetId != null)
            {
                query.Add("s", $"{parameters.BeatmapSetId}");
            }

            if (parameters.BeatmapId != null)
            {
                query.Add("b", $"{parameters.BeatmapId}");
            }

            if (parameters.User != null)
            {
                query.Add("u", $"{parameters.User}");
            }

            if (parameters.Type != null)
            {
                query.Add("type", $"{parameters.Type}");
            }

            if (parameters.Mode != null)
            {
                query.Add("m", $"{parameters.Mode}");
            }

            if (parameters.IncludeConvertedBeatmaps != null)
            {
                query.Add("a", $"{parameters.IncludeConvertedBeatmaps}");
            }

            if (parameters.BeatmapHash != null)
            {
                query.Add("h", $"{parameters.BeatmapHash}");
            }

            if (parameters.Limit != null)
            {
                query.Add("limit", $"{parameters.Limit}");
            }

            var requestUri = new Uri($"{baseUrl}{beatmap}{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                // The response body is placed under a "beatmaps" property before deserializing
                var wrappedJson = "{\n\"beatmaps\":" + response.JsonData + "}";
                return JsonConvert.DeserializeObject<OsuBeatmapResult>(wrappedJson);
            }

            throw new WebException($"Osu!API encountered an error while: {beatmap.ToUpper()}");
        }
Example #7
0
        /// <summary>
        /// Requests a single E926 post by its id.
        /// </summary>
        /// <param name="id">The id of the post to request.</param>
        /// <returns>
        /// The response deserialized into an <see cref="E926BaseResults"/>; the response body
        /// is placed as the only element of a "posts" array before deserializing.
        /// </returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<E926BaseResults> GetPost(ulong id)
        {
            var query = new NameValueCollection();
            query.Add("id", $"{id}");

            var requestUri = new Uri($"{baseUri}{POST_RESULT}{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (!response.IsSuccess)
            {
                throw new WebException($"E926 failed to collect information on ID#{id}");
            }

            // The requested address is echoed to the console after a successful request
            Console.WriteLine($"{baseUri}{POST_RESULT}{ConvertUri.ToUrlString(query)}");

            var wrappedJson = "{\nposts:[" + response.JsonData + "]}";
            return JsonConvert.DeserializeObject<E926BaseResults>(wrappedJson);
        }
Example #8
0
        /// <summary>
        /// Requests a single GIF from Giphy by its id.
        /// </summary>
        /// <param name="id">The GIF id; it is appended to the base uri as a path segment.</param>
        /// <returns>The response body deserialized into a <see cref="GiphyIdResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<GiphyIdResult> GetGifByIdType(string id)
        {
            var query = new NameValueCollection();
            query.Add("api_key", _key);

            var requestUri = new Uri($"{BaseUri}/{id}{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                return JsonConvert.DeserializeObject<GiphyIdResult>(response.JsonData);
            }

            throw new WebException("The GIFs you called for failed to work.");
        }
Example #9
0
        /// <summary>
        /// Converts the hard-coded sample HTML file to a PDF file, with background printing enabled.
        /// </summary>
        /// <remarks>
        /// The conversion is best effort: a failure is written to the standard error stream
        /// and is not rethrown to the caller.
        /// </remarks>
        public void ConvertHtmlToPDF()
        {
            try
            {
                var convertUri   = new ConvertUri(@"C:\SampleCV\orange download.html");
                var pageSettings = new ChromeHtmlToPdfLib.Settings.PageSettings
                {
                    PrintBackground = true
                };

                // The converter is disposable; release it even when the conversion throws
                using (var converter = new Converter())
                {
                    converter.ConvertToPdf(convertUri, @"C:\SampleCV\competitiveprogramminggoogle.pdf", pageSettings);
                }
            }
            catch (Exception ex)
            {
                // Keep the method non-throwing, but do not hide the failure completely
                Console.Error.WriteLine($"Converting the HTML file to PDF failed: {ex}");
            }
        }
Example #10
0
        /// <summary>
        /// Requests multiplayer match information from the Osu! API.
        /// </summary>
        /// <param name="parameters">The request parameters; the match id is always sent.</param>
        /// <returns>The response deserialized into an <see cref="OsuMatchResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<OsuMatchResult> GetMatch(OsuMatchParameters parameters)
        {
            var collection = new NameValueCollection
            {
                { "k", $"{_key}" },
                { "mp", $"{parameters.MatchId}" }
            };

            var uri = new Uri($"{baseUrl}{match}{ConvertUri.ToUrlString(collection)}");
            var url = await _site.GetUri(uri);

            if (!url.IsSuccess)
            {
                // Report the endpoint that was actually called (this used to name the beatmap endpoint)
                throw new WebException($"Osu!API encountered an error while: {match.ToUpper()}");
            }

            // The response body is placed under a "matches" property so it can be
            // deserialized into the result type
            var jsonData = "{\n\"matches\":" + url.JsonData + "}";

            return JsonConvert.DeserializeObject<OsuMatchResult>(jsonData);
        }
Example #11
0
        /// <summary>
        /// Requests replay data from the Osu! API.
        /// </summary>
        /// <param name="parameters">
        /// The request parameters; the mode, the beatmap id and the user are always sent.
        /// </param>
        /// <returns>The response deserialized into an <see cref="OsuReplayResult"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<OsuReplayResult> GetReplay(OsuReplayParameters parameters)
        {
            var collection = new NameValueCollection
            {
                { "k", $"{_key}" },
                { "m", $"{parameters.Mode}" },
                { "b", $"{parameters.BeatmapId}" },
                { "u", $"{parameters.User}" },
            };

            var uri = new Uri($"{baseUrl}{replay}{ConvertUri.ToUrlString(collection)}");
            var url = await _site.GetUri(uri);

            if (!url.IsSuccess)
            {
                // Report the endpoint that was actually called (this used to name the beatmap endpoint)
                throw new WebException($"Osu!API encountered an error while: {replay.ToUpper()}");
            }

            // The response body is placed as the only element of a "replay" array so it
            // can be deserialized into the result type
            var jsonData = "{\n\"replay\":[" + url.JsonData + "]}";

            return JsonConvert.DeserializeObject<OsuReplayResult>(jsonData);
        }
Example #12
0
        /// <summary>
        /// Requests the E926 posts that match the given tag.
        /// </summary>
        /// <param name="tag">The tag(s) to search for; sent as the "tags" query parameter.</param>
        /// <param name="limit">
        /// The maximum number of posts to request. A negative value leaves the "limit"
        /// parameter out of the request.
        /// </param>
        /// <returns>
        /// The response deserialized into an <see cref="E926BaseResults"/>; the response body
        /// is placed under a "posts" property before deserializing.
        /// </returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<E926BaseResults> GetPostCollection(string tag, int limit = 10)
        {
            var collection = new NameValueCollection
            {
                { "tags", tag }
            };

            // The check used to be "limit < -1", which dropped every non-negative limit
            // (including the default of 10) and only ever sent meaningless negative values.
            if (limit > -1)
            {
                collection.Add("limit", limit.ToString());
            }

            var url = await _site.GetUri(new Uri($"{baseUri}{POST_COLLECTION}{ConvertUri.ToUrlString(collection)}"));

            if (!url.IsSuccess)
            {
                throw new WebException("E926 failed to collect search results.");
            }

            // The requested address is echoed to the console after a successful request
            Console.WriteLine($"{baseUri}{POST_COLLECTION}{ConvertUri.ToUrlString(collection)}");
            return JsonConvert.DeserializeObject<E926BaseResults>("{\nposts:" + url.JsonData + "}");
        }
Example #13
0
        /// <summary>
        /// Requests random GIFs for a search term from the Tenor "random" endpoint.
        /// </summary>
        /// <param name="random">
        /// The request options. The search term is always sent; the safe search level is
        /// only sent when it is not <c>TenorRating.Disabled</c>.
        /// </param>
        /// <returns>The response body deserialized into a <see cref="TenorRandomResults"/>.</returns>
        /// <exception cref="WebException">Thrown when the request does not report success.</exception>
        public async Task<TenorRandomResults> GetGifRandomly(TenorRandom random)
        {
            var query = new NameValueCollection();
            query.Add("key", _key);
            query.Add("q", random.Q);

            if (random.SafeSearch != TenorRating.Disabled)
            {
                query.Add("safesearch", random.SafeSearch.ToString());
            }

            var requestUri = new Uri($"{BaseUri}/random{ConvertUri.ToUrlString(query)}");
            var response   = await _site.GetUri(requestUri);

            if (response.IsSuccess)
            {
                return JsonConvert.DeserializeObject<TenorRandomResults>(response.JsonData);
            }

            // The message carries the string form of the response and of its exception
            throw new WebException("The Tenor API GIFs you called for failed to work.\n" + response + response.Exception);
        }
        /// <summary>
        /// Checks every image on the webpage: when <paramref name="rotate"/> is <c>true</c> the image
        /// is rotated according to its EXIF orientation, and when <paramref name="resize"/> is
        /// <c>true</c> an image that does not fit on the given <paramref name="pageSettings"/> is
        /// given a smaller display size.
        /// When at least one image had to be changed, all images are saved to temporary files and a
        /// changed copy of the <paramref name="inputUri"/> page is written to a temporary file.
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="resize">When set to <c>true</c> then an image is resized when needed</param>
        /// <param name="rotate">When set to <c>true</c> then the EXIF information of an
        ///     image is read and when needed the image is automatically rotated</param>
        /// <param name="pageSettings"><see cref="PageSettings"/></param>
        /// <param name="outputUri">The uri of the changed webpage when this method returns <c>true</c>.
        ///     It is <c>null</c> when nothing was changed or the page could not be parsed; note that it
        ///     is already set when writing the changed webpage fails and <c>false</c> is returned</param>
        /// <param name="safeUrls">A list with URL's that are safe to load; the temporary files
        ///     written by this method are added to it</param>
        /// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
        /// <returns>Returns <c>true</c> when at least one image was changed and the changed webpage has
        ///     been written to <paramref name="outputUri"/>. Returns <c>false</c> when no image needed
        ///     to be changed, when the webpage could not be parsed or when the changed webpage could
        ///     not be written</returns>
        /// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
        public bool ValidateImages(
            ConvertUri inputUri,
            bool resize,
            bool rotate,
            PageSettings pageSettings,
            out ConvertUri outputUri,
            ref List <string> safeUrls,
            List <string> urlBlacklist)
        {
            outputUri = null;

            using (var graphics = Graphics.FromHwnd(IntPtr.Zero))
                using (var webpage = inputUri.IsFile ? OpenFileStream(inputUri.OriginalString) : OpenDownloadStream(inputUri))
                {
                    WriteToLog($"DPI settings for image, x: '{graphics.DpiX}' and y: '{graphics.DpiY}'");

                    // The printable area (paper size minus the margins) multiplied by the DPI.
                    // NOTE(review): presumably the paper size and margins are in inches so this
                    // yields pixels - confirm against PageSettings
                    var maxWidth  = (pageSettings.PaperWidth - pageSettings.MarginLeft - pageSettings.MarginRight) * graphics.DpiX;
                    var maxHeight = (pageSettings.PaperHeight - pageSettings.MarginTop - pageSettings.MarginBottom) * graphics.DpiY;

                    // Only known when the webpage is a local file; passed to GetImage when loading images
                    string localDirectory = null;

                    if (inputUri.IsFile)
                    {
                        localDirectory = Path.GetDirectoryName(inputUri.OriginalString);
                    }

                    // Becomes true as soon as one image is rotated or rescaled
                    var htmlChanged = false;

                    IConfiguration config;

                    if (_webProxy != null)
                    {
                        WriteToLog($"Using web proxy '{_webProxy.Address}' to download images");

                        // NOTE(review): the callback below accepts every server certificate, so
                        // certificate validation is effectively switched off when a proxy is used
                        var httpClientHandler = new HttpClientHandler
                        {
                            Proxy = _webProxy,
                            ServerCertificateCustomValidationCallback = (message, certificate, arg1, arg2) =>
                            {
                                WriteToLog($"Accepting certificate '{certificate.Subject}', message '{message}'");
                                return(true);
                            }
                        };

                        // NOTE(review): the client and its handler are not disposed in this method
                        var client = new HttpClient(httpClientHandler);
                        config = Configuration.Default
                                 .With(new HttpClientRequester(client))
                                 .WithTemporaryCookies()
                                 .WithDefaultLoader()
                                 .WithCss();
                    }
                    else
                    {
                        config = Configuration.Default.WithCss();
                    }

                    var context = BrowsingContext.New(config);

                    IDocument document;

                    try
                    {
                        // The document is opened synchronously (.Result blocks until it is loaded).
                        // When an encoding is known it is passed on through a Content-Type header.
                        // ReSharper disable AccessToDisposedClosure
                        document = inputUri.Encoding != null
                        ? context.OpenAsync(m =>
                                            m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}")
                                            .Address(inputUri.ToString())).Result
                        : context.OpenAsync(m => m.Content(webpage).Address(inputUri.ToString())).Result;

                        // ReSharper restore AccessToDisposedClosure
                    }
                    catch (Exception exception)
                    {
                        WriteToLog($"Exception occurred in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
                        return(false);
                    }

                    WriteToLog("Validating all images if they need to be rotated and if they fit the page");

                    // The images that are not rotated; they are copied to temporary files later on
                    var unchangedImages = new List <IHtmlImageElement>();

                    // The input uri up to and including its last '/'; image urls that start with
                    // this prefix are always allowed
                    var absoluteUri     = inputUri.AbsoluteUri.Substring(0, inputUri.AbsoluteUri.LastIndexOf('/') + 1);

                    // ReSharper disable once PossibleInvalidCastExceptionInForeachLoop
                    foreach (var htmlImage in document.Images)
                    {
                        var imageChanged = false;

                        if (string.IsNullOrWhiteSpace(htmlImage.Source))
                        {
                            WriteToLog($"HTML image tag '{htmlImage.TagName}' has no image source '{htmlImage.Source}'");
                            continue;
                        }

                        Image image = null;

                        // The query string is cut off before the url is checked
                        var source        = htmlImage.Source.Contains("?") ? htmlImage.Source.Split('?')[0] : htmlImage.Source;
                        var isSafeUrl     = safeUrls.Contains(source);
                        var isAbsoluteUri = source.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase);

                        // An image is processed when it is not blacklisted, or when it is explicitly
                        // allowed (same location as the webpage or on the safe url list)
                        if (!RegularExpression.IsRegExMatch(urlBlacklist, source, out var matchedPattern) ||
                            isAbsoluteUri || isSafeUrl)
                        {
                            if (isAbsoluteUri)
                            {
                                WriteToLog($"The url '{source}' has been allowed because it start with the absolute uri '{absoluteUri}'");
                            }
                            else if (isSafeUrl)
                            {
                                WriteToLog($"The url '{source}' has been allowed because it is on the safe url list");
                            }
                            else
                            {
                                WriteToLog($"The url '{source}' has been allowed because it did not match anything on the url blacklist");
                            }
                        }
                        else
                        {
                            WriteToLog($"The url '{source}' has been blocked by url blacklist pattern '{matchedPattern}'");
                            continue;
                        }

                        // The temporary file name is only used when the image is rotated and saved
                        var extension = Path.GetExtension(FileManager.RemoveInvalidFileNameChars(source));
                        var fileName  = GetTempFile(extension);

                        try
                        {
                            // The local width and height attributes always go before css width and height
                            var width  = htmlImage.DisplayWidth;
                            var height = htmlImage.DisplayHeight;

                            if (rotate)
                            {
                                image = GetImage(htmlImage.Source, localDirectory);

                                // An image that cannot be loaded is skipped; it is also not added
                                // to the unchanged images
                                if (image == null)
                                {
                                    continue;
                                }

                                if (RotateImageByExifOrientationData(image))
                                {
                                    // The display size is assigned here and once more after the save below
                                    htmlImage.DisplayWidth  = image.Width;
                                    htmlImage.DisplayHeight = image.Height;
                                    WriteToLog($"Image rotated and saved to location '{fileName}'");
                                    image.Save(fileName);
                                    htmlImage.DisplayWidth  = image.Width;
                                    htmlImage.DisplayHeight = image.Height;
                                    htmlImage.SetStyle(string.Empty);

                                    // Point the image tag to the rotated copy and allow that copy to be loaded
                                    var newSrc = new Uri(fileName).ToString();
                                    WriteToLog($"Adding url '{newSrc}' to the safe url list");
                                    safeUrls.Add(newSrc);
                                    htmlImage.Source = newSrc;
                                    htmlChanged      = true;
                                    imageChanged     = true;
                                }

                                // From here on the size of the loaded image is used
                                width  = image.Width;
                                height = image.Height;
                            }

                            if (resize)
                            {
                                // No size known from the image tag (or the loaded image), try the
                                // computed css style
                                if (height == 0 && width == 0)
                                {
                                    ICssStyleDeclaration style = null;

                                    try
                                    {
                                        style = context.Current.GetComputedStyle(htmlImage);
                                    }
                                    catch (Exception exception)
                                    {
                                        WriteToLog($"Could not get computed style from html image, exception: '{exception.Message}'");
                                    }

                                    if (style != null)
                                    {
                                        width  = ParseValue(style.GetPropertyValue("width"));
                                        height = ParseValue(style.GetPropertyValue("height"));
                                    }
                                }

                                // If we don't know the image size then get it from the image itself
                                if (width <= 0 || height <= 0)
                                {
                                    if (image == null)
                                    {
                                        image = GetImage(htmlImage.Source, localDirectory);
                                    }

                                    if (image == null)
                                    {
                                        continue;
                                    }
                                    width  = image.Width;
                                    height = image.Height;
                                }

                                if (width > maxWidth || height > maxHeight)
                                {
                                    // If we did not load the image already then load it

                                    if (image == null)
                                    {
                                        image = GetImage(htmlImage.Source, localDirectory);
                                    }

                                    if (image == null)
                                    {
                                        continue;
                                    }

                                    // Only the maximum width is passed on; maxHeight is used for the check above only.
                                    // Rescaling changes the display size of the image tag; imageChanged is not
                                    // set here, so the image itself is still copied as an unchanged image
                                    ScaleImage(image, (int)maxWidth, out var newWidth, out var newHeight);
                                    WriteToLog($"Image rescaled to width {newWidth} and height {newHeight}");
                                    htmlImage.DisplayWidth  = newWidth;
                                    htmlImage.DisplayHeight = newHeight;
                                    htmlImage.SetStyle(string.Empty);
                                    htmlChanged = true;
                                }
                            }
                        }
                        finally
                        {
                            // Also runs for the 'continue' statements inside the try block
                            image?.Dispose();
                        }

                        if (!imageChanged)
                        {
                            unchangedImages.Add(htmlImage);
                        }
                    }

                    // Nothing was rotated or rescaled, so no new webpage has to be written
                    if (!htmlChanged)
                    {
                        return(false);
                    }

                    // The webpage is going to be written to a temporary file, so the images that
                    // were not rotated are saved to temporary files as well and the image tags
                    // are pointed to those copies
                    foreach (var unchangedImage in unchangedImages)
                    {
                        using (var image = GetImage(unchangedImage.Source, localDirectory))
                        {
                            if (image == null)
                            {
                                WriteToLog($"Could not load unchanged image from location '{unchangedImage.Source}'");
                                continue;
                            }

                            var extension = Path.GetExtension(unchangedImage.Source.Contains("?")
                            ? unchangedImage.Source.Split('?')[0]
                            : unchangedImage.Source);
                            var fileName = GetTempFile(extension);

                            WriteToLog($"Unchanged image saved to location '{fileName}'");
                            image.Save(fileName);
                            var newSrc = new Uri(fileName).ToString();
                            safeUrls.Add(newSrc);
                            unchangedImage.Source = newSrc;
                        }
                    }

                    // The output uri is set (and marked as safe) before the file is written,
                    // so it stays set when writing fails below
                    var outputFile = GetTempFile(".htm");
                    outputUri = new ConvertUri(outputFile, inputUri.Encoding);
                    safeUrls.Add(outputUri.ToString());

                    try
                    {
                        WriteToLog($"Writing changed webpage to '{outputFile}'");

                        // FileMode.CreateNew fails when the file already exists
                        using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
                        {
                            // Write with the encoding of the input when it is known
                            if (inputUri.Encoding != null)
                            {
                                using (var textWriter = new StreamWriter(fileStream, inputUri.Encoding))
                                    document.ToHtml(textWriter, new HtmlMarkupFormatter());
                            }
                            else
                            {
                                using (var textWriter = new StreamWriter(fileStream))
                                    document.ToHtml(textWriter, new HtmlMarkupFormatter());
                            }
                        }

                        WriteToLog("Changed webpage written");

                        return(true);
                    }
                    catch (Exception exception)
                    {
                        WriteToLog($"Could not write new html file '{outputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                        return(false);
                    }
                }
        }
Example #15
0
        /// <summary>
        /// Entry point of the console application. Parses the command line, opens the log stream
        /// and converts either a single input or every line of an input list file.
        /// </summary>
        /// <param name="args">The command line arguments</param>
        /// <remarks>
        /// The process exits with code 0 after a successful conversion and with code 1 when the
        /// command line could not be parsed or the conversion raised an exception.
        /// </remarks>
        static void Main(string[] args)
        {
            Options options = null;

            try
            {
                ParseCommandlineParameters(args, out options);
                if (options == null)
                {
                    throw new ArgumentException(nameof(options));
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);

                // Without valid options nothing below can work; continuing used to end in a
                // NullReferenceException on options.LogFile
                Environment.Exit(1);
                return;
            }

            // Log to the standard output when no log file has been given
            using (_logStream = string.IsNullOrWhiteSpace(options.LogFile)
                ? Console.OpenStandardOutput()
                : File.OpenWrite(ReplaceWildCards(options.LogFile)))
            {
                try
                {
                    var maxTasks = SetMaxConcurrencyLevel(options);

                    if (options.InputIsList)
                    {
                        _itemsToConvert = new ConcurrentQueue<ConversionItem>();
                        _itemsConverted = new ConcurrentQueue<ConversionItem>();

                        WriteToLog($"Reading inputfile '{options.Input}'");
                        var lines = File.ReadAllLines(options.Input);
                        foreach (var line in lines)
                        {
                            var inputUri   = new ConvertUri(line);
                            var outputPath = Path.GetFullPath(options.Output);

                            // The output file name is taken from a local file name, or from the
                            // uri with the invalid file name characters removed
                            var outputFile = inputUri.IsFile
                                ? Path.GetFileName(inputUri.AbsolutePath)
                                : FileManager.RemoveInvalidFileNameChars(inputUri.ToString());

                            _itemsToConvert.Enqueue(new ConversionItem(inputUri,
                                                                       Path.Combine(outputPath, outputFile)));
                        }

                        WriteToLog($"{_itemsToConvert.Count} items read");

                        if (options.UseMultiThreading)
                        {
                            _workerTasks = new List<Task>();

                            WriteToLog($"Starting {maxTasks} processing tasks");
                            for (var i = 0; i < maxTasks; i++)
                            {
                                // Copy the loop variable so that every task captures its own value
                                var i1 = i;
                                _workerTasks.Add(_taskFactory.StartNew(() =>
                                                                       ConvertWithTask(options, (i1 + 1).ToString())));
                            }

                            WriteToLog("Started");

                            // Waiting until all tasks are finished
                            foreach (var task in _workerTasks)
                            {
                                task.Wait();
                            }
                        }
                        else
                        {
                            ConvertWithTask(options, null);
                        }

                        // Write conversion information to output file
                        using (var output = File.OpenWrite(options.Output))
                        {
                            foreach (var itemConverted in _itemsConverted)
                            {
                                var bytes = new UTF8Encoding(true).GetBytes(itemConverted.OutputLine);
                                output.Write(bytes, 0, bytes.Length);
                            }
                        }
                    }
                    else
                    {
                        Convert(options);
                    }

                    Environment.Exit(0);
                }
                catch (Exception exception)
                {
                    WriteToLog(exception.StackTrace + ", " + exception.Message);
                    Environment.Exit(1);
                }
            }
        }
Example #16
0
        /// <summary>
        /// Injects extra style and script elements into the webpage so that the page size
        /// follows the size of the content, and writes the changed webpage to a temporary file
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="outputUri">The uri of the changed webpage when this method returns
        ///     <c>true</c>, otherwise <c>null</c></param>
        /// <returns><c>true</c> when the changed webpage has been written, <c>false</c> when the
        ///     webpage could not be processed or the new file could not be written</returns>
        public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
        {
            outputUri = null;

            using (var webpage = inputUri.IsFile
                ? File.OpenRead(inputUri.OriginalString)
                : DownloadStream(inputUri))
            {
                var config  = Configuration.Default.WithCss();
                var context = BrowsingContext.New(config);

                IDocument document;

                try
                {
                    // ReSharper disable AccessToDisposedClosure
                    document = inputUri.Encoding != null
                        ? context.OpenAsync(m => m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}").Address(inputUri.ToString())).Result
                        : context.OpenAsync(m => m.Content(webpage).Address(inputUri.ToString())).Result;

                    // ReSharper restore AccessToDisposedClosure

                    // Let html and body shrink/grow to the size of their content
                    var styleElement = new HtmlElement(document as Document, "style")
                    {
                        InnerHtml = "html, body " + Environment.NewLine +
                                    "{" + Environment.NewLine +
                                    "   width: fit-content;" + Environment.NewLine +
                                    "   height: fit-content;" + Environment.NewLine +
                                    "   margin: 0px;" + Environment.NewLine +
                                    "   padding: 0px;" + Environment.NewLine +
                                    "}" + Environment.NewLine
                    };

                    document.Head.AppendElement(styleElement);

                    // Initial @page rule; the script below overwrites it through its id
                    var pageStyleElement = new HtmlElement(document as Document, "style")
                    {
                        Id        = "pagestyle",
                        InnerHtml = "@page " + Environment.NewLine +
                                    "{ " + Environment.NewLine +
                                    "   size: 595px 842px ; " + Environment.NewLine +
                                    "   margin: 0px " + Environment.NewLine +
                                    "}" + Environment.NewLine
                    };

                    document.Head.AppendElement(pageStyleElement);

                    // On load, replace the @page rule with the computed size of the html element
                    var pageElement = new HtmlElement(document as Document, "script")
                    {
                        InnerHtml = "window.onload = function () {" + Environment.NewLine +
                                    "" + Environment.NewLine +
                                    "   var page = document.getElementsByTagName('html')[0];" + Environment.NewLine +
                                    "   var pageInfo = window.getComputedStyle(page);" + Environment.NewLine +
                                    "" + Environment.NewLine +
                                    "    var height = parseInt(pageInfo.height) + 10 + 'px';" +
                                    Environment.NewLine +
                                    "" + Environment.NewLine +
                                    "    var pageCss = '@page { size: ' + pageInfo.width + ' ' + height + '; margin: 0; }'" +
                                    Environment.NewLine +
                                    "    document.getElementById('pagestyle').innerHTML = pageCss;" + Environment.NewLine +
                                    "}" + Environment.NewLine
                    };

                    document.Body.AppendElement(pageElement);
                }
                catch (Exception exception)
                {
                    WriteToLog($"Exception occured in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }

                var outputFile = GetTempFile(".htm");

                // Kept in a local so that the out parameter is only set once the file really exists
                var changedUri = new ConvertUri(outputFile, inputUri.Encoding);

                try
                {
                    WriteToLog($"Writing changed webpage to '{outputFile}'");

                    using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
                    using (var textWriter = inputUri.Encoding != null
                        ? new StreamWriter(fileStream, inputUri.Encoding)
                        : new StreamWriter(fileStream))
                    {
                        document.ToHtml(textWriter, new HtmlMarkupFormatter());
                    }

                    WriteToLog("Changed webpage written");
                    outputUri = changedUri;
                    return true;
                }
                catch (Exception exception)
                {
                    WriteToLog($"Could not write new html file '{outputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }
            }
        }
Example #17
0
        /// <summary>
        /// Sanitizes the HTML by removing all forbidden elements and, when anything was
        /// changed, writes the sanitized webpage to a temporary file
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="sanitizer"><see cref="HtmlSanitizer"/>, when <c>null</c> a new instance
        ///     with its default settings is used</param>
        /// <param name="outputUri">The uri of the sanitized webpage when this method returns
        ///     <c>true</c>, otherwise <c>null</c></param>
        /// <returns><c>true</c> when the HTML was changed and the sanitized webpage has been written,
        ///     <c>false</c> when nothing was changed, the webpage could not be parsed or the new
        ///     file could not be written</returns>
        public bool SanitizeHtml(
            ConvertUri inputUri,
            HtmlSanitizer sanitizer,
            out ConvertUri outputUri)
        {
            outputUri = null;

            using (var webpage = inputUri.IsFile
                ? File.OpenRead(inputUri.OriginalString)
                : DownloadStream(inputUri))
            {
                var htmlChanged = false;
                var config      = Configuration.Default.WithCss();
                var context     = BrowsingContext.New(config);

                IDocument document;

                try
                {
                    // ReSharper disable AccessToDisposedClosure
                    document = inputUri.Encoding != null
                        ? context.OpenAsync(m => m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}").Address(inputUri.ToString())).Result
                        : context.OpenAsync(m => m.Content(webpage).Address(inputUri.ToString())).Result;

                    // ReSharper restore AccessToDisposedClosure
                }
                catch (Exception exception)
                {
                    WriteToLog($"Exception occured in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }

                WriteToLog("Sanitizing HTML");

                if (sanitizer == null)
                {
                    sanitizer = new HtmlSanitizer();
                }

                // The handlers are kept in locals so that they can be detached again; the sanitizer
                // may be supplied (and reused) by the caller and would otherwise collect a new set
                // of handlers on every call
                EventHandler<FilterUrlEventArgs> onFilterUrl = (sender, args) =>
                {
                    if (args.OriginalUrl != args.SanitizedUrl)
                    {
                        WriteToLog($"URL sanitized from '{args.OriginalUrl}' to '{args.SanitizedUrl}'");
                        htmlChanged = true;
                    }
                };

                EventHandler<RemovingAtRuleEventArgs> onRemovingAtRule = (sender, args) =>
                {
                    WriteToLog($"Removing CSS at-rule '{args.Rule.CssText}' from tag '{args.Tag.TagName}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingAttributeEventArgs> onRemovingAttribute = (sender, args) =>
                {
                    WriteToLog(
                        $"Removing attribute '{args.Attribute.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingCommentEventArgs> onRemovingComment = (sender, args) =>
                {
                    WriteToLog($"Removing comment '{args.Comment.TextContent}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingCssClassEventArgs> onRemovingCssClass = (sender, args) =>
                {
                    WriteToLog(
                        $"Removing CSS class '{args.CssClass}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingStyleEventArgs> onRemovingStyle = (sender, args) =>
                {
                    WriteToLog(
                        $"Removing style '{args.Style.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingTagEventArgs> onRemovingTag = (sender, args) =>
                {
                    WriteToLog($"Removing tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                sanitizer.FilterUrl         += onFilterUrl;
                sanitizer.RemovingAtRule    += onRemovingAtRule;
                sanitizer.RemovingAttribute += onRemovingAttribute;
                sanitizer.RemovingComment   += onRemovingComment;
                sanitizer.RemovingCssClass  += onRemovingCssClass;
                sanitizer.RemovingStyle     += onRemovingStyle;
                sanitizer.RemovingTag       += onRemovingTag;

                try
                {
                    sanitizer.SanitizeDom(document as IHtmlDocument);
                }
                finally
                {
                    sanitizer.FilterUrl         -= onFilterUrl;
                    sanitizer.RemovingAtRule    -= onRemovingAtRule;
                    sanitizer.RemovingAttribute -= onRemovingAttribute;
                    sanitizer.RemovingComment   -= onRemovingComment;
                    sanitizer.RemovingCssClass  -= onRemovingCssClass;
                    sanitizer.RemovingStyle     -= onRemovingStyle;
                    sanitizer.RemovingTag       -= onRemovingTag;
                }

                WriteToLog("HTML sanitized");

                if (!htmlChanged)
                {
                    return false;
                }

                var sanitizedOutputFile = GetTempFile(".htm");

                // Kept in a local so that the out parameter is only set once the file really exists
                var sanitizedUri = new ConvertUri(sanitizedOutputFile, inputUri.Encoding);

                try
                {
                    WriteToLog($"Writing sanitized webpage to '{sanitizedOutputFile}'");

                    using (var fileStream = new FileStream(sanitizedOutputFile, FileMode.CreateNew, FileAccess.Write))
                    using (var textWriter = inputUri.Encoding != null
                        ? new StreamWriter(fileStream, inputUri.Encoding)
                        : new StreamWriter(fileStream))
                    {
                        document.ToHtml(textWriter, new HtmlMarkupFormatter());
                    }

                    WriteToLog("Sanitized webpage written");
                    outputUri = sanitizedUri;
                    return true;
                }
                catch (Exception exception)
                {
                    WriteToLog($"Could not write new html file '{sanitizedOutputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }
            }
        }
Example #18
0
 /// <summary>
 /// Creates a conversion item and sets its status to <see cref="ConversionItemStatus.None"/>
 /// </summary>
 /// <param name="inputUri">The value stored in <c>InputUri</c></param>
 /// <param name="outputFile">The value stored in <c>OutputFile</c></param>
 internal ConversionItem(
     ConvertUri inputUri,
     string outputFile)
 {
     this.InputUri = inputUri;
     this.OutputFile = outputFile;
     this.Status = ConversionItemStatus.None;
 }
Example #19
0
        /// <summary>
        /// Validates all images if they are rotated correctly (when <paramref name="rotate"/> is set
        /// to <c>true</c>) and fit on the given <paramref name="pageSettings"/> (when
        /// <paramref name="resize"/> is set to <c>true</c>).
        /// If an image does need to be rotated or does not fit then a local copy is made of
        /// the <paramref name="inputUri"/> file.
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="resize">When set to <c>true</c> then an image is resized when needed</param>
        /// <param name="rotate">When set to <c>true</c> then the EXIF information of an
        /// image is read and when needed the image is automatically rotated</param>
        /// <param name="pageSettings"><see cref="PageSettings"/></param>
        /// <param name="outputUri">The uri of the changed webpage when this method returns
        ///     <c>false</c>, otherwise <c>null</c></param>
        /// <returns>Returns <c>false</c> when one or more images were changed and a new webpage has been
        /// written to <paramref name="outputUri"/>, otherwise <c>true</c> (also when the new webpage
        /// could not be written)</returns>
        /// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
        public bool ValidateImages(ConvertUri inputUri,
                                   bool resize,
                                   bool rotate,
                                   PageSettings pageSettings,
                                   out ConvertUri outputUri)
        {
            WriteToLog("Validating all images if they need to be rotated and if they fit the page");
            outputUri = null;

            string localDirectory = null;

            if (inputUri.IsFile)
            {
                localDirectory = Path.GetDirectoryName(inputUri.OriginalString);
            }

            var webpage = inputUri.IsFile
                ? inputUri.Encoding != null
                    ? File.ReadAllText(inputUri.OriginalString, inputUri.Encoding)
                    : File.ReadAllText(inputUri.OriginalString)
                : DownloadString(inputUri);

            // NOTE(review): the factor 96.0 presumably converts a paper size in inches to
            // pixels at 96 DPI - confirm against PageSettings
            var maxWidth  = pageSettings.PaperWidth * 96.0;
            var maxHeight = pageSettings.PaperHeight * 96.0;

            var changed = false;
            var config  = Configuration.Default.WithCss();
            var context = BrowsingContext.New(config);

            var document = inputUri.Encoding != null
                ? context.OpenAsync(m => m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}")).Result
                : context.OpenAsync(m => m.Content(webpage)).Result;

            // ReSharper disable once PossibleInvalidCastExceptionInForeachLoop
            foreach (var htmlImage in document.Images)
            {
                if (string.IsNullOrWhiteSpace(htmlImage.Source))
                {
                    WriteToLog($"HTML image tag '{htmlImage.TagName}' has no image source '{htmlImage.Source}'");
                    continue;
                }

                Image image = null;

                try
                {
                    // The local width and height attributes always go before css width and height
                    var width  = htmlImage.DisplayWidth;
                    var height = htmlImage.DisplayHeight;

                    if (rotate)
                    {
                        image = GetImage(new Uri(htmlImage.Source), localDirectory);
                        if (image == null)
                        {
                            continue;
                        }

                        if (RotateImageByExifOrientationData(image))
                        {
                            htmlImage.DisplayWidth  = image.Width;
                            htmlImage.DisplayHeight = image.Height;
                            changed = true;
                        }

                        width  = image.Width;
                        height = image.Height;
                    }

                    if (!resize)
                    {
                        continue;
                    }

                    if (height == 0 && width == 0)
                    {
                        var style = context.Current.GetComputedStyle(htmlImage);
                        if (style != null)
                        {
                            width  = ParseValue(style.Width);
                            height = ParseValue(style.Height);
                        }
                    }

                    // If we don't know the image size then get it from the image itself
                    if (width <= 0 || height <= 0)
                    {
                        if (image == null)
                        {
                            image = GetImage(new Uri(htmlImage.Source), localDirectory);
                        }

                        if (image == null)
                        {
                            continue;
                        }

                        width  = image.Width;
                        height = image.Height;
                    }

                    if (width > maxWidth || height > maxHeight)
                    {
                        // If we did not load the image already then load it
                        if (image == null)
                        {
                            image = GetImage(new Uri(htmlImage.Source), localDirectory);
                        }

                        if (image == null)
                        {
                            continue;
                        }

                        // The temp file is only requested once we know there is an image to save
                        // in it, so no unused temp file is requested for an image that cannot be loaded
                        var extension = Path.GetExtension(htmlImage.Source.Contains("?")
                            ? htmlImage.Source.Split('?')[0]
                            : htmlImage.Source);

                        var fileName = GetTempFile(extension);

                        // When scaling hands back a new instance, the original has to be released
                        // here because the finally block below only disposes what 'image' refers to
                        var scaledImage = ScaleImage(image, (int)maxWidth);
                        if (!ReferenceEquals(scaledImage, image))
                        {
                            image.Dispose();
                            image = scaledImage;
                        }

                        WriteToLog($"Image resized to width {image.Width} and height {image.Height}");
                        image.Save(fileName);
                        htmlImage.DisplayWidth  = image.Width;
                        htmlImage.DisplayHeight = image.Height;
                        htmlImage.Source        = new Uri(fileName).ToString();
                        changed = true;
                    }
                }
                finally
                {
                    image?.Dispose();
                }
            }

            if (!changed)
            {
                return true;
            }

            var outputFile = GetTempFile(".htm");

            // Kept in a local so that the out parameter is only set once the file really exists
            var changedUri = new ConvertUri(outputFile, inputUri.Encoding);

            try
            {
                using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
                using (var textWriter = inputUri.Encoding != null
                    ? new StreamWriter(fileStream, inputUri.Encoding)
                    : new StreamWriter(fileStream))
                {
                    document.ToHtml(textWriter, new AutoSelectedMarkupFormatter());
                }

                outputUri = changedUri;
                return false;
            }
            catch (Exception exception)
            {
                WriteToLog($"Could not generate new html file '{outputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                return true;
            }
        }
        /// <summary>
        /// Sanitizes the HTML by removing all forbidden elements and, when anything was
        /// changed, writes the sanitized webpage to a temporary file
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="mediaLoadTimeout">The media load timeout or <c>null</c> when not set
        ///     (not used inside this method)</param>
        /// <param name="sanitizer"><see cref="HtmlSanitizer"/>, when <c>null</c> a new instance
        ///     with its default settings is used</param>
        /// <param name="outputUri">The uri of the sanitized webpage when this method returns
        ///     <c>true</c>, otherwise <c>null</c></param>
        /// <param name="safeUrls">A list of safe URL's; the uri of the sanitized webpage and the
        ///     sources of non http(s) images in a local webpage are added to it. When <c>null</c>
        ///     a new list is created as soon as an url has to be added</param>
        /// <returns><c>true</c> when the HTML was changed and the sanitized webpage has been written,
        ///     <c>false</c> when nothing was changed, the webpage could not be parsed or the new
        ///     file could not be written</returns>
        public bool SanitizeHtml(
            ConvertUri inputUri,
            int? mediaLoadTimeout,
            HtmlSanitizer sanitizer,
            out ConvertUri outputUri,
            ref List<string> safeUrls)
        {
            outputUri = null;

            using (var webpage = inputUri.IsFile ? OpenFileStream(inputUri.OriginalString) : OpenDownloadStream(inputUri))
            {
                var htmlChanged = false;
                var config      = Configuration.Default.WithCss();
                var context     = BrowsingContext.New(config);

                IDocument document;

                try
                {
                    // ReSharper disable AccessToDisposedClosure
                    document = inputUri.Encoding != null
                        ? context.OpenAsync(m => m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}").Address(inputUri.ToString())).Result
                        : context.OpenAsync(m => m.Content(webpage).Address(inputUri.ToString())).Result;

                    // ReSharper restore AccessToDisposedClosure
                }
                catch (Exception exception)
                {
                    WriteToLog($"Exception occurred in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }

                WriteToLog("Sanitizing HTML");

                if (sanitizer == null)
                {
                    sanitizer = new HtmlSanitizer();
                }

                // The handlers are kept in locals so that they can be detached again; the sanitizer
                // may be supplied (and reused) by the caller and would otherwise collect a new set
                // of handlers on every call
                EventHandler<FilterUrlEventArgs> onFilterUrl = (sender, args) =>
                {
                    if (args.OriginalUrl != args.SanitizedUrl)
                    {
                        WriteToLog($"URL sanitized from '{args.OriginalUrl}' to '{args.SanitizedUrl}'");
                        htmlChanged = true;
                    }
                };

                EventHandler<RemovingAtRuleEventArgs> onRemovingAtRule = (sender, args) =>
                {
                    WriteToLog($"Removing CSS at-rule '{args.Rule.CssText}' from tag '{args.Tag.TagName}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingAttributeEventArgs> onRemovingAttribute = (sender, args) =>
                {
                    WriteToLog($"Removing attribute '{args.Attribute.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingCommentEventArgs> onRemovingComment = (sender, args) =>
                {
                    WriteToLog($"Removing comment '{args.Comment.TextContent}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingCssClassEventArgs> onRemovingCssClass = (sender, args) =>
                {
                    WriteToLog($"Removing CSS class '{args.CssClass}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingStyleEventArgs> onRemovingStyle = (sender, args) =>
                {
                    WriteToLog($"Removing style '{args.Style.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                EventHandler<RemovingTagEventArgs> onRemovingTag = (sender, args) =>
                {
                    WriteToLog($"Removing tag '{args.Tag.TagName}', reason '{args.Reason}'");
                    htmlChanged = true;
                };

                sanitizer.FilterUrl         += onFilterUrl;
                sanitizer.RemovingAtRule    += onRemovingAtRule;
                sanitizer.RemovingAttribute += onRemovingAttribute;
                sanitizer.RemovingComment   += onRemovingComment;
                sanitizer.RemovingCssClass  += onRemovingCssClass;
                sanitizer.RemovingStyle     += onRemovingStyle;
                sanitizer.RemovingTag       += onRemovingTag;

                try
                {
                    sanitizer.SanitizeDom(document as IHtmlDocument);
                }
                finally
                {
                    sanitizer.FilterUrl         -= onFilterUrl;
                    sanitizer.RemovingAtRule    -= onRemovingAtRule;
                    sanitizer.RemovingAttribute -= onRemovingAttribute;
                    sanitizer.RemovingComment   -= onRemovingComment;
                    sanitizer.RemovingCssClass  -= onRemovingCssClass;
                    sanitizer.RemovingStyle     -= onRemovingStyle;
                    sanitizer.RemovingTag       -= onRemovingTag;
                }

                if (!htmlChanged)
                {
                    WriteToLog("HTML did not need any sanitization");
                    return false;
                }

                WriteToLog("HTML sanitized");

                var sanitizedOutputFile = GetTempFile(".htm");

                // Kept in a local so that the out parameter is only set once the file really exists
                var sanitizedUri = new ConvertUri(sanitizedOutputFile, inputUri.Encoding);
                var url = sanitizedUri.ToString();
                WriteToLog($"Adding url '{url}' to the safe url list");

                // The list is passed by reference, so a missing list can be created for the caller
                if (safeUrls == null)
                {
                    safeUrls = new List<string>();
                }

                safeUrls.Add(url);

                try
                {
                    if (document.BaseUrl.Scheme.StartsWith("file", StringComparison.Ordinal))
                    {
                        var images = document.DocumentElement.Descendents()
                                     .Where(x => x.NodeType == NodeType.Element)
                                     .OfType<IHtmlImageElement>();

                        foreach (var image in images)
                        {
                            var src = image.Source;

                            // Nothing to whitelist or rewrite for an image without a source
                            if (string.IsNullOrWhiteSpace(src))
                            {
                                continue;
                            }

                            if (src.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                                src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                            {
                                continue;
                            }

                            // NOTE(review): Source is read and written back unchanged; presumably the
                            // getter returns the resolved (absolute) url so that the image can still be
                            // found from the temp file location - confirm against AngleSharp
                            WriteToLog($"Updating image source to '{src}' and adding it to the safe url list");
                            safeUrls.Add(src);
                            image.Source = src;
                        }
                    }

                    WriteToLog($"Writing sanitized webpage to '{sanitizedOutputFile}'");

                    using (var fileStream = new FileStream(sanitizedOutputFile, FileMode.CreateNew, FileAccess.Write))
                    using (var textWriter = inputUri.Encoding != null
                        ? new StreamWriter(fileStream, inputUri.Encoding)
                        : new StreamWriter(fileStream))
                    {
                        document.ToHtml(textWriter, new HtmlMarkupFormatter());
                    }

                    WriteToLog("Sanitized webpage written");
                    outputUri = sanitizedUri;
                    return true;
                }
                catch (Exception exception)
                {
                    WriteToLog($"Could not write new html file '{sanitizedOutputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                    return false;
                }
            }
        }
Example #21
0
        /// <summary>
        /// Validates all images if they are rotated correctly (when <paramref name="rotate"/> is set
        /// to <c>true</c>) and fit on the given <paramref name="pageSettings"/>.
        /// If an image does need to be rotated or does not fit then a local copy is made of
        /// the <paramref name="inputUri"/> file.
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="resize">When set to <c>true</c> then an image is resized when needed</param>
        /// <param name="rotate">When set to <c>true</c> then the EXIF information of an
        ///     image is read and when needed the image is automatic rotated</param>
        /// <param name="sanitizeHtml">When set to <c>true</c> then the HTML with get sanitized</param>
        /// <param name="pageSettings"><see cref="PageSettings"/></param>
        /// <param name="outputUri">The outputUri when this method returns <c>false</c> otherwise
        ///     <c>null</c> is returned</param>
        /// <returns>Returns <c>false</c> when the images dit not fit the page, otherwise <c>true</c></returns>
        /// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
        public bool Validate(ConvertUri inputUri,
                             bool resize,
                             bool rotate,
                             bool sanitizeHtml,
                             PageSettings pageSettings,
                             out ConvertUri outputUri)
        {
            outputUri = null;

            string localDirectory = null;

            if (inputUri.IsFile)
            {
                localDirectory = Path.GetDirectoryName(inputUri.OriginalString);
            }

            using (var webpage = inputUri.IsFile
                ? File.OpenRead(inputUri.OriginalString)
                : DownloadStream(inputUri))
            {
                var maxWidth  = (pageSettings.PaperWidth - pageSettings.MarginLeft - pageSettings.MarginRight) * 96.0;
                var maxHeight = (pageSettings.PaperHeight - pageSettings.MarginTop - pageSettings.MarginBottom) * 96.0;

                var htmlChanged = false;
                var config      = Configuration.Default.WithCss();
                var context     = BrowsingContext.New(config);

                IDocument document;

                try
                {
                    // ReSharper disable AccessToDisposedClosure
                    document = inputUri.Encoding != null
                        ? context.OpenAsync(m =>
                                            m.Content(webpage).Header("Content-Type",
                                                                      $"text/html; charset={inputUri.Encoding.WebName}"))
                               .Result
                        : context.OpenAsync(m => m.Content(webpage)).Result;

                    // ReSharper restore AccessToDisposedClosure
                }
                catch (Exception exception)
                {
                    WriteToLog($"Exception occured in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
                    return(true);
                }

                if (sanitizeHtml)
                {
                    WriteToLog("Sanitizing HTML");
                    new HtmlSanitizer().DoSanitize(document as IHtmlDocument, document.DocumentElement);
                    htmlChanged = true;
                    WriteToLog("HTML sanitized");
                }

                WriteToLog("Validating all images if they need to be rotated and if they fit the page");
                var unchangedImages = new List <IHtmlImageElement>();

                // ReSharper disable once PossibleInvalidCastExceptionInForeachLoop
                foreach (var htmlImage in document.Images)
                {
                    var imageChanged = false;

                    if (string.IsNullOrWhiteSpace(htmlImage.Source))
                    {
                        WriteToLog($"HTML image tag '{htmlImage.TagName}' has no image source '{htmlImage.Source}'");
                        continue;
                    }

                    Image image  = null;
                    var   source = htmlImage.Source.Contains("?")
                        ? htmlImage.Source.Split('?')[0]
                        : htmlImage.Source;

                    var extension = Path.GetExtension(FileManager.RemoveInvalidFileNameChars(source));

                    var fileName = GetTempFile(extension);

                    try
                    {
                        // The local width and height attributes always go before css width and height
                        var width  = htmlImage.DisplayWidth;
                        var height = htmlImage.DisplayHeight;

                        if (rotate)
                        {
                            image = GetImage(htmlImage.Source, localDirectory);

                            if (image == null)
                            {
                                continue;
                            }

                            if (RotateImageByExifOrientationData(image))
                            {
                                htmlImage.DisplayWidth  = image.Width;
                                htmlImage.DisplayHeight = image.Height;
                                WriteToLog($"Image rotated and saved to location '{fileName}'");
                                image.Save(fileName);
                                htmlImage.DisplayWidth  = image.Width;
                                htmlImage.DisplayHeight = image.Height;
                                htmlImage.SetStyle(string.Empty);
                                htmlImage.Source = new Uri(fileName).ToString();
                                htmlChanged      = true;
                                imageChanged     = true;
                            }

                            width  = image.Width;
                            height = image.Height;
                        }

                        if (resize)
                        {
                            if (height == 0 && width == 0)
                            {
                                var style = context.Current.GetComputedStyle(htmlImage);
                                if (style != null)
                                {
                                    width  = ParseValue(style.GetPropertyValue("width"));
                                    height = ParseValue(style.GetPropertyValue("height"));
                                }
                            }

                            // If we don't know the image size then get if from the image itself
                            if (width <= 0 || height <= 0)
                            {
                                if (image == null)
                                {
                                    image = GetImage(htmlImage.Source, localDirectory);
                                }

                                if (image == null)
                                {
                                    continue;
                                }
                                width  = image.Width;
                                height = image.Height;
                            }

                            if (width > maxWidth || height > maxHeight)
                            {
                                // If we did not load the image already then load it

                                if (image == null)
                                {
                                    image = GetImage(htmlImage.Source, localDirectory);
                                }

                                if (image == null)
                                {
                                    continue;
                                }

                                ScaleImage(image, (int)maxWidth, out var newWidth, out var newHeight);
                                WriteToLog($"Image rescaled to width {newWidth} and height {newHeight}");
                                htmlImage.DisplayWidth  = newWidth;
                                htmlImage.DisplayHeight = newHeight;
                                htmlImage.SetStyle(string.Empty);
                                htmlChanged = true;
                            }
                        }
                    }
                    finally
                    {
                        image?.Dispose();
                    }

                    if (!imageChanged)
                    {
                        unchangedImages.Add(htmlImage);
                    }
                }

                if (!htmlChanged)
                {
                    return(true);
                }

                foreach (var unchangedImage in unchangedImages)
                {
                    using (var image = GetImage(unchangedImage.Source, localDirectory))
                    {
                        if (image == null)
                        {
                            WriteToLog($"Could not load unchanged image from location '{unchangedImage.Source}'");
                            continue;
                        }

                        var extension = Path.GetExtension(unchangedImage.Source.Contains("?")
                            ? unchangedImage.Source.Split('?')[0]
                            : unchangedImage.Source);
                        var fileName = GetTempFile(extension);

                        WriteToLog($"Unchanged image saved to location '{fileName}'");
                        image.Save(fileName);
                        unchangedImage.Source = new Uri(fileName).ToString();
                    }
                }

                var outputFile = GetTempFile(".htm");
                outputUri = new ConvertUri(outputFile, inputUri.Encoding);

                try
                {
                    using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
                    {
                        if (inputUri.Encoding != null)
                        {
                            using (var textWriter = new StreamWriter(fileStream, inputUri.Encoding))
                                document.ToHtml(textWriter, new HtmlMarkupFormatter());
                        }
                        else
                        {
                            using (var textWriter = new StreamWriter(fileStream))
                                document.ToHtml(textWriter, new HtmlMarkupFormatter());
                        }
                    }

                    return(false);
                }
                catch (Exception exception)
                {
                    WriteToLog($"Could not generate new html file '{outputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
                    return(true);
                }
            }
        }
Example #22
0
        /// <summary>
        /// Optionally sanitizes the webpage and validates all its images: when <paramref name="rotate"/>
        /// is set to <c>true</c> an image is rotated according to its EXIF orientation data and when
        /// <paramref name="resize"/> is set to <c>true</c> an image that is larger than the page
        /// described by <paramref name="pageSettings"/> is scaled down.
        /// When anything had to be changed a local copy of the <paramref name="inputUri"/> webpage is
        /// written that points to local copies of the images.
        /// </summary>
        /// <param name="inputUri">The uri of the webpage</param>
        /// <param name="sanitize">When set to <c>true</c> then the webpage is run through an
        ///     <c>HtmlSanitizer</c> before the images are validated</param>
        /// <param name="resize">When set to <c>true</c> then an image is resized when needed</param>
        /// <param name="rotate">When set to <c>true</c> then the EXIF information of an
        ///     image is read and when needed the image is automatically rotated</param>
        /// <param name="pageSettings"><see cref="PageSettings"/></param>
        /// <param name="outputUri">The uri of the local copy of the webpage when this method returns
        ///     <c>false</c>, otherwise <c>null</c></param>
        /// <returns>Returns <c>false</c> when the webpage had to be changed and a local copy has been
        ///     written to <paramref name="outputUri"/>. Returns <c>true</c> when nothing had to be changed
        ///     or when the local copy could not be written</returns>
        /// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
        public bool Cleanup(ConvertUri inputUri,
                            bool sanitize,
                            bool resize,
                            bool rotate,
                            PageSettings pageSettings,
                            out ConvertUri outputUri)
        {
            outputUri = null;

            string localDirectory = null;

            if (inputUri.IsFile)
            {
                localDirectory = Path.GetDirectoryName(inputUri.OriginalString);
            }

            var webpage = inputUri.IsFile
                ? inputUri.Encoding != null
                    ? File.ReadAllText(inputUri.OriginalString, inputUri.Encoding)
                    : File.ReadAllText(inputUri.OriginalString)
                : DownloadString(inputUri);

            // Becomes true as soon as the sanitizer or any image modified the document
            var changed = false;

            if (sanitize)
            {
                var sanitizer = new HtmlSanitizer();
                sanitizer.AllowedSchemes.Add("mailto");
                sanitizer.AllowedTags.Add("html");
                sanitizer.AllowedTags.Add("head");
                sanitizer.AllowedAttributes.Add("http-equiv");
                sanitizer.AllowedAttributes.Add("content");
                sanitizer.AllowedTags.Add("body");
                sanitizer.AllowedTags.Add("meta");
                sanitizer.AllowedAttributes.Add("class");
                sanitizer.AllowDataAttributes = true;

                var sanitizedWebPage = sanitizer.Sanitize(webpage, string.Empty, new AutoSelectedMarkupFormatter());
                if (webpage != sanitizedWebPage)
                {
                    changed = true;
                    webpage = sanitizedWebPage;
                    WriteToLog("Webpage sanitized");
                }
            }

            // NOTE(review): presumably the paper size is in inches and 96 is the pixels per inch - confirm
            var maxWidth  = pageSettings.PaperWidth * 96.0;
            var maxHeight = pageSettings.PaperHeight * 96.0;

            var config  = Configuration.Default.WithCss();
            var context = BrowsingContext.New(config);

            var document = inputUri.Encoding != null
                ? context.OpenAsync(m => m.Content(webpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}")).Result
                : context.OpenAsync(m => m.Content(webpage)).Result;

            var unchangedImages = new List <IHtmlImageElement>();

            // ReSharper disable once PossibleInvalidCastExceptionInForeachLoop
            foreach (var htmlImage in document.Images)
            {
                if (string.IsNullOrWhiteSpace(htmlImage.Source))
                {
                    WriteToLog($"HTML image tag '{htmlImage.TagName}' has no image source '{htmlImage.Source}'");
                    continue;
                }

                Image image = null;

                // Tracked per image; the document wide flag can't tell if THIS image still
                // needs a local copy
                var imageChanged = false;

                var extension = Path.GetExtension(htmlImage.Source.Contains("?")
                    ? htmlImage.Source.Split('?')[0]
                    : htmlImage.Source);

                var fileName = GetTempFile(extension);

                try
                {
                    // The local width and height attributes always go before css width and height
                    var width  = htmlImage.DisplayWidth;
                    var height = htmlImage.DisplayHeight;

                    if (rotate)
                    {
                        image = LoadImageFromSource(htmlImage.Source, localDirectory);

                        if (image == null)
                        {
                            continue;
                        }

                        if (RotateImageByExifOrientationData(image))
                        {
                            // Always persist a rotated image, also when resizing is requested but
                            // turns out not to be needed, otherwise the rotation would be lost
                            WriteToLog($"Image rotated and saved to location '{fileName}'");
                            image.Save(fileName);
                            htmlImage.DisplayWidth  = image.Width;
                            htmlImage.DisplayHeight = image.Height;
                            htmlImage.Source        = new Uri(fileName).ToString();
                            imageChanged = true;
                            changed      = true;
                        }

                        width  = image.Width;
                        height = image.Height;
                    }

                    if (resize)
                    {
                        if (height == 0 && width == 0)
                        {
                            var style = context.Current.GetComputedStyle(htmlImage);
                            if (style != null)
                            {
                                width  = ParseValue(style.GetPropertyValue("width"));
                                height = ParseValue(style.GetPropertyValue("height"));
                            }
                        }

                        // If we don't know the image size then get it from the image itself
                        if (width <= 0 || height <= 0)
                        {
                            if (image == null)
                            {
                                image = LoadImageFromSource(htmlImage.Source, localDirectory);
                            }

                            if (image == null)
                            {
                                continue;
                            }

                            width  = image.Width;
                            height = image.Height;
                        }

                        if (width > maxWidth || height > maxHeight)
                        {
                            // If we did not load the image already then load it
                            if (image == null)
                            {
                                image = LoadImageFromSource(htmlImage.Source, localDirectory);
                            }

                            if (image == null)
                            {
                                continue;
                            }

                            var scaledImage = ScaleImage(image, (int)maxWidth);

                            // Release the image we loaded, only the scaled one is used from here on
                            if (!ReferenceEquals(scaledImage, image))
                            {
                                image.Dispose();
                                image = scaledImage;
                            }

                            WriteToLog($"Image resized to width {image.Width} and height {image.Height} and saved to location '{fileName}'");
                            image.Save(fileName);
                            htmlImage.DisplayWidth  = image.Width;
                            htmlImage.DisplayHeight = image.Height;
                            htmlImage.Source        = new Uri(fileName).ToString();
                            imageChanged = true;
                            changed      = true;
                        }
                    }
                }
                finally
                {
                    image?.Dispose();
                }

                if (!imageChanged)
                {
                    unchangedImages.Add(htmlImage);
                }
            }

            if (!changed)
            {
                return(true);
            }

            // The webpage is going to be written to a new location, so images that were left
            // untouched have to be made reachable from there
            foreach (var unchangedImage in unchangedImages)
            {
                // An embedded (data uri) image is part of the html itself, nothing to copy
                if (unchangedImage.Source.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var imageSource = new Uri(unchangedImage.Source);

                if (localDirectory != null)
                {
                    // Only the source is re-pointed here, the image itself is not needed
                    var localFileName = Path.Combine(localDirectory, Path.GetFileName(imageSource.ToString()));
                    unchangedImage.Source = new Uri(localFileName).ToString();
                    continue;
                }

                using (var image = GetImage(imageSource, localDirectory))
                {
                    if (image == null)
                    {
                        WriteToLog($"Could not load unchanged image from location '{unchangedImage.Source}'");
                        continue;
                    }

                    var extension = Path.GetExtension(unchangedImage.Source.Contains("?")
                        ? unchangedImage.Source.Split('?')[0]
                        : unchangedImage.Source);
                    var fileName = GetTempFile(extension);

                    WriteToLog($"Unchanged image saved to location '{fileName}'");
                    image.Save(fileName);
                    unchangedImage.Source = new Uri(fileName).ToString();
                }
            }

            var outputFile = GetTempFile(".htm");

            outputUri = new ConvertUri(outputFile, inputUri.Encoding);

            try
            {
                using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
                {
                    if (inputUri.Encoding != null)
                    {
                        using (var textWriter = new StreamWriter(fileStream, inputUri.Encoding))
                        {
                            document.ToHtml(textWriter, new AutoSelectedMarkupFormatter());
                        }
                    }
                    else
                    {
                        using (var textWriter = new StreamWriter(fileStream))
                        {
                            document.ToHtml(textWriter, new AutoSelectedMarkupFormatter());
                        }
                    }
                }

                return(false);
            }
            catch (Exception exception)
            {
                WriteToLog($"Could not generate new html file '{outputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");

                // Keep the documented contract: no output uri when true is returned
                outputUri = null;
                return(true);
            }
        }

        /// <summary>
        /// Loads the image an html image tag points to; a <c>data:</c> source is handed to
        /// <c>GetImageFromBase64</c>, any other source is turned into an <see cref="Uri"/>
        /// and handed to <c>GetImage</c>.
        /// </summary>
        /// <param name="source">The source of the html image tag</param>
        /// <param name="localDirectory">The directory of the webpage when it is a local file,
        ///     otherwise <c>null</c></param>
        /// <returns>Whatever the called loader returns; callers treat <c>null</c> as "could not be loaded"</returns>
        private Image LoadImageFromSource(string source, string localDirectory)
        {
            return source.StartsWith("data:", StringComparison.OrdinalIgnoreCase)
                ? GetImageFromBase64(source)
                : GetImage(new Uri(source), localDirectory);
        }