Beispiel #1
0
        // Renders a single PDF page by spawning a Ghostscript child process and capturing
        // everything the child writes to its standard output into a MemoryStream.
        //
        //   pdf_filename, device, dpi, page_number : forwarded to GhostscriptBinaries.GenerateGhostscriptParameters()
        //   priority_class                         : forwarded to GhostscriptBinaries.StartGhostscriptProcess()
        //
        // Returns the MemoryStream that received the child's stdout bytes. The stream is returned
        // even when the child timed out or reported a non-zero exit code; those conditions are
        // logged only, so callers keep the best-effort behaviour they had before.
        public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
        {
            // STDOUT/STDERR: "-" is passed as the last argument (presumably the output target, selecting stdout -- TODO confirm).
            string ghostscript_parameters = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

            using (Process process = GhostscriptBinaries.StartGhostscriptProcess(ghostscript_parameters, priority_class))
            {
                Logging.Info("Process started!");

                // Read image from stdout
                using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
                {
                    using (StreamReader sr = process.StandardOutput)
                    {
                        using (FileStream fs = (FileStream)sr.BaseStream)
                        {
                            MemoryStream ms         = new MemoryStream(128 * 1024);
                            int          total_size = StreamToFile.CopyStreamToStream(fs, ms);
                            Logging.Debug特("Image size was {0} for PDF file {1}, page {2} @ dpi {3}", total_size, pdf_filename, page_number, dpi);

                            // Check that the process has exited properly
                            process.WaitForExit(1000);

                            bool has_exited = process.HasExited;

                            if (!has_exited)
                            {
                                // Disposing a Process object does not terminate the child, so a hung
                                // Ghostscript instance must be killed explicitly or it lingers as an orphan.
                                try
                                {
                                    process.Kill();

                                    // wait for the completion signal so the kill has taken effect before we dump the outputs
                                    process.WaitForExit(1000);
                                }
                                catch (Exception ex)
                                {
                                    Logging.Error(ex, "There was a problem killing the Ghostscript process after timeout");
                                }

                                Logging.Error("Ghostscript process did not terminate.\n{0}", process_output_reader.GetOutputsDumpString());
                            }
                            else if (0 != process.ExitCode)
                            {
                                // ExitCode is only read on this branch, where the process is known to have exited.
                                Logging.Error("Ghostscript process failed with exit code {0}.\n{1}", process.ExitCode, process_output_reader.GetOutputsDumpString());
                            }

                            return(ms);
                        }
                    }
                }
            }
        }
Beispiel #2
0
        // Spawns pdfdraw.exe with the given parameters and returns everything the child wrote to
        // its standard output as a MemoryStream.
        //
        //   process_parameters : command line handed to ProcessSpawning.SpawnChildProcess()
        //   priority_class     : priority class handed to ProcessSpawning.SpawnChildProcess()
        //
        // If the child has not exited one second after its stdout was fully read, it is killed
        // (best effort); the captured output is returned regardless.
        private static MemoryStream ReadEntireStandardOutput(string process_parameters, ProcessPriorityClass priority_class)
        {
            // `using` releases the process handle and its redirected streams on every exit path.
            using (Process process = ProcessSpawning.SpawnChildProcess("pdfdraw.exe", process_parameters, priority_class))
            {
                // Start asynchronous reads of stderr with a no-op handler: the data is discarded
                // (presumably so the child cannot stall on a full stderr pipe -- TODO confirm).
                process.ErrorDataReceived += (sender, e) => { };
                process.BeginErrorReadLine();

                // Read image from stdout
                StreamReader sr = process.StandardOutput;
                FileStream   fs = (FileStream)sr.BaseStream;
                MemoryStream ms = new MemoryStream(128 * 1024);
                StreamToFile.CopyStreamToStream(fs, ms);

                // Check that the process has exited properly
                process.WaitForExit(1000);
                if (!process.HasExited)
                {
                    Logging.Error("PDFRenderer process did not terminate, so killing it");

                    try
                    {
                        Logging.Info("Killing PDFRenderer process");
                        process.Kill();
                        Logging.Info("Killed PDFRenderer process");
                    }
                    catch (Exception ex)
                    {
                        // Keep this best-effort (no rethrow), but record why the kill failed.
                        Logging.Error(ex, "There was an exception while trying to kill the PDFRenderer process");
                    }
                }

                return(ms);
            }
        }
Beispiel #3
0
        // Runs pdfdraw.exe with the given parameters and returns the bytes it wrote to stdout.
        //
        // Throws ApplicationException when the child had to be killed after the one-second grace
        // period, or when it exited with a non-zero exit code. Timing figures are written to the
        // debug log along the way.
        private static MemoryStream ReadEntireStandardOutput(string process_parameters, ProcessPriorityClass priority_class)
        {
            Stopwatch timer = Stopwatch.StartNew();

            // STDOUT/STDERR
            Logging.Debug("PDFDRAW :: ReadEntireStandardOutput command: pdfdraw.exe {0}", process_parameters);

            // Stacked usings: disposal happens innermost-first, i.e. stdout stream, reader, output reader, process.
            using (Process process = ProcessSpawning.SpawnChildProcess("pdfdraw.exe", process_parameters, priority_class, stdout_is_binary: true))
            using (ProcessOutputReader output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
            using (StreamReader stdout_reader = process.StandardOutput)
            using (FileStream stdout_stream = (FileStream)stdout_reader.BaseStream)
            {
                long setup_ms = timer.ElapsedMilliseconds;
                Logging.Debug("PDFDRAW :: ReadEntireStandardOutput setup time: {0} ms for parameters:\n    {1}", setup_ms, process_parameters);

                // Pull the complete image off the child's stdout.
                MemoryStream image = new MemoryStream(256 * 1024);
                int byte_count = StreamToFile.CopyStreamToStream(stdout_stream, image);
                long total_ms = timer.ElapsedMilliseconds;
                Logging.Debug("PDFDRAW image output {0} bytes in {1} ms (output copy took {2} ms) for command:\n    pdfdraw.exe {3}", byte_count, total_ms, total_ms - setup_ms, process_parameters);

                // Give the child a moment to finish after its stdout has been drained.
                process.WaitForExit(1000);

                if (process.HasExited)
                {
                    if (process.ExitCode == 0)
                    {
                        // Clean run.
                        return image;
                    }

                    Logging.Error("PDFDRAW did fail with exit code {0} for commandline:\n    {1}\n{2}", process.ExitCode, process_parameters, output_reader.GetOutputsDumpString());

                    throw new ApplicationException($"PDFRenderer::PDFDRAW did fail with exit code {process.ExitCode}.\n    Commandline: pdfdraw.exe {process_parameters}");
                }

                // Still running after the grace period: terminate it and report the failure.
                Logging.Debug("PDFRenderer process did not terminate, so killing it.\n{0}", output_reader.GetOutputsDumpString());

                try
                {
                    process.Kill();

                    // Waiting for the exit signal also gives the STDERR collection a chance to complete (even though the app was KILLED).
                    process.WaitForExit(1000);
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was a problem killing the PDFRenderer process after timeout ({0} ms)", total_ms + 1000);
                }

                Logging.Error("PDFRenderer process did not terminate, so killed it. Commandline:\n    {0}\n{1}", process_parameters, output_reader.GetOutputsDumpString());

                throw new ApplicationException($"PDFRenderer process did not terminate, so killed it.\n    Commandline: pdfdraw.exe {process_parameters}");
            }
        }
Beispiel #4
0
        // Renders one PDF page through a Ghostscript child process and returns the bytes the child
        // wrote to stdout. A child that is still running one second after its output was read is
        // killed; a timeout or non-zero exit code is logged, and the captured stream is returned
        // in every case.
        public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
        {
            // STDOUT/STDERR
            string gs_arguments = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

            // Stacked usings: disposal happens innermost-first, i.e. stdout stream, reader, output reader, process.
            using (Process process = GhostscriptBinaries.StartGhostscriptProcess(gs_arguments, priority_class))
            {
                Logging.Info("Process started!");

                using (ProcessOutputReader output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
                using (StreamReader stdout_reader = process.StandardOutput)
                using (FileStream stdout_stream = (FileStream)stdout_reader.BaseStream)
                {
                    // Pull the rendered image off the child's stdout.
                    MemoryStream image = new MemoryStream(128 * 1024);
                    int byte_count = StreamToFile.CopyStreamToStream(stdout_stream, image);
                    Logging.Debug特("Image size was {0} for PDF file {1}, page {2} @ dpi {3}", byte_count, pdf_filename, page_number, dpi);

                    // Give the child a moment to finish, and remember whether it did so on its own.
                    process.WaitForExit(1000);
                    bool exited_in_time = process.HasExited;

                    if (!exited_in_time)
                    {
                        try
                        {
                            process.Kill();

                            // Waiting for the exit signal also gives the STDERR collection a chance to complete (even though the app was KILLED).
                            process.WaitForExit(1000);
                        }
                        catch (Exception ex)
                        {
                            Logging.Error(ex, "There was a problem killing the GhostScript process after timeout");
                        }
                    }

                    // ExitCode is only evaluated when the process exited by itself (short-circuit).
                    bool clean_exit = exited_in_time && process.ExitCode == 0;
                    if (!clean_exit)
                    {
                        Logging.Error("Ghostscript process did not terminate.\n{0}", output_reader.GetOutputsDumpString());
                    }

                    return image;
                }
            }
        }
Beispiel #5
0
        // Renders one PDF page through a Ghostscript child process and returns the bytes the child
        // wrote to its standard output.
        //
        //   pdf_filename, device, dpi, page_number : forwarded to GhostscriptBinaries.GenerateGhostscriptParameters()
        //   priority_class                         : forwarded to GhostscriptBinaries.StartGhostscriptProcess()
        //
        // The captured stream is returned even if the child had to be killed after the timeout;
        // that condition is logged only.
        public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
        {
            string ghostscript_parameters = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

            // `using` releases the process handle and its redirected streams on every exit path.
            using (Process process = GhostscriptBinaries.StartGhostscriptProcess(ghostscript_parameters, priority_class))
            {
                Logging.Info("Process started!");

                // Read image from stdout
                StreamReader sr         = process.StandardOutput;
                FileStream   fs         = (FileStream)sr.BaseStream;
                MemoryStream ms         = new MemoryStream(128 * 1024);
                int          total_size = StreamToFile.CopyStreamToStream(fs, ms);

                Logging.Debug("Image size was {0}", total_size);

                // Check that the process has exited properly
                process.WaitForExit(1000);
                if (!process.HasExited)
                {
                    Logging.Error("Ghostscript process did not terminate");

                    // Disposing the Process object does not stop the child; kill it so that a
                    // hung Ghostscript instance is not left running.
                    try
                    {
                        process.Kill();
                        process.WaitForExit(1000);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem killing the Ghostscript process after timeout");
                    }
                }

                return(ms);
            }
        }
Beispiel #6
0
        // Downloads download_url with a blocking HTTP GET and, when the response is a PDF
        // (media type ending in "pdf", or application/octet-stream), stores it in a temp file,
        // adds it to `library` and asks the main window to open the new document.
        //
        // Other outcomes:
        //   - a 30x response whose target differs from the request URI is handed to
        //     AddNewDocumentToLibraryFromInternet_ASYNCHRONOUS();
        //   - a non-PDF response produces an informational message box unless its type is listed
        //     in content_types_to_tolerate, the status is 200 and the HTML has no "<embed" tag.
        //
        // Every exception is caught and logged; nothing propagates to the caller. The status
        // line is updated before and after the attempt, whether or not it succeeded.
        public static void AddNewDocumentToLibraryFromInternet_SYNCHRONOUS(Library library, string download_url)
        {
            StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloading {0}", download_url));

            try
            {
                HttpWebRequest web_request = (HttpWebRequest)HttpWebRequest.Create(new Uri(download_url));
                web_request.Proxy             = ConfigurationManager.Instance.Proxy;
                web_request.Method            = "GET";
                web_request.AllowAutoRedirect = true;
                // https://stackoverflow.com/questions/21728773/the-underlying-connection-was-closed-an-unexpected-error-occurred-on-a-receiv
                // also: https://stackoverflow.com/questions/21481682/httpwebrequest-the-underlying-connection-was-closed-the-connection-was-closed
                web_request.KeepAlive = false;
                // Allow ALL protocols
                //
                // NOTE(review): this is a static, process-wide setting, and it includes SSL 3.0, which is
                // considered insecure. Left unchanged here because dropping it could break downloads from
                // legacy servers -- confirm whether Ssl3 is still wanted.
                ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12 | SecurityProtocolType.Ssl3;

                // same headers as sent by modern Chrome.
                // Gentlemen, start your prayer wheels!
                web_request.Headers.Add("Cache-Control", "no-cache");
                web_request.Headers.Add("Pragma", "no-cache");
                web_request.UserAgent = ConfigurationManager.Instance.ConfigurationRecord.GetWebUserAgent();

                using (HttpWebResponse web_response = (HttpWebResponse)web_request.GetResponse())
                {
                    // is this a 302/30x Response Code (Forwarded)?
                    // if so, then grab the forward reference URI and go grab that one.
                    if (web_response.StatusCode == HttpStatusCode.MovedPermanently ||
                        web_response.StatusCode == HttpStatusCode.Moved ||
                        web_response.StatusCode == HttpStatusCode.Redirect ||
                        web_response.StatusCode == HttpStatusCode.Found ||
                        web_response.StatusCode == HttpStatusCode.SeeOther ||
                        web_response.StatusCode == HttpStatusCode.RedirectKeepVerb ||
                        web_response.StatusCode == HttpStatusCode.TemporaryRedirect ||
                        (uint)web_response.StatusCode == 308)
                    {
                        string fwd_uri_str = web_response.GetResponseHeader("Location");
                        // The Location value may be relative; resolve it against the URI that answered.
                        Uri    fwd_uri     = new Uri(web_response.ResponseUri, fwd_uri_str);
                        // fetch the PDF!
                        //
                        // Warning: Do NOT get into a download loop due to badly configured or nasty webservers:
                        if (fwd_uri.AbsoluteUri != web_request.RequestUri.AbsoluteUri)
                        {
                            AddNewDocumentToLibraryFromInternet_ASYNCHRONOUS(library, fwd_uri.AbsoluteUri);
                        }
                        else
                        {
                            MessageBoxes.Info("Looks like the webserver is throwing you into an infinite redirection loop at URI {0}.", web_request.RequestUri.AbsoluteUri);
                        }
                    }
                    else
                    {
                        using (Stream response_stream = web_response.GetResponseStream())
                        {
                            string content_type = web_response.GetResponseHeader("Content-Type");
                            // See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
                            string content_disposition = web_response.GetResponseHeader("Content-Disposition");
                            string original_filename   = null;
                            try
                            {
                                if (!String.IsNullOrEmpty(content_disposition))
                                {
                                    ContentDisposition contentDisposition = new ContentDisposition(content_disposition);
                                    original_filename = contentDisposition.FileName;
                                }
                                else
                                {
                                    Logging.Warn("AddNewDocumentToLibraryFromInternet: no or empty Content-Disposition header received from {1}?\n  Headers:\n{0}", web_response.Headers, download_url);

                                    // fallback: derive the filename from the URL:
                                    original_filename = web_response.ResponseUri.LocalPath;
                                }
                            }
                            catch (Exception ex)
                            {
                                Logging.Error(ex, "AddNewDocumentToLibraryFromInternet: no Content-Disposition header received from {1}?\n  Headers:\n{0}", web_response.Headers, download_url);

                                // fallback: derive the filename from the URL:
                                original_filename = web_response.ResponseUri.LocalPath;
                            }

                            // extract the type from the Content-Type header value:
                            try
                            {
                                if (!String.IsNullOrEmpty(content_type))
                                {
                                    ContentType ct = new ContentType(content_type);
                                    // Media types are protocol tokens, not user text: lower-case them culture-independently
                                    // (a Turkish UI culture would otherwise turn 'I' into a dotless 'ı' and defeat the checks below).
                                    content_type = ct.MediaType.ToLowerInvariant();
                                }
                                else
                                {
                                    Logging.Warn("AddNewDocumentToLibraryFromInternet: no or empty Content-Type header '{2}' received from {1}?\n  Headers:\n{0}", web_response.Headers, download_url, content_type);
                                    content_type = "text/html";
                                }
                            }
                            catch (Exception ex)
                            {
                                Logging.Error(ex, "AddNewDocumentToLibraryFromInternet: no or invalid Content-Type header '{2}' received from {1}?\n  Headers:\n{0}", web_response.Headers, download_url, content_type);
                                content_type = "text/html";
                            }

                            // content_type is lower-case from here on, so ordinal comparisons are sufficient.
                            bool is_acceptable_content_type =
                                content_type.EndsWith("pdf", StringComparison.Ordinal) ||
                                content_type.StartsWith("application/octet-stream", StringComparison.Ordinal);

                            if (is_acceptable_content_type)
                            {
                                string      filename = TempFile.GenerateTempFilename("pdf");
                                PDFDocument pdf_document;
                                try
                                {
                                    using (FileStream fs = File.OpenWrite(filename))
                                    {
                                        int total_bytes = StreamToFile.CopyStreamToStream(response_stream, fs);
                                        Logging.Info("Saved {0} bytes to {1}", total_bytes, filename);
                                    }

                                    pdf_document = library.AddNewDocumentToLibrary_SYNCHRONOUS(filename, original_filename, download_url, null, null, null, false, false);
                                }
                                finally
                                {
                                    // Remove the temp file even when the download or the import failed half-way.
                                    File.Delete(filename);
                                }

                                // make sure we open every PDF fetched off the Internet: the user may need to review
                                // their metadata.
                                MainWindowServiceDispatcher.Instance.MainWindow.Dispatcher.InvokeAsync
                                (
                                    new Action(() =>
                                    {
                                        Documents.PDF.PDFControls.PDFReadingControl pdf_reading_control = MainWindowServiceDispatcher.Instance.OpenDocument(pdf_document);
                                        pdf_reading_control.EnableGuestMoveNotification(null);
                                    }),
                                    DispatcherPriority.Background
                                );
                            }
                            else
                            {
                                string html = "";

                                if (content_type.EndsWith("html", StringComparison.Ordinal))
                                {
                                    using (StreamReader sr = new StreamReader(response_stream))
                                    {
                                        html = sr.ReadToEnd();
                                        Logging.Warn("Got this HTML instead of a PDF for URI {1}: {0}", html, download_url);
                                    }
                                }

                                // TODO: check these conditions; they are meant to be pretty tight but MAYBE I still let some
                                // nasty websites' embedded PDF or other trickery slip through unnoticed.
                                bool tolerate_type = false;
                                foreach (string t in content_types_to_tolerate)
                                {
                                    if (content_type.Contains(t))
                                    {
                                        tolerate_type = true;
                                        break;
                                    }
                                }
                                if (!tolerate_type || web_response.StatusCode != HttpStatusCode.OK || html.Contains("<embed"))
                                {
                                    MessageBoxes.Info("The document library supports only PDF files at the moment.  You are trying to download something of type {0} / response code {1} at URI {2}.", content_type, (uint)web_response.StatusCode, download_url);
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "There was a problem adding the downloaded PDF to the library for URI {0}.", download_url);
            }

            StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloaded {0}", download_url));
        }
Beispiel #7
0
        // Downloads the URL passed as download_url_obj (which must be a string) with a blocking
        // HTTP GET. A response whose Content-Type ends in "pdf" or starts with
        // "application/octet-stream" is saved to a temp file and added to `library`; anything
        // else produces an informational message box.
        //
        // Exceptions raised during the download or import are caught and logged. The status line
        // is updated before and after the attempt, whether or not it succeeded.
        public static void AddNewDocumentToLibraryFromInternet_SYNCHRONOUS(Library library, object download_url_obj)
        {
            string download_url = (string)download_url_obj;

            StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloading {0}", download_url));

            try
            {
                HttpWebRequest web_request = (HttpWebRequest)HttpWebRequest.Create(download_url);
                web_request.Proxy             = ConfigurationManager.Instance.Proxy;
                web_request.Method            = "GET";
                web_request.AllowAutoRedirect = true;

                using (HttpWebResponse web_response = (HttpWebResponse)web_request.GetResponse())
                {
                    if (HttpStatusCode.Redirect == web_response.StatusCode)
                    {
                        // Redirects that the framework did not follow by itself are not chased here;
                        // record the target so the dropped download can at least be diagnosed.
                        string redirect_url = web_response.Headers["Location"];
                        Logging.Warn("Not following the redirect from {0} to {1}", download_url, redirect_url);
                    }
                    else
                    {
                        using (Stream response_stream = web_response.GetResponseStream())
                        {
                            string content_type = web_response.GetResponseHeader("Content-Type");

                            // Header values are protocol tokens, not user text: compare them culture-independently
                            // (a Turkish UI culture would otherwise lower-case 'I' to a dotless 'ı').
                            string content_type_lowercase = content_type.ToLowerInvariant();

                            bool is_acceptable_content_type =
                                content_type_lowercase.EndsWith("pdf", StringComparison.Ordinal) ||
                                content_type_lowercase.StartsWith("application/octet-stream", StringComparison.Ordinal);

                            if (is_acceptable_content_type)
                            {
                                string filename = TempFile.GenerateTempFilename("pdf");
                                try
                                {
                                    using (FileStream fs = File.OpenWrite(filename))
                                    {
                                        int total_bytes = StreamToFile.CopyStreamToStream(response_stream, fs);
                                        Logging.Info("Saved {0} bytes to {1}", total_bytes, filename);
                                    }

                                    library.AddNewDocumentToLibrary_SYNCHRONOUS(filename, download_url, null, null, null, false, false);
                                }
                                finally
                                {
                                    // Remove the temp file even when the download or the import failed half-way.
                                    File.Delete(filename);
                                }
                            }
                            else
                            {
                                if (content_type_lowercase.EndsWith("html", StringComparison.Ordinal))
                                {
                                    using (StreamReader sr = new StreamReader(response_stream))
                                    {
                                        string html = sr.ReadToEnd();
                                        Logging.Warn("Got this HTML instead of a PDF: {0}", html);
                                    }
                                }

                                MessageBoxes.Info("The document library supports only PDF files at the moment.  You are trying to download something of type {0}.", content_type);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "There was a problem adding the downloaded PDF to the library.");
            }

            StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloaded {0}", download_url));
        }
Beispiel #8
0
        // Runs the executable `pdfDrawExe` with `process_parameters`, captures its complete stdout
        // into a MemoryStream and reports the outcome in an ExecResultAggregate instead of throwing.
        //
        // Fields set on the returned object:
        //   executable / process_parameters / stdoutIsBinary : echo of the corresponding arguments
        //   stdoutStream  : everything read from the child's stdout
        //   errOutputDump : the result of ProcessOutputReader.GetOutputsDumpStrings()
        //   error         : null on a clean run, otherwise an ApplicationException describing the failure
        //   exitCode      : the child's exit code; -666 when the child had to be killed after the timeout
        //                   and no non-zero exit code is available
        //
        // NOTE(review): `binary_output` is only recorded in the result; the child and the output reader
        // are always set up with stdout_is_binary: true -- confirm that this is intended.
        private static ExecResultAggregate ReadEntireStandardOutput(string pdfDrawExe, string process_parameters, bool binary_output, ProcessPriorityClass priority_class)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            ExecResultAggregate rv = new ExecResultAggregate
            {
                executable         = pdfDrawExe,
                process_parameters = process_parameters,
                stdoutIsBinary     = binary_output
            };

            Stopwatch clk = Stopwatch.StartNew();

            // STDOUT/STDERR
            // Log the executable that is actually run rather than a hard-coded "pdfdraw.exe".
            Logging.Debug("PDFDRAW :: ReadEntireStandardOutput command: {1} {0}", process_parameters, pdfDrawExe);
            using (Process process = ProcessSpawning.SpawnChildProcess(pdfDrawExe, process_parameters, priority_class, stdout_is_binary: true))
            {
                using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
                {
                    // Read image from stdout
                    using (StreamReader sr = process.StandardOutput)
                    {
                        using (FileStream fs = (FileStream)sr.BaseStream)
                        {
                            long elapsed = clk.ElapsedMilliseconds;
                            Logging.Debug("PDFDRAW :: ReadEntireStandardOutput setup time: {0} ms for parameters:\n    {1}", elapsed, process_parameters);

                            rv.stdoutStream = new MemoryStream(1024 * 1024);
                            int total_size = StreamToFile.CopyStreamToStream(fs, rv.stdoutStream);

                            long elapsed2 = clk.ElapsedMilliseconds;
                            Logging.Debug("PDFDRAW image output {0} bytes in {1} ms (output copy took {2} ms) for command:\n    {4} {3}", total_size, elapsed2, elapsed2 - elapsed, process_parameters, pdfDrawExe);

                            // Check that the process has exited properly
                            process.WaitForExit(1000);

                            if (!process.HasExited)
                            {
                                Logging.Debug("PDFRenderer process did not terminate, so killing it.\n{0}", process_output_reader.GetOutputsDumpStrings().stderr);

                                try
                                {
                                    process.Kill();

                                    // wait for the completion signal; this also helps to collect all STDERR output of the application (even while it was KILLED)
                                    process.WaitForExit(3000);
                                }
                                catch (Exception ex)
                                {
                                    Logging.Error(ex, "There was a problem killing the PDFRenderer process after timeout ({0} ms)", elapsed2 + 1000);
                                }

                                // grab the outputs collected up to and including the kill, for the log and for the caller:
                                var outs = process_output_reader.GetOutputsDumpStrings();
                                rv.errOutputDump = outs;

                                Logging.Error($"PDFRenderer process did not terminate, so killed it. Commandline:\n    {pdfDrawExe} {process_parameters}\n{outs.stderr}");

                                rv.error    = new ApplicationException($"PDFRenderer process did not terminate, so killed it.\n    Commandline: {pdfDrawExe} {process_parameters}");

                                // Report the real exit code when the kill went through and it is non-zero;
                                // otherwise fall back to the -666 timeout marker so the result never reads as success.
                                rv.exitCode = 0;
                                if (process.HasExited)
                                {
                                    rv.exitCode = process.ExitCode;
                                }
                                if (rv.exitCode == 0)
                                {
                                    rv.exitCode = -666;  // timeout
                                }
                            }
                            else if (process.ExitCode != 0)
                            {
                                // grab the outputs of the failed run, for the log and for the caller:
                                var outs = process_output_reader.GetOutputsDumpStrings();
                                rv.errOutputDump = outs;

                                Logging.Error("PDFDRAW did fail with exit code {0} for commandline:\n    {3} {1}\n{2}", process.ExitCode, process_parameters, outs.stderr, pdfDrawExe);

                                rv.error    = new ApplicationException($"PDFRenderer::PDFDRAW did fail with exit code {process.ExitCode}.\n    Commandline: {pdfDrawExe} {process_parameters}");
                                rv.exitCode = process.ExitCode;
                            }
                            else
                            {
                                // grab stderr output for successful runs and log it anyway: MuPDF diagnostics, etc. come this way:
                                var outs = process_output_reader.GetOutputsDumpStrings();
                                rv.errOutputDump = outs;

                                // NOTE(review): a successful run is logged at Error level; left as is, presumably so the
                                // diagnostics always reach the log -- confirm whether Info would be more appropriate.
                                Logging.Error("PDFDRAW did SUCCEED with exit code {0} for commandline:\n    {3} {1}\n{2}", process.ExitCode, process_parameters, outs.stderr, pdfDrawExe);

                                rv.error    = null;
                                rv.exitCode = process.ExitCode;
                            }

                            return(rv);
                        }
                    }
                }
            }
        }