/// <summary>
/// Renders a single PDF page by starting a Ghostscript process with "-" as the output target
/// and collecting everything the process writes to its standard output.
/// </summary>
/// <param name="pdf_filename">PDF file name handed to the Ghostscript parameter generator.</param>
/// <param name="page_number">Page number handed to the Ghostscript parameter generator.</param>
/// <param name="dpi">Resolution handed to the Ghostscript parameter generator.</param>
/// <param name="device">Device argument handed to the Ghostscript parameter generator.</param>
/// <param name="priority_class">Priority class handed to the Ghostscript process starter.</param>
/// <returns>
/// The <see cref="MemoryStream"/> that received the process' standard output. It is returned
/// even when the process had to be killed; the caller is responsible for disposing it.
/// </returns>
public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
{
    // STDOUT/STDERR
    string ghostscript_parameters = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

    using (Process process = GhostscriptBinaries.StartGhostscriptProcess(ghostscript_parameters, priority_class))
    {
        Logging.Info("Process started!");

        // Read image from stdout
        using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
        {
            // Disposing the reader also closes its base stream, so no separate `using` is needed
            // for the stream, and no downcast to FileStream is required to copy from it.
            using (StreamReader sr = process.StandardOutput)
            {
                MemoryStream ms = new MemoryStream(128 * 1024);
                int total_size = StreamToFile.CopyStreamToStream(sr.BaseStream, ms);
                Logging.Debug特("Image size was {0} for PDF file {1}, page {2} @ dpi {3}", total_size, pdf_filename, page_number, dpi);

                // Check that the process has exited properly
                process.WaitForExit(1000);
                if (!process.HasExited)
                {
                    Logging.Error("Ghostscript process did not terminate.\n{0}", process_output_reader.GetOutputsDumpString());

                    // Do not leave a hung Ghostscript process behind: terminate it explicitly.
                    try
                    {
                        process.Kill();

                        // wait for the completion signal after the kill
                        process.WaitForExit(1000);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem killing the Ghostscript process after timeout");
                    }
                }

                return ms;
            }
        }
    }
}
/// <summary>
/// Spawns "pdfdraw.exe" with the given parameters and copies everything it writes to its
/// standard output into a <see cref="MemoryStream"/>.
/// </summary>
/// <param name="process_parameters">Command line parameters passed to pdfdraw.exe.</param>
/// <param name="priority_class">Priority class handed to the process spawner.</param>
/// <returns>
/// The stream that received the standard output data. It is returned even when the process
/// had to be killed; the caller is responsible for disposing it.
/// </returns>
private static MemoryStream ReadEntireStandardOutput(string process_parameters, ProcessPriorityClass priority_class)
{
    // The Process object is disposed on every path so its handles are released.
    using (Process process = ProcessSpawning.SpawnChildProcess("pdfdraw.exe", process_parameters, priority_class))
    {
        // Standard error is read asynchronously and discarded
        // (presumably so the child cannot stall on an unread stderr pipe -- TODO confirm).
        process.ErrorDataReceived += (sender, e) => { };
        process.BeginErrorReadLine();

        // Read image from stdout
        using (StreamReader sr = process.StandardOutput)
        {
            MemoryStream ms = new MemoryStream(128 * 1024);
            StreamToFile.CopyStreamToStream(sr.BaseStream, ms);

            // Check that the process has exited properly
            process.WaitForExit(1000);
            if (!process.HasExited)
            {
                Logging.Error("PDFRenderer process did not terminate, so killing it");

                try
                {
                    Logging.Info("Killing PDFRenderer process");
                    process.Kill();
                    Logging.Info("Killed PDFRenderer process");
                }
                catch (Exception ex)
                {
                    // Keep the exception details: they explain why the kill failed.
                    Logging.Error(ex, "There was an exception while trying to kill the PDFRenderer process");
                }
            }

            return ms;
        }
    }
}
/// <summary>
/// Runs "pdfdraw.exe" with the given parameters, captures its standard output in a
/// <see cref="MemoryStream"/> and logs timing information along the way.
/// </summary>
/// <param name="process_parameters">Command line parameters passed to pdfdraw.exe.</param>
/// <param name="priority_class">Priority class handed to the process spawner.</param>
/// <returns>The stream that received the standard output data when the process exited with code 0.</returns>
/// <exception cref="ApplicationException">
/// Thrown when the process has not exited one second after its output was read (it is killed first),
/// or when it exited with a non-zero exit code.
/// </exception>
private static MemoryStream ReadEntireStandardOutput(string process_parameters, ProcessPriorityClass priority_class)
{
    Stopwatch stopwatch = Stopwatch.StartNew();

    // STDOUT/STDERR
    Logging.Debug("PDFDRAW :: ReadEntireStandardOutput command: pdfdraw.exe {0}", process_parameters);

    using (Process process = ProcessSpawning.SpawnChildProcess("pdfdraw.exe", process_parameters, priority_class, stdout_is_binary: true))
    using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
    using (StreamReader stdout_reader = process.StandardOutput)
    using (FileStream stdout_stream = (FileStream)stdout_reader.BaseStream)
    {
        long setup_ms = stopwatch.ElapsedMilliseconds;
        Logging.Debug("PDFDRAW :: ReadEntireStandardOutput setup time: {0} ms for parameters:\n {1}", setup_ms, process_parameters);

        // Pull the complete image off stdout
        MemoryStream image_stream = new MemoryStream(256 * 1024);
        int byte_count = StreamToFile.CopyStreamToStream(stdout_stream, image_stream);

        long copied_ms = stopwatch.ElapsedMilliseconds;
        Logging.Debug("PDFDRAW image output {0} bytes in {1} ms (output copy took {2} ms) for command:\n pdfdraw.exe {3}", byte_count, copied_ms, copied_ms - setup_ms, process_parameters);

        // Grant the process one second to wrap up
        process.WaitForExit(1000);

        if (process.HasExited)
        {
            if (0 == process.ExitCode)
            {
                return image_stream;
            }

            // Exited, but with a failure code
            Logging.Error("PDFDRAW did fail with exit code {0} for commandline:\n {1}\n{2}", process.ExitCode, process_parameters, process_output_reader.GetOutputsDumpString());
            throw new ApplicationException($"PDFRenderer::PDFDRAW did fail with exit code {process.ExitCode}.\n Commandline: pdfdraw.exe {process_parameters}");
        }

        // Still running after the grace period: terminate it and report the failure
        Logging.Debug("PDFRenderer process did not terminate, so killing it.\n{0}", process_output_reader.GetOutputsDumpString());

        try
        {
            process.Kill();

            // wait for the completion signal; this also helps to collect all STDERR output of the application (even while it was KILLED)
            process.WaitForExit(1000);
        }
        catch (Exception ex)
        {
            Logging.Error(ex, "There was a problem killing the PDFRenderer process after timeout ({0} ms)", copied_ms + 1000);
        }

        Logging.Error("PDFRenderer process did not terminate, so killed it. Commandline:\n {0}\n{1}", process_parameters, process_output_reader.GetOutputsDumpString());
        throw new ApplicationException($"PDFRenderer process did not terminate, so killed it.\n Commandline: pdfdraw.exe {process_parameters}");
    }
}
/// <summary>
/// Renders a single PDF page by starting a Ghostscript process with "-" as the output target
/// and collecting everything the process writes to its standard output.
/// </summary>
/// <param name="pdf_filename">PDF file name handed to the Ghostscript parameter generator.</param>
/// <param name="page_number">Page number handed to the Ghostscript parameter generator.</param>
/// <param name="dpi">Resolution handed to the Ghostscript parameter generator.</param>
/// <param name="device">Device argument handed to the Ghostscript parameter generator.</param>
/// <param name="priority_class">Priority class handed to the Ghostscript process starter.</param>
/// <returns>
/// The <see cref="MemoryStream"/> that received the process' standard output. It is returned
/// even when the process timed out or reported a non-zero exit code (those cases are only logged);
/// the caller is responsible for disposing it.
/// </returns>
public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
{
    // STDOUT/STDERR
    string ghostscript_parameters = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

    using (Process process = GhostscriptBinaries.StartGhostscriptProcess(ghostscript_parameters, priority_class))
    {
        Logging.Info("Process started!");

        // Read image from stdout
        using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
        {
            // Disposing the reader also closes its base stream, so no separate `using` is needed
            // for the stream, and no downcast to FileStream is required to copy from it.
            using (StreamReader sr = process.StandardOutput)
            {
                MemoryStream ms = new MemoryStream(128 * 1024);
                int total_size = StreamToFile.CopyStreamToStream(sr.BaseStream, ms);
                Logging.Debug特("Image size was {0} for PDF file {1}, page {2} @ dpi {3}", total_size, pdf_filename, page_number, dpi);

                // Check that the process has exited properly
                process.WaitForExit(1000);

                // Remember whether the process finished on its own, *before* we possibly kill it.
                bool has_exited = process.HasExited;
                if (!has_exited)
                {
                    try
                    {
                        process.Kill();

                        // wait for the completion signal; this also helps to collect all STDERR output of the application (even while it was KILLED)
                        process.WaitForExit(1000);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem killing the GhostScript process after timeout");
                    }
                }

                // Check that we had a clean exit; report a timeout and a failing exit code
                // as two different problems so the log states what actually went wrong.
                if (!has_exited)
                {
                    Logging.Error("Ghostscript process did not terminate.\n{0}", process_output_reader.GetOutputsDumpString());
                }
                else if (0 != process.ExitCode)
                {
                    Logging.Error("Ghostscript process failed with exit code {0}.\n{1}", process.ExitCode, process_output_reader.GetOutputsDumpString());
                }

                return ms;
            }
        }
    }
}
/// <summary>
/// Renders a single PDF page by starting a Ghostscript process with "-" as the output target
/// and collecting everything the process writes to its standard output.
/// </summary>
/// <param name="pdf_filename">PDF file name handed to the Ghostscript parameter generator.</param>
/// <param name="page_number">Page number handed to the Ghostscript parameter generator.</param>
/// <param name="dpi">Resolution handed to the Ghostscript parameter generator.</param>
/// <param name="device">Device argument handed to the Ghostscript parameter generator.</param>
/// <param name="priority_class">Priority class handed to the Ghostscript process starter.</param>
/// <returns>
/// The <see cref="MemoryStream"/> that received the process' standard output. It is returned
/// even when the process had to be killed; the caller is responsible for disposing it.
/// </returns>
public static MemoryStream RenderPage_AsMemoryStream(string pdf_filename, int page_number, int dpi, string device, ProcessPriorityClass priority_class)
{
    string ghostscript_parameters = GhostscriptBinaries.GenerateGhostscriptParameters(pdf_filename, device, dpi, page_number, 0, @"-");

    // Dispose the Process (and its stdout reader) on every path so the handles are released.
    using (Process process = GhostscriptBinaries.StartGhostscriptProcess(ghostscript_parameters, priority_class))
    {
        Logging.Info("Process started!");

        // Read image from stdout
        using (StreamReader sr = process.StandardOutput)
        {
            MemoryStream ms = new MemoryStream(128 * 1024);
            int total_size = StreamToFile.CopyStreamToStream(sr.BaseStream, ms);
            Logging.Debug("Image size was {0}", total_size);

            // Check that the process has exited properly
            process.WaitForExit(1000);
            if (!process.HasExited)
            {
                Logging.Error("Ghostscript process did not terminate");

                // Do not leave a hung Ghostscript process behind: terminate it explicitly.
                try
                {
                    process.Kill();
                    process.WaitForExit(1000);
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was a problem killing the Ghostscript process after timeout");
                }
            }

            return ms;
        }
    }
}
/// <summary>
/// Downloads the resource at <paramref name="download_url"/> and, when the response looks like a PDF
/// (media type ending in "pdf" or starting with "application/octet-stream"), stores it in a temporary
/// file, adds it to <paramref name="library"/> and asks the main window to open the new document.
/// Any other response is reported to the user unless its type is in the tolerated list.
/// </summary>
/// <param name="library">Library that receives the downloaded document.</param>
/// <param name="download_url">Absolute URL to fetch with an HTTP GET request.</param>
/// <remarks>
/// All exceptions are caught and logged; nothing is thrown to the caller. The status line is updated
/// to "Downloaded ..." at the end regardless of the outcome.
/// </remarks>
public static void AddNewDocumentToLibraryFromInternet_SYNCHRONOUS(Library library, string download_url)
{
    StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloading {0}", download_url));

    try
    {
        HttpWebRequest web_request = (HttpWebRequest)HttpWebRequest.Create(new Uri(download_url));
        web_request.Proxy = ConfigurationManager.Instance.Proxy;
        web_request.Method = "GET";
        web_request.AllowAutoRedirect = true;
        // https://stackoverflow.com/questions/21728773/the-underlying-connection-was-closed-an-unexpected-error-occurred-on-a-receiv
        // also: https://stackoverflow.com/questions/21481682/httpwebrequest-the-underlying-connection-was-closed-the-connection-was-closed
        web_request.KeepAlive = false;

        // Allow ALL protocols
        // NOTE(review): this is a process-wide setting and it still enables SSL3 -- confirm that this is intended.
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12 | SecurityProtocolType.Ssl3;

        // same headers as sent by modern Chrome.
        // Gentlemen, start your prayer wheels!
        web_request.Headers.Add("Cache-Control", "no-cache");
        web_request.Headers.Add("Pragma", "no-cache");
        web_request.UserAgent = ConfigurationManager.Instance.ConfigurationRecord.GetWebUserAgent();

        using (HttpWebResponse web_response = (HttpWebResponse)web_request.GetResponse())
        {
            // is this a 302/30x Response Code (Forwarded)?
            // if so, then grab the forward reference URI and go grab that one.
            if (web_response.StatusCode == HttpStatusCode.MovedPermanently
                || web_response.StatusCode == HttpStatusCode.Moved
                || web_response.StatusCode == HttpStatusCode.Redirect
                || web_response.StatusCode == HttpStatusCode.Found
                || web_response.StatusCode == HttpStatusCode.SeeOther
                || web_response.StatusCode == HttpStatusCode.RedirectKeepVerb
                || web_response.StatusCode == HttpStatusCode.TemporaryRedirect
                || (uint)web_response.StatusCode == 308)
            {
                string fwd_uri_str = web_response.GetResponseHeader("Location");
                Uri fwd_uri = new Uri(web_response.ResponseUri, fwd_uri_str);

                // fetch the PDF!
                //
                // Warning: Do NOT get into a download loop due to badly configured or nasty webservers:
                if (fwd_uri.AbsoluteUri != web_request.RequestUri.AbsoluteUri)
                {
                    AddNewDocumentToLibraryFromInternet_ASYNCHRONOUS(library, fwd_uri.AbsoluteUri);
                }
                else
                {
                    MessageBoxes.Info("Looks like the webserver is throwing you into an infinite redirection loop at URI {0}.", web_request.RequestUri.AbsoluteUri);
                }
            }
            else
            {
                using (Stream response_stream = web_response.GetResponseStream())
                {
                    string content_type = web_response.GetResponseHeader("Content-Type");
                    // See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
                    string content_disposition = web_response.GetResponseHeader("Content-Disposition");
                    string original_filename = null;
                    try
                    {
                        if (!String.IsNullOrEmpty(content_disposition))
                        {
                            ContentDisposition contentDisposition = new ContentDisposition(content_disposition);
                            original_filename = contentDisposition.FileName;
                            //StringDictionary parameters = contentDisposition.Parameters;
                        }
                        else
                        {
                            Logging.Warn("AddNewDocumentToLibraryFromInternet: no or empty Content-Disposition header received from {1}?\n Headers:\n{0}", web_response.Headers, download_url);

                            // fallback: derive the filename from the URL:
                            original_filename = web_response.ResponseUri.LocalPath;
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "AddNewDocumentToLibraryFromInternet: no Content-Disposition header received from {1}?\n Headers:\n{0}", web_response.Headers, download_url);

                        // fallback: derive the filename from the URL:
                        original_filename = web_response.ResponseUri.LocalPath;
                    }

                    // extract the type from the Content-Type header value:
                    try
                    {
                        if (!String.IsNullOrEmpty(content_type))
                        {
                            ContentType ct = new ContentType(content_type);

                            // Media types are protocol tokens, not user text: lower-case them culture-independently
                            // so that e.g. "APPLICATION/PDF" is recognized under every UI culture.
                            content_type = ct.MediaType.ToLowerInvariant();
                        }
                        else
                        {
                            Logging.Warn("AddNewDocumentToLibraryFromInternet: no or empty Content-Type header '{2}' received from {1}?\n Headers:\n{0}", web_response.Headers, download_url, content_type);
                            content_type = "text/html";
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "AddNewDocumentToLibraryFromInternet: no or invalid Content-Type header '{2}' received from {1}?\n Headers:\n{0}", web_response.Headers, download_url, content_type);
                        content_type = "text/html";
                    }

                    bool is_acceptable_content_type = false;
                    if (content_type.EndsWith("pdf", StringComparison.Ordinal))
                    {
                        is_acceptable_content_type = true;
                    }
                    if (content_type.StartsWith("application/octet-stream", StringComparison.Ordinal))
                    {
                        is_acceptable_content_type = true;
                    }

                    if (is_acceptable_content_type)
                    {
                        string filename = TempFile.GenerateTempFilename("pdf");
                        PDFDocument pdf_document;
                        try
                        {
                            using (FileStream fs = File.OpenWrite(filename))
                            {
                                int total_bytes = StreamToFile.CopyStreamToStream(response_stream, fs);
                                Logging.Info("Saved {0} bytes to {1}", total_bytes, filename);
                                //fs.Close(); -- autoclosed by `using` statement
                            }

                            pdf_document = library.AddNewDocumentToLibrary_SYNCHRONOUS(filename, original_filename, download_url, null, null, null, false, false);
                        }
                        finally
                        {
                            // Remove the temporary download even when saving or importing it failed.
                            File.Delete(filename);
                        }

                        // make sure we open every PDF fetched off the Internet: the user may need to review
                        // their metadata.
                        MainWindowServiceDispatcher.Instance.MainWindow.Dispatcher.InvokeAsync
                        (
                            new Action(() =>
                            {
                                Documents.PDF.PDFControls.PDFReadingControl pdf_reading_control = MainWindowServiceDispatcher.Instance.OpenDocument(pdf_document);
                                pdf_reading_control.EnableGuestMoveNotification(null);
                            }),
                            DispatcherPriority.Background
                        );
                    }
                    else
                    {
                        string html = "";
                        if (content_type.EndsWith("html", StringComparison.Ordinal))
                        {
                            using (StreamReader sr = new StreamReader(response_stream))
                            {
                                html = sr.ReadToEnd();
                                Logging.Warn("Got this HTML instead of a PDF for URI {1}: {0}", html, download_url);
                            }
                        }

                        // TODO: check these conditions; they are meant to be pretty tight but MAYBE I still let some
                        // nasty websites' embedded PDF or other trickery slip through unnoticed.
                        bool tolerate_type = false;
                        foreach (string t in content_types_to_tolerate)
                        {
                            if (content_type.Contains(t))
                            {
                                tolerate_type = true;
                                break;
                            }
                        }

                        if (!tolerate_type || web_response.StatusCode != HttpStatusCode.OK || html.Contains("<embed"))
                        {
                            MessageBoxes.Info("The document library supports only PDF files at the moment. You are trying to download something of type {0} / response code {1} at URI {2}.", content_type, (uint)web_response.StatusCode, download_url);
                        }
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem adding the downloaded PDF to the library for URI {0}.", download_url);
    }

    StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloaded {0}", download_url));
}
/// <summary>
/// Downloads the resource at the given URL and, when the Content-Type header ends in "pdf" or starts
/// with "application/octet-stream", stores it in a temporary file and adds it to
/// <paramref name="library"/>. Any other content type is reported to the user.
/// </summary>
/// <param name="library">Library that receives the downloaded document.</param>
/// <param name="download_url_obj">The download URL; it is cast to <see cref="string"/>.</param>
/// <remarks>
/// Exceptions raised inside the download are caught and logged. The status line is updated to
/// "Downloaded ..." at the end regardless of the outcome.
/// </remarks>
public static void AddNewDocumentToLibraryFromInternet_SYNCHRONOUS(Library library, object download_url_obj)
{
    string download_url = (string)download_url_obj;

    StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloading {0}", download_url));

    try
    {
        HttpWebRequest web_request = (HttpWebRequest)HttpWebRequest.Create(download_url);
        web_request.Proxy = ConfigurationManager.Instance.Proxy;
        web_request.Method = "GET";
        web_request.AllowAutoRedirect = true;

        using (HttpWebResponse web_response = (HttpWebResponse)web_request.GetResponse())
        {
            if (HttpStatusCode.Redirect == web_response.StatusCode)
            {
                // The redirect is not followed here; leave a trace in the log instead of dropping it silently.
                string redirect_url = web_response.Headers["Location"];
                Logging.Warn("Not following the redirect from {0} to {1}", download_url, redirect_url);
            }
            else
            {
                using (Stream response_stream = web_response.GetResponseStream())
                {
                    string content_type = web_response.GetResponseHeader("Content-Type");

                    // Header values are protocol text: compare them culture-independently.
                    string content_type_lower = content_type.ToLowerInvariant();

                    bool is_acceptable_content_type =
                        content_type_lower.EndsWith("pdf", StringComparison.Ordinal)
                        || content_type_lower.StartsWith("application/octet-stream", StringComparison.Ordinal);

                    if (is_acceptable_content_type)
                    {
                        string filename = TempFile.GenerateTempFilename("pdf");
                        try
                        {
                            using (FileStream fs = File.OpenWrite(filename))
                            {
                                int total_bytes = StreamToFile.CopyStreamToStream(response_stream, fs);
                                Logging.Info("Saved {0} bytes to {1}", total_bytes, filename);
                            }

                            library.AddNewDocumentToLibrary_SYNCHRONOUS(filename, download_url, null, null, null, false, false);
                        }
                        finally
                        {
                            // Remove the temporary download even when saving or importing it failed.
                            File.Delete(filename);
                        }
                    }
                    else
                    {
                        if (content_type_lower.EndsWith("html", StringComparison.Ordinal))
                        {
                            using (StreamReader sr = new StreamReader(response_stream))
                            {
                                string html = sr.ReadToEnd();
                                Logging.Warn("Got this HTML instead of a PDF: {0}", html);
                            }
                        }

                        MessageBoxes.Info("The document library supports only PDF files at the moment. You are trying to download something of type {0}.", content_type);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem adding the downloaded PDF to the library.");
    }

    StatusManager.Instance.UpdateStatus(LIBRARY_DOWNLOAD, String.Format("Downloaded {0}", download_url));
}
/// <summary>
/// Runs the given executable with the given parameters, captures its standard output in a
/// <see cref="MemoryStream"/> and reports the outcome (exit code, error and collected output dump)
/// in an <see cref="ExecResultAggregate"/> instead of throwing.
/// </summary>
/// <param name="pdfDrawExe">Executable to spawn.</param>
/// <param name="process_parameters">Command line parameters passed to the executable.</param>
/// <param name="binary_output">Recorded in the result as <c>stdoutIsBinary</c>.</param>
/// <param name="priority_class">Priority class handed to the process spawner.</param>
/// <returns>
/// The aggregate result: <c>error</c> is <c>null</c> and <c>exitCode</c> is the process exit code on success;
/// on a non-zero exit code <c>error</c> describes the failure; when the process had to be killed
/// <c>exitCode</c> is the exit code observed after the kill, or -666 when that is unavailable or 0.
/// </returns>
private static ExecResultAggregate ReadEntireStandardOutput(string pdfDrawExe, string process_parameters, bool binary_output, ProcessPriorityClass priority_class)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    ExecResultAggregate rv = new ExecResultAggregate
    {
        executable = pdfDrawExe,
        process_parameters = process_parameters,
        stdoutIsBinary = binary_output
    };

    Stopwatch clk = Stopwatch.StartNew();

    // STDOUT/STDERR
    Logging.Debug("PDFDRAW :: ReadEntireStandardOutput command: {0} {1}", pdfDrawExe, process_parameters);

    // NOTE(review): `binary_output` is only recorded in the result; the spawner and the output reader
    // are always told that stdout is binary -- confirm that this is intended.
    using (Process process = ProcessSpawning.SpawnChildProcess(pdfDrawExe, process_parameters, priority_class, stdout_is_binary: true))
    {
        using (ProcessOutputReader process_output_reader = new ProcessOutputReader(process, stdout_is_binary: true))
        {
            // Read image from stdout.
            // Disposing the reader also closes its base stream, so no separate `using` is needed
            // for the stream, and no downcast to FileStream is required to copy from it.
            using (StreamReader sr = process.StandardOutput)
            {
                long elapsed = clk.ElapsedMilliseconds;
                Logging.Debug("PDFDRAW :: ReadEntireStandardOutput setup time: {0} ms for parameters:\n {1}", elapsed, process_parameters);

                rv.stdoutStream = new MemoryStream(1024 * 1024);
                int total_size = StreamToFile.CopyStreamToStream(sr.BaseStream, rv.stdoutStream);

                long elapsed2 = clk.ElapsedMilliseconds;
                Logging.Debug("PDFDRAW image output {0} bytes in {1} ms (output copy took {2} ms) for command:\n {4} {3}", total_size, elapsed2, elapsed2 - elapsed, process_parameters, pdfDrawExe);

                // Check that the process has exited properly
                process.WaitForExit(1000);

                if (!process.HasExited)
                {
                    Logging.Debug("PDFRenderer process did not terminate, so killing it.\n{0}", process_output_reader.GetOutputsDumpStrings().stderr);

                    try
                    {
                        process.Kill();

                        // wait for the completion signal; this also helps to collect all STDERR output of the application (even while it was KILLED)
                        process.WaitForExit(3000);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem killing the PDFRenderer process after timeout ({0} ms)", elapsed2 + 1000);
                    }

                    // grab the stderr output of the killed run and log it: MuPDF diagnostics, etc. come this way:
                    var outs = process_output_reader.GetOutputsDumpStrings();
                    rv.errOutputDump = outs;

                    Logging.Error($"PDFRenderer process did not terminate, so killed it. Commandline:\n {pdfDrawExe} {process_parameters}\n{outs.stderr}");

                    rv.error = new ApplicationException($"PDFRenderer process did not terminate, so killed it.\n Commandline: {pdfDrawExe} {process_parameters}");

                    // Never report a timeout as exit code 0: use the real exit code when the kill
                    // produced a non-zero one, otherwise the -666 marker.
                    rv.exitCode = 0;
                    if (process.HasExited)
                    {
                        rv.exitCode = process.ExitCode;
                    }
                    if (rv.exitCode == 0)
                    {
                        rv.exitCode = -666; // timeout
                    }
                }
                else if (process.ExitCode != 0)
                {
                    // grab the stderr output of the failed run and log it: MuPDF diagnostics, etc. come this way:
                    var outs = process_output_reader.GetOutputsDumpStrings();
                    rv.errOutputDump = outs;

                    Logging.Error("PDFDRAW did fail with exit code {0} for commandline:\n {3} {1}\n{2}", process.ExitCode, process_parameters, outs.stderr, pdfDrawExe);

                    rv.error = new ApplicationException($"PDFRenderer::PDFDRAW did fail with exit code {process.ExitCode}.\n Commandline: {pdfDrawExe} {process_parameters}");
                    rv.exitCode = process.ExitCode;
                }
                else
                {
                    // grab stderr output for successful runs and log it anyway: MuPDF diagnostics, etc. come this way:
                    var outs = process_output_reader.GetOutputsDumpStrings();
                    rv.errOutputDump = outs;

                    // A successful run is not an error: log it at debug level like the other progress messages.
                    Logging.Debug("PDFDRAW did SUCCEED with exit code {0} for commandline:\n {3} {1}\n{2}", process.ExitCode, process_parameters, outs.stderr, pdfDrawExe);

                    rv.error = null;
                    rv.exitCode = process.ExitCode;
                }

                return rv;
            }
        }
    }
}