/// <summary>
/// Writes the text of <paramref name="item"/> to <c>m_stdout</c> and, depending on the
/// <c>m_html</c> and <c>m_subFiles</c> switches, also saves its images to the current
/// directory and processes its sub files recursively.
/// </summary>
/// <param name="filename">
/// Display name used in progress and error messages. Sub files are reported as
/// <c>parent&gt;child</c>.
/// </param>
/// <param name="item">Document to process. It is closed before this method returns.</param>
/// <remarks>
/// Exceptions raised inside the processing block are reported on <c>m_stderr</c> and are
/// not rethrown.
/// </remarks>
private void ProcessFile(string filename, Extractor item)
{
    m_stderr.WriteLine("Processing " + filename);
    try
    {
        int flags = isys_docfilters.IGR_BODY_AND_META;
        if (m_html)
        {
            flags |= isys_docfilters.IGR_FORMAT_HTML;
        }
        item.Open(flags);

        // Extract the text in chunks and send it to stdout
        while (!item.getEOF())
        {
            String t = item.GetText(4096);

            // Cleanup the text: U+000E and carriage returns both become line feeds
            t = t.Replace('\u000E', '\n');
            t = t.Replace('\r', '\n');
            m_stdout.Write(t);
        }
        m_stdout.WriteLine("");

        // Extract the HTML generated images into the current folder
        if (m_html)
        {
            SubFile image = item.GetFirstImage();
            while (image != null)
            {
                try
                {
                    m_stderr.WriteLine("Extracting image " + image.getName());
                    image.CopyTo(image.getName());
                }
                finally
                {
                    // Release the image even when the copy fails, so a bad image
                    // does not leak its handle.
                    image.Close();
                }

                // Move onto the next image
                image = item.GetNextImage();
            }
        }

        // Extract the sub files (if any) and process recursively
        if (m_subFiles)
        {
            SubFile child = item.GetFirstSubFile();
            while (child != null)
            {
                // The recursive call closes the child in its own finally block.
                ProcessFile(filename + ">" + child.getName(), child);

                // Move onto the next sub file
                child = item.GetNextSubFile();
            }
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e.ToString());
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Renders every page of <paramref name="item"/> to a single HTML file and copies the
/// images referenced by each page next to it.
/// </summary>
/// <param name="filename">
/// Source file name. Its name without extension is used both for the per-document output
/// folder under <c>m_outputFolder</c> and for the generated <c>.html</c> file.
/// </param>
/// <param name="item">Document to convert. It is closed before this method returns.</param>
/// <remarks>
/// Exceptions raised while opening or rendering are reported on <c>m_stderr</c> and are not
/// rethrown. Failure to create the output folder happens before that block and propagates
/// to the caller.
/// </remarks>
private void ProcessFile(string filename, Extractor item)
{
    string fileNameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(filename);

    // Path.Combine keeps the separator platform-neutral and matches how the
    // destination file is built below.
    string outputFolder = System.IO.Path.Combine(m_outputFolder, fileNameWithoutExtension);

    // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
    Directory.CreateDirectory(outputFolder);

    string destination = System.IO.Path.Combine(outputFolder, fileNameWithoutExtension + ".html");

    m_stderr.WriteLine("Processing " + filename);
    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE, "HTTP_EXTERNAL_IMAGES=On");

        Perceptive.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(destination, isys_docfilters.IGR_DEVICE_HTML, "HTTP_EXTERNAL_IMAGES=On");
        try
        {
            for (int pageIndex = 0; pageIndex < item.GetPageCount(); pageIndex++)
            {
                Perceptive.DocumentFilters.Page page = item.GetPage(pageIndex);
                try
                {
                    canvas.RenderPage(page);

                    // Embedded Images
                    SubFile image = page.GetFirstImage();
                    while (image != null)
                    {
                        try
                        {
                            image.CopyTo(System.IO.Path.Combine(outputFolder, image.getName()));
                        }
                        finally
                        {
                            // Release each image once copied (or if the copy failed).
                            image.Close();
                        }
                        image = page.GetNextImage();
                    }
                }
                finally
                {
                    page.Close();
                }
            }
        }
        finally
        {
            canvas.Close();
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e.ToString());
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Renders every page of <paramref name="item"/> onto a TIF canvas backed by an in-memory
/// stream, then copies the buffered bytes to <paramref name="outFile"/>.
/// </summary>
/// <param name="filename">Display name used in progress and error messages.</param>
/// <param name="outFile">Already-open writer that receives the buffered output.</param>
/// <param name="item">Document to convert. It is closed before this method returns.</param>
/// <remarks>
/// The copy to <paramref name="outFile"/> runs in the canvas <c>finally</c> block, so it is
/// also attempted when rendering fails part-way. Exceptions are reported on <c>m_stderr</c>
/// and are not rethrown.
/// </remarks>
private void ProcessFile(string filename, BinaryWriter outFile, Extractor item)
{
    m_stderr.WriteLine("Processing " + filename);

    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

        CustomStream buffer = new CustomStream(new MemoryStream());
        Hyland.DocumentFilters.Canvas tiffCanvas =
            m_filters.MakeOutputCanvas(buffer, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");

        try
        {
            int current = 0;
            while (current < item.GetPageCount())
            {
                Hyland.DocumentFilters.Page rendered = item.GetPage(current);
                try
                {
                    tiffCanvas.RenderPage(rendered);
                }
                finally
                {
                    rendered.Close();
                }

                current++;
            }
        }
        finally
        {
            tiffCanvas.Close();

            // Hand the in-memory contents to the caller's open writer (e.g. stdout)
            long written = buffer.writeTo(outFile);
            m_stderr.WriteLine(written + " bytes written to outFile");
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e);
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Renders every page of <paramref name="item"/> into one <c>.tif</c> file inside
/// <c>m_outputFolder</c>.
/// </summary>
/// <param name="filename">
/// Source file name. Its name without extension, plus <c>.tif</c>, names the output file.
/// </param>
/// <param name="item">Document to convert. It is closed before this method returns.</param>
/// <remarks>
/// Exceptions raised while opening or rendering are reported on <c>m_stderr</c> and are not
/// rethrown.
/// </remarks>
private void ProcessFile(string filename, Extractor item)
{
    string baseName = System.IO.Path.GetFileNameWithoutExtension(filename);
    string destination = System.IO.Path.Combine(m_outputFolder, baseName + ".tif");

    m_stderr.WriteLine("Processing " + filename);

    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

        Hyland.DocumentFilters.Canvas tiffCanvas =
            m_filters.MakeOutputCanvas(destination, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");

        try
        {
            int current = 0;
            while (current < item.GetPageCount())
            {
                Hyland.DocumentFilters.Page rendered = item.GetPage(current);
                try
                {
                    tiffCanvas.RenderPage(rendered);
                }
                finally
                {
                    rendered.Close();
                }

                current++;
            }
        }
        finally
        {
            tiffCanvas.Close();
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e);
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Prints, for every page of <paramref name="item"/>, a header line with the page size and
/// word count, followed by one line per word giving its index, text, position, size and
/// character offset.
/// </summary>
/// <param name="filename">Display name used in progress and error messages.</param>
/// <param name="item">Document to inspect. It is closed before this method returns.</param>
/// <remarks>
/// The listing goes to <see cref="System.Console"/>; progress and errors go to
/// <c>m_stderr</c>. Exceptions are reported and not rethrown.
/// </remarks>
private void ProcessFile(string filename, Extractor item)
{
    m_stderr.WriteLine("Processing " + filename);

    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

        int current = 0;
        while (current < item.GetPageCount())
        {
            Hyland.DocumentFilters.Page page = item.GetPage(current);
            try
            {
                // Page numbers are shown 1-based
                string header = String.Format(
                    "Page {0,-16}[width: {1,3}; height: {2,3}; words: {3,3}]",
                    current + 1, page.Width, page.Height, page.WordCount);
                System.Console.WriteLine(header);

                // Walk the page's words through its FirstWord / NextWord cursor
                Hyland.DocumentFilters.Word entry = page.FirstWord;
                while (entry != null)
                {
                    string line = String.Format(
                        "{0,3}. {1,-15} [x: {2,4}; y: {3,4}; width: {4,3}; height: {5,3}; character: {6,4}]",
                        entry.WordIndex, entry.Text, entry.X, entry.Y, entry.Width, entry.Height, entry.CharacterOffset);
                    System.Console.WriteLine(line);

                    entry = page.NextWord;
                }

                System.Console.WriteLine("");
            }
            finally
            {
                page.Close();
            }

            current++;
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e);
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Renders <paramref name="item"/> to an HTML file in <c>m_outputFolder</c> and makes sure
/// the viewer's script and stylesheet sit next to it.
/// </summary>
/// <param name="filename">
/// Source file name. Its name without extension, plus <c>.html</c>, names the main output file.
/// </param>
/// <param name="item">Document to convert. It is closed before this method returns.</param>
/// <remarks>
/// When <c>m_async</c> is set, only the first page is rendered into the main file. Every
/// later page gets a placeholder <c>div</c> in the main file and is rendered to its own
/// <c>_page_N.html</c> file. When <c>m_inlineImages</c> is not set, each page's images are
/// copied into <c>m_outputFolder</c>. Exceptions are reported on <c>m_stderr</c> and are
/// not rethrown.
/// </remarks>
private void ProcessFile(string filename, Extractor item)
{
    string destination = System.IO.Path.Combine(m_outputFolder, System.IO.Path.GetFileNameWithoutExtension(filename) + ".html");

    m_stderr.WriteLine("Processing " + filename + " to " + destination);
    try
    {
        String mainOptions;
        String pageOptions;

        // HDHTML_INCLUDE_WORD_INDEXES will include information about each word in the run data for a page. This is
        // used by the javascript to map between the web browser's co-ordinate system and the word co-ordinate system
        // used by Document Filters. In this example, it's used by the redaction function.
        mainOptions = pageOptions = m_options + ";" + "HDHTML_INCLUDE_WORD_INDEXES=on;";

        if (m_inlineImages)
        {
            // HTML_INLINE_IMAGES forces any images to be inlined directly into the HTML using url(data:image/png;base64,
            // rather than having external files.
            mainOptions += "HTML_INLINE_IMAGES=on;";
            pageOptions += "HTML_INLINE_IMAGES=on;";
        }

        // HDHTML_OUTPUT_INJECT_HEAD will load the contents of the perceptive-viewer-inject.html file and place it
        // at the bottom of the <HEAD> section of the main HTML page. This allows us to inject stylesheets, javascript
        // and extra metadata that will be loaded when the page is viewed.
        // NOTE(review): this literal had been replaced by an e-mail obfuscation placeholder ("[email protected]");
        // it is restored from the description above - confirm the "@file" value syntax against the option reference.
        mainOptions += "HDHTML_OUTPUT_INJECT_HEAD=@perceptive-viewer-inject.html;";

        // HDHTML_OUTPUT_BOILERPLATE disables the surrounding <html>...<body> tags that typically get generated into
        // the HTML output. This is used when generating ASYNC pages, which must only contain the actual page data
        // and not the surrounding html.
        pageOptions += "HDHTML_OUTPUT_BOILERPLATE=off";

        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE, mainOptions);

        // The using block guarantees the output file handle is released once the canvas is closed.
        using (System.IO.FileStream outputStream = new System.IO.FileStream(destination, FileMode.Create, FileAccess.ReadWrite))
        {
            Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(new Hyland.DocumentFilters.IGRStreamBridge(outputStream), isys_docfilters.IGR_DEVICE_HTML, mainOptions);
            try
            {
                for (int pageIndex = 0; pageIndex < item.GetPageCount(); pageIndex++)
                {
                    Hyland.DocumentFilters.Page page = item.GetPage(pageIndex);
                    try
                    {
                        m_stderr.Write(" - Page " + (pageIndex + 1) + " => ");

                        // When in Async mode, always render the first page directly into the main output
                        if (!m_async || pageIndex == 0)
                        {
                            m_stderr.WriteLine(destination);
                            canvas.RenderPage(page);
                        }
                        else
                        {
                            String pageFilename = System.IO.Path.ChangeExtension(destination, null) + String.Format("_page_{0}.html", pageIndex + 1);
                            m_stderr.WriteLine(pageFilename);

                            // Render page stubs instead of the actual page content
                            int w = page.Width;
                            int h = page.Height;

                            String html = "<div class=\"idf-page\" style=\"width: " + w + "px; height: " + h + "px; border: 1px solid black; overflow:hidden;\" title=\"" + System.IO.Path.GetFileName(pageFilename) + "\"></div>";

                            // Encoding.UTF8 is used explicitly: "UTF8Encoding.Default" is the inherited
                            // Encoding.Default, which is not guaranteed to be UTF-8.
                            byte[] chars = Encoding.UTF8.GetBytes(html);
                            outputStream.Write(chars, 0, chars.Length);

                            // Render page to its own file
                            Hyland.DocumentFilters.Canvas pageCanvas = m_filters.MakeOutputCanvas(pageFilename, isys_docfilters.IGR_DEVICE_HTML, pageOptions);
                            try
                            {
                                pageCanvas.RenderPage(page);
                            }
                            finally
                            {
                                // Close the per-page canvas even when rendering fails.
                                pageCanvas.Close();
                            }
                        }

                        // Embedded Images
                        if (!m_inlineImages)
                        {
                            SubFile image = page.GetFirstImage();
                            while (image != null)
                            {
                                try
                                {
                                    image.CopyTo(System.IO.Path.Combine(m_outputFolder, image.getName()));
                                }
                                finally
                                {
                                    // Release each image once copied (or if the copy failed).
                                    image.Close();
                                }
                                image = page.GetNextImage();
                            }
                        }
                    }
                    finally
                    {
                        page.Close();
                    }
                }
            }
            finally
            {
                canvas.Close();
            }
        }

        // Copy the viewer's support files next to the output, unless they are already there.
        String outDir = System.IO.Path.GetDirectoryName(destination);
        foreach (String asset in new String[] { "perceptive-viewer-utils.js", "perceptive-viewer-utils.css" })
        {
            String target = System.IO.Path.Combine(outDir, asset);
            if (!System.IO.File.Exists(target))
            {
                System.IO.File.Copy(asset, target, false);
            }
        }
    }
    catch (Exception e)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + e.ToString());
    }
    finally
    {
        item.Close();
    }
}