/// <summary>
        /// Opens <paramref name="item"/>, writes its extracted text to <c>m_stdout</c> and, depending on
        /// the <c>m_html</c> and <c>m_subFiles</c> switches, saves the generated images and recurses
        /// into any sub files.
        /// </summary>
        /// <param name="filename">Display name used in the progress and error messages; sub files are
        /// reported as <c>parent&gt;child</c>.</param>
        /// <param name="item">The extractor to process. It is always closed before this method returns.</param>
        /// <remarks>
        /// Any exception raised while processing is reported on <c>m_stderr</c> and swallowed, so one
        /// failing document (or sub file) does not abort the caller's loop.
        /// </remarks>
        private void ProcessFile(string filename, Extractor item)
        {
            m_stderr.WriteLine("Processing " + filename);
            try
            {
                int flags = isys_docfilters.IGR_BODY_AND_META;
                if (m_html)
                {
                    flags |= isys_docfilters.IGR_FORMAT_HTML;
                }

                item.Open(flags);

                // Extract the text and return it to stdout, one GetText(4096) request at a time
                while (!item.getEOF())
                {
                    String t = item.GetText(4096);
                    // Cleanup the text: both U+000E and carriage returns are emitted as '\n'
                    t = t.Replace('\u000E', '\n');
                    t = t.Replace('\r', '\n');
                    m_stdout.Write(t);
                }
                m_stdout.WriteLine("");

                // Extract the HTML generated images into the current folder
                if (m_html)
                {
                    SubFile image = item.GetFirstImage();
                    while (image != null)
                    {
                        try
                        {
                            m_stderr.WriteLine("Extracting image " + image.getName());
                            // NOTE(review): the image name comes from the document and is used as a
                            // relative path as-is — confirm it cannot escape the current folder.
                            image.CopyTo(image.getName());
                        }
                        finally
                        {
                            // Release the image even when the copy fails, so the handle is not leaked
                            image.Close();
                        }
                        // Move onto the next image
                        image = item.GetNextImage();
                    }
                }

                // Extract the sub files (if any) and process recursively
                if (m_subFiles)
                {
                    SubFile child = item.GetFirstSubFile();
                    while (child != null)
                    {
                        // The recursive call closes the child in its own finally block
                        ProcessFile(filename + ">" + child.getName(), child);
                        // Move onto the next sub file
                        child = item.GetNextSubFile();
                    }
                }
            }
            catch (Exception e)
            {
                m_stderr.WriteLine("Error Processing " + filename);
                m_stderr.WriteLine("   - " + e.ToString());
            }
            finally
            {
                item.Close();
            }
        }
        // Example #2
        // 0
        /// <summary>
        /// Renders every page of <paramref name="item"/> into a single HTML file and copies the
        /// images referenced by each page next to it.
        /// </summary>
        /// <param name="filename">Source file name. Its name without extension is used both for the
        /// output sub folder (under <c>m_outputFolder</c>) and for the generated <c>.html</c> file.</param>
        /// <param name="item">The extractor to process. Once the output folder has been created it is
        /// always closed before this method returns.</param>
        /// <remarks>
        /// Exceptions raised while opening or rendering are reported on <c>m_stderr</c> and swallowed.
        /// Exceptions raised while creating the output folder happen before the <c>try</c> block and
        /// propagate to the caller.
        /// </remarks>
        private void ProcessFile(string filename, Extractor item)
        {
            string fileNameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(filename);
            string outputFolder             = m_outputFolder + "\\" + fileNameWithoutExtension;

            // CreateDirectory does nothing when the folder already exists, so no Exists() check is needed
            Directory.CreateDirectory(outputFolder);

            string destination = System.IO.Path.Combine(outputFolder, fileNameWithoutExtension + ".html");

            m_stderr.WriteLine("Processing " + filename);
            try
            {
                item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE, "HTTP_EXTERNAL_IMAGES=On");

                Perceptive.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(destination, isys_docfilters.IGR_DEVICE_HTML, "HTTP_EXTERNAL_IMAGES=On");
                try
                {
                    for (int pageIndex = 0; pageIndex < item.GetPageCount(); pageIndex++)
                    {
                        Perceptive.DocumentFilters.Page page = item.GetPage(pageIndex);
                        try
                        {
                            canvas.RenderPage(page);

                            // Embedded Images: copy each one into the output folder
                            SubFile image = page.GetFirstImage();
                            while (image != null)
                            {
                                try
                                {
                                    image.CopyTo(System.IO.Path.Combine(outputFolder, image.getName()));
                                }
                                finally
                                {
                                    // Release the image even when the copy fails, so the handle is not leaked
                                    image.Close();
                                }
                                image = page.GetNextImage();
                            }
                        }
                        finally
                        {
                            page.Close();
                        }
                    }
                }
                finally
                {
                    canvas.Close();
                }
            }
            catch (Exception e)
            {
                m_stderr.WriteLine("Error Processing " + filename);
                m_stderr.WriteLine("   - " + e.ToString());
            }
            finally
            {
                item.Close();
            }
        }
        /// <summary>
        /// Renders <paramref name="item"/> to HTML in <c>m_outputFolder</c>. In async mode
        /// (<c>m_async</c>) only the first page is rendered into the main file; every later page is
        /// written to its own <c>_page_N.html</c> file and represented in the main file by an empty
        /// placeholder <c>div</c> of the page's size.
        /// </summary>
        /// <param name="filename">Source file name; its name without extension names the output file.</param>
        /// <param name="item">The extractor to process. It is always closed before this method returns.</param>
        /// <remarks>
        /// Any exception raised while processing is reported on <c>m_stderr</c> and swallowed.
        /// After rendering, the viewer's <c>.js</c> and <c>.css</c> support files are copied from the
        /// current directory into the output folder if they are not already there.
        /// </remarks>
        private void ProcessFile(string filename, Extractor item)
        {
            string destination = System.IO.Path.Combine(m_outputFolder, System.IO.Path.GetFileNameWithoutExtension(filename) + ".html");

            m_stderr.WriteLine("Processing " + filename + " to " + destination);
            try
            {
                String OptionsMain;
                String OptionsPage;

                OptionsMain = OptionsPage = m_options + ";" +
                                            "HDHTML_INCLUDE_WORD_INDEXES=on;";
                // HDHTML_INCLUDE_WORD_INDEXES will include information about each word in the run data for a page. This is
                // used by the javascript to map between the web browser's co-ordinate system and the word co-ordinate system
                // used by Document Filters. In this example, it's used by the redaction function.

                if (m_inlineImages)
                {
                    OptionsMain += "HTML_INLINE_IMAGES=on;";
                    OptionsPage += "HTML_INLINE_IMAGES=on;";
                    // HTML_INLINE_IMAGES forces any images to be inlined directly into the HTML using url(data:image/png;base64,
                    // rather than having external files.
                }

                OptionsMain += "HDHTML_OUTPUT_INJECT_HEAD=@perceptive-viewer-inject.html;";
                // HDHTML_OUTPUT_INJECT_HEAD will load the contents of the perceptive-viewer-inject.html file and place it
                // at the bottom of the <HEAD> section of the main HTML page. This allows us to inject stylesheets, javascript
                // and extra metadata that will be loaded when the page is viewed.
                // NOTE(review): this literal had been mangled into an e-mail placeholder; the value was restored from the
                // description above — confirm the "@file" syntax against the Document Filters option reference.

                OptionsPage += "HDHTML_OUTPUT_BOILERPLATE=off";
                // HDHTML_OUTPUT_BOILERPLATE disables the surrounding <html>...<body> tags that typically get generated into
                // the HTML output.  This is used when generating ASYNC pages, which must only contain the actual page data
                // and not the surrounding html.

                item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE, OptionsMain);

                // The using block guarantees the output file handle is released, even when rendering fails
                using (System.IO.FileStream OutputStream = new System.IO.FileStream(destination, FileMode.Create, FileAccess.ReadWrite))
                {
                    Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(new Hyland.DocumentFilters.IGRStreamBridge(OutputStream), isys_docfilters.IGR_DEVICE_HTML, OptionsMain);
                    try
                    {
                        for (int pageIndex = 0; pageIndex < item.GetPageCount(); pageIndex++)
                        {
                            Hyland.DocumentFilters.Page page = item.GetPage(pageIndex);
                            try
                            {
                                m_stderr.Write(" - Page " + (pageIndex + 1) + " => ");

                                // When in Async mode, always render the first page directly into the main output
                                if (!m_async || pageIndex == 0)
                                {
                                    m_stderr.WriteLine(destination);
                                    canvas.RenderPage(page);
                                }
                                else
                                {
                                    String pageFilename = System.IO.Path.ChangeExtension(destination, null) + String.Format("_page_{0}.html", pageIndex + 1);

                                    m_stderr.WriteLine(pageFilename);

                                    // Render page stubs instead of the actual page content
                                    int w = page.Width;
                                    int h = page.Height;

                                    // The file name goes into an HTML attribute, so it must be HTML-encoded
                                    String pageTitle = System.Net.WebUtility.HtmlEncode(System.IO.Path.GetFileName(pageFilename));

                                    String html = "<div class=\"idf-page\" style=\"width: " + w + "px; height: "
                                                  + h + "px; border: 1px solid black; overflow:hidden;\" title=\"" + pageTitle + "\"></div>";

                                    // Encode explicitly as UTF-8: Encoding.Default (which UTF8Encoding.Default resolves to)
                                    // is the system ANSI code page on .NET Framework and would corrupt non-ASCII names.
                                    byte[] chars = Encoding.UTF8.GetBytes(html);
                                    OutputStream.Write(chars, 0, chars.Length);

                                    // Render page to its own file
                                    Hyland.DocumentFilters.Canvas pageCanvas = m_filters.MakeOutputCanvas(pageFilename, isys_docfilters.IGR_DEVICE_HTML, OptionsPage);
                                    try
                                    {
                                        pageCanvas.RenderPage(page);
                                    }
                                    finally
                                    {
                                        // Close the per-page canvas even when rendering throws
                                        pageCanvas.Close();
                                    }
                                }

                                // Embedded Images
                                if (!m_inlineImages)
                                {
                                    SubFile image = page.GetFirstImage();
                                    while (image != null)
                                    {
                                        try
                                        {
                                            image.CopyTo(System.IO.Path.Combine(m_outputFolder, image.getName()));
                                        }
                                        finally
                                        {
                                            // Release the image even when the copy fails, so the handle is not leaked
                                            image.Close();
                                        }
                                        image = page.GetNextImage();
                                    }
                                }
                            }
                            finally
                            {
                                page.Close();
                            }
                        }
                    }
                    finally
                    {
                        canvas.Close();
                    }
                }

                // Make sure the viewer's support files sit next to the generated HTML
                String outDir = System.IO.Path.GetDirectoryName(destination);
                String scriptTarget = System.IO.Path.Combine(outDir, "perceptive-viewer-utils.js");
                if (!System.IO.File.Exists(scriptTarget))
                {
                    System.IO.File.Copy("perceptive-viewer-utils.js", scriptTarget, false);
                }
                String styleTarget = System.IO.Path.Combine(outDir, "perceptive-viewer-utils.css");
                if (!System.IO.File.Exists(styleTarget))
                {
                    System.IO.File.Copy("perceptive-viewer-utils.css", styleTarget, false);
                }
            }
            catch (Exception e)
            {
                m_stderr.WriteLine("Error Processing " + filename);
                m_stderr.WriteLine("   - " + e.ToString());
            }
            finally
            {
                item.Close();
            }
        }