/// <summary>
/// Creates a renderer for the given PDF file.
/// </summary>
/// <param name="precomputed_document_fingerprint">Fingerprint to use for the document; when null, the fingerprint is computed from the file via <c>StreamFingerprint.FromFile</c>.</param>
/// <param name="pdf_filename">Path of the PDF file.</param>
/// <param name="pdf_user_password">User password handed to the underlying renderer.</param>
/// <param name="pdf_owner_password">Owner password handed to the underlying renderer.</param>
public PDFRenderer(string precomputed_document_fingerprint, string pdf_filename, string pdf_user_password, string pdf_owner_password)
        {
            // Remember the document location and credentials first: the fingerprint fallback below reads the stored filename.
            this.pdf_filename       = pdf_filename;
            this.pdf_user_password  = pdf_user_password;
            this.pdf_owner_password = pdf_owner_password;

            // Only hash the file when the caller did not hand us a fingerprint.
            if (null != precomputed_document_fingerprint)
            {
                this.document_fingerprint = precomputed_document_fingerprint;
            }
            else
            {
                this.document_fingerprint = StreamFingerprint.FromFile(this.pdf_filename);
            }

            // Both collaborators are created eagerly.
            pdf_render_file_layer = new PDFRendererFileLayer(this.document_fingerprint, pdf_filename);
            sorax_pdf_renderer    = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_owner_password);
        }
        /// <summary>
        /// Clears the thumbnail image and the abstract text, then queues a work item that fills one of them:
        /// when the document exists, a rendered page with annotation, highlight and ink overlays is shown as the thumbnail;
        /// otherwise the document abstract is shown (unless it equals <c>PDFAbstractExtraction.CANT_LOCATE</c>).
        /// Any exception raised by the work item is logged and not propagated.
        /// </summary>
        private void DisplayThumbnail()
        {
            ImageThumbnail.Source = null;
            TxtAbstract.Text      = "";

            if (null == pdf_document)
            {
                return;
            }

            // Read the control's dimensions here, on the calling thread (the same one that just wrote
            // ImageThumbnail.Source above). Reading WPF control properties from the queued work item
            // would be a cross-thread access to the control.
            double thumbnail_height = ImageThumbnail.Height;
            double thumbnail_width  = ImageThumbnail.Width;

            SafeThreadPool.QueueUserWorkItem(o =>
            {
                try
                {
                    if (pdf_document.DocumentExists)
                    {
                        // The page is rendered 1/IMAGE_PERCENTAGE times larger than the control,
                        // after which only the top IMAGE_PERCENTAGE of its height is kept.
                        const double IMAGE_PERCENTAGE = 0.5;

                        int render_height = (int)Math.Round(thumbnail_height / IMAGE_PERCENTAGE);
                        int render_width  = (int)Math.Round(thumbnail_width / IMAGE_PERCENTAGE);

                        using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByHeightAsImage(pdf_document.DocumentPath, pdf_document.PDFPassword, page, render_height, render_width)))
                        {
                            Bitmap image = (Bitmap)Image.FromStream(ms);
                            PDFOverlayRenderer.RenderAnnotations(image, pdf_document, page, specific_pdf_annotation);
                            PDFOverlayRenderer.RenderHighlights(image, pdf_document, page);
                            PDFOverlayRenderer.RenderInks(image, pdf_document, page);

                            // Crop to the top part of the page (the rectangle's X and Y default to 0).
                            image = image.Clone(new RectangleF {
                                Width = image.Width, Height = (int)Math.Round(image.Height * IMAGE_PERCENTAGE)
                            }, image.PixelFormat);
                            BitmapSource image_page = BitmapImageTools.CreateBitmapSourceFromImage(image);

                            // Touch the UI controls only from the UI thread.
                            WPFDoEvents.InvokeAsyncInUIThread(() =>
                            {
                                ImageThumbnail.Source = image_page;

                                if (null != ImageThumbnail.Source)
                                {
                                    ImageThumbnail.Visibility = Visibility.Visible;
                                }
                                else
                                {
                                    ImageThumbnail.Visibility = Visibility.Collapsed;
                                }
                            });
                        }
                    }
                    else
                    {
                        // No file to render: fall back to the abstract text, if one is available.
                        string abstract_text = pdf_document.Abstract;
                        if (PDFAbstractExtraction.CANT_LOCATE != abstract_text)
                        {
                            WPFDoEvents.InvokeAsyncInUIThread(() =>
                            {
                                TxtAbstract.Text = abstract_text;
                            });
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was a problem showing the PDF thumbnail");
                }
            });
        }
// Example #3
// 0
        /// <summary>
        /// Renders one page of a PDF at 200 DPI, partitions the page image into rectangles based on the
        /// regions reported by <c>PDFRegionLocator</c>, and runs Tesseract OCR on each rectangle.
        /// </summary>
        /// <param name="pdf_filename">Path of the PDF file to render.</param>
        /// <param name="page_number">Number of the page to render and OCR.</param>
        /// <returns>The words recognised in all processed rectangles; empty when no region yields a usable rectangle.</returns>
        public static WordList DoOCR(string pdf_filename, int page_number)
        {
            Logging.Info("+Rendering page {1} for PDF file {0}", pdf_filename, page_number);
            using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByDPIAsImage(pdf_filename, pdf_user_password, page_number, 200)))
            using (Bitmap bitmap = (Bitmap)Image.FromStream(ms))
            {
                Logging.Info("-Rendering page #{0}", page_number);

                Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
                Logging.Info("Language is '{0}'", language);

                using (Tesseract ocr = new Tesseract())
                {
                    ocr.Init(null, language, false);

                    Logging.Info("+Doing OCR");

                    const int MIN_WIDTH = 0;

                    // Build a list of all the rectangles to process
                    PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
                    List <Rectangle> rectangles         = new List <Rectangle>();

                    // The first region is compared against itself (as before), but an empty region list
                    // no longer throws: the loop simply does not run.
                    PDFRegionLocator.Region last_region = default(PDFRegionLocator.Region);
                    bool have_last_region               = false;

                    foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
                    {
                        if (!have_last_region)
                        {
                            last_region      = region;
                            have_last_region = true;
                        }

                        int rect_height    = region.y - last_region.y;
                        int clamped_height = Math.Max(MIN_WIDTH, rect_height);
                        int half_width     = bitmap.Width / 2;

                        // Each region yields one or two candidate rectangles spanning last_region.y .. region.y.
                        Rectangle[] candidates;
                        if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                        {
                            // Two columns: BOTH the left-hand and the right-hand half must be processed.
                            candidates = new Rectangle[]
                            {
                                new Rectangle(0, last_region.y, half_width, clamped_height),
                                new Rectangle(half_width, last_region.y, half_width, clamped_height)
                            };
                        }
                        else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                        {
                            // Full column
                            candidates = new Rectangle[]
                            {
                                new Rectangle(0, last_region.y, bitmap.Width, clamped_height)
                            };
                        }
                        else
                        {
                            // Any other state: an empty rectangle, which is reported and skipped below.
                            candidates = new Rectangle[] { new Rectangle() };
                        }

                        // All candidates of one region share the same height, so one check per region suffices.
                        Rectangle representative = candidates[candidates.Length - 1];
                        if (rect_height <= 0 || representative.Height <= 0)
                        {
                            Logging.Warn("Calculated region height is negative or zero: {0} :: Calculated region {1} <-- CURRENT:{2} - LAST:{3}", rect_height, representative, region, last_region);

                            // skip rectangle
                        }
                        else
                        {
                            foreach (Rectangle candidate in candidates)
                            {
                                // Only compare against a rectangle that was really added. Rectangle is a value
                                // type, so the merged result must be written back into the list to take effect.
                                int last_index = rectangles.Count - 1;
                                if (last_index >= 0 && rectangles[last_index].X == candidate.X && rectangles[last_index].Y == candidate.Y)
                                {
                                    Rectangle merged = rectangles[last_index];
                                    Logging.Warn("Overlapping subsequent rectangles will be merged :: CURRENT:{0} - LAST:{1}", candidate, merged);
                                    merged.Width           = Math.Max(merged.Width, candidate.Width);
                                    merged.Height          = Math.Max(merged.Height, candidate.Height);
                                    rectangles[last_index] = merged;
                                    Logging.Warn("--> Updated 'last' rectangle:{0}", merged);
                                }
                                else
                                {
                                    rectangles.Add(candidate);
                                }
                            }
                        }

                        last_region = region;
                    }

                    // DEBUG CODE: Draw in the region rectangles
                    //
                    // When we run in NOKILL mode, we "know" we're running in a debugger or stand-alone environment
                    // intended for testing this code. Hence we should dump the regions image as part of the process.
                    if (no_kill)
                    {
                        string bitmap_diag_path = pdf_filename + @"." + page_number + @"-ocr.png";

                        Logging.Info("Dumping regions-augmented page {0} PNG image to file {1}", page_number, bitmap_diag_path);

                        // Draw on a copy, so the diagnostic outlines do not end up in the image that is OCR'd below.
                        using (Bitmap diag_bitmap = new Bitmap(bitmap))
                        {
                            using (Graphics g = Graphics.FromImage(diag_bitmap))
                            {
                                foreach (Rectangle rectangle in rectangles)
                                {
                                    if (rectangle.Width <= MIN_WIDTH && rectangle.Height > MIN_WIDTH)
                                    {
                                        DrawRectangleOutline(g, Pens.Purple, rectangle);
                                    }
                                    else if (rectangle.Width > MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                                    {
                                        DrawRectangleOutline(g, Pens.PowderBlue, rectangle);
                                    }
                                    else if (rectangle.Width <= MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                                    {
                                        DrawRectangleOutline(g, Pens.Red, rectangle);
                                    }
                                    else
                                    {
                                        DrawRectangleOutline(g, Pens.LawnGreen, rectangle);
                                    }
                                }
                            }

                            diag_bitmap.Save(bitmap_diag_path, ImageFormat.Png);
                        }
                    }

                    // Do the OCR on each of the rectangles
                    WordList word_list = new WordList();
                    foreach (Rectangle rectangle in rectangles)
                    {
                        if (0 == rectangle.Width || 0 == rectangle.Height)
                        {
                            Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                            continue;
                        }

                        Logging.Info("Doing OCR for region {0} on bitmap WxH: {1}x{2}", rectangle.ToString(), bitmap.Width, bitmap.Height);
                        List <Word> result = ocr.DoOCR(bitmap, rectangle);
                        Logging.Info("Got {0} words", result.Count);
                        word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
                    }

                    Logging.Info("-Doing OCR");


                    Logging.Info("Found {0} words ({1} @ #{2})", word_list.Count, pdf_filename, page_number);

#if false
                    Logging.Info("+Reordering words for columns");
                    WordList word_list_ordered = ColumnWordOrderer.ReorderWords(word_list);
                    Logging.Info("-Reordering words for columns");
                    word_list_ordered.WriteToFile(ocr_output_filename);
#endif

                    return(word_list);
                }
            }
        }
// Example #4
// 0
        /// <summary>
        /// Renders one page of a PDF at 200 DPI, partitions the page image into rectangles based on the
        /// regions reported by <c>PDFRegionLocator</c> (two half-width rectangles after a BLANKS region,
        /// one full-width rectangle after a PIXELS region), and runs Tesseract OCR on each rectangle.
        /// </summary>
        /// <param name="pdf_filename">Path of the PDF file to render.</param>
        /// <param name="page_number">Number of the page to render and OCR.</param>
        /// <returns>The words recognised in all processed rectangles.</returns>
        public static WordList DoOCR(string pdf_filename, int page_number)
        {
            Logging.Info("+Rendering page");

            // NOTE(review): the user password is passed for both password arguments; the third argument
            // looks like it is meant to be an owner password - confirm against the SoraxPDFRenderer constructor.
            SoraxPDFRenderer renderer = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_user_password);

            // The stream must stay open for as long as the bitmap is in use, so both live in the same scope
            // and are released together once OCR has finished.
            using (MemoryStream ms = new MemoryStream(renderer.GetPageByDPIAsImage(page_number, 200)))
            using (Bitmap bitmap = (Bitmap)Image.FromStream(ms))
            {
                Logging.Info("-Rendering page");

                Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
                Logging.Info("Language is '{0}'", language);

                using (Tesseract ocr = new Tesseract())
                {
                    ocr.Init(null, language, false);

                    Logging.Info("+Doing OCR");

                    // Build a list of all the rectangles to process
                    PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
                    List <Rectangle> rectangles         = new List <Rectangle>();

                    // The first region is paired with itself (as before); an empty region list yields no rectangles
                    // instead of throwing on regions[0].
                    PDFRegionLocator.Region last_region = default(PDFRegionLocator.Region);
                    bool have_last_region               = false;

                    foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
                    {
                        if (!have_last_region)
                        {
                            last_region      = region;
                            have_last_region = true;
                        }

                        int region_height = region.y - last_region.y;

                        if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                        {
                            // LHS
                            rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, region_height));

                            // RHS
                            rectangles.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, region_height));
                        }
                        else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                        {
                            // Full column
                            rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width, region_height));
                        }

                        last_region = region;
                    }

                    // DEBUG CODE: Draw in the region rectangles
                    //{
                    //    Graphics g = Graphics.FromImage(bitmap);
                    //    foreach (Rectangle rectangle in rectangles)
                    //    {
                    //        g.DrawRectangle(Pens.Black, rectangle);
                    //    }

                    //    bitmap.Save(@"C:\temp\aaaaaa.png", ImageFormat.Png);
                    //}

                    // Do the OCR on each of the rectangles
                    WordList word_list = new WordList();

                    foreach (Rectangle rectangle in rectangles)
                    {
                        // Skip degenerate rectangles: zero extent, or negative extent when region y values do not increase.
                        if (rectangle.Width <= 0 || rectangle.Height <= 0)
                        {
                            Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                            continue;
                        }

                        Logging.Info("Doing OCR for region {0}", rectangle.ToString());
                        List <Word> result = ocr.DoOCR(bitmap, rectangle);
                        Logging.Info("Got {0} words", result.Count);
                        word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
                    }

                    Logging.Info("-Doing OCR");


                    Logging.Info("Found {0} words", word_list.Count);

                    //Logging.Info("+Reordering words for columns");
                    //WordList word_list_ordered = ColumnWordOrderer.ReorderWords(word_list);
                    //Logging.Info("-Reordering words for columns");
                    //word_list_ordered.WriteToFile(ocr_output_filename);

                    return(word_list);
                }
            }
        }
        /// <summary>
        /// Drains the pending page-resize requests, taking the most recently ordered page first.
        /// For every request whose page control is still in view, a work item is queued which renders the page
        /// image at the requested size and hands the frozen bitmap to the request's callback.
        /// The running-thread counter is decremented when no orders are left.
        /// </summary>
        private void ResizedPageImageItemThreadEntry()
        {
            WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

            for (;;)
            {
                ResizedPageImageItemRequest request = null;

                lock (resized_page_image_item_requests)
                {
                    // Out of work? Then sign off and leave.
                    int pending_count = resized_page_image_item_request_orders.Count;
                    if (0 == pending_count)
                    {
                        Interlocked.Decrement(ref num_resized_page_image_item_thread_running);
                        break;
                    }

                    // Pop the newest order off the end of the list.
                    int newest_index = pending_count - 1;
                    int page         = resized_page_image_item_request_orders[newest_index];
                    resized_page_image_item_request_orders.RemoveAt(newest_index);

                    // An order without a matching request has nothing to render: go fetch the next order.
                    if (!resized_page_image_item_requests.TryGetValue(page, out request))
                    {
                        continue;
                    }

                    resized_page_image_item_requests.Remove(page);
                }

                Logging.Debug("Performing page redraw for {0}", request.page);

                // Pages which have scrolled out of view in the meantime are not rendered.
                ASSERT.Test(request.page_control != null);
                if (!request.page_control.PageIsInView)
                {
                    continue;
                }

                SafeThreadPool.QueueUserWorkItem(o =>
                {
                    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

                    try
                    {
                        byte[] image_data = SoraxPDFRenderer.GetPageByHeightAsImage(pdf_document.DocumentPath, pdf_document.PDFPassword, request.page, request.height, request.width);

                        BitmapImage bitmap = new BitmapImage();
                        using (MemoryStream ms = new MemoryStream(image_data))
                        {
                            // OnLoad caching is requested before the stream is closed at the end of this scope.
                            bitmap.BeginInit();
                            bitmap.StreamSource = ms;
                            bitmap.CacheOption  = BitmapCacheOption.OnLoad;
                            bitmap.EndInit();
                            bitmap.Freeze();
                        }

                        if (null != bitmap)
                        {
                            request.callback(bitmap, request.height, request.width);
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was an error while resizing a PDF page image");
                    }
                });
            }
        }
// Example #6
// 0
        /// <summary>
        /// Selects the documents to recommend in the cover flow (by reading stage, then recently added, then
        /// recently read), creates their placeholders in the UI, renders a preview image for each of them on a
        /// worker callback and finally fills the placeholders in the UI thread.
        /// When nothing was selected, the cover flow button is unchecked and the statistics are refreshed instead.
        /// Asserts that it is not called on the UI thread.
        /// </summary>
        /// <param name="web_library_detail">Library whose documents are inspected; nothing happens when its <c>Xlibrary</c> is null.</param>
        private void UpdateLibraryStatistics_Stats_Background_CoverFlow(WebLibraryDetail web_library_detail)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            if (web_library_detail.Xlibrary == null)
            {
                return;
            }

            // NOTE(review): this list is sorted in place below; confirm that PDFDocuments hands out a copy
            // rather than a list shared with other code.
            List <PDFDocument> pdf_documents_all = web_library_detail.Xlibrary.PDFDocuments;

            // The list of recommended items
            DocumentDisplayWorkManager ddwm = new DocumentDisplayWorkManager();

            {
                int ITEMS_IN_LIST = 5;

                // Upcoming reading is:
                //  interrupted
                //  top priority
                //  read again
                //  recently added and no status

                pdf_documents_all.Sort(PDFDocumentListSorters.DateAddedToDatabase);

                foreach (string reading_stage in new string[] { Choices.ReadingStages_INTERRUPTED, Choices.ReadingStages_TOP_PRIORITY, Choices.ReadingStages_READ_AGAIN })
                {
                    // The limit holds across ALL reading stages: once it is reached, later stages must not add more.
                    if (ddwm.Count >= ITEMS_IN_LIST)
                    {
                        break;
                    }

                    foreach (PDFDocument pdf_document in pdf_documents_all)
                    {
                        if (!pdf_document.DocumentExists)
                        {
                            continue;
                        }

                        if (pdf_document.ReadingStage == reading_stage)
                        {
                            if (!ddwm.ContainsPDFDocument(pdf_document))
                            {
                                ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Pink, reading_stage, pdf_document);

                                if (ddwm.Count >= ITEMS_IN_LIST)
                                {
                                    break;
                                }
                            }
                        }
                    }
                }
            }

            {
                int ITEMS_IN_LIST = 3;

                // Recently added
                {
                    pdf_documents_all.Sort(PDFDocumentListSorters.DateAddedToDatabase);

                    int num_added = 0;
                    foreach (PDFDocument pdf_document in pdf_documents_all)
                    {
                        if (!pdf_document.DocumentExists)
                        {
                            continue;
                        }

                        if (!ddwm.ContainsPDFDocument(pdf_document))
                        {
                            ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Green, "Added Recently", pdf_document);

                            if (++num_added >= ITEMS_IN_LIST)
                            {
                                break;
                            }
                        }
                    }
                }

                // Recently read
                {
                    pdf_documents_all.Sort(PDFDocumentListSorters.DateLastRead);

                    int num_added = 0;
                    foreach (PDFDocument pdf_document in pdf_documents_all)
                    {
                        if (!pdf_document.DocumentExists)
                        {
                            continue;
                        }

                        if (!ddwm.ContainsPDFDocument(pdf_document))
                        {
                            ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Blue, "Read Recently", pdf_document);

                            if (++num_added >= ITEMS_IN_LIST)
                            {
                                break;
                            }
                        }
                    }
                }
            }

            WPFDoEvents.InvokeAsyncInUIThread(() =>
            {
                WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

                // And fill the placeholders
                try
                {
                    UpdateLibraryStatistics_Stats_Background_GUI_AddAllPlaceHolders(ddwm.ddws);

                    // Only queue the rendering when there is something to render. The empty case is handled
                    // exactly once, at the bottom of this callback.
                    if (0 != ddwm.ddws.Count)
                    {
                        SafeThreadPool.QueueUserWorkItem(o =>
                        {
                            try
                            {
                                // Now render each document; the drawing resources are shared by all previews.
                                using (Font font = new Font("Times New Roman", 11.0f))
                                using (StringFormat string_format = new StringFormat
                                {
                                    Alignment = StringAlignment.Center,
                                    LineAlignment = StringAlignment.Center
                                })
                                using (var image_attributes = new ImageAttributes())
                                {
                                    var color_matrix      = new ColorMatrix();
                                    color_matrix.Matrix33 = 0.9f;
                                    image_attributes.SetColorMatrix(color_matrix, ColorMatrixFlag.Default, ColorAdjustType.Bitmap);

                                    foreach (DocumentDisplayWork ddw in ddwm.ddws)
                                    {
                                        try
                                        {
                                            ddw.page_bitmap_source = UpdateLibraryStatistics_Stats_Background_CoverFlow_RenderPreview(ddw, font, string_format, image_attributes);
                                        }
                                        catch (Exception ex)
                                        {
                                            Logging.Warn(ex, "There was a problem loading a preview image for document {0}", ddw.pdf_document.Fingerprint);

                                            Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");

                                            // do not rethrow the error: allow the other pages in the pages to be rendered...

                                            ddw.page_bitmap_source = Backgrounds.GetBackground(Backgrounds.PageRenderingFailed_ClassicNews);
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
                            }

                            // Don't care if there were errors in the process so far: the pages which got rendered, SHOULD make it into the UI anyway!
                            // The placeholders are filled all at once, in the UI thread.
                            WPFDoEvents.InvokeAsyncInUIThread(() =>
                            {
                                foreach (DocumentDisplayWork ddw in ddwm.ddws)
                                {
                                    try
                                    {
                                        UpdateLibraryStatistics_Stats_Background_GUI_FillPlaceHolder(ddw);
                                    }
                                    catch (Exception ex)
                                    {
                                        Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
                                        Logging.Warn(ex, "There was a problem loading a preview image for document {0}", ddw.pdf_document.Fingerprint);
                                    }
                                }
                            });
                        });
                    }
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
                }

                // Nothing to show: switch the cover flow off and refresh the statistics.
                if (0 == ddwm.ddws.Count)
                {
                    ButtonCoverFlow.IsChecked = false;
                    UpdateLibraryStatistics();
                }
            });
        }

        /// <summary>
        /// Renders the cover flow preview for one document: page 1 of the PDF, cropped to the top
        /// PREVIEW_IMAGE_PERCENTAGE of its height, with a coloured corner icon and the rotated caption drawn on top.
        /// Exceptions are not caught here; the caller decides what to show on failure.
        /// </summary>
        /// <param name="ddw">The work item naming the document, corner colour and caption.</param>
        /// <param name="font">Font used for the caption.</param>
        /// <param name="string_format">Alignment settings used for the caption.</param>
        /// <param name="image_attributes">Colour adjustment applied when drawing the corner icon.</param>
        /// <returns>The finished preview image.</returns>
        private BitmapSource UpdateLibraryStatistics_Stats_Background_CoverFlow_RenderPreview(DocumentDisplayWork ddw, Font font, StringFormat string_format, ImageAttributes image_attributes)
        {
            using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByHeightAsImage(ddw.pdf_document.DocumentPath, ddw.pdf_document.PDFPassword, 1, (int)Math.Round(PREVIEW_IMAGE_HEIGHT / PREVIEW_IMAGE_PERCENTAGE), (int)Math.Round(PREVIEW_IMAGE_WIDTH / PREVIEW_IMAGE_PERCENTAGE))))
            {
                Bitmap page_bitmap = (Bitmap)System.Drawing.Image.FromStream(ms);

                // Keep only the top part of the page (the rectangle's X and Y default to 0).
                page_bitmap = page_bitmap.Clone(new RectangleF {
                    Width = page_bitmap.Width, Height = (int)Math.Round(page_bitmap.Height * PREVIEW_IMAGE_PERCENTAGE)
                }, page_bitmap.PixelFormat);

                using (Graphics g = Graphics.FromImage(page_bitmap))
                {
                    int CENTER = 60;
                    int RADIUS = 60;

                    // The corner icon, picked by colour
                    {
                        BitmapImage starburst_bi = null;
                        switch (ddw.starburst_color)
                        {
                        case DocumentDisplayWork.StarburstColor.Blue:
                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerBlue);
                            break;

                        case DocumentDisplayWork.StarburstColor.Green:
                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerGreen);
                            break;

                        case DocumentDisplayWork.StarburstColor.Pink:
                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerPink);
                            break;

                        default:
                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerOrange);
                            break;
                        }

                        Bitmap starburst_image = BitmapImageTools.ConvertBitmapSourceToBitmap(starburst_bi);
                        g.SmoothingMode        = SmoothingMode.AntiAlias;
                        g.DrawImage(
                            starburst_image,
                            new Rectangle(CENTER - RADIUS, CENTER - RADIUS, 2 * RADIUS, 2 * RADIUS),
                            0,
                            0,
                            starburst_image.Width,
                            starburst_image.Height,
                            GraphicsUnit.Pixel,
                            image_attributes
                            );
                    }

                    // The caption: title-cased, one word per line, rotated by -50 degrees
                    using (Matrix mat = new Matrix())
                    {
                        mat.RotateAt(-50, new PointF(CENTER / 2, CENTER / 2));
                        g.Transform = mat;

                        string wrapped_caption = ddw.starburst_caption;
                        wrapped_caption        = wrapped_caption.ToLower();
                        wrapped_caption        = Thread.CurrentThread.CurrentCulture.TextInfo.ToTitleCase(wrapped_caption);
                        wrapped_caption        = wrapped_caption.Replace(" ", "\n");
                        g.DrawString(wrapped_caption, font, Brushes.Black, new PointF(CENTER / 2, CENTER / 2), string_format);
                    }
                }

                return BitmapImageTools.CreateBitmapSourceFromImage(page_bitmap);
            }
        }
// Example #7
// 0
 /// <summary>
 /// Renders a page of this document at the given resolution by forwarding to
 /// <c>SoraxPDFRenderer.GetPageByDPIAsImage</c> with this document's path and password.
 /// </summary>
 /// <param name="page">The page to render.</param>
 /// <param name="dpi">The resolution to render at.</param>
 /// <returns>The bytes produced by the renderer.</returns>
 internal byte[] GetPageByDPIAsImage(int page, int dpi)
 {
     byte[] image_data = SoraxPDFRenderer.GetPageByDPIAsImage(DocumentPath, PDFPassword, page, dpi);

     return image_data;
 }