/// <summary>
/// Creates a renderer for one PDF file and wires up its file layer and Sorax renderer.
/// </summary>
/// <param name="precomputed_document_fingerprint">
/// Fingerprint to use for the document. When null, the fingerprint is computed from
/// the file through <c>StreamFingerprint.FromFile</c>.
/// </param>
/// <param name="pdf_filename">Path of the PDF file to render.</param>
/// <param name="pdf_user_password">User password handed to the Sorax renderer.</param>
/// <param name="pdf_owner_password">Owner password handed to the Sorax renderer.</param>
public PDFRenderer(string precomputed_document_fingerprint, string pdf_filename, string pdf_user_password, string pdf_owner_password)
{
    this.pdf_filename = pdf_filename;
    this.pdf_user_password = pdf_user_password;
    this.pdf_owner_password = pdf_owner_password;

    // Only hash the file when the caller did not already supply a fingerprint.
    if (null != precomputed_document_fingerprint)
    {
        this.document_fingerprint = precomputed_document_fingerprint;
    }
    else
    {
        this.document_fingerprint = StreamFingerprint.FromFile(this.pdf_filename);
    }

    pdf_render_file_layer = new PDFRendererFileLayer(this.document_fingerprint, pdf_filename);
    sorax_pdf_renderer = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_owner_password);
}
/// <summary>
/// Clears the thumbnail and abstract, then asynchronously shows either a rendered
/// page thumbnail (when the document file exists) or the document's abstract text.
/// </summary>
/// <remarks>
/// Rendering happens on a thread-pool thread; every write to a UI control is
/// marshalled back through <c>WPFDoEvents.InvokeAsyncInUIThread</c>. Failures are
/// logged and otherwise ignored, leaving the cleared controls in place.
/// </remarks>
private void DisplayThumbnail()
{
    ImageThumbnail.Source = null;
    TxtAbstract.Text = "";

    if (null == pdf_document)
    {
        return;
    }

    // Snapshot everything the worker needs while we are still on the calling thread.
    // The control's size must not be read from the worker thread (this method already
    // marshals all other accesses to ImageThumbnail onto the UI thread), and the
    // fields may be reassigned before the worker gets to run.
    var document = pdf_document;
    var page_number = page;
    var annotation = specific_pdf_annotation;
    double thumbnail_height = ImageThumbnail.Height;
    double thumbnail_width = ImageThumbnail.Width;

    SafeThreadPool.QueueUserWorkItem(o =>
    {
        try
        {
            if (document.DocumentExists)
            {
                // The page is rendered at 1/IMAGE_PERCENTAGE of the control size and then
                // only the top IMAGE_PERCENTAGE of its height is kept.
                const double IMAGE_PERCENTAGE = 0.5;

                int render_height = (int)Math.Round(thumbnail_height / IMAGE_PERCENTAGE);
                int render_width = (int)Math.Round(thumbnail_width / IMAGE_PERCENTAGE);

                BitmapSource image_page;

                // The stream has to stay open for as long as the decoded bitmap is in use,
                // so both live in the same scope and the bitmap is disposed first.
                using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByHeightAsImage(document.DocumentPath, document.PDFPassword, page_number, render_height, render_width)))
                using (Bitmap full_image = (Bitmap)Image.FromStream(ms))
                {
                    PDFOverlayRenderer.RenderAnnotations(full_image, document, page_number, annotation);
                    PDFOverlayRenderer.RenderHighlights(full_image, document, page_number);
                    PDFOverlayRenderer.RenderInks(full_image, document, page_number);

                    RectangleF crop_area = new RectangleF
                    {
                        Width = full_image.Width,
                        Height = (int)Math.Round(full_image.Height * IMAGE_PERCENTAGE)
                    };

                    using (Bitmap cropped_image = full_image.Clone(crop_area, full_image.PixelFormat))
                    {
                        // NOTE(review): assumes CreateBitmapSourceFromImage copies the pixel data,
                        // so the GDI+ bitmaps can be released right after the conversion.
                        image_page = BitmapImageTools.CreateBitmapSourceFromImage(cropped_image);
                    }
                }

                WPFDoEvents.InvokeAsyncInUIThread(() =>
                {
                    ImageThumbnail.Source = image_page;

                    if (null != ImageThumbnail.Source)
                    {
                        ImageThumbnail.Visibility = Visibility.Visible;
                    }
                    else
                    {
                        ImageThumbnail.Visibility = Visibility.Collapsed;
                    }
                });
            }
            else
            {
                // No file on disk: fall back to the abstract, unless none could be located.
                string abstract_text = document.Abstract;
                if (PDFAbstractExtraction.CANT_LOCATE != abstract_text)
                {
                    WPFDoEvents.InvokeAsyncInUIThread(() =>
                    {
                        TxtAbstract.Text = abstract_text;
                    });
                }
            }
        }
        catch (Exception ex)
        {
            Logging.Error(ex, "There was a problem showing the PDF thumbnail");
        }
    });
}
/// <summary>
/// Renders one PDF page to a bitmap at 200 DPI, splits it into regions using
/// <c>PDFRegionLocator</c> and runs Tesseract OCR over each region.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file to process.</param>
/// <param name="page_number">Page number handed to the PDF renderer.</param>
/// <returns>The words recognised in all processed regions of the page.</returns>
/// <remarks>
/// A span whose preceding region is in state BLANKS is processed as two half-width
/// rectangles (left, then right); a span in state PIXELS is processed as a single
/// full-width rectangle. Spans with a non-positive height are skipped with a warning.
/// </remarks>
public static WordList DoOCR(string pdf_filename, int page_number)
{
    Logging.Info("+Rendering page {1} for PDF file {0}", pdf_filename, page_number);

    // The stream must outlive the bitmap decoded from it, so the bitmap is disposed first.
    using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByDPIAsImage(pdf_filename, pdf_user_password, page_number, 200)))
    using (Bitmap bitmap = (Bitmap)Image.FromStream(ms))
    {
        Logging.Info("-Rendering page #{0}", page_number);

        Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
        Logging.Info("Language is '{0}'", language);

        using (Tesseract ocr = new Tesseract())
        {
            ocr.Init(null, language, false);

            Logging.Info("+Doing OCR");

            const int MIN_WIDTH = 0;

            // Build a list of all the rectangles to process
            PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
            PDFRegionLocator.Region last_region = pdf_region_locator.regions[0];
            List<Rectangle> rectangles = new List<Rectangle>();

            // Rectangles produced for the span between last_region and region.
            // A BLANKS span yields two (LHS + RHS), a PIXELS span yields one.
            List<Rectangle> candidates = new List<Rectangle>(2);

            foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
            {
                int rect_height = region.y - last_region.y;
                int clamped_height = Math.Max(MIN_WIDTH, rect_height);

                candidates.Clear();
                if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                {
                    // LHS
                    candidates.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, clamped_height));
                    // RHS
                    candidates.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, clamped_height));
                }
                else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                {
                    // Full column
                    candidates.Add(new Rectangle(0, last_region.y, bitmap.Width, clamped_height));
                }

                if (rect_height <= 0 || 0 == candidates.Count)
                {
                    // Nothing usable for this span (this always includes the first iteration,
                    // where region == last_region): report it and skip.
                    Rectangle reported = (candidates.Count > 0 ? candidates[candidates.Count - 1] : new Rectangle());
                    Logging.Warn("Calculated region height is negative or zero: {0} :: Calculated region {1} <-- CURRENT:{2} - LAST:{3}", rect_height, reported, region, last_region);
                }
                else
                {
                    foreach (Rectangle rectangle in candidates)
                    {
                        int last_index = rectangles.Count - 1;

                        if (last_index >= 0 && rectangles[last_index].X == rectangle.X && rectangles[last_index].Y == rectangle.Y)
                        {
                            // Rectangle is a struct: the merged value has to be written back
                            // into the list, otherwise the merge is silently lost.
                            Rectangle merged = rectangles[last_index];
                            Logging.Warn("Overlapping subsequent rectangles will be merged :: CURRENT:{0} - LAST:{1}", rectangle, merged);
                            merged.Width = Math.Max(merged.Width, rectangle.Width);
                            merged.Height = Math.Max(merged.Height, rectangle.Height);
                            rectangles[last_index] = merged;
                            Logging.Warn("--> Updated 'last' rectangle:{0}", merged);
                        }
                        else
                        {
                            rectangles.Add(rectangle);
                        }
                    }
                }

                last_region = region;
            }

            // DEBUG CODE: Draw in the region rectangles
            //
            // When we run in NOKILL mode, we "know" we're running in a debugger or stand-alone environment
            // intended for testing this code. Hence we should dump the regions image as part of the process.
            if (no_kill)
            {
                string bitmap_diag_path = pdf_filename + @"." + page_number + @"-ocr.png";

                Logging.Info("Dumping regions-augmented page {0} PNG image to file {1}", page_number, bitmap_diag_path);

                using (Graphics g = Graphics.FromImage(bitmap))
                {
                    foreach (Rectangle rectangle in rectangles)
                    {
                        if (rectangle.Width <= MIN_WIDTH && rectangle.Height > MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.Purple, rectangle);
                        }
                        else if (rectangle.Width > MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.PowderBlue, rectangle);
                        }
                        else if (rectangle.Width <= MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.Red, rectangle);
                        }
                        else
                        {
                            DrawRectangleOutline(g, Pens.LawnGreen, rectangle);
                        }
                    }
                }

                bitmap.Save(bitmap_diag_path, ImageFormat.Png);
            }

            // Do the OCR on each of the rectangles
            WordList word_list = new WordList();
            foreach (Rectangle rectangle in rectangles)
            {
                if (0 == rectangle.Width || 0 == rectangle.Height)
                {
                    Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                    continue;
                }

                Logging.Info("Doing OCR for region {0} on bitmap WxH: {1}x{2}", rectangle.ToString(), bitmap.Width, bitmap.Height);
                List<Word> result = ocr.DoOCR(bitmap, rectangle);
                Logging.Info("Got {0} words", result.Count);
                word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
            }

            Logging.Info("-Doing OCR");

            Logging.Info("Found {0} words ({1} @ #{2})", word_list.Count, pdf_filename, page_number);

#if false
            Logging.Info("+Reordering words for columns");
            WordList word_list_ordered = ColumnWordOrderer.ReorderWords(word_list);
            Logging.Info("-Reordering words for columns");
            word_list_ordered.WriteToFile(ocr_output_filename);
#endif

            return word_list;
        }
    }
}
/// <summary>
/// Renders one PDF page to a bitmap at 200 DPI, splits it into regions using
/// <c>PDFRegionLocator</c> and runs Tesseract OCR over each region.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file to process.</param>
/// <param name="page_number">Page number handed to the PDF renderer.</param>
/// <returns>The words recognised in all processed regions of the page.</returns>
/// <remarks>
/// A span whose preceding region is in state BLANKS is processed as two half-width
/// rectangles (left, then right); a span in state PIXELS is processed as a single
/// full-width rectangle. Rectangles without a positive extent are skipped.
/// </remarks>
public static WordList DoOCR(string pdf_filename, int page_number)
{
    Logging.Info("+Rendering page");

    // NOTE(review): the user password is passed for both password arguments here;
    // confirm whether a separate owner password should be supplied instead.
    SoraxPDFRenderer renderer = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_user_password);

    // The stream must stay open while the bitmap decoded from it is in use,
    // so both are scoped together and the bitmap is disposed first.
    using (MemoryStream ms = new MemoryStream(renderer.GetPageByDPIAsImage(page_number, 200)))
    using (Bitmap bitmap = (Bitmap)Image.FromStream(ms))
    {
        Logging.Info("-Rendering page");

        Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
        Logging.Info("Language is '{0}'", language);

        using (Tesseract ocr = new Tesseract())
        {
            ocr.Init(null, language, false);

            Logging.Info("+Doing OCR");

            // Build a list of all the rectangles to process
            PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
            PDFRegionLocator.Region last_region = pdf_region_locator.regions[0];
            List<Rectangle> rectangles = new List<Rectangle>();

            foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
            {
                int rect_height = region.y - last_region.y;

                if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                {
                    // LHS
                    rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, rect_height));
                    // RHS
                    rectangles.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, rect_height));
                }
                else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                {
                    // Full column
                    rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width, rect_height));
                }

                last_region = region;
            }

            // Do the OCR on each of the rectangles
            WordList word_list = new WordList();
            foreach (Rectangle rectangle in rectangles)
            {
                // The first iteration above always yields a zero height (region == last_region).
                // Negative extents are skipped as well, so they are never handed to the OCR engine.
                if (rectangle.Width <= 0 || rectangle.Height <= 0)
                {
                    Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                    continue;
                }

                Logging.Info("Doing OCR for region {0}", rectangle.ToString());
                List<Word> result = ocr.DoOCR(bitmap, rectangle);
                Logging.Info("Got {0} words", result.Count);
                word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
            }

            Logging.Info("-Doing OCR");

            Logging.Info("Found {0} words", word_list.Count);

            return word_list;
        }
    }
}
/// <summary>
/// Drains the queue of pending page-resize requests, newest first, and schedules a
/// background render for every request whose page is still in view.
/// </summary>
/// <remarks>
/// Must be called on the UI thread (asserted on entry). When the queue is found empty,
/// <c>num_resized_page_image_item_thread_running</c> is decremented and the method returns.
/// The rendered, frozen bitmap is handed to the request's callback from the worker thread.
/// </remarks>
private void ResizedPageImageItemThreadEntry()
{
    WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

    for (; ; )
    {
        // Declared inside the loop so each queued worker captures its own request.
        ResizedPageImageItemRequest work_item;

        lock (resized_page_image_item_requests)
        {
            int pending = resized_page_image_item_request_orders.Count;

            // If there is nothing more to do...
            if (0 == pending)
            {
                Interlocked.Decrement(ref num_resized_page_image_item_thread_running);
                return;
            }

            // Get a piece of work: take the most recently queued page first.
            int newest_index = pending - 1;
            int page = resized_page_image_item_request_orders[newest_index];
            resized_page_image_item_request_orders.RemoveAt(newest_index);

            if (!resized_page_image_item_requests.TryGetValue(page, out work_item))
            {
                // The order entry has no matching request any more: try the next one.
                continue;
            }

            resized_page_image_item_requests.Remove(page);
        }

        Logging.Debug("Performing page redraw for {0}", work_item.page);

        // Check that the page is still visible
        ASSERT.Test(work_item.page_control != null);

        if (work_item.page_control.PageIsInView)
        {
            SafeThreadPool.QueueUserWorkItem(o =>
            {
                WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

                try
                {
                    BitmapImage page_image = new BitmapImage();
                    byte[] rendered_page = SoraxPDFRenderer.GetPageByHeightAsImage(pdf_document.DocumentPath, pdf_document.PDFPassword, work_item.page, work_item.height, work_item.width);

                    using (MemoryStream ms = new MemoryStream(rendered_page))
                    {
                        // OnLoad makes the image read the stream during EndInit(),
                        // so the stream can be closed right after.
                        page_image.BeginInit();
                        page_image.StreamSource = ms;
                        page_image.CacheOption = BitmapCacheOption.OnLoad;
                        page_image.EndInit();
                        page_image.Freeze();
                    }

                    work_item.callback(page_image, work_item.height, work_item.width);
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was an error while resizing a PDF page image");
                }
            });
        }
    }
}
/// <summary>
/// Picks the documents to recommend in the cover flow (upcoming reading, recently added,
/// recently read), creates their UI placeholders, renders a preview image for each on a
/// background thread and finally fills the placeholders on the UI thread.
/// </summary>
/// <param name="web_library_detail">Library to pick documents from; nothing happens when its <c>Xlibrary</c> is null.</param>
/// <remarks>
/// Must NOT be called on the UI thread (asserted on entry). When no document qualifies,
/// the cover flow button is unchecked and <c>UpdateLibraryStatistics()</c> is invoked once.
/// A preview that fails to render is replaced by the "rendering failed" background so
/// the remaining previews are still shown.
/// </remarks>
private void UpdateLibraryStatistics_Stats_Background_CoverFlow(WebLibraryDetail web_library_detail)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    if (web_library_detail.Xlibrary == null)
    {
        return;
    }

    List<PDFDocument> pdf_documents_all = web_library_detail.Xlibrary.PDFDocuments;

    // The list of recommended items
    DocumentDisplayWorkManager ddwm = new DocumentDisplayWorkManager();

    {
        const int UPCOMING_ITEMS_IN_LIST = 5;

        // Upcoming reading is:
        //  interrupted
        //  top priority
        //  read again
        //  recently added and no status
        pdf_documents_all.Sort(PDFDocumentListSorters.DateAddedToDatabase);

        string[] reading_stages = new string[] { Choices.ReadingStages_INTERRUPTED, Choices.ReadingStages_TOP_PRIORITY, Choices.ReadingStages_READ_AGAIN };
        foreach (string reading_stage in reading_stages)
        {
            // The limit applies to the whole section, not to each reading stage:
            // stop visiting further stages once it has been reached.
            if (ddwm.Count >= UPCOMING_ITEMS_IN_LIST)
            {
                break;
            }

            foreach (PDFDocument pdf_document in pdf_documents_all)
            {
                if (!pdf_document.DocumentExists)
                {
                    continue;
                }

                if (pdf_document.ReadingStage == reading_stage && !ddwm.ContainsPDFDocument(pdf_document))
                {
                    ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Pink, reading_stage, pdf_document);

                    if (ddwm.Count >= UPCOMING_ITEMS_IN_LIST)
                    {
                        break;
                    }
                }
            }
        }
    }

    {
        const int RECENT_ITEMS_IN_LIST = 3;

        // Recently added
        {
            pdf_documents_all.Sort(PDFDocumentListSorters.DateAddedToDatabase);

            int num_added = 0;
            foreach (PDFDocument pdf_document in pdf_documents_all)
            {
                if (!pdf_document.DocumentExists)
                {
                    continue;
                }

                if (!ddwm.ContainsPDFDocument(pdf_document))
                {
                    ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Green, "Added Recently", pdf_document);

                    if (++num_added >= RECENT_ITEMS_IN_LIST)
                    {
                        break;
                    }
                }
            }
        }

        // Recently read
        {
            pdf_documents_all.Sort(PDFDocumentListSorters.DateLastRead);

            int num_added = 0;
            foreach (PDFDocument pdf_document in pdf_documents_all)
            {
                if (!pdf_document.DocumentExists)
                {
                    continue;
                }

                if (!ddwm.ContainsPDFDocument(pdf_document))
                {
                    ddwm.AddDocumentDisplayWork(DocumentDisplayWork.StarburstColor.Blue, "Read Recently", pdf_document);

                    if (++num_added >= RECENT_ITEMS_IN_LIST)
                    {
                        break;
                    }
                }
            }
        }
    }

    WPFDoEvents.InvokeAsyncInUIThread(() =>
    {
        WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

        // And fill the placeholders
        try
        {
            UpdateLibraryStatistics_Stats_Background_GUI_AddAllPlaceHolders(ddwm.ddws);

            // With nothing to render there is no point in queueing the worker; the
            // empty-list fallback at the end of this callback handles that case (once).
            if (0 != ddwm.ddws.Count)
            {
                SafeThreadPool.QueueUserWorkItem(o =>
                {
                    try
                    {
                        // Now render each document
                        using (Font font = new Font("Times New Roman", 11.0f))
                        using (StringFormat string_format = new StringFormat { Alignment = StringAlignment.Center, LineAlignment = StringAlignment.Center })
                        using (var image_attributes = new ImageAttributes())
                        {
                            // Draw the starburst corner slightly translucent.
                            var color_matrix = new ColorMatrix();
                            color_matrix.Matrix33 = 0.9f;
                            image_attributes.SetColorMatrix(color_matrix, ColorMatrixFlag.Default, ColorAdjustType.Bitmap);

                            foreach (DocumentDisplayWork ddw in ddwm.ddws)
                            {
                                try
                                {
                                    int render_height = (int)Math.Round(PREVIEW_IMAGE_HEIGHT / PREVIEW_IMAGE_PERCENTAGE);
                                    int render_width = (int)Math.Round(PREVIEW_IMAGE_WIDTH / PREVIEW_IMAGE_PERCENTAGE);

                                    // The stream must stay open while the decoded bitmap is in use,
                                    // so both share one scope and the bitmap is disposed first.
                                    using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByHeightAsImage(ddw.pdf_document.DocumentPath, ddw.pdf_document.PDFPassword, 1, render_height, render_width)))
                                    using (Bitmap full_page_bitmap = (Bitmap)System.Drawing.Image.FromStream(ms))
                                    {
                                        // Keep only the top part of the rendered page.
                                        RectangleF crop_area = new RectangleF
                                        {
                                            Width = full_page_bitmap.Width,
                                            Height = (int)Math.Round(full_page_bitmap.Height * PREVIEW_IMAGE_PERCENTAGE)
                                        };

                                        using (Bitmap page_bitmap = full_page_bitmap.Clone(crop_area, full_page_bitmap.PixelFormat))
                                        {
                                            using (Graphics g = Graphics.FromImage(page_bitmap))
                                            {
                                                int CENTER = 60;
                                                int RADIUS = 60;

                                                {
                                                    BitmapImage starburst_bi = null;
                                                    switch (ddw.starburst_color)
                                                    {
                                                        case DocumentDisplayWork.StarburstColor.Blue:
                                                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerBlue);
                                                            break;

                                                        case DocumentDisplayWork.StarburstColor.Green:
                                                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerGreen);
                                                            break;

                                                        case DocumentDisplayWork.StarburstColor.Pink:
                                                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerPink);
                                                            break;

                                                        default:
                                                            starburst_bi = Icons.GetAppIcon(Icons.PageCornerOrange);
                                                            break;
                                                    }

                                                    // NOTE(review): assumes ConvertBitmapSourceToBitmap returns a fresh
                                                    // bitmap per call, so it is safe to dispose it after drawing.
                                                    using (Bitmap starburst_image = BitmapImageTools.ConvertBitmapSourceToBitmap(starburst_bi))
                                                    {
                                                        g.SmoothingMode = SmoothingMode.AntiAlias;
                                                        g.DrawImage(
                                                            starburst_image,
                                                            new Rectangle(CENTER - RADIUS, CENTER - RADIUS, 2 * RADIUS, 2 * RADIUS),
                                                            0,
                                                            0,
                                                            starburst_image.Width,
                                                            starburst_image.Height,
                                                            GraphicsUnit.Pixel,
                                                            image_attributes
                                                        );
                                                    }
                                                }

                                                using (Matrix mat = new Matrix())
                                                {
                                                    mat.RotateAt(-50, new PointF(CENTER / 2, CENTER / 2));
                                                    g.Transform = mat;

                                                    // Title-case the caption and put every word on its own line.
                                                    string wrapped_caption = ddw.starburst_caption;
                                                    wrapped_caption = wrapped_caption.ToLower();
                                                    wrapped_caption = Thread.CurrentThread.CurrentCulture.TextInfo.ToTitleCase(wrapped_caption);
                                                    wrapped_caption = wrapped_caption.Replace(" ", "\n");
                                                    g.DrawString(wrapped_caption, font, Brushes.Black, new PointF(CENTER / 2, CENTER / 2), string_format);
                                                }
                                            }

                                            // NOTE(review): assumes CreateBitmapSourceFromImage copies the pixel data,
                                            // so the GDI+ bitmaps can be released once the conversion is done.
                                            ddw.page_bitmap_source = BitmapImageTools.CreateBitmapSourceFromImage(page_bitmap);
                                        }
                                    }
                                }
                                catch (Exception ex)
                                {
                                    Logging.Warn(ex, "There was a problem loading a preview image for document {0}", ddw.pdf_document.Fingerprint);
                                    Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");

                                    // do not rethrow the error: allow the other documents in the list to be rendered...
                                    ddw.page_bitmap_source = Backgrounds.GetBackground(Backgrounds.PageRenderingFailed_ClassicNews);
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
                    }

                    // Don't care if there were errors in the process so far: the pages which got rendered, SHOULD make it into the UI anyway!
                    WPFDoEvents.InvokeAsyncInUIThread(() =>
                    {
                        foreach (DocumentDisplayWork ddw in ddwm.ddws)
                        {
                            try
                            {
                                UpdateLibraryStatistics_Stats_Background_GUI_FillPlaceHolder(ddw);
                            }
                            catch (Exception ex)
                            {
                                Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
                                Logging.Warn(ex, "There was a problem loading a preview image for document {0}", ddw.pdf_document.Fingerprint);
                            }
                        }
                    });
                });
            }
        }
        catch (Exception ex)
        {
            Logging.Error(ex, "UpdateLibraryStatistics_Stats_Background_CoverFlow: Error occurred.");
        }

        // Nothing to show: switch the cover flow off and refresh the statistics view.
        if (0 == ddwm.ddws.Count)
        {
            ButtonCoverFlow.IsChecked = false;
            UpdateLibraryStatistics();
        }
    });
}
/// <summary>
/// Renders one page of this document at the requested resolution by delegating to
/// <c>SoraxPDFRenderer.GetPageByDPIAsImage</c> with this document's path and password.
/// </summary>
/// <param name="page">Page number handed to the renderer.</param>
/// <param name="dpi">Resolution, in dots per inch, handed to the renderer.</param>
/// <returns>The byte array produced by the renderer for that page.</returns>
internal byte[] GetPageByDPIAsImage(int page, int dpi)
{
    byte[] rendered_page = SoraxPDFRenderer.GetPageByDPIAsImage(DocumentPath, PDFPassword, page, dpi);
    return rendered_page;
}