//Walks every element of the given content. Each image found is handed to the
 //OCR engine, which returns a form holding the image with the generated text
 //under it; that form then takes the image's place in the content. Containers,
 //groups and forms are searched recursively through their own Content.
 static void AddTextToImages(Document doc, Content content, OCREngine engine)
 {
     for (int position = 0; position < content.NumElements; position++)
     {
         Element current = content.GetElement(position);

         Datalogics.PDFL.Image picture = current as Datalogics.PDFL.Image;
         if (picture != null)
         {
             //Swap the image for the form produced by PlaceTextUnder. The form is
             //added relative to position - 1 so that it ends up in the slot the
             //image was just removed from (presumably AddElement inserts after
             //the given index - confirm against the PDFL documentation).
             Form replacement = engine.PlaceTextUnder(picture, doc);
             content.RemoveElement(position);
             content.AddElement(replacement, position - 1);
             continue;
         }

         Container container = current as Container;
         if (container != null)
         {
             AddTextToImages(doc, container.Content, engine);
             continue;
         }

         Group group = current as Group;
         if (group != null)
         {
             AddTextToImages(doc, group.Content, engine);
             continue;
         }

         Form nestedForm = current as Form;
         if (nestedForm != null)
         {
             AddTextToImages(doc, nestedForm.Content, engine);
         }
     }
 }
Exemplo n.º 2
0
        // Entry point of the AddTextToDocument sample: opens the input PDF, runs
        // AddTextToImages over the content of every page using an OCR engine
        // configured for English, and saves the result.
        //   args[0] (optional): input PDF path
        //   args[1] (optional): output PDF path
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // Built-in defaults, overridden by the command-line arguments when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String sInput       = args.Length > 0 ? args[0] : defaultInput;
                String sOutput      = args.Length > 1 ? args[1] : "AddTextToDocument-out.pdf";

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                // OCRParams.Languages controls which languages the OCR engine attempts
                // to detect; only English is listed here. Further languages can be
                // detected by adding more LanguageSetting entries to the list, e.g.
                //   new LanguageSetting(Language.Japanese, false)
                OCRParams ocrParams = new OCRParams();
                ocrParams.Languages = new List<LanguageSetting>
                {
                    new LanguageSetting(Language.English, false)
                };

                // The engine is created first and the document second, so the
                // document is disposed before the engine.
                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                using (Document doc = new Document(sInput))
                {
                    for (int pageIndex = 0; pageIndex < doc.NumPages; pageIndex++)
                    {
                        using (Page page = doc.GetPage(pageIndex))
                        {
                            Content pageContent = page.Content;
                            Console.WriteLine("Adding text to page: " + pageIndex);
                            AddTextToImages(doc, pageContent, ocrEngine);
                            page.UpdateContent();
                        }
                    }

                    doc.Save(SaveFlags.Full, sOutput);
                }
            }
        }
        // Entry point of the AddTextToDocument sample: opens the input PDF, runs
        // AddTextToImages over the content of every page with an OCR engine set to
        // automatic page segmentation and best accuracy, and saves the result.
        //   args[0] (optional): input PDF path
        //   args[1] (optional): output PDF path
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // Built-in defaults, overridden by the command-line arguments when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String sInput       = args.Length > 0 ? args[0] : defaultInput;
                String sOutput      = args.Length > 1 ? args[1] : "../AddTextToDocument-out.pdf";

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                OCRParams ocrParams = new OCRParams
                {
                    // Automatic segmentation lets the OCR engine choose how to
                    // segment the page for text detection.
                    PageSegmentationMode = PageSegmentationMode.Automatic,
                    // Trades increased run time for improved accuracy. For
                    // Tesseract 3, two different algorithms are run and the one
                    // with the most confidence is chosen.
                    Performance = Performance.BestAccuracy
                };

                // The engine is created first and the document second, so the
                // document is disposed before the engine.
                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                using (Document doc = new Document(sInput))
                {
                    for (int pageIndex = 0; pageIndex < doc.NumPages; pageIndex++)
                    {
                        using (Page page = doc.GetPage(pageIndex))
                        {
                            Content pageContent = page.Content;
                            Console.WriteLine("Adding text to page: " + pageIndex);
                            AddTextToImages(doc, pageContent, ocrEngine);
                            page.UpdateContent();
                        }
                    }

                    doc.Save(SaveFlags.Full, sOutput);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the comic translation window: binds the result list, resets the
        /// image list and translation state, creates both translators, creates and
        /// initializes the configured OCR engine (reporting an initialization
        /// failure through a global Growl error), and sets up the ink canvas pen
        /// and the view model used as DataContext.
        /// </summary>
        public ComicTransMainWindow()
        {
            InitializeComponent();

            TransResListView.ItemsSource = lstData;

            ComicImgList = new List<string>();
            CurrentPos   = 0;

            transRes1    = string.Empty;
            transRes2    = string.Empty;
            _translator1 = TranslateWindow.TranslatorAuto(Common.appSettings.FirstTranslator);
            _translator2 = TranslateWindow.TranslatorAuto(Common.appSettings.SecondTranslator);

            // Create the OCR backend selected in the settings; the source language
            // is fixed to "jpn".
            ocr = OCRCommon.OCRAuto(Common.appSettings.OCRsource);
            ocr.SetOCRSourceLang("jpn");

            // Each known backend is initialized with its own pair of arguments.
            // When initialization reports false, remember the label to show in the
            // error message; any other source value skips initialization entirely.
            string failedEngineLabel = null;
            switch (Common.appSettings.OCRsource)
            {
                case "BaiduOCR":
                    if (ocr.OCR_Init(Common.appSettings.BDOCR_APIKEY, Common.appSettings.BDOCR_SecretKey) == false)
                    {
                        failedEngineLabel = "百度OCR";
                    }
                    break;

                case "Tesseract5":
                    if (ocr.OCR_Init(Common.appSettings.Tesseract5OCR_Path, Common.appSettings.Tesseract5OCR_Args) == false)
                    {
                        failedEngineLabel = "Tesseract5";
                    }
                    break;

                case "TesseractOCR":
                    if (ocr.OCR_Init("", "") == false)
                    {
                        failedEngineLabel = "TesseractOCR";
                    }
                    break;
            }

            if (failedEngineLabel != null)
            {
                HandyControl.Controls.Growl.ErrorGlobal($"{failedEngineLabel} {Application.Current.Resources["APITest_Error_Hint"]}\n{ocr.GetLastError()}");
            }

            scale = Common.GetScale();

            // Pen used on the measuring ink canvas: a 2x2 red rectangular tip,
            // not a highlighter, ignoring stylus pressure.
            DrawingAttributes penAttributes = new DrawingAttributes();
            penAttributes.Color     = Colors.Red;
            penAttributes.Width     = 2;
            penAttributes.Height    = 2;
            penAttributes.StylusTip = StylusTip.Rectangle;
            //penAttributes.FitToCurve = true;
            penAttributes.IsHighlighter  = false;
            penAttributes.IgnorePressure = true;

            inkCanvasMeasure.DefaultDrawingAttributes = penAttributes;

            // Start with an empty info text and an empty stroke collection.
            ViewModel freshModel = new ViewModel();
            freshModel.MeaInfo    = "";
            freshModel.InkStrokes = new StrokeCollection();
            viewModel = freshModel;

            DataContext = viewModel;
        }
        // Entry point of the AddTextToImage sample: places the input image on a
        // new single-page PDF, replaces that image with the form returned by
        // OCREngine.PlaceTextUnder (English OCR), and saves the document.
        //   args[0] (optional): input image path
        //   args[1] (optional): output PDF path
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToImage Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // Built-in defaults, overridden by the command-line arguments when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
                String sInput       = args.Length > 0 ? args[0] : defaultInput;
                String sOutput      = args.Length > 1 ? args[1] : "AddTextToImage-out.pdf";

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                // OCRParams.Languages controls which languages the OCR engine attempts
                // to detect; only English is listed here. Further languages can be
                // detected by adding more LanguageSetting entries to the list, e.g.
                //   new LanguageSetting(Language.Japanese, false)
                OCRParams ocrParams = new OCRParams();
                ocrParams.Languages = new List<LanguageSetting>
                {
                    new LanguageSetting(Language.English, false)
                };

                // If your image resolution is not 300 dpi, specify it here. Specifying a
                // correct resolution gives better results for OCR, especially with
                // automatic image preprocessing.
                // ocrParams.Resolution = 600;

                // The engine is created first and the new, empty document second,
                // so the document is disposed before the engine.
                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                using (Document doc = new Document())
                {
                    using (Image sourceImage = new Image(sInput, doc))
                    {
                        // Size the page from the image: the page rectangle runs from
                        // (0, 0) to (Matrix.A, Matrix.D) of the image's matrix.
                        // There are 72 PDF user space units in one inch.
                        Rect pageBounds = new Rect(0, 0, sourceImage.Matrix.A, sourceImage.Matrix.D);
                        using (Page newPage = doc.CreatePage(Document.BeforeFirstPage, pageBounds))
                        {
                            newPage.Content.AddElement(sourceImage);
                            newPage.UpdateContent();
                        }
                    }

                    using (Page page = doc.GetPage(0))
                    {
                        Content pageContent = page.Content;
                        Image   pageImage   = (Image)pageContent.GetElement(0);

                        // PlaceTextUnder creates a form with the image and the generated
                        // text under the image. The original image in the page is then
                        // replaced by the form.
                        Form textForm = ocrEngine.PlaceTextUnder(pageImage, doc);
                        pageContent.RemoveElement(0);
                        pageContent.AddElement(textForm, Content.BeforeFirst);
                        page.UpdateContent();
                    }

                    doc.Save(SaveFlags.Full, sOutput);
                }
            }
        }
        // Entry point of the AddTextToImage sample: places the input image on a
        // new single-page PDF, replaces that image with the form returned by
        // OCREngine.PlaceTextUnder (automatic segmentation, best accuracy), and
        // saves the document.
        //   args[0] (optional): input image path
        //   args[1] (optional): output PDF path
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToImage Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // Built-in defaults, overridden by the command-line arguments when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
                String sInput       = args.Length > 0 ? args[0] : defaultInput;
                String sOutput      = args.Length > 1 ? args[1] : "../AddTextToImage-out.pdf";

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                OCRParams ocrParams = new OCRParams
                {
                    // Automatic segmentation lets the OCR engine choose how to
                    // segment the page for text detection.
                    PageSegmentationMode = PageSegmentationMode.Automatic,
                    // Trades increased run time for improved accuracy. For
                    // Tesseract 3, two different algorithms are run and the one
                    // with the most confidence is chosen.
                    Performance = Performance.BestAccuracy
                };

                // The engine is created first and the new, empty document second,
                // so the document is disposed before the engine.
                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                using (Document doc = new Document())
                {
                    using (Image sourceImage = new Image(sInput, doc))
                    {
                        // Size the page from the image: the page rectangle runs from
                        // (0, 0) to (Matrix.A, Matrix.D) of the image's matrix.
                        // There are 72 PDF user space units in one inch.
                        Rect pageBounds = new Rect(0, 0, sourceImage.Matrix.A, sourceImage.Matrix.D);
                        using (Page newPage = doc.CreatePage(Document.BeforeFirstPage, pageBounds))
                        {
                            newPage.Content.AddElement(sourceImage);
                            newPage.UpdateContent();
                        }
                    }

                    using (Page page = doc.GetPage(0))
                    {
                        Content pageContent = page.Content;
                        Image   pageImage   = (Image)pageContent.GetElement(0);

                        // PlaceTextUnder creates a form with the image and the generated
                        // text under the image. The original image in the page is then
                        // replaced by the form.
                        Form textForm = ocrEngine.PlaceTextUnder(pageImage, doc);
                        pageContent.RemoveElement(0);
                        // NOTE(review): -1 presumably means "before the first element"
                        // - confirm against the PDFL Content.AddElement documentation.
                        pageContent.AddElement(textForm, -1);
                        page.UpdateContent();
                    }

                    doc.Save(SaveFlags.Full, sOutput);
                }
            }
        }
        // Timing variant of the AddTextToDocument sample: for each Performance
        // option (Default, Faster, MoreAccuracy, BestAccuracy) an OCR engine with
        // automatic page segmentation is created, AddTextToImages is run over
        // every page of the input document, and the elapsed time per page is
        // printed.
        //   args[0] (optional): input PDF path
        //   args[1] (optional): output PDF path
        // NOTE(review): the output path is printed, but neither page.UpdateContent()
        // nor doc.Save() is called here, so nothing is written to it - presumably
        // intentional for a timing-only run; confirm.
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String sOutput = "../AddTextToDocument-out.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }
                if (args.Length > 1)
                {
                    sOutput = args[1];
                }

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                OCRParams ocrParams = new OCRParams();

                using (Document doc = new Document(sInput))
                {
                    // Performance options to time, in the order they are run.
                    var performanceOpt = new List<Datalogics.PDFL.Performance>
                    {
                        Datalogics.PDFL.Performance.Default,
                        Datalogics.PDFL.Performance.Faster,
                        Datalogics.PDFL.Performance.MoreAccuracy,
                        Datalogics.PDFL.Performance.BestAccuracy
                    };

                    foreach (Datalogics.PDFL.Performance perfOpt in performanceOpt)
                    {
                        ocrParams.PageSegmentationMode = PageSegmentationMode.Automatic;
                        ocrParams.Performance          = perfOpt;
                        Console.WriteLine("\nUsing Performance option " + perfOpt.ToString() + " and PageSegmentationMode.Automatic");

                        // A fresh engine is created for every option, from the same
                        // OCRParams instance updated just above.
                        using (OCREngine ocrEngine = new OCREngine(ocrParams))
                        {
                            for (int numPage = 0; numPage < doc.NumPages; numPage++)
                            {
                                using (Page page = doc.GetPage(numPage))
                                {
                                    Content content = page.Content;

                                    // Only the AddTextToImages call is timed; engine
                                    // creation and page loading are excluded.
                                    Stopwatch sw = Stopwatch.StartNew();
                                    AddTextToImages(doc, content, ocrEngine);
                                    sw.Stop();

                                    TimeSpan timeElapsed = sw.Elapsed;
                                    Console.WriteLine("  Page " + numPage + " -- Time elapsed: " + timeElapsed.TotalMilliseconds + " milliseconds");
                                }
                            }
                        }
                    }
                }
            }
        }