コード例 #1
0
        /// <summary>
        /// Entry point of the AddTextToDocument sample: passes the content of every page of
        /// the input PDF to AddTextToImages together with an OCR engine, then saves the
        /// document to the output path.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: args[0] replaces the default input path and args[1]
        /// replaces the default output path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // The default input is always computed; command-line arguments win when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String inputPath    = args.Length > 0 ? args[0] : defaultInput;
                String outputPath   = args.Length > 1 ? args[1] : "AddTextToDocument-out.pdf";

                Console.WriteLine("Input file: " + inputPath);
                Console.WriteLine("Writing output to: " + outputPath);

                // OCRParams.Languages controls which languages the OCR engine attempts to
                // detect. Only English is listed here; add more LanguageSetting entries
                // (for example new LanguageSetting(Language.Japanese, false)) to the list
                // to have the engine look for additional languages.
                OCRParams ocrSettings = new OCRParams();
                ocrSettings.Languages = new List <LanguageSetting>
                {
                    new LanguageSetting(Language.English, false)
                };

                using (OCREngine engine = new OCREngine(ocrSettings))
                using (Document pdf = new Document(inputPath))
                {
                    for (int pageIndex = 0; pageIndex < pdf.NumPages; pageIndex++)
                    {
                        using (Page currentPage = pdf.GetPage(pageIndex))
                        {
                            Content pageContent = currentPage.Content;
                            Console.WriteLine("Adding text to page: " + pageIndex);
                            AddTextToImages(pdf, pageContent, engine);

                            // Commit the modified content back to the page.
                            currentPage.UpdateContent();
                        }
                    }

                    pdf.Save(SaveFlags.Full, outputPath);
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Entry point of the AddTextToDocument sample: configures the OCR engine for
        /// automatic page segmentation and best accuracy, passes the content of every page
        /// of the input PDF to AddTextToImages, then saves the document.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: args[0] replaces the default input path and args[1]
        /// replaces the default output path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // The default input is always computed; command-line arguments win when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String inputPath    = args.Length > 0 ? args[0] : defaultInput;
                String outputPath   = args.Length > 1 ? args[1] : "../AddTextToDocument-out.pdf";

                Console.WriteLine("Input file: " + inputPath);
                Console.WriteLine("Writing output to: " + outputPath);

                OCRParams ocrSettings = new OCRParams
                {
                    // Automatic segmentation lets the OCR engine choose how to split the
                    // page for text detection.
                    PageSegmentationMode = PageSegmentationMode.Automatic,

                    // Trades run time for accuracy. For Tesseract 3 the engine runs two
                    // different algorithms and keeps the result it is most confident in.
                    Performance = Performance.BestAccuracy
                };

                using (OCREngine engine = new OCREngine(ocrSettings))
                using (Document pdf = new Document(inputPath))
                {
                    for (int pageIndex = 0; pageIndex < pdf.NumPages; pageIndex++)
                    {
                        using (Page currentPage = pdf.GetPage(pageIndex))
                        {
                            Content pageContent = currentPage.Content;
                            Console.WriteLine("Adding text to page: " + pageIndex);
                            AddTextToImages(pdf, pageContent, engine);

                            // Commit the modified content back to the page.
                            currentPage.UpdateContent();
                        }
                    }

                    pdf.Save(SaveFlags.Full, outputPath);
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Entry point of the AddTextToImage sample: places the input image on a new PDF
        /// page sized to the image, replaces the image with the form returned by
        /// OCREngine.PlaceTextUnder, and saves the document.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: args[0] replaces the default input image path and args[1]
        /// replaces the default output path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToImage Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // The default input is always computed; command-line arguments win when given.
                String defaultInput = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
                String inputPath    = args.Length > 0 ? args[0] : defaultInput;
                String outputPath   = args.Length > 1 ? args[1] : "AddTextToImage-out.pdf";

                Console.WriteLine("Input file: " + inputPath);
                Console.WriteLine("Writing output to: " + outputPath);

                // OCRParams.Languages controls which languages the OCR engine attempts to
                // detect. Only English is listed here; add more LanguageSetting entries
                // (for example new LanguageSetting(Language.Japanese, false)) to the list
                // to have the engine look for additional languages.
                OCRParams ocrSettings = new OCRParams();
                ocrSettings.Languages = new List <LanguageSetting>
                {
                    new LanguageSetting(Language.English, false)
                };

                // If your image resolution is not 300 dpi, specify it here. A correct
                // resolution gives better OCR results, especially with automatic image
                // preprocessing.
                // ocrSettings.Resolution = 600;

                using (OCREngine engine = new OCREngine(ocrSettings))
                using (Document pdf = new Document())
                {
                    using (Image sourceImage = new Image(inputPath, pdf))
                    {
                        // Create a PDF page which is the size of the image: the page
                        // rectangle takes its width and height from the image's Matrix.A
                        // and Matrix.D fields, respectively.
                        // There are 72 PDF user space units in one inch.
                        Rect pageBounds = new Rect(0, 0, sourceImage.Matrix.A, sourceImage.Matrix.D);
                        using (Page newPage = pdf.CreatePage(Document.BeforeFirstPage, pageBounds))
                        {
                            newPage.Content.AddElement(sourceImage);
                            newPage.UpdateContent();
                        }
                    }

                    using (Page firstPage = pdf.GetPage(0))
                    {
                        Content pageContent = firstPage.Content;
                        Image   pageImage   = (Image)pageContent.GetElement(0);

                        // PlaceTextUnder creates a form with the image and the generated
                        // text under the image. The original image on the page is then
                        // replaced by the form.
                        Form textForm = engine.PlaceTextUnder(pageImage, pdf);
                        pageContent.RemoveElement(0);
                        pageContent.AddElement(textForm, Content.BeforeFirst);
                        firstPage.UpdateContent();
                    }

                    pdf.Save(SaveFlags.Full, outputPath);
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Entry point of the AddTextToImage sample: places the input image on a new PDF
        /// page sized to the image, replaces the image with the form returned by
        /// OCREngine.PlaceTextUnder, and saves the document. The OCR engine is configured
        /// for automatic page segmentation and best accuracy.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: args[0] replaces the default input image path and args[1]
        /// replaces the default output path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToImage Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
                String sOutput = "../AddTextToImage-out.pdf";

                // Command-line arguments, when present, override the defaults above.
                if (args.Length > 0)
                {
                    sInput = args[0];
                }
                if (args.Length > 1)
                {
                    sOutput = args[1];
                }

                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                OCRParams ocrParams = new OCRParams();
                // Setting the segmentation mode to Automatic lets the OCR engine
                // choose how to segment the page for text detection.
                ocrParams.PageSegmentationMode = PageSegmentationMode.Automatic;
                // This tells the selected engine to improve accuracy at the expense
                // of increased run time. For Tesseract 3, it runs two different
                // algorithms, and chooses the one that has the most confidence.
                ocrParams.Performance = Performance.BestAccuracy;

                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                {
                    // Create an empty document object
                    using (Document doc = new Document())
                    {
                        using (Image newimage = new Image(sInput, doc))
                        {
                            // Create a PDF page which is the size of the image: the page
                            // rectangle takes its width and height from the image's
                            // Matrix.A and Matrix.D fields, respectively.
                            // There are 72 PDF user space units in one inch.
                            Rect pageRect = new Rect(0, 0, newimage.Matrix.A, newimage.Matrix.D);
                            using (Page docpage = doc.CreatePage(Document.BeforeFirstPage, pageRect))
                            {
                                docpage.Content.AddElement(newimage);
                                docpage.UpdateContent();
                            }
                        }

                        using (Page page = doc.GetPage(0))
                        {
                            Content content = page.Content;
                            Element elem    = content.GetElement(0);
                            Image   image   = (Image)elem;

                            // PlaceTextUnder creates a form with the image and the generated
                            // text under the image. The original image on the page is then
                            // replaced by the form.
                            Form form = ocrEngine.PlaceTextUnder(image, doc);
                            content.RemoveElement(0);

                            // Use the named constant rather than a bare -1 to insert the
                            // form at the start of the page content.
                            content.AddElement(form, Content.BeforeFirst);
                            page.UpdateContent();
                        }

                        doc.Save(SaveFlags.Full, sOutput);
                    }
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Entry point of the OCR timing sample: for each Performance setting, creates an
        /// OCR engine, passes the content of every page of the input PDF to
        /// AddTextToImages, and prints the elapsed time per page.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: args[0] replaces the default input path and args[1]
        /// replaces the default output path (which is only echoed, see below).
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddTextToDocument Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                String sOutput = "../AddTextToDocument-out.pdf";

                // Command-line arguments, when present, override the defaults above.
                if (args.Length > 0)
                {
                    sInput = args[0];
                }
                if (args.Length > 1)
                {
                    sOutput = args[1];
                }

                // NOTE: the output path is only echoed. This method never calls
                // Page.UpdateContent or Document.Save, so no output file is produced;
                // it only measures OCR time.
                Console.WriteLine("Input file: " + sInput);
                Console.WriteLine("Writing output to: " + sOutput);

                // Performance settings to time, in the order they are run.
                Datalogics.PDFL.Performance[] performanceOptions =
                {
                    Datalogics.PDFL.Performance.Default,
                    Datalogics.PDFL.Performance.Faster,
                    Datalogics.PDFL.Performance.MoreAccuracy,
                    Datalogics.PDFL.Performance.BestAccuracy
                };

                // The same OCRParams instance is reused for every run; the segmentation
                // mode is the same for all of them, so it is set once up front.
                OCRParams ocrParams = new OCRParams();
                ocrParams.PageSegmentationMode = PageSegmentationMode.Automatic;

                using (Document doc = new Document(sInput))
                {
                    foreach (Datalogics.PDFL.Performance perfOpt in performanceOptions)
                    {
                        ocrParams.Performance = perfOpt;
                        Console.WriteLine("\nUsing Performance option " + perfOpt.ToString() + " and PageSegmentationMode.Automatic");

                        // A fresh engine is created per setting so it picks up the
                        // updated parameters.
                        using (OCREngine ocrEngine = new OCREngine(ocrParams))
                        {
                            for (int numPage = 0; numPage < doc.NumPages; numPage++)
                            {
                                using (Page page = doc.GetPage(numPage))
                                {
                                    Content content = page.Content;

                                    // Time only the AddTextToImages call for this page.
                                    Stopwatch pageTimer = Stopwatch.StartNew();
                                    AddTextToImages(doc, content, ocrEngine);
                                    pageTimer.Stop();

                                    TimeSpan timeElapsed = pageTimer.Elapsed;
                                    Console.WriteLine("  Page " + numPage + " -- Time elapsed: " + timeElapsed.TotalMilliseconds + " milliseconds");
                                }
                            }
                        }
                    }
                }
            }
        }