/// <summary>
/// Entry point of the AddTextToDocument sample. Opens the input PDF, hands the
/// content of every page to <c>AddTextToImages</c> together with an OCR engine,
/// and saves the document to the output path.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> replaces the default input path and
/// <c>args[1]</c> replaces the default output path.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("AddTextToDocument Sample:");

    // ReSharper disable once UnusedVariable
    using (Library library = new Library())
    {
        Console.WriteLine("Initialized the library.");

        // Command-line arguments, when present, take precedence over the defaults.
        string defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
        string inputPath = args.Length > 0 ? args[0] : defaultInput;
        string outputPath = args.Length > 1 ? args[1] : "AddTextToDocument-out.pdf";

        Console.WriteLine("Input file: " + inputPath);
        Console.WriteLine("Writing output to: " + outputPath);

        OCRParams settings = new OCRParams();

        // OCRParams.Languages selects the languages the OCR engine tries to detect;
        // English is what the engine looks for by default. Further languages can be
        // requested by adding more LanguageSetting entries to the list, e.g.
        //     new LanguageSetting(Language.Japanese, false)
        var languages = new List<LanguageSetting>
        {
            new LanguageSetting(Language.English, false)
        };
        settings.Languages = languages;

        using (OCREngine engine = new OCREngine(settings))
        using (Document pdf = new Document(inputPath))
        {
            for (int pageIndex = 0; pageIndex < pdf.NumPages; pageIndex++)
            {
                using (Page currentPage = pdf.GetPage(pageIndex))
                {
                    Content pageContent = currentPage.Content;
                    Console.WriteLine("Adding text to page: " + pageIndex);

                    AddTextToImages(pdf, pageContent, engine);

                    // Commit the modified content back to the page.
                    currentPage.UpdateContent();
                }
            }

            pdf.Save(SaveFlags.Full, outputPath);
        }
    }
}
/// <summary>
/// Entry point of the AddTextToDocument sample. Opens the input PDF, hands the
/// content of every page to <c>AddTextToImages</c> together with an OCR engine
/// configured for automatic segmentation and best accuracy, and saves the result.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> replaces the default input path and
/// <c>args[1]</c> replaces the default output path.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("AddTextToDocument Sample:");

    using (Library library = new Library())
    {
        Console.WriteLine("Initialized the library.");

        // Command-line arguments, when present, take precedence over the defaults.
        string defaultInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
        string inputPath = args.Length > 0 ? args[0] : defaultInput;
        string outputPath = args.Length > 1 ? args[1] : "../AddTextToDocument-out.pdf";

        Console.WriteLine("Input file: " + inputPath);
        Console.WriteLine("Writing output to: " + outputPath);

        OCRParams settings = new OCRParams();

        // Automatic segmentation leaves it to the OCR engine to decide how the
        // page is split up for text detection.
        settings.PageSegmentationMode = PageSegmentationMode.Automatic;

        // Trade run time for accuracy. With Tesseract 3 the engine runs two
        // different algorithms and keeps the result it is most confident in.
        settings.Performance = Performance.BestAccuracy;

        using (OCREngine engine = new OCREngine(settings))
        using (Document pdf = new Document(inputPath))
        {
            for (int pageIndex = 0; pageIndex < pdf.NumPages; pageIndex++)
            {
                using (Page currentPage = pdf.GetPage(pageIndex))
                {
                    Content pageContent = currentPage.Content;
                    Console.WriteLine("Adding text to page: " + pageIndex);

                    AddTextToImages(pdf, pageContent, engine);

                    // Commit the modified content back to the page.
                    currentPage.UpdateContent();
                }
            }

            pdf.Save(SaveFlags.Full, outputPath);
        }
    }
}
/// <summary>
/// Entry point of the AddTextToImage sample. Builds a one-page PDF around the
/// input image, replaces that image with the form returned by
/// <c>OCREngine.PlaceTextUnder</c>, and saves the document.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> replaces the default input path and
/// <c>args[1]</c> replaces the default output path.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("AddTextToImage Sample:");

    // ReSharper disable once UnusedVariable
    using (Library library = new Library())
    {
        Console.WriteLine("Initialized the library.");

        // Command-line arguments, when present, take precedence over the defaults.
        string defaultInput = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
        string inputPath = args.Length > 0 ? args[0] : defaultInput;
        string outputPath = args.Length > 1 ? args[1] : "AddTextToImage-out.pdf";

        Console.WriteLine("Input file: " + inputPath);
        Console.WriteLine("Writing output to: " + outputPath);

        OCRParams settings = new OCRParams();

        // OCRParams.Languages selects the languages the OCR engine tries to detect;
        // English is what the engine looks for by default. Further languages can be
        // requested by adding more LanguageSetting entries to the list, e.g.
        //     new LanguageSetting(Language.Japanese, false)
        var languages = new List<LanguageSetting>
        {
            new LanguageSetting(Language.English, false)
        };
        settings.Languages = languages;

        // If the image resolution is not 300 dpi, specify it here. A correct
        // resolution gives better OCR results, especially with automatic image
        // preprocessing.
        // settings.Resolution = 600;

        using (OCREngine engine = new OCREngine(settings))
        using (Document pdf = new Document())
        {
            using (Image sourceImage = new Image(inputPath, pdf))
            {
                // Size the new page from the image's Matrix.A and Matrix.D fields.
                // There are 72 PDF user space units in one inch.
                var pageWidth = sourceImage.Matrix.A;
                var pageHeight = sourceImage.Matrix.D;
                Rect pageBounds = new Rect(0, 0, pageWidth, pageHeight);

                using (Page newPage = pdf.CreatePage(Document.BeforeFirstPage, pageBounds))
                {
                    newPage.Content.AddElement(sourceImage);
                    newPage.UpdateContent();
                }
            }

            using (Page firstPage = pdf.GetPage(0))
            {
                Content pageContent = firstPage.Content;
                Image pageImage = (Image)pageContent.GetElement(0);

                // PlaceTextUnder creates a form holding the image with the generated
                // text under it. That form then takes the place of the original
                // image on the page.
                Form textForm = engine.PlaceTextUnder(pageImage, pdf);
                pageContent.RemoveElement(0);
                pageContent.AddElement(textForm, Content.BeforeFirst);
                firstPage.UpdateContent();
            }

            pdf.Save(SaveFlags.Full, outputPath);
        }
    }
}
/// <summary>
/// Entry point of the AddTextToImage sample. Builds a one-page PDF around the
/// input image, replaces that image with the form returned by
/// <c>OCREngine.PlaceTextUnder</c>, and saves the document. The OCR engine is
/// configured for automatic page segmentation and best accuracy.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> replaces the default input path and
/// <c>args[1]</c> replaces the default output path.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("AddTextToImage Sample:");

    using (Library lib = new Library())
    {
        Console.WriteLine("Initialized the library.");

        String sInput = Library.ResourceDirectory + "Sample_Input/text_as_image.jpg";
        String sOutput = "../AddTextToImage-out.pdf";

        // Command-line arguments, when present, take precedence over the defaults.
        if (args.Length > 0)
        {
            sInput = args[0];
        }

        if (args.Length > 1)
        {
            sOutput = args[1];
        }

        Console.WriteLine("Input file: " + sInput);
        Console.WriteLine("Writing output to: " + sOutput);

        OCRParams ocrParams = new OCRParams();

        // Setting the segmentation mode to AUTOMATIC lets the OCR engine
        // choose how to segment the page for text detection.
        ocrParams.PageSegmentationMode = PageSegmentationMode.Automatic;

        // This tells the selected engine to improve accuracy at the expense
        // of increased run time. For Tesseract 3, it runs two different
        // algorithms, and chooses the one that has the most confidence.
        ocrParams.Performance = Performance.BestAccuracy;

        using (OCREngine ocrEngine = new OCREngine(ocrParams))
        {
            // Create an empty document to hold the image.
            using (Document doc = new Document())
            {
                using (Image newimage = new Image(sInput, doc))
                {
                    // Create a PDF page which is the size of the image, taken from
                    // the image's Matrix.A and Matrix.D fields.
                    // There are 72 PDF user space units in one inch.
                    Rect pageRect = new Rect(0, 0, newimage.Matrix.A, newimage.Matrix.D);

                    using (Page docpage = doc.CreatePage(Document.BeforeFirstPage, pageRect))
                    {
                        docpage.Content.AddElement(newimage);
                        docpage.UpdateContent();
                    }
                }

                using (Page page = doc.GetPage(0))
                {
                    Content content = page.Content;
                    Element elem = content.GetElement(0);
                    Image image = (Image)elem;

                    // PlaceTextUnder creates a form with the image and the generated
                    // text under the image. The original image in the page is then
                    // replaced by the form.
                    Form form = ocrEngine.PlaceTextUnder(image, doc);
                    content.RemoveElement(0);

                    // Use the named constant rather than a bare -1 for the insert position.
                    content.AddElement(form, Content.BeforeFirst);
                    page.UpdateContent();
                }

                doc.Save(SaveFlags.Full, sOutput);
            }
        }
    }
}
/// <summary>
/// Entry point of the AddTextToDocument timing sample. For each
/// <c>Datalogics.PDFL.Performance</c> option it creates an OCR engine with
/// automatic page segmentation, passes every page's content to
/// <c>AddTextToImages</c>, and prints the elapsed time per page.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> replaces the default input path and
/// <c>args[1]</c> replaces the default output path.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("AddTextToDocument Sample:");

    using (Library lib = new Library())
    {
        Console.WriteLine("Initialized the library.");

        String sInput = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
        String sOutput = "../AddTextToDocument-out.pdf";

        // Command-line arguments, when present, take precedence over the defaults.
        if (args.Length > 0)
        {
            sInput = args[0];
        }

        if (args.Length > 1)
        {
            sOutput = args[1];
        }

        Console.WriteLine("Input file: " + sInput);

        // NOTE(review): this variant never calls doc.Save, so nothing is actually
        // written to sOutput; the message is kept so console output is unchanged.
        Console.WriteLine("Writing output to: " + sOutput);

        OCRParams ocrParams = new OCRParams();

        using (Document doc = new Document(sInput))
        {
            // Performance options to time, in the order they are reported.
            var performanceOpt = new List<Datalogics.PDFL.Performance>
            {
                Datalogics.PDFL.Performance.Default,
                Datalogics.PDFL.Performance.Faster,
                Datalogics.PDFL.Performance.MoreAccuracy,
                Datalogics.PDFL.Performance.BestAccuracy
            };

            foreach (Datalogics.PDFL.Performance perfOpt in performanceOpt)
            {
                ocrParams.PageSegmentationMode = PageSegmentationMode.Automatic;
                ocrParams.Performance = perfOpt;

                Console.WriteLine("\nUsing Performance option " + perfOpt.ToString() + " and PageSegmentationMode.Automatic");

                // A fresh engine is created per option so each run uses its own settings.
                using (OCREngine ocrEngine = new OCREngine(ocrParams))
                {
                    for (int numPage = 0; numPage < doc.NumPages; numPage++)
                    {
                        using (Page page = doc.GetPage(numPage))
                        {
                            Content content = page.Content;

                            // Time only the AddTextToImages call. The page content is
                            // not updated and the document is not saved here; this
                            // variant only reports timings.
                            Stopwatch sw = Stopwatch.StartNew();
                            AddTextToImages(doc, content, ocrEngine);
                            sw.Stop();

                            TimeSpan timeElapsed = sw.Elapsed;
                            Console.WriteLine(" Page " + numPage + " -- Time elapsed: " + timeElapsed.TotalMilliseconds + " milliseconds");
                        }
                    }
                }
            }
        }
    }
}