// POST api/values
/// <summary>
/// Runs OCR with barcode reading enabled over a fixed sample PDF and returns the
/// barcodes found in it.
/// </summary>
/// <param name="value">Request body. Not used by this action.</param>
/// <returns>
/// 200 OK carrying the barcodes from the OCR result, or 500 Internal Server Error
/// with a generic message when the read throws.
/// </returns>
public IHttpActionResult Post([FromBody] string value)
{
    try
    {
        var Ocr = new AdvancedOcr()
        {
            CleanBackgroundNoise = true,
            ColorDepth = 4,
            ColorSpace = AdvancedOcr.OcrColorSpace.Color,
            EnhanceContrast = true,
            DetectWhiteTextOnDarkBackgrounds = true,
            RotateAndStraighten = true,
            Language = IronOcr.Languages.English.OcrLanguagePack,
            EnhanceResolution = false,
            InputImageType = AdvancedOcr.InputTypes.Document,
            ReadBarCodes = true,
            Strategy = AdvancedOcr.OcrStrategy.Advanced
        };

        // Backslashes are literal inside a verbatim (@"") string, so each separator is written once.
        var Results = Ocr.ReadPdf(@"C:\Users\hudso\source\repos\IroBarCodeTest\IroBarCodeTest\teste2.pdf");

        return Ok(Results.Barcodes);
    }
    catch (Exception)
    {
        // A failure here is a server-side problem, not a malformed request, so report 500
        // rather than 400. The response body stays generic so no exception detail leaks to the client.
        return Content(System.Net.HttpStatusCode.InternalServerError, "Internal server error");
    }
}
/// <summary>
/// Processes every PDF file in the configured input folder: OCRs the cropped region,
/// extracts the customer and sales-order numbers, and passes the file with both numbers
/// to <c>SaveFile</c>. A failure on one file is logged and does not stop the remaining files.
/// </summary>
public void Execute()
{
    // Match the extension case-insensitively so "SCAN.PDF" is picked up as well as "scan.pdf".
    var pdfFiles = Directory.GetFiles(_settings.InputFolder)
        .Where(f => string.Equals(Path.GetExtension(f), ".pdf", StringComparison.OrdinalIgnoreCase));

    foreach (var s in pdfFiles)
    {
        try
        {
            // OCR sometimes gives sub-optimal results at random. Attempt up to 3 times for a good read.
            var customerNumber = Undetected;
            var soNumber = Undetected;
            var attempts = 3;

            do
            {
                // Read the text
                var imageText = _ocr.ReadPdf(s, _crop).Text;

                // Extract numbers
                // NOTE(review): the ref arguments are passed as (soNumber, customerNumber) while the
                // returned tuple is deconstructed as (customerNumber, soNumber) — confirm this order
                // against the GetNumbers signature.
                (customerNumber, soNumber) = GetNumbers(imageText, ref soNumber, ref customerNumber);
            }
            while (--attempts > 0 && (IsUndetected(customerNumber) || IsUndetected(soNumber)));

            // Move the file
            SaveFile(s, customerNumber, soNumber);
        }
        catch (Exception e)
        {
            LogError(e.Message);
        }
    }
}
/// <summary>
/// Reads a fixed rectangular region of <c>test-pdf-ocr.pdf</c> located in
/// <paramref name="currentDir"/> and writes each result page, followed by the full
/// recognised text, to the console.
/// </summary>
/// <param name="currentDir">Directory that contains <c>test-pdf-ocr.pdf</c>.</param>
private static void TestScannedPdf(string currentDir)
{
    AdvancedOcr Ocr = new AdvancedOcr()
    {
        CleanBackgroundNoise = false,
        ColorDepth = 4,
        ColorSpace = AdvancedOcr.OcrColorSpace.Color,
        EnhanceContrast = false,
        DetectWhiteTextOnDarkBackgrounds = false,
        RotateAndStraighten = false,
        Language = IronOcr.Languages.English.OcrLanguagePack,
        EnhanceResolution = false,
        InputImageType = AdvancedOcr.InputTypes.Document,
        ReadBarCodes = true,
        Strategy = AdvancedOcr.OcrStrategy.Fast
    };

    // Path.Combine copes with a trailing separator on currentDir, unlike manual concatenation.
    var testDocument = System.IO.Path.Combine(currentDir, "test-pdf-ocr.pdf");

    // Region to read: x = 71, y = 1278, width = 1282, height = 90.
    var rect = new Rectangle(71, 1278, 1282, 90);

    // NOTE(review): the trailing 1 is presumably the page number — confirm against the
    // IronOcr ReadPdf overload being bound here.
    var Results = Ocr.ReadPdf(testDocument, rect, 1);

    foreach (var page in Results.Pages)
    {
        Console.WriteLine(page);
    }

    Console.WriteLine(Results.Text);
}
/// <summary>
/// OCRs the PDF at <paramref name="FileLocation"/> with a grayscale, English,
/// advanced-strategy engine (barcode reading disabled) and returns the raw result.
/// Progress messages are written to the console before and after the read.
/// </summary>
/// <param name="FileLocation">Path of the PDF file to read.</param>
/// <returns>The OCR result produced by <c>ReadPdf</c>.</returns>
public OcrResult UseOcr(string FileLocation)
{
    Console.WriteLine("Processing PDF");

    AdvancedOcr engine = new AdvancedOcr();
    engine.CleanBackgroundNoise = true;
    engine.EnhanceContrast = true;
    engine.EnhanceResolution = true;
    engine.Language = IronOcr.Languages.English.OcrLanguagePack;
    engine.Strategy = IronOcr.AdvancedOcr.OcrStrategy.Advanced;
    engine.ColorSpace = AdvancedOcr.OcrColorSpace.GrayScale;
    engine.DetectWhiteTextOnDarkBackgrounds = false;
    engine.InputImageType = AdvancedOcr.InputTypes.Document;
    engine.RotateAndStraighten = false;
    engine.ReadBarCodes = false;
    engine.ColorDepth = 4;

    // The second argument is passed as null, exactly as before.
    OcrResult scanned = engine.ReadPdf(FileLocation, null);

    Console.WriteLine("PDF Processed");
    return scanned;
}
/// <summary>
/// OCRs the PDF at <paramref name="pdfFilePath"/> using the fast strategy in grayscale
/// with all image clean-up options disabled, and returns the recognised text.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF file to read.</param>
/// <returns>The full text of the OCR result.</returns>
public static string ParsePdf(string pdfFilePath)
{
    var Ocr = new AdvancedOcr()
    {
        CleanBackgroundNoise = false,
        ColorDepth = 4,
        ColorSpace = AdvancedOcr.OcrColorSpace.GrayScale,
        EnhanceContrast = false,
        DetectWhiteTextOnDarkBackgrounds = false,
        RotateAndStraighten = false,
        Language = IronOcr.Languages.English.OcrLanguagePack,
        EnhanceResolution = false,
        InputImageType = AdvancedOcr.InputTypes.Document,
        ReadBarCodes = false,
        Strategy = AdvancedOcr.OcrStrategy.Fast
    };

    // Only the text is needed; the pages and barcodes of the result are not read.
    return Ocr.ReadPdf(pdfFilePath).Text;
}
/// <summary>
/// OCRs the PDF at <paramref name="filePath"/> with the Danish language pack and
/// returns the recognised text. This is a best-effort call: any failure yields an
/// empty string instead of an exception.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <returns>The recognised text, or <c>""</c> when the read fails.</returns>
public static string OCRPDF(string filePath)
{
    try
    {
        var OCR = new AdvancedOcr()
        {
            Language = IronOcr.Languages.Danish.OcrLanguagePack,
            ColorSpace = AdvancedOcr.OcrColorSpace.GrayScale,
            EnhanceResolution = false,
            EnhanceContrast = true,
            CleanBackgroundNoise = true,
            ColorDepth = 4,
            RotateAndStraighten = false,
            DetectWhiteTextOnDarkBackgrounds = false,
            ReadBarCodes = false,
            Strategy = AdvancedOcr.OcrStrategy.Fast,
            InputImageType = AdvancedOcr.InputTypes.Document
        };

        return OCR.ReadPdf(filePath).Text;
    }
    catch (System.Exception ex)
    {
        // Callers still receive "" on failure, but the cause is traced instead of being
        // discarded, so a missing file or OCR fault can be diagnosed.
        System.Diagnostics.Trace.TraceError("OCRPDF failed for '{0}': {1}", filePath, ex);
        return "";
    }
}
/// <summary>
/// Scratch entry point: OCRs page 1 of a PDF with barcode reading enabled.
/// </summary>
/// <param name="args">
/// Optional. When at least one argument is supplied, <c>args[0]</c> is used as the PDF
/// path; otherwise the built-in sample path is used.
/// </param>
static void Main(string[] args)
{
    const string defaultPdfFile = @"C:\Users\Dornas\Dropbox\__ XX - HARD-QUALE\_KNOWLEDGE-CENTER\_DOCUMENT_DATABASES\RETINA\10.1038-134065a0.pdf";

    // Allow the document to be chosen on the command line without editing the source.
    string pdfFile = (args != null && args.Length > 0) ? args[0] : defaultPdfFile;

    var Ocr = new AdvancedOcr()
    {
        CleanBackgroundNoise = false,
        ColorDepth = 4,
        ColorSpace = AdvancedOcr.OcrColorSpace.Color,
        EnhanceContrast = false,
        DetectWhiteTextOnDarkBackgrounds = false,
        RotateAndStraighten = false,
        Language = IronOcr.Languages.English.OcrLanguagePack,
        EnhanceResolution = false,
        InputImageType = AdvancedOcr.InputTypes.Document,
        ReadBarCodes = true,
        Strategy = AdvancedOcr.OcrStrategy.Fast
    };

    var PagesToRead = new[] { 1 };

    // The result is not consumed yet; inspect Results (Pages, Barcodes, Text) in a debugger.
    var Results = Ocr.ReadPdf(pdfFile, PagesToRead);
}
/// <summary>
/// Demo action: OCRs a fixed set of sample images and PDFs under <c>~/doc</c> with the
/// French language pack and exposes each recognised text to the view through
/// <c>ViewBag</c>. The last sample also has its barcode values listed.
/// </summary>
/// <returns>The default view for this action.</returns>
public ActionResult Index()
{
    var Ocr = new AdvancedOcr()
    {
        Language = IronOcr.Languages.French.OcrLanguagePack,
        ColorSpace = AdvancedOcr.OcrColorSpace.GrayScale,
        EnhanceResolution = true,
        EnhanceContrast = true,
        CleanBackgroundNoise = true,
        ColorDepth = 4,
        RotateAndStraighten = false,
        DetectWhiteTextOnDarkBackgrounds = false,
        ReadBarCodes = true,
        Strategy = AdvancedOcr.OcrStrategy.Fast,
        InputImageType = AdvancedOcr.InputTypes.Document
    };

    // Each sample follows the same steps: map the virtual path, OCR it, keep the text.
    Func<string, string> readImageText = virtualPath => Ocr.Read(Server.MapPath(virtualPath)).Text;
    Func<string, string> readPdfText = virtualPath => Ocr.ReadPdf(Server.MapPath(virtualPath)).Text;

    // Samples are processed in the same order as before.
    ViewBag.message = readImageText("~/doc/code c.PNG");
    ViewBag.pdf = readPdfText("~/doc/carte-danick_takam.pdf");
    ViewBag.carteImg = readImageText("~/doc/carte-danick_takam.PNG");
    ViewBag.diplomepdf = readPdfText("~/doc/Diplome d'etude collegiale niveau BAC +2 (CCNB-CANADA).pdf");
    ViewBag.diplomeImg = readImageText("~/doc/Diplome d'etude collegiale niveau BAC +2 (CCNB-CANADA).PNG");
    ViewBag.permispdf = readPdfText("~/doc/permis-danick_takam.pdf");
    ViewBag.permisImg = readImageText("~/doc/permis-danick_takam.PNG");

    // The final sample reports both its text and the values of any barcodes found.
    var qrResult = Ocr.Read(Server.MapPath("~/doc/195710a19f414b6cbf6da3dc58e6e4a4.PNG"));
    var barcodeValues = qrResult.Barcodes.Select(b => b.Value);
    ViewBag.qrcode = string.Format("Texte : {0} Barcodes: {1}", qrResult.Text, String.Join(",", barcodeValues));

    return View();
}
/// <summary>
/// Handles the upload button. Depending on the form type selected in
/// <c>DropDownList1</c> and the file type radio buttons, OCRs the uploaded PDF or image
/// and writes the outcome (or a guidance message) to <c>TextBox1</c>.
/// </summary>
/// <remarks>
/// Index 0 (test file): a PDF upload is OCRed over a fixed crop area and the paragraphs
/// with their confidence scores are shown as JSON; an image upload is OCRed over a fixed
/// crop area and the recognised text is shown. Indexes 1 (ACORD 25) and 2 (W9) only
/// report that processing is unavailable. Any other index leaves the text box untouched.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void UploadButton_Click(object sender, EventArgs e)
{
    int selectedIndexInt = DropDownList1.SelectedIndex;

    // ACORD 25 form - these will be typed.
    if (selectedIndexInt == 1)
    {
        TextBox1.Text = "Processing for ACORD 25 Forms is unavailable at the moment.";
        return;
    }

    // W9 form - these will be handwritten.
    if (selectedIndexInt == 2)
    {
        TextBox1.Text = FileUpload1.HasFile
            ? "Processing for W9 Forms is unavailable at the moment."
            : "Please upload a file.";
        return;
    }

    // Only index 0 (test file) is processed below.
    if (selectedIndexInt != 0)
    {
        return;
    }

    if (!FileUpload1.HasFile)
    {
        TextBox1.Text = "Please upload a file.";
        return;
    }

    if (RadioButton1.Checked)
    {
        // RadioButton1 = PDF file.
        var file = FileUpload1.FileContent;

        // Paragraphs collected here are serialized into JSON for display.
        var OutputList = new List<Result>();

        var Ocr = new AdvancedOcr()
        {
            Language = IronOcr.Languages.English.OcrLanguagePack,
            ColorSpace = AdvancedOcr.OcrColorSpace.GrayScale,
            EnhanceResolution = true,
            EnhanceContrast = true,
            CleanBackgroundNoise = true,
            ColorDepth = 4,
            RotateAndStraighten = false,
            DetectWhiteTextOnDarkBackgrounds = false,
            ReadBarCodes = false,
            Strategy = AdvancedOcr.OcrStrategy.Fast,
            InputImageType = AdvancedOcr.InputTypes.Document
        };

        // Crop area in pixels: x = 50, y = 330, width = 2200, height = 115.
        var CropArea = new Rectangle(50, 330, 2200, 115);
        var pdfResult = Ocr.ReadPdf(file, CropArea);

        foreach (var page in pdfResult.Pages)
        {
            foreach (var paragraph in page.Paragraphs)
            {
                OutputList.Add(new Result()
                {
                    Text = paragraph.Text,
                    Confidence = Math.Round(paragraph.Confidence, 2)
                });
            }
        }

        var serializer = new JavaScriptSerializer();
        TextBox1.Text = serializer.Serialize(OutputList);
    }
    else if (RadioButton2.Checked)
    {
        // RadioButton2 = image file.
        var Ocr = new AdvancedOcr()
        {
            CleanBackgroundNoise = true,
            EnhanceContrast = true,
            EnhanceResolution = true,
            Language = IronOcr.Languages.English.OcrLanguagePack,
            Strategy = IronOcr.AdvancedOcr.OcrStrategy.Advanced,
            ColorSpace = AdvancedOcr.OcrColorSpace.Color,
            DetectWhiteTextOnDarkBackgrounds = true,
            InputImageType = AdvancedOcr.InputTypes.AutoDetect,
            RotateAndStraighten = true,
            ReadBarCodes = true,
            ColorDepth = 4
        };

        // Crop area in pixels: x = 41, y = 49, width = 1000, height = 80.
        var CropArea = new Rectangle(41, 49, 1000, 80);

        // Ocr.Read(string, Rectangle) takes a file path, so the upload is written to a
        // uniquely named temp file (keeping its extension) rather than being decoded as
        // text and passed in as if it were a path.
        string tempImagePath = System.IO.Path.Combine(
            System.IO.Path.GetTempPath(),
            Guid.NewGuid().ToString("N") + System.IO.Path.GetExtension(FileUpload1.FileName));

        try
        {
            FileUpload1.SaveAs(tempImagePath);
            var imageResult = Ocr.Read(tempImagePath, CropArea);
            TextBox1.Text = imageResult.Text;
        }
        finally
        {
            // File.Delete is a no-op when the file was never created.
            System.IO.File.Delete(tempImagePath);
        }
    }
    else
    {
        TextBox1.Text = "Please select a file type.";
    }
}