예제 #1
0
        /// <summary>
        /// POST api/values. Runs OCR (with barcode reading enabled) over a fixed sample PDF
        /// and returns the barcodes found in it.
        /// </summary>
        /// <param name="value">Request body; not used by this action.</param>
        /// <returns>
        /// 200 OK carrying the detected barcodes, or 400 Bad Request with a generic message
        /// when anything in the OCR pipeline throws.
        /// </returns>
        public IHttpActionResult Post([FromBody] string value)
        {
            try
            {
                var ocr = new AdvancedOcr()
                {
                    CleanBackgroundNoise             = true,
                    ColorDepth                       = 4,
                    ColorSpace                       = AdvancedOcr.OcrColorSpace.Color,
                    EnhanceContrast                  = true,
                    DetectWhiteTextOnDarkBackgrounds = true,
                    RotateAndStraighten              = true,
                    Language          = IronOcr.Languages.English.OcrLanguagePack,
                    EnhanceResolution = false,
                    InputImageType    = AdvancedOcr.InputTypes.Document,
                    ReadBarCodes      = true,
                    Strategy          = AdvancedOcr.OcrStrategy.Advanced
                };

                // Verbatim string: backslashes are literal here and must not be doubled.
                var results = ocr.ReadPdf(@"C:\Users\hudso\source\repos\IroBarCodeTest\IroBarCodeTest\teste2.pdf");

                return Ok(results.Barcodes);
            }
            catch (Exception ex)
            {
                // Record the real failure before answering with the generic message;
                // otherwise the cause is lost entirely.
                System.Diagnostics.Trace.TraceError(ex.ToString());

                // NOTE(review): the status stays 400 so existing clients see no change,
                // although the message describes a server-side failure.
                return BadRequest("Internal server error");
            }
        }
예제 #2
0
        /// <summary>
        /// Processes every PDF in the configured input folder: OCRs it, extracts the
        /// customer number and SO number from the recognised text, and passes the file
        /// together with both numbers to <c>SaveFile</c>.
        /// </summary>
        /// <remarks>
        /// A failure on one file is reported through <c>LogError</c> and does not stop
        /// the remaining files from being processed.
        /// </remarks>
        public void Execute()
        {
            // Compare the extension case-insensitively so files named "*.PDF" are not skipped.
            // GetFiles returns a complete array up front, so files handled inside the loop
            // cannot disturb the enumeration.
            var pdfFiles = Directory.GetFiles(_settings.InputFolder)
                                    .Where(f => string.Equals(Path.GetExtension(f), ".pdf", StringComparison.OrdinalIgnoreCase));

            foreach (var pdfFile in pdfFiles)
            {
                try
                {
                    // OCR sometimes gives sub-optimal results at random. Attempt up to 3 times for a good read.
                    var customerNumber = Undetected;
                    var soNumber       = Undetected;
                    var attempts       = 3;
                    do
                    {
                        // Read the text
                        var imageText = _ocr.ReadPdf(pdfFile, _crop).Text;

                        // Extract numbers
                        (customerNumber, soNumber) = GetNumbers(imageText, ref soNumber, ref customerNumber);
                    } while (--attempts > 0 && (IsUndetected(customerNumber) || IsUndetected(soNumber)));

                    // Move the file
                    SaveFile(pdfFile, customerNumber, soNumber);
                }
                catch (Exception e)
                {
                    LogError(e.Message);
                }
            }
        }
        /// <summary>
        /// Manual check: OCRs a fixed crop rectangle of <c>test-pdf-ocr.pdf</c> located in
        /// <paramref name="currentDir"/> and writes each result page, followed by the full
        /// recognised text, to the console.
        /// </summary>
        /// <param name="currentDir">Directory that contains <c>test-pdf-ocr.pdf</c>.</param>
        private static void TestScannedPdf(string currentDir)
        {
            AdvancedOcr ocr = new AdvancedOcr()
            {
                CleanBackgroundNoise             = false,
                ColorDepth                       = 4,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.Color,
                EnhanceContrast                  = false,
                DetectWhiteTextOnDarkBackgrounds = false,
                RotateAndStraighten              = false,
                Language          = IronOcr.Languages.English.OcrLanguagePack,
                EnhanceResolution = false,
                InputImageType    = AdvancedOcr.InputTypes.Document,
                ReadBarCodes      = true,
                Strategy          = AdvancedOcr.OcrStrategy.Fast
            };

            // Path.Combine copes with a trailing separator on currentDir, unlike plain concatenation.
            var testDocument = System.IO.Path.Combine(currentDir, "test-pdf-ocr.pdf");

            // Crop rectangle: origin (71, 1278), size 1282 x 90.
            Rectangle rect = new Rectangle(new Point(71, 1278), new Size(1282, 90));

            // NOTE(review): the third argument (1) is presumably a page selector - confirm against the IronOcr API.
            var results     = ocr.ReadPdf(testDocument, rect, 1);
            var fullPdfText = results.Text;

            foreach (var page in results.Pages)
            {
                Console.WriteLine(page);
            }

            Console.WriteLine(fullPdfText);
        }
예제 #4
0
        /// <summary>
        /// OCRs the PDF at <paramref name="FileLocation"/> using the advanced strategy on a
        /// grayscale rendering, with noise cleaning, contrast and resolution enhancement
        /// switched on and barcode reading switched off.
        /// </summary>
        /// <param name="FileLocation">Location of the PDF handed to <c>ReadPdf</c>.</param>
        /// <returns>The result object produced by <c>ReadPdf</c>.</returns>
        public OcrResult UseOcr(string FileLocation)
        {
            Console.WriteLine("Processing PDF");

            // Configure the engine property by property, in the same order as before.
            var engine = new AdvancedOcr();
            engine.CleanBackgroundNoise             = true;
            engine.EnhanceContrast                  = true;
            engine.EnhanceResolution                = true;
            engine.Language                         = IronOcr.Languages.English.OcrLanguagePack;
            engine.Strategy                         = IronOcr.AdvancedOcr.OcrStrategy.Advanced;
            engine.ColorSpace                       = AdvancedOcr.OcrColorSpace.GrayScale;
            engine.DetectWhiteTextOnDarkBackgrounds = false;
            engine.InputImageType                   = AdvancedOcr.InputTypes.Document;
            engine.RotateAndStraighten              = false;
            engine.ReadBarCodes                     = false;
            engine.ColorDepth                       = 4;

            var outcome = engine.ReadPdf(FileLocation, null);

            Console.WriteLine("PDF Processed");
            return outcome;
        }
예제 #5
0
        /// <summary>
        /// Extracts the text of a PDF through OCR, using the fast strategy on a grayscale
        /// rendering with every clean-up/enhancement option and barcode reading disabled.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF handed to <c>ReadPdf</c>.</param>
        /// <returns>The <c>Text</c> of the OCR result.</returns>
        public static string ParsePdf(string pdfFilePath)
        {
            var ocr = new AdvancedOcr()
            {
                CleanBackgroundNoise             = false,
                ColorDepth                       = 4,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.GrayScale,
                EnhanceContrast                  = false,
                DetectWhiteTextOnDarkBackgrounds = false,
                RotateAndStraighten              = false,
                Language          = IronOcr.Languages.English.OcrLanguagePack,
                EnhanceResolution = false,
                InputImageType    = AdvancedOcr.InputTypes.Document,
                ReadBarCodes      = false,
                Strategy          = AdvancedOcr.OcrStrategy.Fast
            };

            // Only the text is needed; the pages and barcodes of the result are not read.
            return ocr.ReadPdf(pdfFilePath).Text;
        }
예제 #6
0
 /// <summary>
 /// Extracts the text of a PDF through OCR using the Danish language pack and the fast
 /// strategy (grayscale, contrast enhancement and noise cleaning on, barcodes off).
 /// </summary>
 /// <param name="filePath">Path of the PDF handed to <c>ReadPdf</c>.</param>
 /// <returns>
 /// The recognised text, or an empty string when the OCR run throws. The failure is
 /// written to the trace listeners rather than being dropped silently.
 /// </returns>
 public static string OCRPDF(string filePath)
 {
     try
     {
         var ocr = new AdvancedOcr()
         {
             Language                         = IronOcr.Languages.Danish.OcrLanguagePack,
             ColorSpace                       = AdvancedOcr.OcrColorSpace.GrayScale,
             EnhanceResolution                = false,
             EnhanceContrast                  = true,
             CleanBackgroundNoise             = true,
             ColorDepth                       = 4,
             RotateAndStraighten              = false,
             DetectWhiteTextOnDarkBackgrounds = false,
             ReadBarCodes                     = false,
             Strategy                         = AdvancedOcr.OcrStrategy.Fast,
             InputImageType                   = AdvancedOcr.InputTypes.Document
         };

         return ocr.ReadPdf(filePath).Text;
     }
     catch (System.Exception ex)
     {
         // Callers still get "" on failure, but the cause is now recorded for diagnosis.
         System.Diagnostics.Trace.TraceError(ex.ToString());
         return "";
     }
 }
예제 #7
0
        /// <summary>
        /// Scratch entry point: runs OCR with barcode reading over a hard-coded PDF,
        /// restricted to the page selection <c>{ 1 }</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string pdfFile = @"C:\Users\Dornas\Dropbox\__ XX - HARD-QUALE\_KNOWLEDGE-CENTER\_DOCUMENT_DATABASES\RETINA\10.1038-134065a0.pdf";

            var ocr = new AdvancedOcr()
            {
                CleanBackgroundNoise             = false,
                ColorDepth                       = 4,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.Color,
                EnhanceContrast                  = false,
                DetectWhiteTextOnDarkBackgrounds = false,
                RotateAndStraighten              = false,
                Language          = IronOcr.Languages.English.OcrLanguagePack,
                EnhanceResolution = false,
                InputImageType    = AdvancedOcr.InputTypes.Document,
                ReadBarCodes      = true,
                Strategy          = AdvancedOcr.OcrStrategy.Fast
            };

            var pagesToRead = new[] { 1 };

            // The result is not consumed any further in this method.
            var results = ocr.ReadPdf(pdfFile, pagesToRead);
        }
        /// <summary>
        /// Runs OCR (French language pack, barcode reading on) over a fixed set of sample
        /// images and PDFs under <c>~/doc</c> and publishes each recognised text through
        /// the <c>ViewBag</c>.
        /// </summary>
        /// <returns>The view for this action.</returns>
        public ActionResult Index()
        {
            var ocr = new AdvancedOcr()
            {
                Language                         = IronOcr.Languages.French.OcrLanguagePack,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.GrayScale,
                EnhanceResolution                = true,
                EnhanceContrast                  = true,
                CleanBackgroundNoise             = true,
                ColorDepth                       = 4,
                RotateAndStraighten              = false,
                DetectWhiteTextOnDarkBackgrounds = false,
                ReadBarCodes                     = true,
                Strategy                         = AdvancedOcr.OcrStrategy.Fast,
                InputImageType                   = AdvancedOcr.InputTypes.Document
            };

            // Images go through Read, PDFs through ReadPdf; the documents are processed
            // in the same order as before.
            ViewBag.message = ocr.Read(Server.MapPath("~/doc/code c.PNG")).Text;

            ViewBag.pdf      = ocr.ReadPdf(Server.MapPath("~/doc/carte-danick_takam.pdf")).Text;
            ViewBag.carteImg = ocr.Read(Server.MapPath("~/doc/carte-danick_takam.PNG")).Text;

            ViewBag.diplomepdf = ocr.ReadPdf(Server.MapPath("~/doc/Diplome d'etude collegiale niveau BAC +2 (CCNB-CANADA).pdf")).Text;
            ViewBag.diplomeImg = ocr.Read(Server.MapPath("~/doc/Diplome d'etude collegiale niveau BAC +2 (CCNB-CANADA).PNG")).Text;

            ViewBag.permispdf = ocr.ReadPdf(Server.MapPath("~/doc/permis-danick_takam.pdf")).Text;
            ViewBag.permisImg = ocr.Read(Server.MapPath("~/doc/permis-danick_takam.PNG")).Text;

            // The last sample is the only one whose barcodes are reported along with its text.
            var qrResult      = ocr.Read(Server.MapPath("~/doc/195710a19f414b6cbf6da3dc58e6e4a4.PNG"));
            var barcodeValues = qrResult.Barcodes.Select(b => b.Value);

            ViewBag.qrcode = string.Format("Texte : {0} Barcodes: {1}", qrResult.Text, String.Join(",", barcodeValues));

            return View();
        }
예제 #9
0
        /// <summary>
        /// Handles the upload button: depending on the selected form type and file-type
        /// radio button, OCRs the uploaded file and shows the outcome (or a status
        /// message) in <c>TextBox1</c>.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void UploadButton_Click(object sender, EventArgs e)
        {
            int selectedIndexInt = DropDownList1.SelectedIndex;

            if (selectedIndexInt == 0)
            {
                // Test file.
                if (!FileUpload1.HasFile)
                {
                    TextBox1.Text = "Please upload a file.";
                }
                else if (RadioButton1.Checked)
                {
                    // RadioButton1 = pdf file
                    TextBox1.Text = OcrUploadedPdfAsJson(FileUpload1.FileContent);
                }
                else if (RadioButton2.Checked)
                {
                    TextBox1.Text = OcrUploadedImage(FileUpload1.FileContent);
                }
                else
                {
                    TextBox1.Text = "Please select a file type.";
                }
            }
            else if (selectedIndexInt == 1)
            {
                // ACORD 25 Form - these will be typed.
                TextBox1.Text = "Processing for ACORD 25 Forms is unavailable at the moment.";
            }
            else if (selectedIndexInt == 2)
            {
                // W9 Form - these will be handwritten.
                TextBox1.Text = FileUpload1.HasFile
                    ? "Processing for W9 Forms is unavailable at the moment."
                    : "Please upload a file.";
            }
        }

        /// <summary>
        /// OCRs a fixed crop area of the uploaded PDF and returns a JSON array with one
        /// entry (text plus confidence rounded to two decimals) per recognised paragraph.
        /// </summary>
        private static string OcrUploadedPdfAsJson(System.IO.Stream pdf)
        {
            var ocr = new AdvancedOcr()
            {
                Language                         = IronOcr.Languages.English.OcrLanguagePack,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.GrayScale,
                EnhanceResolution                = true,
                EnhanceContrast                  = true,
                CleanBackgroundNoise             = true,
                ColorDepth                       = 4,
                RotateAndStraighten              = false,
                DetectWhiteTextOnDarkBackgrounds = false,
                ReadBarCodes                     = false,
                Strategy                         = AdvancedOcr.OcrStrategy.Fast,
                InputImageType                   = AdvancedOcr.InputTypes.Document
            };

            // Crop area: x = 50, y = 330, width = 2200, height = 115 (px).
            var cropArea  = new Rectangle(50, 330, 2200, 115);
            var ocrResult = ocr.ReadPdf(pdf, cropArea);

            var outputList = new List<Result>();
            foreach (var page in ocrResult.Pages)
            {
                foreach (var paragraph in page.Paragraphs)
                {
                    double paragraphConfidence = Math.Round(paragraph.Confidence, 2);
                    outputList.Add(new Result()
                    {
                        Text = paragraph.Text, Confidence = paragraphConfidence
                    });
                }
            }

            return new JavaScriptSerializer().Serialize(outputList);
        }

        /// <summary>
        /// OCRs a fixed crop area of the uploaded image and returns the recognised text.
        /// </summary>
        private static string OcrUploadedImage(System.IO.Stream imageStream)
        {
            var ocr = new AdvancedOcr()
            {
                CleanBackgroundNoise             = true,
                EnhanceContrast                  = true,
                EnhanceResolution                = true,
                Language                         = IronOcr.Languages.English.OcrLanguagePack,
                Strategy                         = IronOcr.AdvancedOcr.OcrStrategy.Advanced,
                ColorSpace                       = AdvancedOcr.OcrColorSpace.Color,
                DetectWhiteTextOnDarkBackgrounds = true,
                InputImageType                   = AdvancedOcr.InputTypes.AutoDetect,
                RotateAndStraighten              = true,
                ReadBarCodes                     = true,
                ColorDepth                       = 4
            };

            // Crop area: x = 41, y = 49, width = 1000, height = 80 (px).
            var cropArea = new Rectangle(41, 49, 1000, 80);

            // Decode the upload as an image. Reading the binary stream as text and passing
            // that text to Read would make the engine treat the file's contents as a path.
            // NOTE(review): relies on a Read(Image, Rectangle) overload - confirm against
            // the IronOcr version in use.
            using (var image = System.Drawing.Image.FromStream(imageStream))
            {
                return ocr.Read(image, cropArea).Text;
            }
        }