Пример #1
0
        /// <summary>
        /// Runs the OCR pipeline over every image found under <paramref name="FileDirectory"/>.
        /// Each image is first passed to <c>ImagePreProcessing.EnhanceImageQuality</c>; afterwards every
        /// image under <c>C:\OCR\EnhancedImage</c> is OCR'd, turned into an <c>Invoice</c>, and a text
        /// summary of all invoices is written to <c>C:\OCR\EnhancedImage\OcrResult.txt</c>.
        /// The method then blocks on <c>Console.ReadLine()</c>.
        /// </summary>
        /// <param name="FileDirectory">Root directory that is searched recursively for image files.</param>
        public static void GetIMagesOnly(string FileDirectory)
        {
            // NOTE(review): presumably EnhanceImageQuality writes its output into this directory -
            // confirm against ImagePreProcessing.
            const string enhancedImageDirectory = @"C:\OCR\EnhancedImage";
            const string resultFilePath         = enhancedImageDirectory + @"\OcrResult.txt";

            // Every entry carries its leading dot so that only real extensions match
            // (the bare "tif" used previously also matched names such as "motif").
            var ext = new List<string> {
                ".jpg", ".gif", ".png", ".tif"
            };

            // Ordinal, case-insensitive comparison: ".JPG" is an image too, and the result must not
            // depend on the current culture.
            Func<string, bool> isImage =
                path => ext.Any(e => path.EndsWith(e, StringComparison.OrdinalIgnoreCase));

            // Pass 1: enhance every source image.
            var sourceImages = Directory.GetFiles(FileDirectory, "*.*", SearchOption.AllDirectories)
                               .Where(isImage);

            foreach (string fileName in sourceImages)
            {
                Console.WriteLine("Image  file '{0}'.", fileName);
                ImagePreProcessing.EnhanceImageQuality(fileName);
            }

            // Pass 2: OCR every image in the enhanced-image directory and build one invoice per file.
            var enhancedImages = Directory.GetFiles(enhancedImageDirectory, "*.*", SearchOption.AllDirectories)
                                 .Where(isImage);
            var InvoiceList = new List<Invoice>();

            foreach (string fileName in enhancedImages)
            {
                Console.WriteLine("Image  file '{0}'.", fileName);

                var      pageItem    = PerformingOCR.DoOCR1(fileName);
                Identify removeSpace = new Identify();
                pageItem = removeSpace.RemoveSpace(pageItem);
                Invoice ck = PopInvoice.poplateInvoice(pageItem, fileName);
                InvoiceList.Add(ck);
            }

            // Pass 3: render the invoices as plain text.
            // NOTE(review): the "Invocie" label is misspelled; it is kept as-is because the output
            // file format may be consumed elsewhere.
            List<String> lines = new List<String>();

            foreach (Invoice invoice in InvoiceList)
            {
                lines.Add("Invocie File Name" + " " + invoice.InvoiceID);

                lines.Add("Invoice Date" + "     " + invoice.InvoiceDate);
                lines.Add("VendorName" + "     " + invoice.VendorName);
                lines.Add("     " + "QTY" + "        " + "  " + "Amount " + "        " + " " + "Item" + "     ");
                foreach (Lineitem lineitem in invoice.Lineitems)
                {
                    lines.Add("     " + lineitem.ItemQty + "        " + "     " + lineitem.ItemAmount + "         " + lineitem.ItemName);
                }
                lines.Add("Total Amount" + "     " + invoice.TotalAmount);

                lines.Add("****************************************************************************");
            }

            // WriteAllLines creates the file, writes all lines and closes it; no Flush()/Close() needed.
            System.IO.File.WriteAllLines(resultFilePath, lines);
            Console.ReadLine();
        }
Пример #2
0
        /// <summary>
        /// Runs Tesseract OCR on a single image file and returns every recognised word together with
        /// its position and a type code.
        /// </summary>
        /// <param name="imageDir">Path of the image file to process (despite the name, a file, not a directory).</param>
        /// <returns>
        /// One <c>row</c> per recognised word: <c>line</c> is the 1-based text-line counter across the
        /// whole page, <c>colomun</c> the 1-based word index within that line, <c>type</c> the result of
        /// <c>Identify.StringType</c> converted to Int16, and <c>word</c> the recognised text.
        /// Returns <c>null</c> when <paramref name="imageDir"/> is <c>null</c>.
        /// </returns>
        public static List <row> DoOCR1(string imageDir)
        {
            // Existing contract: a null path yields null. File existence is not checked here.
            if (imageDir == null)
            {
                return(null);
            }

            var Rowlist = new List <row>();

            // The image is loaded once, as a Bitmap converted to Pix. The additional
            // Pix.LoadFromFile(imageDir) that used to sit here was never used and never disposed,
            // so it leaked a Pix on every call.
            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_char_whitelist", "16.00ABCDEFGHIJKLMNOPQRSTUVWXYZ(quick) brown { fox} jumps!over the $3,456.78 < lazy >: #90 dog & duck/goose, as 12.5% of Email from [email protected] is spam?");

                using (var image = new System.Drawing.Bitmap(imageDir))
                using (var pix = PixConverter.ToPix(image))
                using (var page = engine.Process(pix))
                {
                    Console.WriteLine("Mean confidence: {0:p}", page.GetMeanConfidence());
                    Console.WriteLine(page.GetText());

                    var lineNumber = 1;

                    using (var iter = page.GetIterator())
                    {
                        iter.Begin();

                        // Walk block -> paragraph -> text line -> word. Each inner Next(parent, child)
                        // advances to the next child while staying inside the current parent.
                        do
                        {
                            do
                            {
                                do
                                {
                                    Console.WriteLine("Line {0}", lineNumber);

                                    // The column counter restarts on every text line.
                                    var columnNumber = 1;
                                    do
                                    {
                                        // Read the word once and reuse it for logging, typing and storage.
                                        string wordText = iter.GetText(PageIteratorLevel.Word);
                                        Console.WriteLine("word:{0}  ", wordText);

                                        Identify strType = new Identify();

                                        Rowlist.Add(new row
                                        {
                                            line    = lineNumber,
                                            colomun = columnNumber,
                                            type    = Convert.ToInt16(strType.StringType(wordText)),
                                            word    = wordText
                                        });
                                        columnNumber++;
                                    } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                    lineNumber++;
                                } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                            } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                        } while (iter.Next(PageIteratorLevel.Block));
                    }
                }
            }

            return(Rowlist);
        }