Пример #1
0
        /// <summary>
        /// Loads the supplied bytes as a BMP image into a fresh Dynamic .NET TWAIN
        /// instance and returns whatever its <c>OCR</c> call produces.
        /// </summary>
        /// <param name="byteImages">Raw image bytes; they are loaded using the WEBTW_BMP file format.</param>
        /// <param name="strLanguageFileName">Value assigned to the engine's <c>OCRLanguage</c> property.</param>
        /// <param name="nFileFormat">
        /// One of the FILE_TYPE_* constants. FILE_TYPE_TEXT and FILE_TYPE_DOCX select the
        /// <c>Text</c> result format, FILE_TYPE_PDF selects <c>PDFPlainText</c>; any other
        /// value leaves <c>OCRResultFormat</c> at whatever the engine starts with.
        /// </param>
        /// <returns>The byte array returned by the engine's <c>OCR</c> method.</returns>
        public static byte[] OCRInner(byte[] byteImages, string strLanguageFileName, int nFileFormat)
        {
            // NOTE(review): the engine instance is never disposed here - confirm whether
            // DynamicDotNetTwain holds resources that need explicit cleanup.
            var engine = new Dynamsoft.DotNet.TWAIN.DynamicDotNetTwain("586300F2E000E88E9965483ECB6CCD4C")
            {
                OCRTessDataPath = StrPath,
                OCRLanguage = strLanguageFileName,
                OCRDllPath = StrPath + "bin"
            };

            // Map the caller's file-type code onto the engine's result format.
            if (nFileFormat == FILE_TYPE_TEXT || nFileFormat == FILE_TYPE_DOCX)
            {
                engine.OCRResultFormat = Dynamsoft.DotNet.TWAIN.OCR.ResultFormat.Text;
            }
            else if (nFileFormat == FILE_TYPE_PDF)
            {
                engine.OCRResultFormat = Dynamsoft.DotNet.TWAIN.OCR.ResultFormat.PDFPlainText;
            }

            engine.LoadImageFromBytes(byteImages, Dynamsoft.DotNet.TWAIN.Enums.DWTImageFileFormat.WEBTW_BMP);

            // NOTE(review): presumably IndexList(0) selects the image at index 0 - verify
            // against the SDK that the argument is an index and not an initial capacity.
            var selection = new Dynamsoft.DotNet.TWAIN.IndexList(0);
            return engine.OCR(selection);
        }
        /// <summary>
        /// Loads the supplied bytes as a PDF into a fresh Dynamic .NET TWAIN instance,
        /// re-applies an estimated DPI to every page whose horizontal resolution is not
        /// one of the common scan values, then runs OCR over all pages in the buffer.
        /// </summary>
        /// <param name="byteImages">Raw document bytes; they are loaded using the WEBTW_PDF file format.</param>
        /// <param name="strOCRLanguage">Value assigned to the engine's <c>OCRLanguage</c> property.</param>
        /// <param name="nFileFormat">
        /// Numeric value cast directly to <c>OCR.ResultFormat</c>; <c>-1</c> is mapped to <c>0</c>.
        /// </param>
        /// <returns>The byte array returned by the engine's <c>OCR</c> method.</returns>
        public static byte[] OCRInner(byte[] byteImages, string strOCRLanguage, int nFileFormat)
        {
            Dynamsoft.DotNet.TWAIN.DynamicDotNetTwain DNObject = new Dynamsoft.DotNet.TWAIN.DynamicDotNetTwain("/*no License available*/");

            DNObject.OCRTessDataPath = StrPath + "Resources\\ocr";
            DNObject.OCRLanguage = strOCRLanguage;
            DNObject.OCRDllPath = StrPath + "Resources\\ocr";

            // -1 means "not specified" to callers; fall back to result format 0.
            if (nFileFormat == -1)
            {
                DNObject.OCRResultFormat = (Dynamsoft.DotNet.TWAIN.OCR.ResultFormat)0;
            }
            else
            {
                DNObject.OCRResultFormat = (Dynamsoft.DotNet.TWAIN.OCR.ResultFormat)nFileFormat;
            }

            // The uploaded image stream is in PDF format, so we load it as a PDF.
            DNObject.LoadImageFromBytes(byteImages, Dynamsoft.DotNet.TWAIN.Enums.DWTImageFileFormat.WEBTW_PDF);

            // Fix the resolution of each page before OCR.
            for (short index = 0; index < DNObject.HowManyImagesInBuffer; index++)
            {
                Bitmap img = (Bitmap)DNObject.GetImage(index);

                // 100, 150, 200, 300 and 600 are the normal resolutions used to scan;
                // add more here if you use different ones.
                bool bResolutionValid = ((img.HorizontalResolution == 100) || (img.HorizontalResolution == 200) ||
                    (img.HorizontalResolution == 150) || (img.HorizontalResolution == 300) || (img.HorizontalResolution == 600));

                if (bResolutionValid)
                {
                    continue;
                }

                int iImageResolution = EstimateScanResolution(img.Width);

                // Very narrow pages round down to 0 dpi, which is not a usable
                // resolution; leave such pages as they are.
                if (iImageResolution > 0)
                {
                    // The resolution is missing or unusual, so we re-apply the estimate.
                    img.SetResolution(iImageResolution, iImageResolution);
                    DNObject.SetImage(index, img);
                }
            }

            // Now we do OCR on the corrected images: select every page in the buffer.
            Dynamsoft.DotNet.TWAIN.IndexList tmp = new Dynamsoft.DotNet.TWAIN.IndexList();

            for (short i = 0; i < DNObject.HowManyImagesInBuffer; i++)
            {
                tmp.Insert(i, i);
            }
            return DNObject.OCR(tmp);
        }

        /// <summary>
        /// Estimates the scan resolution (dpi) of a page from its pixel width, assuming the
        /// page is 612 points wide (8.5 in at 72 points per inch), and snaps the result to
        /// the nearest multiple of 50 using the 20/80 thresholds below.
        /// </summary>
        /// <param name="pixelWidth">Width of the rasterized page in pixels.</param>
        /// <returns>The snapped resolution; may be 0 for very small widths.</returns>
        private static int EstimateScanResolution(int pixelWidth)
        {
            const int PointsPerInch = 72;
            const int AssumedPageWidthInPoints = 612; // 8.5 in * 72 pt (792 pt tall = 11 in)

            int rawResolution = pixelWidth * PointsPerInch / AssumedPageWidthInPoints;
            int hundreds = (rawResolution / 100) * 100;
            int remainder = rawResolution - hundreds;

            if (remainder >= 80)
            {
                return hundreds + 100; // e.g. 299 -> 300
            }

            if (remainder < 20)
            {
                return hundreds; // e.g. 301 -> 300
            }

            // Mid-range remainders snap to the half step, e.g. 150.
            // (The original added 50 to an unused variable, so this case was lost.)
            return hundreds + 50;
        }