/// <summary>
/// Runs OCR over every page of a PDF supplied as a byte array and returns the
/// bytes produced by the Dynamsoft OCR engine.
/// </summary>
/// <param name="byteImages">The uploaded document; it is loaded as a PDF.</param>
/// <param name="strOCRLanguage">Value assigned to the engine's <c>OCRLanguage</c> property.</param>
/// <param name="nFileFormat">
/// Numeric value cast to <c>Dynamsoft.DotNet.TWAIN.OCR.ResultFormat</c>.
/// Passing -1 selects format 0.
/// </param>
/// <returns>The result of <c>DynamicDotNetTwain.OCR</c> over all loaded pages.</returns>
public static byte[] OCRInner(byte[] byteImages, string strOCRLanguage, int nFileFormat)
{
    Dynamsoft.DotNet.TWAIN.DynamicDotNetTwain DNObject =
        new Dynamsoft.DotNet.TWAIN.DynamicDotNetTwain("/*no License available*/");

    // Both the Tesseract data and the OCR DLLs are looked up in the same folder.
    string ocrResourcePath = StrPath + "Resources\\ocr";
    DNObject.OCRTessDataPath = ocrResourcePath;
    DNObject.OCRLanguage = strOCRLanguage;
    DNObject.OCRDllPath = ocrResourcePath;

    // -1 means "not specified"; fall back to result format 0.
    int resultFormat = (nFileFormat == -1) ? 0 : nFileFormat;
    DNObject.OCRResultFormat = (Dynamsoft.DotNet.TWAIN.OCR.ResultFormat)resultFormat;

    // The uploaded image stream is in PDF format, so we load it as a PDF.
    DNObject.LoadImageFromBytes(byteImages, Dynamsoft.DotNet.TWAIN.Enums.DWTImageFileFormat.WEBTW_PDF);

    // Fix the resolution of each page before OCRing.
    for (short index = 0; index < DNObject.HowManyImagesInBuffer; index++)
    {
        Bitmap img = (Bitmap)DNObject.GetImage(index);

        // Pages that already report a common scanning resolution are left untouched.
        if (IsStandardScanResolution(img.HorizontalResolution))
        {
            continue;
        }

        int estimatedResolution = EstimateScanResolution(img.Width);

        // Very narrow pages (under roughly 170 px) produce an estimate of 0,
        // which is not a usable resolution, so those pages are left as loaded.
        if (estimatedResolution > 0)
        {
            // The resolution is missing or unusual, so we re-apply the estimate.
            img.SetResolution(estimatedResolution, estimatedResolution);
            DNObject.SetImage(index, img);
        }
    }

    // Now we do OCR on the corrected images: build the list of every page index.
    Dynamsoft.DotNet.TWAIN.IndexList tmp = new Dynamsoft.DotNet.TWAIN.IndexList();
    for (short i = 0; i < DNObject.HowManyImagesInBuffer; i++)
    {
        tmp.Insert(i, i);
    }

    return DNObject.OCR(tmp);
}

/// <summary>
/// Returns true when the resolution is one of the values normally used to scan
/// (100, 150, 200, 300 or 600). Add more here if other resolutions are in use.
/// </summary>
private static bool IsStandardScanResolution(float resolution)
{
    return resolution == 100
        || resolution == 150
        || resolution == 200
        || resolution == 300
        || resolution == 600;
}

/// <summary>
/// Estimates the scan resolution of a page from its pixel width and rounds it
/// to the nearest multiple of 50.
/// </summary>
/// <remarks>
/// The page is assumed to be 612 points wide, i.e. 8.5 inches at 72 points per
/// inch, so the raw estimate is <c>pixelWidth * 72 / 612</c>. A remainder of 80
/// or more rounds up to the next hundred (e.g. 299 -> 300), a remainder below 20
/// rounds down (e.g. 301 -> 300), and anything in between is treated as the
/// half step (e.g. 150 -> 150).
/// </remarks>
private static int EstimateScanResolution(int pixelWidth)
{
    int rawResolution = pixelWidth * 72 / 612;
    int hundreds = (rawResolution / 100) * 100;
    int remainder = rawResolution - hundreds;

    if (remainder >= 80)
    {
        return hundreds + 100;
    }

    if (remainder < 20)
    {
        return hundreds;
    }

    // The original code added 50 to an unused variable here, so the half step
    // never reached the value passed to SetResolution.
    return hundreds + 50;
}