コード例 #1
0
        /// <summary>
        /// Runs OCR on every page of a PDF file and combines the per-page results
        /// into a single DOCX document.
        /// </summary>
        /// <param name="inputFile">Path of the PDF file to open.</param>
        /// <remarks>
        /// The trained-data folder and the output file (C:\output.docx) are hard-coded.
        /// </remarks>
        public void getText2(string inputFile)
        {
            // Open a PDF file.
            // NOTE(review): if PDFDocument is disposable it should be released too - confirm against the SDK.
            PDFDocument doc = new PDFDocument(inputFile);

            // The folder that contains '.traineddata' files.
            OCRHandler.SetTrainResourcePath(@"D:\Alice\DLL\Source\");

            // Ask for the page count once instead of on every loop iteration.
            int pageCount = doc.GetPageCount();

            // One in-memory DOCX per page; they are combined after the loop.
            Stream[] streams = new MemoryStream[pageCount];
            try
            {
                for (int i = 0; i < pageCount; i++)
                {
                    BasePage page = doc.GetPage(i);
                    streams[i] = new MemoryStream();

                    // The default resolution is 96. A larger value (192, 288, ...) helps
                    // recognition, but it must not be too large.
                    // The bitmap holds GDI+ resources, so release it as soon as the page
                    // has been recognized and written to its stream.
                    using (Bitmap bmp = page.ConvertToImage(96))
                    {
                        OCRPage ocrPage = OCRHandler.Import(bmp);
                        ocrPage.Recognize();
                        ocrPage.SaveTo(MIMEType.DOCX, streams[i]);
                    }

                    // Rewind so the combine step reads each stream from the start.
                    streams[i].Seek(0, SeekOrigin.Begin);
                }

                // Set output file path.
                DOCXDocument.CombineDocument(streams, @"C:\output.docx");
            }
            finally
            {
                // Release every stream that was created, even if a page failed part-way.
                foreach (Stream stream in streams)
                {
                    if (stream != null)
                    {
                        stream.Dispose();
                    }
                }
            }
        }
コード例 #2
0
 /// <summary>
 /// Runs OCR on a rectangular region of a document page or image and writes the
 /// recognized text to the HTTP response as plain text.
 /// </summary>
 /// <param name="fid">File identifier passed to FileinfoHandler.LoadFile.</param>
 /// <returns>Always <c>true</c>.</returns>
 /// <remarks>
 /// Request parameters read: pageindex, lang, filename, x, y, w, h.
 /// The response body is "None" when any step throws, or when the loaded file is
 /// neither a BaseDocument nor an REImage.
 /// </remarks>
 public bool doOCR(string fid)
 {
     Response.ContentType = "text/plain";
     Response.Clear();
     try
     {
         // The numbers are produced by client script, so parse them culture-independently;
         // with the server's current culture "12.5" is misread on comma-decimal locales.
         System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

         // ToString() on a missing parameter throws, which the catch below turns into "None".
         int    pageIndex = Convert.ToInt32(Request.Params["pageindex"].ToString(), invariant);
         string language  = Request.Params["lang"];
         string filename  = Request.Params["filename"];
         string appRoot   = HttpContext.Current.Request.PhysicalApplicationPath.Replace("\\", "/");

         // Fractional pixel values are truncated toward zero.
         int x      = (int)Convert.ToDouble(Request.Params["x"].ToString(), invariant);
         int y      = (int)Convert.ToDouble(Request.Params["y"].ToString(), invariant);
         int width  = (int)Convert.ToDouble(Request.Params["w"].ToString(), invariant);
         int height = (int)Convert.ToDouble(Request.Params["h"].ToString(), invariant);

         OCRHandler.SetTrainResourcePath(appRoot + "/OCRSource/");

         // A missing "lang" parameter has always ended in "None"; keep that explicit
         // rather than relying on a null dereference.
         if (language == null)
         {
             throw new System.ArgumentException("Missing request parameter 'lang'.");
         }

         List<Language> langs = new List<Language>();
         switch (language)
         {
             case "English":
                 langs.Add(Language.Eng);
                 break;
             case "German":
                 langs.Add(Language.Deu);
                 break;
             case "French":
                 langs.Add(Language.Fra);
                 break;
             case "Dutch":
                 langs.Add(Language.Nld);
                 break;
             case "Italian":
                 langs.Add(Language.Ita);
                 break;
             case "Portuguese":
                 langs.Add(Language.Por);
                 break;
             case "Spanish":
                 langs.Add(Language.Spa);
                 break;
             case "Arabic":
                 langs.Add(Language.Ara);
                 break;
             default:
                 // Unrecognized names leave the list empty.
                 break;
         }

         Object fileObject = RasterEdge.WDP.Handler.FileinfoHandler.LoadFile(fid, filename, "");

         // NOTE(review): this looks like a process-wide setting; concurrent requests with
         // different languages may interfere - confirm against the SDK.
         OCRHandler.Settings.LanguagesEnabled = langs;

         // Only the cropped region is recognized, so the full page is not imported here.
         Bitmap image = null;
         if (fileObject is BaseDocument)
         {
             BasePage page = ((BaseDocument)fileObject).GetPage(pageIndex);
             image = page.ConvertToImage(1.0f);
         }
         else if (fileObject is REImage)
         {
             image = ((REImage)fileObject).Bitmap;
         }

         if (image == null)
         {
             Response.Write("None");
         }
         else
         {
             string text;
             try
             {
                 using (Bitmap region = new Bitmap(width, height))
                 {
                     // Copy the requested rectangle of the source into the new bitmap.
                     using (Graphics g = Graphics.FromImage(region))
                     {
                         g.DrawImage(image, 0, 0, new Rectangle(x, y, region.Width, region.Height), GraphicsUnit.Pixel);
                     }

                     OCRPage ocrPage = OCRHandler.Import(region);
                     ocrPage.Recognize();
                     text = ocrPage.GetText();
                 }
             }
             finally
             {
                 // Release the source bitmap even when cropping or recognition fails.
                 image.Dispose();
             }

             // Turn line breaks into <br/> and backslash-escape double quotes for the client.
             text = text.Replace("\r\n", "<br/>");
             text = text.Replace("\"", "\\" + "\"");
             Response.Write(text);
         }
     }
     catch (System.Exception ex)
     {
         // The client only ever sees "None"; record the cause so failures can be diagnosed.
         System.Diagnostics.Trace.TraceError("doOCR failed: " + ex);
         Response.Write("None");
     }
     Response.End();
     return(true);
 }