Ejemplo n.º 1
0
        /// <summary>
        /// Sample entry point: submits a region of a file to the OCR service through
        /// <c>OCRApi.OcrText</c> and writes the recognised text to the console.
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the path of the file to OCR. When no argument is
        /// supplied, the first <c>*.tif</c> file found in the current directory is used.
        /// </param>
        static void Main(string[] args)
        {
            string testFile = null;

            try
            {
                // ** Make sure an api key has been entered (null is treated the same as empty)
                if (string.IsNullOrEmpty(API_KEY))
                {
                    Console.WriteLine("[ERROR] Please update the sample code and enter the API Key that came with your subscription.");
                    return;
                }

                // ** Was a 'file to OCR' specified on the command line?
                if (args.Length == 0)
                {
                    Console.WriteLine("[INFO] No file to OCR specified, using default file.");

                    // ** Indexing [0] on an empty result would throw, so report the missing default file explicitly.
                    string[] defaultFiles = Directory.GetFiles(".", "*.tif");
                    if (defaultFiles.Length == 0)
                    {
                        Console.WriteLine("[ERROR] No file to OCR was specified and no '*.tif' file was found in the current directory.");
                        return;
                    }

                    testFile = defaultFiles[0];
                }
                else
                {
                    testFile = args[0];
                }

                // ** Specify the API key associated with your subscription.
                Configuration.Default.AddApiKey("api_key", API_KEY);

                // ** Accept all SSL Certificates, this makes life under mono a lot easier. This line is not needed on Windows.
                // ** NOTE(review): this callback accepts every certificate for requests validated through ServicePointManager;
                // ** it is a sample-only convenience and should be removed in production code.
                ServicePointManager.ServerCertificateValidationCallback = delegate { return(true); };

                // ** The service's host name is already set, but for debugging purposes you may want to switch between 'http' and 'https'.
                Configuration.Default.ApiClient.RestClient.BaseUrl = new Uri("https://api.muhimbi.com/api");

                // ** We are dealing with OCR, so instantiate the relevant class
                OCRApi ocrApi = new OCRApi();

                // ** Read the file we wish to OCR
                byte[] sourceFile = File.ReadAllBytes(testFile);

                // ** Fill out the data for the OCR operation.
                OcrTextData inputData = new OcrTextData(
                    SourceFileName: testFile,                                       // ** The name of the file to OCR. Always include the correct extension
                    SourceFileContent: sourceFile,                                  // ** The content of the file to OCR
                    Language: OcrTextData.LanguageEnum.English,                     // ** The document's primary language
                    Performance: OcrTextData.PerformanceEnum.Slowbutaccurate,       // ** Unless you have a good reason not to, always use the 'Slow' option.
                    CharactersOption: OcrTextData.CharactersOptionEnum.Blacklist,   // ** Any characters to black list or white list (e.g. 1234567890 to deal with numerical data)
                    Characters: "|",                                                // ** In this example we blacklist the '|' (pipe) as it looks similar to l (L) 1 (one) and I (capital I)
                    Paginate: false,                                                // ** Only 'paginate' when your documents have images that span multiple pages.
                    X: "36",                                                        // ** By default the entire page is OCRed, optionally specify an area (in pt., 1/72nd of an inch)
                    Y: "72",
                    Width: "400",
                    Height: "30",
                    PageNumber: "1"
                    );

                // ** Carry out the OCR operation
                Console.WriteLine("[INFO] Running OCR...");
                var response = ocrApi.OcrText(inputData);

                // ** Read the result before touching the console colour, so a failure here is not printed in yellow.
                var extractedText = response.OutText;

                // ** Writing the recognised text to the console
                Console.OutputEncoding = System.Text.Encoding.UTF8;
                Console.Write("[INFO] Extracted text: ");
                Console.ForegroundColor = ConsoleColor.Yellow;
                try
                {
                    Console.WriteLine(extractedText);
                }
                finally
                {
                    // ** Always restore the console colour, even when writing fails.
                    Console.ResetColor();
                }
            }
            catch (Exception ex)
            {
                // ** Sample-level handler: print the full exception details instead of crashing.
                Console.WriteLine(ex.ToString());
            }
        }
        /// <summary>
        /// Sample entry point: submits a file to the OCR service through <c>OCRApi.OcrPdf</c>,
        /// writes the returned content to <c>result.pdf</c> and then tries to open it.
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the path of the file to OCR. When no argument is
        /// supplied, the first <c>*.tif</c> file found in the current directory is used.
        /// </param>
        static void Main(string[] args)
        {
            string testFile = null;

            try
            {
                // ** Make sure an api key has been entered (null is treated the same as empty)
                if (string.IsNullOrEmpty(API_KEY))
                {
                    Console.WriteLine("[ERROR] Please update the sample code and enter the API Key that came with your subscription.");
                    return;
                }

                // ** Was a 'file to OCR' specified on the command line?
                if (args.Length == 0)
                {
                    Console.WriteLine("[INFO] No file to OCR specified, using default file.");

                    // ** Indexing [0] on an empty result would throw, so report the missing default file explicitly.
                    string[] defaultFiles = Directory.GetFiles(".", "*.tif");
                    if (defaultFiles.Length == 0)
                    {
                        Console.WriteLine("[ERROR] No file to OCR was specified and no '*.tif' file was found in the current directory.");
                        return;
                    }

                    testFile = defaultFiles[0];
                }
                else
                {
                    testFile = args[0];
                }

                // ** Specify the API key associated with your subscription.
                Configuration.Default.AddApiKey("api_key", API_KEY);

                // ** Accept all SSL Certificates, this makes life under mono a lot easier. This line is not needed on Windows.
                // ** NOTE(review): this callback accepts every certificate for requests validated through ServicePointManager;
                // ** it is a sample-only convenience and should be removed in production code.
                ServicePointManager.ServerCertificateValidationCallback = delegate { return(true); };

                // ** The service's host name is already set, but for debugging purposes you may want to switch between 'http' and 'https'.
                Configuration.Default.ApiClient.RestClient.BaseUrl = new Uri("https://api.muhimbi.com/api");

                // ** We are dealing with OCR, so instantiate the relevant class
                OCRApi ocrApi = new OCRApi();

                // ** Read the file we wish to OCR
                byte[] sourceFile = File.ReadAllBytes(testFile);

                // ** Fill out the data for the OCR operation.
                OcrPdfData inputData = new OcrPdfData(
                    SourceFileName: testFile,                                   // ** The name of the file to OCR. Always include the correct extension
                    SourceFileContent: sourceFile,                              // ** The content of the file to OCR
                    Language: OcrPdfData.LanguageEnum.English,                  // ** The document's primary language
                    Performance: OcrPdfData.PerformanceEnum.Slowbutaccurate,    // ** Unless you have a good reason not to, always use the 'Slow' option.
                    CharactersOption: OcrPdfData.CharactersOptionEnum.None,     // ** Any characters to black list or white list (e.g. 1234567890 to deal with numerical data)
                    Characters: null,                                           // ** The characters to black or white list.
                    Paginate: false,                                            // ** Only 'paginate' when your documents have images that span multiple pages.
                    Regions: null                                               // ** We want to OCR the entire document, not just specific areas.
                    );

                // ** Carry out the OCR operation
                Console.WriteLine("[INFO] Running OCR...");
                var response = ocrApi.OcrPdf(inputData);

                // ** Write the results back to the file system
                File.WriteAllBytes(@"result.pdf", response.ProcessedFileContent);

                Console.WriteLine("[INFO] 'result.pdf' written to output folder.");

                // ** Open the generated file in the system PDF viewer. This is best effort: the OCR result is
                // ** already on disk, so a missing viewer must not be reported as a failed OCR run.
                // ** UseShellExecute is set explicitly because opening a document (rather than an executable)
                // ** requires the shell, and its default value differs between .NET runtimes.
                try
                {
                    Process.Start(new ProcessStartInfo(@"result.pdf") { UseShellExecute = true });
                }
                catch (System.ComponentModel.Win32Exception viewerEx)
                {
                    Console.WriteLine("[WARN] 'result.pdf' could not be opened in a viewer: " + viewerEx.Message);
                }
            }
            catch (Exception ex)
            {
                // ** Sample-level handler: print the full exception details instead of crashing.
                Console.WriteLine(ex.ToString());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Sends the file at <paramref name="path"/> to <paramref name="url"/> as a
        /// <c>multipart/form-data</c> POST, together with the <c>appid</c> and <c>bucket</c>
        /// text fields taken from <c>config</c>, and returns the response body.
        /// </summary>
        /// <param name="url">Absolute URL of the endpoint that receives the upload.</param>
        /// <param name="path">Path of the local file to upload; it is sent as the <c>image</c> part with file name <c>1.jpg</c>.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        static string UseImage(string url, string path)
        {
            HttpWebRequest webReq = (HttpWebRequest)WebRequest.Create(new System.Uri(url));

            webReq.Method = "POST";

            // Boundary token that separates the parts of the multipart body.
            string boundary = "--------------" + DateTime.Now.Ticks.ToString("x");

            webReq.ContentType = "multipart/form-data; boundary=" + boundary;

            // The body is assembled in memory first so that ContentLength is known before sending.
            using (var memStream = new MemoryStream())
            {
                // Leading line break before the first delimiter.
                byte[] enter = Encoding.ASCII.GetBytes("\r\n");
                memStream.Write(enter, 0, enter.Length);

                Dictionary<string, string> dic = new Dictionary<string, string>()
                {
                    { "appid", config.appid },
                    { "bucket", config.bucket }
                };

                // Write the plain text fields, one part per entry.
                string inputPartHeaderFormat = "--" + boundary + "\r\n" + "Content-Disposition:form-data;name=\"{0}\";" + "\r\n\r\n{1}\r\n";

                foreach (var kv in dic)
                {
                    string inputPartHeader      = string.Format(inputPartHeaderFormat, kv.Key, kv.Value);
                    var    inputPartHeaderBytes = Encoding.ASCII.GetBytes(inputPartHeader);
                    memStream.Write(inputPartHeaderBytes, 0, inputPartHeaderBytes.Length);
                }

                // Write the header of the file part.
                string imagePartHeader = "--" + boundary + "\r\n" +
                                         "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" +
                                         "Content-Type: image/jpeg\r\n\r\n";
                var header      = string.Format(imagePartHeader, "image", "1.jpg");
                var headerbytes = Encoding.UTF8.GetBytes(header);
                memStream.Write(headerbytes, 0, headerbytes.Length);

                // Write the file content; the using block releases the file handle even if the copy fails.
                using (var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read))
                {
                    fileStream.CopyTo(memStream);
                }

                // Closing delimiter: a line break ends the file data, then "--" + boundary + "--".
                // Nothing else may follow the file bytes, otherwise it becomes part of the uploaded content.
                byte[] endBoundary = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");
                memStream.Write(endBoundary, 0, endBoundary.Length);

                webReq.ContentLength = memStream.Length;
                webReq.Headers.Add(HttpRequestHeader.Authorization, OCRApi.HmacSha1Sign());
                webReq.Host = config.Host;

                // Send the assembled body and close the request stream before asking for the response.
                using (var requestStream = webReq.GetRequestStream())
                {
                    memStream.Position = 0;
                    memStream.CopyTo(requestStream);
                }
            }

            // Read the whole response; both the response and the reader are disposed on every path.
            using (var response = (HttpWebResponse)webReq.GetResponse())
            using (var sr = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                return sr.ReadToEnd();
            }
        }