Example #1
0
        /// <summary>
        /// Handles the <c>Recognition/ExtractText</c> HTTP trigger: fetches the file named in the request,
        /// runs text extraction over the requested page range, uploads the per-page results as a JSON
        /// blob and answers with the JSON-serialized value returned by the upload.
        /// </summary>
        /// <param name="req">Incoming request; parsed for the extraction parameters and used to build every response.</param>
        /// <param name="log">Trace writer handed to the support-unlock check and used to record unexpected failures.</param>
        /// <param name="context">Execution context; its <c>InvocationId</c> is written into the failure log entry.</param>
        /// <returns>
        /// A 200 response carrying the serialized upload result on success; otherwise the response
        /// produced by <c>GenerateErrorMessage</c> for the matching <c>ApiError</c>.
        /// </returns>
        public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "Recognition/ExtractText")] HttpRequestMessage req, TraceWriter log, ExecutionContext context)
        {
            try
            {
                // Nothing else runs unless the support unlock succeeds.
                bool unlocked = DemoConfiguration.UnlockSupport(log);
                if (!unlocked)
                {
                    return GenerateErrorMessage(ApiError.LicenseNotSet, req);
                }

                var parsed = ParseLeadWebRequestParameters(req);
                if (!parsed.Successful)
                {
                    return GenerateErrorMessage(ApiError.InvalidRequest, req);
                }

                var download = await GetImageStreamAsync(parsed.LeadWebRequest.fileUrl, req, DemoConfiguration.MaxUrlMbs);
                if (!download.Successful)
                {
                    return GenerateErrorMessage(download.ErrorType.Value, req);
                }

                using (download.Stream)
                {
                    var loadOptions = new LoadDocumentOptions();
                    loadOptions.FirstPageNumber = parsed.LeadWebRequest.FirstPage;
                    loadOptions.LastPageNumber = parsed.LeadWebRequest.LastPage;

                    var engine = new RecognitionEngine();
                    engine.WorkingDirectory = Path.GetTempPath();
                    engine.OcrEngine = GetOcrEngine();

                    // Extraction runs first; the starting page number is read from the options afterwards.
                    var extractedPages = engine.ExtractText(download.Stream, loadOptions);
                    var results = new List<ExtractTextData>();
                    int pageNumber = loadOptions.FirstPageNumber;

                    foreach (var page in extractedPages)
                    {
                        // Each element is read, adjusted and written back by index so the stored entry is updated.
                        // NOTE(review): ToLeadRect().ToLeadRectD() presumably snaps bounds to whole units - confirm.
                        for (int w = 0; w < page.Words.Count; w++)
                        {
                            var adjustedWord = page.Words[w];
                            adjustedWord.Bounds = adjustedWord.Bounds.ToLeadRect().ToLeadRectD();
                            page.Words[w] = adjustedWord;
                        }

                        for (int c = 0; c < page.Characters.Count; c++)
                        {
                            var adjustedCharacter = page.Characters[c];
                            adjustedCharacter.Bounds = adjustedCharacter.Bounds.ToLeadRect().ToLeadRectD();
                            page.Characters[c] = adjustedCharacter;
                        }

                        results.Add(new ExtractTextData
                        {
                            PageNumber = pageNumber,
                            PageText = page.Text,
                            Words = page.Words,
                            Characters = page.Characters
                        });
                        pageNumber++;
                    }

                    using (var buffer = new MemoryStream())
                    using (TextWriter writer = new StreamWriter(buffer))
                    {
                        string json = JsonConvert.SerializeObject(results);
                        writer.Write(json);
                        writer.Flush();

                        // Rewind so the upload reads the payload from its first byte.
                        buffer.Position = 0;

                        string blobName = $"ExtractText-{Guid.NewGuid()}.json";
                        var blobUri = UploadFileToBlobStorage(buffer, blobName);

                        var response = req.CreateResponse(HttpStatusCode.OK);
                        response.Content = new StringContent(JsonConvert.SerializeObject(blobUri));
                        return response;
                    }
                }
            }
            catch (Exception failure)
            {
                // Any unexpected failure is logged with the invocation id and reported as an internal server error.
                log.Error($"API Error occurred for request: {context.InvocationId} \n Details: {JsonConvert.SerializeObject(failure)}");
                return GenerateErrorMessage(ApiError.InternalServerError, req);
            }
        }
Example #2
0
        /// <summary>
        /// Runs text extraction over the file referenced by <paramref name="request"/>, saves the per-page
        /// results as a JSON file under <c>DemoConfiguration.OutputFileDirectory</c> and responds with a URL
        /// built from the request's scheme and server, the output directory's name and the file name.
        /// </summary>
        /// <param name="request">Request parameters; <c>fileUrl</c>, <c>FirstPage</c> and <c>LastPage</c> are read here.</param>
        /// <param name="additionalInfo">
        /// When <c>true</c>, each page carries its full word and character collections (character bounds are
        /// adjusted as well). When <c>false</c>, each page carries only a value/bounds projection of its words
        /// and no characters.
        /// </param>
        /// <returns>
        /// A 200 response whose content is the URL string; any exception raised along the way is converted
        /// into a response by <c>GenerateExceptionMessage</c>.
        /// </returns>
        private async Task<HttpResponseMessage> ParseText([FromUri] LeadWebRequest request, bool additionalInfo)
        {
            try
            {
                AuthenticateRequest();

                if (!VerifyCommonParameters(request))
                {
                    throw new MalformedRequestException();
                }

                using (var stream = await GetImageStream(request.fileUrl))
                {
                    // ValidateFile takes the last page by reference, so it may replace the requested value.
                    int lastPage = request.LastPage;
                    ValidateFile(stream, ref lastPage);
                    LoadDocumentOptions options = new LoadDocumentOptions()
                    {
                        FirstPageNumber = request.FirstPage,
                        LastPageNumber = lastPage
                    };

                    RecognitionEngine recognitionEngine = new RecognitionEngine();
                    recognitionEngine.OcrEngine = ocrEngine;
                    recognitionEngine.WorkingDirectory = Path.GetTempPath();

                    var documentPageText = recognitionEngine.ExtractText(stream, options);
                    List<ExtractTextData> pageDataList = new List<ExtractTextData>();
                    int currentPage = options.FirstPageNumber;

                    // One pass over the pages; only the payload shape depends on additionalInfo.
                    foreach (var page in documentPageText)
                    {
                        // Word bounds are adjusted in both modes. Elements are written back by index so the
                        // stored entry is updated.
                        // NOTE(review): ToLeadRect().ToLeadRectD() presumably snaps bounds to whole units - confirm.
                        for (int i = 0; i < page.Words.Count; i++)
                        {
                            var word = page.Words[i];
                            word.Bounds = word.Bounds.ToLeadRect().ToLeadRectD();
                            page.Words[i] = word;
                        }

                        ExtractTextData pageData;
                        if (additionalInfo)
                        {
                            for (int i = 0; i < page.Characters.Count; i++)
                            {
                                var character = page.Characters[i];
                                character.Bounds = character.Bounds.ToLeadRect().ToLeadRectD();
                                page.Characters[i] = character;
                            }

                            pageData = new ExtractTextData
                            {
                                PageNumber = currentPage,
                                PageText = page.Text,
                                Words = page.Words,
                                Characters = page.Characters
                            };
                        }
                        else
                        {
                            pageData = new ExtractTextData
                            {
                                PageNumber = currentPage,
                                PageText = page.Text,
                                Words = page.Words.Select(w => new { w.Value, w.Bounds }).ToList()
                            };
                        }

                        pageDataList.Add(pageData);
                        currentPage++;
                    }

                    using (var ms = new MemoryStream())
                    using (TextWriter tw = new StreamWriter(ms))
                    {
                        tw.Write(JsonConvert.SerializeObject(pageDataList));
                        tw.Flush();

                        // Rewind so SaveToDisk reads the payload from its first byte.
                        ms.Position = 0;

                        Guid id = Guid.NewGuid();
                        string baseName = $"ExtractText-{id}.json";
                        string urlPath = $"{Url.Request.RequestUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped)}/{new DirectoryInfo(DemoConfiguration.OutputFileDirectory).Name}/{baseName}";

                        // Path.Combine inserts the separator when the configured directory lacks a trailing one,
                        // so the file always lands inside the directory that urlPath points at.
                        string serverFilePath = Path.Combine(DemoConfiguration.OutputFileDirectory, baseName);
                        SaveToDisk(ms, serverFilePath);

                        return new HttpResponseMessage(HttpStatusCode.OK)
                        {
                            Content = new StringContent(urlPath)
                        };
                    }
                }
            }
            catch (Exception e)
            {
                return GenerateExceptionMessage(e);
            }
        }