/// <summary>
/// HTTP-triggered entry point (GET/POST, route <c>Recognition/ExtractText</c>) that runs text
/// extraction over the document referenced by the request, serializes the per-page results to
/// JSON, uploads that JSON through <c>UploadFileToBlobStorage</c>, and responds with the
/// JSON-serialized upload result.
/// </summary>
/// <param name="req">Incoming HTTP request; also used to build every response.</param>
/// <param name="log">Trace writer used for license unlocking and error logging.</param>
/// <param name="context">Execution context; its <c>InvocationId</c> is included in error logs.</param>
/// <returns>
/// A 200 response whose content is the serialized value returned by <c>UploadFileToBlobStorage</c>,
/// or an error response built by <c>GenerateErrorMessage</c> when the license cannot be unlocked,
/// the request parameters cannot be parsed, the image cannot be fetched, or any exception is thrown.
/// </returns>
public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "Recognition/ExtractText")] HttpRequestMessage req, TraceWriter log, ExecutionContext context)
{
    try
    {
        // Guard clauses: bail out with a typed API error before doing any real work.
        if (!DemoConfiguration.UnlockSupport(log))
        {
            return GenerateErrorMessage(ApiError.LicenseNotSet, req);
        }

        var parsedParameters = ParseLeadWebRequestParameters(req);
        if (!parsedParameters.Successful)
        {
            return GenerateErrorMessage(ApiError.InvalidRequest, req);
        }

        var webRequest = parsedParameters.LeadWebRequest;
        var imageReturn = await GetImageStreamAsync(webRequest.fileUrl, req, DemoConfiguration.MaxUrlMbs);
        if (!imageReturn.Successful)
        {
            return GenerateErrorMessage(imageReturn.ErrorType.Value, req);
        }

        using (var imageStream = imageReturn.Stream)
        {
            var options = new LoadDocumentOptions();
            options.FirstPageNumber = webRequest.FirstPage;
            options.LastPageNumber = webRequest.LastPage;

            var engine = new RecognitionEngine();
            engine.WorkingDirectory = Path.GetTempPath();
            engine.OcrEngine = GetOcrEngine();

            var extractedPages = engine.ExtractText(imageStream, options);

            var pages = new List<ExtractTextData>();
            int pageNumber = options.FirstPageNumber;

            foreach (var page in extractedPages)
            {
                // Each element is copied out, its bounds round-tripped through LeadRect, and then
                // written back by index (presumably the elements are value types and the round
                // trip snaps bounds to integer coordinates - confirm).
                var words = page.Words;
                for (int w = 0; w < words.Count; w++)
                {
                    var current = words[w];
                    current.Bounds = current.Bounds.ToLeadRect().ToLeadRectD();
                    words[w] = current;
                }

                var characters = page.Characters;
                for (int c = 0; c < characters.Count; c++)
                {
                    var current = characters[c];
                    current.Bounds = current.Bounds.ToLeadRect().ToLeadRectD();
                    characters[c] = current;
                }

                pages.Add(new ExtractTextData
                {
                    PageNumber = pageNumber,
                    PageText = page.Text,
                    Words = page.Words,
                    Characters = page.Characters
                });
                pageNumber++;
            }

            using (var buffer = new MemoryStream())
            using (var writer = new StreamWriter(buffer))
            {
                string json = JsonConvert.SerializeObject(pages);
                writer.Write(json);
                writer.Flush();

                // Rewind so the upload reads the JSON from the beginning of the buffer.
                buffer.Position = 0;

                Guid id = Guid.NewGuid();
                string blobName = $"ExtractText-{id}.json";
                var uploadedUri = UploadFileToBlobStorage(buffer, blobName);

                var response = req.CreateResponse(HttpStatusCode.OK);
                response.Content = new StringContent(JsonConvert.SerializeObject(uploadedUri));
                return response;
            }
        }
    }
    catch (Exception ex)
    {
        log.Error($"API Error occurred for request: {context.InvocationId} \n Details: {JsonConvert.SerializeObject(ex)}");
        return GenerateErrorMessage(ApiError.InternalServerError, req);
    }
}
/// <summary>
/// Extracts text from the document referenced by <paramref name="request"/>, writes the per-page
/// results as JSON into the configured output directory, and returns the URL of that file.
/// </summary>
/// <param name="request">Request parameters; <c>fileUrl</c>, <c>FirstPage</c> and <c>LastPage</c> are read here.</param>
/// <param name="additionalInfo">
/// When <c>true</c>, each page carries its full word and character collections (with normalized
/// bounds). When <c>false</c>, each page carries only the value and bounds of every word and no
/// character data.
/// </param>
/// <returns>
/// A 200 response whose content is the URL of the written JSON file, or the response produced by
/// <c>GenerateExceptionMessage</c> when any exception is thrown (including the
/// <c>MalformedRequestException</c> raised when <c>VerifyCommonParameters</c> rejects the request).
/// </returns>
private async Task<HttpResponseMessage> ParseText([FromUri] LeadWebRequest request, bool additionalInfo)
{
    try
    {
        AuthenticateRequest();
        if (!VerifyCommonParameters(request))
        {
            throw new MalformedRequestException();
        }

        using (var stream = await GetImageStream(request.fileUrl))
        {
            // ValidateFile receives lastPage by reference and may adjust it before it is used below.
            int lastPage = request.LastPage;
            ValidateFile(stream, ref lastPage);

            LoadDocumentOptions options = new LoadDocumentOptions()
            {
                FirstPageNumber = request.FirstPage,
                LastPageNumber = lastPage
            };

            RecognitionEngine recognitionEngine = new RecognitionEngine();
            recognitionEngine.OcrEngine = ocrEngine;
            recognitionEngine.WorkingDirectory = Path.GetTempPath();

            var documentPageText = recognitionEngine.ExtractText(stream, options);

            List<ExtractTextData> pageDataList = new List<ExtractTextData>();
            int currentPage = options.FirstPageNumber;

            // Single pass over the pages: word normalization is common to both output shapes, so it
            // is done once here instead of being duplicated in each branch.
            foreach (var page in documentPageText)
            {
                // Each word is copied out, its bounds round-tripped through LeadRect, and written
                // back by index (presumably words are value types and the round trip snaps bounds
                // to integer coordinates - confirm).
                for (int i = 0; i < page.Words.Count; i++)
                {
                    var word = page.Words[i];
                    word.Bounds = word.Bounds.ToLeadRect().ToLeadRectD();
                    page.Words[i] = word;
                }

                ExtractTextData pageData;
                if (additionalInfo)
                {
                    // Full output: characters get the same bounds normalization as words.
                    for (int i = 0; i < page.Characters.Count; i++)
                    {
                        var character = page.Characters[i];
                        character.Bounds = character.Bounds.ToLeadRect().ToLeadRectD();
                        page.Characters[i] = character;
                    }

                    pageData = new ExtractTextData
                    {
                        PageNumber = currentPage,
                        PageText = page.Text,
                        Words = page.Words,
                        Characters = page.Characters
                    };
                }
                else
                {
                    // Reduced output: project each word down to its value and bounds only.
                    pageData = new ExtractTextData
                    {
                        PageNumber = currentPage,
                        PageText = page.Text,
                        Words = page.Words.Select(w => new { w.Value, w.Bounds }).ToList()
                    };
                }

                pageDataList.Add(pageData);
                currentPage++;
            }

            using (var ms = new MemoryStream())
            using (TextWriter tw = new StreamWriter(ms))
            {
                tw.Write(JsonConvert.SerializeObject(pageDataList));
                tw.Flush();

                // Rewind so SaveToDisk reads the JSON from the beginning of the buffer.
                ms.Position = 0;

                Guid id = Guid.NewGuid();
                string baseName = $"ExtractText-{id}.json";
                string urlPath = $"{Url.Request.RequestUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped)}/{new DirectoryInfo(DemoConfiguration.OutputFileDirectory).Name}/{baseName}";

                // Path.Combine instead of raw concatenation: the result is identical when the
                // configured directory ends with a separator, and still correct when it does not.
                string serverFilePath = Path.Combine(DemoConfiguration.OutputFileDirectory, baseName);
                SaveToDisk(ms, serverFilePath);

                return new HttpResponseMessage(HttpStatusCode.OK)
                {
                    Content = new StringContent(urlPath)
                };
            }
        }
    }
    catch (Exception e)
    {
        return GenerateExceptionMessage(e);
    }
}