/// <summary>
/// Renders one page of an OCR result: hOCR-style markup is written to <c>metadata</c>
/// and each word's plain text is written on its own line to <c>text</c>.
/// </summary>
/// <param name="hw">OCR result whose lines and words are rendered.</param>
/// <param name="image">Source image; its Url, Width and Height are written into the page title.</param>
/// <param name="pageNumber">Page number used in element ids and in the <c>ppageno</c> property.</param>
public AnnotatedPage(OcrResult hw, ImageReference image, int pageNumber)
        {
            // page
            // The URL lands inside an HTML attribute, so it must be HTML-encoded.
            var imageUrl = System.Net.WebUtility.HtmlEncode($"{image.Url}");
            metadata.WriteLine($"  <div class='ocr_page' id='page_{pageNumber}' title='image \"{imageUrl}\"; bbox 0 0 {image.Width} {image.Height}; ppageno {pageNumber}'>");
            metadata.WriteLine($"    <div class='ocr_carea' id='block_{pageNumber}_1'>");

            int li = 0;
            int wi = 0; // running word index across the whole page (not reset per line)

            foreach (var line in hw.lines)
            {
                metadata.WriteLine($"    <span class='ocr_line' id='line_{pageNumber}_{li}' title='baseline -0.002 -5; x_size 30; x_descenders 6; x_ascenders 6'>");

                // Sort by the first bounding-box coordinate only when every word has one;
                // otherwise keep the order the OCR result supplied instead of throwing.
                var sortable = line.words.All(w => w.boundingBox != null && w.boundingBox.Length > 0);
                var words    = sortable ? line.words.OrderBy(w => w.boundingBox[0]).ToArray() : line.words;

                foreach (var word in words)
                {
                    // An 8-value box is reduced to the two corner points at indices 0,1 and 4,5.
                    var bbox = word.boundingBox != null && word.boundingBox.Length == 8 ? $"bbox {word.boundingBox[0]} {word.boundingBox[1]} {word.boundingBox[4]} {word.boundingBox[5]}" : "";

                    // Recognised text may contain '<', '&' or quotes; encode it for the markup
                    // but keep the plain-text output untouched.
                    var encodedText = System.Net.WebUtility.HtmlEncode(word.text);
                    metadata.WriteLine($"      <span class='ocrx_word' id='word_{pageNumber}_{li}_{wi}' title='{bbox}'>{encodedText}</span>");
                    text.WriteLine(word.text);
                    wi++;
                }
                li++;
                metadata.WriteLine(" </span>"); // line
            }

            metadata.WriteLine("    </div>"); // reading area
            metadata.WriteLine("  </div>");   // page
        }
예제 #2
0
        /// <summary>
        /// Runs text recognition through <c>visionClient</c> and converts the recognised
        /// regions into a flat list of lines, each carrying its words and bounding boxes.
        /// </summary>
        /// <param name="stream">Image data; used when not null.</param>
        /// <param name="url">Image URL; used only when <paramref name="stream"/> is null.</param>
        /// <param name="name">Not referenced by this method.</param>
        /// <returns>An <c>OcrResult</c> holding the converted lines and the reported orientation.</returns>
        public async Task <OcrResult> GetText(Stream stream, string url = null, string name = null)
        {
            // The stream takes precedence; the URL overload is the fallback.
            var recognition = stream == null
                ? await visionClient.RecognizeTextAsync(url, "en", true)
                : await visionClient.RecognizeTextAsync(stream, "en", true);

            // Flatten region -> line, converting every line and its words on the way.
            var recognizedLines =
                from region in recognition.Regions
                from line in region.Lines
                select new lineResult()
                {
                    boundingBox = ConvertBoundingBox(line.BoundingBox),
                    words       = (from word in line.Words
                                   select new WordResult()
                                   {
                                       boundingBox = ConvertBoundingBox(word.BoundingBox),
                                       text        = word.Text
                                   }).ToArray()
                };

            return new OcrResult()
            {
                lines       = recognizedLines.ToArray(),
                Orientation = recognition.Orientation
            };
        }
예제 #3
0
        /// <summary>
        /// Analyses an image through <c>visionClient</c> and packs the result into an
        /// <c>OcrResult</c>: one line per caption (split on spaces into words), followed by
        /// a final line holding the tag names wrapped in "(" and ")".
        /// </summary>
        /// <param name="stream">Image data; used when not null.</param>
        /// <param name="url">Image URL; used only when <paramref name="stream"/> is null.</param>
        /// <returns>An <c>OcrResult</c> whose lines carry caption and tag words (no bounding boxes are set).</returns>
        public async Task <OcrResult> GetVision(Stream stream, string url = null)
        {
            var features = new[] { VisualFeature.Tags, VisualFeature.ImageType, VisualFeature.Description, VisualFeature.Adult };

            // The stream takes precedence; the URL overload is the fallback.
            var analysis = stream == null
                ? await visionClient.AnalyzeImageAsync(url, features)
                : await visionClient.AnalyzeImageAsync(stream, features);

            var lines = new List <lineResult>();

            // One line per caption, one word per space-separated token.
            foreach (var caption in analysis.Description.Captions)
            {
                var captionTokens = caption.Text.Split(' ');
                lines.Add(new lineResult()
                {
                    words = captionTokens.Select(token => new WordResult() { text = token }).ToArray()
                });
            }

            // Trailing line: "(" tag1 tag2 ... ")".
            var tagTokens = new List <string> { "(" };
            tagTokens.AddRange(analysis.Tags.Select(tag => tag.Name));
            tagTokens.Add(")");

            lines.Add(new lineResult()
            {
                words = tagTokens.Select(token => new WordResult() { text = token }).ToArray()
            });

            return new OcrResult()
            {
                lines = lines.ToArray()
            };
        }
 /// <summary>
 /// Convenience overload: forwards to the three-argument constructor with page number 0.
 /// </summary>
 /// <param name="hw">OCR result passed through unchanged.</param>
 /// <param name="image">Image reference passed through unchanged.</param>
 public AnnotatedPage(OcrResult hw, ImageReference image)
     : this(hw, image, 0)
 {
 }
예제 #5
0
        /// <summary>
        /// Posts an image (raw bytes or a URL) to the handwriting recognizeText endpoint, then
        /// polls the address returned in the <c>Operation-Location</c> header until the reported
        /// status is neither "Running" nor "NotStarted".
        /// </summary>
        /// <param name="stream">Image bytes to upload as octet-stream; when null, <paramref name="url"/> is sent as JSON instead.</param>
        /// <param name="url">Image URL; used only when <paramref name="stream"/> is null.</param>
        /// <returns>
        /// The recognition result from the final status response, or null when the service
        /// returned no <c>Operation-Location</c> header or the final status carried no result.
        /// </returns>
        /// <exception cref="HttpRequestException">The initial POST did not return a success status code.</exception>
        private async Task <OcrResult> GetHandwritingTextImpl(Stream stream, string url)
        {
            var uri = "https://westus.api.cognitive.microsoft.com/vision/v1.0/recognizeText?handwriting=true";

            // The client is created per call, so it must also be disposed per call;
            // otherwise its connections are held until finalization.
            using (var client = new HttpClient())
            {
                client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", apiKey);

                HttpResponseMessage response;

                // Request body
                if (stream != null)
                {
                    using (var content = new StreamContent(stream))
                    {
                        content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");
                        response = await client.PostAsync(uri, content);
                    }
                }
                else
                {
                    var json = JsonConvert.SerializeObject(new { url = url });
                    using (var content = new StringContent(json))
                    {
                        content.Headers.ContentType = new MediaTypeHeaderValue("application/json");
                        response = await client.PostAsync(uri, content);
                    }
                }

                string operationUrl;

                using (response)
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        // Surface the service's error body; EnsureSuccessStatusCode alone would drop it.
                        var err = await response.Content.ReadAsStringAsync();

                        throw new HttpRequestException($"Response status code does not indicate success: {(int)response.StatusCode} ({response.ReasonPhrase}). {err}");
                    }

                    IEnumerable <string> opLocation;

                    if (!response.Headers.TryGetValues("Operation-Location", out opLocation))
                    {
                        // Nothing to poll.
                        return(null);
                    }

                    operationUrl = opLocation.First();
                }

                while (true)
                {
                    using (var poll = await client.GetAsync(operationUrl))
                    {
                        var txt = await poll.Content.ReadAsStringAsync();

                        var status = JsonConvert.DeserializeObject <AsyncStatusResult>(txt);

                        // A payload that deserializes to null is treated as finished with no result
                        // rather than dereferenced.
                        var pending = status != null && (status.status == "Running" || status.status == "NotStarted");

                        if (!pending)
                        {
                            return(status?.recognitionResult);
                        }
                    }

                    await Task.Delay(TimeSpan.FromMilliseconds(100));
                }
            }
        }