/// <summary>
        /// Pre-processes the image, runs Google Vision text detection on it and parses the
        /// returned annotations into an OCR result.
        /// </summary>
        /// <param name="imageAsStream">Image data handed to the pre-processing step.</param>
        /// <param name="fileFormatEnum">Format of the image, forwarded to the pre-processing step.</param>
        /// <param name="start">
        /// Instant from which the elapsed time stored in the result is measured. It is compared
        /// against <see cref="DateTime.Now"/>, so it is presumably a local-time value captured by
        /// the caller (TODO confirm).
        /// </param>
        /// <returns>
        /// A success result carrying the parsed content and the raw annotations, or an error
        /// result wrapping any exception thrown while pre-processing, calling the API or parsing.
        /// </returns>
        private async Task<GoogleOcrResult> DoOcr(Stream imageAsStream, FileFormatEnum fileFormatEnum, DateTime start)
        {
            try
            {
                var preprocessedResult = _ocrPreProcessing.AjustOrientationAndSize(imageAsStream, fileFormatEnum);
                using (var stream = preprocessedResult.ImageFileStream)
                {
                    // Hand the builder the path of the credentials file so it is loaded as part
                    // of BuildAsync, instead of blocking this async method on File.ReadAllText.
                    var builder = new ImageAnnotatorClientBuilder
                    {
                        CredentialsPath = _configurations.CredentialsJsonFile
                    };
                    var client = await builder.BuildAsync();

                    var img = await Image.FromStreamAsync(stream);

                    var textAnnotations = await client.DetectTextAsync(img);

                    var rawGoogleOcrResult = RawGoogleOcrResult.CreateFrom(textAnnotations.ToList());

                    // The parser takes the dimensions of the pre-processed image, height first.
                    var content = _googleOcrParser.Execute(rawGoogleOcrResult,
                                                           preprocessedResult.NewImageHeight,
                                                           preprocessedResult.NewImageWidth);

                    return GoogleOcrResult.CreateSuccessResult(DateTime.Now.Subtract(start), content, rawGoogleOcrResult);
                }
            }
            catch (Exception e)
            {
                // Failures are reported through the result object rather than rethrown.
                return GoogleOcrResult.CreateErrorResult(DateTime.Now.Subtract(start), e);
            }
        }
示例#2
0
        /// <summary>
        /// Groups the words of a raw OCR result into horizontal lines and converts every line
        /// into sentences.
        /// </summary>
        /// <param name="output">
        /// Raw OCR result. A <c>null</c> value is treated the same as a result without text.
        /// </param>
        /// <param name="imgHeight">Image height, forwarded to <c>CreateSentences</c>.</param>
        /// <param name="imgWidth">Image width, forwarded to <c>CreateSentences</c>.</param>
        /// <returns>
        /// An <see cref="ImageContent"/> holding the sentences built from the detected words;
        /// it holds an empty sentence list when there is no text.
        /// </returns>
        public ImageContent Execute(RawGoogleOcrResult output, int imgHeight, int imgWidth)
        {
            var sentences = new List<ISentence>();

            // Guard against null before calling TextFound(); the word extraction below already
            // tolerated a missing annotation list, so a missing result is handled the same way.
            if (output == null || !output.TextFound())
            {
                return new ImageContent(sentences);
            }

            // The first annotation is skipped; presumably it is the aggregate full-text
            // annotation rather than a single word (TODO confirm against the API response).
            var words = output.EntityAnnotations?
                              .Skip(1)
                              .Select(AsWord)
                              .Where(x => x != null)
                              .OrderBy(x => x.Top)
                              .ToList() ?? new List<GoogleWord>();

            // Each line is keyed by the vertical extent (Top/Bottom) of the first word put on it.
            var lines = new Dictionary<Point, List<GoogleWord>>();

            foreach (var word in words)
            {
                var point = new Point()
                {
                    Top = word.Top, Bottom = word.Bottom
                };

                // Among the existing lines that accept this word's extent, take the topmost one.
                var parentLine = lines.Where(x => x.Key.IsWithinThisPoint(point))
                                      .OrderBy(x => x.Key.Top)
                                      .Select(x => x.Value)
                                      .FirstOrDefault();

                if (parentLine == null)
                {
                    // No existing line matches, so this word opens a new one.
                    parentLine = new List<GoogleWord>();
                    lines.Add(point, parentLine);
                }
                parentLine.Add(word);
            }

            int lineCount = 0, sentenceIndex = 0;

            // Emit lines from top to bottom, with the words inside each line from left to right.
            foreach (var line in lines.OrderBy(x => x.Key.Top))
            {
                var orderedLine = line.Value.OrderBy(x => x.Left);
                // Note: CreateSentences is passed the width before the height.
                sentences.AddRange(CreateSentences(orderedLine, lineCount, imgWidth, imgHeight, ref sentenceIndex));
                lineCount++;
            }

            return new ImageContent(sentences);
        }
示例#3
0
        /// <summary>
        /// Pre-processes the image, sends it to the vision service for text recognition and
        /// parses the recognised entries into an OCR result.
        /// </summary>
        /// <param name="imageAsStream">Image data handed to the pre-processing step.</param>
        /// <param name="fileFormatEnum">Format of the image, forwarded to the pre-processing step.</param>
        /// <param name="start">Instant from which the elapsed time stored in the result is measured.</param>
        /// <returns>
        /// A success result with the parsed content and the raw recognition data, or an error
        /// result wrapping any exception thrown during processing.
        /// </returns>
        private async Task<GoogleOcrResult> DoOcr(Stream imageAsStream, FileFormatEnum fileFormatEnum, DateTime start)
        {
            try
            {
                var prepared = _ocrPreProcessing.AjustOrientationAndSize(imageAsStream, fileFormatEnum);

                // Both the pre-processed stream and the service are released when this scope exits.
                using (var imageStream = prepared.ImageFileStream)
                using (var visionService = VisionService())
                {
                    var rawResult = RawGoogleOcrResult.CreateFrom(await visionService.RecognizeTextAsync(imageStream));

                    // The parser takes the dimensions of the pre-processed image, height first.
                    var parsed = _googleOcrParser.Execute(rawResult, prepared.NewImageHeight, prepared.NewImageWidth);

                    var elapsed = DateTime.Now - start;
                    return GoogleOcrResult.CreateSuccessResult(elapsed, parsed, rawResult);
                }
            }
            catch (Exception failure)
            {
                // Failures are reported through the result object rather than rethrown.
                return GoogleOcrResult.CreateErrorResult(DateTime.Now - start, failure);
            }
        }