Beispiel #1
0
        /// <summary>
        /// Creates one <see cref="OcrRegion"/> per element of <paramref name="regions"/> and appends
        /// every word found under each element's "lines" / "words" arrays to <paramref name="result"/>.
        /// </summary>
        /// <param name="regions">JSON array of region tokens; each is expected to carry a "lines" array whose items carry a "words" array.</param>
        /// <param name="result">Result object that receives the parsed words and the created regions.</param>
        private void LoadFromJsonRegion(JArray regions, OcrResult result)
        {
            var regionId = 1;

            foreach (var jRegion in regions)
            {
                // NOTE(review): Words points at the shared result.Words list, so every region
                // exposes the words of all regions rather than only its own - confirm this is intended.
                var region = new OcrRegion()
                {
                    Id       = regionId,
                    Code     = OcrLoaderHelper.GetRegionCode(regionId),
                    Location = ParseBox(jRegion, result),
                    Words    = result.Words
                };

                // Words are collected in document order: region -> line -> word.
                foreach (var jLine in jRegion["lines"])
                {
                    foreach (var jWord in jLine["words"])
                    {
                        result.Words.Add(ParseWord(jWord, result));
                    }
                }

                result.Regions.Add(region);
                regionId++;
            }
        }
        /// <summary>
        /// Parses a JSON OCR response into an <see cref="OcrResult"/> that contains a single main region.
        /// </summary>
        /// <remarks>
        /// Annotations are read from <c>responses[0].textAnnotations</c>. The first annotation is skipped
        /// (presumably it is the aggregate full-text entry - confirm against the service response format)
        /// and each following annotation becomes one <see cref="OcrWord"/>, so word ids start at 2.
        /// When the response holds no word annotations, the result has an empty word list and the main
        /// region keeps a default <see cref="OcrLocation"/> instead of the method throwing.
        /// </remarks>
        /// <param name="jsonResult">Raw JSON text of the OCR response.</param>
        /// <param name="info">Image information stored on the result and passed to the location helper.</param>
        /// <returns>The populated result with one region covering all parsed words.</returns>
        public OcrResult DoParse(string jsonResult, ImageInfo info)
        {
            var words  = new List<OcrWord>();
            var json   = JObject.Parse(jsonResult);
            var result = new OcrResult()
            {
                Info = info, TextAngle = 0
            };
            var mainRegion = new OcrRegion()
            {
                Id    = 1,
                Code  = OcrLoaderHelper.GetRegionCode(1),
                Words = words
            };

            // Walk the response defensively: a response without detected text has no "textAnnotations".
            var responses     = json["responses"] as JArray;
            var firstResponse = responses != null && responses.Count > 0 ? responses[0] as JObject : null;
            var annotations   = firstResponse != null ? firstResponse["textAnnotations"] as JArray : null;

            if (annotations != null)
            {
                // Index 0 is skipped on purpose; ids keep the original 1-based annotation position.
                for (var index = 1; index < annotations.Count; index++)
                {
                    var ann = annotations[index];

                    words.Add(new OcrWord()
                    {
                        Id       = index + 1,
                        Location = GetLocation(ann, info),
                        Text     = ann["description"].Value<string>()
                    });
                }
            }

            var location = new OcrLocation();

            // Min/Max throw on an empty sequence, so the bounding box is only computed when words exist.
            if (words.Count > 0)
            {
                location.X      = words.Min(i => i.Location.X);
                location.Width  = words.Max(i => i.Location.XBound) - location.X;
                location.Y      = words.Min(i => i.Location.Y);
                location.Height = words.Max(i => i.Location.YBound) - location.Y;
            }

            mainRegion.Location = location;
            result.Regions.Add(mainRegion);
            result.Words = words;
            return result;
        }
Beispiel #3
0
        /// <summary>
        /// Builds the <c>_template</c> fixture: a "Case Management Statement" template for court 1
        /// with four named regions (case number, plaintiff, attorney, hearing date).
        /// </summary>
        public void TestInitialize()
        {
            _template = new OcrTemplate
            {
                OcrTemplateId = 1,
                CourtId       = 1,
                DocumentType  = "Case Management Statement"
            };

            // The four numeric arguments are passed through as-is; presumably pixel bounds - confirm against OcrRegion.
            var regions = new[]
            {
                new OcrRegion(1561, 957, 2291, 1158, OcrRegionName.CaseNumber),
                new OcrRegion(150, 830, 1640, 1010, OcrRegionName.Plaintiff),
                new OcrRegion(150, 200, 1640, 590, OcrRegionName.Attorney),
                new OcrRegion(150, 1220, 2291, 1350, OcrRegionName.HearingDate)
            };

            foreach (var region in regions)
            {
                _template.Regions.Add(region);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Console entry point: builds a "Case Management Statement" template with four regions,
        /// runs template extraction on a fixed PDF, prints every extracted value, then prints the
        /// text extracted for each region from the PNG conversion of the same PDF.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var template = new OcrTemplate
            {
                CourtId = 1,
                DocumentType = "Case Management Statement",
                OcrTemplateId = 1
            };

            var caseNumberRegion = new OcrRegion(1561, 957, 2291, 1158, OcrRegionName.CaseNumber);
            var plaintiffRegion = new OcrRegion(150, 830, 1640, 1010, OcrRegionName.Plaintiff);
            var attorneyRegion = new OcrRegion(150, 200, 1640, 590, OcrRegionName.Attorney);
            var hearingDateRegion = new OcrRegion(150, 1220, 2291, 1350, OcrRegionName.HearingDate);
            template.Regions.Add(caseNumberRegion);
            template.Regions.Add(plaintiffRegion);
            template.Regions.Add(attorneyRegion);
            template.Regions.Add(hearingDateRegion);

            FileInfo pdfFile = new FileInfo("OLPDFS\\43159219.pdf");

            var pngFile = CloudOcrService.ConvertPdfToPng(pdfFile);

            var results = CloudOcrService.ExtractTemplateFromPdfAsync(pdfFile, template);

            // Print each extracted value. Passing the LINQ sequence itself to Console.WriteLine
            // would only print the iterator's type name, not its contents.
            // Blocking on .Result is tolerated here because this is a synchronous console entry point.
            foreach (var str in results.Result)
            {
                Console.WriteLine(str);
            }

            foreach (var region in template.Regions)
            {
                Console.WriteLine(region.RegionName.ToString() + ": " + CloudOcrService.ExtractRegionFromPng(pngFile, region));
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Beispiel #5
0
 /// <summary>
 /// Creates an extractor and stores the supplied options, region and image information
 /// in the corresponding private fields.
 /// </summary>
 /// <param name="options">Configuration options kept in <c>_configOptions</c>.</param>
 /// <param name="region">OCR region kept in <c>_region</c>.</param>
 /// <param name="imageInfo">Image information kept in <c>_imageInfo</c>.</param>
 public Extractor(ConfigOptions options, OcrRegion region, ImageInfo imageInfo)
 {
     this._imageInfo     = imageInfo;
     this._region        = region;
     this._configOptions = options;
 }
Beispiel #6
0
        /// <summary>
        /// Builds the text of <paramref name="region"/> from its lines, keeping only the lines whose
        /// combined score is positive.
        /// </summary>
        /// <remarks>
        /// Pass 1 cleans every word of a line, joins them with a trailing space each, and scores the
        /// resulting tokens individually, in pairs and in triples. Pass 2 combines each line's raw score
        /// with the weighted region total and the weighted scores of the neighbouring lines.
        /// </remarks>
        /// <param name="region">Region whose lines and words are read.</param>
        /// <returns>The concatenation of all kept line texts, in their original order.</returns>
        private string GetRegionText(OcrRegion region)
        {
            var lineTexts   = new List<string>();
            var lineScores  = new List<int>();
            int regionTotal = 0;
            var buffer      = new StringBuilder();

            // Pass 1: clean up each line and compute its raw score.
            foreach (var line in region.Lines)
            {
                buffer.Clear();

                foreach (var word in line.Words)
                {
                    buffer.Append(CleanupWord(word.Text)).Append(" ");
                }

                var lineText = buffer.ToString();

                // Score the tokens after noise characters were removed (single, pair and triple scores).
                var tokens   = lineText.Split(' ');
                var rawScore = 0;

                for (int t = 0; t < tokens.Length; t++)
                {
                    rawScore += GetWordScore(tokens[t]);

                    if (t + 1 < tokens.Length)
                    {
                        rawScore += GetWordScore2(tokens[t], tokens[t + 1]);
                    }

                    if (t + 2 < tokens.Length)
                    {
                        rawScore += GetWordScore3(tokens[t], tokens[t + 1], tokens[t + 2]);
                    }
                }

                lineTexts.Add(lineText);
                lineScores.Add(rawScore);
                regionTotal += rawScore;
            }

            // Pass 2: decide per line, taking the region total and the adjacent lines into account.
            var output = new StringBuilder();

            for (int index = 0; index < lineTexts.Count; index++)
            {
                var combined = WeightScore(regionTotal) + lineScores[index];

                if (index > 0)
                {
                    combined += WeightScore(lineScores[index - 1]);
                }

                if (index + 1 < lineTexts.Count)
                {
                    combined += WeightScore(lineScores[index + 1]);
                }

                if (combined > 0)
                {
                    output.Append(lineTexts[index]);
                }
            }

            return output.ToString();
        }