/// <summary>
/// Walks a JSON array of OCR regions and fills <paramref name="result"/>:
/// one <c>OcrRegion</c> is added to <c>result.Regions</c> per array element,
/// and every word found under the element's "lines" / "words" arrays is
/// parsed with <c>ParseWord</c> and appended to <c>result.Words</c>.
/// </summary>
/// <param name="regions">JSON array whose elements describe regions.</param>
/// <param name="result">OCR result that receives the regions and words.</param>
private void LoadFromJsonRegion(JArray regions, OcrResult result)
{
    // Region ids are 1-based and follow the order of the JSON array.
    var regionId = 1;
    foreach (var jRegion in regions)
    {
        var region = new OcrRegion()
        {
            Id = regionId,
            Code = OcrLoaderHelper.GetRegionCode(regionId),
            Location = ParseBox(jRegion, result),
            // NOTE(review): every region references the result-wide word list,
            // not a per-region list - confirm this sharing is intended.
            Words = result.Words
        };

        // A region without a "lines" property simply contributes no words
        // (enumerating the null token used to throw).
        var jLines = jRegion["lines"];
        if (jLines != null)
        {
            foreach (var jLine in jLines)
            {
                var jWords = jLine["words"];
                if (jWords == null)
                {
                    continue;
                }

                foreach (var jWord in jWords)
                {
                    result.Words.Add(ParseWord(jWord, result));
                }
            }
        }

        result.Regions.Add(region);
        regionId++;
    }
}
/// <summary>
/// Parses a JSON OCR response into an <c>OcrResult</c> containing a single
/// region (id 1) that holds every parsed word.
/// </summary>
/// <remarks>
/// Words are read from <c>responses[0].textAnnotations</c>. The first
/// annotation is skipped and the remaining ones become words whose ids start
/// at 2 (the id is the annotation's 1-based position in the array).
/// When the response carries no "textAnnotations", or only the first
/// annotation, the result contains the region with an empty word list and a
/// default-constructed location instead of throwing.
/// </remarks>
/// <param name="jsonResult">Raw JSON text of the OCR response.</param>
/// <param name="info">Image information stored on the result and passed to <c>GetLocation</c>.</param>
/// <returns>The populated OCR result.</returns>
public OcrResult DoParse(string jsonResult, ImageInfo info)
{
    var words = new List<OcrWord>();
    var json = JObject.Parse(jsonResult);
    var result = new OcrResult() { Info = info, TextAngle = 0 };
    var mainRegion = new OcrRegion()
    {
        Id = 1,
        Code = OcrLoaderHelper.GetRegionCode(1),
        Words = words
    };

    var firstResponse = json["responses"].Value<JArray>()[0];

    // The property is absent when no text was detected; treat that as "no words".
    var annotationsToken = firstResponse["textAnnotations"];
    if (annotationsToken != null)
    {
        var wordId = 1;
        foreach (var ann in annotationsToken.Value<JArray>())
        {
            // The first annotation is not a word, so it is skipped; ids of the
            // remaining words keep their position in the array.
            // NOTE(review): presumably the first entry is the full-text
            // annotation - confirm against the OCR provider's response format.
            if (wordId != 1)
            {
                var word = new OcrWord()
                {
                    Id = wordId,
                    Location = GetLocation(ann, info),
                    Text = ann["description"].Value<string>()
                };
                words.Add(word);
            }

            wordId++;
        }
    }

    // The region's box is the bounding box of all its words. Min/Max throw on
    // an empty sequence, so the default location is kept when there are none.
    mainRegion.Location = new OcrLocation();
    if (words.Count > 0)
    {
        mainRegion.Location.X = mainRegion.Words.Min(i => i.Location.X);
        mainRegion.Location.Width = mainRegion.Words.Max(i => i.Location.XBound) - mainRegion.Location.X;
        mainRegion.Location.Y = mainRegion.Words.Min(i => i.Location.Y);
        mainRegion.Location.Height = mainRegion.Words.Max(i => i.Location.YBound) - mainRegion.Location.Y;
    }

    result.Regions.Add(mainRegion);
    result.Words = words;
    return result;
}
/// <summary>
/// Builds the shared template used by the tests: a "Case Management Statement"
/// template for court 1 with four regions (case number, plaintiff, attorney
/// and hearing date), added in that order.
/// </summary>
public void TestInitialize()
{
    var template = new OcrTemplate
    {
        CourtId = 1,
        DocumentType = "Case Management Statement",
        OcrTemplateId = 1
    };
    _template = template;

    // Region definitions, listed in the order they are registered.
    var templateRegions = new[]
    {
        new OcrRegion(1561, 957, 2291, 1158, OcrRegionName.CaseNumber),
        new OcrRegion(150, 830, 1640, 1010, OcrRegionName.Plaintiff),
        new OcrRegion(150, 200, 1640, 590, OcrRegionName.Attorney),
        new OcrRegion(150, 1220, 2291, 1350, OcrRegionName.HearingDate)
    };

    foreach (var templateRegion in templateRegions)
    {
        _template.Regions.Add(templateRegion);
    }
}
/// <summary>
/// Console entry point: builds a "Case Management Statement" template with
/// four regions, runs template extraction on a fixed PDF and prints every
/// extracted value, then prints the text extracted from each region of the
/// PDF's PNG conversion and waits for Enter.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var template = new OcrTemplate
    {
        CourtId = 1,
        DocumentType = "Case Management Statement",
        OcrTemplateId = 1
    };

    var caseNumberRegion = new OcrRegion(1561, 957, 2291, 1158, OcrRegionName.CaseNumber);
    var plaintiffRegion = new OcrRegion(150, 830, 1640, 1010, OcrRegionName.Plaintiff);
    var attorneyRegion = new OcrRegion(150, 200, 1640, 590, OcrRegionName.Attorney);
    var hearingDateRegion = new OcrRegion(150, 1220, 2291, 1350, OcrRegionName.HearingDate);

    template.Regions.Add(caseNumberRegion);
    template.Regions.Add(plaintiffRegion);
    template.Regions.Add(attorneyRegion);
    template.Regions.Add(hearingDateRegion);

    var pdfFile = new FileInfo("OLPDFS\\43159219.pdf");
    var pngFile = CloudOcrService.ConvertPdfToPng(pdfFile);

    // Blocking on .Result is kept because Main is synchronous here.
    var extracted = CloudOcrService.ExtractTemplateFromPdfAsync(pdfFile, template).Result;

    // Print each extracted value. Previously the loop body was empty and the
    // LINQ iterator object itself was written, which printed only its type name.
    foreach (var item in extracted)
    {
        Console.WriteLine(item);
    }

    foreach (var region in template.Regions)
    {
        Console.WriteLine(region.RegionName.ToString() + ": " + CloudOcrService.ExtractRegionFromPng(pngFile, region));
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Creates an extractor and stores the supplied configuration, region and
/// image information in the instance's private fields.
/// </summary>
/// <param name="options">Configuration options kept in <c>_configOptions</c>.</param>
/// <param name="region">OCR region kept in <c>_region</c>.</param>
/// <param name="imageInfo">Image information kept in <c>_imageInfo</c>.</param>
public Extractor(ConfigOptions options, OcrRegion region, ImageInfo imageInfo)
{
    this._imageInfo = imageInfo;
    this._region = region;
    this._configOptions = options;
}
/// <summary>
/// Builds the text of a region from its lines, keeping only the lines whose
/// weighted score is positive.
/// </summary>
/// <remarks>
/// Each line is rebuilt from its cleaned-up words (every word followed by a
/// single space) and given a raw score: the sum of <c>GetWordScore</c> for
/// each token, <c>GetWordScore2</c> for each adjacent pair and
/// <c>GetWordScore3</c> for each adjacent triple. A line is kept when its raw
/// score plus the weighted region total plus the weighted raw scores of the
/// previous and next lines is greater than zero.
/// </remarks>
/// <param name="region">Region whose lines are scored and concatenated.</param>
/// <returns>The kept lines concatenated in their original order.</returns>
private string GetRegionText(OcrRegion region)
{
    var lineTexts = new List<string>();
    var lineScores = new List<int>();
    int regionTotal = 0;

    foreach (var line in region.Lines)
    {
        // Rebuild the line from cleaned words; each word is followed by a space.
        var lineBuilder = new StringBuilder();
        for (int w = 0; w < line.Words.Count; w++)
        {
            lineBuilder.Append(CleanupWord(line.Words[w].Text)).Append(" ");
        }

        var lineText = lineBuilder.ToString();

        // Score the line on the cleaned text: single tokens, pairs, then triples.
        // Splitting on ' ' leaves an empty trailing token, which is scored too.
        var tokens = lineText.Split(' ');
        var score = 0;
        for (int t = 0; t < tokens.Length; t++)
        {
            var remaining = tokens.Length - t;

            score += GetWordScore(tokens[t]);
            if (remaining > 1)
            {
                score += GetWordScore2(tokens[t], tokens[t + 1]);
            }

            if (remaining > 2)
            {
                score += GetWordScore3(tokens[t], tokens[t + 1], tokens[t + 2]);
            }
        }

        lineTexts.Add(lineText);
        lineScores.Add(score);
        regionTotal += score;
    }

    // Decide per line using the region total and the neighbouring lines' scores.
    var output = new StringBuilder();
    var lastIndex = lineTexts.Count - 1;
    for (int idx = 0; idx <= lastIndex; idx++)
    {
        var combined = WeightScore(regionTotal) + lineScores[idx];
        if (idx > 0)
        {
            combined += WeightScore(lineScores[idx - 1]);
        }

        if (idx < lastIndex)
        {
            combined += WeightScore(lineScores[idx + 1]);
        }

        if (combined > 0)
        {
            output.Append(lineTexts[idx]);
        }
    }

    return output.ToString();
}