/// <summary>
/// Handles a click on the original picture box: converts the click position into
/// scaled-image coordinates and, when it falls inside the bounding box of a CJK text
/// line, makes that line the current selection and shows its cropped image.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data; must be a <see cref="MouseEventArgs"/> for the click to be processed.</param>
private void pictureBoxOriginal_Click(object sender, EventArgs e)
{
    // The Click event is typed as plain EventArgs. A hard cast would throw
    // InvalidCastException if the handler is ever raised without mouse data,
    // so ignore such calls instead.
    MouseEventArgs me = e as MouseEventArgs;
    if (me == null || _textSegments.Count == 0 || _imageSize.Width <= 1)
    {
        return;
    }

    // Scale factor between the loaded image and the area it occupies on screen.
    // NOTE(review): this assumes the picture box draws the image zoomed-to-fit and
    // centered (e.g. SizeMode = Zoom) - confirm against the designer settings.
    double widthRatio = (double)_imageSize.Width / pictureBoxOriginal.ClientSize.Width;
    double heightRatio = (double)_imageSize.Height / pictureBoxOriginal.ClientSize.Height;
    double resizeFactor = Math.Max(widthRatio, heightRatio);

    Size shownSize = new Size(
        (int)(_imageSize.Width / resizeFactor),
        (int)(_imageSize.Height / resizeFactor));

    // Offset of the displayed image inside the client area (integer arithmetic, as before).
    double xOffset = pictureBoxOriginal.ClientSize.Width / 2 - shownSize.Width / 2;
    double yOffset = pictureBoxOriginal.ClientSize.Height / 2 - shownSize.Height / 2;

    double mouseX = me.X - xOffset;
    double mouseY = me.Y - yOffset;

    // One group per distinct text-line rectangle; only CJK segments are selectable.
    var cjkLines = _textSegments
        .Where(x => x.IsCJK)
        .GroupBy(x => new { x.TextLineWidth, x.TextLineTop, x.TextLineLeft, x.TextLineHeight });

    foreach (var lineGroup in cjkLines)
    {
        // A group produced by GroupBy always has at least one element.
        TextSegmentData data = lineGroup.First();

        double left = data.TextLineLeft / resizeFactor;
        double right = (data.TextLineLeft + data.TextLineWidth) / resizeFactor;
        double top = data.TextLineTop / resizeFactor;
        double bottom = (data.TextLineTop + data.TextLineHeight) / resizeFactor;

        bool insideLine = mouseX >= left && mouseX <= right
                       && mouseY >= top && mouseY <= bottom;
        if (!insideLine)
        {
            continue;
        }

        // No early exit: if line rectangles overlap, the last matching line wins,
        // which is the behavior callers have seen so far.
        _currentTextSegmentData = data;
        pictureBoxOriginalCrop.Image = data.TextLineCroppedMat.ToBitmap();
        _currentFontProbIndex = 0;
        DisplayFontImageInfo();
    }
}
/// <summary>
/// Loads an image file, sends it to the OCR client, crops every CJK character and its
/// text line, classifies each character crop with the TensorFlow network, assigns a
/// font label to every text line, and shows the annotated image in the original
/// picture box.
/// </summary>
/// <param name="filePath">Path of the image file to read.</param>
private void LoadImage(string filePath)
{
    _textSegments.Clear();

    byte[] image = File.ReadAllBytes(filePath);

    // Mats wrap native memory; dispose them deterministically instead of waiting for finalizers.
    using (Mat originalMat = Mat.FromImageData(image, ImreadModes.AnyColor))
    using (Mat displayMat = originalMat.Clone())
    {
        _imageSize = originalMat.Size();

        Dictionary<string, object> options = new Dictionary<string, object>
        {
            { "recognize_granularity", "small" },
            { "detect_direction", "true" },
            { "vertexes_location", "true" },
            { "probability", "true" },
            { "detect_language", "true" }
        };

        JObject resultJson = _client.Accurate(image, options); // OCR accurate
        dynamic ocrResult = JsonConvert.DeserializeObject<dynamic>(resultJson.ToString());

        int wordCount = ocrResult.words_result_num;
        for (int i = 0; i < wordCount; i++)
        {
            dynamic line = ocrResult.words_result[i];
            dynamic chars = line.chars;
            for (int j = 0; j < chars.Count; j++)
            {
                TextSegmentData segmentData = new TextSegmentData
                {
                    TextLine = line.words,
                    TextLineWidth = line.location.width,
                    TextLineTop = line.location.top,
                    TextLineLeft = line.location.left,
                    TextLineHeight = line.location.height,
                    TextChar = chars[j]["char"],
                    TextCharWidth = chars[j].location.width,
                    TextCharTop = chars[j].location.top,
                    TextCharLeft = chars[j].location.left,
                    TextCharHeight = chars[j].location.height
                };
                segmentData.IsCJK = segmentData.TextChar.Any(x => x.IsChinese());
                Debug.Print($"Text: {segmentData.TextChar}, IsCJK? {segmentData.IsCJK}, W {segmentData.TextCharWidth}, H {segmentData.TextCharHeight}, T {segmentData.TextCharTop}, L {segmentData.TextCharLeft} ");

                if (segmentData.IsCJK)
                {
                    // The character crop is enlarged (x1.5 wide, x1.2 high) relative to the OCR box.
                    // NOTE(review): GetSizeSafe presumably clamps the size to the image bounds - confirm.
                    Rect cropCharRect = new Rect(
                        segmentData.TextCharLeft,
                        segmentData.TextCharTop,
                        GetSizeSafe((int)(segmentData.TextCharWidth * 1.5), segmentData.TextCharLeft, originalMat.Width),
                        GetSizeSafe((int)(segmentData.TextCharHeight * 1.2), segmentData.TextCharTop, originalMat.Height));

                    // The sub-matrix is only a view on originalMat; keep an independent copy
                    // on the segment and release the view immediately.
                    using (Mat charView = new Mat(originalMat, cropCharRect))
                    {
                        segmentData.TextCharCroppedMat = charView.Clone();
                    }

                    Rect cropTextRect = new Rect(
                        segmentData.TextLineLeft,
                        segmentData.TextLineTop,
                        segmentData.TextLineWidth,
                        segmentData.TextLineHeight);
                    using (Mat lineView = new Mat(originalMat, cropTextRect))
                    {
                        segmentData.TextLineCroppedMat = lineView.Clone();
                    }
                }

                _textSegments.Add(segmentData);
            }
        }

        const int netInputWidth = 80;
        const int netInputHeight = 80;
        string modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "all_freezed_vgg19_tf115.pb");

        using (Net net = CvDnn.ReadNetFromTensorflow(modelPath))
        {
            foreach (TextSegmentData sgData in _textSegments.Where(x => x.IsCJK).ToArray())
            {
                int classId1;
                double classProb1;
                List<CharProbClass> probList;

                using (Mat netInput = BuildNetInput(sgData.TextCharCroppedMat, netInputWidth, netInputHeight))
                using (var inputBlob = CvDnn.BlobFromImage(netInput, 1, new Size(netInputWidth, netInputHeight), new Scalar(104, 117, 123)))
                {
                    net.SetInput(inputBlob);
                    using (var prob = net.Forward())
                    {
                        GetMaxClass(prob, out classId1, out classProb1, out probList);
                    }
                }

                sgData.ClassLable = GetClassText(classId1);
                sgData.ClassProb = classProb1;
                sgData.ProbClassList = probList;
                Debug.Print($"Char:{sgData.TextChar}, ClassID:{GetClassText(classId1)}, classProb:{classProb1}");
            }
        }

        // done image processing, calculating
        var groupedTextLines = _textSegments
            .Where(x => x.IsCJK)
            .GroupBy(x => new { x.TextLineWidth, x.TextLineTop, x.TextLineLeft, x.TextLineHeight })
            .ToArray();

        foreach (var textLine in groupedTextLines)
        {
            Dictionary<string, double> fontProbDict = new Dictionary<string, double>();
            foreach (TextSegmentData segmentData in textLine)
            {
                double accumulated;
                if (!fontProbDict.TryGetValue(segmentData.ClassLable, out accumulated))
                {
                    fontProbDict.Add(segmentData.ClassLable, segmentData.ClassProb);
                }
                else if (segmentData.ClassProb > accumulated)
                {
                    // NOTE(review): the score is only increased when the new probability exceeds
                    // the running total. This looks like a mix of "max" and "sum"; the rule is
                    // kept as-is because the intended scoring is not visible here - confirm.
                    fontProbDict[segmentData.ClassLable] = accumulated + segmentData.ClassProb;
                }
            }

            // Highest score first; the group is never empty, so neither is the dictionary.
            string lineFont = fontProbDict.OrderByDescending(x => x.Value).First().Key;

            // Resolve the first segment once instead of re-materializing the group per segment.
            TextSegmentData firstSegment = textLine.First();
            var lineProbList = firstSegment.ProbClassList;
            Debug.Print($"Text Line: {firstSegment.TextLine}, Font Name: {lineFont}");

            foreach (TextSegmentData data in textLine)
            {
                data.TextLineFont = lineFont;
                // Every segment of the line shares the first segment's probability list.
                data.ProbClassList = lineProbList;
            }

            Rect textLineRect = new Rect(
                firstSegment.TextLineLeft,
                firstSegment.TextLineTop,
                firstSegment.TextLineWidth,
                firstSegment.TextLineHeight);
            displayMat.Rectangle(textLineRect, Scalar.RandomColor(), 2, LineTypes.AntiAlias);
        }

        pictureBoxOriginal.Image = displayMat.ToBitmap();
    }
}

/// <summary>
/// Builds the image fed to the network from a character crop: grayscale, scaled to fit
/// the target size with the aspect ratio preserved, padded with white to the target
/// size, and converted back to three channels.
/// </summary>
/// <param name="charMat">Cropped character image (one or three channels).</param>
/// <param name="targetWidth">Network input width in pixels.</param>
/// <param name="targetHeight">Network input height in pixels.</param>
/// <returns>A new Mat owned by the caller, who must dispose it.</returns>
private static Mat BuildNetInput(Mat charMat, int targetWidth, int targetHeight)
{
    double scaleW = targetWidth / (double)charMat.Width;
    double scaleH = targetHeight / (double)charMat.Height;
    double scale = scaleW < scaleH ? scaleW : scaleH;

    // ImreadModes.AnyColor can produce a single-channel Mat for grayscale files;
    // BGR2GRAY would throw on such input, so only convert multi-channel crops.
    using (Mat grey = charMat.Channels() == 1
        ? charMat.Clone()
        : charMat.CvtColor(ColorConversionCodes.BGR2GRAY))
    using (Mat resized = new Mat())
    {
        Cv2.Resize(grey, resized, new Size(0, 0), scale, scale, InterpolationFlags.Cubic);

        // Pad each axis independently so that rounding on the "fitted" axis cannot leave
        // the other axis unpadded; an odd remainder goes to the right/bottom side.
        int missingWidth = Math.Max(0, targetWidth - resized.Width);
        int missingHeight = Math.Max(0, targetHeight - resized.Height);
        int padLeft = missingWidth / 2;
        int padRight = missingWidth - padLeft;
        int padTop = missingHeight / 2;
        int padBottom = missingHeight - padTop;

        using (Mat padded = resized.CopyMakeBorder(padTop, padBottom, padLeft, padRight, BorderTypes.Constant, Scalar.White))
        {
            // inferring needs BGR input instead of gray
            return padded.CvtColor(ColorConversionCodes.GRAY2BGR);
        }
    }
}