/// <summary>
/// Walks the OCR result iterator one text line at a time and, within each
/// line, one word at a time, collecting the results into
/// <paramref name="_textLines"/>.
/// </summary>
/// <param name="iter">Result iterator to read lines and words from.</param>
/// <param name="_textLines">List that receives one <c>TextLine</c> per visited line.</param>
/// <remarks>
/// The text of every visited line, each followed by
/// <see cref="System.Environment.NewLine"/>, is accumulated and assigned to the
/// <c>text</c> member once the walk is finished.
/// </remarks>
private void IterateFullPage(ResultIterator iter, ref List<TextLine> _textLines)
{
    PageIteratorLevel lineLevel = PageIteratorLevel.RIL_TEXTLINE;
    PageIteratorLevel wordLevel = PageIteratorLevel.RIL_WORD;

    StringBuilder pageText = new StringBuilder();
    int x1, y1, x2, y2;

    bool hasMoreLines = true;
    while (hasMoreLines)
    {
        TextLine line = new TextLine();

        string lineText = iter.GetUTF8Text(lineLevel);
        pageText.Append(lineText);

        // The iterator reports edges; Rectangle wants origin plus width/height.
        iter.BoundingBox(lineLevel, out x1, out y1, out x2, out y2);
        line.Bounds = new Rectangle(x1, y1, x2 - x1, y2 - y1);
        line.Text = lineText ?? string.Empty;
        line.Words = new List<Word>();

        bool hasMoreWords = true;
        while (hasMoreWords)
        {
            Word word = new Word();

            iter.BoundingBox(wordLevel, out x1, out y1, out x2, out y2);
            word.Text = iter.GetUTF8Text(wordLevel);
            word.Confidence = iter.Confidence(wordLevel);
            word.Bounds = new Rectangle(x1, y1, x2 - x1, y2 - y1);
            line.Words.Add(word);

            // Stop on the last word of the current line without advancing;
            // otherwise move to the next word (false when nothing is left).
            hasMoreWords = !iter.IsAtFinalElement(lineLevel, wordLevel)
                           && iter.Next(wordLevel);
        }

        pageText.Append(System.Environment.NewLine);
        _textLines.Add(line);

        hasMoreLines = iter.Next(lineLevel);
    }

    text = pageText.ToString();
}
//public static void clearFaceFlag() { faceFlag = 0; }

/// <summary>
/// Runs Tesseract OCR over the document with the given id, stores the
/// recognized lines (as JSON) back on the document, and updates
/// <c>FaceFlag</c> according to <c>HasFace</c>.
/// </summary>
/// <param name="id">Identifier passed to <c>DBService.FindDocumentById</c>.</param>
/// <returns>
/// JSON serialization of a list of <c>OCRText</c> entries, one per recognized
/// text line; <c>Coords</c> is "left,top,right,bottom" of the line's bounding box.
/// </returns>
/// <exception cref="ArgumentException">No document was found for <paramref name="id"/>.</exception>
/// <exception cref="InvalidOperationException">Tesseract could not be initialized.</exception>
public string RecognizeText(int id)
{
    DBService dbs = new DBService();
    Document doc = dbs.FindDocumentById(id);
    if (doc == null)
    {
        throw new ArgumentException("No document found for id " + id + ".", "id");
    }

    Image<Gray, Byte> img = scale(doc);

    // The native library location must be set before the API object is created.
    Tesseract.Native.DllImports.TesseractDirectory =
        System.Web.HttpContext.Current.Server.MapPath("~/Tesseract/bin/Debug/DLLS/");
    string dataPath = System.Web.HttpContext.Current.Server.MapPath("~/tessdata/");

    List<OCRText> forJson = new List<OCRText>();
    TessBaseAPI tessBaseAPI = new TessBaseAPI();
    Pix pix = null;
    try
    {
        // Format explicitly: with a string second argument, Debug.WriteLine
        // resolves to the (message, category) overload and "{0}" is never filled in.
        System.Diagnostics.Debug.WriteLine(
            string.Format("The current version is {0}", tessBaseAPI.GetVersion()));

        // Initialize tesseract-ocr
        if (!tessBaseAPI.Init(dataPath, "eng", OcrEngineMode.DEFAULT))
        {
            throw new InvalidOperationException("Could not initialize tesseract.");
        }

        // Set the Page Segmentation mode
        tessBaseAPI.SetPageSegMode(PageSegmentationMode.AUTO_OSD);

        // Set the input image
        pix = tessBaseAPI.SetImage(doc.Path);

        // Recognize image
        tessBaseAPI.Recognize();

        // Extract text line by line from the result iterator
        ResultIterator resultIterator = tessBaseAPI.GetIterator();
        if (resultIterator != null)
        {
            PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_TEXTLINE;
            int left, top, right, bottom;
            do
            {
                string textContent = resultIterator.GetUTF8Text(pageIteratorLevel);
                resultIterator.BoundingBox(pageIteratorLevel, out left, out top, out right, out bottom);
                string coordsString = "" + left + "," + top + "," + right + "," + bottom;
                forJson.Add(new OCRText() { Coords = coordsString, Text = textContent });
            } while (resultIterator.Next(pageIteratorLevel));
        }
    }
    finally
    {
        // Release the engine and the image even when OCR fails part-way.
        tessBaseAPI.Dispose();
        if (pix != null)
        {
            pix.Dispose();
        }
    }

    var textForReturn = JsonConvert.SerializeObject(forJson);
    dbs.UpdateDocument(textForReturn, id);

    // NOTE(review): img is never disposed here; confirm who owns the image returned by scale().
    if (HasFace(img) == true)
    {
        FaceFlag = 1;
    }
    else
    {
        FaceFlag = 0;
    }

    return textForReturn;
}