/// <summary>
/// Runs Tesseract OCR over an in-memory TIFF image and records the recognised text.
/// </summary>
/// <remarks>
/// Stores the full page text in <c>Message</c>, and appends one <c>OCRRow</c> per text
/// line to <c>listOCRRow</c>; each row collects a <c>WordInfo</c> for every
/// non-blank word found on that line.
/// </remarks>
/// <param name="fileName">
/// Raw bytes of the TIFF image to recognise (despite the name, this is image data, not a path).
/// </param>
private void OCR(byte[] fileName)
{
    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (var img = Pix.LoadTiffFromMemory(fileName))
    using (var page = engine.Process(img))
    {
        Message = page.GetText();

        OCRRow row = new OCRRow();
        using (var iter = page.GetIterator())
        {
            iter.Begin();
            do
            {
                if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                {
                    // A new text line starts: store the row built so far and open a new one.
                    // NOTE(review): if the very first word is at the beginning of a line
                    // (presumably always the case), the still-empty initial row is stored
                    // as the first entry. Kept as-is because callers may rely on it — confirm.
                    listOCRRow.Add(row);
                    row = new OCRRow();
                    row.Row = iter.GetText(PageIteratorLevel.TextLine);
                    row.Confidence = iter.GetConfidence(PageIteratorLevel.TextLine);
                }

                WordInfo wordInfo = new WordInfo();
                wordInfo.Word = iter.GetText(PageIteratorLevel.Word);
                if (!string.IsNullOrWhiteSpace(wordInfo.Word))
                {
                    // The word image is only needed for its size and position; dispose it
                    // right away so one image per word is not leaked.
                    using (Pix pix = iter.GetImage(PageIteratorLevel.Word, 0, out wordInfo.XPos, out wordInfo.YPos))
                    {
                        wordInfo.Width = pix.Width;
                        wordInfo.Height = pix.Height;
                    }

                    wordInfo.Confidence = iter.GetConfidence(PageIteratorLevel.Word);
                    row.ListWord.Add(wordInfo);
                }
            }
            while (iter.Next(PageIteratorLevel.Word));

            // Store the row that was still being filled when the iterator ran out of words.
            listOCRRow.Add(row);
        }
    }
}
/// <summary>
/// Consumes one recognised item from <paramref name="Results"/> and folds it into the
/// word and row currently being assembled.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>Any character other than a space or carriage return is appended to <paramref name="listCharacter"/>.</item>
/// <item>A space completes the pending word and appends it to <paramref name="listWord"/>.</item>
/// <item>A carriage return (code 13) completes the pending word and the pending row; the row is added to <c>listOCRRow</c>.</item>
/// <item>The last item (<c>ItemNo == Results.Hdr.NumItems - 1</c>) also completes whatever word and row are still pending.</item>
/// </list>
/// Nothing is emitted for an empty pending word or an empty pending row, so consecutive
/// spaces, blank lines and a trailing space or carriage return no longer throw.
/// </remarks>
/// <param name="Results">OCR result set; only <c>Item[ItemNo]</c> and <c>Hdr.NumItems</c> are read.</param>
/// <param name="listWord">Words collected for the current row; replaced by a new list when a row is completed.</param>
/// <param name="listCharacter">Characters collected for the current word; replaced by a new list when a word is completed.</param>
/// <param name="index">Position assigned to the next completed row; incremented for each row added.</param>
/// <param name="ItemNo">Zero-based index of the item in <c>Results.Item</c> to process.</param>
private void SetOCRRow(ref TOCRRESULTS Results, ref List<WordInfo> listWord, ref List<Character> listCharacter, ref int index, int ItemNo)
{
    var item = Results.Item[ItemNo];
    char symbol = Convert.ToChar(item.OCRCha);
    bool isSpace = symbol == ' ';
    bool isLineEnd = symbol == '\r';
    bool isLastItem = ItemNo == Results.Hdr.NumItems - 1;

    if (!isSpace && !isLineEnd)
    {
        listCharacter.Add(new Character()
        {
            Value = symbol.ToString(),
            Confidence = item.Confidence,
            XDim = item.XDim,
            XPos = item.XPos,
            YDim = item.YDim,
            YPos = item.YPos,
        });
    }

    if (isSpace || isLineEnd)
    {
        FlushPendingWord(listWord, ref listCharacter);
    }

    if (isLineEnd)
    {
        FlushPendingRow(ref listWord, ref index);
    }

    // The input may end without a trailing carriage return: emit whatever is still pending.
    if (isLastItem)
    {
        FlushPendingWord(listWord, ref listCharacter);
        FlushPendingRow(ref listWord, ref index);
    }
}

/// <summary>
/// Turns the pending characters into a word appended to <paramref name="words"/> and starts
/// a new character list. Does nothing when no characters are pending.
/// </summary>
private static void FlushPendingWord(List<WordInfo> words, ref List<Character> characters)
{
    if (characters.Count == 0)
    {
        return;
    }

    Character first = characters[0];
    Character last = characters[characters.Count - 1];

    words.Add(new WordInfo()
    {
        Word = string.Join("", characters.Select(x => x.Value)),
        ListCharacter = characters,
        Height = characters.Max(x => x.YDim),
        // Width spans from the left edge of the first character to the right edge of the last.
        Width = characters.Count == 1 ? first.XDim : last.XPos - first.XPos + last.XDim,
        XPos = first.XPos,
        YPos = first.YPos,
        Confidence = characters.Sum(x => x.Confidence) / characters.Count * 100,
        CorrectWord = string.Empty,
        // 1-based position of the word within the row being assembled.
        Position = words.Count + 1,
    });

    // The word keeps the old list; later characters go into a new one.
    characters = new List<Character>();
}

/// <summary>
/// Turns the pending words into a row added to <c>listOCRRow</c>, starts a new word list and
/// advances <paramref name="index"/>. Does nothing when no words are pending.
/// </summary>
private void FlushPendingRow(ref List<WordInfo> words, ref int index)
{
    if (words.Count == 0)
    {
        return;
    }

    listOCRRow.Add(new OCRRow()
    {
        ListWord = words,
        Row = string.Join(" ", words.Select(x => x.Word)),
        Position = index,
        CorrectRow = string.Empty,
        Confidence = words.Sum(x => x.Confidence) / words.Count,
    });

    // The row keeps the old list; later words go into a new one.
    words = new List<WordInfo>();
    index++;
}