/
OCR.cs
104 lines (95 loc) · 4.79 KB
/
OCR.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
using System;
using Tesseract;
using System.Diagnostics;
namespace ScreenParser
{
class OCR
{
public string Text { get; set; }
public OCR(String imagePath)
{
try
{
var logger = new FormattedConsoleLogger();
var resultPrinter = new ResultPrinter(logger);
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(imagePath))
{
using (logger.Begin("Process image"))
{
var i = 1;
using (var page = engine.Process(img))
{
Text = page.GetText();
logger.Log("Text: {0}", Text);
logger.Log("Mean confidence: {0}", page.GetMeanConfidence());
using (var iter = page.GetIterator())
{
iter.Begin();
do
{
if (i % 2 == 0)
{
using (logger.Begin("Line {0}", i))
{
do
{
using (logger.Begin("Word Iteration"))
{
if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
{
logger.Log("New block");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
{
logger.Log("New paragraph");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
{
logger.Log("New line");
}
logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));
}
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
}
}
i++;
} while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
}
}
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
}
private class ResultPrinter
{
readonly FormattedConsoleLogger logger;
public ResultPrinter(FormattedConsoleLogger logger)
{
this.logger = logger;
}
public void Print(ResultIterator iter)
{
logger.Log("Is beginning of block: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Block));
logger.Log("Is beginning of para: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Para));
logger.Log("Is beginning of text line: {0}", iter.IsAtBeginningOf(PageIteratorLevel.TextLine));
logger.Log("Is beginning of word: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Word));
logger.Log("Is beginning of symbol: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Symbol));
logger.Log("Block text: \"{0}\"", iter.GetText(PageIteratorLevel.Block));
logger.Log("Para text: \"{0}\"", iter.GetText(PageIteratorLevel.Para));
logger.Log("TextLine text: \"{0}\"", iter.GetText(PageIteratorLevel.TextLine));
logger.Log("Word text: \"{0}\"", iter.GetText(PageIteratorLevel.Word));
logger.Log("Symbol text: \"{0}\"", iter.GetText(PageIteratorLevel.Symbol));
}
}
}
}