/// <summary>
/// Reads one word record from <paramref name="reader"/>.
/// </summary>
/// <remarks>
/// The fields are consumed in this order: the rectangle (through <c>ReadRect</c>),
/// a 32-bit integer confidence, and finally the word text as a length-prefixed string.
/// </remarks>
/// <param name="reader">Binary reader positioned at the start of a word record.</param>
/// <returns>A new <c>OcrWord</c> populated with the values read.</returns>
private static OcrWord ReadWord(BinaryReader reader)
{
    // Initializer members are evaluated top to bottom, so the stream is
    // consumed in the same sequence as the fields are listed here.
    return new OcrWord
    {
        Rect = ReadRect(reader),
        Confidence = reader.ReadInt32(),
        Text = reader.ReadString()
    };
}
/// <summary>
/// Builds an <c>OcrLine</c> from plain text and appends it to <c>Lines</c>.
/// </summary>
/// <remarks>
/// The text is split with the parameterless <c>Split()</c>, i.e. on white-space characters.
/// Empty entries are not removed, so consecutive white space produces empty words and
/// an empty (or <c>null</c>) input produces a line holding a single empty word.
/// Only <c>Text</c> is set on the created words.
/// </remarks>
/// <param name="line">Text of the line; <c>null</c> is treated as an empty string.</param>
/// <returns>The line that was created and added.</returns>
public OcrLine AddLine(string line)
{
    string source = line ?? string.Empty;
    string[] tokens = source.Split();

    OcrLine created = new OcrLine();
    foreach (string token in tokens)
    {
        OcrWord entry = new OcrWord { Text = token };
        created.Words.Add(entry);
    }

    Lines.Add(created);
    return created;
}
/// <summary>
/// Parses XHTML-style OCR markup into an <c>OcrResult</c>.
/// </summary>
/// <remarks>
/// The document is scanned for <c>div</c>, <c>span</c> or <c>p</c> elements whose
/// <c>class</c> attribute is exactly <c>ocr_page</c> (presumably hOCR output; confirm
/// against the producer). Each such element is handed to the generic <c>Parse</c>
/// helper to build a page. Every area of every page is then converted; an area is
/// added to the result only when its text is non-empty after trimming.
/// </remarks>
/// <param name="xml">The markup to parse.</param>
/// <returns>The result containing all non-empty areas found in the document.</returns>
public static OcrResult Parse(string xml)
{
    // Collected from <meta> elements; nothing in this method reads it afterwards.
    Dictionary<string, string> meta = new Dictionary<string, string>();
    List<Page> pages = new List<Page>();

    // DTDs are processed, but only the preloaded XHTML 1.0 DTDs are supplied to the resolver.
    XmlReaderSettings settings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Parse,
        XmlResolver = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10)
    };

    using (StringReader input = new StringReader(xml))
    using (XmlReader reader = XmlReader.Create(input, settings))
    {
        while (reader.Read())
        {
            // Only element start tags are of interest.
            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            string elementName = reader.Name;
            if (elementName == "meta")
            {
                string key = reader.GetAttribute("name");
                string content = reader.GetAttribute("content");
                string fallback = reader.GetAttribute("value");
                if (!string.IsNullOrEmpty(key))
                {
                    // "content" wins; "value" is used only when "content" is absent.
                    meta[key] = content ?? fallback;
                }
            }
            else if (elementName == "div" || elementName == "span" || elementName == "p")
            {
                if (reader.GetAttribute("class") == "ocr_page")
                {
                    // Parse<Page> receives the live reader, positioned on the page element.
                    pages.Add(Parse<Page>(reader, null));
                }
            }
        }
    }

    OcrResult result = new OcrResult();
    foreach (Page page in pages)
    {
        foreach (Area area in page.Areas)
        {
            OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

            // Lines attached directly to the area come first, followed by the
            // lines of each paragraph in paragraph order.
            List<Line> sourceLines = new List<Line>(area.Lines);
            foreach (Paragraph paragraph in area.Paragraphs)
            {
                sourceLines.AddRange(paragraph.Lines);
            }

            foreach (Line sourceLine in sourceLines)
            {
                OcrLine ocrLine = new OcrLine { Rect = sourceLine.Rect.ToRect() };
                foreach (Word sourceWord in sourceLine.Words)
                {
                    OcrWord ocrWord = new OcrWord
                    {
                        Rect = sourceWord.Rect.ToRect(),
                        Confidence = sourceWord.Confidence,
                        Text = sourceWord.Text
                    };
                    ocrLine.Words.Add(ocrWord);
                }
                ocrArea.Lines.Add(ocrLine);
            }

            // Areas whose text is empty or white space only are dropped.
            if (string.IsNullOrEmpty(ocrArea.Text.Trim()))
            {
                continue;
            }
            result.AddArea(ocrArea);
        }
    }

    return result;
}
/// <summary>
/// Writes one word record to <paramref name="writer"/>.
/// </summary>
/// <remarks>
/// The fields are emitted in this order: the rectangle (through <c>WriteRect</c>),
/// the confidence, then the text. This is the same field order that <c>ReadWord</c> consumes.
/// </remarks>
/// <param name="writer">Binary writer that receives the record.</param>
/// <param name="word">The word to serialize.</param>
private static void WriteWord(BinaryWriter writer, OcrWord word)
{
    WriteRect(writer, word.Rect);
    writer.Write(word.Confidence);

    // A missing text is written as an empty string rather than passing null to the writer.
    string text = word.Text ?? string.Empty;
    writer.Write(text);
}