/// <summary>
/// Adds <paramref name="area"/> to <c>Areas</c> unless <c>Areas</c> already contains it.
/// </summary>
/// <param name="area">The area to register.</param>
/// <returns>The <paramref name="area"/> that was passed in, whether or not it was added.</returns>
public OcrArea AddArea(OcrArea area)
{
    // Already registered: nothing to do, hand the argument straight back.
    if (Areas.Contains(area))
    {
        return area;
    }

    Areas.Add(area);
    return area;
}
/// <summary>
/// Writes <paramref name="area"/> to <paramref name="writer"/>: first its rectangle
/// (via <c>WriteRect</c>), then the number of lines, then every line in order
/// (via <c>WriteLine</c>).
/// </summary>
/// <param name="writer">The binary writer that receives the data.</param>
/// <param name="area">The area to write.</param>
private static void WriteArea(BinaryWriter writer, OcrArea area)
{
    WriteRect(writer, area.Rect);

    // The line count goes out before the lines themselves.
    writer.Write(area.Lines.Count);

    foreach (OcrLine currentLine in area.Lines)
    {
        WriteLine(writer, currentLine);
    }
}
/// <summary>
/// Reads an area from <paramref name="reader"/>: first its rectangle (via <c>ReadRect</c>),
/// then a 32-bit line count, then that many lines (via <c>ReadLine</c>).
/// </summary>
/// <param name="reader">The binary reader positioned at the start of an area.</param>
/// <returns>A new <see cref="OcrArea"/> populated with the rectangle and lines that were read.</returns>
private static OcrArea ReadArea(BinaryReader reader)
{
    OcrArea result = new OcrArea();
    result.Rect = ReadRect(reader);

    // Count down the number of lines announced in the stream; a count of zero
    // or less reads no lines at all.
    int remaining = reader.ReadInt32();
    while (remaining > 0)
    {
        result.Lines.Add(ReadLine(reader));
        remaining--;
    }

    return result;
}
/// <summary>
/// Parses XHTML OCR markup into an <see cref="OcrResult"/>.
/// </summary>
/// <remarks>
/// Every <c>div</c>, <c>span</c> or <c>p</c> element whose <c>class</c> attribute is exactly
/// <c>ocr_page</c> is read as a page; all other nodes are skipped. Each area of each page is
/// converted to an <see cref="OcrArea"/> that contains the area's own lines followed by the
/// lines of its paragraphs. Areas whose text is null, empty or only white space are left out
/// of the result.
/// </remarks>
/// <param name="xml">The markup to parse.</param>
/// <returns>A new <see cref="OcrResult"/> holding the converted, non-blank areas.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="xml"/> is null.</exception>
/// <exception cref="System.Xml.XmlException">The markup is not well-formed XML.</exception>
public static OcrResult Parse(string xml)
{
    List<Page> pages = new List<Page>();

    // DTD processing stays enabled so a DOCTYPE declaration is accepted; the preloaded
    // resolver serves the XHTML 1.0 DTDs from its built-in cache.
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.XmlResolver = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10);

    using (StringReader stringReader = new StringReader(xml))
    using (XmlReader reader = XmlReader.Create(stringReader, settings))
    {
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            switch (reader.Name)
            {
                case "div":
                case "span":
                case "p":
                    if (reader.GetAttribute("class") == "ocr_page")
                    {
                        // The page parser consumes the reader from this element onwards.
                        pages.Add(Parse<Page>(reader, null));
                    }
                    break;
            }
        }
    }

    OcrResult result = new OcrResult();

    foreach (Page page in pages)
    {
        foreach (Area area in page.Areas)
        {
            OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

            // Lines attached directly to the area come first, then the lines of each paragraph.
            List<Line> lines = new List<Line>();
            lines.AddRange(area.Lines);
            foreach (Paragraph paragraph in area.Paragraphs)
            {
                lines.AddRange(paragraph.Lines);
            }

            foreach (Line line in lines)
            {
                OcrLine ocrLine = new OcrLine();
                ocrLine.Rect = line.Rect.ToRect();

                foreach (Word word in line.Words)
                {
                    OcrWord ocrWord = new OcrWord();
                    ocrWord.Rect = word.Rect.ToRect();
                    ocrWord.Confidence = word.Confidence;
                    ocrWord.Text = word.Text;
                    ocrLine.Words.Add(ocrWord);
                }

                ocrArea.Lines.Add(ocrLine);
            }

            // IsNullOrWhiteSpace is null-safe, unlike calling Trim() on the text first.
            if (!string.IsNullOrWhiteSpace(ocrArea.Text))
            {
                result.AddArea(ocrArea);
            }
        }
    }

    return result;
}