Пример #1
0
 /// <summary>
 /// Adds <paramref name="area"/> to <c>Areas</c> unless the collection
 /// already contains it.
 /// </summary>
 /// <param name="area">The area to register.</param>
 /// <returns>The same <paramref name="area"/> instance that was passed in.</returns>
 public OcrArea AddArea(OcrArea area)
 {
     bool alreadyRegistered = Areas.Contains(area);
     if (alreadyRegistered)
     {
         return area;
     }

     Areas.Add(area);
     return area;
 }
Пример #2
0
 /// <summary>
 /// Serializes an area: first its rectangle, then the number of lines it
 /// holds, then every line in enumeration order.
 /// </summary>
 /// <param name="writer">The writer that receives the data.</param>
 /// <param name="area">The area to serialize.</param>
 private static void WriteArea(BinaryWriter writer, OcrArea area)
 {
     WriteRect(writer, area.Rect);

     // The count goes first so that a reader knows how many lines follow.
     var areaLines = area.Lines;
     writer.Write(areaLines.Count);

     foreach (OcrLine current in areaLines)
     {
         WriteLine(writer, current);
     }
 }
Пример #3
0
        /// <summary>
        /// Deserializes an area: reads its rectangle, then a 32-bit line count,
        /// then that many lines.
        /// </summary>
        /// <param name="reader">The reader positioned at the start of an area record.</param>
        /// <returns>A new <see cref="OcrArea"/> populated with the rectangle and lines read.</returns>
        private static OcrArea ReadArea(BinaryReader reader)
        {
            OcrArea result = new OcrArea { Rect = ReadRect(reader) };

            // A non-positive count simply yields an area without lines.
            int remaining = reader.ReadInt32();
            while (remaining > 0)
            {
                result.Lines.Add(ReadLine(reader));
                remaining--;
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Parses XHTML markup in which pages are marked with the class
        /// <c>ocr_page</c> and converts the areas of every page into
        /// <see cref="OcrArea"/> entries of a new <see cref="OcrResult"/>.
        /// </summary>
        /// <remarks>
        /// Only <c>div</c>, <c>span</c> and <c>p</c> elements whose <c>class</c>
        /// attribute equals <c>ocr_page</c> are processed; every other node,
        /// including <c>meta</c> elements, is skipped. For each area, the lines
        /// attached directly to the area come first, followed by the lines of
        /// each of its paragraphs. Areas whose text is null, empty or consists
        /// only of white space are not added to the result.
        /// </remarks>
        /// <param name="xml">The markup to parse.</param>
        /// <returns>The result holding all non-blank areas found in <paramref name="xml"/>.</returns>
        public static OcrResult Parse(string xml)
        {
            List<Page> pages = new List<Page>();

            // DTD processing is enabled, and the resolver is preloaded with the
            // XHTML 1.0 DTDs so that the document type can be resolved locally.
            XmlReaderSettings settings = new XmlReaderSettings();
            settings.DtdProcessing = DtdProcessing.Parse;
            settings.XmlResolver   = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10);

            using (StringReader stringReader = new StringReader(xml))
            using (XmlReader reader = XmlReader.Create(stringReader, settings))
            {
                while (reader.Read())
                {
                    if (reader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    switch (reader.Name)
                    {
                    case "div":
                    case "span":
                    case "p":
                        if (reader.GetAttribute("class") == "ocr_page")
                        {
                            // The nested parser advances the shared reader while
                            // it builds the page.
                            pages.Add(Parse<Page>(reader, null));
                        }
                        break;
                    }
                }
            }

            OcrResult result = new OcrResult();

            foreach (Page page in pages)
            {
                foreach (Area area in page.Areas)
                {
                    OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

                    // Flatten the area: direct lines first, then paragraph lines.
                    List<Line> lines = new List<Line>(area.Lines);
                    foreach (Paragraph paragraph in area.Paragraphs)
                    {
                        lines.AddRange(paragraph.Lines);
                    }

                    foreach (Line line in lines)
                    {
                        OcrLine ocrLine = new OcrLine();
                        ocrLine.Rect = line.Rect.ToRect();

                        foreach (Word word in line.Words)
                        {
                            OcrWord ocrWord = new OcrWord();
                            ocrWord.Rect       = word.Rect.ToRect();
                            ocrWord.Confidence = word.Confidence;
                            ocrWord.Text       = word.Text;
                            ocrLine.Words.Add(ocrWord);
                        }

                        ocrArea.Lines.Add(ocrLine);
                    }

                    // IsNullOrWhiteSpace is null-safe and avoids allocating a
                    // trimmed copy of the text just to test for blankness.
                    if (!string.IsNullOrWhiteSpace(ocrArea.Text))
                    {
                        result.AddArea(ocrArea);
                    }
                }
            }

            return result;
        }