Пример #1
0
        /// <summary>
        /// Rebuilds an <see cref="OcrResult"/> from a binary buffer.
        /// </summary>
        /// <remarks>
        /// Fields are consumed in this order: rectangle, result type (one byte), error text,
        /// a 32-bit area count followed by that many areas, then a 32-bit line count
        /// followed by that many lines.
        /// </remarks>
        /// <param name="buffer">Bytes to decode.</param>
        /// <returns>
        /// The decoded result. If anything throws while decoding, a result created with
        /// <c>OcrResultType.Exception</c> and the exception's full text is returned instead.
        /// </returns>
        public static OcrResult FromBinary(byte[] buffer)
        {
            try
            {
                using (MemoryStream stream = new MemoryStream(buffer))
                using (BinaryReader reader = new BinaryReader(stream))
                {
                    OcrResult decoded = new OcrResult();

                    decoded.Rect       = ReadRect(reader);
                    decoded.ResultType = (OcrResultType)reader.ReadByte();

                    // An empty error string in the buffer is normalised to null.
                    decoded.Error = reader.ReadString();
                    if (string.IsNullOrEmpty(decoded.Error))
                    {
                        decoded.Error = null;
                    }

                    // Each collection is stored as a count followed by that many entries.
                    for (int areasLeft = reader.ReadInt32(); areasLeft > 0; areasLeft--)
                    {
                        decoded.AddArea(ReadArea(reader));
                    }

                    for (int linesLeft = reader.ReadInt32(); linesLeft > 0; linesLeft--)
                    {
                        decoded.AddLine(ReadLine(reader));
                    }

                    return decoded;
                }
            }
            catch (Exception failure)
            {
                // Decoding problems are reported through the result rather than thrown.
                return OcrResult.Create(OcrResultType.Exception, failure.ToString());
            }
        }
Пример #2
0
        /// <summary>
        /// Parses XHTML OCR markup and converts the areas of every page element
        /// (a <c>div</c>, <c>span</c> or <c>p</c> whose <c>class</c> attribute is exactly
        /// <c>ocr_page</c>) into an <see cref="OcrResult"/>.
        /// </summary>
        /// <param name="xml">The XHTML document text to parse.</param>
        /// <returns>
        /// A result containing one <see cref="OcrArea"/> per source area whose text is not
        /// null, empty or whitespace-only. Each area holds the lines found directly in the
        /// source area followed by the lines of its paragraphs.
        /// </returns>
        public static OcrResult Parse(string xml)
        {
            List <Page> pages = new List <Page>();

            XmlReaderSettings settings = new XmlReaderSettings();

            // DTD processing is enabled, but the only resolver supplied is one preloaded
            // with the XHTML 1.0 DTDs (presumably so XHTML named entities can be resolved
            // without fetching the DTD from its URI).
            settings.DtdProcessing = DtdProcessing.Parse;
            settings.XmlResolver   = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10);

            using (StringReader stringReader = new StringReader(xml))
            using (XmlReader reader = XmlReader.Create(stringReader, settings))
            {
                while (reader.Read())
                {
                    if (reader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    switch (reader.Name)
                    {
                    case "div":
                    case "span":
                    case "p":
                        if (reader.GetAttribute("class") == "ocr_page")
                        {
                            // The typed Parse overload reads the page from the shared reader;
                            // scanning resumes from wherever it leaves the reader.
                            pages.Add(Parse <Page>(reader, null));
                        }
                        break;
                    }
                }
            }

            OcrResult result = new OcrResult();

            foreach (Page page in pages)
            {
                foreach (Area area in page.Areas)
                {
                    OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

                    // Lines attached directly to the area come first, then paragraph lines.
                    List <Line> lines = new List <Line>(area.Lines);
                    foreach (Paragraph paragraph in area.Paragraphs)
                    {
                        lines.AddRange(paragraph.Lines);
                    }

                    foreach (Line line in lines)
                    {
                        OcrLine ocrLine = new OcrLine();
                        ocrLine.Rect = line.Rect.ToRect();
                        foreach (Word word in line.Words)
                        {
                            OcrWord ocrWord = new OcrWord();
                            ocrWord.Rect       = word.Rect.ToRect();
                            ocrWord.Confidence = word.Confidence;
                            ocrWord.Text       = word.Text;
                            ocrLine.Words.Add(ocrWord);
                        }
                        ocrArea.Lines.Add(ocrLine);
                    }

                    // Skip areas without visible text; a null Text is treated as empty
                    // instead of raising NullReferenceException.
                    if (!string.IsNullOrWhiteSpace(ocrArea.Text))
                    {
                        result.AddArea(ocrArea);
                    }
                }
            }

            return result;
        }