예제 #1
0
        /// <summary>
        /// Reads a single <see cref="OcrWord"/> from a binary stream.
        /// </summary>
        /// <remarks>
        /// Fields are consumed in a fixed order: the rectangle (via <c>ReadRect</c>),
        /// then a 32-bit integer confidence, then the text via
        /// <see cref="BinaryReader.ReadString"/>.
        /// </remarks>
        /// <param name="reader">Reader positioned at the start of a word record.</param>
        /// <returns>The word populated from the stream.</returns>
        private static OcrWord ReadWord(BinaryReader reader)
        {
            // Object-initializer members are evaluated top to bottom, so the
            // stream is consumed in exactly the order the fields are listed.
            return new OcrWord
            {
                Rect       = ReadRect(reader),
                Confidence = reader.ReadInt32(),
                Text       = reader.ReadString()
            };
        }
예제 #2
0
파일: OcrResult.cs 프로젝트: radtek/Ocr
        /// <summary>
        /// Builds an <see cref="OcrLine"/> from plain text, appends it to
        /// <c>Lines</c> and returns it.
        /// </summary>
        /// <remarks>
        /// The text is split with the parameterless <see cref="string.Split(char[])"/>,
        /// i.e. on white space without removing empty entries, so an empty or
        /// <c>null</c> input still yields a line holding one word with empty text.
        /// Only <c>Text</c> is set on the created words.
        /// </remarks>
        /// <param name="line">Text of the line; <c>null</c> is treated as an empty string.</param>
        /// <returns>The newly created line that was added to <c>Lines</c>.</returns>
        public OcrLine AddLine(string line)
        {
            string[] tokens = (line ?? string.Empty).Split();

            OcrLine created = new OcrLine();

            for (int i = 0; i < tokens.Length; i++)
            {
                created.Words.Add(new OcrWord { Text = tokens[i] });
            }

            Lines.Add(created);
            return created;
        }
예제 #3
0
파일: OcrXml.cs 프로젝트: radtek/Ocr
        /// <summary>
        /// Parses XHTML markup containing OCR output into an <see cref="OcrResult"/>.
        /// </summary>
        /// <remarks>
        /// Every <c>div</c>, <c>span</c> or <c>p</c> element whose <c>class</c>
        /// attribute equals <c>ocr_page</c> is handed to the generic
        /// <c>Parse&lt;Page&gt;</c> overload. Each area of each resulting page is
        /// then converted to an <see cref="OcrArea"/>; areas whose text is null,
        /// empty or white space only are left out of the result.
        /// </remarks>
        /// <param name="xml">The XHTML document text.</param>
        /// <returns>The areas, lines and words extracted from the document.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="xml"/> is <c>null</c> (raised by <see cref="StringReader"/>).
        /// </exception>
        public static OcrResult Parse(string xml)
        {
            List <Page> pages = new List <Page>();

            // DTD processing is enabled so the XHTML doctype and its entities are
            // understood, but resolution is limited to the XHTML 1.0 DTDs preloaded
            // in the resolver.
            XmlReaderSettings settings = new XmlReaderSettings();

            settings.DtdProcessing = DtdProcessing.Parse;
            settings.XmlResolver   = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10);

            using (StringReader stringReader = new StringReader(xml))
            using (XmlReader reader = XmlReader.Create(stringReader, settings))
            {
                while (reader.Read())
                {
                    if (reader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    switch (reader.Name)
                    {
                    case "div":
                    case "span":
                    case "p":
                        if (reader.GetAttribute("class") == "ocr_page")
                        {
                            // The page parser receives the shared reader; scanning
                            // resumes from wherever it leaves the reader.
                            pages.Add(Parse <Page>(reader, null));
                        }
                        break;
                    }
                }
            }

            OcrResult result = new OcrResult();

            foreach (Page page in pages)
            {
                foreach (Area area in page.Areas)
                {
                    OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

                    // Lines attached directly to the area come first, followed by
                    // the lines of each paragraph in paragraph order.
                    List <Line> lines = new List <Line>();
                    lines.AddRange(area.Lines);
                    foreach (Paragraph paragraph in area.Paragraphs)
                    {
                        lines.AddRange(paragraph.Lines);
                    }

                    foreach (Line line in lines)
                    {
                        OcrLine ocrLine = new OcrLine();
                        ocrLine.Rect = line.Rect.ToRect();
                        foreach (Word word in line.Words)
                        {
                            OcrWord ocrWord = new OcrWord();
                            ocrWord.Rect       = word.Rect.ToRect();
                            ocrWord.Confidence = word.Confidence;
                            ocrWord.Text       = word.Text;
                            ocrLine.Words.Add(ocrWord);
                        }
                        ocrArea.Lines.Add(ocrLine);
                    }

                    // IsNullOrWhiteSpace also covers a null Text, which would
                    // otherwise throw when trimmed.
                    if (!string.IsNullOrWhiteSpace(ocrArea.Text))
                    {
                        result.AddArea(ocrArea);
                    }
                }
            }

            return result;
        }
예제 #4
0
 /// <summary>
 /// Writes a single <see cref="OcrWord"/> to a binary stream.
 /// </summary>
 /// <remarks>
 /// Output order is: rectangle (via <c>WriteRect</c>), confidence, text.
 /// A <c>null</c> text is written as an empty string.
 /// </remarks>
 /// <param name="writer">Destination writer.</param>
 /// <param name="word">Word to serialize.</param>
 private static void WriteWord(BinaryWriter writer, OcrWord word)
 {
     WriteRect(writer, word.Rect);
     writer.Write(word.Confidence);

     // BinaryWriter.Write(string) does not accept null, so substitute "".
     writer.Write(word.Text ?? string.Empty);
 }