Пример #1
0
 /// <summary>
 /// Appends <paramref name="line"/> to <c>Lines</c> unless <c>Lines</c>
 /// already contains it.
 /// </summary>
 /// <param name="line">The line to register.</param>
 /// <returns>The same <paramref name="line"/> instance that was passed in.</returns>
 public OcrLine AddLine(OcrLine line)
 {
     // Already registered: nothing to add, just hand the argument back.
     bool alreadyPresent = Lines.Contains(line);
     if (alreadyPresent)
     {
         return line;
     }

     Lines.Add(line);
     return line;
 }
Пример #2
0
 /// <summary>
 /// Serializes one line: its rectangle first, then the number of words,
 /// then each word in enumeration order.
 /// </summary>
 /// <param name="writer">Destination binary writer.</param>
 /// <param name="line">Line whose <c>Rect</c> and <c>Words</c> are written.</param>
 private static void WriteLine(BinaryWriter writer, OcrLine line)
 {
     WriteRect(writer, line.Rect);

     // The count prefix tells the reader how many word records follow.
     var words = line.Words;
     writer.Write(words.Count);

     foreach (OcrWord current in words)
     {
         WriteWord(writer, current);
     }
 }
Пример #3
0
        /// <summary>
        /// Deserializes one line: a rectangle, then a 32-bit word count,
        /// then that many words.
        /// </summary>
        /// <param name="reader">Source binary reader, positioned at the start of a line record.</param>
        /// <returns>A new <c>OcrLine</c> holding the rectangle and words that were read.</returns>
        private static OcrLine ReadLine(BinaryReader reader)
        {
            OcrLine result = new OcrLine
            {
                Rect = ReadRect(reader)
            };

            // Count down the stored word count; a zero or negative count reads no words.
            for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
            {
                result.Words.Add(ReadWord(reader));
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Builds an <c>OcrLine</c> from plain text, one <c>OcrWord</c> per
        /// piece returned by <c>string.Split()</c>, and appends it to <c>Lines</c>.
        /// </summary>
        /// <remarks>
        /// A null argument is treated as an empty string. Because the
        /// parameterless <c>Split()</c> is used, empty pieces are kept as words
        /// with empty text.
        /// </remarks>
        /// <param name="line">Text of the line; may be null.</param>
        /// <returns>The newly created line, which has also been added to <c>Lines</c>.</returns>
        public OcrLine AddLine(string line)
        {
            string text = line ?? string.Empty;
            OcrLine created = new OcrLine();

            foreach (string piece in text.Split())
            {
                created.Words.Add(new OcrWord { Text = piece });
            }

            Lines.Add(created);
            return created;
        }
Пример #5
0
        /// <summary>
        /// Parses XHTML OCR markup into an <c>OcrResult</c>.
        /// </summary>
        /// <remarks>
        /// Every <c>div</c>, <c>span</c> or <c>p</c> element whose <c>class</c>
        /// attribute equals <c>"ocr_page"</c> is handed to the generic
        /// <c>Parse</c> overload to build a <c>Page</c>. Each area of each page
        /// is then converted to an <c>OcrArea</c>: lines attached directly to
        /// the area come first, followed by the lines of each paragraph in order.
        /// Areas whose text is null, empty or only white space are left out of
        /// the result.
        /// </remarks>
        /// <param name="xml">The markup to parse; must not be null.</param>
        /// <returns>A result containing the non-blank areas found in the markup.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="xml"/> is null.
        /// </exception>
        public static OcrResult Parse(string xml)
        {
            // Fail up front with the caller's parameter name instead of letting
            // StringReader raise the same exception type for its own argument.
            if (xml == null)
            {
                throw new System.ArgumentNullException("xml");
            }

            // DTD processing is enabled and the resolver is limited to the
            // XHTML 1.0 DTDs that XmlPreloadedResolver ships with.
            XmlReaderSettings settings = new XmlReaderSettings();
            settings.DtdProcessing = DtdProcessing.Parse;
            settings.XmlResolver   = new XmlPreloadedResolver(XmlKnownDtds.Xhtml10);

            List<Page> pages = new List<Page>();

            using (StringReader stringReader = new StringReader(xml))
            using (XmlReader reader = XmlReader.Create(stringReader, settings))
            {
                while (reader.Read())
                {
                    if (reader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    switch (reader.Name)
                    {
                    case "div":
                    case "span":
                    case "p":
                        if (reader.GetAttribute("class") == "ocr_page")
                        {
                            // The nested parser receives the same reader, so the
                            // outer loop resumes from wherever it leaves the reader.
                            pages.Add(Parse<Page>(reader, null));
                        }
                        break;
                    }
                }
            }

            OcrResult result = new OcrResult();

            foreach (Page page in pages)
            {
                foreach (Area area in page.Areas)
                {
                    OcrArea ocrArea = new OcrArea(area.Rect.ToRect());

                    // Flatten: lines directly under the area, then each paragraph's lines.
                    List<Line> lines = new List<Line>();
                    lines.AddRange(area.Lines);
                    foreach (Paragraph paragraph in area.Paragraphs)
                    {
                        lines.AddRange(paragraph.Lines);
                    }

                    foreach (Line line in lines)
                    {
                        OcrLine ocrLine = new OcrLine();
                        ocrLine.Rect = line.Rect.ToRect();

                        foreach (Word word in line.Words)
                        {
                            OcrWord ocrWord = new OcrWord();
                            ocrWord.Rect       = word.Rect.ToRect();
                            ocrWord.Confidence = word.Confidence;
                            ocrWord.Text       = word.Text;
                            ocrLine.Words.Add(ocrWord);
                        }

                        ocrArea.Lines.Add(ocrLine);
                    }

                    // Skip areas with no visible text; this check also tolerates a null Text.
                    if (!string.IsNullOrWhiteSpace(ocrArea.Text))
                    {
                        result.AddArea(ocrArea);
                    }
                }
            }

            return result;
        }