示例#1
0
        /// <summary>
        /// Builds one <see cref="Glyph"/> for every <c>TextLine</c> element found in the page XML document.
        /// </summary>
        /// <remarks>
        /// For each element the <c>id</c> attribute, the text of <c>TextEquiv/Unicode</c> and the
        /// <c>points</c> attribute of <c>Coords</c> are copied. Any piece that is absent from the
        /// document is represented by <see cref="string.Empty"/>, never <c>null</c>.
        /// NOTE(review): the method name talks about glyphs but the query selects <c>TextLine</c>
        /// elements; confirm this is intended.
        /// </remarks>
        /// <returns>List of glyph objects with ID, Unicode and PointsString populated.</returns>
        public List<Glyph> GetGlyphs()
        {
            // XPath 1.0 has no notion of a default namespace: an unprefixed name in an XPath
            // expression always means "no namespace". The document's default namespace is
            // therefore bound to an explicit prefix that the expressions below can use.
            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(reader.NameTable);
            namespaceManager.AddNamespace("aletheia", pageXmlNamespace.NamespaceName);

            var glyphs = from textLine in aletheiaDoc.Descendants(pageXmlNamespace + "TextLine")
                         // Evaluate each XPath once and reuse the result.
                         let unicode = textLine.XPathSelectElement("./aletheia:TextEquiv/aletheia:Unicode", namespaceManager)
                         let coords = textLine.XPathSelectElement("./aletheia:Coords", namespaceManager)
                         select new Glyph
                         {
                             // The explicit string conversions of XAttribute/XElement yield null
                             // for a null operand, so "??" supplies the empty-string fallback.
                             ID = (string)textLine.Attribute("id") ?? string.Empty,
                             Unicode = (string)unicode ?? string.Empty,
                             // A Coords element without a "points" attribute yields an empty
                             // string instead of a NullReferenceException.
                             PointsString = coords != null
                                 ? ((string)coords.Attribute("points") ?? string.Empty)
                                 : string.Empty
                         };

            return glyphs.ToList();
        }
示例#2
0
        /// <summary>
        /// Reads every <c>Glyph</c> element from a PAGE XML file and attaches the matching image
        /// file from <paramref name="PathToImages"/> to the glyph entry for its character.
        /// </summary>
        /// <remarks>
        /// An image file matches a glyph when its file name, with every occurrence of
        /// <c>BaseName + "_"</c> and <c>".tif"</c> removed, equals the glyph's <c>id</c> attribute.
        /// Glyphs with an empty id, an empty or whitespace-only Unicode value, or no matching
        /// image are skipped.
        /// </remarks>
        /// <param name="LangName">Not used by this method.</param>
        /// <param name="FontName">Not used by this method.</param>
        /// <param name="BaseName">Prefix (followed by <c>_</c>) stripped from image file names to obtain the glyph id.</param>
        /// <param name="XMLFile">Path of the PAGE XML file to load.</param>
        /// <param name="PathToImages">Directory whose files are searched for glyph images.</param>
        /// <param name="UseSubList">When true, each character is passed through <c>FixGlyphChar</c> before lookup.</param>
        public void IngestImages(string LangName, string FontName, string BaseName, string XMLFile, string PathToImages, bool UseSubList)
        {
            XDocument  aletheiaDoc = XDocument.Load(XMLFile);
            XNamespace ns          = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19";

            var extracts = from glyph in aletheiaDoc.Descendants(ns + "Glyph")
                           // Look each child up once instead of re-querying it per null check.
                           let textEquiv = glyph.Element(ns + "TextEquiv")
                           let unicode = textEquiv != null ? textEquiv.Element(ns + "Unicode") : null
                           select new
                           {
                               // The explicit string conversions return null for a null operand.
                               ID   = (string)glyph.Attribute("id") ?? string.Empty,
                               Char = (string)unicode ?? string.Empty
                           };

            // Index the image files by the glyph id encoded in their name once, rather than
            // rescanning the whole directory listing for every glyph. Path.GetFileName strips the
            // directory part regardless of the separator used or a trailing separator on
            // PathToImages. The first file that maps to an id wins, matching a front-to-back scan.
            var filesByGlyphId = new System.Collections.Generic.Dictionary<string, string>();
            foreach (string file in System.IO.Directory.GetFiles(PathToImages))
            {
                string glyphId = System.IO.Path.GetFileName(file)
                                               .Replace(BaseName + "_", "")
                                               .Replace(".tif", "");
                if (!filesByGlyphId.ContainsKey(glyphId))
                {
                    filesByGlyphId.Add(glyphId, file);
                }
            }

            foreach (var extract in extracts)
            {
                string fixedChar = extract.Char.Trim();
                if (extract.ID == "" || fixedChar == "")
                {
                    continue;
                }

                string matchedFile;
                if (!filesByGlyphId.TryGetValue(extract.ID, out matchedFile))
                {
                    continue;
                }

                // Remove the data directory from the path before handing it to the glyph.
                string imagePath = matchedFile.Replace(db.DataDirectory, "");
                if (imagePath == "")
                {
                    continue;
                }

                if (UseSubList)
                {
                    fixedChar = FixGlyphChar(fixedChar);
                }

                int gIndex = this.FindGlyph(fixedChar);
                if (gIndex < 0)
                {
                    this.AddGlyph(fixedChar, imagePath);
                }
                else
                {
                    this.Glyphs[gIndex].AddImage(imagePath);
                }
            }
        }