/// <summary>
/// Parses the <c>TextLine</c> elements of the loaded PAGE XML document into
/// <see cref="Glyph"/> objects.
/// </summary>
/// <remarks>
/// For every matched element the <c>id</c> attribute, the text of the first
/// <c>TextEquiv/Unicode</c> child and the <c>points</c> attribute of the first
/// <c>Coords</c> child are read. Any of these that is missing is reported as an
/// empty string rather than causing an exception.
/// </remarks>
/// <returns>
/// One <see cref="Glyph"/> per matched element, with <c>ID</c>, <c>Unicode</c> and
/// <c>PointsString</c> populated.
/// </returns>
public List<Glyph> GetGlyphs()
{
    // XPath expressions cannot address a document's default namespace without a
    // prefix, so an arbitrary prefix ("aletheia") is bound to the PAGE namespace
    // and used in the queries below.
    XmlNamespaceManager namespaceManager = new XmlNamespaceManager(reader.NameTable);
    namespaceManager.AddNamespace("aletheia", pageXmlNamespace.NamespaceName);

    // NOTE(review): the method is documented as returning glyphs, but the query
    // selects "TextLine" elements — confirm this is the intended element name.
    var extracts =
        from rec in aletheiaDoc.Descendants(pageXmlNamespace + "TextLine")
        // Evaluate each XPath query once per record instead of once for the null
        // check and again for the value.
        let unicode = rec.XPathSelectElement("./aletheia:TextEquiv/aletheia:Unicode", namespaceManager)
        let coords = rec.XPathSelectElement("./aletheia:Coords", namespaceManager)
        select new Glyph
        {
            // The explicit string conversions yield null for a missing node, so
            // "??" supplies the empty-string fallback.
            ID = (string)rec.Attribute("id") ?? string.Empty,
            Unicode = (string)unicode ?? string.Empty,
            // A Coords element without a "points" attribute used to throw a
            // NullReferenceException; it now yields an empty string like every
            // other missing value.
            PointsString = coords == null
                ? string.Empty
                : ((string)coords.Attribute("points") ?? string.Empty)
        };

    return extracts.ToList();
}
/// <summary>
/// Reads the <c>Glyph</c> elements of a PAGE XML file and attaches the matching
/// glyph image files found in <paramref name="PathToImages"/> to this object's
/// glyph collection.
/// </summary>
/// <remarks>
/// An image file matches a glyph when its file name, with every occurrence of
/// <c>BaseName + "_"</c> and <c>".tif"</c> removed, equals the glyph's <c>id</c>
/// attribute. Glyphs without an id, without non-whitespace text, or without a
/// matching image are skipped. If several files map to the same id, the first
/// one returned by the directory listing is used.
/// </remarks>
/// <param name="LangName">Not used by this method; kept for interface compatibility.</param>
/// <param name="FontName">Not used by this method; kept for interface compatibility.</param>
/// <param name="BaseName">Prefix of the image file names (followed by an underscore).</param>
/// <param name="XMLFile">Path of the PAGE XML file to load.</param>
/// <param name="PathToImages">Directory whose files are searched for glyph images.</param>
/// <param name="UseSubList">
/// When <c>true</c>, each glyph's text is passed through <c>FixGlyphChar</c> before it is looked up.
/// </param>
public void IngestImages(string LangName, string FontName, string BaseName, string XMLFile, string PathToImages, bool UseSubList)
{
    XDocument aletheiaDoc = XDocument.Load(XMLFile);
    XNamespace ns = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19";

    var extracts =
        from rec in aletheiaDoc.Descendants(ns + "Glyph")
        let textEquiv = rec.Element(ns + "TextEquiv")
        let unicode = textEquiv == null ? null : textEquiv.Element(ns + "Unicode")
        select new
        {
            // The explicit string conversions yield null for a missing node.
            ID = (string)rec.Attribute("id") ?? string.Empty,
            Char = (string)unicode ?? string.Empty
        };

    // Build the id -> file lookup once instead of re-deriving every file's id for
    // every glyph. Path.GetFileName strips the directory regardless of which
    // separator is used or whether PathToImages ends with one; stripping the
    // literal PathToImages + "\\" silently failed in those cases.
    var imageFilesById = new System.Collections.Generic.Dictionary<string, string>();
    foreach (string file in System.IO.Directory.GetFiles(PathToImages))
    {
        string glyphId = System.IO.Path.GetFileName(file)
            .Replace(BaseName + "_", "")
            .Replace(".tif", "");

        // Keep the first file for each id, matching the previous first-match-wins scan.
        if (!imageFilesById.ContainsKey(glyphId))
        {
            imageFilesById.Add(glyphId, file);
        }
    }

    foreach (var extract in extracts)
    {
        if (extract.ID == "" || extract.Char.Trim() == "")
        {
            continue;
        }

        string fixedChar = extract.Char.Trim();
        if (UseSubList)
        {
            fixedChar = FixGlyphChar(fixedChar);
        }

        string imageFile;
        if (!imageFilesById.TryGetValue(extract.ID, out imageFile))
        {
            continue;
        }

        // Store the path with the data directory removed from it.
        string imagePath = imageFile.Replace(db.DataDirectory, "");
        if (imagePath == "")
        {
            continue;
        }

        int glyphIndex = this.FindGlyph(fixedChar);
        if (glyphIndex < 0)
        {
            this.AddGlyph(fixedChar, imagePath);
        }
        else
        {
            this.Glyphs[glyphIndex].AddImage(imagePath);
        }
    }
}