/// <summary>
/// Writes the text of one XPS page to the XHTML output as a single
/// <c>span</c> element containing one <c>div</c> per text run.
/// </summary>
/// <remarks>
/// Only nodes whose name equals the <c>Glyphs</c> constant and that carry a
/// non-empty <c>UniStr</c> attribute contribute output; every other node is skipped.
/// </remarks>
/// <param name="xhtml">Handler that receives the generated XHTML events.</param>
/// <param name="pageContentReader">
/// Reader positioned over the page markup. When <c>null</c>, nothing is emitted.
/// </param>
private static void parsePage(XHTMLContentHandler xhtml, XmlReader pageContentReader)
{
    // Check for a missing reader BEFORE opening the span: returning after
    // startElement would leave an unclosed element in the output stream.
    if (pageContentReader == null)
    {
        return;
    }

    xhtml.startElement("span");

    while (pageContentReader.Read())
    {
        if (pageContentReader.Name != Glyphs)
        {
            continue;
        }

        if (!pageContentReader.HasAttributes)
        {
            continue;
        }

        var text = pageContentReader.GetAttribute(UniStr);
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        xhtml.element("div", text);
    }

    xhtml.endElement("span");
}
/// <summary>
/// Extracts the text of an XPS document and reports it to <paramref name="handler"/>
/// as XHTML, one <c>span</c> per page.
/// </summary>
/// <remarks>
/// The document is opened from a file path, not from the stream contents: the path is
/// obtained by reflectively calling a <c>getFile</c> method on the supplied stream, so the
/// stream's runtime class must expose such a method (otherwise the reflection failure is
/// rethrown as <c>java.io.IOException</c>).
/// </remarks>
/// <param name="stream">Input stream whose runtime class provides a <c>getFile</c> method.</param>
/// <param name="handler">SAX content handler that receives the XHTML events.</param>
/// <param name="metadata">Metadata object; its <c>Metadata.TYPE</c> entry is set to <c>XPS_MIME_TYPE</c>.</param>
/// <param name="context">Parse context (not used by this method).</param>
/// <exception cref="java.io.IOException">Wraps any exception raised while reading or emitting the document.</exception>
public override void parse(java.io.InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
{
    metadata.set(Metadata.TYPE, XPS_MIME_TYPE);

    try
    {
        // XpsDocument needs a path on disk, so ask the stream for its backing file.
        var getFile = stream.getClass().getMethod("getFile");
        var file = getFile.invoke(stream);
        var path = file.ToString();

        var xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        using (var xpsDocument = new XpsDocument(path, FileAccess.Read))
        {
            var fixedDocSeqReader = xpsDocument.FixedDocumentSequenceReader;

            // A missing sequence reader means there is nothing to extract; fall through
            // (instead of returning) so that endDocument() is still emitted below.
            if (fixedDocSeqReader != null)
            {
                foreach (var document in fixedDocSeqReader.FixedDocuments)
                {
                    // Walk every page, not only the first one, so multi-page documents
                    // are fully extracted and page-less documents do not throw.
                    foreach (var page in document.FixedPages)
                    {
                        using (var pageContentReader = page.XmlReader)
                        {
                            parsePage(xhtml, pageContentReader);
                        }
                    }
                }
            }
        }

        xhtml.endDocument();
    }
    catch (Exception e)
    {
        throw new java.io.IOException(e);
    }
}