/// <summary>
/// Recursively walks <paramref name="xmlNode"/> and mirrors its element structure
/// into the tree of the project's first presentation.
/// </summary>
/// <remarks>
/// Document nodes are reduced to their "body" (or, failing that, "book") element.
/// Element nodes get a tree node via CreateAndAddTreeNodeForContentDocument, an
/// XmlProperty carrying their QName, and are then descended into. Text-like nodes
/// (text, CDATA, whitespace) are ignored. The method returns early whenever
/// RequestCancellation is set.
/// </remarks>
/// <param name="filePath">Path of the content document; only its file name is used, to build "file#id" keys.</param>
/// <param name="project">Passed through unchanged to recursive calls (the presentation is read from m_Project).</param>
/// <param name="xmlNode">The XML node to process.</param>
/// <param name="parentTreeNode">Tree node to attach to; when null for an element, it is looked up with GetFirstTreeNodeForXmlDocument.</param>
/// <param name="dtdUniqueResourceId">Not read by this method; recursive calls always pass null.</param>
/// <param name="docMarkupType">Passed through unchanged to recursive calls.</param>
protected override void parseContentDocument(string filePath, Project project, XmlNode xmlNode, TreeNode parentTreeNode, string dtdUniqueResourceId, DocumentMarkupType docMarkupType)
{
    if (RequestCancellation)
    {
        return;
    }

    XmlNodeType xmlType = xmlNode.NodeType;
    switch (xmlType)
    {
        case XmlNodeType.Attribute:
        {
            System.Diagnostics.Debug.Fail("Calling this method with an XmlAttribute should never happen !!");
            break;
        }

        case XmlNodeType.Document:
        {
            // Content lives under <body> (XHTML) or <book> (DTBook); everything else is skipped.
            XmlNode bodyElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlNode, true, "body", null);
            if (bodyElement == null)
            {
                bodyElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlNode, true, "book", null);
            }

            if (bodyElement != null)
            {
                Presentation presentation = m_Project.Presentations.Get(0);
                presentation.PropertyFactory.DefaultXmlNamespaceUri = bodyElement.NamespaceURI;

                parseContentDocument(filePath, project, bodyElement, parentTreeNode, null, docMarkupType);
            }
            break;
        }

        case XmlNodeType.Element:
        {
            Presentation presentation = m_Project.Presentations.Get(0);
            string fileName = Path.GetFileName(filePath);
            XmlNode idAttribute = xmlNode.Attributes != null ? xmlNode.Attributes.GetNamedItem("id") : null;

            TreeNode treeNode = null;

            if (parentTreeNode == null)
            {
                parentTreeNode = GetFirstTreeNodeForXmlDocument(presentation, xmlNode);
            }

            if (parentTreeNode != null)
            {
                treeNode = CreateAndAddTreeNodeForContentDocument(parentTreeNode, xmlNode, fileName);

                EmptyNode pageNode = treeNode as EmptyNode;
                if (pageNode != null && pageNode.PageNumber != null)
                {
                    // A page element without an id cannot be referenced, and a repeated id must
                    // not abort the import: mirror the guard used for the section map below.
                    if (idAttribute != null)
                    {
                        string pageRefId = fileName + "#" + idAttribute.Value;
                        if (!m_XmlIdToPageNodeMap.ContainsKey(pageRefId))
                        {
                            m_XmlIdToPageNodeMap.Add(pageRefId, pageNode);
                        }
                    }
                }
            }

            // The element directly under the document node defines the default namespace.
            if (xmlNode.ParentNode != null && xmlNode.ParentNode.NodeType == XmlNodeType.Document)
            {
                presentation.PropertyFactory.DefaultXmlNamespaceUri = xmlNode.NamespaceURI;
            }

            if (treeNode != null)
            {
                XmlProperty xmlProp = presentation.PropertyFactory.CreateXmlProperty();
                treeNode.AddProperty(xmlProp);

                // Element name prefixes are dropped; a namespace URI is stored only when it
                // differs from the one inherited from the parent tree node.
                string inheritedNsUri = treeNode.Parent != null
                    ? treeNode.Parent.GetXmlNamespaceUri()
                    : xmlNode.NamespaceURI;

                if (xmlNode.NamespaceURI != inheritedNsUri)
                {
                    string ownNsUri = xmlNode.NamespaceURI;
                    xmlProp.SetQName(xmlNode.LocalName, ownNsUri == null ? "" : ownNsUri);
                }
                else
                {
                    xmlProp.SetQName(xmlNode.LocalName, "");
                }
            }

            string localName = xmlNode.LocalName;
            bool isHeading = localName == "h1" || localName == "h2" || localName == "h3"
                             || localName == "h4" || localName == "h5" || localName == "h6"
                             || localName == "HD";

            // A heading found under a section supplies that section's label.
            SectionNode parentSection = parentTreeNode as SectionNode;
            if (parentSection != null && isHeading)
            {
                parentSection.Label = xmlNode.InnerText.Replace("\n", "").Replace("\r", "").Replace("\t", "");
                Console.WriteLine("DTBook: " + parentSection.Label);

                if (idAttribute != null)
                {
                    string sectionRefId = fileName + "#" + idAttribute.Value;
                    if (!m_XmlIdToSectionNodeMap.ContainsKey(sectionRefId))
                    {
                        m_XmlIdToSectionNodeMap.Add(sectionRefId, parentSection);
                    }
                }
            }

            // A <doctitle> that became a section labels itself.
            SectionNode ownSection = treeNode as SectionNode;
            if (ownSection != null && localName == "doctitle")
            {
                ownSection.Label = xmlNode.InnerText.Replace("\n", "").Replace("\r", "").Replace("\t", "");
            }

            if (RequestCancellation)
            {
                return;
            }

            // Children attach to the new node only when it is a section; otherwise they stay
            // under the current parent.
            TreeNode parentForChildren = ownSection != null ? treeNode : parentTreeNode;
            foreach (XmlNode childXmlNode in xmlNode.ChildNodes)
            {
                parseContentDocument(filePath, project, childXmlNode, parentForChildren, null, docMarkupType);
            }
            break;
        }

        case XmlNodeType.Whitespace:
        case XmlNodeType.CDATA:
        case XmlNodeType.SignificantWhitespace:
        case XmlNodeType.Text:
        {
            // Text-like nodes are intentionally ignored: the former text-media import
            // (whitespace-collapsed text stored in a channels property) is disabled.
            break;
        }

        default:
        {
            return;
        }
    }
}
/// <summary>
/// Determines the markup type of <paramref name="xmlDoc"/> from its DOCTYPE and root
/// element and, when required, parses the matching DTD so that a list for that DTD is
/// registered in m_listOfMixedContentXmlElementNames.
/// </summary>
/// <remarks>
/// The DTD is only loaded for XHTML5 documents, or for DTBook documents whose "book"
/// element carries xml:space="preserve". The <paramref name="project"/>,
/// <paramref name="parentTreeNode"/> and <paramref name="filePath"/> parameters are not
/// read by this method.
/// </remarks>
/// <param name="project">Unused.</param>
/// <param name="xmlDoc">The document whose DOCTYPE and root element are inspected.</param>
/// <param name="parentTreeNode">Unused.</param>
/// <param name="filePath">Unused.</param>
/// <param name="dtdUniqueResourceId">
/// Receives the key under which the DTD's list is cached, or null when no DTD was loaded.
/// </param>
/// <returns>The detected markup type, or DocumentMarkupType.NA when it could not be determined.</returns>
protected DocumentMarkupType parseContentDocument_DTD(Project project, XmlDocument xmlDoc, TreeNode parentTreeNode, string filePath, out string dtdUniqueResourceId)
{
    dtdUniqueResourceId = null;

    DocumentMarkupType docMarkupType = DocumentMarkupType.NA;

    // Prefer the system id, then the public id, then the bare DOCTYPE name.
    string dtdID = xmlDoc.DocumentType == null ? string.Empty
        : !string.IsNullOrEmpty(xmlDoc.DocumentType.SystemId) ? xmlDoc.DocumentType.SystemId
        : !string.IsNullOrEmpty(xmlDoc.DocumentType.PublicId) ? xmlDoc.DocumentType.PublicId
        : xmlDoc.DocumentType.Name;

    string rootElemName = xmlDoc.DocumentElement.LocalName;

    if (dtdID == @"html"
        && string.IsNullOrEmpty(xmlDoc.DocumentType.SystemId)
        && string.IsNullOrEmpty(xmlDoc.DocumentType.PublicId))
    {
        // Bare "<!DOCTYPE html>".
        dtdID = @"html5";
        docMarkupType = DocumentMarkupType.XHTML5;
        DebugFix.Assert(rootElemName == @"html");
    }
    else if (dtdID.Contains(@"xhtml1"))
    {
        // Any XHTML 1.x DTD is mapped to the flattened XHTML+MathML+SVG DTD.
        dtdID = @"http://www.w3.org/xhtml-math-svg-flat.dtd";
        docMarkupType = DocumentMarkupType.XHTML;
        DebugFix.Assert(rootElemName == @"html");
    }
    else if (rootElemName == @"dtbook")
    {
        docMarkupType = DocumentMarkupType.DTBOOK;
    }
    else if (rootElemName == @"html")
    {
        dtdID = @"html5";
        docMarkupType = DocumentMarkupType.XHTML5;
    }

    if (docMarkupType == DocumentMarkupType.NA)
    {
#if DEBUG
        Debugger.Break();
#endif
    }

    if (string.IsNullOrEmpty(dtdID))
    {
        return docMarkupType;
    }

    // Identifiers are protocol text, so compare ordinally rather than by current culture.
    if (!dtdID.StartsWith(@"http://", StringComparison.Ordinal))
    {
        dtdID = @"http://www.daisy.org/" + dtdID;
    }

    bool needToLoadDTDManuallyToCheckMixedContentElements = docMarkupType == DocumentMarkupType.XHTML5;

    if (docMarkupType == DocumentMarkupType.DTBOOK)
    {
        XmlNode rootElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlDoc, true, "book", null);
        DebugFix.Assert(rootElement != null);

        if (rootElement != null)
        {
            XmlAttributeCollection attrs = rootElement.Attributes;
            if (attrs != null)
            {
                XmlNode attr = attrs.GetNamedItem("space", XmlReaderWriterHelper.NS_URL_XML);
                if (attr == null)
                {
                    attr = attrs.GetNamedItem("xml:space", XmlReaderWriterHelper.NS_URL_XML);
                }

                if (attr != null && attr.Value == "preserve")
                {
                    //Bookshare hack! :(
                    needToLoadDTDManuallyToCheckMixedContentElements = true;
                }
            }
        }
    }

    if (!needToLoadDTDManuallyToCheckMixedContentElements)
    {
        return docMarkupType;
    }

#if ENABLE_DTDSHARP
    Stream dtdStream = LocalXmlUrlResolver.mapUri(new Uri(dtdID, UriKind.Absolute), out dtdUniqueResourceId);

    if (!string.IsNullOrEmpty(dtdUniqueResourceId))
    {
        DebugFix.Assert(dtdStream != null);

        List<string> list;
        m_listOfMixedContentXmlElementNames.TryGetValue(dtdUniqueResourceId, out list);

        if (list == null)
        {
            if (dtdStream != null)
            {
                list = new List<string>();
                m_listOfMixedContentXmlElementNames.Add(dtdUniqueResourceId, list);

                initMixedContentXmlElementNamesFromDTD(dtdUniqueResourceId, dtdStream);
            }
            else
            {
#if DEBUG
                Debugger.Break();
#endif
            }
        }
        else
        {
            // Already cached: the freshly opened stream is not needed.
            if (dtdStream != null)
            {
                dtdStream.Close();
            }
        }
    }
    else
    {
#if DEBUG
        Debugger.Break();
#endif
    }
#else
    bool isHTML = docMarkupType == DocumentMarkupType.XHTML || docMarkupType == DocumentMarkupType.XHTML5;

    dtdUniqueResourceId = dtdID;

    List<string> list;
    m_listOfMixedContentXmlElementNames.TryGetValue(dtdUniqueResourceId, out list);

    if (list != null)
    {
        // This DTD has already been processed.
        return docMarkupType;
    }

    // NOTE(review): the list is registered before parsing, so if Parse throws below an
    // empty list stays cached for this DTD - confirm that this is intended.
    list = new List<string>();
    m_listOfMixedContentXmlElementNames.Add(dtdUniqueResourceId, list);

    // The C# SAX driver is used when IsRunning64() reports true, the Expat wrapper otherwise.
    IXmlReader reader;
    if (IsRunning64())
    {
        reader = new SaxDriver();
    }
    else
    {
        reader = new ExpatReader();
    }

    reader.EntityResolver = new SaxEntityResolver();

    SaxErrorHandler errorHandler = new SaxErrorHandler();
    reader.ErrorHandler = errorHandler;

    if (reader is SaxDriver)
    {
        trySetSaxFeature(reader, Constants.NamespacesFeature, true);
        trySetSaxFeature(reader, Constants.NamespacePrefixesFeature, true);
        trySetSaxFeature(reader, Constants.ExternalGeneralFeature, true);
        trySetSaxFeature(reader, Constants.ExternalParameterFeature, true);
        trySetSaxFeature(reader, Constants.XmlNsUrisFeature, true);
        trySetSaxFeature(reader, Constants.ResolveDtdUrisFeature, true);
    }

    if (reader is ExpatReader)
    {
        trySetSaxFeature(reader, Constants.NamespacesFeature, true);
        trySetSaxFeature(reader, Constants.ExternalGeneralFeature, true);
        trySetSaxFeature(reader, Constants.ExternalParameterFeature, true);
        trySetSaxFeature(reader, Constants.ResolveDtdUrisFeature, true);
        trySetSaxFeature(reader, Constants.LexicalParameterFeature, true);

        // Deliberately not set for Expat (per the notes previously kept here):
        //  - the kd-soft.net specific features (skip-internal-entities,
        //    parse-unless-standalone, parameter-entities, standalone-error) were disabled;
        //  - namespace-prefixes, xmlns-uris, validation and unicode-normalization-checking
        //    are accepted but then rejected deeper inside the Expat C# wrapper;
        //  - xml-1.1, xml-declaration, use-external-subset and reader-control are not supported.
    }

    // One handler object receives all four SAX callback families and is given the list.
    SaxContentHandler handler = new SaxContentHandler(list);

    try
    {
        reader.DtdHandler = handler;
    }
    catch (Exception e)
    {
#if DEBUG
        Debugger.Break();
#endif
        errorHandler.AddMessage("Cannot set dtd handler: " + e.Message);
    }

    try
    {
        reader.ContentHandler = handler;
    }
    catch (Exception e)
    {
#if DEBUG
        Debugger.Break();
#endif
        errorHandler.AddMessage("Cannot set content handler: " + e.Message);
    }

    try
    {
        reader.LexicalHandler = handler;
    }
    catch (Exception e)
    {
#if DEBUG
        Debugger.Break();
#endif
        errorHandler.AddMessage("Cannot set lexical handler: " + e.Message);
    }

    try
    {
        reader.DeclHandler = handler;
    }
    catch (Exception e)
    {
#if DEBUG
        Debugger.Break();
#endif
        errorHandler.AddMessage("Cannot set declaration handler: " + e.Message);
    }

    // Parse a minimal empty document whose only purpose is to reference the DTD.
    string rootElementName = isHTML ? @"html" : @"dtbook";
    string dtdWrapper = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE " + rootElementName + " SYSTEM \"" + dtdID + "\"><" + rootElementName + "></" + rootElementName + ">";

    // Dispose the in-memory stream and its reader once parsing is over (or has failed).
    using (Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(dtdWrapper)))
    using (TextReader txtReader = new StreamReader(stream, Encoding.UTF8))
    {
        InputSource input = new InputSource<TextReader>(txtReader, dtdID + "/////SYSID");
        input.Encoding = "UTF-8";
        input.PublicId = "??";

        reader.Parse(input);
    }
#endif //ENABLE_DTDSHARP

    return docMarkupType;
}

#if !ENABLE_DTDSHARP
/// <summary>
/// Best-effort attempt to set a SAX feature: a reader that rejects the feature does not
/// abort DTD loading (debug builds break into the debugger instead).
/// </summary>
private static void trySetSaxFeature(IXmlReader reader, string featureName, bool enabled)
{
    try
    {
        reader.SetFeature(featureName, enabled);
    }
    catch (Exception)
    {
#if DEBUG
        Debugger.Break();
#endif
    }
}
#endif
/// <summary>
/// Runs the three import stages for one content document: metadata, head links, then
/// the content itself, reporting progress before the metadata and content stages.
/// </summary>
/// <param name="filePath">Path of the content document, forwarded to each stage.</param>
/// <param name="project">Project forwarded to each stage.</param>
/// <param name="xmlDoc">The loaded content document.</param>
/// <param name="displayPath">Path inserted into the progress messages.</param>
/// <param name="type">Markup type forwarded to parseContentDocument.</param>
/// <returns>
/// True when RequestCancellation was observed before the content stage started;
/// false once the content stage has run.
/// </returns>
protected bool parseContentDocParts(string filePath, Project project, XmlDocument xmlDoc, string displayPath, DocumentMarkupType type)
{
    if (RequestCancellation)
    {
        return true;
    }

    // Stage 1: metadata, read relative to the document's "head" element.
    string metadataProgressText = String.Format(UrakawaSDK_daisy_Lang.ParsingMetadata, displayPath);
    reportProgress(-1, metadataProgressText);

    XmlNode headElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(
        xmlDoc.DocumentElement,
        true,
        "head",
        null);
    parseMetadata(filePath, project, xmlDoc, headElement);

    if (RequestCancellation)
    {
        return true;
    }

    // Stage 2: head links.
    parseHeadLinks(filePath, project, xmlDoc);

    if (RequestCancellation)
    {
        return true;
    }

    // Stage 3: content, with the tree-node text cache switched off for the duration
    // and switched back on afterwards, even if parsing throws.
    string contentProgressText = String.Format(UrakawaSDK_daisy_Lang.ParsingContent, displayPath);
    reportProgress(-1, contentProgressText);

    try
    {
        urakawa.core.TreeNode.EnableTextCache = false;
        parseContentDocument(filePath, project, xmlDoc, null, null, type);
    }
    finally
    {
        urakawa.core.TreeNode.EnableTextCache = true;
    }

    return false;
}