Ejemplo n.º 1
0
        /// <summary>
        /// Recursively walks an XML content document and mirrors its elements into the tree of the
        /// first presentation of <c>m_Project</c>.
        /// </summary>
        /// <remarks>
        /// Behaviour per node type:
        /// <list type="bullet">
        /// <item>Document: locates the first "body" element (falling back to "book"), records its
        /// namespace URI as the default XML namespace and recurses into it.</item>
        /// <item>Element: creates a tree node via <c>CreateAndAddTreeNodeForContentDocument</c>, attaches an
        /// XmlProperty, registers page and heading ids in the id maps, then recurses into the children.</item>
        /// <item>Text, CDATA and whitespace nodes: ignored.</item>
        /// <item>Attribute: never expected; triggers a debug failure.</item>
        /// </list>
        /// The method returns immediately whenever <c>RequestCancellation</c> is set.
        /// </remarks>
        /// <param name="filePath">Path of the content document; only its file name is used, to build "file#id" keys.</param>
        /// <param name="project">Passed through to recursive calls (this body reads <c>m_Project</c> instead).</param>
        /// <param name="xmlNode">The XML node to process.</param>
        /// <param name="parentTreeNode">Tree node that receives new children; when null it is resolved with
        /// <c>GetFirstTreeNodeForXmlDocument</c>.</param>
        /// <param name="dtdUniqueResourceId">Not used by this body; recursive calls pass null.</param>
        /// <param name="docMarkupType">Passed through unchanged to recursive calls.</param>
        protected override void parseContentDocument(string filePath, Project project, XmlNode xmlNode, TreeNode parentTreeNode, string dtdUniqueResourceId, DocumentMarkupType docMarkupType)
        {
            if (RequestCancellation)
            {
                return;
            }

            XmlNodeType xmlType = xmlNode.NodeType;

            switch (xmlType)
            {
            case XmlNodeType.Attribute:
            {
                System.Diagnostics.Debug.Fail("Calling this method with an XmlAttribute should never happen !!");
                break;
            }

            case XmlNodeType.Document:
            {
                XmlNode bodyElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlNode, true, "body", null);

                if (bodyElement == null)
                {
                    // No "body" element: fall back to the "book" element.
                    bodyElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlNode, true, "book", null);
                }

                if (bodyElement != null)
                {
                    Presentation presentation = m_Project.Presentations.Get(0);
                    presentation.PropertyFactory.DefaultXmlNamespaceUri = bodyElement.NamespaceURI;

                    parseContentDocument(filePath, project, bodyElement, parentTreeNode, null, docMarkupType);
                }
                break;
            }

            case XmlNodeType.Element:
            {
                Presentation presentation = m_Project.Presentations.Get(0);
                string fileName = Path.GetFileName(filePath);

                TreeNode treeNode = null;

                if (parentTreeNode == null)
                {
                    parentTreeNode = GetFirstTreeNodeForXmlDocument(presentation, xmlNode);
                }
                if (parentTreeNode != null)
                {
                    treeNode = CreateAndAddTreeNodeForContentDocument(parentTreeNode, xmlNode, fileName);

                    // Page nodes are indexed by "file#id". A page element without an id attribute
                    // cannot be indexed, and an id that is already registered keeps its first node
                    // (same policy as the section map below) instead of throwing.
                    EmptyNode pageNode = treeNode as EmptyNode;
                    if (pageNode != null && pageNode.PageNumber != null)
                    {
                        XmlNode pageIdAttr = xmlNode.Attributes.GetNamedItem("id");
                        if (pageIdAttr != null)
                        {
                            string pageRefID = fileName + "#" + pageIdAttr.Value;
                            if (!m_XmlIdToPageNodeMap.ContainsKey(pageRefID))
                            {
                                m_XmlIdToPageNodeMap.Add(pageRefID, pageNode);
                            }
                        }
                    }
                }
                if (xmlNode.ParentNode != null && xmlNode.ParentNode.NodeType == XmlNodeType.Document)
                {
                    // The root element defines the default namespace of the presentation.
                    presentation.PropertyFactory.DefaultXmlNamespaceUri = xmlNode.NamespaceURI;
                }

                if (treeNode != null)
                {
                    XmlProperty xmlProp = presentation.PropertyFactory.CreateXmlProperty();
                    treeNode.AddProperty(xmlProp);

                    // Element name prefixes are dropped; namespace URIs are used instead.
                    // The URI is only stored explicitly when it differs from the one inherited
                    // from the parent tree node.
                    string inheritedNsUri = treeNode.Parent != null
                                            ? treeNode.Parent.GetXmlNamespaceUri()
                                            : xmlNode.NamespaceURI;

                    if (xmlNode.NamespaceURI != inheritedNsUri)
                    {
                        xmlProp.SetQName(xmlNode.LocalName, xmlNode.NamespaceURI ?? "");
                    }
                    else
                    {
                        xmlProp.SetQName(xmlNode.LocalName, "");
                    }
                }

                // A heading element labels the section that contains it.
                SectionNode parentSection = parentTreeNode as SectionNode;
                if (parentSection != null &&
                    (xmlNode.LocalName == "h1" || xmlNode.LocalName == "h2" || xmlNode.LocalName == "h3" ||
                     xmlNode.LocalName == "h4" || xmlNode.LocalName == "h5" || xmlNode.LocalName == "h6" || xmlNode.LocalName == "HD"))
                {
                    parentSection.Label =
                        xmlNode.InnerText.Replace("\n", "").Replace("\r", "").Replace("\t", "");
                    Console.WriteLine("DTBook: " + parentSection.Label);

                    XmlNode headingIdAttr = xmlNode.Attributes.GetNamedItem("id");
                    if (headingIdAttr != null)
                    {
                        string sectionRefID = fileName + "#" + headingIdAttr.Value;
                        if (!m_XmlIdToSectionNodeMap.ContainsKey(sectionRefID))
                        {
                            m_XmlIdToSectionNodeMap.Add(sectionRefID, parentSection);
                        }
                    }
                }

                // A "doctitle" element that became a section node is labelled with its own text.
                SectionNode createdSection = treeNode as SectionNode;
                if (createdSection != null && xmlNode.LocalName == "doctitle")
                {
                    createdSection.Label = xmlNode.InnerText.Replace("\n", "").Replace("\r", "").Replace("\t", "");
                }

                if (RequestCancellation)
                {
                    return;
                }

                // Children attach to the newly created node only when it is a section;
                // otherwise they stay under the current parent.
                TreeNode parentForChildren = createdSection != null ? treeNode : parentTreeNode;
                foreach (XmlNode childXmlNode in xmlNode.ChildNodes)
                {
                    parseContentDocument(filePath, project, childXmlNode, parentForChildren, null, docMarkupType);
                }
                break;
            }

            case XmlNodeType.Whitespace:
            case XmlNodeType.CDATA:
            case XmlNodeType.SignificantWhitespace:
            case XmlNodeType.Text:
            {
                // Text-like nodes are intentionally ignored. A previous implementation (disabled)
                // collapsed whitespace runs to a single space, wrapped the text in a TextMedia on the
                // text channel and attached it to the parent tree node, or to a new wrapper node when
                // the parent XML element had more than one child.
                break;
            }

            default:
            {
                return;
            }
            }
        }
        /// <summary>
        /// Classifies a content document as XHTML5, XHTML or DTBook from its DOCTYPE and root element
        /// name and, when needed, parses the matching DTD through a SAX reader so that the list
        /// registered in <c>m_listOfMixedContentXmlElementNames</c> can be filled by the handler
        /// (presumably with the names of mixed-content elements, as the field name suggests).
        /// </summary>
        /// <remarks>
        /// The DTD is only loaded for XHTML5 documents, or for DTBook documents whose "book" element
        /// carries <c>xml:space="preserve"</c>. The result is cached per DTD identifier: a second call
        /// for the same identifier returns without parsing again.
        /// </remarks>
        /// <param name="project">Not used by this body.</param>
        /// <param name="xmlDoc">The loaded content document.</param>
        /// <param name="parentTreeNode">Not used by this body.</param>
        /// <param name="filePath">Not used by this body.</param>
        /// <param name="dtdUniqueResourceId">Receives the identifier under which the DTD element list is
        /// cached, or null when no DTD was loaded.</param>
        /// <returns>The detected markup type; <c>DocumentMarkupType.NA</c> when none of the rules match.</returns>
        protected DocumentMarkupType parseContentDocument_DTD(Project project, XmlDocument xmlDoc, TreeNode parentTreeNode, string filePath, out string dtdUniqueResourceId)
        {
            dtdUniqueResourceId = null;

            DocumentMarkupType docMarkupType = DocumentMarkupType.NA;

            // Preferred identifier: system id, then public id, then the DOCTYPE name.
            XmlDocumentType docType = xmlDoc.DocumentType;
            string dtdID = docType == null ? string.Empty
                           : !string.IsNullOrEmpty(docType.SystemId) ? docType.SystemId
                           : !string.IsNullOrEmpty(docType.PublicId) ? docType.PublicId
                           : docType.Name;

            // A document without a root element yields an empty name and therefore no match below.
            XmlElement docElement = xmlDoc.DocumentElement;
            string rootElemName = docElement != null ? docElement.LocalName : string.Empty;

            // dtdID can only equal "html" when a DOCTYPE exists, so docType is non-null
            // whenever the remaining operands of this condition are evaluated.
            if (dtdID == @"html" &&
                string.IsNullOrEmpty(docType.SystemId) &&
                string.IsNullOrEmpty(docType.PublicId))
            {
                dtdID         = @"html5";
                docMarkupType = DocumentMarkupType.XHTML5;
                DebugFix.Assert(rootElemName == @"html");
            }
            else if (dtdID.Contains(@"xhtml1"))
            {
                dtdID         = @"http://www.w3.org/xhtml-math-svg-flat.dtd";
                docMarkupType = DocumentMarkupType.XHTML;
                DebugFix.Assert(rootElemName == @"html");
            }
            else if (rootElemName == @"dtbook")
            {
                docMarkupType = DocumentMarkupType.DTBOOK;
            }
            else if (rootElemName == @"html")
            {
                dtdID         = @"html5";
                docMarkupType = DocumentMarkupType.XHTML5;
            }

            if (docMarkupType == DocumentMarkupType.NA)
            {
#if DEBUG
                Debugger.Break();
#endif
            }

            if (string.IsNullOrEmpty(dtdID))
            {
                return docMarkupType;
            }

            // Identifiers that are not already http URLs are mapped under the daisy.org prefix.
            if (!dtdID.StartsWith(@"http://", StringComparison.Ordinal))
            {
                dtdID = @"http://www.daisy.org/" + dtdID;
            }

            bool needToLoadDTDManuallyToCheckMixedContentElements = docMarkupType == DocumentMarkupType.XHTML5;
            if (docMarkupType == DocumentMarkupType.DTBOOK)
            {
                XmlNode rootElement = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(xmlDoc, true, "book", null);
                DebugFix.Assert(rootElement != null);
                if (rootElement != null)
                {
                    XmlAttributeCollection attrs = rootElement.Attributes;
                    if (attrs != null)
                    {
                        XmlNode attr = attrs.GetNamedItem("space", XmlReaderWriterHelper.NS_URL_XML);
                        if (attr == null)
                        {
                            attr = attrs.GetNamedItem("xml:space", XmlReaderWriterHelper.NS_URL_XML);
                        }

                        if (attr != null && attr.Value == "preserve")
                        {
                            // Bookshare hack: xml:space="preserve" on the book element forces the DTD check.
                            needToLoadDTDManuallyToCheckMixedContentElements = true;
                        }
                    }
                }
            }

            if (!needToLoadDTDManuallyToCheckMixedContentElements)
            {
                return docMarkupType;
            }

#if ENABLE_DTDSHARP
            Stream dtdStream = LocalXmlUrlResolver.mapUri(new Uri(dtdID, UriKind.Absolute), out dtdUniqueResourceId);

            if (!string.IsNullOrEmpty(dtdUniqueResourceId))
            {
                DebugFix.Assert(dtdStream != null);

                List<string> list;
                m_listOfMixedContentXmlElementNames.TryGetValue(dtdUniqueResourceId, out list);

                if (list == null)
                {
                    if (dtdStream != null)
                    {
                        list = new List<string>();
                        m_listOfMixedContentXmlElementNames.Add(dtdUniqueResourceId, list);

                        initMixedContentXmlElementNamesFromDTD(dtdUniqueResourceId, dtdStream);
                    }
                    else
                    {
#if DEBUG
                        Debugger.Break();
#endif
                    }
                }
                else
                {
                    // Already cached: the freshly opened stream is not needed.
                    if (dtdStream != null)
                    {
                        dtdStream.Close();
                    }
                }
            }
            else
            {
#if DEBUG
                Debugger.Break();
#endif
            }
#else
            dtdUniqueResourceId = dtdID;

            List<string> list;
            m_listOfMixedContentXmlElementNames.TryGetValue(dtdUniqueResourceId, out list);

            if (list != null)
            {
                // This DTD has already been processed.
                return docMarkupType;
            }

            list = new List<string>();
            m_listOfMixedContentXmlElementNames.Add(dtdUniqueResourceId, list);

            // The managed SAX driver is used in 64-bit processes, the Expat wrapper otherwise.
            IXmlReader reader;
            if (IsRunning64())
            {
                reader = new SaxDriver();
            }
            else
            {
                reader = new ExpatReader();
            }

            reader.EntityResolver = new SaxEntityResolver();

            SaxErrorHandler errorHandler = new SaxErrorHandler();
            reader.ErrorHandler = errorHandler;

            if (reader is SaxDriver)
            {
                trySetSaxFeature(reader, Constants.NamespacesFeature, true);
                trySetSaxFeature(reader, Constants.NamespacePrefixesFeature, true);
                trySetSaxFeature(reader, Constants.ExternalGeneralFeature, true);
                trySetSaxFeature(reader, Constants.ExternalParameterFeature, true);
                trySetSaxFeature(reader, Constants.XmlNsUrisFeature, true);
                trySetSaxFeature(reader, Constants.ResolveDtdUrisFeature, true);
            }

            if (reader is ExpatReader)
            {
                trySetSaxFeature(reader, Constants.NamespacesFeature, true);
                trySetSaxFeature(reader, Constants.ExternalGeneralFeature, true);
                trySetSaxFeature(reader, Constants.ExternalParameterFeature, true);
                trySetSaxFeature(reader, Constants.ResolveDtdUrisFeature, true);
                trySetSaxFeature(reader, Constants.LexicalParameterFeature, true);

                // Deliberately not set for the Expat reader:
                //  - namespace-prefixes, xmlns-uris, validation, unicode-normalization-checking:
                //    reported as supported but then rejected deeper inside the Expat C# wrapper;
                //  - xml-1.1, xml-declaration, use-external-subset, reader-control: not supported;
                //  - the kd-soft.net specific features (skip-internal-entities, parse-unless-standalone,
                //    parameter-entities, standalone-error): disabled in the previous code as well.
            }

            SaxContentHandler handler = new SaxContentHandler(list);

            try
            {
                reader.DtdHandler = handler;
            }
            catch (Exception e)
            {
#if DEBUG
                Debugger.Break();
#endif
                errorHandler.AddMessage("Cannot set dtd handler: " + e.Message);
            }

            try
            {
                reader.ContentHandler = handler;
            }
            catch (Exception e)
            {
#if DEBUG
                Debugger.Break();
#endif
                errorHandler.AddMessage("Cannot set content handler: " + e.Message);
            }

            try
            {
                reader.LexicalHandler = handler;
            }
            catch (Exception e)
            {
#if DEBUG
                Debugger.Break();
#endif
                errorHandler.AddMessage("Cannot set lexical handler: " + e.Message);
            }

            try
            {
                reader.DeclHandler = handler;
            }
            catch (Exception e)
            {
#if DEBUG
                Debugger.Break();
#endif
                errorHandler.AddMessage("Cannot set declaration handler: " + e.Message);
            }

            // The DTD is loaded by parsing a minimal, empty document whose DOCTYPE points at it.
            bool isHTML = docMarkupType == DocumentMarkupType.XHTML || docMarkupType == DocumentMarkupType.XHTML5;
            string rootElementName = isHTML ? @"html" : @"dtbook";
            string dtdWrapper      = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE " + rootElementName + " SYSTEM \"" + dtdID + "\"><" + rootElementName + "></" + rootElementName + ">";

            // Dispose the in-memory stream and its reader once parsing is over, even if Parse throws.
            using (Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(dtdWrapper)))
            using (TextReader txtReader = new StreamReader(stream, Encoding.UTF8))
            {
                InputSource input = new InputSource<TextReader>(txtReader, dtdID + "/////SYSID");
                input.Encoding = "UTF-8";
                input.PublicId = "??";

                reader.Parse(input);
            }
#endif //ENABLE_DTDSHARP

            return docMarkupType;
        }

#if !ENABLE_DTDSHARP
        /// <summary>
        /// Sets one SAX feature on the reader on a best-effort basis: a failure is swallowed
        /// (after breaking into the debugger in DEBUG builds) so the remaining features are still applied.
        /// </summary>
        private static void trySetSaxFeature(IXmlReader reader, string featureName, bool featureValue)
        {
            try
            {
                reader.SetFeature(featureName, featureValue);
            }
            catch (Exception)
            {
#if DEBUG
                Debugger.Break();
#endif
            }
        }
#endif
Ejemplo n.º 3
0
        /// <summary>
        /// Runs the parsing stages for one content document in order: metadata (from the "head"
        /// element), head links, then the document content. Progress is reported before the metadata
        /// and content stages, and <c>RequestCancellation</c> is checked before each stage.
        /// </summary>
        /// <param name="filePath">Path of the content document, forwarded to every stage.</param>
        /// <param name="project">Project forwarded to every stage.</param>
        /// <param name="xmlDoc">The loaded content document.</param>
        /// <param name="displayPath">Path inserted into the progress messages.</param>
        /// <param name="type">Markup type forwarded to the content stage.</param>
        /// <returns>true when cancellation was observed before a stage started; false once the content stage has run.</returns>
        protected bool parseContentDocParts(string filePath, Project project, XmlDocument xmlDoc, string displayPath, DocumentMarkupType type)
        {
            bool cancelled = RequestCancellation;

            if (!cancelled)
            {
                // Stage 1: metadata.
                string metadataMessage = String.Format(UrakawaSDK_daisy_Lang.ParsingMetadata, displayPath);
                reportProgress(-1, metadataMessage);

                XmlElement documentRoot = xmlDoc.DocumentElement;
                XmlNode headXmlNode = XmlDocumentHelper.GetFirstChildElementOrSelfWithName(documentRoot, true, "head", null);
                parseMetadata(filePath, project, xmlDoc, headXmlNode);

                cancelled = RequestCancellation;
            }

            if (!cancelled)
            {
                // Stage 2: head links.
                parseHeadLinks(filePath, project, xmlDoc);

                cancelled = RequestCancellation;
            }

            if (cancelled)
            {
                return true;
            }

            // Stage 3: content. The tree node text cache is switched off while parsing
            // and switched back on afterwards, whether or not parsing succeeds.
            string contentMessage = String.Format(UrakawaSDK_daisy_Lang.ParsingContent, displayPath);
            reportProgress(-1, contentMessage);

            try
            {
                urakawa.core.TreeNode.EnableTextCache = false;
                parseContentDocument(filePath, project, xmlDoc, null, null, type);
            }
            finally
            {
                urakawa.core.TreeNode.EnableTextCache = true;
            }

            return false;
        }