Example #1
0
 /// <summary>
 /// Adds the attribute <paramref name="name"/> to <paramref name="parent"/> in every
 /// document model that is currently active (XmlDocument and/or XDocument).
 /// An attribute that already exists on the parent is left untouched.
 /// </summary>
 /// <param name="parent">Node wrapper whose underlying element receives the attribute.</param>
 /// <param name="name">Attribute name.</param>
 /// <param name="value">Attribute value; null is stored as an empty string.</param>
 private void AddAttribute(XXXNode_v2 parent, string name, string value)
 {
     // Both document models store a missing value as "".
     string text = value ?? "";

     if (gXmlDocument != null)
     {
         XmlAttributeCollection attributes = parent.XmlNode.Attributes;
         if (attributes.GetNamedItem(name) == null)
         {
             XmlAttribute created = gXmlDocument.CreateAttribute(name);
             created.Value = text;
             attributes.Append(created);
         }
     }

     if (gXDocument != null)
     {
         XElement owner = (XElement)parent.XNode;
         if (owner.Attribute(name) == null)
         {
             owner.Add(new XAttribute(name, text));
         }
     }
 }
Example #2
0
        /// <summary>
        /// Creates a fresh <see cref="XmlDocument"/>, wraps it as the document node,
        /// runs <c>GenerateXml</c> and returns the document.
        /// </summary>
        /// <returns>The value of <c>_xmlDocument</c> after generation.</returns>
        public XmlDocument GenerateXmlDocument()
        {
            _xmlDocument  = new XmlDocument();
            _documentNode = new XXXNode_v2 { XmlNode = _xmlDocument };

            GenerateXml();

            return _xmlDocument;
        }
Example #3
0
        /// <summary>
        /// Creates a fresh <see cref="XDocument"/>, wraps it as the document node,
        /// runs <c>GenerateXml</c> and returns the document.
        /// </summary>
        /// <returns>The value of <c>gXDocument</c> after generation.</returns>
        public XDocument GenerateXDocument()
        {
            gXDocument    = new XDocument();
            gDocumentNode = new XXXNode_v2 { XNode = gXDocument };

            GenerateXml();

            return gXDocument;
        }
Example #4
0
 /// <summary>
 /// Appends a comment node to <paramref name="parent"/> in every active document model.
 /// </summary>
 /// <param name="parent">Node wrapper that receives the comment as its last child.</param>
 /// <param name="comment">Comment text.</param>
 private void AddComment(XXXNode_v2 parent, string comment)
 {
     if (gXmlDocument != null)
     {
         XmlComment xmlComment = gXmlDocument.CreateComment(comment);
         parent.XmlNode.AppendChild(xmlComment);
     }

     if (gXDocument != null)
     {
         XComment linqComment = new XComment(comment);
         XElement owner = (XElement)parent.XNode;
         owner.Add(linqComment);
     }
 }
Example #5
0
        /// <summary>
        /// Builds a new wrapper that points at the parent of <paramref name="node"/>
        /// in each document model the node belongs to.
        /// </summary>
        /// <param name="node">Wrapper whose parents are looked up.</param>
        /// <returns>
        /// A new wrapper, never null; a member stays unset when the matching member of
        /// <paramref name="node"/> is null.
        /// </returns>
        private static XXXNode_v2 GetParentXXNode(XXXNode_v2 node)
        {
            XXXNode_v2 parent = new XXXNode_v2();

            XmlNode xmlChild = node.XmlNode;
            if (xmlChild != null)
            {
                parent.XmlNode = xmlChild.ParentNode;
            }

            XNode linqChild = node.XNode;
            if (linqChild != null)
            {
                parent.XNode = linqChild.Parent;
            }

            return parent;
        }
Example #6
0
        /// <summary>
        /// Creates a detached element called <paramref name="name"/> in every active
        /// document model and returns both wrapped in a single node.
        /// </summary>
        /// <param name="name">Element name.</param>
        /// <returns>A new wrapper holding the created element(s).</returns>
        private XXXNode_v2 CreateElement(string name)
        {
            XXXNode_v2 created = new XXXNode_v2();

            if (_xmlDocument != null)
            {
                created.XmlNode = _xmlDocument.CreateElement(name);
            }

            if (_xDocument != null)
            {
                created.XNode = new XElement(name);
            }

            return created;
        }
Example #7
0
        /// <summary>
        /// Starting at <paramref name="node"/> itself and walking up through its ancestors,
        /// finds the nearest node called <paramref name="name"/> in each document model.
        /// </summary>
        /// <param name="node">Wrapper at which the search starts.</param>
        /// <param name="name">Node name to look for.</param>
        /// <returns>
        /// A wrapper holding the match(es), or null when no document model contains a match.
        /// </returns>
        /// <exception cref="PBException">
        /// The XNode member of <paramref name="node"/> is set but is not an XElement.
        /// </exception>
        private static XXXNode_v2 GetParentXXNodeByName(XXXNode_v2 node, string name)
        {
            XXXNode_v2 match   = new XXXNode_v2();
            bool       matched = false;

            // XmlDocument side: the loop body never runs when the start node is null.
            for (XmlNode xmlCursor = node.XmlNode; xmlCursor != null; xmlCursor = xmlCursor.ParentNode)
            {
                if (xmlCursor.Name == name)
                {
                    match.XmlNode = xmlCursor;
                    matched       = true;
                    break;
                }
            }

            // XDocument side: only elements can be searched.
            XNode linqStart = node.XNode;
            if (linqStart != null)
            {
                XElement first = linqStart as XElement;
                if (first == null)
                {
                    throw new PBException("error generating XDocument node is not a XElement");
                }

                for (XElement linqCursor = first; linqCursor != null; linqCursor = linqCursor.Parent)
                {
                    if (linqCursor.Name == name)
                    {
                        match.XNode = linqCursor;
                        matched     = true;
                        break;
                    }
                }
            }

            return matched ? match : null;
        }
Example #8
0
        /// <summary>
        /// Prepares the target <see cref="XDocument"/>, runs <c>GenerateXml</c> and
        /// returns the document. The document comes from a new
        /// <c>XDocumentCreator</c> when <c>_useXDocumentCreator</c> is set; otherwise it is
        /// created directly.
        /// </summary>
        /// <returns>The value of <c>_xDocument</c> after generation.</returns>
        public XDocument GenerateXDocument()
        {
            if (!_useXDocumentCreator)
            {
                _xDocument = new XDocument();
            }
            else
            {
                _xdCreator = new XDocumentCreator();
                _xDocument = _xdCreator.XDocument;
            }

            _documentNode = new XXXNode_v2 { XNode = _xDocument };

            GenerateXml();

            return _xDocument;
        }
Example #9
0
 /// <summary>
 /// Creates a new element called <paramref name="element"/> and appends it to
 /// <paramref name="parent"/> in each document model the parent belongs to.
 /// </summary>
 /// <param name="parent">Node wrapper that receives the new child element.</param>
 /// <param name="element">Name of the element to create.</param>
 /// <exception cref="PBException">
 /// The XNode member of <paramref name="parent"/> is set but is not an XElement.
 /// </exception>
 private void AddElement(XXXNode_v2 parent, string element)
 {
     if (parent.XmlNode != null)
     {
         XmlElement xmlChild = gXmlDocument.CreateElement(element);
         parent.XmlNode.AppendChild(xmlChild);
     }

     XNode linqParent = parent.XNode;
     if (linqParent == null)
     {
         return;
     }

     XElement container = linqParent as XElement;
     if (container == null)
     {
         throw new PBException("error generating XDocument node is not a XElement");
     }

     container.Add(new XElement(element));
 }
Example #10
0
 /// <summary>
 /// Handles an opening DT or DD tag while a definition list is open: the current node
 /// is attached directly under the open definition list.
 /// </summary>
 /// <param name="tag">Description of the tag being opened.</param>
 /// <param name="bTagEnd">True when the tag is also closed immediately.</param>
 /// <returns>True when the tag was handled here; otherwise false.</returns>
 private bool TagBeginDefinitionListCategory(HtmlTag tag, bool bTagEnd)
 {
     bool insideDefinitionList = _definitionList != null
                                 && tag.TagCategory == HtmlTagCategory.DefinitionList;
     if (!insideDefinitionList)
     {
         return false;
     }

     HtmlTagType type = tag.TagType;
     if (type != HtmlTagType.DT && type != HtmlTagType.DD)
     {
         return false;
     }

     AddElement(_definitionList, _currentNode);

     // A tag that stays open becomes the node under which following content is placed.
     if (!bTagEnd)
     {
         _currentTreeNode = _currentNode;
     }
     return true;
 }
Example #11
0
 /// <summary>
 /// Appends a text node to <paramref name="parent"/> in every active document model.
 /// Nothing is added when <c>IsSeparator</c> returns true for the text.
 /// </summary>
 /// <param name="parent">Node wrapper that receives the text as its last child.</param>
 /// <param name="text">Text content.</param>
 private void AddText(XXXNode_v2 parent, string text)
 {
     if (!IsSeparator(text))
     {
         if (gXmlDocument != null)
         {
             XmlText xmlText = gXmlDocument.CreateTextNode(text);
             parent.XmlNode.AppendChild(xmlText);
         }

         if (gXDocument != null)
         {
             XText linqText = new XText(text);
             XElement owner = (XElement)parent.XNode;
             owner.Add(linqText);
         }
     }
 }
Example #12
0
 /// <summary>
 /// Handles an opening DT or DD tag while a definition list is open: the current node
 /// is attached directly under the open definition list.
 /// </summary>
 /// <param name="tag">Description of the tag being opened.</param>
 /// <param name="bTagEnd">True when the tag is also closed immediately.</param>
 /// <returns>True when the tag was handled here; otherwise false.</returns>
 private bool TagBeginDefinitionListCategory(HtmlTag tag, bool bTagEnd)
 {
     bool insideDefinitionList = gDefinitionList != null
                                 && tag.TagCategory == HtmlTagCategory.DefinitionList;
     if (!insideDefinitionList)
     {
         return false;
     }

     HtmlTagType type = tag.TagType;
     if (type != HtmlTagType.DT && type != HtmlTagType.DD)
     {
         return false;
     }

     AddElement(gDefinitionList, gCurrentNode);

     // A tag that stays open becomes the node under which following content is placed.
     if (!bTagEnd)
     {
         gCurrentTreeNode = gCurrentNode;
     }
     return true;
 }
Example #13
0
 /// <summary>
 /// Appends a comment node to <paramref name="parent"/> in every active document model.
 /// On the XDocument side the work is handed to <c>_xdCreator</c> when one is set.
 /// </summary>
 /// <param name="parent">Node wrapper that receives the comment as its last child.</param>
 /// <param name="comment">Comment text.</param>
 private void AddComment(XXXNode_v2 parent, string comment)
 {
     if (_xmlDocument != null)
     {
         XmlComment xmlComment = _xmlDocument.CreateComment(comment);
         parent.XmlNode.AppendChild(xmlComment);
     }

     if (_xDocument == null)
     {
         return;
     }

     if (_xdCreator == null)
     {
         XComment linqComment = new XComment(comment);
         XElement owner = (XElement)parent.XNode;
         owner.Add(linqComment);
     }
     else
     {
         _xdCreator.AddComment((XElement)parent.XNode, comment);
     }
 }
Example #14
0
        /// <summary>
        /// Appends <paramref name="child"/> to <paramref name="parent"/> in each document
        /// model the parent belongs to. On the XDocument side the parent may be an element
        /// or the document itself, and the work is handed to <c>_xdCreator</c> when one is set.
        /// </summary>
        /// <param name="parent">Node wrapper that receives the child.</param>
        /// <param name="child">Node wrapper to append.</param>
        /// <exception cref="PBException">
        /// The XNode member of <paramref name="parent"/> is set but is neither an XElement
        /// nor an XDocument.
        /// </exception>
        private void AddElement(XXXNode_v2 parent, XXXNode_v2 child)
        {
            if (parent.XmlNode != null)
            {
                parent.XmlNode.AppendChild(child.XmlNode);
            }

            XNode linqParent = parent.XNode;
            if (linqParent == null)
            {
                return;
            }

            XElement parentElement = linqParent as XElement;
            if (parentElement != null)
            {
                if (_xdCreator == null)
                {
                    parentElement.Add(child.XNode);
                }
                else
                {
                    _xdCreator.AddElement(parentElement, (XElement)child.XNode);
                }
                return;
            }

            XDocument parentDocument = linqParent as XDocument;
            if (parentDocument == null)
            {
                throw new PBException("error generating XDocument node is neither a XElement nor a XDocument");
            }

            if (_xdCreator == null)
            {
                parentDocument.Add(child.XNode);
            }
            else
            {
                _xdCreator.AddRootElement((XElement)child.XNode);
            }
        }
Example #15
0
 /// <summary>
 /// Appends a text node to <paramref name="parent"/> in every active document model.
 /// Nothing is added when <c>IsSeparator</c> returns true for the text. On the XDocument
 /// side the work is handed to <c>_xdCreator</c> when one is set.
 /// </summary>
 /// <param name="parent">Node wrapper that receives the text as its last child.</param>
 /// <param name="text">Text content.</param>
 private void AddText(XXXNode_v2 parent, string text)
 {
     if (!IsSeparator(text))
     {
         if (_xmlDocument != null)
         {
             XmlText xmlText = _xmlDocument.CreateTextNode(text);
             parent.XmlNode.AppendChild(xmlText);
         }

         if (_xDocument != null)
         {
             if (_xdCreator == null)
             {
                 XText linqText = new XText(text);
                 XElement owner = (XElement)parent.XNode;
                 owner.Add(linqText);
             }
             else
             {
                 _xdCreator.AddText((XElement)parent.XNode, text);
             }
         }
     }
 }
Example #16
0
        /// <summary>
        /// Processes a closing tag: moves <c>_currentTreeNode</c> / <c>_currentNode</c> back
        /// up the generated tree so that following content is attached at the right level.
        /// </summary>
        /// <param name="sTagName">Name of the tag being closed.</param>
        private void TagEnd(string sTagName)
        {
            if (_normalizeXml)
            {
                HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);
                switch (tagType)
                {
                // Closing html/head/body tags are ignored: no state is changed.
                case HtmlTagType.Html:
                case HtmlTagType.Head:
                case HtmlTagType.Body:
                    return;

                // Leaving the title: go back to the current tree node.
                case HtmlTagType.Title:
                    _currentNode = _currentTreeNode;
                    return;

                case HtmlTagType.Table:
                    // A closing table tag without an open table is ignored.
                    if (_table == null)
                    {
                        return;
                    }
                    // Continue under the parent of the table node, then restore the
                    // enclosing table (if any) from the stack.
                    _currentNode = _currentTreeNode = GetParentXXNode(_table.Table);
                    _table       = null;
                    if (_tableStack.Count != 0)
                    {
                        _table = _tableStack.Pop();
                    }
                    return;

                case HtmlTagType.DL:
                    // A closing dl tag without an open definition list is ignored.
                    if (_definitionList == null)
                    {
                        return;
                    }
                    // Continue under the parent of the list node, then restore the
                    // enclosing definition list (if any) from the stack.
                    _currentNode    = _currentTreeNode = GetParentXXNode(_definitionList);
                    _definitionList = null;
                    if (_definitionListStack.Count != 0)
                    {
                        _definitionList = _definitionListStack.Pop();
                    }
                    return;
                }
                // Closing tags of table parts are only handled specially while a table is open.
                if (_table != null)
                {
                    switch (tagType)
                    {
                    // End of a table section: back to the table node itself.
                    case HtmlTagType.THead:
                    case HtmlTagType.TBody:
                    case HtmlTagType.TFoot:
                        _currentNode = _currentTreeNode = _table.Table;
                        _table.Body  = null;
                        return;

                    case HtmlTagType.ColGroup:
                        _currentNode    = _currentTreeNode = _table.Table;
                        _table.ColGroup = null;
                        return;

                    // For col, tr and th/td: when the part is open, continue under its parent
                    // and forget it; when it is not open, the closing tag is ignored.
                    case HtmlTagType.Col:
                        if (_table.Col != null)
                        {
                            _currentNode = _currentTreeNode = GetParentXXNode(_table.Col);
                            _table.Col   = null;
                        }
                        return;

                    case HtmlTagType.TR:
                        if (_table.Row != null)
                        {
                            _currentNode = _currentTreeNode = GetParentXXNode(_table.Row);
                            _table.Row   = null;
                        }
                        return;

                    case HtmlTagType.TH:
                    case HtmlTagType.TD:
                        if (_table.Data != null)
                        {
                            _currentNode = _currentTreeNode = GetParentXXNode(_table.Data);
                            _table.Data  = null;
                        }
                        return;
                    }
                }
            }
            // Generic case (also reached when normalization is off): look for the nearest
            // node with this name, starting at the current tree node and walking up.
            XXXNode_v2 node = GetParentXXNodeByName(_currentTreeNode, sTagName);

            // When a matching open node exists, continue under its parent; otherwise the
            // tree position is left unchanged.
            if (node != null)
            {
                _currentTreeNode = GetParentXXNode(node);
            }
            _currentNode = _currentTreeNode;
        }
Example #17
0
        /// <summary>
        /// Reads every item produced by <c>gHTMLReader</c> and builds the XML tree:
        /// text and comments become text/comment nodes, the doctype becomes a
        /// <c>doctype</c> attribute on the html node, properties become attributes of the
        /// current node, and tag begin/end items are forwarded to <c>TagBegin</c> /
        /// <c>TagEnd</c>. The reader is closed when the method exits, including when an
        /// exception escapes.
        /// </summary>
        /// <remarks>
        /// Original author's notes, translated, for gbNormalizeXml = true:
        ///   - the html, head, title and body tags are created automatically
        ///   - html, head, title and body tags found in the input are not taken into account
        ///   - only title and meta tags go into the head part; other tags go into the body part
        ///   - a meta tag placed after the start of the body part stays in the body part
        ///   - only the first title tag is taken into account and placed in the head part;
        ///     the other title tags are ignored
        /// </remarks>
        private void GenerateXml()
        {
            try
            {
                InitXml();

                gTableStack = new Stack <HtmlTable_v2>();
                gTable      = null;

                gDefinitionListStack = new Stack <XXXNode_v2>();
                gDefinitionList      = null;

                gbNoTag = false;
                gbBody  = false;
                gbTitle = false;
                while (gHTMLReader.Read())
                {
                    if (gHTMLReader.IsText || gHTMLReader.IsComment)
                    {
                        // The first non-separator text switches generation to the body node.
                        if (gHTMLReader.IsText && !gHTMLReader.IsTextSeparator && !gbBody)
                        {
                            gbBody       = true;
                            gCurrentNode = gCurrentTreeNode = gBodyNode;
                        }
                        if (!gbGenerateXmlNodeOnly)
                        {
                            // With gbReadCommentInText set, comments are emitted as text as well.
                            if (gbReadCommentInText || gHTMLReader.IsText)
                            {
                                AddText(gCurrentNode, gHTMLReader.Value);
                            }
                            else
                            {
                                string s = gCommentCorrection.Replace(gHTMLReader.Value, "-");
                                // A comment is followed by "-->", so its text must not end with '-'.
                                if (s.EndsWith("-"))
                                {
                                    s += " ";
                                }
                                AddComment(gCurrentNode, s);
                            }
                        }
                    }
                    else if (gHTMLReader.IsDocType)
                    {
                        AddAttribute(gHtmlNode, "doctype", gHTMLReader.DocType);
                    }
                    else if (gHTMLReader.IsProperty)
                    {
                        if (gbGenerateXmlNodeOnly || gbNoTag)
                        {
                            continue;
                        }
                        try
                        {
                            // Invariant lower-casing: attribute names must not depend on the
                            // thread culture (e.g. Turkish 'I' -> dotless 'i').
                            string sPropertyName = gReplace.Replace(gHTMLReader.PropertyName, "").ToLowerInvariant();
                            if (sPropertyName == "")
                            {
                                sPropertyName = "__value";
                            }

                            // Change of 28/01/2014: character 0x03 is invalid in XML and was found
                            // in a meta "keywords" content attribute on a real page, so it is removed.
                            string propertyValue = gHTMLReader.PropertyValue;
                            if (propertyValue != null)
                            {
                                propertyValue = propertyValue.Replace("\x03", "");
                            }
                            AddAttribute(gCurrentNode, sPropertyName, propertyValue);

                            if (gHTMLReader.IsMarkBeginEnd)
                            {
                                // Use the same normalized name the element was created with,
                                // otherwise TagEnd cannot find the element to close.
                                TagEnd(NormalizeMarkName(gHTMLReader.MarkName));
                            }
                        }
                        catch
                        {
                            // Deliberate best-effort kept from the original: a property that
                            // cannot be turned into an attribute is skipped and reading continues.
                        }
                    }
                    else if (gHTMLReader.IsMarkBeginEnd)
                    {
                        TagBegin(NormalizeMarkName(gHTMLReader.MarkName), true);
                    }
                    else if (gHTMLReader.IsMarkBegin)
                    {
                        TagBegin(NormalizeMarkName(gHTMLReader.MarkName), false);
                    }
                    else if (gHTMLReader.IsMarkEnd)
                    {
                        TagEnd(NormalizeMarkName(gHTMLReader.MarkName));
                    }
                }
            }
            finally
            {
                gHTMLReader.Close();
            }
        }

        /// <summary>
        /// Converts a tag name read from the HTML into the element name used in the XML:
        /// lower-cased with the invariant culture, every match of <c>gReplace</c> replaced
        /// by "_", and "_" when nothing is left.
        /// </summary>
        /// <param name="markName">Tag name as returned by the reader.</param>
        /// <returns>The normalized, never empty, element name.</returns>
        private string NormalizeMarkName(string markName)
        {
            string tagName = gReplace.Replace(markName.ToLowerInvariant(), "_");
            return tagName == "" ? "_" : tagName;
        }
Example #18
0
        /// <summary>
        /// Handles an opening tag of a table part (thead/tbody/tfoot, colgroup, col, tr,
        /// th/td) while a table is open. The current node is attached at its proper place
        /// in the table, and a missing tbody or tr is created on the way.
        /// </summary>
        /// <param name="tag">Description of the tag being opened.</param>
        /// <param name="bTagEnd">True when the tag is also closed immediately.</param>
        /// <returns>True when the tag was handled here; otherwise false.</returns>
        private bool TagBeginTableCategory(HtmlTag tag, bool bTagEnd)
        {
            bool insideTable = _table != null && tag.TagCategory == HtmlTagCategory.Table;
            if (!insideTable)
            {
                return false;
            }

            HtmlTagType type = tag.TagType;

            // Table sections are attached directly under the table node.
            if (type == HtmlTagType.THead || type == HtmlTagType.TBody || type == HtmlTagType.TFoot)
            {
                AddElement(_table.Table, _currentNode);
                _table.Body = bTagEnd ? null : _currentNode;
                if (!bTagEnd)
                {
                    _currentTreeNode = _currentNode;
                }
                return true;
            }

            if (type == HtmlTagType.ColGroup)
            {
                AddElement(_table.Table, _currentNode);
                _table.ColGroup = bTagEnd ? null : _currentNode;
                if (!bTagEnd)
                {
                    _currentTreeNode = _currentNode;
                }
                return true;
            }

            // A col goes under the open colgroup when there is one, else under the table.
            if (type == HtmlTagType.Col)
            {
                _currentTreeNode = _table.ColGroup != null ? _table.ColGroup : _table.Table;
                AddElement(_currentTreeNode, _currentNode);
                _table.Col = bTagEnd ? null : _currentNode;
                if (!bTagEnd)
                {
                    _currentTreeNode = _currentNode;
                }
                return true;
            }

            // A row needs a section: create a tbody when none is open.
            if (type == HtmlTagType.TR)
            {
                if (_table.Body == null)
                {
                    _table.Body = CreateElement("tbody");
                    AddElement(_table.Table, _table.Body);
                }
                AddElement(_table.Body, _currentNode);
                _table.Row = bTagEnd ? null : _currentNode;
                if (!bTagEnd)
                {
                    _currentTreeNode = _currentNode;
                }
                return true;
            }

            // A cell needs a row (and the row a section): create what is missing.
            if (type == HtmlTagType.TH || type == HtmlTagType.TD)
            {
                if (_table.Row == null)
                {
                    if (_table.Body == null)
                    {
                        _table.Body = CreateElement("tbody");
                        AddElement(_table.Table, _table.Body);
                    }
                    _table.Row = CreateElement("tr");
                    AddElement(_table.Body, _table.Row);
                }
                AddElement(_table.Row, _currentNode);
                _table.Data = bTagEnd ? null : _currentNode;
                if (!bTagEnd)
                {
                    _currentTreeNode = _currentNode;
                }
                return true;
            }

            return false;
        }
Example #19
0
        /// <summary>
        /// Processes an opening tag: creates the element for it, attaches it to the tree
        /// and, when the tag stays open, makes it the node under which following content
        /// is placed. With <c>_normalizeXml</c> set, html/head/body/title tags are mapped
        /// onto pre-created nodes instead, and table and definition-list tags get
        /// dedicated handling.
        /// </summary>
        /// <param name="tagName">Name of the tag; also used as the element name.</param>
        /// <param name="tagEnd">True when the tag is also closed immediately.</param>
        private void TagBegin(string tagName, bool tagEnd)
        {
            // _noTag is set below whenever no element is created for this tag.
            _noTag = false;
            HtmlTagType tagType = HtmlTags.GetHtmlTagType(tagName);
            HtmlTag     tag     = HtmlTags.GetHtmlTag(tagType);

            if (_normalizeXml)
            {
                // html and head tags from the input create no element.
                if (tagType == HtmlTagType.Html || tagType == HtmlTagType.Head)
                {
                    _noTag = true;
                    return;
                }
                // The first body tag switches generation to the body node; no element is created.
                if (tagType == HtmlTagType.Body)
                {
                    _noTag = true;
                    if (!_body)
                    {
                        _body        = true;
                        _currentNode = _currentTreeNode = _bodyNode;
                    }
                    return;
                }
                // The first title tag that stays open redirects content to the title node
                // (the tree node is left unchanged); later title tags create no element.
                if (tagType == HtmlTagType.Title)
                {
                    if (!_title)
                    {
                        if (!tagEnd)
                        {
                            _title       = true;
                            _currentNode = _titleNode;
                        }
                    }
                    else
                    {
                        _noTag = true;
                    }
                    return;
                }
                // $$pb change made on 11/01/2015 (code disabled)
                //if (!_body && tag.TagCategory != HtmlTagCategory.Head)
                //{
                //    _body = true;
                //    _currentNode = _currentTreeNode = _bodyNode;
                //}
            }
            _currentNode = CreateElement(tagName);
            if (_normalizeXml)
            {
                // A new table: save the enclosing table (if any) on the stack, attach the
                // table element and continue inside it.
                if (tagType == HtmlTagType.Table && !tagEnd)
                {
                    if (_table != null)
                    {
                        _tableStack.Push(_table);
                    }
                    _table       = new HtmlTable_v2();
                    _table.Table = _currentNode;
                    AddElement(_currentTreeNode, _currentNode);
                    _currentTreeNode = _currentNode;
                    return;
                }
                if (TagBeginTableCategory(tag, tagEnd))
                {
                    return;
                }
                // A new definition list: same scheme as for tables.
                if (tagType == HtmlTagType.DL && !tagEnd)
                {
                    if (_definitionList != null)
                    {
                        _definitionListStack.Push(_definitionList);
                    }
                    _definitionList = _currentNode;
                    AddElement(_currentTreeNode, _currentNode);
                    _currentTreeNode = _currentNode;
                    return;
                }
                if (TagBeginDefinitionListCategory(tag, tagEnd))
                {
                    return;
                }

                // $$pb to be re-checked (code disabled)
                // gLastPNode must at least be cleared when one of the parents of gLastPNode is closed
                //if (tagType == HtmlTagTypeEnum.P)
                //{
                //    // to handle a <p> tag that has no closing </p> tag
                //    if (gLastPNode != null)
                //    {
                //        gCurrentTreeNode = GetParentXXNode(gLastPNode);
                //        gLastPNode = null;
                //    }
                //    if (!bTagEnd) gLastPNode = gCurrentNode;
                //}
            }
            // Default handling: attach the new element under the current tree node.
            AddElement(_currentTreeNode, _currentNode);
            //if (!tagEnd && tag.EndBoundType != HtmlBoundType.Forbidden)
            //    _currentTreeNode = _currentNode;

            if (!tagEnd)
            {
                // Only descend into the element when its end bound type is not Forbidden.
                if (tag.EndBoundType != HtmlBoundType.Forbidden)
                {
                    _currentTreeNode = _currentNode;
                }
            }
            else if (_correctionMarkBeginEnd)
            {
                // Immediately closed tag: following content goes back to the tree node.
                _currentNode = _currentTreeNode;
            }
        }
Example #20
0
        /// <summary>
        /// Prepares generation: configures the reader and the XmlDocument, creates the
        /// root <c>xml</c> element and, when <c>_normalizeXml</c> is set, the
        /// <c>html</c> / <c>head</c> / <c>title</c> / <c>body</c> skeleton. With
        /// normalization on, generation starts in the head node.
        /// </summary>
        private void InitXml()
        {
            _htmlReader.ReadCommentInText = _readCommentInText;

            // Line breaks are only written when text nodes are generated at all.
            bool writeText = !_generateXmlNodeOnly;

            if (_xmlDocument != null)
            {
                _xmlDocument.PreserveWhitespace = writeText;
            }

            // xml root element, attached to the document
            XXXNode_v2 xmlElement = CreateElement("xml");
            AddElement(_documentNode, xmlElement);
            _currentTreeNode = xmlElement;
            _currentNode     = xmlElement;
            if (writeText)
            {
                AddText(_currentNode, "\r\n");
            }

            if (!_normalizeXml)
            {
                return;
            }

            // html element, under xml
            XXXNode_v2 htmlElement = CreateElement("html");
            AddElement(_currentTreeNode, htmlElement);
            _currentTreeNode = htmlElement;
            _currentNode     = htmlElement;
            _htmlNode        = htmlElement;
            if (writeText)
            {
                AddText(_currentNode, "\r\n");
            }

            // head element, under html (the tree node stays on html)
            XXXNode_v2 headElement = CreateElement("head");
            AddElement(_currentTreeNode, headElement);
            _currentNode = headElement;
            _headNode    = headElement;
            if (writeText)
            {
                AddText(_currentNode, "\r\n");
            }

            // title element, under head
            XXXNode_v2 titleElement = CreateElement("title");
            AddElement(_currentNode, titleElement);
            _currentNode = titleElement;
            _titleNode   = titleElement;

            // body element, under html
            XXXNode_v2 bodyElement = CreateElement("body");
            AddElement(_currentTreeNode, bodyElement);
            _currentNode = bodyElement;
            _bodyNode    = bodyElement;
            if (writeText)
            {
                AddText(_currentNode, "\r\n");
            }

            // Generation starts in the head part.
            _currentTreeNode = _headNode;
            _currentNode     = _headNode;
        }
Example #21
0
        private void GenerateXml()
        {
            // gbNormalizeXml = true :
            //   - les tag html, head, title et body sont créés automatiquement
            //   - les tag html, head, title et body rencontrés ne sont pas pris en compte
            //   - seul les tag title et meta sont mis dans la partie head les autre tag sont mis dans la partie body
            //   - si un tag meta est placé après le début de la partie body, ce tag reste dans la partie body
            //   - seul le premier tag title est pris en compte et placé dans la partie head, les autre tag title ne sont pas pris en compte

            try
            {
                //cTrace.Trace("GenerateXml NewGenerateXml  : {0}", XmlConfig.CurrentConfig.Get("NewGenerateXml"));

                InitXml();

                _tableStack = new Stack <HtmlTable_v2>();
                _table      = null;

                _definitionListStack = new Stack <XXXNode_v2>();
                _definitionList      = null;

                _noTag = false;
                _body  = false;
                _title = false;
                while (_htmlReader.Read())
                {
                    if (_htmlReader.IsText || _htmlReader.IsComment)
                    {
                        // $$pb modif le 11/01/2015
                        //if (_htmlReader.IsText && !_htmlReader.IsTextSeparator && !_body)
                        //if (_htmlReader.IsText && !_htmlReader.IsTextSeparator && !_htmlReader.IsScript && !_body)
                        //{
                        //    _body = true;
                        //    _currentNode = _currentTreeNode = _bodyNode;
                        //}
                        if (!_generateXmlNodeOnly)
                        {
                            if (_readCommentInText)
                            {
                                AddText(_currentNode, _htmlReader.Value);
                            }
                            else
                            {
                                if (_htmlReader.IsText)
                                {
                                    AddText(_currentNode, _htmlReader.Value);
                                }
                                else
                                {
                                    string s = _htmlReader.Value;
                                    s = _commentCorrection.Replace(s, "-");
                                    if (s.EndsWith("-"))
                                    {
                                        s += " ";
                                    }
                                    AddComment(_currentNode, s);
                                }
                            }
                        }
                    }
                    else if (_htmlReader.IsDocType)
                    {
                        AddAttribute(_htmlNode, "doctype", _htmlReader.DocType);
                    }
                    else if (_htmlReader.IsProperty)
                    {
                        if (_generateXmlNodeOnly || _noTag)
                        {
                            continue;
                        }
                        try
                        {
                            string sPropertyName = _htmlReader.PropertyName;
                            //sPropertyName = sPropertyName.Replace("\"", "");
                            //sPropertyName = sPropertyName.Replace("/", "");
                            //sPropertyName = sPropertyName.Replace("\\", "");
                            //sPropertyName = sPropertyName.Replace("-", "");
                            //sPropertyName = sPropertyName.Replace(",", "");
                            sPropertyName = _replace.Replace(sPropertyName, "");
                            sPropertyName = sPropertyName.ToLower();
                            if (sPropertyName == "")
                            {
                                sPropertyName = "__value";
                            }

                            // modif le 28/01/2014
                            //   hexadecimal value 0x03, is an invalid character
                            //   found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                            //   <html><head><meta name="keywords" content="Conditionnement, travaux &amp;agrave; fa&amp;ccedil;onToutes activit&amp;eacute;s en entreprise Entretien et cr&amp;eacute;ation despaces verts" />
                            string propertyValue = _htmlReader.PropertyValue;
                            if (propertyValue != null)
                            {
                                propertyValue = propertyValue.Replace("\x03", "");
                            }
                            AddAttribute(_currentNode, sPropertyName, propertyValue);
                            if (_htmlReader.IsMarkBeginEnd)
                            {
                                TagEnd(_htmlReader.MarkName.ToLower());
                            }
                        }
                        catch (Exception ex)
                        {
                            Trace.WriteLine("error in HtmlToXml.GenerateXml() : line {0} column {1}", _htmlReader.Line, _htmlReader.Column);
                            Trace.WriteLine(ex.Message);
                        }
                    }
                    else if (_htmlReader.IsMarkBeginEnd)
                    {
                        string sTagName = _htmlReader.MarkName.ToLower();
                        sTagName = _replace.Replace(sTagName, "_");
                        if (sTagName == "")
                        {
                            sTagName = "_";
                        }

                        TagBegin(sTagName, true);
                    }
                    else if (_htmlReader.IsMarkBegin)
                    {
                        string sTagName = _htmlReader.MarkName.ToLower();
                        sTagName = _replace.Replace(sTagName, "_");
                        if (sTagName == "")
                        {
                            sTagName = "_";
                        }

                        TagBegin(sTagName, false);
                    }
                    else if (_htmlReader.IsMarkEnd)
                    {
                        string sTagName = _htmlReader.MarkName.ToLower();
                        sTagName = _replace.Replace(sTagName, "_");
                        if (sTagName == "")
                        {
                            sTagName = "_";
                        }

                        TagEnd(sTagName);
                    }
                }
            }
            finally
            {
                _htmlReader.Close();
            }
        }
Example #22
0
        /// <summary>
        /// Prepares the reader and the output document before parsing starts: creates the root
        /// <c>xml</c> element and, when normalization is enabled, the fixed
        /// <c>html</c> / <c>head</c> / <c>title</c> / <c>body</c> skeleton.
        /// </summary>
        /// <remarks>
        /// Unless only XML nodes are generated, a "\r\n" text node is added inside every created
        /// element except <c>title</c>. When normalization is enabled the method ends with both the
        /// current node and the current tree node set to the <c>head</c> element; otherwise both
        /// stay on the root <c>xml</c> element.
        /// </remarks>
        private void InitXml()
        {
            gHTMLReader.ReadCommentInText = gbReadCommentInText;

            bool writeLineBreaks = !gbGenerateXmlNodeOnly;

            // The XmlDocument keeps whitespace only when text nodes are generated as well.
            if (gXmlDocument != null)
            {
                gXmlDocument.PreserveWhitespace = writeLineBreaks;
            }

            // Root <xml> element, attached directly to the document node.
            XXXNode_v2 xmlElement = CreateElement("xml");
            AddElement(gDocumentNode, xmlElement);
            gCurrentNode = gCurrentTreeNode = xmlElement;
            if (writeLineBreaks)
            {
                AddText(xmlElement, "\r\n");
            }

            if (!gbNormalizeXml)
            {
                return;
            }

            // <html> goes under <xml> and becomes the tree node for head and body.
            XXXNode_v2 htmlElement = CreateElement("html");
            AddElement(xmlElement, htmlElement);
            gHtmlNode = gCurrentNode = gCurrentTreeNode = htmlElement;
            if (writeLineBreaks)
            {
                AddText(htmlElement, "\r\n");
            }

            // <head> goes under <html>; the tree node is not moved.
            XXXNode_v2 headElement = CreateElement("head");
            AddElement(htmlElement, headElement);
            gHeadNode = gCurrentNode = headElement;
            if (writeLineBreaks)
            {
                AddText(headElement, "\r\n");
            }

            // <title> goes under <head>; no line break is written inside it.
            XXXNode_v2 titleElement = CreateElement("title");
            AddElement(headElement, titleElement);
            gTitleNode = gCurrentNode = titleElement;

            // <body> goes under <html>, after <head>.
            XXXNode_v2 bodyElement = CreateElement("body");
            AddElement(htmlElement, bodyElement);
            gBodyNode = gCurrentNode = bodyElement;
            if (writeLineBreaks)
            {
                AddText(bodyElement, "\r\n");
            }

            // Parsing starts inside <head>.
            gCurrentNode = gCurrentTreeNode = headElement;
        }
Example #23
0
        /// <summary>
        /// Handles the start of a tag: creates the matching element, attaches it to the output
        /// tree and, for a tag that stays open, makes it the current tree node.
        /// </summary>
        /// <param name="sTagName">
        /// Name of the tag; used to look up the HTML tag description and as the name of the
        /// created element.
        /// </param>
        /// <param name="bTagEnd">
        /// True when the tag is opened and closed at once; such an element is added to the tree
        /// but never becomes the current tree node.
        /// </param>
        /// <remarks>
        /// In normalized mode the <c>html</c>, <c>head</c>, <c>body</c> and <c>title</c> tags never
        /// create an element: they only redirect output to the pre-built skeleton nodes, and
        /// <c>gbNoTag</c> is raised for the ones that are ignored. Table and definition-list tags
        /// are placed by dedicated logic.
        /// </remarks>
        private void TagBegin(string sTagName, bool bTagEnd)
        {
            gbNoTag = false;

            HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);
            HtmlTag     tag     = HtmlTags.GetHtmlTag(tagType);

            if (gbNormalizeXml)
            {
                switch (tagType)
                {
                    case HtmlTagType.Html:
                    case HtmlTagType.Head:
                        // Ignored: no element is created for these tags.
                        gbNoTag = true;
                        return;

                    case HtmlTagType.Body:
                        // Switch to the existing body node the first time body is reached.
                        gbNoTag = true;
                        if (!gbBody)
                        {
                            gbBody       = true;
                            gCurrentNode = gCurrentTreeNode = gBodyNode;
                        }
                        return;

                    case HtmlTagType.Title:
                        if (gbTitle)
                        {
                            // Only the first title is used; later ones are ignored.
                            gbNoTag = true;
                        }
                        else if (!bTagEnd)
                        {
                            gbTitle      = true;
                            gCurrentNode = gTitleNode;
                        }
                        return;
                }

                // The first tag that is not a head-category tag implicitly starts the body.
                if (!gbBody && tag.TagCategory != HtmlTagCategory.Head)
                {
                    gbBody       = true;
                    gCurrentNode = gCurrentTreeNode = gBodyNode;
                }
            }

            gCurrentNode = CreateElement(sTagName);

            if (gbNormalizeXml)
            {
                bool staysOpen = !bTagEnd;

                // A table that stays open starts a new table context; the enclosing one, if any, is stacked.
                if (staysOpen && tagType == HtmlTagType.Table)
                {
                    if (gTable != null)
                    {
                        gTableStack.Push(gTable);
                    }
                    gTable = new HtmlTable_v2 { Table = gCurrentNode };
                    AddElement(gCurrentTreeNode, gCurrentNode);
                    gCurrentTreeNode = gCurrentNode;
                    return;
                }

                // Table-category tags are placed relative to the current table.
                if (TagBeginTableCategory(tag, bTagEnd))
                {
                    return;
                }

                // A definition list that stays open is tracked the same way as a table.
                if (staysOpen && tagType == HtmlTagType.DL)
                {
                    if (gDefinitionList != null)
                    {
                        gDefinitionListStack.Push(gDefinitionList);
                    }
                    gDefinitionList = gCurrentNode;
                    AddElement(gCurrentTreeNode, gCurrentNode);
                    gCurrentTreeNode = gCurrentNode;
                    return;
                }

                if (TagBeginDefinitionListCategory(tag, bTagEnd))
                {
                    return;
                }

                // TODO (to be re-checked): a <p> tag without its closing </p> is not handled.
                // An earlier, disabled attempt remembered the last <p> node and re-parented the
                // next <p> to its parent; at the very least that remembered node must be reset
                // when one of its ancestors is closed.
            }

            // Default placement: child of the current tree node.
            AddElement(gCurrentTreeNode, gCurrentNode);

            // Self-closed tags and tags whose end tag is forbidden never receive children.
            if (bTagEnd || tag.EndBoundType == HtmlBoundType.Forbidden)
            {
                return;
            }
            gCurrentTreeNode = gCurrentNode;
        }
Example #24
0
        /// <summary>
        /// Places the current node (<c>gCurrentNode</c>) inside the current table when
        /// <paramref name="tag"/> is a table-structure tag, creating the missing <c>tbody</c> and
        /// <c>tr</c> elements where needed.
        /// </summary>
        /// <param name="tag">Description of the tag being opened.</param>
        /// <param name="bTagEnd">
        /// True when the tag is opened and closed at once: the element is added but is neither
        /// tracked on the table nor made the current tree node.
        /// </param>
        /// <returns>
        /// True when the tag was handled here; false when there is no current table, the tag is
        /// not in the table category, or its type is not one of the handled ones.
        /// </returns>
        private bool TagBeginTableCategory(HtmlTag tag, bool bTagEnd)
        {
            if (gTable == null || tag.TagCategory != HtmlTagCategory.Table)
            {
                return false;
            }

            switch (tag.TagType)
            {
                case HtmlTagType.THead:
                case HtmlTagType.TBody:
                case HtmlTagType.TFoot:
                    // Sections are always direct children of the table.
                    AddElement(gTable.Table, gCurrentNode);
                    gTable.Body = bTagEnd ? null : gCurrentNode;
                    break;

                case HtmlTagType.ColGroup:
                    AddElement(gTable.Table, gCurrentNode);
                    gTable.ColGroup = bTagEnd ? null : gCurrentNode;
                    break;

                case HtmlTagType.Col:
                    // A col goes into the open colgroup when there is one, else into the table.
                    gCurrentTreeNode = gTable.ColGroup != null ? gTable.ColGroup : gTable.Table;
                    AddElement(gCurrentTreeNode, gCurrentNode);
                    gTable.Col = bTagEnd ? null : gCurrentNode;
                    break;

                case HtmlTagType.TR:
                    // A row outside any section gets an implicit <tbody>.
                    if (gTable.Body == null)
                    {
                        gTable.Body = CreateElement("tbody");
                        AddElement(gTable.Table, gTable.Body);
                    }
                    AddElement(gTable.Body, gCurrentNode);
                    gTable.Row = bTagEnd ? null : gCurrentNode;
                    break;

                case HtmlTagType.TH:
                case HtmlTagType.TD:
                    // A cell outside any row gets an implicit <tr> (and <tbody> if needed).
                    if (gTable.Row == null)
                    {
                        if (gTable.Body == null)
                        {
                            gTable.Body = CreateElement("tbody");
                            AddElement(gTable.Table, gTable.Body);
                        }
                        gTable.Row = CreateElement("tr");
                        AddElement(gTable.Body, gTable.Row);
                    }
                    AddElement(gTable.Row, gCurrentNode);
                    gTable.Data = bTagEnd ? null : gCurrentNode;
                    break;

                default:
                    return false;
            }

            // An element that stays open receives the following content.
            if (!bTagEnd)
            {
                gCurrentTreeNode = gCurrentNode;
            }
            return true;
        }
Example #25
0
        /// <summary>
        /// Handles a closing tag by moving the current node and current tree node back up the
        /// output tree.
        /// </summary>
        /// <param name="sTagName">Name of the tag being closed.</param>
        /// <remarks>
        /// In normalized mode, closing <c>html</c>, <c>head</c> or <c>body</c> does nothing,
        /// closing <c>title</c> returns to the current tree node, and table / definition-list
        /// tags are resolved through the tracked table and list state. Every other tag, and every
        /// tag in non-normalized mode, is resolved by name via <c>GetParentXXNodeByName</c>.
        /// </remarks>
        private void TagEnd(string sTagName)
        {
            if (gbNormalizeXml)
            {
                HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);

                switch (tagType)
                {
                    case HtmlTagType.Html:
                    case HtmlTagType.Head:
                    case HtmlTagType.Body:
                        return;

                    case HtmlTagType.Title:
                        gCurrentNode = gCurrentTreeNode;
                        return;

                    case HtmlTagType.Table:
                        // A closing table tag with no open table is ignored.
                        if (gTable != null)
                        {
                            gCurrentNode = gCurrentTreeNode = GetParentXXNode(gTable.Table);
                            // Resume the enclosing table, if any.
                            gTable = gTableStack.Count != 0 ? gTableStack.Pop() : null;
                        }
                        return;

                    case HtmlTagType.DL:
                        // A closing dl tag with no open definition list is ignored.
                        if (gDefinitionList != null)
                        {
                            gCurrentNode    = gCurrentTreeNode = GetParentXXNode(gDefinitionList);
                            gDefinitionList = gDefinitionListStack.Count != 0 ? gDefinitionListStack.Pop() : null;
                        }
                        return;
                }

                if (gTable != null)
                {
                    // Inside a table, structure tags are closed from the tracked table state
                    // instead of a lookup by name.
                    switch (tagType)
                    {
                        case HtmlTagType.THead:
                        case HtmlTagType.TBody:
                        case HtmlTagType.TFoot:
                            gCurrentNode = gCurrentTreeNode = gTable.Table;
                            gTable.Body  = null;
                            return;

                        case HtmlTagType.ColGroup:
                            gCurrentNode    = gCurrentTreeNode = gTable.Table;
                            gTable.ColGroup = null;
                            return;

                        case HtmlTagType.Col:
                            if (gTable.Col != null)
                            {
                                gCurrentNode = gCurrentTreeNode = GetParentXXNode(gTable.Col);
                                gTable.Col   = null;
                            }
                            return;

                        case HtmlTagType.TR:
                            if (gTable.Row != null)
                            {
                                gCurrentNode = gCurrentTreeNode = GetParentXXNode(gTable.Row);
                                gTable.Row   = null;
                            }
                            return;

                        case HtmlTagType.TH:
                        case HtmlTagType.TD:
                            if (gTable.Data != null)
                            {
                                gCurrentNode = gCurrentTreeNode = GetParentXXNode(gTable.Data);
                                gTable.Data  = null;
                            }
                            return;
                    }
                }
            }

            // Generic case: look the tag up by name starting from the current tree node
            // (presumably walking up through its ancestors - confirm against
            // GetParentXXNodeByName). When found, continue in that element's parent;
            // when not found, the tree node is left where it is.
            XXXNode_v2 matchingNode = GetParentXXNodeByName(gCurrentTreeNode, sTagName);
            if (matchingNode != null)
            {
                gCurrentTreeNode = GetParentXXNode(matchingNode);
            }
            gCurrentNode = gCurrentTreeNode;
        }