Esempio n. 1
0
        //private void AddElement(XXXNode parent, string element)
        //{
        //    if (parent.XmlNode != null)
        //    {
        //        XmlElement node = _xmlDocument.CreateElement(element);
        //        parent.XmlNode.AppendChild(node);
        //    }
        //    if (parent.XNode != null)
        //    {
        //        if (!(parent.XNode is XElement)) throw new PBException("error generating XDocument node is not a XElement");
        //        XElement node = new XElement(element);
        //        ((XElement)parent.XNode).Add(node);
        //    }
        //}

        /// <summary>
        /// Adds the attribute <paramref name="name"/> to <paramref name="parent"/> in every
        /// document representation that is currently active.
        /// </summary>
        /// <remarks>
        /// When <c>_xmlDocument</c> is set, the attribute is appended to <c>parent.XmlNode</c>
        /// unless one with the same name already exists. When <c>_xDocument</c> is set, the
        /// work is delegated to <c>_xdCreator</c> if there is one; otherwise the attribute is
        /// added to <c>parent.XNode</c> (cast to <see cref="XElement"/>) unless already present.
        /// A null <paramref name="value"/> is stored as an empty string by the code paths of
        /// this method that create the attribute themselves.
        /// </remarks>
        /// <param name="parent">Node wrapper holding the XmlNode and/or XNode to decorate.</param>
        /// <param name="name">Attribute name.</param>
        /// <param name="value">Attribute value; may be null.</param>
        private void AddAttribute(XXXNode_v2 parent, string name, string value)
        {
            // XmlDocument representation: never overwrite an existing attribute.
            if (_xmlDocument != null && parent.XmlNode.Attributes.GetNamedItem(name) == null)
            {
                XmlAttribute xmlAttribute = _xmlDocument.CreateAttribute(name);
                // The normalized value is kept in the parameter on purpose: the XDocument
                // branch below receives "" instead of null once this branch has run.
                value = value ?? "";
                xmlAttribute.Value = value;
                parent.XmlNode.Attributes.Append(xmlAttribute);
            }

            if (_xDocument == null)
            {
                return;
            }

            // XDocument representation.
            XElement element = (XElement)parent.XNode;
            if (_xdCreator != null)
            {
                // The creator receives the value as-is (possibly null).
                _xdCreator.AddAttribute(element, name, value);
                return;
            }

            if (element.Attribute(name) == null)
            {
                element.Add(new XAttribute(name, value ?? ""));
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Reads every node produced by <c>_htmlReader.Read()</c> and feeds it into a freshly
        /// created <c>XDocumentCreator</c>, then returns the resulting document.
        /// </summary>
        /// <remarks>
        /// Per node type:
        /// <list type="bullet">
        /// <item>Text: added as text to the current node (skipped when <c>_generateXmlNodeOnly</c>).</item>
        /// <item>Comment: added as text when <c>_readCommentInText</c> is set, otherwise added as
        /// a comment after correction (skipped when <c>_generateXmlNodeOnly</c>).</item>
        /// <item>Script: added as text to the current node.</item>
        /// <item>DocumentType: stored as a <c>doctype</c> attribute on <c>_htmlNode</c>.</item>
        /// <item>Property: added as an attribute of the current node; failures are traced and
        /// the node is skipped.</item>
        /// <item>OpenTag / EndTag: forwarded to <c>AddTagBegin</c> / <c>TagEnd</c> with a
        /// normalized name.</item>
        /// </list>
        /// Any other node type is ignored. Names are lower-cased with the invariant culture so
        /// the generated element and attribute names do not depend on the thread culture.
        /// </remarks>
        /// <returns>The <c>XDocument</c> exposed by the creator after all nodes were processed.</returns>
        public XDocument CreateXml()
        {
            // ATTENTION: HtmlReader_v4 does not manage ReadCommentInText.
            _xdCreator = new XDocumentCreator();

            InitXml();

            // Reset the per-conversion state.
            _tableStack = new Stack<HtmlTable_v3>();
            _table = null;

            _definitionListStack = new Stack<XElement>();
            _definitionList = null;

            _noTag = false;
            _body = false;
            _title = false;

            foreach (HtmlNode htmlNode in _htmlReader.Read())
            {
                if (htmlNode.Type == HtmlNodeType.Text)
                {
                    if (!_generateXmlNodeOnly)
                    {
                        AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.Comment)
                {
                    if (_generateXmlNodeOnly)
                    {
                        continue;
                    }

                    string comment = ((HtmlNodeComment)htmlNode).Comment;
                    if (_readCommentInText)
                    {
                        AddText(_currentNode, comment);
                    }
                    else
                    {
                        // NOTE(review): _commentCorrection presumably matches sequences that are
                        // not allowed inside an XML comment (e.g. "--") - confirm its pattern.
                        comment = _commentCorrection.Replace(comment, "-");

                        // An XML comment must not end with '-'; pad with a space. The check is
                        // ordinal so it does not depend on the current culture.
                        if (comment.EndsWith("-", StringComparison.Ordinal))
                        {
                            comment += " ";
                        }
                        _xdCreator.AddComment(_currentNode, comment);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.Script)
                {
                    // NOTE(review): unlike text nodes, script content is added even when
                    // _generateXmlNodeOnly is set - confirm this is intended.
                    AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
                }
                else if (htmlNode.Type == HtmlNodeType.DocumentType)
                {
                    _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
                }
                else if (htmlNode.Type == HtmlNodeType.Property)
                {
                    if (_generateXmlNodeOnly || _noTag)
                    {
                        continue;
                    }

                    HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
                    try
                    {
                        // Matches of _nameCorrection are removed first, then the name is lower-cased.
                        string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "").ToLowerInvariant();
                        if (propertyName == "")
                        {
                            propertyName = "__value";
                        }

                        // Change of 28/01/2014:
                        //   hexadecimal value 0x03 is an invalid character
                        //   found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                        //   <html><head><meta name="keywords" content="Conditionnement, travaux &amp;agrave; fa&amp;ccedil;onToutes activit&amp;eacute;s en entreprise Entretien et cr&amp;eacute;ation despaces verts" />
                        string propertyValue = htmlNodeProperty.Value;
                        if (propertyValue != null)
                        {
                            propertyValue = propertyValue.Replace("\x03", "");
                        }

                        _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a bad attribute is traced and skipped, the conversion goes on.
                        Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                        Trace.WriteLine(ex.Message);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.OpenTag)
                {
                    AddTagBegin(NormalizeTagName(((HtmlNodeOpenTag)htmlNode).Name));
                }
                else if (htmlNode.Type == HtmlNodeType.EndTag)
                {
                    TagEnd(NormalizeTagName(((HtmlNodeEndTag)htmlNode).Name));
                }
                // Other node types (e.g. HtmlNodeType.CloseTag) are not processed here.
            }

            return _xdCreator.XDocument;
        }

        /// <summary>
        /// Converts an HTML tag name into the element name used in the generated XML:
        /// lower-cased (invariant culture), every match of <c>_nameCorrection</c> replaced by
        /// "_", and "_" when nothing is left.
        /// </summary>
        /// <param name="htmlTagName">Tag name as reported by the HTML reader.</param>
        /// <returns>The normalized, never empty, tag name.</returns>
        private string NormalizeTagName(string htmlTagName)
        {
            string tagName = _nameCorrection.Replace(htmlTagName.ToLowerInvariant(), "_");
            return tagName == "" ? "_" : tagName;
        }