Ejemplo n.º 1
0
        /// <summary>
        /// Translates a parsed HTML end tag into a document end-tag node stored in <c>_node</c>.
        /// </summary>
        /// <remarks>
        /// Only <c>p</c> and <c>a</c> end tags (compared after lower-casing the name) produce a node.
        /// For any other tag name <c>_node</c> stays <c>null</c> and <c>_nodeIndex</c> is not advanced.
        /// </remarks>
        /// <param name="endTag">End tag whose name, line and column are read.</param>
        private void EndTag(HtmlNodeEndTag endTag)
        {
            // Reset first so an unrecognized tag never leaves a stale node behind.
            _node = null;

            string loweredName = endTag.Name.ToLower();
            HtmlTagType tagType;
            if (loweredName == "p")
            {
                tagType = HtmlTagType.P;
            }
            else if (loweredName == "a")
            {
                tagType = HtmlTagType.A;
            }
            else
            {
                // Not a tag this method handles: nothing to emit.
                return;
            }

            // The node index is consumed only when a node is actually created.
            HtmlDocNodeEndTag created = new HtmlDocNodeEndTag();
            created.Tag    = tagType;
            created.Index  = ++_nodeIndex;
            created.Line   = endTag.Line;
            created.Column = endTag.Column;
            _node = created;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads an end tag from <c>_charStreamReader</c> and returns it as an <c>HtmlNodeEndTag</c>.
        /// </summary>
        /// <remarks>
        /// Two characters are consumed unconditionally (commented in the code as <c>&lt;</c> and <c>/</c>),
        /// then the tag name is read via <c>ReadTagName()</c>. A following <c>&gt;</c> is consumed only
        /// if it is the next character. Line and column are captured after the first character has been
        /// read, or set to 0 when <c>_disableLineColumn</c> is true.
        /// </remarks>
        /// <returns>The end-tag node with its index, position and name filled in.</returns>
        private HtmlNodeEndTag ReadEndTag()
        {
            // Consume '<'.
            _charStreamReader.ReadChar();

            // Position is sampled here, i.e. just after the '<' has been consumed.
            HtmlNodeEndTag node = new HtmlNodeEndTag();
            node.Index  = _htmlNodeIndex++;
            node.Line   = _disableLineColumn ? 0 : _charStreamReader.Line;
            node.Column = _disableLineColumn ? 0 : _charStreamReader.Column;

            // Consume '/'.
            _charStreamReader.ReadChar();
            node.Name = ReadTagName();

            // Swallow the closing '>' only when it is really there.
            char upcoming = (char)_charStreamReader.PeekChar();
            if (upcoming == '>')
            {
                _charStreamReader.ReadChar();
            }

            return node;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Consumes every node yielded by <c>_htmlReader.Read()</c> and builds the corresponding XML document.
        /// </summary>
        /// <remarks>
        /// Per node type:
        /// <list type="bullet">
        /// <item>Text: appended with <c>AddText</c> unless <c>_generateXmlNodeOnly</c> is set.</item>
        /// <item>Comment: appended as text when <c>_readCommentInText</c> is set, otherwise added as an XML
        /// comment after <c>_commentCorrection</c> has been applied; skipped when <c>_generateXmlNodeOnly</c> is set.</item>
        /// <item>Script: always appended with <c>AddText</c>.</item>
        /// <item>DocumentType: stored as a <c>doctype</c> attribute on <c>_htmlNode</c>.</item>
        /// <item>Property: added as an attribute on <c>_currentNode</c> unless <c>_generateXmlNodeOnly</c>
        /// or <c>_noTag</c> is set; failures are traced and the property is skipped.</item>
        /// <item>OpenTag / EndTag: forwarded to <c>AddTagBegin</c> / <c>TagEnd</c> with a normalized name.</item>
        /// </list>
        /// Tag and attribute names are lower-cased with the invariant culture so the generated XML does not
        /// depend on the current thread culture (e.g. "TITLE" must not become "tıtle" under tr-TR).
        /// </remarks>
        /// <returns>The <see cref="XDocument"/> exposed by the creator instantiated for this run.</returns>
        public XDocument CreateXml()
        {
            // NOTE (original author): HtmlReader_v4 does not manage ReadCommentInText,
            // so the comment-vs-text decision is taken in the loop below.
            _xdCreator = new XDocumentCreator();

            // Project helper defined elsewhere; presumably prepares the root nodes — confirm against its definition.
            InitXml();

            // Reset per-run parsing state.
            _tableStack = new Stack<HtmlTable_v3>();
            _table      = null;

            _definitionListStack = new Stack<XElement>();
            _definitionList      = null;

            _noTag = false;
            _body  = false;
            _title = false;

            foreach (HtmlNode htmlNode in _htmlReader.Read())
            {
                if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
                {
                    if (_generateXmlNodeOnly)
                    {
                        continue;
                    }

                    if (htmlNode.Type == HtmlNodeType.Text)
                    {
                        AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
                    }
                    else if (_readCommentInText)
                    {
                        // Comments are treated as plain text in this mode.
                        AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
                    }
                    else
                    {
                        string comment = ((HtmlNodeComment)htmlNode).Comment;
                        comment = _commentCorrection.Replace(comment, "-");
                        // Pad a trailing '-' with a space; ordinal comparison keeps the check culture-independent.
                        if (comment.EndsWith("-", StringComparison.Ordinal))
                        {
                            comment += " ";
                        }
                        _xdCreator.AddComment(_currentNode, comment);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.Script)
                {
                    AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
                }
                else if (htmlNode.Type == HtmlNodeType.DocumentType)
                {
                    _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
                }
                else if (htmlNode.Type == HtmlNodeType.Property)
                {
                    if (_generateXmlNodeOnly || _noTag)
                    {
                        continue;
                    }

                    HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
                    try
                    {
                        // Strip what _nameCorrection matches, then lower-case culture-independently.
                        string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "");
                        propertyName = propertyName.ToLowerInvariant();
                        if (propertyName == "")
                        {
                            propertyName = "__value";
                        }

                        // Modification of 28/01/2014: hexadecimal value 0x03 is an invalid character.
                        //   Found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                        //   inside the content of a <meta name="keywords" ...> attribute.
                        string propertyValue = htmlNodeProperty.Value;
                        if (propertyValue != null)
                        {
                            propertyValue = propertyValue.Replace("\x03", "");
                        }

                        _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
                    }
                    catch (Exception ex)
                    {
                        // Deliberate best-effort: one bad attribute is reported and skipped, the conversion goes on.
                        Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                        Trace.WriteLine(ex.Message);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.OpenTag)
                {
                    HtmlNodeOpenTag htmlNodeOpenTag = (HtmlNodeOpenTag)htmlNode;
                    // Lower-case culture-independently, then replace what _nameCorrection matches with '_'.
                    string tagName = htmlNodeOpenTag.Name.ToLowerInvariant();
                    tagName = _nameCorrection.Replace(tagName, "_");
                    if (tagName == "")
                    {
                        tagName = "_";
                    }

                    AddTagBegin(tagName);
                }
                else if (htmlNode.Type == HtmlNodeType.EndTag)
                {
                    HtmlNodeEndTag htmlNodeEndTag = (HtmlNodeEndTag)htmlNode;
                    // Same normalization as for open tags so begin/end names always match.
                    string tagName = htmlNodeEndTag.Name.ToLowerInvariant();
                    tagName = _nameCorrection.Replace(tagName, "_");
                    if (tagName == "")
                    {
                        tagName = "_";
                    }

                    TagEnd(tagName);
                }
            }

            return _xdCreator.XDocument;
        }