示例#1
0
 /// <summary>
 /// Applies one parsed HTML attribute to the node currently held in <c>_node</c>.
 /// </summary>
 /// <remarks>
 /// Only two node types are handled: <c>HtmlDocNodeBeginTagA</c> receives <c>href</c>,
 /// and <c>HtmlDocNodeTagImg</c> receives <c>src</c>, <c>class</c> and <c>width</c>.
 /// Every other node type or attribute name is ignored.
 /// Attribute names are compared with an ordinal, case-insensitive comparison so that
 /// matching does not depend on the current thread culture (a culture-sensitive
 /// <c>ToLower()</c> maps 'I' to a dotless 'ı' under Turkish cultures, which would make
 /// "WIDTH" fail to match "width").
 /// </remarks>
 /// <param name="property">Attribute whose <c>Name</c> selects the target member and whose <c>Value</c> is stored.</param>
 private void Property(HtmlNodeProperty property)
 {
     if (_node is HtmlDocNodeBeginTagA)
     {
         HtmlDocNodeBeginTagA anchor = (HtmlDocNodeBeginTagA)_node;
         if (string.Equals(property.Name, "href", System.StringComparison.OrdinalIgnoreCase))
         {
             anchor.Link = property.Value;
         }
         return;
     }

     if (!(_node is HtmlDocNodeTagImg))
     {
         return;
     }

     HtmlDocNodeTagImg image = (HtmlDocNodeTagImg)_node;
     string            name  = property.Name;

     if (string.Equals(name, "src", System.StringComparison.OrdinalIgnoreCase))
     {
         image.Link = property.Value;
     }
     else if (string.Equals(name, "class", System.StringComparison.OrdinalIgnoreCase))
     {
         // The class attribute is split on spaces into the image's class list.
         image.ClassList = zsplit.Split(property.Value, ' ', true);
     }
     else if (string.Equals(name, "width", System.StringComparison.OrdinalIgnoreCase))
     {
         int? width = property.Value.zTryParseAs<int?>();
         if (width == null)
         {
             // An unparsable width is traced, and the null is still stored below.
             Trace.WriteLine($"unknow width \"{property.Value}\"");
         }
         image.Width = width;
     }
 }
示例#2
0
        /// <summary>
        /// Reads every node produced by <c>_htmlReader.Read()</c> and builds an XML document from them.
        /// </summary>
        /// <remarks>
        /// A new <c>XDocumentCreator</c> is created, <c>InitXml()</c> is called and the table,
        /// definition-list and flag state is reset before reading. Each HTML node is then dispatched
        /// on its <c>Type</c>:
        /// text and comments are added to <c>_currentNode</c> (skipped when <c>_generateXmlNodeOnly</c> is set),
        /// scripts are added as text, the document type becomes a <c>doctype</c> attribute on <c>_htmlNode</c>,
        /// properties become attributes on <c>_currentNode</c>, and open/end tags are forwarded to
        /// <c>AddTagBegin</c> / <c>TagEnd</c>.
        /// Tag and attribute names are lower-cased with the invariant culture so the generated XML
        /// names do not depend on the current thread culture.
        /// </remarks>
        /// <returns>The <see cref="XDocument"/> exposed by the creator built for this run.</returns>
        public XDocument CreateXml()
        {
            // NOTE: HtmlReader_v4 does not manage ReadCommentInText, so the option is not pushed
            // to the reader; _readCommentInText is evaluated here instead.
            _xdCreator = new XDocumentCreator();

            InitXml();

            // Reset the per-run parsing state.
            _tableStack = new Stack<HtmlTable_v3>();
            _table      = null;

            _definitionListStack = new Stack<XElement>();
            _definitionList      = null;

            _noTag = false;
            _body  = false;
            _title = false;

            foreach (HtmlNode htmlNode in _htmlReader.Read())
            {
                if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
                {
                    if (_generateXmlNodeOnly)
                    {
                        continue;
                    }

                    if (htmlNode.Type == HtmlNodeType.Text)
                    {
                        AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
                    }
                    else if (_readCommentInText)
                    {
                        // Comments are emitted as plain text when requested.
                        AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
                    }
                    else
                    {
                        // Matches of _commentCorrection are replaced by a single "-" and a trailing
                        // "-" gets a space appended (presumably because an XML comment may not
                        // contain "--" nor end with "-" -- confirm against _commentCorrection).
                        string comment = _commentCorrection.Replace(((HtmlNodeComment)htmlNode).Comment, "-");
                        if (comment.EndsWith("-", StringComparison.Ordinal))
                        {
                            comment += " ";
                        }
                        _xdCreator.AddComment(_currentNode, comment);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.Script)
                {
                    AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
                }
                else if (htmlNode.Type == HtmlNodeType.DocumentType)
                {
                    _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
                }
                else if (htmlNode.Type == HtmlNodeType.Property)
                {
                    if (_generateXmlNodeOnly || _noTag)
                    {
                        continue;
                    }

                    HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
                    try
                    {
                        // Strip the matches of _nameCorrection, then lower-case; a name that ends
                        // up empty is stored under "__value".
                        string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "").ToLowerInvariant();
                        if (propertyName == "")
                        {
                            propertyName = "__value";
                        }

                        // Change of 28/01/2014: hexadecimal value 0x03 is an invalid character.
                        //   Found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                        //   inside the content of a <meta name="keywords" ...> element.
                        string propertyValue = htmlNodeProperty.Value;
                        if (propertyValue != null)
                        {
                            propertyValue = propertyValue.Replace("\x03", "");
                        }

                        _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failing attribute is traced and the conversion continues.
                        Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                        Trace.WriteLine(ex.Message);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.OpenTag)
                {
                    // Lower-case first, then replace the matches of _nameCorrection by "_".
                    string tagName = ((HtmlNodeOpenTag)htmlNode).Name.ToLowerInvariant();
                    tagName = _nameCorrection.Replace(tagName, "_");
                    if (tagName == "")
                    {
                        tagName = "_";
                    }

                    AddTagBegin(tagName);
                }
                else if (htmlNode.Type == HtmlNodeType.EndTag)
                {
                    // Same name normalization as for open tags.
                    string tagName = ((HtmlNodeEndTag)htmlNode).Name.ToLowerInvariant();
                    tagName = _nameCorrection.Replace(tagName, "_");
                    if (tagName == "")
                    {
                        tagName = "_";
                    }

                    TagEnd(tagName);
                }
            }

            return _xdCreator.XDocument;
        }
示例#3
0
        /// <summary>
        /// Reads one opening tag from <c>_charStreamReader</c> and lazily yields the nodes found in it.
        /// </summary>
        /// <remarks>
        /// Yields, in order: the <c>HtmlNodeOpenTag</c>; one <c>HtmlNodeProperty</c> per attribute;
        /// then, only if the tag is terminated by '&gt;', either an end-tag node (when the '&gt;' was
        /// preceded by '/') or a close-tag node (when <c>_generateCloseTag</c> is set).
        /// The tag is flagged as a script when its name equals "script" ignoring case, unless
        /// <c>_disableScriptTreatment</c> is set. That comparison is ordinal so it does not depend on
        /// the current thread culture.
        /// Because this is an iterator, characters are consumed from the stream only as the
        /// sequence is enumerated.
        /// </remarks>
        /// <returns>The sequence of nodes described above.</returns>
        private IEnumerable<HtmlNode> ReadOpenTag()
        {
            // Consume the leading '<'.
            _charStreamReader.ReadChar();
            HtmlNodeOpenTag openTag = new HtmlNodeOpenTag
            {
                Index  = _htmlNodeIndex++,
                Line   = _disableLineColumn ? 0 : _charStreamReader.Line,
                Column = _disableLineColumn ? 0 : _charStreamReader.Column
            };

            openTag.Name = ReadTagName();

            if (!_disableScriptTreatment && string.Equals(openTag.Name, "script", System.StringComparison.OrdinalIgnoreCase))
            {
                openTag.IsScript = true;
            }

            yield return openTag;

            // PeekChar() returning -1 is treated as end of input: nothing more to read for this tag.
            int code = _charStreamReader.PeekChar();
            if (code == -1)
            {
                yield break;
            }

            ReadSeparator();

            int line   = 0;
            int column = 0;

            // NOTE(review): when code is -1 the cast below gives '\uffff' in an unchecked context,
            // which matches none of the characters tested afterwards.
            code = _charStreamReader.PeekChar();
            char current = (char)code;

            // Read the attributes, unless the tag ends right away.
            if (current != '/' && current != '>')
            {
                while (true)
                {
                    // Accumulate the attribute name in the shared string builder.
                    _stringBuilder.Length = 0;
                    line   = 0;
                    column = 0;
                    while (true)
                    {
                        code = _charStreamReader.PeekChar();
                        if (code == -1)
                        {
                            break;
                        }

                        current = (char)code;
                        bool isWhitespace = current == ' ' || current == '\t' || current == '\r' || current == '\n';
                        if (isWhitespace || current == '=' || current == '>' || current == '<' || current == '/')
                        {
                            break;
                        }

                        _stringBuilder.Append(current);
                        _charStreamReader.ReadChar();
                        if (line == 0)
                        {
                            // Position reported by the reader right after the first name character.
                            line   = _charStreamReader.Line;
                            column = _charStreamReader.Column;
                        }
                    }

                    // An empty name means there are no more attributes.
                    if (_stringBuilder.Length == 0)
                    {
                        break;
                    }

                    HtmlNodeProperty property = new HtmlNodeProperty
                    {
                        Index  = _htmlNodeIndex++,
                        Line   = _disableLineColumn ? 0 : line,
                        Column = _disableLineColumn ? 0 : column
                    };

                    property.Name = _stringBuilder.ToString();
                    ReadSeparator();

                    // The value is optional: without '=' the property keeps its default Quote and Value.
                    if ((char)_charStreamReader.PeekChar() == '=')
                    {
                        _charStreamReader.ReadChar();
                        ReadSeparator();
                        HtmlReaderStringValue value = ReadStringValue();
                        property.Quote = value.Quote;
                        property.Value = value.Value;
                        ReadSeparator();
                    }

                    yield return property;
                }
            }

            code    = _charStreamReader.PeekChar();
            current = (char)code;

            bool selfClosing = false;

            line   = 0;
            column = 0;
            if (current == '/')
            {
                // A '/' before the closing '>' marks the tag as ended in place.
                selfClosing = true;
                _charStreamReader.ReadChar();
                line    = _charStreamReader.Line;
                column  = _charStreamReader.Column;
                code    = _charStreamReader.PeekChar();
                current = (char)code;
            }

            // Anything other than '>' here is left unread in the stream.
            if (current == '>')
            {
                _charStreamReader.ReadChar();
                if (selfClosing)
                {
                    yield return CreateHtmlNodeEndTag(openTag.Name, line, column);
                }
                else if (_generateCloseTag)
                {
                    yield return CreateHtmlNodeCloseTag(openTag.Name, _charStreamReader.Line, _charStreamReader.Column);
                }
            }
        }