/// <summary>
/// Applies one HTML attribute to the node currently held in <c>_node</c>.
/// </summary>
/// <remarks>
/// Only two node types are handled; for any other node type the method does nothing:
/// <list type="bullet">
/// <item><description><c>HtmlDocNodeBeginTagA</c>: <c>href</c> is stored in <c>Link</c>.</description></item>
/// <item><description><c>HtmlDocNodeTagImg</c>: <c>src</c> is stored in <c>Link</c>, <c>class</c> is split
/// on spaces into <c>ClassList</c>, and <c>width</c> is parsed into <c>Width</c>.</description></item>
/// </list>
/// Attribute names are matched case-insensitively.
/// </remarks>
/// <param name="property">The attribute (name and value) read from the HTML source.</param>
private void Property(HtmlNodeProperty property)
{
    HtmlDocNodeBeginTagA anchor = _node as HtmlDocNodeBeginTagA;
    // The anchor check takes precedence, so the image check is only made when it fails.
    HtmlDocNodeTagImg image = anchor == null ? _node as HtmlDocNodeTagImg : null;
    if (anchor == null && image == null)
    {
        return;
    }

    // Lower-case once, with the invariant culture: a culture-sensitive ToLower() turns
    // "WIDTH" into "wıdth" under a Turkish current culture and the attribute is then missed.
    string name = property.Name?.ToLowerInvariant();

    if (anchor != null)
    {
        if (name == "href")
        {
            anchor.Link = property.Value;
        }
        return;
    }

    if (name == "src")
    {
        image.Link = property.Value;
    }
    else if (name == "class")
    {
        image.ClassList = zsplit.Split(property.Value, ' ', true);
    }
    else if (name == "width")
    {
        int? width = property.Value.zTryParseAs<int?>();
        if (width == null)
        {
            // The value is only traced; Width is still assigned (to null) below.
            Trace.WriteLine($"unknow width \"{property.Value}\"");
        }
        image.Width = width;
    }
}
/// <summary>
/// Reads every node produced by <c>_htmlReader.Read()</c> and builds the corresponding XML
/// document through <c>_xdCreator</c>.
/// </summary>
/// <remarks>
/// Handling per node type:
/// <list type="bullet">
/// <item><description>Text: added as text to the current node (skipped when <c>_generateXmlNodeOnly</c> is set).</description></item>
/// <item><description>Comment: added as text when <c>_readCommentInText</c> is set, otherwise added as an XML
/// comment after being passed through <c>_commentCorrection</c> (skipped when <c>_generateXmlNodeOnly</c> is set).</description></item>
/// <item><description>Script: added as text to the current node.</description></item>
/// <item><description>DocumentType: stored as a <c>doctype</c> attribute on <c>_htmlNode</c>.</description></item>
/// <item><description>Property: added as an attribute of the current node (skipped when
/// <c>_generateXmlNodeOnly</c> or <c>_noTag</c> is set); failures are traced, not rethrown.</description></item>
/// <item><description>OpenTag / EndTag: forwarded to <c>AddTagBegin</c> / <c>TagEnd</c> with a normalised name.</description></item>
/// </list>
/// Any other node type has no branch here and is ignored.
/// </remarks>
/// <returns>The document exposed by <c>_xdCreator.XDocument</c>.</returns>
public XDocument CreateXml()
{
    // NOTE: HtmlReader_v4 does not manage ReadCommentInText, so the choice between
    // "comment as text" and "comment as XML comment" is made in this method.
    _xdCreator = new XDocumentCreator();
    InitXml();

    // Reset the per-conversion state.
    _tableStack = new Stack<HtmlTable_v3>();
    _table = null;
    _definitionListStack = new Stack<XElement>();
    _definitionList = null;
    _noTag = false;
    _body = false;
    _title = false;

    foreach (HtmlNode htmlNode in _htmlReader.Read())
    {
        if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
        {
            if (_generateXmlNodeOnly)
            {
                continue;
            }

            if (htmlNode.Type == HtmlNodeType.Text)
            {
                AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
            }
            else if (_readCommentInText)
            {
                AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
            }
            else
            {
                string comment = ((HtmlNodeComment)htmlNode).Comment;
                comment = _commentCorrection.Replace(comment, "-");
                // A trailing '-' gets a space appended before the comment is added.
                // Ordinal: this is a test for a literal character, not a linguistic comparison.
                if (comment.EndsWith("-", StringComparison.Ordinal))
                {
                    comment += " ";
                }
                _xdCreator.AddComment(_currentNode, comment);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.Script)
        {
            AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
        }
        else if (htmlNode.Type == HtmlNodeType.DocumentType)
        {
            _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
        }
        else if (htmlNode.Type == HtmlNodeType.Property)
        {
            if (_generateXmlNodeOnly || _noTag)
            {
                continue;
            }

            HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
            try
            {
                // Characters matched by _nameCorrection are dropped from attribute names
                // (tag names below use "_" instead). Invariant lower-casing keeps the result
                // independent of the current culture (e.g. "TITLE" must not become "tıtle").
                string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "").ToLowerInvariant();
                if (propertyName == "")
                {
                    propertyName = "__value";
                }

                // Change of 2014-01-28: hexadecimal value 0x03 is an invalid character.
                // Found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                // inside the content of a <meta name="keywords" ...> attribute.
                string propertyValue = htmlNodeProperty.Value;
                if (propertyValue != null)
                {
                    propertyValue = propertyValue.Replace("\x03", "");
                }

                _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
            }
            catch (Exception ex)
            {
                // Best effort: a bad attribute is reported and the conversion continues.
                Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                Trace.WriteLine(ex.Message);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.OpenTag)
        {
            HtmlNodeOpenTag htmlNodeOpenTag = (HtmlNodeOpenTag)htmlNode;
            string tagName = _nameCorrection.Replace(htmlNodeOpenTag.Name.ToLowerInvariant(), "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            AddTagBegin(tagName);
        }
        else if (htmlNode.Type == HtmlNodeType.EndTag)
        {
            HtmlNodeEndTag htmlNodeEndTag = (HtmlNodeEndTag)htmlNode;
            string tagName = _nameCorrection.Replace(htmlNodeEndTag.Name.ToLowerInvariant(), "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            TagEnd(tagName);
        }
    }

    return _xdCreator.XDocument;
}
/// <summary>
/// Reads an opening tag from <c>_charStreamReader</c>, starting at its '&lt;' character, and
/// yields the nodes it contains in source order.
/// </summary>
/// <remarks>
/// Yield order: the <c>HtmlNodeOpenTag</c> first, then one <c>HtmlNodeProperty</c> per attribute,
/// then, if the tag is terminated by '&gt;', either an end-tag node (when the '&gt;' is preceded
/// by '/') or, when <c>_generateCloseTag</c> is set, a close-tag node.
/// If the stream ends right after the tag name, only the open-tag node is yielded.
/// </remarks>
/// <returns>A lazily evaluated sequence; characters are consumed as the sequence is enumerated.</returns>
private IEnumerable<HtmlNode> ReadOpenTag()
{
    // Consume '<'.
    _charStreamReader.ReadChar();

    HtmlNodeOpenTag openTag = new HtmlNodeOpenTag
    {
        Index = _htmlNodeIndex++,
        Line = _disableLineColumn ? 0 : _charStreamReader.Line,
        Column = _disableLineColumn ? 0 : _charStreamReader.Column
    };

    // Read the tag name.
    openTag.Name = ReadTagName();

    // Ordinal comparison: a culture-sensitive ignore-case compare does not treat "SCRIPT"
    // and "script" as equal under a Turkish current culture.
    if (!_disableScriptTreatment
        && string.Equals(openTag.Name, "script", System.StringComparison.OrdinalIgnoreCase))
    {
        openTag.IsScript = true;
    }
    yield return openTag;

    if (_charStreamReader.PeekChar() == -1)
    {
        yield break;
    }

    ReadSeparator();

    // Character codes are compared as int so the end-of-stream value (-1) is never cast to char.
    int code = _charStreamReader.PeekChar();

    // Read the properties (attributes), unless the tag ends immediately.
    if (code != '/' && code != '>')
    {
        while (true)
        {
            // Read the property name into the shared builder.
            _stringBuilder.Remove(0, _stringBuilder.Length);
            int nameLine = 0;
            int nameColumn = 0;
            // A flag rather than a "line == 0" test, so the capture happens exactly once
            // whatever value the reader reports for the line.
            bool positionCaptured = false;

            while (true)
            {
                code = _charStreamReader.PeekChar();
                if (code == -1)
                {
                    break;
                }

                char car = (char)code;
                if (car == ' ' || car == '\t' || car == '\r' || car == '\n'
                    || car == '=' || car == '>' || car == '<' || car == '/')
                {
                    break;
                }

                _stringBuilder.Append(car);
                _charStreamReader.ReadChar();

                // The position is taken from the reader right after the first name character
                // has been consumed.
                if (!positionCaptured)
                {
                    nameLine = _charStreamReader.Line;
                    nameColumn = _charStreamReader.Column;
                    positionCaptured = true;
                }
            }

            // No name could be read: end of the property list.
            if (_stringBuilder.Length == 0)
            {
                break;
            }

            HtmlNodeProperty property = new HtmlNodeProperty
            {
                Index = _htmlNodeIndex++,
                Line = _disableLineColumn ? 0 : nameLine,
                Column = _disableLineColumn ? 0 : nameColumn
            };
            // The name is copied out before the value is read.
            property.Name = _stringBuilder.ToString();

            ReadSeparator();
            if (_charStreamReader.PeekChar() == '=')
            {
                _charStreamReader.ReadChar();
                ReadSeparator();
                HtmlReaderStringValue value = ReadStringValue();
                property.Quote = value.Quote;
                property.Value = value.Value;
                ReadSeparator();
            }
            // A property without '=' is yielded with Quote and Value left unset.
            yield return property;
        }
    }

    code = _charStreamReader.PeekChar();
    bool endTag = false;
    int line = 0;
    int column = 0;
    if (code == '/')
    {
        endTag = true;
        _charStreamReader.ReadChar();
        // Position of the end-tag node: taken right after the '/' has been consumed.
        line = _charStreamReader.Line;
        column = _charStreamReader.Column;
        code = _charStreamReader.PeekChar();
    }

    // Anything other than '>' at this point is left unread for the caller.
    if (code == '>')
    {
        _charStreamReader.ReadChar();
        if (endTag)
        {
            yield return CreateHtmlNodeEndTag(openTag.Name, line, column);
        }
        else if (_generateCloseTag)
        {
            yield return CreateHtmlNodeCloseTag(openTag.Name, _charStreamReader.Line, _charStreamReader.Column);
        }
    }
}