/// <summary>
/// Converts an HTML end tag into a document node stored in <c>_node</c>.
/// Only <c>p</c> and <c>a</c> end tags (compared after lower-casing the name)
/// produce a node; for any other tag <c>_node</c> is left <c>null</c>.
/// </summary>
/// <param name="endTag">The end tag read from the HTML source; its name, line and column are used.</param>
private void EndTag(HtmlNodeEndTag endTag)
{
    // Reset first so that unhandled tags leave no stale node behind.
    _node = null;

    HtmlTagType tagType;
    string lowerName = endTag.Name.ToLower();
    if (lowerName == "p")
    {
        tagType = HtmlTagType.P;
    }
    else if (lowerName == "a")
    {
        tagType = HtmlTagType.A;
    }
    else
    {
        // Not a tag this method handles: _node stays null and the node index is not consumed.
        return;
    }

    _node = new HtmlDocNodeEndTag
    {
        Tag = tagType,
        Index = ++_nodeIndex,
        Line = endTag.Line,
        Column = endTag.Column
    };
}
/// <summary>
/// Reads an end tag from the character stream. Consumes two characters
/// (expected to be '&lt;' and '/'), reads the tag name through <c>ReadTagName</c>,
/// and consumes a following '&gt;' when one is present.
/// </summary>
/// <returns>
/// The end tag with its index, name and position. Line and column are 0 when
/// <c>_disableLineColumn</c> is set; otherwise they are the reader position
/// taken right after the first character has been consumed.
/// </returns>
private HtmlNodeEndTag ReadEndTag()
{
    // consume '<'
    _charStreamReader.ReadChar();

    HtmlNodeEndTag result = new HtmlNodeEndTag();
    result.Index = _htmlNodeIndex++;
    if (_disableLineColumn)
    {
        result.Line = 0;
        result.Column = 0;
    }
    else
    {
        result.Line = _charStreamReader.Line;
        result.Column = _charStreamReader.Column;
    }

    // consume '/'
    _charStreamReader.ReadChar();

    result.Name = ReadTagName();

    // The closing '>' is optional here: it is consumed only when it is the next character.
    char next = (char)_charStreamReader.PeekChar();
    if (next == '>')
    {
        _charStreamReader.ReadChar();
    }

    return result;
}
/// <summary>
/// Builds an <see cref="XDocument"/> from the nodes produced by <c>_htmlReader.Read()</c>.
/// Resets the conversion state (document creator, table and definition-list stacks,
/// <c>_noTag</c>/<c>_body</c>/<c>_title</c> flags), then dispatches every HTML node
/// according to its type: text, comment, script, doctype, property (attribute),
/// open tag or end tag.
/// </summary>
/// <remarks>
/// Tag and attribute names are lower-cased with the invariant culture so that the
/// generated XML names do not depend on the current thread culture (for example the
/// Turkish dotless 'ı' produced by a culture-sensitive lower-casing of 'I').
/// </remarks>
/// <returns>The document held by the <c>XDocumentCreator</c> created for this call.</returns>
public XDocument CreateXml()
{
    // NOTE (carried over from the original code): HtmlReader_v4 does not manage
    // ReadCommentInText, so that option is not forwarded to the reader.
    _xdCreator = new XDocumentCreator();
    InitXml();

    _tableStack = new Stack<HtmlTable_v3>();
    _table = null;
    _definitionListStack = new Stack<XElement>();
    _definitionList = null;
    _noTag = false;
    _body = false;
    _title = false;

    foreach (HtmlNode htmlNode in _htmlReader.Read())
    {
        if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
        {
            // Text and comments are dropped entirely when only XML nodes are wanted.
            if (_generateXmlNodeOnly)
            {
                continue;
            }

            if (htmlNode.Type == HtmlNodeType.Text)
            {
                AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
            }
            else if (_readCommentInText)
            {
                // Comments are emitted as plain text when this option is set.
                AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
            }
            else
            {
                string comment = ((HtmlNodeComment)htmlNode).Comment;
                comment = _commentCorrection.Replace(comment, "-");
                // Presumably because an XML comment may not end with '-': pad with a space.
                if (comment.EndsWith("-", StringComparison.Ordinal))
                {
                    comment += " ";
                }
                _xdCreator.AddComment(_currentNode, comment);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.Script)
        {
            AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
        }
        else if (htmlNode.Type == HtmlNodeType.DocumentType)
        {
            // The doctype is stored as an attribute of the html node.
            _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
        }
        else if (htmlNode.Type == HtmlNodeType.Property)
        {
            if (_generateXmlNodeOnly || _noTag)
            {
                continue;
            }

            HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
            try
            {
                string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "");
                propertyName = propertyName.ToLowerInvariant();
                if (propertyName == "")
                {
                    propertyName = "__value";
                }

                // Modified 28/01/2014: hexadecimal value 0x03 is an invalid character.
                // Found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                // inside the content attribute of <meta name="keywords" ... />.
                string propertyValue = htmlNodeProperty.Value;
                if (propertyValue != null)
                {
                    propertyValue = propertyValue.Replace("\x03", "");
                }

                _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
            }
            catch (Exception ex)
            {
                // Best effort: a bad attribute is traced and skipped, the conversion goes on.
                Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                Trace.WriteLine(ex.Message);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.OpenTag)
        {
            HtmlNodeOpenTag htmlNodeOpenTag = (HtmlNodeOpenTag)htmlNode;
            string tagName = htmlNodeOpenTag.Name.ToLowerInvariant();
            tagName = _nameCorrection.Replace(tagName, "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            AddTagBegin(tagName);
        }
        else if (htmlNode.Type == HtmlNodeType.EndTag)
        {
            HtmlNodeEndTag htmlNodeEndTag = (HtmlNodeEndTag)htmlNode;
            string tagName = htmlNodeEndTag.Name.ToLowerInvariant();
            tagName = _nameCorrection.Replace(tagName, "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            TagEnd(tagName);
        }
    }

    return _xdCreator.XDocument;
}