/// <summary>
/// Reads HTML from <paramref name="textReader"/> and returns the document-level nodes.
/// </summary>
/// <remarks>
/// The low-level node stream is obtained from <c>HtmlReader_v4.Read</c> using the default
/// options combined with <c>HtmlReaderOptions.GenerateCloseTag</c>; that stream is stored in a
/// new <c>HtmlDocReader</c>, whose instance <c>Read()</c> result is returned.
/// </remarks>
/// <param name="textReader">Source of the HTML text; passed unchanged to <c>HtmlReader_v4.Read</c>.</param>
/// <returns>The sequence returned by the <c>HtmlDocReader</c> instance's <c>Read()</c>.</returns>
public static IEnumerable<HtmlDocNode> Read(TextReader textReader)
{
    HtmlDocReader docReader = new HtmlDocReader
    {
        _nodes = HtmlReader_v4.Read(textReader, HtmlReaderOptions.Default | HtmlReaderOptions.GenerateCloseTag)
    };
    return docReader.Read();
}
/// <summary>
/// Reads HTML from <paramref name="textReader"/> and returns the nodes produced by a new
/// <c>HtmlReader_v4</c> configured with <paramref name="options"/>.
/// </summary>
/// <param name="textReader">Source of the HTML text; handed to the <c>HtmlReader_v4</c> constructor.</param>
/// <param name="options">Reader options handed to the <c>HtmlReader_v4</c> constructor; defaults to <c>HtmlReaderOptions.Default</c>.</param>
/// <returns>The sequence returned by the reader instance's <c>Read()</c>.</returns>
public static IEnumerable<HtmlNode> Read(TextReader textReader, HtmlReaderOptions options = HtmlReaderOptions.Default)
{
    return new HtmlReader_v4(textReader, options).Read();
}
/// <summary>
/// Builds an <see cref="XDocument"/> from the nodes returned by <c>_htmlReader.Read()</c>.
/// </summary>
/// <remarks>
/// The method first resets the conversion state (a new <c>XDocumentCreator</c>, <c>InitXml()</c>,
/// the table and definition-list stacks and the <c>_noTag</c>/<c>_body</c>/<c>_title</c> flags),
/// then handles each node according to its type:
/// <list type="bullet">
/// <item>Text / Comment: skipped when <c>_generateXmlNodeOnly</c> is set; otherwise text is added
/// with <c>AddText</c>, and a comment is added either as text (when <c>_readCommentInText</c>) or
/// as an XML comment.</item>
/// <item>Script: the script content is added with <c>AddText</c>.</item>
/// <item>DocumentType: stored as a <c>doctype</c> attribute on <c>_htmlNode</c>.</item>
/// <item>Property: added as an attribute on <c>_currentNode</c>, unless
/// <c>_generateXmlNodeOnly</c> or <c>_noTag</c> is set.</item>
/// <item>OpenTag / EndTag: forwarded to <c>AddTagBegin</c> / <c>TagEnd</c> with a normalized name.</item>
/// </list>
/// Tag and attribute names are lower-cased with the invariant culture so the produced names do not
/// depend on the current thread culture (with a Turkish culture, <c>"TITLE".ToLower()</c> yields
/// a dotless i).
/// Original author note: HtmlReader_v4 does not manage ReadCommentInText.
/// </remarks>
/// <returns>The <c>XDocument</c> exposed by the internal <c>XDocumentCreator</c>.</returns>
public XDocument CreateXml()
{
    _xdCreator = new XDocumentCreator();
    InitXml();
    _tableStack = new Stack<HtmlTable_v3>();
    _table = null;
    _definitionListStack = new Stack<XElement>();
    _definitionList = null;
    _noTag = false;
    _body = false;
    _title = false;

    foreach (HtmlNode htmlNode in _htmlReader.Read())
    {
        if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
        {
            // Text and comments are ignored entirely when only XML nodes are requested.
            if (_generateXmlNodeOnly)
            {
                continue;
            }

            if (htmlNode.Type == HtmlNodeType.Text)
            {
                AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
            }
            else if (_readCommentInText)
            {
                // Comment content is emitted as plain text.
                AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
            }
            else
            {
                string comment = ((HtmlNodeComment)htmlNode).Comment;
                // NOTE(review): _commentCorrection is defined elsewhere; presumably it collapses
                // sequences that are illegal inside an XML comment into a single "-" - confirm.
                comment = _commentCorrection.Replace(comment, "-");
                // An XML comment must not end with "-" (it would produce "--->"), so pad it.
                if (comment.EndsWith("-", StringComparison.Ordinal))
                {
                    comment += " ";
                }
                _xdCreator.AddComment(_currentNode, comment);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.Script)
        {
            AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
        }
        else if (htmlNode.Type == HtmlNodeType.DocumentType)
        {
            _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
        }
        else if (htmlNode.Type == HtmlNodeType.Property)
        {
            if (_generateXmlNodeOnly || _noTag)
            {
                continue;
            }

            HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
            try
            {
                // Strip the characters matched by _nameCorrection, then lower-case culture-independently.
                string propertyName = _nameCorrection.Replace(htmlNodeProperty.Name, "").ToLowerInvariant();
                if (propertyName == "")
                {
                    // Nothing usable left in the name: fall back to a fixed attribute name.
                    propertyName = "__value";
                }

                // Modified on 28/01/2014:
                // hexadecimal value 0x03 is an invalid character in XML.
                // Found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                // (inside the content of a <meta name="keywords" ...> element).
                string propertyValue = htmlNodeProperty.Value;
                if (propertyValue != null)
                {
                    propertyValue = propertyValue.Replace("\x03", "");
                }

                _xdCreator.AddAttribute(_currentNode, propertyName, propertyValue);
            }
            catch (Exception ex)
            {
                // Best effort: a property that cannot be converted is traced and skipped,
                // the conversion continues with the next node.
                Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                Trace.WriteLine(ex.Message);
            }
        }
        else if (htmlNode.Type == HtmlNodeType.OpenTag)
        {
            HtmlNodeOpenTag htmlNodeOpenTag = (HtmlNodeOpenTag)htmlNode;
            // Lower-case culture-independently, then replace the characters matched by _nameCorrection with "_".
            string tagName = _nameCorrection.Replace(htmlNodeOpenTag.Name.ToLowerInvariant(), "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            AddTagBegin(tagName);
        }
        else if (htmlNode.Type == HtmlNodeType.EndTag)
        {
            HtmlNodeEndTag htmlNodeEndTag = (HtmlNodeEndTag)htmlNode;
            // Same normalization as for open tags so begin/end names match.
            string tagName = _nameCorrection.Replace(htmlNodeEndTag.Name.ToLowerInvariant(), "_");
            if (tagName == "")
            {
                tagName = "_";
            }
            TagEnd(tagName);
        }
    }

    return _xdCreator.XDocument;
}