示例#1
0
        /// <summary>
        /// Reads HTML from <paramref name="textReader"/> and returns the document nodes
        /// produced by a new <c>HtmlDocReader</c>.
        /// </summary>
        /// <param name="textReader">Source of the HTML text; passed as-is to <c>HtmlReader_v4.Read</c>.</param>
        /// <returns>The sequence returned by the instance-level <c>Read()</c> of the new reader.</returns>
        public static IEnumerable<HtmlDocNode> Read(TextReader textReader)
        {
            // The low-level reader is run with the default options plus GenerateCloseTag,
            // and its node sequence becomes the input of the document reader.
            HtmlDocReader docReader = new HtmlDocReader
            {
                _nodes = HtmlReader_v4.Read(textReader, HtmlReaderOptions.Default | HtmlReaderOptions.GenerateCloseTag)
            };

            return docReader.Read();
        }
示例#2
0
        //HtmlReaderOptions
        //public static IEnumerable<HtmlNode> Read(TextReader textReader, bool generateCloseTag = false, bool disableLineColumn = false,
        //    bool disableScriptTreatment = false, bool useReadAttributeValue_v2 = true, bool useTranslateChar = true, bool useFilterChar = true)
        /// <summary>
        /// Creates an <c>HtmlReader_v4</c> over <paramref name="textReader"/> and returns the nodes it reads.
        /// </summary>
        /// <param name="textReader">Source of the HTML text.</param>
        /// <param name="options">
        /// Reader option flags; defaults to <c>HtmlReaderOptions.Default</c>. The flags are handed to the
        /// reader's constructor rather than being applied here.
        /// </param>
        /// <returns>The sequence returned by the reader's instance-level <c>Read()</c>.</returns>
        public static IEnumerable<HtmlNode> Read(TextReader textReader, HtmlReaderOptions options = HtmlReaderOptions.Default)
        {
            return new HtmlReader_v4(textReader, options).Read();
        }
示例#3
0
        /// <summary>
        /// Builds an <see cref="XDocument"/> from the nodes produced by <c>_htmlReader.Read()</c>.
        /// </summary>
        /// <remarks>
        /// Text, comment, script, document-type, property (attribute), open-tag and end-tag nodes are
        /// handled; any other node type is ignored. Tag and attribute names are lower-cased with the
        /// invariant culture so the generated XML names do not depend on the current thread culture
        /// (e.g. "TITLE" must never become "tıtle" under a Turkish culture).
        /// </remarks>
        /// <returns>The document exposed by the <c>XDocumentCreator</c> created for this call.</returns>
        public XDocument CreateXml()
        {
            // NOTE: HtmlReader_v4 does not manage ReadCommentInText itself; comment routing is done
            // below according to _readCommentInText.
            // NOTE(review): this conversion appears to rely on the reader generating close tags
            // (GenerateCloseTag option) - confirm at the place where _htmlReader is created.
            _xdCreator = new XDocumentCreator();

            InitXml();

            // Reset the per-run parsing state.
            _tableStack = new Stack<HtmlTable_v3>();
            _table = null;

            _definitionListStack = new Stack<XElement>();
            _definitionList = null;

            _noTag = false;
            _body = false;
            _title = false;

            foreach (HtmlNode htmlNode in _htmlReader.Read())
            {
                if (htmlNode.Type == HtmlNodeType.Text || htmlNode.Type == HtmlNodeType.Comment)
                {
                    // Text and comments are dropped entirely when only XML nodes are requested.
                    // (The former logic that switched to the body node on the first text was
                    // disabled on 11/01/2015.)
                    if (_generateXmlNodeOnly)
                    {
                        continue;
                    }

                    if (htmlNode.Type == HtmlNodeType.Text)
                    {
                        AddText(_currentNode, ((HtmlNodeText)htmlNode).Text);
                    }
                    else if (_readCommentInText)
                    {
                        // Comments are emitted as plain text instead of XML comments.
                        AddText(_currentNode, ((HtmlNodeComment)htmlNode).Comment);
                    }
                    else
                    {
                        string commentText = ((HtmlNodeComment)htmlNode).Comment;
                        commentText = _commentCorrection.Replace(commentText, "-");

                        // An XML comment must not end with '-' (it would produce "--->"), so pad it.
                        // Ordinal comparison: this is an exact character check, not a linguistic one.
                        if (commentText.EndsWith("-", StringComparison.Ordinal))
                        {
                            commentText += " ";
                        }
                        _xdCreator.AddComment(_currentNode, commentText);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.Script)
                {
                    // NOTE(review): script content is added even when _generateXmlNodeOnly is set - confirm intended.
                    AddText(_currentNode, ((HtmlNodeScript)htmlNode).Script);
                }
                else if (htmlNode.Type == HtmlNodeType.DocumentType)
                {
                    // The doctype is recorded as a "doctype" attribute on _htmlNode.
                    _xdCreator.AddAttribute(_htmlNode, "doctype", ((HtmlNodeDocType)htmlNode).DocType);
                }
                else if (htmlNode.Type == HtmlNodeType.Property)
                {
                    if (_generateXmlNodeOnly || _noTag)
                    {
                        continue;
                    }

                    HtmlNodeProperty htmlNodeProperty = (HtmlNodeProperty)htmlNode;
                    try
                    {
                        string attributeName = _nameCorrection.Replace(htmlNodeProperty.Name, "");
                        attributeName = attributeName.ToLowerInvariant();
                        if (attributeName == "")
                        {
                            // Nothing usable is left of the name: fall back to a fixed attribute name.
                            attributeName = "__value";
                        }

                        // Changed 28/01/2014:
                        //   hexadecimal value 0x03 is an invalid character in XML
                        //   found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                        //   <html><head><meta name="keywords" content="Conditionnement, travaux &amp;agrave; fa&amp;ccedil;onToutes activit&amp;eacute;s en entreprise Entretien et cr&amp;eacute;ation despaces verts" />
                        string attributeValue = htmlNodeProperty.Value;
                        if (attributeValue != null)
                        {
                            attributeValue = attributeValue.Replace("\x03", "");
                        }
                        _xdCreator.AddAttribute(_currentNode, attributeName, attributeValue);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a bad attribute is reported and skipped, the conversion goes on.
                        Trace.WriteLine($"error in HtmlToXml_v2.CreateXml() : line {htmlNode.Line} column {htmlNode.Column}");
                        Trace.WriteLine(ex.Message);
                    }
                }
                else if (htmlNode.Type == HtmlNodeType.OpenTag)
                {
                    string openTagName = ((HtmlNodeOpenTag)htmlNode).Name.ToLowerInvariant();
                    openTagName = _nameCorrection.Replace(openTagName, "_");
                    if (openTagName == "")
                    {
                        openTagName = "_";
                    }
                    AddTagBegin(openTagName);
                }
                else if (htmlNode.Type == HtmlNodeType.EndTag)
                {
                    // Same name normalization as for open tags so both sides match.
                    string endTagName = ((HtmlNodeEndTag)htmlNode).Name.ToLowerInvariant();
                    endTagName = _nameCorrection.Replace(endTagName, "_");
                    if (endTagName == "")
                    {
                        endTagName = "_";
                    }
                    TagEnd(endTagName);
                }
            }

            return _xdCreator.XDocument;
        }