/// <summary>
/// Reads the properties that follow an already opened tag and yields one node per
/// property, followed by the node that terminates the tag (when one is produced).
/// </summary>
/// <param name="tag">Tag whose properties are being read; used to build the closing nodes.</param>
/// <returns>A lazy sequence of property nodes, optionally ended by an end-tag or close-tag node.</returns>
private IEnumerable<HtmlNode> ReadPropertiesNodes(HtmlReaderTag tag)
{
    for (;;)
    {
        HtmlReaderProperty current = ReadProperty();

        if (current == null)
        {
            // ReadProperty() returned null: stop, emitting a close-tag node only on request.
            if (_generateCloseTag)
            {
                yield return CreateHtmlNodeCloseTag(tag);
            }
            yield break;
        }

        yield return CreateHtmlNodeProperty(current);

        if (!current.CloseTag)
        {
            // the tag is still open, keep reading properties
            continue;
        }

        // this property is the one that closes the tag
        if (current.EndTag)
        {
            yield return CreateHtmlNodeEndTag(tag);
        }
        else if (_generateCloseTag)
        {
            yield return CreateHtmlNodeCloseTag(tag);
        }
        yield break;
    }
}
/// <summary>
/// Reads the input and yields its content as a lazy sequence of nodes.
/// At each position a tag is tried first (ReadTag), then a comment (ReadComment);
/// anything else is read as text (ReadText).
/// </summary>
/// <returns>The nodes in reading order; the sequence ends when PeekChar() returns -1.</returns>
public IEnumerable<HtmlNode> Read_v2()
{
    // PeekChar() == -1 is the end-of-input signal
    while (PeekChar() != -1)
    {
        HtmlReaderTag tag = ReadTag();
        if (tag != null)
        {
            foreach (HtmlNode node in ReadTagNodes(tag))
            {
                yield return node;
            }
            continue;
        }

        HtmlReaderComment comment = ReadComment();
        if (comment != null)
        {
            yield return CreateHtmlNodeComment(comment);
            continue;
        }

        // neither a tag nor a comment: read as text
        HtmlReaderText text = ReadText();
        yield return CreateHtmlNodeText(text);
    }
}
/// <summary>
/// Builds the end-tag node for <paramref name="tag"/> and assigns it the next node index.
/// </summary>
/// <param name="tag">Tag providing the name and the source position.</param>
/// <returns>The new end-tag node; Line and Column are 0 when _disableLineColumn is set.</returns>
private HtmlNodeEndTag CreateHtmlNodeEndTag(HtmlReaderTag tag)
{
    HtmlNodeEndTag endTag = new HtmlNodeEndTag();

    // the node counter is consumed first, before any member of tag is read
    endTag.Index = _htmlNodeIndex++;

    if (_disableLineColumn)
    {
        endTag.Line = 0;
        endTag.Column = 0;
    }
    else
    {
        endTag.Line = tag.Line;
        endTag.Column = tag.Column;
    }

    endTag.Name = tag.Name;
    return endTag;
}
/// <summary>
/// Builds the document-type node for <paramref name="tag"/> and assigns it the next node index.
/// </summary>
/// <param name="tag">Tag providing the DocType value and the source position.</param>
/// <returns>The new doctype node; Line and Column are 0 when _disableLineColumn is set.</returns>
private HtmlNodeDocType CreateHtmlNodeDocType(HtmlReaderTag tag)
{
    HtmlNodeDocType docType = new HtmlNodeDocType();

    // the node counter is consumed first, before any member of tag is read
    docType.Index = _htmlNodeIndex++;

    if (_disableLineColumn)
    {
        docType.Line = 0;
        docType.Column = 0;
    }
    else
    {
        docType.Line = tag.Line;
        docType.Column = tag.Column;
    }

    docType.DocType = tag.DocType;
    return docType;
}
/// <summary>
/// Builds the open-tag node for <paramref name="tag"/> and assigns it the next node index.
/// </summary>
/// <param name="tag">Tag providing the name, the script flag and the source position.</param>
/// <returns>The new open-tag node; Line and Column are 0 when _disableLineColumn is set.</returns>
private HtmlNodeOpenTag CreateHtmlNodeOpenTag(HtmlReaderTag tag)
{
    HtmlNodeOpenTag openTag = new HtmlNodeOpenTag();

    // the node counter is consumed first, before any member of tag is read
    openTag.Index = _htmlNodeIndex++;

    if (_disableLineColumn)
    {
        openTag.Line = 0;
        openTag.Column = 0;
    }
    else
    {
        openTag.Line = tag.Line;
        openTag.Column = tag.Column;
    }

    openTag.Name = tag.Name;
    openTag.IsScript = tag.ScriptTag;
    return openTag;
}
/// <summary>
/// Converts a tag returned by ReadTag into the nodes it stands for, reading the tag's
/// properties and, for a script tag, the script content that follows.
/// </summary>
/// <param name="tag">Tag to expand into nodes.</param>
/// <returns>A lazy sequence of the nodes produced for the tag.</returns>
private IEnumerable<HtmlNode> ReadTagNodes(HtmlReaderTag tag)
{
    // Flags carried by the tag (author's notes):
    //   <div>           : BeginTag = true,  EndTag = false, CloseTag = true,  DocTypeTag = false
    //   <div ... >      : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
    //   <div ... />     : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
    //   <div />         : BeginTag = true,  EndTag = true,  CloseTag = true,  DocTypeTag = false
    //   </div>          : BeginTag = false, EndTag = true,  CloseTag = true,  DocTypeTag = false
    //   <!DOCTYPE ... > : BeginTag = false, EndTag = false, CloseTag = true,  DocTypeTag = true
    //
    // Nodes listed for each form (author's notes):
    //   <div>           : OpenTag div, CloseTag div                 EndTag = false
    //   <div ... >      : OpenTag div, Property ..., CloseTag div   EndTag = false
    //   <div ... />     : OpenTag div, Property ..., CloseTag div   EndTag = true
    //   <div />         : OpenTag div, CloseTag div                 EndTag = true
    //   </div>          : EndTag div
    //   <!DOCTYPE ... > : DocumentType
    //
    // NOTE(review): when tag.CloseTag is already true (e.g. <div>) this method emits no
    // CloseTag node, although the table above lists one — confirm which is intended.

    if (tag.DocTypeTag)
    {
        yield return CreateHtmlNodeDocType(tag);
        yield break;
    }

    if (!tag.BeginTag)
    {
        // not an opening tag: only a pure end tag produces a node
        if (tag.EndTag)
        {
            yield return CreateHtmlNodeEndTag(tag);
        }
        yield break;
    }

    yield return CreateHtmlNodeOpenTag(tag);

    if (tag.EndTag)
    {
        yield return CreateHtmlNodeEndTag(tag);
    }

    // the tag is not closed yet: its properties follow
    if (!tag.CloseTag)
    {
        foreach (HtmlNode propertyNode in ReadPropertiesNodes(tag))
        {
            yield return propertyNode;
        }
    }

    // a script tag is followed by its script content
    if (tag.ScriptTag)
    {
        HtmlReaderScript scriptContent = ReadScript();
        if (scriptContent != null)
        {
            yield return CreateHtmlNodeScript(scriptContent);
        }
    }
}
// History: the tag-start test in ReadTag was changed on 11/01/2015 to also handle <!DOCTYPE ...

/// <summary>
/// Tries to read a tag at the current position.
/// </summary>
/// <returns>
/// The tag that was read (for a doctype, the tag returned by ReadDocType with the starting
/// line and column copied onto it), or null when the input at the current position does not
/// start a tag; in that case nothing is consumed by this method.
/// </returns>
private HtmlReaderTag ReadTag()
{
    // Flags set on the returned tag (author's notes):
    //   <div>           : BeginTag = true,  EndTag = false, CloseTag = true,  DocTypeTag = false
    //   <div ... >      : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
    //   <div ... />     : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
    //   <div />         : BeginTag = true,  EndTag = true,  CloseTag = true,  DocTypeTag = false
    //   </div>          : BeginTag = false, EndTag = true,  CloseTag = true,  DocTypeTag = false
    //   <!DOCTYPE ... > : BeginTag = false, EndTag = false, CloseTag = true,  DocTypeTag = true

    // A tag starts with "<letter", "</letter" or "<!D".
    // NOTE(review): only an upper-case 'D' is accepted after "<!" although the name is later
    // compared case-insensitively, so "<!doctype" is not recognized here — confirm intended.
    bool startsTag = (char)PeekChar() == '<'
        && (char.IsLetter((char)PeekChar(1))
            || (PeekChar(1) == '/' && char.IsLetter((char)PeekChar(2)))
            || (PeekChar(1) == '!' && PeekChar(2) == 'D'));
    if (!startsTag)
    {
        return null;
    }

    HtmlReaderTag tag = new HtmlReaderTag();
    tag.Line = _line;
    tag.Column = _column;

    // read '<'
    GetChar();
    if (PeekChar() == '/')
    {
        tag.EndTag = true;
        GetChar();
    }
    else
    {
        tag.BeginTag = true;
    }

    // read tag name: everything up to '<', '/', '>', a whitespace character or end of input
    _stringBuilder.Length = 0;
    GetChar();
    while (_charInt != -1 && _char != '<' && _char != '/' && _char != '>'
        && _char != ' ' && _char != '\t' && _char != '\r' && _char != '\n')
    {
        _stringBuilder.Append(_char);
        GetChar();
    }
    tag.Name = _stringBuilder.ToString();

    // Tag names are compared ordinally: a culture-sensitive, case-insensitive comparison
    // fails to match "SCRIPT" with "script" under cultures such as tr-TR.
    if (string.Equals(tag.Name, "!doctype", System.StringComparison.OrdinalIgnoreCase) && tag.BeginTag)
    {
        HtmlReaderTag docTypeTag = ReadDocType();
        docTypeTag.Line = tag.Line;
        docTypeTag.Column = tag.Column;
        return docTypeTag;
    }

    if (string.Equals(tag.Name, "script", System.StringComparison.OrdinalIgnoreCase) && tag.BeginTag)
    {
        tag.ScriptTag = true;
    }

    if (_char == ' ' || _char == '\t' || _char == '\r' || _char == '\n')
    {
        ReadSeparator();
    }

    if (_char == '/')
    {
        // '/' after the name: the tag is flagged as both begin and end tag
        tag.BeginTag = true;
        tag.EndTag = true;
        GetChar();
    }

    if (_char == '>')
    {
        tag.CloseTag = true;
    }
    else
    {
        // the current character is not part of the tag head: give it back
        UnreadChar();
    }

    return tag;
}
// Legacy notes (translated from French); they use the vocabulary of an earlier flag-based read API.
// Legacy return value: true = value found, false = no more value.
// Values returned by read:
//   a mark is either MarkBegin, MarkEnd or MarkBeginEnd
//   the last property of a mark can be MarkBeginEnd
//   <mark>text</mark> :
//     - IsMarkBegin = true    MarkName = "mark"
//     - IsText = true         MarkName = "mark"  Value = "text"
//     - IsMarkEnd = true      MarkName = "mark"
//   <mark property="value">text</mark> :
//     - IsMarkBegin = true    MarkName = "mark"
//     - IsProperty = true     MarkName = "mark"  PropertyName = "property"  PropertyValue = "value"
//     - IsText = true         MarkName = "mark"  Value = "text"
//     - IsMarkEnd = true      MarkName = "mark"
//   <mark property="value"/> :
//     - IsMarkBegin = true    MarkName = "mark"
//     - IsProperty = true     MarkName = "mark"  PropertyName = "property"  PropertyValue = "value"  IsMarkBeginEnd = true
//   <mark/> :
//     - IsMarkBeginEnd = true MarkName = "mark"
//   <script>source</script> :
//     - IsMarkBegin = true    IsScript = true    MarkName = "script"
//     - IsText = true         IsScript = true    MarkName = "script"  Value = "source"
//     - IsMarkEnd = true      IsScript = false   MarkName = "script"

/// <summary>
/// Reads the input and yields its content as a lazy sequence of nodes, building every node inline.
/// At each position a tag is tried first (ReadTag), then a comment (ReadComment);
/// anything else is read as text (ReadText).
/// </summary>
/// <returns>The nodes in reading order; the sequence ends when PeekChar() returns -1.</returns>
public IEnumerable<HtmlNode> Read_v1()
{
    // PeekChar() == -1 is the end-of-input signal
    while (PeekChar() != -1)
    {
        HtmlReaderTag tag = ReadTag();
        if (tag != null)
        {
            // Flags carried by the tag (author's notes):
            //   <div>           : BeginTag = true,  EndTag = false, CloseTag = true,  DocTypeTag = false
            //   <div ... >      : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
            //   <div ... />     : BeginTag = true,  EndTag = false, CloseTag = false, DocTypeTag = false, ready to read properties
            //   <div />         : BeginTag = true,  EndTag = true,  CloseTag = true,  DocTypeTag = false
            //   </div>          : BeginTag = false, EndTag = true,  CloseTag = true,  DocTypeTag = false
            //   <!DOCTYPE ... > : BeginTag = false, EndTag = false, CloseTag = true,  DocTypeTag = true
            //
            // Nodes listed for each form (author's notes):
            //   <div>           : OpenTag div, CloseTag div                 EndTag = false
            //   <div ... >      : OpenTag div, Property ..., CloseTag div   EndTag = false
            //   <div ... />     : OpenTag div, Property ..., CloseTag div   EndTag = true
            //   <div />         : OpenTag div, CloseTag div                 EndTag = true
            //   </div>          : EndTag div
            //   <!DOCTYPE ... > : DocumentType
            if (tag.DocTypeTag)
            {
                yield return new HtmlNodeDocType
                {
                    Index = _htmlNodeIndex++,
                    Line = _disableLineColumn ? 0 : tag.Line,
                    Column = _disableLineColumn ? 0 : tag.Column,
                    DocType = tag.DocType
                };
            }
            else if (tag.BeginTag)
            {
                yield return new HtmlNodeOpenTag
                {
                    Index = _htmlNodeIndex++,
                    Line = _disableLineColumn ? 0 : tag.Line,
                    Column = _disableLineColumn ? 0 : tag.Column,
                    Name = tag.Name,
                    IsScript = tag.ScriptTag
                };

                if (tag.EndTag)
                {
                    yield return new HtmlNodeEndTag
                    {
                        Index = _htmlNodeIndex++,
                        Line = _disableLineColumn ? 0 : tag.Line,
                        Column = _disableLineColumn ? 0 : tag.Column,
                        Name = tag.Name
                    };
                }

                // read properties
                if (!tag.CloseTag)
                {
                    while (true)
                    {
                        HtmlReaderProperty property = ReadProperty();
                        if (property != null)
                        {
                            yield return new HtmlNodeProperty
                            {
                                Index = _htmlNodeIndex++,
                                Line = _disableLineColumn ? 0 : property.Line,
                                Column = _disableLineColumn ? 0 : property.Column,
                                Name = property.Name,
                                Value = property.Value.Value,
                                Quote = property.Value.Quote
                            };

                            if (property.CloseTag)
                            {
                                // this property is the one that closes the tag
                                if (property.EndTag)
                                {
                                    yield return new HtmlNodeEndTag
                                    {
                                        Index = _htmlNodeIndex++,
                                        Line = _disableLineColumn ? 0 : tag.Line,
                                        Column = _disableLineColumn ? 0 : tag.Column,
                                        Name = tag.Name
                                    };
                                }
                                else if (_generateCloseTag)
                                {
                                    yield return new HtmlNodeCloseTag
                                    {
                                        Index = _htmlNodeIndex++,
                                        Line = _disableLineColumn ? 0 : tag.Line,
                                        Column = _disableLineColumn ? 0 : tag.Column,
                                        Name = tag.Name
                                    };
                                }
                                break;
                            }
                        }
                        else
                        {
                            // ReadProperty() returned null: stop, emitting a close-tag node only on request
                            if (_generateCloseTag)
                            {
                                yield return new HtmlNodeCloseTag
                                {
                                    Index = _htmlNodeIndex++,
                                    Line = _disableLineColumn ? 0 : tag.Line,
                                    Column = _disableLineColumn ? 0 : tag.Column,
                                    Name = tag.Name
                                };
                            }
                            break;
                        }
                    }
                }

                // read script
                if (tag.ScriptTag)
                {
                    HtmlReaderScript script = ReadScript();
                    if (script != null)
                    {
                        yield return new HtmlNodeScript
                        {
                            Index = _htmlNodeIndex++,
                            Line = _disableLineColumn ? 0 : script.Line,
                            Column = _disableLineColumn ? 0 : script.Column,
                            Script = script.Script.zReplaceControl()
                        };
                    }
                }
            }
            else if (tag.EndTag)
            {
                yield return new HtmlNodeEndTag
                {
                    Index = _htmlNodeIndex++,
                    Line = _disableLineColumn ? 0 : tag.Line,
                    Column = _disableLineColumn ? 0 : tag.Column,
                    Name = tag.Name
                };
            }
            continue;
        }

        HtmlReaderComment comment = ReadComment();
        if (comment != null)
        {
            yield return new HtmlNodeComment
            {
                Index = _htmlNodeIndex++,
                Line = _disableLineColumn ? 0 : comment.Line,
                Column = _disableLineColumn ? 0 : comment.Column,
                Comment = comment.Comment.zReplaceControl()
            };
            continue;
        }

        // neither a tag nor a comment: read as text
        HtmlReaderText text = ReadText();
        yield return new HtmlNodeText
        {
            Index = _htmlNodeIndex++,
            Line = _disableLineColumn ? 0 : text.Line,
            Column = _disableLineColumn ? 0 : text.Column,
            Text = text.Text.zReplaceControl(),
            IsTextSeparator = text.IsTextSeparator
        };
    }
}