/// <summary>
/// Reads the attribute list of the tag currently being tokenized.
/// Advances <c>TextTokenizer</c> by one token first, then consumes
/// <c>name</c> and <c>name = value</c> sequences until <c>IsEnd</c> reports the end of the tag.
/// </summary>
/// <param name="isSingle">
/// Forwarded by reference to <c>IsEnd</c> on every end-of-tag check.
/// NOTE(review): presumably set there when the tag is self-closing; IsEnd is not visible here, so confirm.
/// </param>
/// <returns>The attributes in the order they were encountered (possibly empty).</returns>
/// <exception cref="HtmlTextReaderException{T}">
/// Thrown when a token other than an attribute name, an equal sign or an attribute value
/// appears where one of those is required.
/// </exception>
List<HtmlAttribute> GetAttributes(ref bool isSingle)
{
    List<HtmlAttribute> result = new List<HtmlAttribute>();
    TextTokenizer.MoveNext();

    while (!IsEnd(ref isSingle))
    {
        // Each attribute has to start with a name token.
        if (TextTokenizer.Current.Type != HtmlTokenType.String)
        {
            throw new HtmlTextReaderException<HtmlTokenType>(TextTokenizer, "属性名", HtmlKeyword.SingleTag, HtmlKeyword.RightAngleBracket);
        }

        HtmlAttribute current = new HtmlAttribute(TextTokenizer.Current);
        result.Add(current);
        TextTokenizer.MoveNext();

        // The tag may end right after a value-less attribute.
        if (IsEnd(ref isSingle))
        {
            break;
        }

        HtmlTokenType following = TextTokenizer.Current.Type;
        if (following == HtmlTokenType.String)
        {
            // Value-less attribute followed by the next attribute name:
            // leave the token in place so the next iteration picks it up.
            continue;
        }
        if (following != HtmlTokenType.EqualSign)
        {
            throw new HtmlTextReaderException<HtmlTokenType>(TextTokenizer, HtmlKeyword.EqualSign, HtmlKeyword.SingleTag, HtmlKeyword.RightAngleBracket);
        }

        // Step over '=' and require a value token.
        TextTokenizer.MoveNext();
        if (TextTokenizer.Current.Type != HtmlTokenType.String)
        {
            throw new HtmlTextReaderException<HtmlTokenType>(TextTokenizer, "属性值");
        }

        current.Values = new List<string>();
        string unescaped = HtmlAPI.Unescape(TextTokenizer.Current.String);
        if (HtmlAPI.Equals(current.Name, HtmlKeyword.Class))
        {
            // The class attribute is stored as its individual entries.
            current.Values.AddRange(HtmlAPI.Split(unescaped));
        }
        else
        {
            current.Values.Add(unescaped);
        }

        TextTokenizer.MoveNext();
    }

    return result;
}
/// <summary>
/// Creates an exception whose message consists of <paramref name="message"/> on the first line
/// and a description of the tokenizer's current token (type, value, index, length, line,
/// line index) on the second line.
/// </summary>
/// <param name="message">The leading text of the exception message.</param>
/// <param name="textTokenizer">The tokenizer whose <c>Current</c> token is described in the message.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="message"/> or <paramref name="textTokenizer"/> is <c>null</c>.
/// </exception>
public HtmlTextReaderException(string message, LexicalAnalyzer<T> textTokenizer)
{
    if (message is null)
    {
        throw new ArgumentNullException(nameof(message));
    }
    if (textTokenizer is null)
    {
        throw new ArgumentNullException(nameof(textTokenizer));
    }

    // Read the current token once; all fields below describe the same token.
    var current = textTokenizer.Current;

    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.AppendLine(message);
    stringBuilder.AppendLine(
        $"类型:{current.Type} " +
        // A separating space follows the value so it no longer runs into the next label.
        $"值:{current.Value} " +
        $"索引:{current.StartIndex} " +
        $"长度:{current.Length} " +
        $"行:{current.Line} " +
        $"行索引:{current.LineStartIndex} "
    );

    // The message is installed through the project helper instead of a base-constructor call.
    HtmlAPI.SetExceptionMessage(this, stringBuilder.ToString());
}
/// <summary>
/// Reads the next tag via <c>MoveNext</c> and turns it into a node. A start tag is read
/// recursively together with its children.
/// </summary>
/// <param name="parentElement">The element passed to every node constructor as the new node's parent.</param>
/// <param name="node">
/// The node that was read. It is <c>null</c> when the method returns <c>false</c>, and also when
/// a mismatched end tag that is not in <c>Tags</c> was skipped (return value <c>true</c>).
/// </param>
/// <param name="endElementLocalName">
/// Local name of the element whose children are being read, or <c>null</c> when there is none.
/// </param>
/// <returns>
/// <c>true</c> when the caller should keep reading siblings; <c>false</c> when the input is
/// exhausted or an end tag closed the current element.
/// </returns>
/// <exception cref="NotImplementedException">The current tag has a type this method does not handle.</exception>
bool ReadNode(HtmlElement parentElement, out HtmlNode node, string endElementLocalName = null)
{
    if (!MoveNext())
    {
        node = null;
        return false;
    }

    switch (Current.Type)
    {
        case HtmlTagType.Define:
            node = new HtmlDocumentType(Current.Attributes, parentElement);
            return true;

        case HtmlTagType.Single:
            node = new HtmlElement(Current.Name, Current.Attributes, parentElement);
            return true;

        case HtmlTagType.Text:
            node = new HtmlText(HtmlAPI.Unescape(Current.Content), parentElement);
            return true;

        case HtmlTagType.Comment:
            node = new HtmlComment(Current.Content, parentElement);
            return true;

        case HtmlTagType.Start:
        {
            HtmlElement opened = new HtmlElement(Current.Name, Current.Attributes, parentElement);
            node = opened;

            if (HtmlAPI.IsContentTag(opened.LocalName))
            {
                // Content tags are not parsed recursively: their body is taken as
                // raw text up to the matching closing tag.
                TextTokenizer.SkipIgnoreCharacters();
                string rawText = TextTokenizer.ReadTo($"</{opened.LocalName}>").TrimEnd();
                if (rawText.Length > 0)
                {
                    node.ChildNodes.Add(new HtmlText(rawText, opened));
                }
                return true;
            }

            // Record the open element so end tags can be matched against it.
            Tags.Add(opened.LocalName);

            HtmlNode previous = null;
            while (ReadNode(opened, out HtmlNode child, opened.LocalName))
            {
                if (child is null)
                {
                    // A stray end tag was skipped; nothing to attach.
                    continue;
                }

                if (previous != null)
                {
                    previous.NextSibling = child;
                    child.PreviousSibling = previous;
                }

                node.ChildNodes.Add(child);
                previous = child;
            }
            return true;
        }

        case HtmlTagType.End:
        {
            bool mismatched = endElementLocalName != null
                && !HtmlAPI.Equals(Current.Name, endElementLocalName);

            if (mismatched)
            {
                if (!Tags.Contains(Current.Name))
                {
                    // End tag for something that is not open: ignore it and keep reading.
                    node = null;
                    return true;
                }

                // End tag of another open element: hand it back through Buffer.
                // NOTE(review): presumably MoveNext re-delivers buffered tags so an
                // enclosing ReadNode call sees it - confirm against MoveNext.
                Buffer.Push(Current);
            }

            // Close the most recently opened element.
            if (Tags.Count > 0)
            {
                Tags.RemoveAt(Tags.Count - 1);
            }

            node = null;
            return false;
        }

        default:
            throw new NotImplementedException(Current.Type.ToString());
    }
}
/// <summary>
/// Evaluates a CSS-like selector against <c>RootNode</c> and returns the matching elements.
/// </summary>
/// <remarks>
/// Tokens handled: the multiple sign (all elements), a dot followed by a class name,
/// a number sign followed by an id, a tag name, a comma (starts a new selector from the root),
/// a right angle bracket (the next search uses depth 0 instead of -1), and brackets containing
/// either an integer index into the current result set, an attribute name, or
/// <c>name=value</c>.
/// A class or id selector starts a new search when it follows whitespace, a comma or a right
/// angle bracket; otherwise it filters the current result set. Whitespace after a comma or a
/// right angle bracket is optional.
/// </remarks>
/// <param name="path">The selector text.</param>
/// <returns>The elements matched by every comma-separated selector, concatenated in selector order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException"><c>RootNode</c> is neither an element nor a document.</exception>
/// <exception cref="ArgumentOutOfRangeException">A bracketed integer index is outside the current result set.</exception>
/// <exception cref="HtmlTextReaderException{T}">A bracket expression is malformed.</exception>
public List<HtmlElement> QuerySelectorAll(string path)
{
    if (path is null)
    {
        throw new ArgumentNullException(nameof(path));
    }

    TextReader = new StringReader(path);

    List<HtmlElement> elements = new List<HtmlElement>();
    List<HtmlElement> targetElements = new List<HtmlElement>();
    List<HtmlElement> tempElements = new List<HtmlElement>();

    switch (RootNode)
    {
        case HtmlElement element:
            Root = element;
            break;
        case HtmlDocument document:
            Root = document.Root;
            break;
        default:
            throw new InvalidOperationException("必须是元素或文档元素");
    }

    targetElements.Add(Root);

    // isChild: the next class/id selector starts a new search below the current
    // result set (true) or narrows the current result set (false).
    bool isChild = true;
    // depth: search depth handed to the GetElementsBy* helpers; -1 by default,
    // 0 right after a '>' token.
    int depth = -1;

    while (MoveNext())
    {
        switch (Current.Type)
        {
            case HtmlTokenType.MultipleSign:
            {
                Query(targetElements, tempElements, element => element.All);
                break;
            }
            case HtmlTokenType.Dot:
            {
                MoveNext();
                string name = Current;
                if (isChild)
                {
                    Query(targetElements, tempElements, element => element.GetElementsByClassName(name, depth));
                    break;
                }
                Filter(targetElements, element => element.ClassList.Contains(name));
                break;
            }
            case HtmlTokenType.NumberSign:
            {
                MoveNext();
                string id = Current;
                if (isChild)
                {
                    Query(targetElements, tempElements, element => element.GetElementsByAttribute(HtmlKeyword.ID, id, depth));
                    break;
                }
                Filter(targetElements, element => HtmlAPI.Equals(element.GetAttribute(HtmlKeyword.ID), id));
                break;
            }
            case HtmlTokenType.Comma:
            {
                // Finish the current selector and restart from the root for the next one.
                elements.AddRange(targetElements);
                targetElements.Clear();
                targetElements.Add(Root);
                break;
            }
            case HtmlTokenType.String:
            {
                string name = Current;
                Query(targetElements, tempElements, element => element.GetElementsByTagName(name, depth));
                break;
            }
            case HtmlTokenType.LeftBracket:
            {
                MoveNext();
                if (Current.Type == HtmlTokenType.Integer)
                {
                    // [n]: keep only the n-th element of the current result set.
                    HtmlElement element = targetElements[Current.Int];
                    targetElements.Clear();
                    targetElements.Add(element);
                    MoveNext();
                    if (Current.Type != HtmlTokenType.RightBracket)
                    {
                        throw new HtmlTextReaderException<HtmlTokenType>(this, HtmlKeyword.RightBracket);
                    }
                    break;
                }

                string name = Current;
                MoveNext();
                switch (Current.Type)
                {
                    case HtmlTokenType.EqualSign:
                        // [name=value]: keep elements whose attribute equals the value.
                        MoveNext();
                        string value = Current;
                        Filter(targetElements, element => HtmlAPI.Equals(element.GetAttribute(name), value));
                        MoveNext();
                        if (Current.Type != HtmlTokenType.RightBracket)
                        {
                            throw new HtmlTextReaderException<HtmlTokenType>(this, HtmlKeyword.RightBracket);
                        }
                        break;
                    case HtmlTokenType.RightBracket:
                        // [name]: keep elements that have the attribute at all.
                        Filter(targetElements, element => element.GetAttributeNode(name) != null);
                        break;
                    default:
                        throw new HtmlTextReaderException<HtmlTokenType>(this, HtmlKeyword.EqualSign, HtmlKeyword.RightBracket);
                }
                break;
            }
        }

        // Work out how the NEXT token has to be interpreted.
        depth = -1;
        isChild = false;
        switch (Current.Type)
        {
            case HtmlTokenType.RightAngleBracket:
                // '>' always introduces a new search, at depth 0, even when no
                // whitespace follows it (e.g. "a>.b").
                depth = 0;
                isChild = true;
                break;
            case HtmlTokenType.Comma:
                // ',' restarts from the root, so the next selector is a new search
                // even when no whitespace follows it (e.g. "a,.b").
                isChild = true;
                break;
        }
        if (char.IsWhiteSpace((char)Peek()))
        {
            // Whitespace before the next token also makes it start a new search.
            isChild = true;
        }
    }

    elements.AddRange(targetElements);
    return elements;
}