예제 #1
0
        /// <summary>
        /// Reads attribute tokens from <c>TextTokenizer</c> until <c>IsEnd</c> reports the end of the tag.
        /// </summary>
        /// <param name="isSingle">
        /// Forwarded by reference to every <c>IsEnd</c> call; presumably set there when the tag is
        /// self-closing (confirm against <c>IsEnd</c>).
        /// </param>
        /// <returns>The attributes in the order they were read; empty when the tag has none.</returns>
        /// <exception cref="HtmlTextReaderException{T}">
        /// Thrown when a token other than an attribute name, an equal sign, or an attribute value
        /// appears where one of those is required.
        /// </exception>
        List <HtmlAttribute> GetAttributes(ref bool isSingle)
        {
            List <HtmlAttribute> result = new List <HtmlAttribute> ();

            TextTokenizer.MoveNext();
            while (!IsEnd(ref isSingle))
            {
                // Each attribute must begin with a name token.
                if (TextTokenizer.Current.Type != HtmlTokenType.String)
                {
                    throw new HtmlTextReaderException <HtmlTokenType> (TextTokenizer, "属性名", HtmlKeyword.SingleTag, HtmlKeyword.RightAngleBracket);
                }

                HtmlAttribute current = new HtmlAttribute(TextTokenizer.Current);
                result.Add(current);
                TextTokenizer.MoveNext();
                if (IsEnd(ref isSingle))
                {
                    break;
                }

                HtmlTokenType following = TextTokenizer.Current.Type;
                if (following == HtmlTokenType.String)
                {
                    // Attribute without a value: the token just read is the next attribute's name,
                    // so leave it in place for the next iteration.
                    continue;
                }
                if (following != HtmlTokenType.EqualSign)
                {
                    throw new HtmlTextReaderException <HtmlTokenType> (TextTokenizer, HtmlKeyword.EqualSign, HtmlKeyword.SingleTag, HtmlKeyword.RightAngleBracket);
                }

                // Step over '=' and require a value token.
                TextTokenizer.MoveNext();
                if (TextTokenizer.Current.Type != HtmlTokenType.String)
                {
                    throw new HtmlTextReaderException <HtmlTokenType> (TextTokenizer, "属性值");
                }

                current.Values = new List <string> ();
                string text = HtmlAPI.Unescape(TextTokenizer.Current.String);
                if (HtmlAPI.Equals(current.Name, HtmlKeyword.Class))
                {
                    // The class attribute is stored as the individual parts returned by HtmlAPI.Split.
                    current.Values.AddRange(HtmlAPI.Split(text));
                }
                else
                {
                    current.Values.Add(text);
                }
                TextTokenizer.MoveNext();
            }
            return(result);
        }
예제 #2
0
        /// <summary>
        /// Creates an exception whose message is <paramref name="message"/> followed by one line
        /// describing the tokenizer's current token (type, value, start index, length, line and
        /// line start index).
        /// </summary>
        /// <param name="message">Text placed on the first line of the exception message.</param>
        /// <param name="textTokenizer">Tokenizer whose <c>Current</c> token is reported.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="message"/> or <paramref name="textTokenizer"/> is <c>null</c>.
        /// </exception>
        public HtmlTextReaderException(string message, LexicalAnalyzer <T> textTokenizer)
        {
            if (message is null)
            {
                throw new ArgumentNullException(nameof(message));
            }
            if (textTokenizer is null)
            {
                throw new ArgumentNullException(nameof(textTokenizer));
            }
            StringBuilder stringBuilder = new StringBuilder();

            stringBuilder.AppendLine(message);
            // Every field ends with a separating space; the value field previously lacked it, so the
            // token value ran straight into the following "索引:" label.
            stringBuilder.AppendLine(
                $"类型:{textTokenizer.Current.Type} " +
                $"值:{textTokenizer.Current.Value} " +
                $"索引:{textTokenizer.Current.StartIndex} " +
                $"长度:{textTokenizer.Current.Length} " +
                $"行:{textTokenizer.Current.Line} " +
                $"行索引:{textTokenizer.Current.LineStartIndex} "
                );
            // The message is assigned after construction through the project helper rather than via a
            // base-constructor call.
            HtmlAPI.SetExceptionMessage(this, stringBuilder.ToString());
        }
예제 #3
0
        /// <summary>
        /// Reads the next tag and turns it into a node, recursing to collect the children of a start tag.
        /// </summary>
        /// <param name="parentElement">Element passed as the parent of the created node.</param>
        /// <param name="node">
        /// The node that was read. <c>null</c> when nothing was produced: input exhausted, an end tag
        /// was handled, or an unmatched end tag was ignored.
        /// </param>
        /// <param name="endElementLocalName">
        /// Local name of the element whose children are being read, or <c>null</c> when there is none.
        /// </param>
        /// <returns>
        /// <c>true</c> when the caller should keep reading (even if <paramref name="node"/> is
        /// <c>null</c>); <c>false</c> when <c>MoveNext</c> fails or an end tag terminates the current element.
        /// </returns>
        /// <exception cref="NotImplementedException">The current tag type has no handler here.</exception>
        bool ReadNode(HtmlElement parentElement, out HtmlNode node, string endElementLocalName = null)
        {
            if (!MoveNext())
            {
                node = null;
                return(false);
            }

            switch (Current.Type)
            {
            case HtmlTagType.Define:
                node = new HtmlDocumentType(Current.Attributes, parentElement);
                return(true);

            case HtmlTagType.Single:
                node = new HtmlElement(Current.Name, Current.Attributes, parentElement);
                return(true);

            case HtmlTagType.Text:
                node = new HtmlText(HtmlAPI.Unescape(Current.Content), parentElement);
                return(true);

            case HtmlTagType.Comment:
                node = new HtmlComment(Current.Content, parentElement);
                return(true);

            case HtmlTagType.Start: {
                HtmlElement opened = new HtmlElement(Current.Name, Current.Attributes, parentElement);
                node = opened;
                if (!HtmlAPI.IsContentTag(opened.LocalName))
                {
                    // Ordinary element: record it as open, then read children until ReadNode
                    // reports that this element has ended.
                    Tags.Add(opened.LocalName);
                    HtmlNode previous = null;
                    while (ReadNode(opened, out HtmlNode child, opened.LocalName))
                    {
                        if (!(child is null))
                        {
                            if (previous != null)
                            {
                                // Link the new child with the sibling read before it.
                                previous.NextSibling  = child;
                                child.PreviousSibling = previous;
                            }
                            node.ChildNodes.Add(child);
                            previous = child;
                        }
                    }
                    return(true);
                }

                // Content tag: everything up to the closing tag is taken as one text node
                // instead of being parsed as markup.
                TextTokenizer.SkipIgnoreCharacters();
                string raw = TextTokenizer.ReadTo($"</{opened.LocalName}>").TrimEnd();
                if (raw.Length > 0)
                {
                    node.ChildNodes.Add(new HtmlText(raw, opened));
                }
                return(true);
            }

            case HtmlTagType.End: {
                bool closesOtherElement = endElementLocalName != null && !HtmlAPI.Equals(Current.Name, endElementLocalName);
                if (closesOtherElement)
                {
                    if (!Tags.Contains(Current.Name))
                    {
                        // End tag for an element that is not open: ignore it and keep reading.
                        node = null;
                        return(true);
                    }
                    // End tag for another open element: push it onto Buffer (presumably so it is
                    // read again by an enclosing ReadNode call; confirm against MoveNext) and
                    // close the current element below.
                    Buffer.Push(Current);
                }
                if (Tags.Count > 0)
                {
                    Tags.RemoveAt(Tags.Count - 1);
                }
                node = null;
                return(false);
            }

            default:
                throw new NotImplementedException(Current.Type.ToString());
            }
        }
예제 #4
0
        /// <summary>
        /// Evaluates a selector expression against <c>RootNode</c> and returns all matching elements.
        /// </summary>
        /// <remarks>
        /// Handled tokens: <c>MultipleSign</c> (all elements), <c>Dot</c> + name (class),
        /// <c>NumberSign</c> + id, a bare name (tag), <c>[n]</c> (n-th element of the current set),
        /// <c>[attr]</c> / <c>[attr=value]</c> (attribute filters), <c>Comma</c> (start a new selector
        /// from the root) and <c>RightAngleBracket</c> (the next query is issued with depth 0).
        /// A class or id selector starts a new query when it follows whitespace, a
        /// <c>RightAngleBracket</c> or a <c>Comma</c>; otherwise it filters the current set.
        /// </remarks>
        /// <param name="path">Selector text to evaluate.</param>
        /// <returns>
        /// Matches of every comma-separated selector, concatenated in selector order.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// <c>RootNode</c> is neither an <c>HtmlElement</c> nor an <c>HtmlDocument</c>.
        /// </exception>
        /// <exception cref="HtmlTextReaderException{T}">A bracket expression is malformed.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An <c>[n]</c> index lies outside the current element set (raised by the list indexer).
        /// </exception>
        public List <HtmlElement> QuerySelectorAll(string path)
        {
            if (path is null)
            {
                throw new ArgumentNullException(nameof(path));
            }
            TextReader = new StringReader(path);
            List <HtmlElement> elements       = new List <HtmlElement> ();
            List <HtmlElement> targetElements = new List <HtmlElement> ();
            List <HtmlElement> tempElements   = new List <HtmlElement> ();

            switch (RootNode)
            {
            case HtmlElement element:
                Root = element;
                break;

            case HtmlDocument document:
                Root = document.Root;
                break;

            default:
                // InvalidOperationException derives from Exception, so existing catch blocks still match.
                throw new InvalidOperationException("必须是元素或文档元素");
            }
            targetElements.Add(Root);
            // isChild: the next class/id selector starts a new query instead of filtering the current set.
            bool isChild = true;
            // depth: passed to the GetElementsBy* calls; -1 by default, 0 right after a '>' token.
            int  depth   = -1;

            while (MoveNext())
            {
                switch (Current.Type)
                {
                case HtmlTokenType.MultipleSign: {
                    Query(targetElements, tempElements, element => element.All);
                    break;
                }

                case HtmlTokenType.Dot: {
                    MoveNext();
                    string name = Current;
                    if (isChild)
                    {
                        Query(targetElements, tempElements, element => element.GetElementsByClassName(name, depth));
                        break;
                    }
                    Filter(targetElements, element => element.ClassList.Contains(name));
                    break;
                }

                case HtmlTokenType.NumberSign: {
                    MoveNext();
                    string id = Current;
                    if (isChild)
                    {
                        Query(targetElements, tempElements, element => element.GetElementsByAttribute(HtmlKeyword.ID, id, depth));
                        break;
                    }
                    Filter(targetElements, element => HtmlAPI.Equals(element.GetAttribute(HtmlKeyword.ID), id));
                    break;
                }

                case HtmlTokenType.Comma: {
                    // Finish the current selector and restart from the root for the next one.
                    elements.AddRange(targetElements);
                    targetElements.Clear();
                    targetElements.Add(Root);
                    break;
                }

                case HtmlTokenType.String: {
                    string name = Current;
                    Query(targetElements, tempElements, element => element.GetElementsByTagName(name, depth));
                    break;
                }

                case HtmlTokenType.LeftBracket: {
                    MoveNext();
                    if (Current.Type == HtmlTokenType.Integer)
                    {
                        // [n]: keep only the n-th element of the current set.
                        HtmlElement element = targetElements[Current.Int];
                        targetElements.Clear();
                        targetElements.Add(element);
                        MoveNext();
                        if (Current.Type != HtmlTokenType.RightBracket)
                        {
                            throw new HtmlTextReaderException <HtmlTokenType> (this, HtmlKeyword.RightBracket);
                        }
                        break;
                    }
                    string name = Current;
                    MoveNext();
                    switch (Current.Type)
                    {
                    case HtmlTokenType.EqualSign:
                        // [attr=value]: keep elements whose attribute equals the value.
                        MoveNext();
                        string value = Current;
                        Filter(targetElements, element => HtmlAPI.Equals(element.GetAttribute(name), value));
                        MoveNext();
                        if (Current.Type != HtmlTokenType.RightBracket)
                        {
                            throw new HtmlTextReaderException <HtmlTokenType> (this, HtmlKeyword.RightBracket);
                        }
                        break;

                    case HtmlTokenType.RightBracket:
                        // [attr]: keep elements that have the attribute at all.
                        Filter(targetElements, element => element.GetAttributeNode(name) != null);
                        break;

                    default:
                        throw new HtmlTextReaderException <HtmlTokenType> (this, HtmlKeyword.EqualSign, HtmlKeyword.RightBracket);
                    }
                    break;
                }
                }

                // A '>' token has no case above; its only effect is to set depth to 0 for the
                // next query. Any other token resets depth to -1.
                bool afterChildCombinator = Current.Type == HtmlTokenType.RightAngleBracket;
                depth = afterChildCombinator ? 0 : -1;

                // The next class/id selector starts a new query when it is separated from this
                // token by whitespace, or when this token is itself a combinator ('>' or ',').
                // Previously only whitespace was considered, so "div>.b" and "a,.b" filtered the
                // current set instead of querying.
                bool followedByWhiteSpace = char.IsWhiteSpace((char)Peek());
                isChild = followedByWhiteSpace
                          || afterChildCombinator
                          || Current.Type == HtmlTokenType.Comma;
            }
            elements.AddRange(targetElements);
            return(elements);
        }