예제 #1
0
파일: Parser.cs 프로젝트: abarbarov/crawler
        /// <summary>
        /// Scans forward for the next tag that <see cref="ParseTag"/> reports as a match
        /// for <paramref name="name"/>.
        /// </summary>
        /// <param name="name">
        /// Tag name to look for; forwarded unchanged to <see cref="ParseTag"/>.
        /// A null or empty value makes the method return false immediately.
        /// </param>
        /// <param name="tag">
        /// Reset to null on entry; populated by <see cref="ParseTag"/> when a matching tag is found.
        /// </param>
        /// <returns>
        /// true when a matching tag was parsed; false when <paramref name="name"/> is null/empty
        /// or MoveToNextTag() stops reporting further tags.
        /// </returns>
        public bool ParseNext(string name, out Tag tag)
        {
            tag = null;

            if (String.IsNullOrEmpty(name))
            {
                return false;
            }

            while (MoveToNextTag())
            {
                // Advance one character (presumably past the tag's opening '<' — confirm against MoveToNextTag).
                Move();

                char lead = Peek();

                // "!--" right after the tag opener marks a comment.
                bool isComment = lead == '!' && Peek(1) == '-' && Peek(2) == '-';
                if (isComment)
                {
                    // Jump to the comment terminator and step over it.
                    // NOTE(review): IndexOf yields -1 when the terminator is missing;
                    // NormalizePosition() presumably repairs _pos in that case — confirm.
                    _pos = _html.IndexOf(endComment, _pos, StringComparison.OrdinalIgnoreCase);
                    NormalizePosition();
                    Move(endComment.Length);
                    continue;
                }

                if (lead == '/')
                {
                    // Closing tag: nothing to parse, skip to just after its '>'.
                    _pos = _html.IndexOf('>', _pos);
                    NormalizePosition();
                    Move();
                    continue;
                }

                bool matched = ParseTag(name, ref tag);

                // ParseTag raises _scriptBegin for an opening script tag; skip the script
                // body up to and including the end-script marker before going on.
                if (_scriptBegin)
                {
                    _pos = _html.IndexOf(endScript, _pos, StringComparison.OrdinalIgnoreCase);
                    NormalizePosition();
                    Move(endScript.Length);
                    SkipWhitespace();
                    if (Peek() == '>')
                    {
                        Move();
                    }
                }

                if (matched)
                {
                    return true;
                }
            }

            return false;
        }
예제 #2
0
파일: Parser.cs 프로젝트: abarbarov/crawler
        /// <summary>
        /// Parses the tag at the current position: reads its name, then its attributes,
        /// and finally moves past the closing '&gt;'.
        /// </summary>
        /// <param name="name">
        /// Tag name the caller is interested in, matched case-insensitively (ordinal),
        /// or "*" to accept any tag.
        /// </param>
        /// <param name="tag">
        /// Replaced with a new <see cref="Tag"/> (name plus attribute dictionary) when the
        /// parsed tag matches <paramref name="name"/>; left untouched otherwise.
        /// </param>
        /// <returns>true when the parsed tag matched <paramref name="name"/>; otherwise false.</returns>
        /// <remarks>
        /// Side effect: _scriptBegin is set to true when the tag is named "script" and is
        /// cleared again if a '/' is encountered inside the tag; it is false for every other tag.
        /// </remarks>
        protected bool ParseTag(string name, ref Tag tag)
        {
            string tagName = ParseTagName();

            // Tag names are compared ordinally: a culture-sensitive comparison can reject
            // "SCRIPT" vs "script" under cultures with special casing rules (e.g. tr-TR)
            // and can treat ignorable characters as equal.
            bool doctype = String.Equals(tagName, "!DOCTYPE", StringComparison.OrdinalIgnoreCase);
            _scriptBegin = !doctype && String.Equals(tagName, "script", StringComparison.OrdinalIgnoreCase);

            bool requested = name == "*" || String.Equals(tagName, name, StringComparison.OrdinalIgnoreCase);
            if (requested)
            {
                tag = new Tag {Name = tagName, Attributes = new Dictionary<string, string>()};
            }

            SkipWhitespace();

            // NOTE(review): this loop only ends on '>'. Whether it terminates on truncated
            // input depends on how Peek() and the Parse* helpers behave at end of input — confirm.
            while (Peek() != '>')
            {
                if (Peek() == '/')
                {
                    if (requested)
                    {
                        tag.TrailingSlash = true;
                    }

                    Move();
                    SkipWhitespace();

                    // A slash inside the tag cancels script-body skipping in the caller.
                    _scriptBegin = false;
                }
                else
                {
                    // DOCTYPE tokens are read with ParseAttributeValue rather than ParseAttributeName
                    // (presumably because they may be quoted strings — confirm).
                    string attributeName = doctype ? ParseAttributeValue() : ParseAttributeName();
                    SkipWhitespace();

                    // Attributes without "=value" are stored with an empty value.
                    string value = String.Empty;
                    if (Peek() == '=')
                    {
                        Move();
                        SkipWhitespace();
                        value = ParseAttributeValue();
                        SkipWhitespace();
                    }

                    if (requested)
                    {
                        // Indexer set: a repeated attribute name overwrites the earlier value.
                        tag.Attributes[attributeName] = value;
                    }
                }
            }

            // Step over the closing '>'.
            Move();

            return requested;
        }