/// <summary>
/// Decides whether a link tag refers to an image: the tag's HTML-decoded <c>href</c> value is
/// resolved against <c>_url</c> and the result is searched for <c>ImageURLKeyword</c>,
/// ignoring case.
/// </summary>
/// <param name="linkTag">Tag whose <c>href</c> attribute is inspected.</param>
/// <returns><c>true</c> when the resolved URL is non-null and contains the keyword.</returns>
protected virtual bool IsImage(HTMLTag linkTag)
 {
     string decodedHref = HttpUtility.HtmlDecode(linkTag.GetAttributeValueOrEmpty("href"));
     string absoluteUrl = General.GetAbsoluteURL(_url, decodedHref);
     if (absoluteUrl == null) {
         return false;
     }
     return absoluteUrl.IndexOf(ImageURLKeyword, StringComparison.OrdinalIgnoreCase) >= 0;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a range delimited by the given start and end tags.
 /// </summary>
 /// <param name="startTag">Stored in <c>StartTag</c>.</param>
 /// <param name="endTag">Stored in <c>EndTag</c>.</param>
 public HTMLTagRange(HTMLTag startTag, HTMLTag endTag)
 {
     this.StartTag = startTag;
     this.EndTag = endTag;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Returns the section of <c>_preprocessedHTML</c> that starts at <paramref name="startTag"/>'s
 /// offset and runs to the end offset of the closing tag.
 /// </summary>
 /// <param name="startTag">Opening tag; when it is self-closing it also supplies the end offset.</param>
 /// <param name="endTag">Closing tag; only read when <paramref name="startTag"/> is not self-closing.</param>
 /// <returns>The text produced by <c>GetSection</c> for those bounds.</returns>
 public string GetHTML(HTMLTag startTag, HTMLTag endTag)
 {
     if (startTag.IsSelfClosing) {
         // A self-closing tag is its own end, so endTag is never consulted.
         return GetSection(_preprocessedHTML, startTag.Offset, startTag.EndOffset);
     }
     return GetSection(_preprocessedHTML, startTag.Offset, endTag.EndOffset);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks the tag's <c>class</c> attribute for <paramref name="targetClassName"/> by
        /// delegating to the string-based overload.
        /// </summary>
        /// <param name="tag">Tag whose <c>class</c> attribute value is read.</param>
        /// <param name="targetClassName">Class name to look for.</param>
        /// <returns>
        /// <c>false</c> when the attribute value is <c>null</c>; otherwise the result of the
        /// string-based overload.
        /// </returns>
        public static bool ClassAttributeValueHas(HTMLTag tag, string targetClassName)
        {
            string classValue = tag.GetAttributeValue("class");
            if (classValue == null)
            {
                return false;
            }

            return ClassAttributeValueHas(classValue, targetClassName);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Finds the end tag matching <paramref name="tag"/> with no stop tag limiting the search.
 /// </summary>
 /// <param name="tag">Start tag to match.</param>
 /// <returns>Whatever the two-argument overload returns when its stop tag is <c>null</c>.</returns>
 public HTMLTag FindCorrespondingEndTag(HTMLTag tag)
 {
     HTMLTag noStopTag = null;
     return FindCorrespondingEndTag(tag, noStopTag);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a range from <paramref name="tag"/> to its corresponding end tag, searching no
        /// further than <paramref name="stopBeforeTag"/>.
        /// </summary>
        /// <param name="tag">Start tag of the range.</param>
        /// <param name="stopBeforeTag">Passed through to <c>FindCorrespondingEndTag</c>; may be <c>null</c>.</param>
        /// <returns>The new range, or <c>null</c> when the tag is <c>null</c> or no end tag was found.</returns>
        public HTMLTagRange CreateTagRange(HTMLTag tag, HTMLTag stopBeforeTag)
        {
            HTMLTag closingTag = FindCorrespondingEndTag(tag, stopBeforeTag);
            if (tag == null || closingTag == null)
            {
                return null;
            }

            return new HTMLTagRange(tag, closingTag);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Finds start tags with any of the given names by calling <c>FindTags</c> with its
 /// end-tag flag set to <c>false</c>.
 /// </summary>
 /// <param name="startAfterTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="names">Tag names to match.</param>
 /// <returns>The sequence returned by <c>FindTags</c>.</returns>
 public IEnumerable<HTMLTag> FindStartTags(HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     const bool wantEndTags = false;
     return FindTags(wantEndTags, startAfterTag, stopBeforeTag, names);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Finds end tags with any of the given names by calling <c>FindTags</c> with its
 /// end-tag flag set to <c>true</c>.
 /// </summary>
 /// <param name="startAfterTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="names">Tag names to match.</param>
 /// <returns>The sequence returned by <c>FindTags</c>.</returns>
 public IEnumerable <HTMLTag> FindEndTags(HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     const bool wantEndTags = true;
     return FindTags(wantEndTags, startAfterTag, stopBeforeTag, names);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Walks <c>_tags</c> after <paramref name="tag"/> and returns the end tag that balances it,
 /// counting nested same-named start and end tags along the way.
 /// </summary>
 /// <param name="tag">Start tag to match; <c>null</c> yields <c>null</c>.</param>
 /// <param name="stopBeforeTag">
 /// When non-null, the search ends at the tag just before this one; otherwise it runs to the
 /// last entry of <c>_tags</c>.
 /// </param>
 /// <returns>
 /// <paramref name="tag"/> itself when it is self-closing, the balancing end tag when one is
 /// found within the searched span, otherwise <c>null</c>.
 /// </returns>
 /// <exception cref="ArgumentException"><paramref name="tag"/> is an end tag.</exception>
 public HTMLTag FindCorrespondingEndTag(HTMLTag tag, HTMLTag stopBeforeTag)
 {
     if (tag == null) {
         return null;
     }
     if (tag.IsEnd) {
         throw new ArgumentException("Tag must be a start tag.");
     }
     if (tag.IsSelfClosing) {
         return tag;
     }

     int index = GetTagIndex(tag) + 1;
     int lastIndex;
     if (stopBeforeTag == null) {
         lastIndex = _tags.Count - 1;
     }
     else {
         lastIndex = GetTagIndex(stopBeforeTag) - 1;
     }

     // Number of same-named start tags still waiting for their end tag; the initial 1 is `tag`.
     int openCount = 1;
     while (index <= lastIndex) {
         HTMLTag candidate = _tags[index];
         index++;

         // Self-closing tags and tags with other names do not affect nesting depth.
         if (candidate.IsSelfClosing || !candidate.NameEquals(tag.Name)) {
             continue;
         }

         if (candidate.IsEnd) {
             openCount--;
         }
         else {
             openCount++;
         }

         if (openCount == 0) {
             return candidate;
         }
     }
     return null;
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Finds the first start tag with any of the given names by calling <c>FindTag</c> with its
 /// end-tag flag set to <c>false</c>.
 /// </summary>
 /// <param name="startAfterTag">Forwarded to <c>FindTag</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>FindTag</c>.</param>
 /// <param name="names">Tag names to match.</param>
 /// <returns>The tag returned by <c>FindTag</c>.</returns>
 public HTMLTag FindStartTag(HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     const bool wantEndTag = false;
     HTMLTag firstMatch = FindTag(wantEndTag, startAfterTag, stopBeforeTag, names);
     return firstMatch;
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Convenience overload: finds the end tag matching <paramref name="tag"/> without a stop tag.
 /// </summary>
 /// <param name="tag">Start tag to match.</param>
 /// <returns>The result of the two-argument overload called with a <c>null</c> stop tag.</returns>
 public HTMLTag FindCorrespondingEndTag(HTMLTag tag)
 {
     HTMLTag matchingEndTag = FindCorrespondingEndTag(tag, null);
     return matchingEndTag;
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Lazily yields the entries of <c>_tags</c> that lie strictly between the two boundary tags.
 /// </summary>
 /// <param name="startAfterTag">Enumeration starts after this tag; <c>null</c> starts at index 0.</param>
 /// <param name="stopBeforeTag">Enumeration stops before this tag; <c>null</c> runs to the last entry.</param>
 /// <returns>An iterator over the selected tags; the bounds are computed when iteration begins.</returns>
 public IEnumerable<HTMLTag> EnumerateTags(HTMLTag startAfterTag, HTMLTag stopBeforeTag)
 {
     int index;
     if (startAfterTag == null) {
         index = 0;
     }
     else {
         index = GetTagIndex(startAfterTag) + 1;
     }

     int lastIndex;
     if (stopBeforeTag == null) {
         lastIndex = _tags.Count - 1;
     }
     else {
         lastIndex = GetTagIndex(stopBeforeTag) - 1;
     }

     while (index <= lastIndex) {
         yield return _tags[index];
         index++;
     }
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Pairs <paramref name="tag"/> with its corresponding end tag, looking no further than
 /// <paramref name="stopBeforeTag"/>, and wraps both in an <c>HTMLTagRange</c>.
 /// </summary>
 /// <param name="tag">Start tag of the range.</param>
 /// <param name="stopBeforeTag">Passed through to <c>FindCorrespondingEndTag</c>; may be <c>null</c>.</param>
 /// <returns>The new range, or <c>null</c> when the tag is <c>null</c> or no end tag was found.</returns>
 public HTMLTagRange CreateTagRange(HTMLTag tag, HTMLTag stopBeforeTag)
 {
     HTMLTag closingTag = FindCorrespondingEndTag(tag, stopBeforeTag);
     if (tag == null || closingTag == null) {
         return null;
     }
     return new HTMLTagRange(tag, closingTag);
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Reads the tag's <c>class</c> attribute and, when it has a value, asks the string-based
 /// overload whether it contains <paramref name="targetClassName"/>.
 /// </summary>
 /// <param name="tag">Tag whose <c>class</c> attribute value is read.</param>
 /// <param name="targetClassName">Class name to look for.</param>
 /// <returns><c>false</c> when the attribute value is <c>null</c>; otherwise the overload's result.</returns>
 public static bool ClassAttributeValueHas(HTMLTag tag, string targetClassName)
 {
     string classValue = tag.GetAttributeValue("class");
     if (classValue == null) {
         return false;
     }
     return ClassAttributeValueHas(classValue, targetClassName);
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Returns the first tag produced by <c>FindTags</c> for the given arguments.
 /// </summary>
 /// <param name="isEndTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="startAfterTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>FindTags</c>.</param>
 /// <param name="names">Tag names to match.</param>
 /// <returns>The first matching tag, or <c>null</c> when the sequence is empty.</returns>
 public HTMLTag FindTag(bool isEndTag, HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     // Only the first element is needed, so advance the enumerator at most once.
     using (IEnumerator<HTMLTag> matches = FindTags(isEndTag, startAfterTag, stopBeforeTag, names).GetEnumerator()) {
         if (matches.MoveNext()) {
             return matches.Current;
         }
     }
     return null;
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Builds the range for <paramref name="tag"/> without a stop tag.
 /// </summary>
 /// <param name="tag">Start tag of the range.</param>
 /// <returns>The result of the two-argument overload called with a <c>null</c> stop tag.</returns>
 public HTMLTagRange CreateTagRange(HTMLTag tag)
 {
     HTMLTag noStopTag = null;
     return CreateTagRange(tag, noStopTag);
 }
Ejemplo n.º 17
0
 /// <summary>
 /// Lazily filters the tags from <c>EnumerateTags</c>, keeping those whose <c>IsEnd</c> flag
 /// equals <paramref name="isEndTag"/> and whose name matches any of <paramref name="names"/>.
 /// </summary>
 /// <param name="isEndTag"><c>true</c> to select end tags, <c>false</c> to select start tags.</param>
 /// <param name="startAfterTag">Forwarded to <c>EnumerateTags</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>EnumerateTags</c>.</param>
 /// <param name="names">Tag names handed to <c>NameEqualsAny</c>.</param>
 /// <returns>An iterator over the matching tags.</returns>
 public IEnumerable<HTMLTag> FindTags(bool isEndTag, HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     foreach (HTMLTag candidate in EnumerateTags(startAfterTag, stopBeforeTag)) {
         if (candidate.IsEnd != isEndTag) {
             continue;
         }
         if (!candidate.NameEqualsAny(names)) {
             continue;
         }
         yield return candidate;
     }
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Finds the first end tag with any of the given names by calling <c>FindTag</c> with its
 /// end-tag flag set to <c>true</c>.
 /// </summary>
 /// <param name="startAfterTag">Forwarded to <c>FindTag</c>.</param>
 /// <param name="stopBeforeTag">Forwarded to <c>FindTag</c>.</param>
 /// <param name="names">Tag names to match.</param>
 /// <returns>The tag returned by <c>FindTag</c>.</returns>
 public HTMLTag FindEndTag(HTMLTag startAfterTag, HTMLTag stopBeforeTag, params string[] names)
 {
     const bool wantEndTag = true;
     HTMLTag firstMatch = FindTag(wantEndTag, startAfterTag, stopBeforeTag, names);
     return firstMatch;
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Extracts from <c>_preprocessedHTML</c> the text spanning <paramref name="startTag"/>
 /// through the end of its closing tag.
 /// </summary>
 /// <param name="startTag">Opening tag; when self-closing, its own end offset bounds the section.</param>
 /// <param name="endTag">Closing tag; only read when <paramref name="startTag"/> is not self-closing.</param>
 /// <returns>The text produced by <c>GetSection</c> for those bounds.</returns>
 public string GetHTML(HTMLTag startTag, HTMLTag endTag)
 {
     if (!startTag.IsSelfClosing) {
         return GetSection(_preprocessedHTML, startTag.Offset, endTag.EndOffset);
     }
     // Self-closing: the start tag is the whole element.
     return GetSection(_preprocessedHTML, startTag.Offset, startTag.EndOffset);
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Convenience overload: builds the range for <paramref name="tag"/> with no stop tag.
 /// </summary>
 /// <param name="tag">Start tag of the range.</param>
 /// <returns>The result of the two-argument overload called with a <c>null</c> stop tag.</returns>
 public HTMLTagRange CreateTagRange(HTMLTag tag)
 {
     HTMLTagRange range = CreateTagRange(tag, null);
     return range;
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Returns the text of <c>_preprocessedHTML</c> between the end of <paramref name="startTag"/>
 /// and the beginning of <paramref name="endTag"/>.
 /// </summary>
 /// <param name="startTag">Opening tag; a self-closing tag yields an empty string.</param>
 /// <param name="endTag">Closing tag; only read when <paramref name="startTag"/> is not self-closing.</param>
 /// <returns><c>String.Empty</c> for a self-closing tag, otherwise the result of <c>GetSection</c>.</returns>
 public string GetInnerHTML(HTMLTag startTag, HTMLTag endTag)
 {
     if (startTag.IsSelfClosing) {
         return String.Empty;
     }
     return GetSection(_preprocessedHTML, startTag.EndOffset, endTag.Offset);
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Lazily scans <paramref name="html"/> for '&lt;' characters and yields an
        /// <c>HTMLTag</c> (with its attributes) for every tag whose name passes
        /// <c>StartsWithLetter</c>. Comments and other constructs that begin with '?', '/' or
        /// '!' are skipped without yielding anything.
        /// </summary>
        /// <param name="html">Text to scan.</param>
        /// <param name="htmlStart">Position at which scanning begins; advanced as parsing proceeds.</param>
        /// <param name="htmlEnd">Upper bound passed to every helper call and used as the loop limit.</param>
        /// <returns>
        /// An iterator over the parsed tags. Iteration ends when no further '&lt;' is found or
        /// when one of the search helpers returns -1 part-way through a construct; a tag that is
        /// cut off this way is not yielded.
        /// </returns>
        private static IEnumerable <HTMLTag> ParseTags(string html, int htmlStart, int htmlEnd)
        {
            while (htmlStart < htmlEnd)
            {
                int pos = IndexOf(html, htmlStart, htmlEnd, '<');
                if (pos == -1)
                {
                    yield break;
                }

                HTMLTag tag = new HTMLTag();
                tag.Offset = pos;
                htmlStart  = pos + 1;
                tag.IsEnd  = StartsWith(html, htmlStart, htmlEnd, '/');
                // For an end tag the letter check is applied after the '/'.
                if (StartsWithLetter(html, tag.IsEnd ? (htmlStart + 1) : htmlStart, htmlEnd))
                {
                    // Parse tag name
                    if (tag.IsEnd)
                    {
                        htmlStart += 1;
                    }
                    // NOTE(review): the `true` argument presumably makes whitespace a terminator
                    // as well as the listed characters -- confirm against IndexOfAny.
                    pos = IndexOfAny(html, htmlStart, htmlEnd, true, '/', '>');
                    if (pos == -1)
                    {
                        yield break;
                    }
                    tag.Name  = GetSectionLower(html, htmlStart, pos);
                    htmlStart = pos;

                    // Parse attributes
                    bool isTagComplete = false;
                    do
                    {
                        while (StartsWithWhiteSpace(html, htmlStart, htmlEnd))
                        {
                            htmlStart++;
                        }
                        // Reassigned on every pass, so the final value reflects only whether a
                        // '/' was consumed on the pass that found the closing '>'.
                        tag.IsSelfClosing = StartsWith(html, htmlStart, htmlEnd, '/');
                        if (tag.IsSelfClosing)
                        {
                            htmlStart += 1;
                        }
                        if (StartsWith(html, htmlStart, htmlEnd, '>'))
                        {
                            htmlStart    += 1;
                            isTagComplete = true;
                        }
                        else if (tag.IsSelfClosing)
                        {
                            // A '/' not followed by '>' has already been consumed above;
                            // nothing more to do before the next pass.
                        }
                        else
                        {
                            HTMLAttribute attribute = new HTMLAttribute();
                            attribute.Offset = htmlStart;

                            // Parse attribute name
                            // The search starts at htmlStart + 1, so the first character is
                            // always taken as part of the name.
                            pos = IndexOfAny(html, htmlStart + 1, htmlEnd, true, '=', '/', '>');
                            if (pos == -1)
                            {
                                yield break;
                            }
                            attribute.Name = GetSectionLower(html, htmlStart, pos);
                            htmlStart      = pos;

                            while (StartsWithWhiteSpace(html, htmlStart, htmlEnd))
                            {
                                htmlStart++;
                            }
                            if (StartsWith(html, htmlStart, htmlEnd, '='))
                            {
                                // Parse attribute value
                                htmlStart += 1;
                                while (StartsWithWhiteSpace(html, htmlStart, htmlEnd))
                                {
                                    htmlStart++;
                                }
                                if (StartsWithAny(html, htmlStart, htmlEnd, '"', '\''))
                                {
                                    // Quoted value: runs to the next occurrence of the same quote character.
                                    char quoteChar = html[htmlStart];
                                    htmlStart += 1;
                                    pos        = IndexOf(html, htmlStart, htmlEnd, quoteChar);
                                    if (pos == -1)
                                    {
                                        yield break;
                                    }
                                    attribute.Value = GetSection(html, htmlStart, pos);
                                    htmlStart       = pos + 1;
                                }
                                else
                                {
                                    // Unquoted value: runs to whatever IndexOfAny treats as a terminator.
                                    pos = IndexOfAny(html, htmlStart, htmlEnd, true, '>');
                                    if (pos == -1)
                                    {
                                        yield break;
                                    }
                                    attribute.Value = GetSection(html, htmlStart, pos);
                                    htmlStart       = pos;
                                }
                            }
                            else
                            {
                                // No '=' after the name: the attribute gets an empty value.
                                attribute.Value = String.Empty;
                            }

                            attribute.Length = htmlStart - attribute.Offset;
                            // Added only when GetAttribute returns null for this name --
                            // presumably so the first occurrence of a repeated attribute wins.
                            if (tag.GetAttribute(attribute.Name) == null)
                            {
                                tag.Attributes.Add(attribute);
                            }
                        }
                    } while (!isTagComplete);
                    tag.Length = htmlStart - tag.Offset;

                    // Yield result
                    yield return(tag);

                    // Skip contents of special tags whose contents are to be treated as raw text
                    if (!tag.IsEnd && !tag.IsSelfClosing && tag.NameEqualsAny("script", "style", "title", "textarea"))
                    {
                        bool foundEndTag = false;
                        do
                        {
                            pos = IndexOf(html, htmlStart, htmlEnd, '<');
                            if (pos == -1)
                            {
                                yield break;
                            }
                            htmlStart = pos + 1;
                            string endTagText = "/" + tag.Name;
                            // The end-tag text must be followed by whitespace, '/' or '>' to count as a match.
                            // NOTE(review): the `true` argument presumably requests a
                            // case-insensitive comparison -- confirm against StartsWith.
                            if (StartsWith(html, htmlStart, htmlEnd, endTagText, true) &&
                                (StartsWithWhiteSpace(html, htmlStart + endTagText.Length, htmlEnd) ||
                                 StartsWithAny(html, htmlStart + endTagText.Length, htmlEnd, '/', '>')))
                            {
                                // Step back onto the '<' so the outer loop parses this end tag normally.
                                htmlStart  -= 1;
                                foundEndTag = true;
                            }
                        } while (!foundEndTag);
                    }
                }
                // "<!--" immediately followed by '>' is excluded here and handled by the branch below.
                else if (StartsWith(html, htmlStart, htmlEnd, "!--", false) && !StartsWith(html, htmlStart + 3, htmlEnd, '>'))
                {
                    // Skip comment
                    htmlStart += 3;
                    bool foundEnd = false;
                    do
                    {
                        pos = IndexOf(html, htmlStart, htmlEnd, '-');
                        if (pos == -1)
                        {
                            yield break;
                        }
                        htmlStart = pos + 1;
                        // Having consumed one '-', a following "->" or "-!>" ends the comment.
                        if (StartsWith(html, htmlStart, htmlEnd, "->", false))
                        {
                            htmlStart += 2;
                            foundEnd   = true;
                        }
                        else if (StartsWith(html, htmlStart, htmlEnd, "-!>", false))
                        {
                            htmlStart += 3;
                            foundEnd   = true;
                        }
                    } while (!foundEnd);
                }
                else if (StartsWithAny(html, htmlStart, htmlEnd, '?', '/', '!'))
                {
                    // Skip bogus comment or DOCTYPE
                    htmlStart += 1;
                    pos        = IndexOf(html, htmlStart, htmlEnd, '>');
                    if (pos == -1)
                    {
                        yield break;
                    }
                    htmlStart = pos + 1;
                }
                // Any other character after '<' matches no branch: the '<' is skipped as
                // ordinary text and scanning resumes just past it.
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Lazily scans <paramref name="html"/> for '&lt;' characters and yields an
        /// <c>HTMLTag</c> (with its attributes) for every tag whose name passes
        /// <c>StartsWithLetter</c>. Comments and other constructs that begin with '?', '/' or
        /// '!' are skipped without yielding anything.
        /// </summary>
        /// <param name="html">Text to scan.</param>
        /// <param name="htmlStart">Position at which scanning begins; advanced as parsing proceeds.</param>
        /// <param name="htmlEnd">Upper bound passed to every helper call and used as the loop limit.</param>
        /// <returns>
        /// An iterator over the parsed tags. Iteration ends when no further '&lt;' is found or
        /// when one of the search helpers returns -1 part-way through a construct; a tag that is
        /// cut off this way is not yielded.
        /// </returns>
        private static IEnumerable<HTMLTag> ParseTags(string html, int htmlStart, int htmlEnd)
        {
            while (htmlStart < htmlEnd) {
                int pos = IndexOf(html, htmlStart, htmlEnd, '<');
                if (pos == -1) yield break;

                HTMLTag tag = new HTMLTag();
                tag.Offset = pos;
                htmlStart = pos + 1;
                tag.IsEnd = StartsWith(html, htmlStart, htmlEnd, '/');
                // For an end tag the letter check is applied after the '/'.
                if (StartsWithLetter(html, tag.IsEnd ? (htmlStart + 1) : htmlStart, htmlEnd)) {
                    // Parse tag name
                    if (tag.IsEnd) htmlStart += 1;
                    // NOTE(review): the `true` argument presumably makes whitespace a terminator
                    // as well as the listed characters -- confirm against IndexOfAny.
                    pos = IndexOfAny(html, htmlStart, htmlEnd, true, '/', '>');
                    if (pos == -1) yield break;
                    tag.Name = GetSectionLower(html, htmlStart, pos);
                    htmlStart = pos;

                    // Parse attributes
                    bool isTagComplete = false;
                    do {
                        while (StartsWithWhiteSpace(html, htmlStart, htmlEnd)) htmlStart++;
                        // Reassigned on every pass, so the final value reflects only whether a
                        // '/' was consumed on the pass that found the closing '>'.
                        tag.IsSelfClosing = StartsWith(html, htmlStart, htmlEnd, '/');
                        if (tag.IsSelfClosing) htmlStart += 1;
                        if (StartsWith(html, htmlStart, htmlEnd, '>')) {
                            htmlStart += 1;
                            isTagComplete = true;
                        }
                        else if (tag.IsSelfClosing) { } // '/' not followed by '>' was already consumed above
                        else {
                            HTMLAttribute attribute = new HTMLAttribute();
                            attribute.Offset = htmlStart;

                            // Parse attribute name
                            // The search starts at htmlStart + 1, so the first character is
                            // always taken as part of the name.
                            pos = IndexOfAny(html, htmlStart + 1, htmlEnd, true, '=', '/', '>');
                            if (pos == -1) yield break;
                            attribute.Name = GetSectionLower(html, htmlStart, pos);
                            htmlStart = pos;

                            while (StartsWithWhiteSpace(html, htmlStart, htmlEnd)) htmlStart++;
                            if (StartsWith(html, htmlStart, htmlEnd, '=')) {
                                // Parse attribute value
                                htmlStart += 1;
                                while (StartsWithWhiteSpace(html, htmlStart, htmlEnd)) htmlStart++;
                                if (StartsWithAny(html, htmlStart, htmlEnd, '"', '\'')) {
                                    // Quoted value: runs to the next occurrence of the same quote character.
                                    char quoteChar = html[htmlStart];
                                    htmlStart += 1;
                                    pos = IndexOf(html, htmlStart, htmlEnd, quoteChar);
                                    if (pos == -1) yield break;
                                    attribute.Value = GetSection(html, htmlStart, pos);
                                    htmlStart = pos + 1;
                                }
                                else {
                                    // Unquoted value: runs to whatever IndexOfAny treats as a terminator.
                                    pos = IndexOfAny(html, htmlStart, htmlEnd, true, '>');
                                    if (pos == -1) yield break;
                                    attribute.Value = GetSection(html, htmlStart, pos);
                                    htmlStart = pos;
                                }
                            }
                            else {
                                // No '=' after the name: the attribute gets an empty value.
                                attribute.Value = String.Empty;
                            }

                            attribute.Length = htmlStart - attribute.Offset;
                            // Added only when GetAttribute returns null for this name --
                            // presumably so the first occurrence of a repeated attribute wins.
                            if (tag.GetAttribute(attribute.Name) == null) {
                                tag.Attributes.Add(attribute);
                            }
                        }
                    } while (!isTagComplete);
                    tag.Length = htmlStart - tag.Offset;

                    // Yield result
                    yield return tag;

                    // Skip contents of special tags whose contents are to be treated as raw text
                    if (!tag.IsEnd && !tag.IsSelfClosing && tag.NameEqualsAny("script", "style", "title", "textarea")) {
                        bool foundEndTag = false;
                        do {
                            pos = IndexOf(html, htmlStart, htmlEnd, '<');
                            if (pos == -1) yield break;
                            htmlStart = pos + 1;
                            string endTagText = "/" + tag.Name;
                            // The end-tag text must be followed by whitespace, '/' or '>' to count as a match.
                            // NOTE(review): the `true` argument presumably requests a
                            // case-insensitive comparison -- confirm against StartsWith.
                            if (StartsWith(html, htmlStart, htmlEnd, endTagText, true) &&
                                (StartsWithWhiteSpace(html, htmlStart + endTagText.Length, htmlEnd) ||
                                 StartsWithAny(html, htmlStart + endTagText.Length, htmlEnd, '/', '>')))
                            {
                                // Step back onto the '<' so the outer loop parses this end tag normally.
                                htmlStart -= 1;
                                foundEndTag = true;
                            }
                        } while (!foundEndTag);
                    }
                }
                // "<!--" immediately followed by '>' is excluded here and handled by the branch below.
                else if (StartsWith(html, htmlStart, htmlEnd, "!--", false) && !StartsWith(html, htmlStart + 3, htmlEnd, '>')) {
                    // Skip comment
                    htmlStart += 3;
                    bool foundEnd = false;
                    do {
                        pos = IndexOf(html, htmlStart, htmlEnd, '-');
                        if (pos == -1) yield break;
                        htmlStart = pos + 1;
                        // Having consumed one '-', a following "->" or "-!>" ends the comment.
                        if (StartsWith(html, htmlStart, htmlEnd, "->", false)) {
                            htmlStart += 2;
                            foundEnd = true;
                        }
                        else if (StartsWith(html, htmlStart, htmlEnd, "-!>", false)) {
                            htmlStart += 3;
                            foundEnd = true;
                        }
                    } while (!foundEnd);
                }
                else if (StartsWithAny(html, htmlStart, htmlEnd, '?', '/', '!')) {
                    // Skip bogus comment or DOCTYPE
                    htmlStart += 1;
                    pos = IndexOf(html, htmlStart, htmlEnd, '>');
                    if (pos == -1) yield break;
                    htmlStart = pos + 1;
                }
                // Any other character after '<' matches no branch: the '<' is skipped as
                // ordinary text and scanning resumes just past it.
            }
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Extracts from <c>_preprocessedHTML</c> the content lying between the end of
 /// <paramref name="startTag"/> and the start of <paramref name="endTag"/>.
 /// </summary>
 /// <param name="startTag">Opening tag; a self-closing tag has no inner content.</param>
 /// <param name="endTag">Closing tag; only read when <paramref name="startTag"/> is not self-closing.</param>
 /// <returns><c>String.Empty</c> for a self-closing tag, otherwise the result of <c>GetSection</c>.</returns>
 public string GetInnerHTML(HTMLTag startTag, HTMLTag endTag)
 {
     if (!startTag.IsSelfClosing) {
         return GetSection(_preprocessedHTML, startTag.EndOffset, endTag.Offset);
     }
     return String.Empty;
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Looks up the position of <paramref name="tag"/> by its <c>Offset</c> in
 /// <c>_offsetToIndex</c>.
 /// </summary>
 /// <param name="tag">Tag to locate.</param>
 /// <returns>The index stored for the tag's offset.</returns>
 /// <exception cref="ArgumentException">
 /// No entry exists for the tag's offset. A specific exception type is thrown instead of the
 /// base <see cref="Exception"/>, matching the argument check in <c>FindCorrespondingEndTag</c>;
 /// the message text is unchanged and handlers that catch <see cref="Exception"/> still apply.
 /// </exception>
 private int GetTagIndex(HTMLTag tag)
 {
     int index;
     if (_offsetToIndex.TryGetValue(tag.Offset, out index)) {
         return index;
     }
     throw new ArgumentException("Unable to locate the specified tag.");
 }
Ejemplo n.º 26
0
 /// <summary>
 /// Initializes the range with its opening and closing tags.
 /// </summary>
 /// <param name="startTag">Stored in <c>StartTag</c>.</param>
 /// <param name="endTag">Stored in <c>EndTag</c>.</param>
 public HTMLTagRange(HTMLTag startTag, HTMLTag endTag)
 {
     this.StartTag = startTag;
     this.EndTag = endTag;
 }
 /// <summary>
 /// Treats a link as an image link when the tag range created for it contains an <c>img</c>
 /// start tag for which <c>HTMLParser.ClassAttributeValueHas</c> reports the class
 /// <c>thumb</c>.
 /// </summary>
 /// <param name="linkTag">Link tag whose range is searched.</param>
 /// <returns><c>true</c> when at least one such <c>img</c> tag is found.</returns>
 protected override bool IsImage(HTMLTag linkTag)
 {
     foreach (HTMLTag imgTag in _htmlParser.FindStartTags(_htmlParser.CreateTagRange(linkTag), "img")) {
         if (HTMLParser.ClassAttributeValueHas(imgTag, "thumb")) {
             return true;
         }
     }
     return false;
 }