예제 #1
0
        /// <summary>
        /// Creates a doctype element and stores the supplied attributes on it.
        /// </summary>
        /// <param name="attributes">Attributes collection assigned to the new element.</param>
        /// <param name="html">Not read by this overload.</param>
        /// <param name="htmlParserManager">Not read by this overload.</param>
        /// <returns>The newly created <see cref="HtmlDocTypeElement"/>.</returns>
        public override IHtmlElement Create(HtmlAttributesCollection attributes, string html, IHtmlParserManager htmlParserManager)
        {
            var docType = new HtmlDocTypeElement
            {
                Attributes = attributes
            };

            return docType;
        }
        /// <summary>
        /// Creates a node element, fills its children and text fragments from the given
        /// content range, and then assigns the supplied attributes.
        /// </summary>
        /// <param name="attributes">Attributes collection assigned to the new element.</param>
        /// <param name="html">Not read by this overload; parsing goes through <paramref name="htmlParserManager"/>.</param>
        /// <param name="startContentIndex">Start of the content range handed to the parser manager.</param>
        /// <param name="endContentIndex">End of the content range handed to the parser manager.</param>
        /// <param name="htmlParserManager">Parser used to obtain child elements and text fragments.</param>
        /// <returns>The populated element.</returns>
        public override IHtmlElement Create(HtmlAttributesCollection attributes, string html, int startContentIndex, int endContentIndex, IHtmlParserManager htmlParserManager)
        {
            HtmlNodeElement node = (HtmlNodeElement)CreateInstance();

            // Children and texts are only parsed when the content range is non-empty.
            bool hasContent = endContentIndex - startContentIndex > 0;
            if (hasContent)
            {
                node.Children = htmlParserManager.ParseElements(startContentIndex, endContentIndex);

                Dictionary<int, string> fragments = htmlParserManager.ParseText(startContentIndex, endContentIndex);
                foreach (KeyValuePair<int, string> fragment in fragments)
                {
                    string decoded = HttpUtility.HtmlDecode(fragment.Value);

                    // The decoded form is stored only when it matches the comment pattern;
                    // any other fragment keeps its original (undecoded) text.
                    string content = Regex.IsMatch(decoded, HtmlRegexParserManager.commentRegex)
                        ? decoded
                        : fragment.Value;

                    if (fragment.Key < 0)
                    {
                        // A negative key carries no child reference.
                        node.Text(new HtmlString(content));
                        continue;
                    }

                    // A non-negative key indexes into Children; that child's UId is passed along with the text.
                    node.Text(new HtmlString(content), node.Children[fragment.Key].UId);
                }
            }

            node.Attributes = attributes;
            return node;
        }
        /// <summary>
        /// Creates a self-closing tag element through <c>CreateInstance()</c> and stores the
        /// supplied attributes on it.
        /// </summary>
        /// <param name="attributes">Attributes collection assigned to the new element.</param>
        /// <param name="html">Not read by this overload.</param>
        /// <param name="htmlParserManager">Not read by this overload.</param>
        /// <returns>The newly created element.</returns>
        public virtual IHtmlElement Create(HtmlAttributesCollection attributes, string html, IHtmlParserManager htmlParserManager)
        {
            // The cast fails fast if CreateInstance() yields anything other than a self-closing tag element.
            HtmlSelfClosingTagElement instance = (HtmlSelfClosingTagElement)CreateInstance();

            IHtmlElement result = instance;
            result.Attributes = attributes;

            return result;
        }
        /// <summary>
        /// Creates a script element, stores the supplied attributes on it and sets its text
        /// from the content range of <paramref name="html"/>.
        /// </summary>
        /// <param name="attributes">Attributes collection assigned to the new element.</param>
        /// <param name="html">Source string the element text is cut from.</param>
        /// <param name="startContentIndex">First argument passed to <c>SubStringToIndex</c>.</param>
        /// <param name="endContentIndex">One past the second argument passed to <c>SubStringToIndex</c>.</param>
        /// <param name="htmlParserManager">Not read by this overload.</param>
        /// <returns>The newly created <see cref="HtmlScriptElement"/>.</returns>
        public override IHtmlElement Create(HtmlAttributesCollection attributes, string html, int startContentIndex, int endContentIndex, IHtmlParserManager htmlParserManager)
        {
            var script = new HtmlScriptElement
            {
                Attributes = attributes
            };

            // NOTE(review): the meaning of the 'true' flag is defined by HtmlScriptElement.Text - confirm there.
            var scriptBody = html.SubStringToIndex(startContentIndex, endContentIndex - 1);
            script.Text(scriptBody, true);

            return script;
        }
예제 #5
0
        /// <summary>
        /// Creates a pair-tags element through <c>CreateInstance()</c>, stores the supplied
        /// attributes on it and sets its text from the content range of <paramref name="html"/>.
        /// </summary>
        /// <param name="attributes">Attributes collection assigned to the new element.</param>
        /// <param name="html">Source string the element text is cut from.</param>
        /// <param name="startContentIndex">First argument passed to <c>SubStringToIndex</c>.</param>
        /// <param name="endContentIndex">One past the second argument passed to <c>SubStringToIndex</c>.</param>
        /// <param name="htmlParserManager">Not read by this overload.</param>
        /// <returns>The newly created element.</returns>
        public virtual IHtmlElement Create(HtmlAttributesCollection attributes, string html, int startContentIndex, int endContentIndex, IHtmlParserManager htmlParserManager)
        {
            // The cast fails fast if CreateInstance() yields anything other than a pair-tags element.
            HtmlPairTagsElement pairElement = (HtmlPairTagsElement)CreateInstance();
            pairElement.Attributes = attributes;

            var innerContent = html.SubStringToIndex(startContentIndex, endContentIndex - 1);
            pairElement.Text(innerContent);

            return pairElement;
        }
예제 #6
0
        /// <summary>
        /// Builds the collection of root-level elements whose start tags lie inside the given index range.
        /// </summary>
        /// <param name="startIndex">Lower bound of the range; start tags at this index are included, end tags must lie strictly after it.</param>
        /// <param name="endIndex">Exclusive upper bound of the range.</param>
        /// <returns>
        /// The elements created for every root start tag in the range. Parsing is best-effort:
        /// if an exception is thrown while building elements, the elements collected so far are returned.
        /// </returns>
        public IHtmlElementsCollection ParseElements(int startIndex, int endIndex)
        {
            IHtmlElementsCollection elements = new HtmlElementsCollection();

            // get all matches in current range
            List <Match> startTagMatchesInCurrentRange     = _startTagsMathes.Where(x => x.Index >= startIndex && x.Index < endIndex).ToList();
            List <Match> pairStartTagMatchesInCurrentRange = _pairStartTagsMathes.Where(x => x.Index >= startIndex && x.Index < endIndex).ToList();
            List <Match> endTagMatchesInCurrentRange       = _endTagsMathes.Where(x => x.Index > startIndex && x.Index < endIndex).ToList();

            try
            {
                // get root start tags in current range: a start tag is a root when the number of pair start tags
                // before it equals the number of end tags at or before it (every earlier pair tag is already closed)
                List <Match> rootStartTagMatches = startTagMatchesInCurrentRange
                                                   .Where(currentStartTag =>
                                                          pairStartTagMatchesInCurrentRange
                                                          .Count(startTag => startTag.Index < currentStartTag.Index) == endTagMatchesInCurrentRange.Count(endTag => endTag.Index <= currentStartTag.Index)
                                                          )
                                                   .ToList();
                // get root end tags in current range, using the same balance rule
                List <Match> rootEndTagMatches = endTagMatchesInCurrentRange
                                                 .Where(currentEndTag =>
                                                        pairStartTagMatchesInCurrentRange
                                                        .Count(startTag => startTag.Index < currentEndTag.Index) == endTagMatchesInCurrentRange.Count(endTag2 => endTag2.Index <= currentEndTag.Index)
                                                        )
                                                 .ToList();

                // create IHtmlElement for every start tag match and add to collection
                foreach (Match startTagMatch in rootStartTagMatches)
                {
                    // get tag name
                    string tagName = _htmlHelper.ExtractTagNameFromStartTag(startTagMatch.Value);

                    // parse attributes
                    HtmlAttributesCollection attributes = ParseAttributes(startTagMatch.Value);

                    // if tag is self closing
                    if (_htmlHelper.IsSelfClosingHtmlTag(tagName))
                    {
                        // create elements factory
                        // (invariant lower-casing: tag names are not linguistic text, and culture-sensitive
                        // casing breaks the match under cultures such as tr-TR where 'I' lower-cases to a dotless 'ı')
                        IHtmlSelfClosingTagElementFactory htmlElementsFactory;
                        switch (tagName.ToLowerInvariant())
                        {
                        case "!doctype":
                            htmlElementsFactory = new HtmlDoctypeElementFactory();
                            break;

                        default:
                            htmlElementsFactory = new HtmlSelfClosingTagElementFactory(tagName);
                            break;
                        }

                        // create element
                        IHtmlElement element = htmlElementsFactory.Create(attributes, _encodedHtml, this);
                        // add
                        elements.Add(element);
                    }
                    // when the tag has a paired end tag
                    else
                    {
                        // create elements factory (invariant lower-casing, see above)
                        IHtmlPairTagsElementFactory htmlElementsFactory;
                        switch (tagName.ToLowerInvariant())
                        {
                        case "noscript":
                            htmlElementsFactory = new HtmlNoScriptElementFactory();
                            break;

                        case "script":
                            htmlElementsFactory = new HtmlScriptElementFactory();
                            break;

                        case "style":
                            htmlElementsFactory = new HtmlStyleElementFactory();
                            break;

                        case "code":
                            htmlElementsFactory = new HtmlCodeElementFactory();
                            break;

                        default:
                            htmlElementsFactory = new HtmlNodeElementFactory(tagName);
                            break;
                        }

                        // find start content index
                        int startContentIndex = startTagMatch.Index + startTagMatch.Value.Length;

                        // find the closing tag for the current start tag: the first root end tag after it
                        Match endTagMatch = rootEndTagMatches.FirstOrDefault(x => x.Index > startTagMatch.Index);
                        // html may contain tags that should have an end tag but do not define one; in that case
                        // the content range is left empty by ending it right after the current start tag
                        int endContentIndex = startTagMatch.Index + startTagMatch.Value.Length;
                        if (endTagMatch != null)
                        {
                            endContentIndex = endTagMatch.Index;
                        }

                        // create element
                        IHtmlElement element = htmlElementsFactory.Create(attributes, _encodedHtml, startContentIndex, endContentIndex, this);

                        // add
                        elements.Add(element);
                    }
                }
            }
            catch (Exception)
            {
                // best-effort parsing: on failure keep whatever was parsed before the failing tag
                return(elements);
            }

            return(elements);
        }
예제 #7
0
        /// <summary>
        /// Creates the collection of attributes contained in a start tag string.
        /// </summary>
        /// <param name="startTag">Html start tag string, for example <c>&lt;span id="1"&gt;</c> or <c>&lt;div class="div"&gt;</c>.</param>
        /// <returns>
        /// An <see cref="HtmlAttributesCollection"/> with all attributes found in the given start tag.
        /// The collection is empty when the tag contains neither a space nor a line break after its name.
        /// Parsing is best-effort: if an exception is thrown, the attributes collected so far are returned.
        /// </returns>
        private HtmlAttributesCollection ParseAttributes(string startTag)
        {
            HtmlAttributesCollection attributes = new HtmlAttributesCollection();

            // decode
            startTag = HttpUtility.HtmlDecode(startTag);

            // replace spacing between name-value
            startTag = Regex.Replace(startTag, keyValueAttributeEqualSymbolSpacingRegex, "=");

            // the tag name ends at the first space, or at the first line break when that comes earlier
            // (ordinal search: markup must be matched character by character, not by culture rules)
            int endTagNameIndex = startTag.IndexOf(' ');
            int lineBreakIndex  = startTag.IndexOf("\r\n", StringComparison.Ordinal);

            if (lineBreakIndex > 0 && (lineBreakIndex < endTagNameIndex || endTagNameIndex == -1))
            {
                // skip both characters of the line break
                endTagNameIndex = lineBreakIndex + 2;
            }

            // return if the tag has no attributes
            if (endTagNameIndex < 0)
            {
                return(attributes);
            }

            // remove tag name from string
            startTag = startTag.Remove(0, endTagNameIndex);
            // trim the closing '>' / '/' characters and any trailing whitespace
            startTag = startTag.TrimEnd(new char[] { '>', '/' }).TrimEnd();

            try
            {
                while (startTag.IsNotNullNorEmpty())
                {
                    startTag = startTag.TrimStart();
                    int lengthBeforeAttribute = startTag.Length;

                    // the attribute name runs up to the first space or '=' (or to the end of the string)
                    int    attributeNameEndIndex = startTag.IndexOfAny(new char[] { ' ', '=' });
                    string attributeName         = attributeNameEndIndex < 0 ? startTag : startTag.Substring(0, attributeNameEndIndex);

                    // create attributes factory
                    IHtmlAttributeFactory htmlAttributeFactory;
                    switch (attributeName)
                    {
                    case "style":
                        htmlAttributeFactory = new HtmlStyleAttributeFactory();
                        break;

                    default:
                        htmlAttributeFactory = new HtmlAttributeFactory(attributeName);
                        break;
                    }

                    IHtmlAttribute attribute = htmlAttributeFactory.Create(startTag);
                    attributes.Add(attribute);

                    // find where the parsed attribute ends so it can be cut off the front of the string
                    var endAttributeIndex = startTag.IndexOf(attribute.Name, StringComparison.Ordinal) + attribute.Name.Length;
                    if (attribute.Value.IsNotNullNorEmpty())
                    {
                        endAttributeIndex = startTag.IndexOf(attribute.Value, endAttributeIndex, StringComparison.Ordinal) + attribute.Value.Length;
                    }
                    else if (startTag.TrimStart().StarstWithPattern(_htmlHelper.GetEmptyValueAttributeRegex(attribute.Name)))
                    {
                        // NOTE(review): 3 presumably covers '=' plus an empty pair of quotes - confirm against GetEmptyValueAttributeRegex
                        endAttributeIndex += 3;
                    }

                    startTag = startTag.Remove(0, endAttributeIndex);
                    // drop leading whitespace and the closing quote left over from the attribute value
                    startTag = startTag.TrimStart().TrimStart(new char[] { '"', '\'' });

                    // nothing was consumed: the next iteration would see exactly the same input, so stop
                    // instead of looping on it
                    if (startTag.Length >= lengthBeforeAttribute)
                    {
                        break;
                    }
                }
            }
            catch (Exception)
            {
                // best-effort parsing: on failure keep the attributes parsed so far
                return(attributes);
            }

            return(attributes);
        }
예제 #8
0
 /// <summary>
 /// Initializes the element with a new, separately allocated attributes collection.
 /// </summary>
 public HtmlElement()
 {
     this.Attributes = new HtmlAttributesCollection();
 }