Example #1
0
        /// <summary>
        /// Handles a begin tag: tracks whether the parser is inside a title element and
        /// re-emits the tag when it is either preserved or mapped to a replacement tag.
        /// </summary>
        /// <param name="tag">The begin tag that was just parsed</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            // A title begin tag that is not flagged Complete switches on the "in title" state
            bool opensTitle = tag.NameEquals(HTMLTokens.Title) && !tag.Complete;
            if (opensTitle)
            {
                _inTitle = true;
            }

            // Both lookups are keyed by the invariant upper-cased tag name
            string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);

            if (TagsToPreserve.Contains(lookupKey))
            {
                // Preserved tags are emitted under their original name
                EmitTagAndAttributes(tag.Name, tag);
                return;
            }

            if (ReplaceTags.ContainsKey(lookupKey))
            {
                // Replaced tags are emitted under the mapped name
                string replacementName = (string)ReplaceTags[lookupKey];
                EmitTagAndAttributes(replacementName, tag);
            }
        }
Example #2
0
        /// <summary>
        /// Handles an end tag: ignores implicit end tags, clears the "in title" state when a
        /// title element ends, and re-emits the tag when it is preserved or replaced.
        /// </summary>
        /// <param name="tag">The end tag that was just parsed</param>
        protected override void OnEndTag(EndTag tag)
        {
            // Implicit end tags produce no output and do not touch the title state
            if (tag.Implicit)
            {
                return;
            }

            bool closesTitle = tag.NameEquals(HTMLTokens.Title);
            if (closesTitle)
            {
                _inTitle = false;
            }

            // Both lookups are keyed by the invariant upper-cased tag name
            string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);

            if (TagsToPreserve.Contains(lookupKey))
            {
                // Preserved tags are emitted under their original name
                EmitTagAndAttributes(tag.Name, tag);
                return;
            }

            if (ReplaceTags.ContainsKey(lookupKey))
            {
                // Replaced tags are emitted under the mapped name
                string replacementName = (string)ReplaceTags[lookupKey];
                EmitTagAndAttributes(replacementName, tag);
            }
        }
        /// <summary>
        /// Processes the node at the head of the queue. Its element and text children are
        /// enqueued for later processing; the node itself is then either unwrapped and removed
        /// (unwanted tag), stripped of unwanted attributes (with image sources re-pointed at
        /// the imported media item), or renamed according to <c>ReplaceTags</c>.
        /// </summary>
        /// <param name="nodes">Work queue; the head is dequeued and its children are appended</param>
        /// <param name="map">Data map supplying the target database and the fallback import language</param>
        /// <param name="itemPath">Passed through unchanged to <c>HandleMediaItem</c></param>
        /// <param name="importRow">Item whose database is used to resolve image links</param>
        private void HandleNextNode(Queue<HtmlNode> nodes, IDataMap map, string itemPath, Item importRow)
        {
            var node       = nodes.Dequeue();
            // Invariant lower-casing keeps tag matching independent of the thread culture
            // (a culture-sensitive ToLower() can turn "IMG" into something other than "img").
            var nodeName   = node.Name.ToLowerInvariant();
            var parentNode = node.ParentNode;
            var childNodes = node.SelectNodes("./*|./text()");

            // Children are always queued, even when this node is about to be removed
            if (childNodes != null)
            {
                foreach (var child in childNodes)
                {
                    nodes.Enqueue(child);
                }
            }

            if (UnwantedTags.Any(tag => tag == nodeName))
            {
                // A node without a parent cannot be unwrapped or removed
                if (parentNode == null)
                {
                    return;
                }

                // Unwrap: hoist the children in front of the node, then drop the node itself
                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        parentNode.InsertBefore(child, node);
                    }
                }

                parentNode.RemoveChild(node);
            }
            else if (node.HasAttributes)
            {
                // NOTE(review): a node that has attributes never reaches the ReplaceTags branch
                // below, so its tag name is left as is - confirm this is intended.
                foreach (string s in UnwantedAttributes)
                {
                    node.Attributes.Remove(s);
                }

                // Re-point images at the imported media item
                if (nodeName.Equals("img"))
                {
                    // The src attribute may be missing (or may just have been removed above)
                    var    srcAttribute = node.Attributes["src"];
                    string imgSrc       = srcAttribute != null ? srcAttribute.Value : null;

                    DynamicLink dynamicLink;
                    if (string.IsNullOrEmpty(imgSrc) || !DynamicLink.TryParse(imgSrc, out dynamicLink))
                    {
                        return;
                    }

                    // Resolve the linked item first so an unresolved link is skipped instead of dereferenced
                    Item linkedItem = importRow.Database.GetItem(dynamicLink.ItemId, dynamicLink.Language ?? map.ImportToLanguage);
                    if (linkedItem == null)
                    {
                        return;
                    }

                    MediaItem mediaItem       = linkedItem;
                    var       mediaParentItem = BuildMediaPath(map.ToDB, mediaItem.InnerItem.Paths.ParentPath);
                    MediaItem newImg          = HandleMediaItem(map, mediaParentItem, itemPath, mediaItem);
                    if (newImg != null)
                    {
                        string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID().ToString());
                        // Point the image at the media item that now exists in the target database
                        node.SetAttributeValue("src", newSrc);
                    }
                }
            }
            else if (ReplaceTags.ContainsKey(nodeName))
            {
                // Rename the tag according to the replacement table
                node.Name = ReplaceTags[nodeName];
            }
            // Any other node is kept as is
        }
Example #4
0
        /// <summary>
        /// Used as a part of HTML thinning: walks the given node and every following sibling
        /// (recursing into children) and appends a thinned HTML rendering to a buffer.
        /// Text nodes are HTML-encoded; preserved tags are emitted with their attributes run
        /// through <c>StripAttributes</c>; tags listed in <c>ReplaceTags</c> are emitted under their
        /// replacement name; all other tags are dropped while their children are still visited.
        /// </summary>
        /// <param name="node">The first node to process; its following siblings are processed as well</param>
        /// <param name="escapedText">Buffer that receives the thinned output</param>
        /// <param name="preserveImages">When true, <c>PreserveTagsWithImages</c> is used instead of <c>PreserveTags</c> to decide which tags are kept</param>
        /// <param name="progress">Ticked once for every non-text node that is visited</param>
        private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
        {
            ArrayList preserveTags = preserveImages ? PreserveTagsWithImages : PreserveTags;

            // Siblings are walked in a loop rather than by recursion so that stack depth
            // grows only with nesting depth, not with the number of siblings.
            for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
            {
                // is this a text node?  If so, just append its encoded text
                if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
                {
                    escapedText.Append(HttpUtility.HtmlEncode(current.nodeValue.ToString()));
                    continue;
                }

                progress.Tick();
                string nodeName = current.nodeName;

                // Markup to append after the children. It is decided at the moment the tag is
                // opened so the closing markup always matches the branch that opened it (the
                // previous code re-checked PreserveTags here, ignoring preserveImages).
                string closingMarkup = null;

                if (preserveTags.Contains(nodeName))
                {
                    // Append the opening tag element, with any extraneous
                    // attributes stripped
                    escapedText.Append("<" + nodeName);
                    StripAttributes((IHTMLElement)current, escapedText);

                    if (!current.hasChildNodes())
                    {
                        // No children: close the tag right away (iframes get an explicit end tag)
                        if (nodeName == HTMLTokens.IFrame)
                        {
                            escapedText.Append("></" + nodeName + ">");
                        }
                        else
                        {
                            escapedText.Append("/>");
                        }
                    }
                    else
                    {
                        // The element has children, leave the tag open until they are written
                        escapedText.Append(">");
                        closingMarkup = "</" + nodeName + ">";
                    }
                }
                else if (ReplaceTags.Contains(nodeName))
                {
                    string replacement = (string)ReplaceTags[nodeName];

                    if (!current.hasChildNodes())
                    {
                        // No children: just emit the replacement tag
                        escapedText.Append("<" + replacement + "/>");
                    }
                    else if (IsChildlessTag(replacement))
                    {
                        // A childless replacement cannot wrap the children, so it is
                        // emitted as a self-closed tag after them
                        closingMarkup = "<" + replacement + "/>";
                    }
                    else
                    {
                        escapedText.Append("<" + replacement + ">");
                        closingMarkup = "</" + replacement + ">";
                    }
                }

                // Children are visited even when the current tag itself was dropped
                IHTMLDOMNode firstChild = current.firstChild;
                if (firstChild != null)
                {
                    StripChildNodes(firstChild, escapedText, preserveImages, progress);
                }

                // put in the closing markup for the current element (left open in case of children)
                if (closingMarkup != null)
                {
                    escapedText.Append(closingMarkup);
                }
            }
        }