Exemplo n.º 1
0
        /// <summary>
        /// Sets the attribute named by the "attribute" setting to the "value" setting on every
        /// node matched by the "pattern" setting, adding the attribute when it is missing.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is modified.</param>
        /// <returns>
        /// The same <paramref name="thisPart"/>. When the pattern matches nothing the part is
        /// returned unchanged and no message is added.
        /// </returns>
        public override PagePart Execute(PagePart thisPart)
        {
            int counter = 0;

            HtmlNode thisContent = thisPart.ParsedContent;

            string pattern = Settings["pattern"];
            string attribute = Settings["attribute"];
            string value = Settings["value"];

            // Evaluate the query once; a null result is treated as "no such pattern".
            var matchingNodes = thisContent.SelectNodes(pattern);
            if (matchingNodes == null)
            {
                return thisPart;
            }

            foreach (HtmlNode thisNode in matchingNodes)
            {
                counter++;

                var existing = thisNode.Attributes[attribute];
                if (existing != null)
                {
                    existing.Value = value;
                }
                else
                {
                    thisNode.Attributes.Add(attribute, value);
                }
            }

            thisPart.ParsedContent = thisContent;

            // The count placeholder was previously written as "(3)", so the number of
            // affected elements never made it into the message.
            thisPart.AddMessage(this, StandardMessageTypes.Executed, String.Format("Pattern: {0}; Attribute: {1}; Value: {2} Set attribute on {3} element(s)", pattern, attribute, value, counter.ToString()));
            return thisPart;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Strips "http://domain" and "https://domain" prefixes from link hrefs for every domain
        /// listed in the comma-separated "removeDomains" setting.
        /// </summary>
        /// <param name="thisPart">The page part whose links are rewritten.</param>
        /// <returns>
        /// The same <paramref name="thisPart"/>. When the content has no anchors the part is
        /// returned unchanged and no message is added.
        /// </returns>
        public override PagePart Execute(PagePart thisPart)
        {
            int counter = 0;

            // Trim entries and drop blanks: an empty entry (e.g. from a trailing comma) would
            // otherwise match every href and strip the bare "http://" / "https://" scheme.
            List<string> domainsToRemove = Settings["removeDomains"]
                .Split(new Char[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(domain => domain.Trim())
                .Where(domain => domain.Length > 0)
                .ToList();

            var links = thisPart.ParsedContent.SelectNodes(".//a");
            if (links == null)
            {
                return thisPart;
            }

            foreach (HtmlNode link in links)
            {
                var href = link.Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                foreach (string domainToRemove in domainsToRemove)
                {
                    string before = href.Value;
                    string after = before
                        .Replace(String.Concat("http://", domainToRemove), "")
                        .Replace(String.Concat("https://", domainToRemove), "");

                    // Count only hrefs that actually changed, so the message reflects real removals.
                    if (after != before)
                    {
                        counter++;
                        href.Value = after;
                    }
                }
            }

            thisPart.AddMessage(this, StandardMessageTypes.Executed, String.Format("Removed {0} domain(s)", counter.ToString()));

            return thisPart;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Removes every node matched by the "pattern" setting whose inner HTML is empty or
        /// whitespace only, then records how many were removed.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is pruned.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            // Ampersands in the configured pattern are written as entities before querying.
            string pattern = Settings["pattern"].Replace("&", "&amp;");
            int removed = 0;

            var candidates = thisPart.ParsedContent.SelectNodes(pattern);
            if (candidates != null)
            {
                foreach (HtmlNode candidate in candidates)
                {
                    // Anything with non-whitespace markup inside is kept.
                    bool hasContent = candidate.InnerHtml.Trim().Length != 0;
                    if (hasContent)
                    {
                        continue;
                    }

                    removed++;
                    candidate.ParentNode.RemoveChild(candidate);
                }
            }

            string summary = String.Format("Pattern: {0}; Removed {1} element(s).", pattern, removed.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Removes the attribute named by the "attribute" setting from nodes matched by the
        /// "pattern" setting. When a "value" setting is present, only attributes whose value
        /// equals it (compared after invariant lower-casing) are removed.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is modified.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string pattern = Settings["pattern"];
            string attribute = Settings["attribute"];

            // A null filter value means "remove regardless of the current value".
            string value = Settings.ContainsKey("value") ? Settings["value"] : null;

            int removedCount = 0;

            var matches = thisPart.ParsedContent.SelectNodes(pattern);
            if (matches != null)
            {
                foreach (HtmlNode node in matches)
                {
                    var target = node.Attributes[attribute];
                    if (target == null)
                    {
                        continue;
                    }

                    bool valueAccepted = value == null
                        || value.ToLowerInvariant() == target.Value.ToLowerInvariant();
                    if (!valueAccepted)
                    {
                        continue;
                    }

                    removedCount++;
                    target.Remove();
                }
            }

            string summary = String.Format(" Pattern: {0}; Attribute: {1}; Removed {2} attribute(s).", pattern, attribute, removedCount.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Splits the content on the character(s) in the "character" setting, discards the final
        /// piece, and joins the remaining pieces back together.
        /// </summary>
        /// <param name="thisPart">The page part whose content is shortened.</param>
        /// <returns>
        /// The same <paramref name="thisPart"/>. When the character does not occur, a notice is
        /// added and the content is left untouched.
        /// </returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string character = Settings["character"];

            if (!thisPart.Content.Contains(character))
            {
                thisPart.AddMessage(this, StandardMessageTypes.Notice, String.Format("Character \"{0}\" not found.", character));

                // Stop here: with no separator the split yields a single piece, and dropping
                // that "last" piece would blank the whole content.
                return thisPart;
            }

            int lengthBefore = thisPart.Content.Length;

            // ToCharArray means every character of the setting acts as its own separator.
            var parts = thisPart.Content.Split(character.ToCharArray());

            // NOTE(review): joining with String.Empty also drops the separators from the kept
            // portion - confirm that is intended rather than re-joining with the separator.
            thisPart.Content = String.Join(String.Empty, parts.Take(parts.Length - 1).ToArray());

            thisPart.AddMessage(this, StandardMessageTypes.Executed, String.Format("Length before: {0}; Length after: {1}", lengthBefore.ToString(), thisPart.Content.Length.ToString()));

            return thisPart;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Trims leading and trailing whitespace from the part's content and records the
        /// length before and after.
        /// </summary>
        /// <param name="thisPart">The page part whose content is trimmed.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string untrimmed = thisPart.Content;
            int originalLength = untrimmed.Length;

            thisPart.Content = untrimmed.Trim();

            string summary = String.Format("Length before: {0}; Length after: {1}", originalLength.ToString(), thisPart.Content.Length.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Downloads every non-remote image referenced by the part, saves it under the
        /// configured directory, and rewrites the image's src to the saved path.
        /// </summary>
        /// <param name="thisPart">The page part whose images are localized.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            int counter = 0;

            // NOTE(review): as a relative XPath, "img" matches only direct children of the
            // parsed content node - confirm whether ".//img" was intended.
            string pattern = "img";

            // Use the "directory" setting when present, otherwise the artifact path.
            string directory = Settings.Exists("directory")
                ? Settings["directory"]
                : ArtifactManager.GetArtifactPath("downloaded-assets");

            var images = thisPart.ParsedContent.SelectNodes(pattern);
            if (images != null)
            {
                foreach (HtmlNode image in images)
                {
                    // An <img> without a usable src has nothing to download; skipping it
                    // avoids dereferencing a missing attribute.
                    var srcAttribute = image.Attributes["src"];
                    if (srcAttribute == null || String.IsNullOrEmpty(srcAttribute.Value))
                    {
                        continue;
                    }

                    string source = srcAttribute.Value;

                    // If it's remote, skip it. URL schemes are case-insensitive, so compare that way.
                    if (source.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    counter++;

                    // Get the corrected path with the domain (to make the request)
                    string assetPathWithDomain = thisPart.Page.GetContextualUrl(source, true);

                    // Get the corrected path without the domain (to save the file)
                    string assetPathWithoutDomain = thisPart.Page.GetContextualUrl(source, false);

                    // Add the directory from the settings
                    string newImagePath = String.Concat(directory, "/", assetPathWithoutDomain);

                    // Get the file data
                    Byte[] fileData = Utils.MakeBinaryHttpRequest(assetPathWithDomain);

                    // Save the file
                    ArtifactManager.SaveFileData(fileData, newImagePath);

                    // Correct the reference to the image
                    srcAttribute.Value = newImagePath;

                    thisPart.Meta.Add(String.Concat("Downloaded Asset ", counter.ToString()), assetPathWithoutDomain);
                }
            }

            thisPart.AddMessage(this, StandardMessageTypes.Executed, String.Format("Saved {0} image(s) locally.", counter.ToString()));

            return thisPart;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Removes HTML comments (<c>&lt;!-- ... --&gt;</c>, including multi-line ones) from the
        /// part's content and records the lengths involved.
        /// </summary>
        /// <param name="thisPart">The page part whose content is stripped of comments.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string original = thisPart.Content;
            int originalLength = original.Length;

            // Singleline lets '.' span line breaks; the lazy quantifier stops at the first "-->".
            thisPart.Content = Regex.Replace(original, @"<!--.*?-->", String.Empty, RegexOptions.Singleline);

            int strippedLength = thisPart.Content.Length;
            string summary = String.Format(
                "Length before: {0}; Length after: {1}; Comment characters removed: {2}",
                originalLength.ToString(),
                strippedLength.ToString(),
                (originalLength - strippedLength).ToString());

            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Replaces every occurrence of the "oldString" setting in the content with the
        /// "newString" setting, or with nothing when "newString" is not configured.
        /// </summary>
        /// <param name="thisPart">The page part whose content is rewritten.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string original = thisPart.Content;
            int originalLength = original.Length;

            // A missing replacement means "delete the old string".
            string replacement = String.Empty;
            if (Settings.Exists("newString"))
            {
                replacement = Settings["newString"];
            }

            string target = Settings["oldString"];

            thisPart.Content = original.Replace(target, replacement);

            string summary = String.Format("Length before: {0}; Length after: {1}", originalLength.ToString(), thisPart.Content.Length.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Removes a CSS class from nodes matched by the "tag" setting (default "*"). When the
        /// "class" setting is absent or "*", the whole class attribute is removed instead.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is modified.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            int counter = 0;

            string pattern = Settings.Exists("tag") ? Settings["tag"] : "*";
            string className = Settings.Exists("class") ? Settings["class"] : "*";

            var matchingNodes = thisPart.ParsedContent.SelectNodes(pattern);
            if (matchingNodes != null)
            {
                foreach (HtmlNode matchingNode in matchingNodes)
                {
                    var classAttribute = matchingNode.Attributes["class"];
                    if (classAttribute == null)
                    {
                        continue;
                    }

                    if (className == "*")
                    {
                        counter++;
                        classAttribute.Remove();
                        continue;
                    }

                    // Split on any whitespace and ignore empty pieces, so tabs, newlines and
                    // repeated spaces between class names are handled.
                    List<string> currentClasses = classAttribute.Value
                        .Split((Char[])null, StringSplitOptions.RemoveEmptyEntries)
                        .ToList();

                    // Remove every occurrence, not just the first duplicate.
                    int occurrences = currentClasses.RemoveAll(current => current == className);
                    if (occurrences == 0)
                    {
                        // This node never had the class: leave it untouched and uncounted.
                        continue;
                    }

                    counter++;

                    // If we now have no classes, just remove the whole thing
                    if (currentClasses.Count == 0)
                    {
                        classAttribute.Remove();
                    }
                    else
                    {
                        // Otherwise, set it back to what's left
                        classAttribute.Value = String.Join(" ", currentClasses.ToArray());
                    }
                }
            }

            thisPart.AddMessage(this, StandardMessageTypes.Executed, String.Format(" Pattern: {0}; Class: {1}; Removed classes from {2} tag(s).", pattern, className, counter.ToString()));
            return thisPart;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Replaces the part's content with the inner HTML of the first node matched by the
        /// "pattern" setting; leaves the content alone when nothing matches.
        /// </summary>
        /// <param name="thisPart">The page part to narrow down to the matched node.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            string pattern = Settings["pattern"];

            var target = thisPart.ParsedContent.SelectSingleNode(pattern);

            string outcome;
            if (target == null)
            {
                outcome = "Not found";
            }
            else
            {
                thisPart.Content = target.InnerHtml;
                outcome = "Found and inflated";
            }

            string summary = String.Format(" Pattern: {0}; Result: {1}", pattern, outcome);
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Runs the part's content through the tidy <c>Document</c> with a fixed set of options
        /// and stores the saved output back as the content.
        /// </summary>
        /// <param name="thisPart">The page part whose content is cleaned.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            using (Document tidy = new Document(thisPart.Content))
            {
                // Diagnostics are switched off.
                tidy.ShowWarnings = false;
                tidy.Quiet = true;

                // Output options: indented block elements, XHTML, body only, wrap at 10000.
                tidy.IndentBlockElements = AutoBool.Yes;
                tidy.OutputXhtml = true;
                tidy.OutputBodyOnly = AutoBool.Yes;
                tidy.WrapAt = 10000;

                tidy.CleanAndRepair();

                thisPart.Content = tidy.Save();

                thisPart.AddMessage(this, StandardMessageTypes.Executed, "Processed by HTMLTidy");
            }

            return thisPart;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Renames every element named by the "old" setting to the name in the "new" setting.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is modified.</param>
        /// <returns>
        /// The same <paramref name="thisPart"/>. When no element with the old name exists the
        /// part is returned unchanged and no message is added.
        /// </returns>
        public override PagePart Execute(PagePart thisPart)
        {
            // Leading slashes are stripped from both settings; the query adds its own "//".
            string oldTag = Settings["old"].TrimStart('/');
            string newTag = Settings["new"].TrimStart('/');

            var matches = thisPart.ParsedContent.SelectNodes("//" + oldTag);
            if (matches == null)
            {
                return thisPart;
            }

            int swapped = 0;
            foreach (HtmlNode node in matches)
            {
                node.Name = newTag;
                swapped++;
            }

            string summary = String.Format("Old tag: {0}; New tag: {1}; Swapped {2} tag(s).", oldTag, newTag, swapped.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Removes every node matched by the "pattern" setting from its parent and records
        /// how many were removed.
        /// </summary>
        /// <param name="thisPart">The page part whose parsed content is pruned.</param>
        /// <returns>The same <paramref name="thisPart"/>.</returns>
        public override PagePart Execute(PagePart thisPart)
        {
            // Ampersands in the configured pattern are written as entities before querying.
            string pattern = Settings["pattern"].Replace("&", "&amp;");

            // Defaults to false when the setting is absent. The flag is passed straight to
            // RemoveChild; presumably it keeps the removed node's children in place - confirm
            // against the HTML library's documentation.
            bool preserveContents = Settings.Exists("preserveContents")
                && Convert.ToBoolean(Settings["preserveContents"]);

            int removed = 0;

            var doomed = thisPart.ParsedContent.SelectNodes(pattern);
            if (doomed != null)
            {
                foreach (HtmlNode node in doomed)
                {
                    removed++;
                    node.ParentNode.RemoveChild(node, preserveContents);
                }
            }

            string summary = String.Format("Pattern: {0}; Removed {1} element(s).", pattern, removed.ToString());
            thisPart.AddMessage(this, StandardMessageTypes.Executed, summary);
            return thisPart;
        }