/// <summary>
/// Creates an <c>HtmlTextElementIterator</c> (presumably the constructor of the enclosing class; the class header is not visible here).
/// The body is empty, so the supplied element is neither stored nor otherwise used by this constructor.
/// </summary>
/// <param name="HtmlPlainText">The plain-text element passed by the caller; unused by this constructor.</param>
public HtmlTextElementIterator(HtmlPlainText HtmlPlainText) { }
/// <summary>
/// Replaces the Html-Tag-Names in the input according to a replacement map, while it
/// conserves the structure of the html and the Html-Attributes of the replaced tags.
/// </summary>
/// <param name="HtmlInput">The Html-String you want to replace tags in.</param>
/// <param name="TagReplacementMap">Dictionary in which the replacement rules are defined. (Form: Key-OldTagName, Value-NewTagName)</param>
/// <param name="ReplacementType">
/// How tags without an entry in <paramref name="TagReplacementMap"/> are treated: removed from
/// their parent (together with their content), left untouched, or replaced by a plain-text
/// element holding the text collected from the tag.
/// </param>
/// <returns>
/// The string produced by <c>HtmlConstruct.ToString()</c> for the children of the internal
/// root tag after all replacements have been applied.
/// </returns>
/// <exception cref="NotImplementedException">
/// An unmatched tag is encountered while <paramref name="ReplacementType"/> holds an unhandled value,
/// or an element is neither exactly <c>HtmlPlainText</c> nor exactly <c>HtmlTag</c>.
/// </exception>
public string ApplyTagReplacementMap(string HtmlInput, Dictionary<string, string> TagReplacementMap, TagReplacementTypes ReplacementType)
{
    // Artificial root so that every parsed top-level element has a parent to be replaced in.
    HtmlTag root = new HtmlTag(null, false, "root");
    HtmlConstruct ObjectConstruct = Analyze(HtmlInput, root);
    root.AddChildren(ObjectConstruct.elements);

    // NOTE(review): the element tree is modified while this iterator walks it; whether that is
    // safe depends on the iterator implementation, which is not visible here - confirm.
    IHtmlTagIterator iterator = ObjectConstruct.getIterator();
    while (iterator.hasNext())
    {
        HtmlElement element = iterator.next();
        Type elementType = element.GetType();

        if (elementType == typeof(HtmlPlainText))
        {
            // Plain text is never touched.
            continue;
        }

        // Exact type comparison (not "is") is kept on purpose so derived types still hit this error.
        if (elementType != typeof(HtmlTag))
        {
            throw new NotImplementedException("HtmlElement Type not implemented!");
        }

        HtmlTag tag = (HtmlTag)element;
        var parent = tag.getParent();

        // Single lookup instead of Keys.Contains followed by the indexer.
        string newTagName;
        if (TagReplacementMap.TryGetValue(tag.tagName, out newTagName))
        {
            // Same parent, children and attributes - only the tag name changes.
            HtmlTag renamedTag = new HtmlTag(parent, tag.isSingleTag, newTagName, tag.children, tag.tagAttributes);
            int tagIndex = parent.children.IndexOf(element);
            parent.children[tagIndex] = renamedTag;
            continue;
        }

        // Tag not found in the replacement map.
        switch (ReplacementType)
        {
            case TagReplacementTypes.RemoveNotMatchedTags:
                parent.children.Remove(element);
                break;

            case TagReplacementTypes.IgnoreNotMatchedTags:
                // Leave the tag as it is.
                break;

            case TagReplacementTypes.ConvertNotMatchedTagsToPlainText:
                HtmlPlainText textReplacement = new HtmlPlainText(parent, CollectPlainText(tag));
                int textIndex = parent.children.IndexOf(element);
                parent.children[textIndex] = textReplacement;
                break;

            default:
                throw new NotImplementedException("TagReplacementType not implemented!");
        }
    }

    return new HtmlConstruct(root.children).ToString();
}

/// <summary>
/// Concatenates the content of every element of exact type <c>HtmlPlainText</c> that the
/// iterator of <paramref name="tag"/> yields, in iteration order.
/// </summary>
/// <param name="tag">The tag whose iterator is walked.</param>
/// <returns>The concatenated content; an empty string when no plain-text element is yielded.</returns>
private static string CollectPlainText(HtmlTag tag)
{
    // StringBuilder avoids re-allocating the whole string for every appended fragment.
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    IHtmlTagIterator iterator = tag.getIterator();
    while (iterator.hasNext())
    {
        HtmlElement current = iterator.next();
        if (current.GetType() == typeof(HtmlPlainText))
        {
            text.Append(((HtmlPlainText)current).content);
        }
    }
    return text.ToString();
}