/// <summary>
/// Computes how much of the remaining budget a tag would consume if it were emitted.
/// </summary>
/// <param name="costFilter">Filter that supplies the cost of each element.</param>
/// <param name="openTags">Currently open tags, searched via LastIndexOf for a match to an end tag.</param>
/// <param name="tag">The begin or end tag being priced.</param>
/// <returns>
/// For a begin tag: its own cost plus the cost of its end tag when TagRequiresClose says one is needed.
/// For an end tag that LastIndexOf finds in <paramref name="openTags"/>: its own cost minus the cost of
/// that end tag. Otherwise the tag's own cost. <c>int.MaxValue</c> if an OverflowException occurs.
/// </returns>
private static int TagCost(HTMLBalancerCostFilter costFilter, ArrayList openTags, Tag tag)
{
    try
    {
        int ownCost = costFilter.ElementCost(tag);

        if (tag is BeginTag)
        {
            if (!TagRequiresClose(tag.Name))
            {
                return ownCost;
            }

            // Opening this tag also commits us to paying for its closing tag later.
            return checked(ownCost + costFilter.ElementCost(MakeEndTag(tag.Name)));
        }

        if (tag is EndTag)
        {
            if (LastIndexOf(openTags, tag) == -1)
            {
                return ownCost;
            }

            // Balance already deducts the closing cost of every open tag (via LengthToClose),
            // so that portion is subtracted here.
            return checked(ownCost - costFilter.ElementCost(MakeEndTag(tag.Name)));
        }

        Trace.Fail("Unknown tag type");
        return ownCost;
    }
    catch (OverflowException)
    {
        // Report the largest possible cost when the arithmetic overflows.
        return int.MaxValue;
    }
}
/// <summary>
/// Sums the cost of the end tags for every entry in <paramref name="openTags"/>.
/// </summary>
/// <param name="costFilter">Filter that supplies the cost of each element.</param>
/// <param name="openTags">Tag names (stored as strings), walked from last to first.</param>
/// <returns>
/// The summed end-tag cost, or <c>int.MaxValue</c> if an OverflowException occurs while summing.
/// </returns>
private static long LengthToClose(HTMLBalancerCostFilter costFilter, ArrayList openTags)
{
    long closingCost = 0;

    try
    {
        int index = openTags.Count;
        while (--index >= 0)
        {
            string tagName = (string)openTags[index];
            closingCost = checked(closingCost + costFilter.ElementCost(MakeEndTag(tagName)));
        }
    }
    catch (OverflowException)
    {
        return int.MaxValue;
    }

    return closingCost;
}
/// <summary>
/// Balances the HTML and safely truncates it, using the supplied cost filter
/// to decide how much each element counts against <paramref name="maxCost"/>.
/// </summary>
/// <param name="html">The HTML to parse.</param>
/// <param name="maxCost">Total cost budget for the emitted markup.</param>
/// <param name="costFilter">Cost filter to use; a DefaultCostFilter is created when null.</param>
/// <param name="ellipsis">When true, "..." is appended if a text element had to be truncated.</param>
/// <returns>The emitted markup followed by an end tag for every entry left in the open-tag list.</returns>
public static string Balance(string html, int maxCost, HTMLBalancerCostFilter costFilter, bool ellipsis)
{
    bool textWasTruncated = false;
    SimpleHtmlParser parser = new SimpleHtmlParser(html);
    ArrayList openTags = new ArrayList();
    StringBuilder output = new StringBuilder();
    long spent = 0; // long to make sure that int32.MaxValue does not cause overflow

    if (costFilter == null)
    {
        costFilter = new DefaultCostFilter();
    }

    for (Element current = parser.Next(); null != current; current = parser.Next())
    {
        // Style, script, comment and directive elements are never emitted.
        if (current is StyleElement || current is ScriptElement || current is Comment || current is MarkupDirective)
        {
            continue;
        }

        // Budget still available after reserving room to close everything that is open.
        long reserved = LengthToClose(costFilter, openTags);
        long remaining = Math.Max(0, maxCost - spent - reserved);

        if (current is Tag)
        {
            Tag tag = (Tag)current;

            // skip corrupted tags
            if (tag is BeginTag && ((BeginTag)tag).Unterminated)
            {
                continue;
            }

            // don't use this tag; we're done
            if (TagCost(costFilter, openTags, tag) > remaining)
            {
                break;
            }

            RegisterTag(openTags, tag);
            output.Append(current.ToString());
            spent += costFilter.ElementCost(current);
            continue;
        }

        if (current is Text)
        {
            if (costFilter.ElementCost(current) <= remaining)
            {
                // plenty of room
                output.Append(current.ToString());
                spent += costFilter.ElementCost(current);
                continue;
            }

            // shrink down the text to fit, then stop
            output.Append(costFilter.TruncateText((Text)current, (int)remaining));
            textWasTruncated = true;
            break;
        }

        // Any other element kind is emitted whole or not at all.
        if (costFilter.ElementCost(current) > remaining)
        {
            break;
        }

        output.Append(current.ToString());
        spent += costFilter.ElementCost(current);
    }

    // Append an ellipsis if we truncated text.
    // We use "..." here rather than TextHelper.Ellipsis, because some mail clients don't understand "\u2026".
    if (ellipsis && textWasTruncated)
    {
        output.Append("...");
    }

    // Close whatever is still open, last entry first.
    int index = openTags.Count;
    while (--index >= 0)
    {
        output.Append(MakeEndTag((string)openTags[index]));
    }

    return output.ToString();
}
/// <summary>
/// Prices a tag against the remaining budget.
/// </summary>
/// <param name="costFilter">Filter that supplies the cost of each element.</param>
/// <param name="openTags">Currently open tags; consulted through LastIndexOf for end tags.</param>
/// <param name="tag">The tag to price.</param>
/// <returns>
/// Begin tag: the tag's cost, plus its end tag's cost when TagRequiresClose returns true.
/// End tag found in <paramref name="openTags"/>: the tag's cost minus the cost of that end tag.
/// Any other case: the tag's own cost. <c>int.MaxValue</c> when an OverflowException is raised.
/// </returns>
private static int TagCost(HTMLBalancerCostFilter costFilter, ArrayList openTags, Tag tag)
{
    try
    {
        int tagOnly = costFilter.ElementCost(tag);

        if (tag is BeginTag)
        {
            // A begin tag that needs closing is charged for its end tag up front.
            return TagRequiresClose(tag.Name)
                ? checked(tagOnly + costFilter.ElementCost(MakeEndTag(tag.Name)))
                : tagOnly;
        }

        if (tag is EndTag)
        {
            // When the end tag matches an open tag, the end-tag cost is subtracted;
            // Balance deducts the closing cost of open tags separately via LengthToClose.
            return (LastIndexOf(openTags, tag) != -1)
                ? checked(tagOnly - costFilter.ElementCost(MakeEndTag(tag.Name)))
                : tagOnly;
        }

        Trace.Fail("Unknown tag type");
        return tagOnly;
    }
    catch (OverflowException)
    {
        // An overflow is reported as the maximum possible cost.
        return int.MaxValue;
    }
}
/// <summary>
/// Totals the cost of emitting an end tag for each entry of <paramref name="openTags"/>.
/// </summary>
/// <param name="costFilter">Filter that supplies the cost of each element.</param>
/// <param name="openTags">Tag names (cast to string), visited from the last entry to the first.</param>
/// <returns>
/// The accumulated cost, or <c>int.MaxValue</c> when an OverflowException is raised during the sum.
/// </returns>
private static long LengthToClose(HTMLBalancerCostFilter costFilter, ArrayList openTags)
{
    long sum = 0;

    try
    {
        // "left" counts the entries not yet visited; the entry visited is at left - 1.
        for (int left = openTags.Count; left > 0; left--)
        {
            int endTagCost = costFilter.ElementCost(MakeEndTag((string)openTags[left - 1]));
            sum = checked(sum + endTagCost);
        }
    }
    catch (OverflowException)
    {
        return int.MaxValue;
    }

    return sum;
}
/// <summary>
/// Balances the HTML and safely truncates it, using a custom algorithm
/// (the cost filter) to determine how much each element counts against maxCost.
/// </summary>
/// <param name="html">The HTML to parse.</param>
/// <param name="maxCost">Total cost budget for the emitted markup.</param>
/// <param name="costFilter">Cost filter to use; replaced by a new DefaultCostFilter when null.</param>
/// <param name="ellipsis">Whether to append "..." after a truncated text element.</param>
/// <returns>The emitted markup, with an end tag appended for each entry remaining in the open-tag list.</returns>
public static string Balance(string html, int maxCost, HTMLBalancerCostFilter costFilter, bool ellipsis)
{
    bool truncated = false;
    SimpleHtmlParser parser = new SimpleHtmlParser(html);
    ArrayList openTags = new ArrayList();
    StringBuilder result = new StringBuilder();
    long used = 0; // long to make sure that int32.MaxValue does not cause overflow

    if (costFilter == null)
    {
        costFilter = new DefaultCostFilter();
    }

    Element node;
    while (null != (node = parser.Next()))
    {
        // These element kinds are dropped from the output entirely.
        bool ignorable = node is StyleElement
            || node is ScriptElement
            || node is Comment
            || node is MarkupDirective;
        if (ignorable)
        {
            continue;
        }

        // What is left of the budget once the cost of closing all open tags is set aside.
        long budget = Math.Max(0, maxCost - used - LengthToClose(costFilter, openTags));

        // --- Tags ---
        if (node is Tag)
        {
            if (node is BeginTag && ((BeginTag)node).Unterminated)
            {
                continue; // skip corrupted tags
            }

            if (TagCost(costFilter, openTags, (Tag)node) > budget)
            {
                break; // don't use this tag; we're done
            }

            RegisterTag(openTags, (Tag)node);
            result.Append(node.ToString());
            used += costFilter.ElementCost(node);
            continue;
        }

        // --- Text ---
        if (node is Text)
        {
            if (costFilter.ElementCost(node) > budget)
            {
                // shrink down the text to fit
                result.Append(costFilter.TruncateText((Text)node, (int)budget));
                truncated = true;
                break;
            }

            // plenty of room
            result.Append(node.ToString());
            used += costFilter.ElementCost(node);
            continue;
        }

        // --- Everything else: all or nothing ---
        if (costFilter.ElementCost(node) > budget)
        {
            break;
        }

        result.Append(node.ToString());
        used += costFilter.ElementCost(node);
    }

    // Append an ellipsis if we truncated text.
    // We use "..." here rather than TextHelper.Ellipsis, because some mail clients don't understand "\u2026".
    if (truncated && ellipsis)
    {
        result.Append("...");
    }

    // Emit end tags for whatever is still open, from the last entry to the first.
    for (int left = openTags.Count; left > 0; left--)
    {
        result.Append(MakeEndTag((string)openTags[left - 1]));
    }

    return result.ToString();
}