/// <summary>
/// Applies the requested inline-style operation to an HTML string.
/// </summary>
/// <param name="str">The HTML text to process.</param>
/// <param name="operation">
/// The operation to apply. Defaults to
/// <see cref="INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES"/>.
/// </param>
/// <returns>
/// A tuple whose first item is the (possibly rewritten) HTML and whose second item is the
/// generated style block, or an empty string when no style block was generated.
/// </returns>
/// <exception cref="NotSupportedException">
/// Thrown when <paramref name="operation"/> is not one of the handled values.
/// </exception>
public static Tuple<string, string> OperateOnInlineStyles(
    this string str,
    INLINE_STYLE_OPERATIONS operation = INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES
)
{
    switch (operation)
    {
        case INLINE_STYLE_OPERATIONS.NONE:
            // No changes: hand the input back untouched, with no style block.
            return new Tuple<string, string>(str, string.Empty);

        case INLINE_STYLE_OPERATIONS.CONSOLIDATE_STYLES_AND_REMOVE_COLORS:
        case INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES:
            return _HandleAttributes(str, operation);

        default:
            // NotSupportedException derives from Exception, so existing catch-all callers
            // keep working while new callers can catch something specific.
            throw new NotSupportedException("Inline style operation not supported: " + operation.ToString());
    }
}
/// <summary>
/// Scans every tag in <paramref name="source"/> and removes its <c>class</c>,
/// <c>data-bind</c> and <c>style</c> attributes. When <paramref name="operation"/> is
/// <see cref="INLINE_STYLE_OPERATIONS.CONSOLIDATE_STYLES_AND_REMOVE_COLORS"/>, each non-empty
/// inline style is replaced by a generated <c>class</c> attribute and the matching CSS rule
/// is collected into a style block.
/// </summary>
/// <param name="source">The HTML text to rewrite. Null or empty input is returned unchanged.</param>
/// <param name="operation">The inline-style operation being performed.</param>
/// <returns>
/// A tuple of the rewritten HTML and the generated <c>&lt;style&gt;</c> block. The style block
/// is empty unless styles are being consolidated and at least one class was generated.
/// </returns>
private static Tuple<string, string> _HandleAttributes(string source, INLINE_STYLE_OPERATIONS operation)
{
    if (string.IsNullOrEmpty(source))
    {
        // Nothing to scan; avoids a NullReferenceException on the first IndexOf.
        return new Tuple<string, string>(source, string.Empty);
    }

    bool consolidate = operation == INLINE_STYLE_OPERATIONS.CONSOLIDATE_STYLES_AND_REMOVE_COLORS;

    // Generated class names are "q<n>_<ticks>"; the tick count keeps names from separate
    // runs from colliding with each other.
    long startTicks = DateTime.UtcNow.Ticks;
    int classNum = 1;

    // Maps cleaned CSS text -> generated class name, so identical inline styles share a class.
    Dictionary<string, string> cssAndClassNames = new Dictionary<string, string>();

    // "style" must stay last: handling it inserts class attributes, which the "class" pass
    // would otherwise strip again.
    string[] attributesToRemove = { "class", "data-bind", "style" };

    int lessThan = source.IndexOf('<');
    int greaterThan = -1 == lessThan ? -1 : source.IndexOfNextUnescapedGreaterThan(lessThan);

    while (lessThan != -1 && greaterThan != -1)
    {
        string tagOriginal = source.Substring(lessThan, greaterThan - lessThan + 1);
        string tag = tagOriginal;
        bool replace = false;

        foreach (string attributeName in attributesToRemove)
        {
            string attribute = tag.RetrieveAttribute(attributeName, true);

            // An empty attribute string would make string.Replace throw, so treat it like "not found".
            while (!string.IsNullOrEmpty(attribute))
            {
                string replaceVal = string.Empty;

                if (consolidate && attributeName == "style")
                {
                    string css = _ExtractConsolidatedCss(attribute);
                    if (!string.IsNullOrWhiteSpace(css))
                    {
                        string className;
                        if (!cssAndClassNames.TryGetValue(css, out className))
                        {
                            className = $"q{classNum++}_{startTicks}";
                            cssAndClassNames.Add(css, className);
                        }

                        replaceVal = $@"class=""{className}""";
                    }
                }

                string updatedTag = tag.Replace(attribute, replaceVal);
                if (updatedTag == tag)
                {
                    // The retrieved attribute text is not literally present in the tag, so
                    // retrying would return the same attribute forever. Bail out instead.
                    break;
                }

                replace = true;
                tag = updatedTag;
                attribute = tag.RetrieveAttribute(attributeName, true);
            }
        }

        if (replace)
        {
            source = source.ReplaceFirst(tagOriginal, tag);
        }

        // The rewritten tag still starts at lessThan, so continue with the next '<' after it.
        lessThan = source.IndexOf('<', lessThan + 1);
        if (-1 != lessThan)
        {
            greaterThan = source.IndexOfNextUnescapedGreaterThan(lessThan);
        }
    }

    string styleBlock = string.Empty;
    if (consolidate && cssAndClassNames.Count > 0)
    {
        StringBuilder sbStyle = new StringBuilder("<style>\r");
        foreach (var kvp in cssAndClassNames)
        {
            sbStyle.Append($" .{kvp.Value} {{{kvp.Key}}}\r");
        }

        sbStyle.Append("</style>\r");
        styleBlock = sbStyle.ToString();
    }

    return new Tuple<string, string>(source, styleBlock);
}

/// <summary>
/// Turns a complete <c>style</c> attribute (name, equals sign and value) into the bare CSS
/// text used as the body of a generated class rule.
/// </summary>
/// <param name="attribute">The attribute text as found in the tag.</param>
/// <returns>The cleaned CSS, or an empty string when the attribute carries no value.</returns>
private static string _ExtractConsolidatedCss(string attribute)
{
    int valueStart = attribute.IndexOfAny(_EitherQuote);
    string inAttributeDelimiter;

    if (valueStart >= 0)
    {
        // Use whichever quote character is NOT delimiting the value for quotes inside it.
        inAttributeDelimiter = attribute[valueStart].Equals('"') ? "'" : "\"";
    }
    else
    {
        // Unquoted value (e.g. style=font-weight:bold): take everything after the '='.
        int equalsAt = attribute.IndexOf('=');
        if (equalsAt < 0)
        {
            return string.Empty;
        }

        valueStart = equalsAt + 1;
        inAttributeDelimiter = "'";
    }

    // Clipboards tend to return inline styles containing a raw double quote or, strangely,
    // &quot; inside the value, for example:
    //
    //     font-family: &quot;Source Sans Pro&quot;,&quot;Helvetica Neue&quot;
    //
    // Both are normalized to the in-attribute delimiter chosen above.
    // NOTE(review): the first Replace literal was garbled in the source and is restored here
    // as a literal double quote, matching the original comment; confirm against history.
    // NOTE(review): _PatternStripColorStyle presumably strips color declarations; confirm.
    string cleaned = Regex.Replace(attribute, _PatternStripColorStyle, " ")
        .Replace("\"", inAttributeDelimiter)
        .Replace("&quot;", inAttributeDelimiter)
        .UnescapeAnyEscapedQuotesInStyle();

    if (valueStart > cleaned.Length)
    {
        // Cleaning shortened the text past the value offset; nothing usable remains.
        return string.Empty;
    }

    return cleaned.Substring(valueStart).Trim(_EitherQuote);
}
// The constructor deliberately sits ahead of the properties: it is the natural place to
// start reading this class, even though properties would usually come first.

/// <summary>
/// Builds an HtmlFragmentViewModel from a string in Microsoft HTML clipboard format:
/// the leading header lines are parsed into properties and the remainder is kept as the
/// HTML source.
/// </summary>
/// <param name="rawClipboard">The Microsoft HTML clipboard formatted string to parse into a view model.</param>
/// <param name="consolidateMutlilineHtmlTags">
/// Multiline html tags are often valid, but may make further manipulation of the clipboard
/// text difficult or unwieldy (eg, putting it into a Markdown blockquote format). When
/// <c>true</c> (the default), multiline tags are unwrapped so that inserts at the start and
/// end of lines do not break the html. (The parameter name keeps its original spelling so
/// that callers using named arguments continue to compile.)
/// </param>
/// <param name="operationType">
/// How inline styles are treated; passed straight to <c>OperateOnInlineStyles</c>. Keeping
/// the original inline color styles in clipboards from Chrome (in particular) can sometimes
/// cause unexpected and difficult to read results, so the default,
/// <see cref="INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES"/>, strips them.
/// </param>
/// <param name="EOL">
/// The line ending used in the resulting <c>HtmlSource</c> and <c>StyleBlock</c>. Text is
/// processed internally with carriage returns and converted at the end.
/// </param>
public HtmlFragmentViewModel(
    string rawClipboard,
    bool consolidateMutlilineHtmlTags = true,
    INLINE_STYLE_OPERATIONS operationType = INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES,
    string EOL = "\r\n"
)
{
    try
    {
        _fragmentSourceRaw = rawClipboard.NormalizeNewlineToCarriageReturn_();

        // Dropping empty entries is fine semantically, though it also removes every blank
        // line from the html, which may not be ideal aesthetically.
        string[] aLines = _fragmentSourceRaw.Split(new[] { '\r' }, StringSplitOptions.RemoveEmptyEntries);

        int i = 0;
        bool headerOver = false;
        while (i < aLines.Length && !headerOver)
        {
            string line = aLines[i];
            int colLoc = line.IndexOf(':');

            // Every known header key is at least six characters long before its colon.
            if (colLoc > 5)
            {
                int parsed;
                string value = line.Split(new[] { ':' }, 2)[1];

                // Header keys are identified by their first six characters, case-insensitively.
                switch (line.Substring(0, 6).ToLowerInvariant())
                {
                    case "versio":
                        this.Version = value;
                        break;

                    case "starth":
                        if (int.TryParse(value, out parsed))
                        {
                            this.StartHtml = parsed;
                        }
                        break;

                    case "endhtm":
                        if (int.TryParse(value, out parsed))
                        {
                            this.EndHtml = parsed;
                        }
                        break;

                    case "startf":
                        if (int.TryParse(value, out parsed))
                        {
                            this.StartFragment = parsed;
                        }
                        break;

                    case "endfra":
                        if (int.TryParse(value, out parsed))
                        {
                            this.EndFragment = parsed;
                        }
                        break;

                    case "source":
                        this.SourceUrl = value;
                        break;

                    default:
                        // If this is a header value we don't know about (say Version is > 1.0),
                        // just skip it. Otherwise pretend we're in the HTML.
                        // TODO: Seems fragile, even with duck typing by looking for <!--StartFragment-->.
                        if (!Regex.IsMatch(line, @"^[A-Za-z]+:")
                            || line.IndexOf("<!--StartFragment-->", StringComparison.Ordinal) > -1)
                        {
                            headerOver = true;
                        }
                        break;
                }

                // When the header just ended, stay on this line so it is kept as html below.
                if (!headerOver)
                {
                    i++;
                }
            }
            else
            {
                headerOver = true;
            }
        }

        // Everything after the header is the html source, re-joined with carriage returns.
        StringBuilder sbClippedSource = new StringBuilder();
        for (; i < aLines.Length; i++)
        {
            sbClippedSource.Append(aLines[i]).Append('\r');
        }

        this.HtmlSource = sbClippedSource.ToString().Trim();

        if (consolidateMutlilineHtmlTags)
        {
            this.HtmlSource = this.HtmlSource.ConslidateMultilineHtmlTags();
        }

        var htmlAndClasses = this.HtmlSource.OperateOnInlineStyles(operationType);
        this.HtmlSource = htmlAndClasses.Item1;
        this.StyleBlock = htmlAndClasses.Item2 ?? string.Empty;

        if (!EOL.Equals("\r"))
        {
            this.HtmlSource = this.HtmlSource.Replace("\r", EOL);

            // string.Replace returns a new string; the result must be assigned back or the
            // style block keeps its bare carriage returns.
            this.StyleBlock = this.StyleBlock.Replace("\r", EOL);
        }
    }
    catch (Exception e)
    {
        // Best effort: record the failure on the view model rather than letting construction throw.
        System.Diagnostics.Debug.WriteLine(e.ToString());

        // Only break when a debugger is actually attached; an unconditional Break() can
        // prompt or fault the process when no debugger is present.
        if (System.Diagnostics.Debugger.IsAttached)
        {
            System.Diagnostics.Debugger.Break();
        }

        this._fragmentSourceRaw = rawClipboard;
        this.Error += e.Message + "\n";
    }
}