Exemplo n.º 1
0
        /// <summary>
        /// Applies the requested inline-style operation to an HTML string.
        /// </summary>
        /// <param name="str">The HTML source to process.</param>
        /// <param name="operation">The operation to perform. Defaults to
        /// <c>INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES</c>.</param>
        /// <returns>
        /// A tuple whose first item is the resulting HTML and whose second item is the
        /// accompanying style block. For <c>NONE</c> the input is returned untouched together
        /// with an empty string; for the other supported operations the result comes from
        /// <c>_HandleAttributes</c>.
        /// </returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when <paramref name="operation"/> is not one of the handled values.
        /// </exception>
        public static Tuple <string, string> OperateOnInlineStyles(
            this string str,
            INLINE_STYLE_OPERATIONS operation = INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES
            )
        {
            switch (operation)
            {
            case INLINE_STYLE_OPERATIONS.NONE:
                // No changes.
                return new Tuple <string, string>(str, string.Empty);

            case INLINE_STYLE_OPERATIONS.CONSOLIDATE_STYLES_AND_REMOVE_COLORS:
            case INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES:
                return _HandleAttributes(str, operation);

            default:
                // A specific exception type instead of the base Exception; it still derives from
                // Exception, so existing catch (Exception) handlers keep working.
                throw new NotSupportedException("Inline style operation not supported: " + operation.ToString());
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Walks every tag in <paramref name="source"/> and removes its <c>class</c>,
        /// <c>data-bind</c> and <c>style</c> attributes. When <paramref name="operation"/> is
        /// <c>CONSOLIDATE_STYLES_AND_REMOVE_COLORS</c>, each inline style that is still non-blank
        /// after <c>_PatternStripColorStyle</c> has been applied is replaced by a generated class
        /// name, and the collected rules are returned as a style block.
        /// </summary>
        /// <param name="source">The HTML to process.</param>
        /// <param name="operation">The inline-style operation being performed.</param>
        /// <returns>
        /// Item1 is the rewritten HTML. Item2 is a <c>&lt;style&gt;</c> block with '\r'-terminated
        /// lines when styles were consolidated, otherwise an empty string.
        /// </returns>
        private static Tuple <string, string> _HandleAttributes(string source, INLINE_STYLE_OPERATIONS operation)
        {
            // "style" must stay last: consolidating a style inserts a class attribute, which an
            // earlier "class" pass would otherwise remove again.
            string[] attributesToRemove = { "class", "data-bind", "style" };

            bool consolidate = operation == INLINE_STYLE_OPERATIONS.CONSOLIDATE_STYLES_AND_REMOVE_COLORS;

            // The tick count is appended to every class name generated during this call.
            long startTicks = DateTime.UtcNow.Ticks;
            int  classNum   = 1;

            // Maps cleaned inline css -> generated class name so identical styles share one class.
            Dictionary <string, string> cssAndClassNames = new Dictionary <string, string>();

            int lessThan    = source.IndexOf('<');
            int greaterThan = -1 == lessThan
                ? -1
                : source.IndexOfNextUnescapedGreaterThan(lessThan);

            while (lessThan != -1 && greaterThan != -1)
            {
                string tagOriginal = source.Substring(lessThan, greaterThan - lessThan + 1);
                string tag         = tagOriginal;
                bool   replace     = false;

                foreach (string attributeName in attributesToRemove)
                {
                    string attribute = tag.RetrieveAttribute(attributeName, true);
                    while (attribute != null)
                    {
                        replace = true;
                        string replaceVal = string.Empty;

                        if (consolidate && attributeName == "style")
                        {
                            int    quoteIndex           = attribute.IndexOfAny(_EitherQuote);
                            char   outerQuote           = attribute[quoteIndex];
                            string inAttributeDelimiter = outerQuote == '"' ? "'" : "\"";

                            // Clipboards tend to return inline styles such as
                            //
                            // style="color: rgb(69, 69, 69); font-family: &quot;Segoe UI&quot;;"
                            // or
                            // font-family: &amp;quot;Source Sans Pro&amp;quot;,&amp;quot;Helvetica Neue&amp;quot;
                            //
                            // so both entity forms are swapped for the quote character that is NOT
                            // the attribute's own delimiter. The double-encoded form has to be
                            // replaced first: replacing "&quot;" first would turn "&amp;quot;" into
                            // "&amp;" + quote and the second replacement could never match.
                            string attrStripped = Regex.Replace(attribute, _PatternStripColorStyle, " ")
                                                  .Replace("&amp;quot;", inAttributeDelimiter)
                                                  .Replace("&quot;", inAttributeDelimiter)
                                                  .UnescapeAnyEscapedQuotesInStyle()
                                                  .Substring(quoteIndex)
                                                  // Trim only the attribute's own delimiter so that an inner
                                                  // quote adjacent to it (e.g. font-family: 'Segoe UI') survives.
                                                  .Trim(outerQuote);

                            if (!string.IsNullOrWhiteSpace(attrStripped))
                            {
                                string className;
                                if (!cssAndClassNames.TryGetValue(attrStripped, out className))
                                {
                                    className = $"q{classNum++}_{startTicks}";
                                    cssAndClassNames.Add(attrStripped, className);
                                }

                                replaceVal = $@"class=""{className}""";
                            }
                        }

                        tag       = tag.Replace(attribute, replaceVal);
                        attribute = tag.RetrieveAttribute(attributeName, true);
                    }
                }

                if (replace)
                {
                    // Splice by position instead of searching for the tag text, so an identical
                    // piece of text earlier in the document can never be rewritten by mistake.
                    source = source.Substring(0, lessThan) + tag + source.Substring(greaterThan + 1);
                }

                lessThan = source.IndexOf('<', lessThan + 1);
                if (-1 != lessThan)
                {
                    greaterThan = source.IndexOfNextUnescapedGreaterThan(lessThan);
                }
            }

            string styleBlock = string.Empty;
            if (consolidate && cssAndClassNames.Any())
            {
                styleBlock = "<style>\r"
                             + string.Concat(cssAndClassNames.Select(kvp => $"    .{kvp.Value} {{{kvp.Key}}}\r"))
                             + "</style>\r";
            }

            return new Tuple <string, string>(source, styleBlock);
        }
        // Sorry, finally gave in and moved the constructor to the front. I don't like
        // having properties later, but this is where I always want to start.

        /// <summary>
        /// This HtmlFragmentViewModel constructor takes in a string and, if it
        /// is in Microsoft HTML clipboard format, will convert it to a
        /// HtmlFragmentViewModel.
        /// </summary>
        /// <remarks>
        /// Any exception raised while parsing is caught: the raw clipboard text is kept and the
        /// exception message is appended to <c>Error</c>.
        /// </remarks>
        /// <param name="rawClipboard">The Microsoft HTML clipboard formatted string to parse into a view model.</param>
        /// <param name="consolidateMutlilineHtmlTags">Multiline html tags are often valid, but may make further manipulation of the
        /// clipboard text difficult or unwieldy (eg, putting into a Markdown blockquote format). So that inserts at the start
        /// and end of lines don't break html, `true` (the default) passes the html through <c>ConslidateMultilineHtmlTags</c>;
        /// `false` leaves it as is.</param>
        /// <param name="operationType">How inline styles are treated; the value is handed to <c>OperateOnInlineStyles</c>.
        /// Keeping original inline color style in clipboards from Chrome (in particular) can sometimes cause unexpected and
        /// difficult to read results. Defaults to <c>REMOVE_STYLES_AND_CLASSES</c>.</param>
        /// <param name="EOL">Line ending used in the resulting <c>HtmlSource</c> and <c>StyleBlock</c>. Lines are handled
        /// as '\r'-separated internally and converted at the end unless this is "\r". Defaults to "\r\n".</param>
        public HtmlFragmentViewModel(string rawClipboard,
                                     bool consolidateMutlilineHtmlTags     = true,
                                     INLINE_STYLE_OPERATIONS operationType = INLINE_STYLE_OPERATIONS.REMOVE_STYLES_AND_CLASSES,
                                     string EOL = "\r\n")
        {
            try
            {
                _fragmentSourceRaw = rawClipboard.NormalizeNewlineToCarriageReturn_();

                // Removing empty entries is fine semantically, though dropping every blank line
                // may not be the prettiest result.
                string[] aLines = _fragmentSourceRaw.Split(new [] { '\r' }, StringSplitOptions.RemoveEmptyEntries);

                // --- Header: "Key:Value" lines that precede the html. ---
                int  i          = 0;
                bool headerOver = false;
                while (i < aLines.Length && !headerOver)
                {
                    string line   = aLines[i];
                    int    colLoc = line.IndexOf(':');

                    // Header keys are recognised by their first six characters, so the colon
                    // must come after at least six characters.
                    if (colLoc > 5)
                    {
                        int    parsed;
                        string value = line.Split(new[] { ':' }, 2)[1];

                        // Invariant lower-casing: header keys are not linguistic text.
                        switch (line.Substring(0, 6).ToLowerInvariant())
                        {
                        case "versio":
                            this.Version = value;
                            break;

                        case "starth":
                            if (int.TryParse(value, out parsed))
                            {
                                this.StartHtml = parsed;
                            }
                            break;

                        case "endhtm":
                            if (int.TryParse(value, out parsed))
                            {
                                this.EndHtml = parsed;
                            }
                            break;

                        case "startf":
                            if (int.TryParse(value, out parsed))
                            {
                                this.StartFragment = parsed;
                            }
                            break;

                        case "endfra":
                            if (int.TryParse(value, out parsed))
                            {
                                this.EndFragment = parsed;
                            }
                            break;

                        case "source":
                            this.SourceUrl = value;
                            break;

                        default:
                            // If this is a header value we don't know about (say Version is > 1.0),
                            // just skip it. Otherwise pretend we're in the HTML.
                            // TODO: Seems fragile, even with duck typing by looking for <!--StartFragment-->.
                            if (!Regex.IsMatch(line, @"^[A-Za-z]+:") || line.IndexOf("<!--StartFragment-->") > -1)
                            {
                                headerOver = true;
                                i--;        // Cancels the increment below so this line is processed as html source.
                            }
                            break;
                        }
                        i++;
                    }
                    else
                    {
                        headerOver = true;
                    }
                }

                // --- Body: everything after the header, re-joined with '\r'. ---
                StringBuilder sbClippedSource = new StringBuilder();
                while (i < aLines.Length)
                {
                    sbClippedSource.Append(aLines[i]).Append('\r');
                    i++;
                }

                this.HtmlSource = sbClippedSource.ToString().Trim();
                if (consolidateMutlilineHtmlTags)
                {
                    this.HtmlSource = this.HtmlSource.ConslidateMultilineHtmlTags();
                }

                var htmlAndClasses = this.HtmlSource.OperateOnInlineStyles(operationType);

                this.HtmlSource = htmlAndClasses.Item1;
                this.StyleBlock = htmlAndClasses.Item2 ?? string.Empty;

                if (!EOL.Equals("\r"))
                {
                    this.HtmlSource = this.HtmlSource.Replace("\r", EOL);

                    // Strings are immutable: the result has to be assigned back, otherwise the
                    // style block keeps its '\r' line endings.
                    this.StyleBlock = this.StyleBlock.Replace("\r", EOL);
                }
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine(e.ToString());

                // Only break into a debugger that is actually attached; an unconditional Break()
                // signals a breakpoint even when nobody is debugging.
                if (System.Diagnostics.Debugger.IsAttached)
                {
                    System.Diagnostics.Debugger.Break();
                }

                this._fragmentSourceRaw = rawClipboard;
                this.Error += e.Message + "\n";
            }
        }