Example #1
0
        /// <summary>
        /// Tries to parse an HTML open tag starting at its tag name: the name itself, any attributes, and the
        /// closing <c>&gt;</c> or <c>/&gt;</c>. Every consumed character is appended to <paramref name="builder"/>.
        /// </summary>
        /// <param name="text">Slice positioned on the first character of the tag name; it is advanced as characters are consumed (past the closing <c>&gt;</c> on success).</param>
        /// <param name="builder">Receives the consumed characters. It is not cleared on failure, so it may hold a partial tag when <c>false</c> is returned.</param>
        /// <returns><c>true</c> if a complete open tag was parsed; otherwise <c>false</c>.</returns>
        internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder builder)
        {
            var c = text.CurrentChar;

            // The tag name must start with a letter...
            if (!c.IsAlpha())
            {
                return false;
            }
            builder.Append(c);

            // ...followed by any number of letters, digits or '-'.
            while (true)
            {
                c = text.NextChar();
                if (c.IsAlphaNumeric() || c == '-')
                {
                    builder.Append(c);
                }
                else
                {
                    break;
                }
            }

            // True only while an attribute name has just been read and may still be followed by '='.
            bool hasAttribute = false;

            while (true)
            {
                // Whitespace is copied verbatim; remembering that we saw some matters because
                // an attribute name is only accepted when separated from what precedes it.
                var hasWhitespaces = false;
                while (c.IsWhitespace())
                {
                    builder.Append(c);
                    c = text.NextChar();
                    hasWhitespaces = true;
                }

                switch (c)
                {
                case '\0':
                    // End of input before the tag was closed.
                    return false;

                case '>':
                    text.NextChar();
                    builder.Append(c);
                    return true;

                case '/':
                    // Self-closing tag: '/' must be immediately followed by '>'.
                    builder.Append('/');
                    c = text.NextChar();
                    if (c != '>')
                    {
                        return false;
                    }
                    text.NextChar();
                    builder.Append('>');
                    return true;

                case '=':
                    // A value is only valid right after an attribute name.
                    if (!hasAttribute)
                    {
                        return false;
                    }

                    builder.Append('=');

                    // Skip (and copy) any whitespace between '=' and the value.
                    c = text.NextChar();
                    while (c.IsWhitespace())
                    {
                        builder.Append(c);
                        c = text.NextChar();
                    }

                    if (c == '\'' || c == '\"')
                    {
                        // Quoted value: copy everything up to the matching quote.
                        builder.Append(c);
                        char openingStringChar = c;
                        while (true)
                        {
                            c = text.NextChar();
                            if (c == '\0')
                            {
                                return false;
                            }
                            if (c != openingStringChar)
                            {
                                builder.Append(c);
                            }
                            else
                            {
                                break;
                            }
                        }
                        builder.Append(c);
                        c = text.NextChar();
                    }
                    else
                    {
                        // Unquoted value: runs until whitespace or one of " ' = < > `.
                        // Any whitespace character ends the value (not only ' ' and '\n'), so that
                        // a tab or carriage return is handled by the separator loop above instead
                        // of being swallowed into the value.
                        int matchCount = 0;
                        while (true)
                        {
                            if (c == '\0')
                            {
                                return false;
                            }
                            if (c.IsWhitespace() || c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`')
                            {
                                break;
                            }
                            matchCount++;
                            builder.Append(c);
                            c = text.NextChar();
                        }

                        // We need at least one char after '='
                        if (matchCount == 0)
                        {
                            return false;
                        }
                    }

                    // The attribute is complete; a second '=' without a new name is invalid.
                    hasAttribute = false;
                    continue;

                default:
                    // Anything else must be an attribute name, which requires preceding whitespace.
                    if (!hasWhitespaces)
                    {
                        return false;
                    }

                    // Attribute names start with a letter, '_' or ':'...
                    if (!(c.IsAlpha() || c == '_' || c == ':'))
                    {
                        return false;
                    }
                    builder.Append(c);

                    // ...followed by letters, digits, '_', ':', '.' or '-'.
                    while (true)
                    {
                        c = text.NextChar();
                        if (c.IsAlphaNumeric() || c == '_' || c == ':' || c == '.' || c == '-')
                        {
                            builder.Append(c);
                        }
                        else
                        {
                            break;
                        }
                    }

                    hasAttribute = true;
                    break;
                }
            }
        }
Example #2
0
 /// <summary>
 /// Convenience overload that forwards to the <c>ref</c>-taking <c>TryParseHtmlTag</c> overload,
 /// passing this method's own <paramref name="text"/> argument by reference.
 /// </summary>
 /// <param name="text">The slice to parse. Only this method's by-value argument is handed to the parser
 /// (so the caller's slice is untouched, assuming <c>StringSlice</c> is a value type).</param>
 /// <param name="htmlTag">Receives whatever the forwarded overload assigns.</param>
 /// <returns>The result of the forwarded overload.</returns>
 public static bool TryParseHtmlTag(StringSlice text, out string htmlTag)
     => TryParseHtmlTag(ref text, out htmlTag);
Example #3
0
        /// <summary>
        /// Unescapes a string: optionally removes the backslash in front of escapable symbol characters,
        /// and replaces HTML entities with their decoded text.
        /// </summary>
        /// <param name="text">The string to unescape. May be <c>null</c> or empty.</param>
        /// <param name="removeBackSlash">If <c>true</c>, a backslash followed by an escapable symbol is dropped and the
        /// symbol is kept literally; if <c>false</c>, backslashes are left alone and only entities are processed.</param>
        /// <returns>
        /// <see cref="string.Empty"/> when <paramref name="text"/> is <c>null</c> or empty; the original
        /// <paramref name="text"/> instance when it contains none of the searched characters; otherwise the unescaped string.
        /// </returns>
        public static string Unescape(string text, bool removeBackSlash = true)
        {
            // Credits: code from CommonMark.NET
            // Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
            // See license for details:  https://github.com/Knagis/CommonMark.NET/blob/master/LICENSE.md
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            int searchPos = 0; // where the next search for a candidate character starts
            int lastPos = 0;   // start of the input range not yet copied to the builder
            char c;

            char[] search = removeBackSlash ? SearchBackAndAmp : SearchAmp;
            StringBuilder sb = null;

            while ((searchPos = text.IndexOfAny(search, searchPos)) != -1)
            {
                // The builder is only acquired once a candidate character has been found.
                if (sb == null)
                {
                    sb = StringBuilderCache.Local();
                    sb.Length = 0;
                }

                c = text[searchPos];
                if (removeBackSlash && c == '\\')
                {
                    searchPos++;

                    // A trailing backslash has nothing to escape and is kept as-is.
                    if (text.Length == searchPos)
                    {
                        break;
                    }

                    c = text[searchPos];
                    if (c.IsEscapableSymbol())
                    {
                        // Copy everything up to, but excluding, the backslash.
                        sb.Append(text, lastPos, searchPos - lastPos - 1);
                        lastPos = searchPos;

                        // Step over the escaped character so that it is taken literally. Without this,
                        // an escaped '\' or '&' would be found again by the next search and be
                        // misread as the start of another escape or of an entity.
                        searchPos++;
                    }
                }
                else if (c == '&')
                {
                    int entityNameStart;
                    int entityNameLength;
                    int numericEntity;
                    var match = ScanEntity(new StringSlice(text, searchPos, text.Length - 1), out numericEntity, out entityNameStart, out entityNameLength);
                    if (match == 0)
                    {
                        // Not an entity: keep the '&' and continue searching after it.
                        searchPos++;
                    }
                    else
                    {
                        searchPos += match;

                        if (entityNameLength > 0)
                        {
                            // Named entity: replaced only when the name can be decoded.
                            var namedEntity = new StringSlice(text, entityNameStart, entityNameStart + entityNameLength - 1);
                            var decoded = EntityHelper.DecodeEntity(namedEntity.ToString());
                            if (decoded != null)
                            {
                                sb.Append(text, lastPos, searchPos - match - lastPos);
                                sb.Append(decoded);
                                lastPos = searchPos;
                            }
                        }
                        else if (numericEntity >= 0)
                        {
                            // Numeric entity: always replaced.
                            sb.Append(text, lastPos, searchPos - match - lastPos);
                            if (numericEntity == 0)
                            {
                                sb.Append('\0'.EscapeInsecure());
                            }
                            else
                            {
                                var decoded = EntityHelper.DecodeEntity(numericEntity);
                                if (decoded != null)
                                {
                                    sb.Append(decoded);
                                }
                                else
                                {
                                    // Code points that cannot be decoded become U+FFFD.
                                    sb.Append('\uFFFD');
                                }
                            }

                            lastPos = searchPos;
                        }
                    }
                }
            }

            // No candidate character was ever found: the input is returned unchanged.
            if (sb == null)
            {
                return text;
            }

            // Copy the remaining tail and reset the builder before handing back the result.
            sb.Append(text, lastPos, text.Length - lastPos);
            var result = sb.ToString();

            sb.Length = 0;
            return result;
        }
Example #4
0
        /// <summary>
        /// Tries to parse the parenthesised part of an inline link: <c>(</c>, an optional destination,
        /// an optional title separated from the destination by whitespace, and <c>)</c>.
        /// </summary>
        /// <param name="text">Slice positioned on the opening <c>(</c>; it is advanced as input is consumed, also when parsing fails part-way.</param>
        /// <param name="link">Receives the value produced by <c>TryParseUrl</c>; <c>null</c> if the opening <c>(</c> is missing.</param>
        /// <param name="title">Receives the value produced by <c>TryParseTitle</c>; on success it is never <c>null</c> (an absent title becomes the empty string).</param>
        /// <param name="linkSpan">Source positions covered by the destination, or <c>SourceSpan.Empty</c> when nothing was consumed for it.</param>
        /// <param name="titleSpan">Source positions covered by the title, or <c>SourceSpan.Empty</c> when nothing was consumed for it.</param>
        /// <returns><c>true</c> if the closing <c>)</c> was reached and consumed; otherwise <c>false</c>.</returns>
        public static bool TryParseInlineLink(ref StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
        {
            // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
            // 2. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
            // 3. an optional link destination,
            // 4. an optional link title separated from the link destination by whitespace,
            // 5. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
            // 6. and a right parenthesis )
            link = null;
            title = null;
            linkSpan = SourceSpan.Empty;
            titleSpan = SourceSpan.Empty;

            // 1. The opening parenthesis is mandatory.
            if (text.CurrentChar != '(')
            {
                return false;
            }

            // 2. Optional whitespace before the destination.
            text.NextChar();
            text.TrimStart();

            // 3. The (possibly empty) destination.
            var linkStart = text.Start;
            if (!TryParseUrl(ref text, out link))
            {
                return false;
            }

            linkSpan.Start = linkStart;
            linkSpan.End = text.Start - 1;
            if (linkSpan.End < linkSpan.Start)
            {
                // Nothing was consumed for the destination.
                linkSpan = SourceSpan.Empty;
            }

            int spaceCount;
            text.TrimStart(out spaceCount);

            var isValid = text.CurrentChar == ')';

            // 4. A title is only attempted when it is separated from the destination
            //    (spaceCount > 0) and the link is not already closed.
            if (!isValid && spaceCount > 0)
            {
                var titleStart = text.Start;
                if (TryParseTitle(ref text, out title))
                {
                    titleSpan.Start = titleStart;
                    titleSpan.End = text.Start - 1;
                    if (titleSpan.End < titleSpan.Start)
                    {
                        // Nothing was consumed for the title.
                        titleSpan = SourceSpan.Empty;
                    }

                    // 5. Optional whitespace, then 6. the closing parenthesis.
                    text.TrimStart();
                    isValid = text.CurrentChar == ')';
                }
            }

            if (isValid)
            {
                // Skip ')'
                text.NextChar();
                title = title ?? String.Empty;
            }

            return isValid;
        }
Example #5
0
 /// <summary>
 /// Convenience overload that forwards to the <c>ref</c>-taking <c>TryParseInlineLink</c> overload,
 /// passing this method's own <paramref name="text"/> argument by reference.
 /// </summary>
 /// <param name="text">The slice to parse. Only this method's by-value argument is handed to the parser
 /// (so the caller's slice is untouched, assuming <c>StringSlice</c> is a value type).</param>
 /// <param name="link">Receives whatever the forwarded overload assigns.</param>
 /// <param name="title">Receives whatever the forwarded overload assigns.</param>
 /// <param name="linkSpan">Receives whatever the forwarded overload assigns.</param>
 /// <param name="titleSpan">Receives whatever the forwarded overload assigns.</param>
 /// <returns>The result of the forwarded overload.</returns>
 public static bool TryParseInlineLink(StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
     => TryParseInlineLink(ref text, out link, out title, out linkSpan, out titleSpan);
Example #6
0
 /// <summary>
 /// Convenience overload for callers that need only the link and title: forwards to the
 /// <c>ref</c>-taking <c>TryParseInlineLink</c> overload and throws away both source spans.
 /// </summary>
 /// <param name="text">The slice to parse. Only this method's by-value argument is handed to the parser
 /// (so the caller's slice is untouched, assuming <c>StringSlice</c> is a value type).</param>
 /// <param name="link">Receives whatever the forwarded overload assigns.</param>
 /// <param name="title">Receives whatever the forwarded overload assigns.</param>
 /// <returns>The result of the forwarded overload.</returns>
 public static bool TryParseInlineLink(StringSlice text, out string link, out string title)
 {
     // The spans are required by the forwarded signature but are of no interest here.
     bool parsed = TryParseInlineLink(ref text, out link, out title, out SourceSpan unusedLinkSpan, out SourceSpan unusedTitleSpan);
     return parsed;
 }
Example #7
0
 /// <summary>
 /// Convenience overload that forwards to the <c>ref</c>-taking <c>TryParseAutolink</c> overload,
 /// passing this method's own <paramref name="text"/> argument by reference.
 /// </summary>
 /// <param name="text">The slice to parse. Only this method's by-value argument is handed to the parser
 /// (so the caller's slice is untouched, assuming <c>StringSlice</c> is a value type).</param>
 /// <param name="link">Receives whatever the forwarded overload assigns.</param>
 /// <param name="isEmail">Receives whatever the forwarded overload assigns.</param>
 /// <returns>The result of the forwarded overload.</returns>
 public static bool TryParseAutolink(StringSlice text, out string link, out bool isEmail)
     => TryParseAutolink(ref text, out link, out isEmail);
Example #8
0
        /// <summary>
        /// Tries to parse an autolink, i.e. an absolute URI or an email address enclosed in angle brackets
        /// (<c>&lt;scheme:rest&gt;</c> or <c>&lt;user@domain&gt;</c>), at the current position of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Slice positioned on the opening <c>&lt;</c>; it is advanced as characters are consumed, also when parsing fails part-way.</param>
        /// <param name="link">On success, the text between the angle brackets; otherwise <c>null</c>.</param>
        /// <param name="isEmail">Set to <c>true</c> once an <c>@</c> has been reached, even if the domain part then turns out to be invalid.</param>
        /// <returns><c>true</c> if a complete autolink, including the closing <c>&gt;</c>, was parsed; otherwise <c>false</c>.</returns>
        public static bool TryParseAutolink(ref StringSlice text, out string link, out bool isEmail)
        {
            link = null;
            isEmail = false;

            var c = text.CurrentChar;

            if (c != '<')
            {
                return false;
            }

            // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
            // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
            // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
            // A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop.

            // a scheme is any sequence of 2–32 characters
            // beginning with an ASCII letter
            // and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”).

            // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
            // /^
            // [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
            // @
            // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
            // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

            c = text.NextChar();

            // -1: scan email
            //  0: scan uri or email
            // +1: scan uri
            int state = 0;

            if (!c.IsAlpha())
            {
                // A scheme must start with a letter, so anything else can only start an email.
                if (c.IsDigit() || CharHelper.IsEmailUsernameSpecialChar(c))
                {
                    state = -1;
                }
                else
                {
                    return false;
                }
            }

            var builder = StringBuilderCache.Local();

            // ****************************
            // 1. Scan scheme or user email
            // ****************************
            builder.Append(c);
            while (true)
            {
                c = text.NextChar();

                // Chars valid for both scheme and email
                var isSpecialChar = c == '+' || c == '.' || c == '-';
                var isValidChar = c.IsAlphaNumeric() || isSpecialChar;
                if (CharHelper.IsEmailUsernameSpecialChar(c))
                {
                    isValidChar = true;
                    // If this is not a special char valid also for url scheme, then we have an email
                    if (!isSpecialChar)
                    {
                        state = -1;
                    }
                }

                if (isValidChar)
                {
                    // No length limit here: until ':' or '@' is seen this may still be an
                    // email user name, which is not limited to 32 characters.
                    builder.Append(c);
                }
                else if (c == ':')
                {
                    // Everything scanned so far is the scheme: it must not contain email-only
                    // characters and must be 2–32 characters long.
                    if (state < 0 || builder.Length < 2 || builder.Length > 32)
                    {
                        builder.Length = 0;
                        return false;
                    }
                    state = 1;
                    break;
                }
                else if (c == '@')
                {
                    state = -1;
                    break;
                }
                else
                {
                    builder.Length = 0;
                    return false;
                }
            }

            // append ':' or '@'
            builder.Append(c);

            if (state < 0)
            {
                isEmail = true;

                // scan the email domain: dot-separated labels, each matching
                // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
                bool endsWithHyphen = false; // the last character of the current label is '-'
                int labelLength = 0;         // number of characters in the current label
                while (true)
                {
                    c = text.NextChar();
                    if (c == '>')
                    {
                        // The last label must not be empty and must not end with '-'.
                        if (labelLength == 0 || endsWithHyphen)
                        {
                            break;
                        }

                        text.NextChar();
                        link = builder.ToString();
                        builder.Length = 0;
                        return true;
                    }

                    if (c.IsAlphaNumeric() || (labelLength > 0 && c == '-'))
                    {
                        // Track only the most recent character, so that a hyphen in the
                        // middle of a label (e.g. "my-host") does not invalidate it.
                        endsWithHyphen = c == '-';
                        labelLength++;
                        if (labelLength > 63)
                        {
                            break;
                        }
                    }
                    else if (c == '.')
                    {
                        // A label must be non-empty (no leading or doubled '.') and must not end with '-'.
                        if (labelLength == 0 || endsWithHyphen)
                        {
                            break;
                        }
                        labelLength = 0;
                        endsWithHyphen = false;
                    }
                    else
                    {
                        break;
                    }
                    builder.Append(c);
                }
            }
            else
            {
                // scan an uri
                // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
                // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
                // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).

                while (true)
                {
                    c = text.NextChar();
                    if (c == '\0')
                    {
                        break;
                    }

                    if (c == '>')
                    {
                        text.NextChar();
                        link = builder.ToString();
                        builder.Length = 0;
                        return true;
                    }

                    if (c <= 127)
                    {
                        // ASCII: reject space, control characters and '<' ('>' was handled above).
                        if (c > ' ' && c != '<')
                        {
                            builder.Append(c);
                        }
                        else
                        {
                            break;
                        }
                    }
                    else if (!c.IsSpaceOrPunctuation())
                    {
                        builder.Append(c);
                    }
                    else
                    {
                        break;
                    }
                }
            }

            // Invalid content: reset the shared builder before reporting failure.
            builder.Length = 0;
            return false;
        }