Exemplo n.º 1
0
        /// <summary>
        /// Tries to read a bracketed label (<c>[...]</c>) starting at the current character of <paramref name="lines"/>.
        /// </summary>
        /// <typeparam name="T">The character iterator type.</typeparam>
        /// <param name="lines">The iterator to read from. It is advanced while scanning; once a closing <c>]</c> is reached
        /// it is moved past that bracket, even when the label is then rejected.</param>
        /// <param name="allowEmpty">When <c>false</c>, a label without any non-whitespace character is rejected.</param>
        /// <param name="label">The label text with leading/trailing whitespace dropped and every inner whitespace run
        /// collapsed to a single space, or <c>null</c> on failure.</param>
        /// <param name="labelSpan">The span built from the iterator positions of the first and last kept characters, or
        /// <see cref="SourceSpan.Empty"/> when there is none.</param>
        /// <returns><c>true</c> if a valid label was parsed; otherwise <c>false</c>.</returns>
        public static bool TryParseLabel <T>(ref T lines, bool allowEmpty, out string label, out SourceSpan labelSpan) where T : ICharIterator
        {
            label     = null;
            labelSpan = SourceSpan.Empty;

            // A label must open with '['
            if (lines.CurrentChar != '[')
            {
                return false;
            }

            var text = StringBuilderCache.Local();

            int  firstPos     = -1;
            int  lastPos      = -1;
            bool escaped      = false;
            bool lastWasSpace = true; // starts as true so that leading whitespace is never recorded
            bool sawContent   = false;
            bool success      = false;

            // End of input ('\0') before the closing bracket means there is no label
            for (char ch = lines.NextChar(); ch != '\0'; ch = lines.NextChar())
            {
                if (escaped)
                {
                    // Only '[', ']' and '\' may follow a backslash inside a label
                    bool escapable = ch == '[' || ch == ']' || ch == '\\';
                    if (!escapable)
                    {
                        break;
                    }
                }
                else if (ch == '[')
                {
                    // An unescaped opening bracket is not allowed inside a label
                    break;
                }
                else if (ch == ']')
                {
                    // Step over the closing bracket before validating the content
                    lines.NextChar();

                    if (allowEmpty || sawContent)
                    {
                        // Drop trailing whitespace and pull the end position back accordingly
                        while (text.Length > 0 && text[text.Length - 1].IsWhitespace())
                        {
                            text.Length--;
                            lastPos--;
                        }

                        // A label is limited to 999 characters
                        if (text.Length < 1000)
                        {
                            labelSpan.Start = firstPos;
                            labelSpan.End   = lastPos;
                            if (labelSpan.Start > labelSpan.End)
                            {
                                labelSpan = SourceSpan.Empty;
                            }

                            label   = text.ToString();
                            success = true;
                        }
                    }

                    break;
                }

                // Every whitespace character is recorded as a plain space
                bool isSpace    = ch.IsWhitespace();
                char normalized = isSpace ? ' ' : ch;

                if (!escaped && normalized == '\\')
                {
                    // The escaping backslash itself is not stored, but it may mark the start of the label
                    if (firstPos < 0)
                    {
                        firstPos = lines.Start;
                    }

                    escaped = true;
                }
                else
                {
                    escaped = false;

                    // Skip a whitespace character that directly follows another one
                    if (!(lastWasSpace && isSpace))
                    {
                        if (firstPos < 0)
                        {
                            firstPos = lines.Start;
                        }

                        lastPos = lines.Start;
                        text.Append(normalized);

                        if (!isSpace)
                        {
                            sawContent = true;
                        }
                    }
                }

                lastWasSpace = isSpace;
            }

            // Leave the shared builder empty for its next user
            text.Length = 0;

            return success;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Tries to parse a link destination starting at the current character of <paramref name="text"/>.
        /// </summary>
        /// <remarks>
        /// Two forms are recognized: a destination enclosed in <c>&lt;</c> and <c>&gt;</c>, or a bare run of characters
        /// that ends at the end of input, a space/tab, a control character or an unbalanced closing parenthesis.
        /// A backslash in front of an ASCII punctuation character is dropped from the result; a backslash in front of
        /// any other character is kept.
        /// </remarks>
        /// <typeparam name="T">The character iterator type.</typeparam>
        /// <param name="text">The iterator to read from. In the <c>&lt;...&gt;</c> form it is moved past the closing
        /// <c>&gt;</c> on success; in the bare form it is left on the character that stopped the scan.</param>
        /// <param name="link">The parsed destination, or <c>null</c> when parsing fails.</param>
        /// <returns><c>true</c> if a destination was parsed; otherwise <c>false</c>.</returns>
        public static bool TryParseUrl <T>(ref T text, out string link) where T : ICharIterator
        {
            bool isValid = false;
            var  buffer  = StringBuilderCache.Local();

            buffer.Length = 0;

            var c = text.CurrentChar;

            // a sequence of zero or more characters between an opening < and a closing >
            // that contains no spaces, line breaks, or unescaped < or > characters, or
            if (c == '<')
            {
                bool hasEscape = false;
                do
                {
                    c = text.NextChar();
                    if (!hasEscape && c == '>')
                    {
                        // Consume the closing '>'
                        text.NextChar();
                        isValid = true;
                        break;
                    }

                    if (!hasEscape && c == '<')
                    {
                        break;
                    }

                    // The pending backslash was not escaping a punctuation character: keep it literally
                    if (hasEscape && !c.IsAsciiPunctuation())
                    {
                        buffer.Append('\\');
                    }

                    // Only an unescaped backslash starts an escape. An escaped backslash ("\\") is a literal
                    // backslash and must not keep the parser in escape mode, otherwise a following '>' or '<'
                    // would wrongly be treated as escaped.
                    if (c == '\\' && !hasEscape)
                    {
                        hasEscape = true;
                        continue;
                    }

                    hasEscape = false;

                    if (c.IsWhitespace()) // TODO: specs unclear. space is strict or relaxed? (includes tabs?)
                    {
                        break;
                    }

                    buffer.Append(c);
                } while (c != '\0');
            }
            else
            {
                // a nonempty sequence of characters that does not include ASCII space or control characters,
                // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
                // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
                // parentheses.
                bool hasEscape    = false;
                int  openedParent = 0;
                while (true)
                {
                    // Match opening and closing parenthesis
                    if (c == '(')
                    {
                        if (!hasEscape)
                        {
                            // A second level of unescaped parentheses is not accepted
                            if (openedParent > 0)
                            {
                                break;
                            }
                            openedParent++;
                        }
                    }

                    if (c == ')')
                    {
                        if (!hasEscape)
                        {
                            openedParent--;
                            // An unmatched ')' ends the destination and is left for the caller
                            if (openedParent < 0)
                            {
                                isValid = true;
                                break;
                            }
                        }
                    }

                    // The pending backslash was not escaping a punctuation character: keep it literally
                    if (hasEscape && !c.IsAsciiPunctuation())
                    {
                        buffer.Append('\\');
                    }

                    // Only an unescaped backslash starts an escape (see the '<' branch above in this method)
                    if (c == '\\' && !hasEscape)
                    {
                        hasEscape = true;
                        c         = text.NextChar();
                        continue;
                    }

                    hasEscape = false;

                    if (c == '\0' || c.IsSpaceOrTab() || c.IsControl()) // TODO: specs unclear. space is strict or relaxed? (includes tabs?)
                    {
                        isValid = true;
                        break;
                    }

                    buffer.Append(c);

                    c = text.NextChar();
                }
            }

            link          = isValid ? buffer.ToString() : null;
            // Leave the shared builder empty for its next user
            buffer.Length = 0;
            return(isValid);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Tries to parse a link title starting at the current character of <paramref name="text"/>.
        /// </summary>
        /// <remarks>
        /// The title must be delimited by <c>'...'</c>, <c>"..."</c> or <c>(...)</c>. The closing delimiter may appear
        /// inside the title only when backslash-escaped. A backslash in front of an ASCII punctuation character is
        /// dropped from the result; a backslash in front of any other character is kept. Parsing fails at the end of
        /// input, or when a line made only of spaces/tabs is found inside the title.
        /// </remarks>
        /// <typeparam name="T">The character iterator type.</typeparam>
        /// <param name="text">The iterator to read from. On success it is moved past the closing delimiter.</param>
        /// <param name="title">The parsed title, or <c>null</c> when parsing fails.</param>
        /// <returns><c>true</c> if a title was parsed; otherwise <c>false</c>.</returns>
        public static bool TryParseTitle <T>(ref T text, out string title) where T : ICharIterator
        {
            bool isValid = false;
            var  buffer  = StringBuilderCache.Local();

            buffer.Length = 0;

            // a sequence of zero or more characters between straight double-quote characters ("), including a " character only if it is backslash-escaped, or
            // a sequence of zero or more characters between straight single-quote characters ('), including a ' character only if it is backslash-escaped, or
            // a sequence of zero or more characters between parentheses, including a ) character only if it is backslash-escaped
            var c = text.CurrentChar;

            if (c == '\'' || c == '"' || c == '(')
            {
                var  closingQuote = c == '(' ? ')' : c;
                bool hasEscape    = false;
                // State of the line currently being scanned:
                // -1: nothing recorded yet for this line
                //  1: only spaces/tabs seen so far
                //  0: at least one other character seen
                int hasOnlyWhiteSpacesSinceLastLine = -1;
                while (true)
                {
                    c = text.NextChar();

                    if (c == '\n')
                    {
                        if (hasOnlyWhiteSpacesSinceLastLine >= 0)
                        {
                            // A line containing only spaces/tabs terminates the title as invalid
                            if (hasOnlyWhiteSpacesSinceLastLine == 1)
                            {
                                break;
                            }
                            hasOnlyWhiteSpacesSinceLastLine = -1;
                        }
                    }

                    if (c == '\0')
                    {
                        break;
                    }

                    if (c == closingQuote)
                    {
                        if (hasEscape)
                        {
                            // Escaped delimiter: part of the title
                            buffer.Append(closingQuote);
                            hasEscape = false;
                            continue;
                        }

                        // Skip last quote
                        text.NextChar();
                        isValid = true;
                        break;
                    }

                    // The pending backslash was not escaping a punctuation character: keep it literally
                    if (hasEscape && !c.IsAsciiPunctuation())
                    {
                        buffer.Append('\\');
                    }

                    // Only an unescaped backslash starts an escape. An escaped backslash ("\\") is a literal
                    // backslash and must not keep the parser in escape mode, otherwise a closing delimiter
                    // right after it would wrongly be treated as escaped and the title would never end.
                    if (c == '\\' && !hasEscape)
                    {
                        hasEscape = true;
                        continue;
                    }

                    hasEscape = false;

                    if (c.IsSpaceOrTab())
                    {
                        if (hasOnlyWhiteSpacesSinceLastLine < 0)
                        {
                            hasOnlyWhiteSpacesSinceLastLine = 1;
                        }
                    }
                    else if (c != '\n')
                    {
                        hasOnlyWhiteSpacesSinceLastLine = 0;
                    }

                    buffer.Append(c);
                }
            }

            title         = isValid ? buffer.ToString() : null;
            // Leave the shared builder empty for its next user
            buffer.Length = 0;
            return(isValid);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds an identifier suitable for a URL fragment from the text of a heading.
        /// </summary>
        /// <remarks>
        /// Everything before the first letter is skipped. Letters are kept (upper-case letters are lower-cased),
        /// digits are kept, a run of whitespace becomes a single <c>-</c>, reserved punctuation is kept without
        /// immediate repetition, and any other character is dropped. Trailing reserved punctuation is removed.
        /// </remarks>
        /// <param name="headingText">The heading text to convert.</param>
        /// <param name="allowOnlyAscii">When normalization support is compiled in, decomposes the text and drops
        /// letters outside the printable ASCII range.</param>
        /// <returns>The generated identifier; it may be empty.</returns>
        public static string Urilize(string headingText, bool allowOnlyAscii)
        {
#if SUPPORT_NORMALIZE
            // Decompose the string first when non-ASCII letters are going to be discarded
            if (allowOnlyAscii)
            {
                headingText = headingText.Normalize(NormalizationForm.FormD);
            }
#endif

            var  slug         = StringBuilderCache.Local();
            bool seenLetter   = false;
            bool lastWasSpace = false;

            foreach (char source in headingText)
            {
                char ch = source;

                if (char.IsLetter(ch))
                {
#if SUPPORT_NORMALIZE
                    if (allowOnlyAscii && (ch < ' ' || ch >= 127))
                    {
                        continue;
                    }
#endif
                    if (char.IsUpper(ch))
                    {
                        ch = char.ToLowerInvariant(ch);
                    }

                    slug.Append(ch);
                    seenLetter   = true;
                    lastWasSpace = false;
                    continue;
                }

                // Nothing is emitted until a first letter has been written
                if (!seenLetter)
                {
                    continue;
                }

                if (IsReservedPunctuation(ch))
                {
                    // Punctuation right after whitespace replaces the last written character
                    if (lastWasSpace)
                    {
                        slug.Length--;
                    }

                    // Never write the same punctuation character twice in a row
                    if (slug[slug.Length - 1] != ch)
                    {
                        slug.Append(ch);
                    }

                    lastWasSpace = false;
                }
                else if (ch.IsDigit())
                {
                    slug.Append(ch);
                    lastWasSpace = false;
                }
                else if (!lastWasSpace && ch.IsWhitespace())
                {
                    // First whitespace of a run: becomes '-' unless punctuation was just written
                    if (!IsReservedPunctuation(slug[slug.Length - 1]))
                    {
                        slug.Append('-');
                    }

                    lastWasSpace = true;
                }
            }

            // Trim trailing _ - .
            while (slug.Length > 0 && IsReservedPunctuation(slug[slug.Length - 1]))
            {
                slug.Length--;
            }

            var result = slug.ToString();

            // Leave the shared builder empty for its next user
            slug.Length = 0;
            return result;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Tries to parse an autolink (<c>&lt;scheme:...&gt;</c> or <c>&lt;user@domain&gt;</c>) starting at the
        /// current character of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">The slice to read from. It is advanced while scanning; on success it is moved past the
        /// closing <c>&gt;</c>.</param>
        /// <param name="link">The URI or email address without the surrounding angle brackets, or <c>null</c> on failure.</param>
        /// <param name="isEmail"><c>true</c> once the content has been identified as an email address (it may be set
        /// even when the method finally returns <c>false</c>).</param>
        /// <returns><c>true</c> if an autolink was parsed; otherwise <c>false</c>.</returns>
        public static bool TryParseAutolink(ref StringSlice text, out string link, out bool isEmail)
        {
            link    = null;
            isEmail = false;

            var c = text.CurrentChar;

            if (c != '<')
            {
                return(false);
            }

            // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
            // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
            // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).

            // a scheme is any sequence of 2–32 characters
            // beginning with an ASCII letter
            // and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”).

            // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
            // /^
            // [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
            // @
            // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
            // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

            c = text.NextChar();

            // -1: scan email
            //  0: scan uri or email
            // +1: scan uri
            int state = 0;

            if (!c.IsAlpha())
            {
                // We may have an email char?
                if (c.IsDigit() || CharHelper.IsEmailUsernameSpecialChar(c))
                {
                    state = -1;
                }
                else
                {
                    return(false);
                }
            }

            var builder = StringBuilderCache.Local();

            // ****************************
            // 1. Scan scheme or user email
            // ****************************
            builder.Append(c);
            while (true)
            {
                c = text.NextChar();

                // Chars valid for both scheme and email
                var isSpecialChar = c == '+' || c == '.' || c == '-';
                var isValidChar   = c.IsAlphaNumeric() || isSpecialChar;
                if (state <= 0 && CharHelper.IsEmailUsernameSpecialChar(c))
                {
                    isValidChar = true;
                    // If this is not a special char valid also for url scheme, then we have an email
                    if (!isSpecialChar)
                    {
                        state = -1;
                    }
                }

                if (isValidChar)
                {
                    // The length limit only applies to a scheme, which is not known until ':' is reached
                    // (an email user name may be longer), so it is checked in the ':' branch below.
                    builder.Append(c);
                }
                else if (c == ':')
                {
                    // a scheme is any sequence of 2–32 characters
                    if (state < 0 || builder.Length < 2 || builder.Length > 32)
                    {
                        builder.Length = 0;
                        return(false);
                    }
                    state = 1;
                    break;
                }
                else if (c == '@')
                {
                    if (state > 0)
                    {
                        builder.Length = 0;
                        return(false);
                    }
                    state = -1;
                    break;
                }
                else
                {
                    builder.Length = 0;
                    return(false);
                }
            }

            // append ':' or '@'
            builder.Append(c);

            if (state < 0)
            {
                isEmail = true;

                // scan an email
                // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
                // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
                int  domainCharCount = 0;    // number of characters in the current domain label
                char pc              = '\0'; // previous domain character accepted by this loop
                while (true)
                {
                    c = text.NextChar();
                    if (c == '>')
                    {
                        // The last label must be non-empty and must not end with '-'
                        if (domainCharCount == 0 || pc == '-')
                        {
                            break;
                        }

                        text.NextChar();
                        link           = builder.ToString();
                        builder.Length = 0;
                        return(true);
                    }

                    // A label starts with an alphanumeric character; '-' is only allowed after that
                    if (c.IsAlphaNumeric() || (c == '-' && domainCharCount > 0))
                    {
                        domainCharCount++;
                        // A label is limited to 63 characters
                        if (domainCharCount > 63)
                        {
                            break;
                        }
                    }
                    else if (c == '.')
                    {
                        // A '.' must close a non-empty label (no leading or doubled dots)
                        // and that label must not end with '-'
                        if (domainCharCount == 0 || pc == '-')
                        {
                            break;
                        }
                        domainCharCount = 0;
                    }
                    else
                    {
                        break;
                    }
                    builder.Append(c);
                    pc = c;
                }
            }
            else
            {
                // scan an uri
                // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
                // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
                // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).

                while (true)
                {
                    c = text.NextChar();
                    if (c == '\0')
                    {
                        break;
                    }

                    if (c == '>')
                    {
                        text.NextChar();
                        link           = builder.ToString();
                        builder.Length = 0;
                        return(true);
                    }

                    // Printable ASCII other than '<' ('>' is handled above)
                    // NOTE(review): this also rejects every character >= 127, i.e. all non-ASCII characters - confirm this is intended
                    if (c > ' ' && c < 127 && c != '<')
                    {
                        builder.Append(c);
                    }
                    else
                    {
                        break;
                    }
                }
            }

            // Leave the shared builder empty for its next user
            builder.Length = 0;
            return(false);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Unescapes a string: removes backslashes placed before escapable symbol characters and decodes
        /// named and numeric character entities.
        /// </summary>
        /// <param name="text">The string to unescape. It is not modified; a new string is returned when anything changes.</param>
        /// <param name="removeBackSlash">if set to <c>true</c>, backslash escapes are processed in addition to entities;
        /// otherwise backslashes are left untouched.</param>
        /// <returns>
        /// An empty string when <paramref name="text"/> is <c>null</c> or empty, <paramref name="text"/> itself when
        /// nothing had to be replaced, otherwise the unescaped string.
        /// </returns>
        public static string Unescape(string text, bool removeBackSlash = true)
        {
            // Credits: code from CommonMark.NET
            // Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
            // See license for details:  https://github.com/Knagis/CommonMark.NET/blob/master/LICENSE.md
            if (string.IsNullOrEmpty(text))
            {
                return(string.Empty);
            }

            // remove backslashes before punctuation chars:
            int  searchPos = 0; // where the next search starts
            int  lastPos   = 0; // start of the part of text not yet copied to sb
            char c;

            char[]        search = removeBackSlash ? SearchBackAndAmp : SearchAmp;
            StringBuilder sb     = null;

            while ((searchPos = text.IndexOfAny(search, searchPos)) != -1)
            {
                sb ??= StringBuilderCache.Local();
                c = text[searchPos];
                if (removeBackSlash && c == '\\')
                {
                    searchPos++;

                    // A trailing backslash has nothing to escape
                    if (text.Length == searchPos)
                    {
                        break;
                    }

                    c = text[searchPos];
                    if (c.IsEscapableSymbol())
                    {
                        // Copy everything up to (but excluding) the backslash; the escaped character
                        // is kept as the start of the next pending segment
                        sb.Append(text, lastPos, searchPos - lastPos - 1);
                        lastPos = searchPos;

                        // Step over the escaped character so that an escaped '\' or '&' is taken
                        // literally instead of being scanned again as a new escape or entity
                        searchPos++;
                    }
                }
                else if (c == '&')
                {
                    var match = ScanEntity(new StringSlice(text, searchPos, text.Length - 1), out int numericEntity, out int entityNameStart, out int entityNameLength);
                    if (match == 0)
                    {
                        // Not an entity: keep the '&' and continue after it
                        searchPos++;
                    }
                    else
                    {
                        searchPos += match;

                        if (entityNameLength > 0)
                        {
                            var decoded = EntityHelper.DecodeEntity(text.AsSpan(entityNameStart, entityNameLength));
                            // An unknown entity name is left as written
                            if (decoded != null)
                            {
                                sb.Append(text, lastPos, searchPos - match - lastPos);
                                sb.Append(decoded);
                                lastPos = searchPos;
                            }
                        }
                        else if (numericEntity >= 0)
                        {
                            sb.Append(text, lastPos, searchPos - match - lastPos);
                            EntityHelper.DecodeEntity(numericEntity, sb);
                            lastPos = searchPos;
                        }
                    }
                }
            }

            // lastPos only moves when a replacement happened, so 0 means the input is unchanged
            if (sb == null || lastPos == 0)
            {
                return(text);
            }

            sb.Append(text, lastPos, text.Length - lastPos);
            return(sb.GetStringAndReset());
        }