// Consume the newline at the current position, together with any spaces that
 // begin the following line, and return the matching break inline.
 // The caller guarantees that the subject is positioned on a newline.
 static Syntax.Inline handle_newline(Subject subj)
 {
     int newlineIndex = subj.Position;

     // step past the newline, then past the leading spaces of the next line
     advance(subj);
     while (peek_char(subj) == ' ')
         advance(subj);

     // two spaces directly in front of the newline produce a hard line break
     bool precededByTwoSpaces = newlineIndex > 1
         && BString.bchar(subj.Buffer, newlineIndex - 1) == ' '
         && BString.bchar(subj.Buffer, newlineIndex - 2) == ' ';

     if (!precededByTwoSpaces)
         return make_softbreak();

     return make_linebreak();
 }
Example #2
0
        // Handle a '<' in the subject. Tries, in this order, a URL autolink, an email
        // autolink and an HTML tag; when none of them matches a literal "<" is returned.
        // The caller guarantees that the subject is positioned on the '<'.
        static Inline handle_pointy_brace(Subject subj)
        {
            // step over the opening '<'
            subj.Position += 1;

            // 1. URL autolink
            int uriLength = Scanner.scan_autolink_uri(subj.Buffer, subj.Position, subj.Length);
            if (uriLength > 0)
            {
                // the last matched character is left out of the link text (presumably the closing '>')
                string url = subj.Buffer.Substring(subj.Position, uriLength - 1);
                var urlLabel = ParseStringEntities(url);
                var urlLink = Inline.CreateLink(urlLabel, url, string.Empty);

                // the link starts on the '<'; its label starts one character later
                urlLink.SourcePosition = subj.Position - 1;
                urlLabel.SourcePosition = subj.Position;
                subj.Position += uriLength;
                urlLink.SourceLastPosition = subj.Position;
                urlLabel.SourceLastPosition = subj.Position - 1;

                return urlLink;
            }

            // 2. email autolink - same shape, but the target gets a "mailto:" prefix
            int emailLength = Scanner.scan_autolink_email(subj.Buffer, subj.Position, subj.Length);
            if (emailLength > 0)
            {
                string address = subj.Buffer.Substring(subj.Position, emailLength - 1);
                var emailLabel = ParseStringEntities(address);
                var emailLink = Inline.CreateLink(emailLabel, "mailto:" + address, string.Empty);

                emailLink.SourcePosition = subj.Position - 1;
                emailLabel.SourcePosition = subj.Position;
                subj.Position += emailLength;
                emailLink.SourceLastPosition = subj.Position;
                emailLabel.SourceLastPosition = subj.Position - 1;

                return emailLink;
            }

            // 3. HTML tag
            int tagLength = Scanner.scan_html_tag(subj.Buffer, subj.Position, subj.Length);
            if (tagLength <= 0)
            {
                // nothing matched: the '<' is plain text
                return new Inline("<", subj.Position - 1, subj.Position);
            }

            // the raw HTML inline covers the '<' as well, hence the -1 / +1 adjustments
            var html = new Inline(InlineTag.RawHtml, subj.Buffer, subj.Position - 1, tagLength + 1);
            html.SourcePosition = subj.Position - 1;
            subj.Position += tagLength;
            html.SourceLastPosition = subj.Position;
            return html;
        }
Example #3
0
        /// <summary>
        /// Closes the given block: marks it as no longer open, records where it ends and
        /// performs the post-processing that depends on the block's tag (reference
        /// extraction for paragraphs, trailing blank line removal for indented code,
        /// info string extraction for fenced code, tight/loose detection for lists).
        /// Does nothing if the block is already closed.
        /// </summary>
        /// <param name="b">The block to close.</param>
        /// <param name="line">The line information used to compute the end position and end line.</param>
        public static void Finalize(Block b, LineInfo line)
        {
            // don't do anything if the block is already closed
            if (!b.IsOpen)
                return;

            b.IsOpen = false;

            if (line.IsTrackingPositions)
            {
                // (b.SourcePosition >= line.LineOffset) determines if the block started on this line.
                // In that case the block ends at the end of this line, otherwise at its start.
                if (b.SourcePosition >= line.LineOffset && line.Line != null)
                    b.SourceLastPosition = line.CalculateOrigin(line.Line.Length, false);
                else
                    b.SourceLastPosition = line.CalculateOrigin(0, false);
            }

            // warning 0618 (use of an obsolete member) is suppressed around the EndLine assignment
#pragma warning disable 0618
            // a block that started on an earlier line ends on the line before the current one
            b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber;
#pragma warning restore 0618

            switch (b.Tag)
            {

                case BlockTag.Paragraph:
                    // only paragraphs whose content starts with '[' can begin with reference definitions
                    var sc = b.StringContent;
                    if (!sc.StartsWith('['))
                        break;

                    var subj = new Subject(b.Top.ReferenceMap);
                    sc.FillSubject(subj);
                    var origPos = subj.Position;
                    // keep parsing references for as long as the next character is '['
                    // and ParseReference reports success (a non-zero result)
                    while (subj.Position < subj.Buffer.Length 
                        && subj.Buffer[subj.Position] == '[' 
                        && 0 != InlineMethods.ParseReference(subj))
                    {
                    }

                    // the position only moves when at least one reference was parsed
                    if (subj.Position != origPos)
                    {
                        // hand the part of the buffer after the parsed references back to the content
                        sc.Replace(subj.Buffer, subj.Position, subj.Buffer.Length - subj.Position);

                        if (sc.PositionTracker != null)
                            sc.PositionTracker.AddBlockOffset(subj.Position - origPos);

                        // NOTE(review): presumably true when nothing but blank text is left on the
                        // first remaining line - confirm against Utilities.IsFirstLineBlank
                        if (Utilities.IsFirstLineBlank(subj.Buffer, subj.Position))
                            b.Tag = BlockTag.ReferenceDefinition;
                    }

                    break;

                case BlockTag.IndentedCode:
                    b.StringContent.RemoveTrailingBlankLines();
                    break;

                case BlockTag.FencedCode:
                    // first line of contents becomes info
                    // (IndexOf + 1 is the line length including the newline, or 0 when there is no newline)
                    var firstlinelen = b.StringContent.IndexOf('\n') + 1;
                    b.FencedCodeData.Info = InlineMethods.Unescape(b.StringContent.TakeFromStart(firstlinelen, true).Trim());
                    break;

                case BlockTag.List: // determine tight/loose status
                    b.ListData.IsTight = true; // tight by default
                    var item = b.FirstChild;
                    Block subitem;

                    while (item != null)
                    {
                        // check for non-final non-empty list item ending with blank line:
                        if (item.IsLastLineBlank && item.NextSibling != null)
                        {
                            b.ListData.IsTight = false;
                            break;
                        }

                        // recurse into children of list item, to see if there are spaces between them:
                        subitem = item.FirstChild;
                        while (subitem != null)
                        {
                            // a blank line only counts when something follows it, either
                            // another child of this item or another item of the list
                            if (EndsWithBlankLine(subitem) && (item.NextSibling != null || subitem.NextSibling != null))
                            {
                                b.ListData.IsTight = false;
                                break;
                            }

                            subitem = subitem.NextSibling;
                        }

                        // the inner loop found a blank line: no need to look at further items
                        if (!b.ListData.IsTight)
                            break;

                        item = item.NextSibling;
                    }

                    break;
            }
        }
Example #4
0
        // Handle a backslash at the current position and return the resulting inline:
        // an escaped symbol becomes that symbol, a backslash followed by a newline
        // becomes a line break and any other backslash is kept as a literal "\".
        private static Inline handle_backslash(Subject subj)
        {
            // step over the backslash itself
            subj.Position += 1;

            // a backslash that ends the subject is literal
            if (subj.Position >= subj.Length)
                return new Inline("\\", subj.Position - 1, subj.Position);

            var following = subj.Buffer[subj.Position];

            // which characters can be escaped is decided by Utilities.IsEscapableSymbol;
            // that test is made before the newline test
            var isEscapable = Utilities.IsEscapableSymbol(following);

            if (!isEscapable && following != '\n')
            {
                // not an escape: only the backslash is consumed
                return new Inline("\\", subj.Position - 1, subj.Position);
            }

            // both remaining cases consume the character after the backslash as well
            subj.Position += 1;

            if (isEscapable)
                return new Inline(following.ToString(), subj.Position - 2, subj.Position);

            var lineBreak = new Inline(InlineTag.LineBreak)
            {
                SourcePosition = subj.Position - 2,
                SourceLastPosition = subj.Position
            };
            return lineBreak;
        }
Example #5
0
        /// <summary>
        /// Reads the entity that starts at the current position and returns its text.
        /// The caller guarantees that there is a <c>&amp;</c> at the current position.
        /// </summary>
        /// <returns>
        /// The decoded entity; the raw source text for a named entity that cannot be decoded;
        /// <c>U+FFFD</c> for a numeric entity that cannot be decoded; or <c>&amp;</c> when no
        /// entity is recognized (in which case only the ampersand is consumed).
        /// </returns>
        private static string ParseEntity(Subject subj)
        {
            string namedEntity;
            int codePoint;
            var start = subj.Position;
            int matched = Scanner.scan_entity(subj.Buffer, subj.Position, subj.Length - subj.Position, out namedEntity, out codePoint);

            if (matched <= 0)
            {
                // not an entity: consume just the ampersand
                subj.Position++;
                return "&";
            }

            subj.Position += matched;

            if (namedEntity != null)
            {
                // unknown names are passed through exactly as written in the source
                string decodedName = EntityDecoder.DecodeEntity(namedEntity);
                return decodedName ?? subj.Buffer.Substring(start, matched);
            }

            if (codePoint > 0)
            {
                string decodedNumber = EntityDecoder.DecodeEntity(codePoint);
                if (decodedNumber != null)
                    return decodedNumber;
            }

            // numeric entities that are zero or cannot be decoded become the replacement character
            return "\uFFFD";
        }
Example #6
0
 /// <summary>
 /// Handles a <c>[</c> at the current position by delegating to the three-argument
 /// overload with the image flag switched off.
 /// </summary>
 private static Inline HandleLeftSquareBracket(Subject subj, CommonMarkSettings settings)
 {
     const bool isImage = false;
     return HandleLeftSquareBracket(subj, isImage, settings);
 }
Example #7
0
        /// <summary>
        /// Resolves a <c>[</c> opener once its closing <c>]</c> has been reached. With
        /// <paramref name="details"/> the opener's inline is turned into a link or image;
        /// without them the opener is dropped and a literal <c>]</c> is appended instead.
        /// </summary>
        /// <param name="opener">The stack entry of the opening bracket.</param>
        /// <param name="subj">The subject being parsed; its <c>LastInline</c> is updated.</param>
        /// <param name="details">The link target and title, or <c>null</c> if this is not a link.</param>
        /// <param name="parameters">Passed through to <c>InlineStack.RemoveStackEntry</c>.</param>
        internal static void MatchSquareBracketStack(InlineStack opener, Subject subj, Reference details, InlineParserParameters parameters)
        {
            if (details == null)
            {
                // this looked like a link, but was not:
                // drop the opener's stack entry, keep everything in between and add the "]" as text
                InlineStack.RemoveStackEntry(opener, subj, opener, parameters);

                var closingBracket = new Inline("]", subj.Position - 1, subj.Position);
                subj.LastInline.LastSibling.NextSibling = closingBracket;
                subj.LastInline = closingBracket;
                return;
            }

            var link = opener.StartingInline;
            var isImage = (opener.Flags & InlineStack.InlineStackFlags.ImageLink) != 0;

            if (isImage)
                link.Tag = InlineTag.Image;
            else
                link.Tag = InlineTag.Link;

            // everything that followed the opener becomes the content of the link
            link.FirstChild = link.NextSibling;
            link.NextSibling = null;
            link.SourceLastPosition = subj.Position;

            link.TargetUrl = details.Url;
            link.LiteralContent = details.Title;

            if (!isImage)
            {
                // links cannot be nested, so earlier '[' openers with the same flags are
                // marked as "inactive" (DelimeterCount == -1); the walk stops at the first
                // entry that is already inactive
                for (var earlier = opener.Previous;
                     earlier != null && earlier.Priority <= InlineStack.InlineStackPriority.Links;
                     earlier = earlier.Previous)
                {
                    if (earlier.Delimeter != '[' || earlier.Flags != opener.Flags)
                        continue;

                    if (earlier.DelimeterCount == -1)
                        break;

                    earlier.DelimeterCount = -1;
                }
            }

            InlineStack.RemoveStackEntry(opener, subj, null, parameters);

            subj.LastInline = link;
        }
Example #8
0
        /// <summary>
        /// Parses the whole subject into a chain of sibling inlines and then post-processes
        /// the entries that are still pending on the inline stack.
        /// </summary>
        /// <param name="subj">The subject to parse.</param>
        /// <param name="refmap">Not used by this method.</param>
        /// <param name="parameters">The parser parameters handed on to <c>ParseInline</c>.</param>
        /// <returns>The first inline of the chain, or <c>null</c> if the subject is empty.</returns>
        public static Inline parse_inlines(Subject subj, Dictionary<string, Reference> refmap, InlineParserParameters parameters)
        {
            var length = subj.Length;
            if (length == 0)
            {
                return null;
            }

            var head = ParseInline(subj, parameters);
            subj.LastInline = head.LastSibling;

            while (subj.Position < length)
            {
                var next = ParseInline(subj, parameters);

                // a null result means that there is nothing to append for this step
                if (next == null)
                    continue;

                subj.LastInline.NextSibling = next;
                subj.LastInline = next.LastSibling;
            }

            InlineStack.PostProcessInlineStack(
                subj,
                subj.FirstPendingInline,
                subj.LastPendingInline,
                InlineStack.InlineStackPriority.Maximum,
                parameters);

            return head;
        }
Example #9
0
 // Skip over a run of spaces that may contain a single newline.
 // Stops at the second newline, at any other character, or at the end of the subject.
 private static void spnl(Subject subj)
 {
     var newlineConsumed = false;
     var length = subj.Length;

     while (subj.Position < length)
     {
         var current = subj.Buffer[subj.Position];

         if (current == ' ')
         {
             subj.Position++;
         }
         else if (current == '\n' && !newlineConsumed)
         {
             // the one newline that is allowed
             newlineConsumed = true;
             subj.Position++;
         }
         else
         {
             return;
         }
     }
 }
Example #10
0
        // Consume the newline at the current position, together with any spaces that
        // begin the following line, and return a hard or a soft break inline.
        // The caller guarantees that the subject is positioned on a newline.
        static Inline handle_newline(Subject subj)
        {
            var newlineIndex = subj.Position;
            var length = subj.Length;

            // step over the newline and the spaces at the beginning of the next line
            var cursor = newlineIndex + 1;
            while (cursor < length && subj.Buffer[cursor] == ' ')
                cursor++;

            subj.Position = cursor;

            // two spaces directly in front of the newline make it a hard break;
            // in that case the break's source span starts at those two spaces
            var isHardBreak = newlineIndex > 1
                && subj.Buffer[newlineIndex - 1] == ' '
                && subj.Buffer[newlineIndex - 2] == ' ';

            var result = new Inline(isHardBreak ? InlineTag.LineBreak : InlineTag.SoftBreak);
            result.SourcePosition = isHardBreak ? newlineIndex - 2 : newlineIndex;
            result.SourceLastPosition = newlineIndex + 1;
            return result;
        }
Example #11
0
        /// <summary>
        /// Parses one inline element starting at the current position of the subject and
        /// moves the position to just after that element.
        /// </summary>
        /// <param name="subj">The subject to read from; must not be at its end.</param>
        /// <param name="parameters">Supplies the per-character parsers and the special characters.</param>
        /// <returns>
        /// Whatever the parser registered for the current character returns, or a string
        /// inline covering the text up to the next special character.
        /// </returns>
        public static Inline ParseInline(Subject subj, InlineParserParameters parameters)
        {
            var handlers = parameters.Parsers;
            var stopCharacters = parameters.SpecialCharacters;

            var firstChar = subj.Buffer[subj.Position];

            // characters outside of the handler table never have a handler
            if (firstChar < handlers.Length)
            {
                var handler = handlers[firstChar];
                if (handler != null)
                    return handler(subj);
            }

            var textStart = subj.Position;

            // plain text runs up to the next special character; the search begins one
            // character later so that a special character at the current position is ignored
            var stop = subj.Buffer.IndexOfAny(stopCharacters, textStart + 1, subj.Length - textStart - 1);
            if (stop < 0)
                stop = subj.Length;

            subj.Position = stop;

            // text that is directly followed by a newline loses its trailing spaces;
            // the subject position itself stays on the newline
            var textEnd = stop;
            if (stop < subj.Length && subj.Buffer[stop] == '\n')
            {
                while (textEnd > textStart && subj.Buffer[textEnd - 1] == ' ')
                    textEnd--;
            }

            return new Inline(subj.Buffer, textStart, textEnd - textStart, textStart, textEnd, firstChar);
        }
Example #12
0
 // Skip over a run of spaces that may contain a single newline.
 private static void spnl(Subject subj)
 {
     bool newlineConsumed = false;

     while (true)
     {
         var next = peek_char(subj);

         if (next == ' ')
         {
             // spaces are always skipped
         }
         else if (!newlineConsumed && next == '\n')
         {
             // the one newline that is allowed
             newlineConsumed = true;
         }
         else
         {
             break;
         }

         advance(subj);
     }
 }
Example #13
0
        // Parse a single inline at the current position and advance the subject past it.
        // Returns null when there is no character left to read, otherwise the result of
        // the handler that is responsible for the current character.
        public static Syntax.Inline parse_inline(Subject subj)
        {
            char? c = peek_char(subj);
            if (c == null)
                return null;

            switch (c)
            {
                case '\n':
                    return handle_newline(subj);

                case '`':
                    return handle_backticks(subj);

                case '\\':
                    return handle_backslash(subj);

                case '&':
                    return handle_entity(subj);

                case '<':
                    return handle_pointy_brace(subj);

                case '_':
                    return HandleEmphasis(subj, '_');

                case '*':
                    return HandleEmphasis(subj, '*');

                case '[':
                    return handle_left_bracket(subj);

                case '!':
                    {
                        advance(subj);

                        // a '!' that is not followed by '[' is plain text
                        if (peek_char(subj) != '[')
                            return make_str("!");

                        Syntax.Inline bracketed = handle_left_bracket(subj);
                        if (bracketed != null && bracketed.Tag == InlineTag.Link)
                        {
                            // "![...]" that parsed as a link is an image
                            bracketed.Tag = InlineTag.Image;
                            return bracketed;
                        }

                        // not a link: keep the '!' as text in front of whatever was parsed
                        return append_inlines(make_str("!"), bracketed);
                    }

                default:
                    {
                        // plain text runs until the next special character
                        const string specials = "\n\\`&_*[]<!";
                        int stop = BString.binchr(subj.Buffer, subj.Position, specials);

                        // the current character is itself special: take just that one character
                        if (stop == subj.Position)
                            return make_str(take_one(subj));

                        // no special character ahead means that the rest of the buffer is text
                        bool foundSpecial = stop != -1;
                        if (!foundSpecial)
                            stop = subj.Buffer.Length;

                        string text = BString.bmidstr(subj.Buffer, subj.Position, stop - subj.Position);
                        subj.Position = stop;

                        // text that stops at a newline loses its trailing whitespace
                        if (foundSpecial && peek_char(subj) == '\n')
                            text = text.TrimEnd();

                        return make_str(text);
                    }
            }
        }
Example #14
0
        // Parse inlines until the end of the subject is reached, chaining each result
        // to the previous one. Returns the first inline, or null for an empty subject.
        public static Syntax.Inline parse_inlines_while(Subject subj)
        {
            Syntax.Inline head = null;

            while (!is_eof(subj))
            {
                Syntax.Inline parsed = parse_inline(subj);

                if (head != null)
                    subj.LastInline.Next = parsed;
                else
                    head = parsed;

                // the tail of the chain is the last sibling of what was just parsed
                subj.LastInline = parsed.LastSibling;
            }

            return head;
        }
Example #15
0
        /// <summary>
        /// Handles the run of delimiter characters at the current position. A run that can
        /// close is matched against an earlier opener on the inline stack; otherwise the run
        /// is returned as text and, if it can open or close, recorded on the inline stack.
        /// </summary>
        /// <returns>
        /// The text inline for the delimiter run, or <c>null</c> when the run was matched
        /// against an opener and there is nothing to append.
        /// </returns>
        private static Inline HandleOpenerCloser(Subject subj, InlineTag singleCharTag, InlineTag doubleCharTag, InlineParserParameters parameters)
        {
            bool canOpen, canClose;
            var delimiter = subj.Buffer[subj.Position];
            var runLength = ScanEmphasisDelimeters(subj, delimiter, out canOpen, out canClose);

            if (canClose)
            {
                // look for an opener that this run can close; the search also updates canClose
                var opener = InlineStack.FindMatchingOpener(subj.LastPendingInline, InlineStack.InlineStackPriority.Emphasis, delimiter, out canClose);
                if (opener != null)
                {
                    var used = MatchInlineStack(opener, subj, runLength, null, singleCharTag, doubleCharTag, parameters);

                    // the whole run was used up: nothing more to do
                    if (used >= runLength)
                        return null;

                    // otherwise step back onto the unused delimiters ...
                    subj.Position = subj.Position - runLength + used;

                    // ... and handle them right away, unless the recursion could get deep
                    if (runLength < 10)
                        return HandleOpenerCloser(subj, singleCharTag, doubleCharTag, parameters);

                    return null;
                }
            }

            var runStart = subj.Position - runLength;
            var runText = new Inline(subj.Buffer, runStart, runLength, runStart, subj.Position, delimiter);

            if (!canOpen && !canClose)
                return runText;

            // remember the run so that later delimiters can be matched against it
            var entry = new InlineStack
            {
                DelimeterCount = runLength,
                Delimeter = delimiter,
                StartingInline = runText,
                Priority = InlineStack.InlineStackPriority.Emphasis,
                Flags = (canOpen ? InlineStack.InlineStackFlags.Opener : 0)
                      | (canClose ? InlineStack.InlineStackFlags.Closer : 0)
            };

            InlineStack.AppendStackEntry(entry, subj);

            return runText;
        }
Example #16
0
        /// <summary>
        /// Parses the contents of [..] for a reference label. Only used for parsing 
        /// reference definition labels for use with the reference dictionary because 
        /// it does not properly parse nested inlines.
        /// 
        /// Assumes the source starts with '[' character or spaces before '['.
        /// Returns null and does not advance if no matching ] is found.
        /// Note the precedence:  code backticks have precedence over label bracket
        /// markers, which have precedence over *, _, and other inline formatting
        /// markers. So, 2 below contains a link while 1 does not:
        /// 1. [a link `with a ](/url)` character
        /// 2. [a link *with emphasized ](/url) text*
        /// </summary>
        /// <returns>
        /// The part of the buffer between the brackets, with the subject positioned just
        /// after the closing bracket; or <c>null</c>, with the subject position restored.
        /// </returns>
        private static StringPart? ParseReferenceLabel(Subject subj)
        {
            var startPos = subj.Position;
            var source = subj.Buffer;
            var len = subj.Length;

            // skip spaces and newlines up to the opening '['; any other character means
            // that there is no label here
            while (subj.Position < len)
            {
                var c = subj.Buffer[subj.Position];
                if (c == ' ' || c == '\n')
                {
                    subj.Position++;
                    continue;
                }
                else if (c == '[')
                {
                    subj.Position++;
                    break;
                }
                else
                {
                    subj.Position = startPos;
                    return null;
                }
            }

            var labelStartPos = subj.Position;

            // from here on len is the search limit: at most MaximumReferenceLabelLength
            // characters after the '[', clamped to the end of the buffer
            len = subj.Position + Reference.MaximumReferenceLabelLength;
            if (len > source.Length)
                len = source.Length;

            // subj.Position temporarily holds the search result and becomes -1 when
            // nothing is found; it is restored to startPos before null is returned
            subj.Position = source.IndexOfAny(BracketSpecialCharacters, subj.Position, len - subj.Position);
            while (subj.Position > -1)
            {
                var c = source[subj.Position];
                if (c == '\\')
                {
                    // skip the backslash together with the character after it
                    subj.Position += 2;
                    if (subj.Position >= len)
                        break;

                    subj.Position = source.IndexOfAny(BracketSpecialCharacters, subj.Position, len - subj.Position);
                }
                else if (c == '[')
                {
                    // an unescaped '[' inside of the label makes it invalid
                    break;
                }
                else
                {
                    // any other special character ends the label (presumably this is the
                    // closing ']' - confirm against BracketSpecialCharacters)
                    var label = new StringPart(source, labelStartPos, subj.Position - labelStartPos);
                    subj.Position++;
                    return label;
                }
            }

            subj.Position = startPos;
            return null;
        }
Example #17
0
 /// <summary>
 /// Handles a <c>!</c> at the current position: followed by <c>[</c> it starts an
 /// image, otherwise it is returned as plain text.
 /// </summary>
 private static Inline HandleExclamation(Subject subj, CommonMarkSettings settings)
 {
     // step over the '!'
     subj.Position++;

     if (peek_char(subj) != '[')
     {
         return new Inline("!", subj.Position - 1, subj.Position);
     }

     return HandleLeftSquareBracket(subj, true, settings);
 }
Example #18
0
        // Parse a reference definition of the form "[label]: url optional-title".
        // Assumes string begins with '[' character.
        // Adds the reference to the reference map of the subject if one is encountered.
        // Return 0 if no reference found (the subject position is then restored),
        // otherwise position of subject after reference is parsed.
        public static int ParseReference(Subject subj, CommonMarkSettings settings)
        {
            string title;
            var startPos = subj.Position;

            // parse label:
            var lab = ParseReferenceLabel(subj);
            if (lab == null || lab.Value.Length > Reference.MaximumReferenceLabelLength)
                goto INVALID;

            // a label without any non-whitespace content is rejected
            if (!Scanner.HasNonWhitespace(lab.Value))
                goto INVALID;

            // colon:
            if (peek_char(subj) == ':')
                subj.Position++;
            else
                goto INVALID;

            // parse link url:
            spnl(subj);
            var matchlen = Scanner.scan_link_url(subj.Buffer, subj.Position, subj.Length);
            if (matchlen == 0)
                goto INVALID;

            var url = subj.Buffer.Substring(subj.Position, matchlen);
            url = CleanUrl(url);
            subj.Position += matchlen;

            // parse optional link_title
            // (beforetitle is remembered so that a failed title can be rewound)
            var beforetitle = subj.Position;
            spnl(subj);

            matchlen = Scanner.scan_link_title(subj.Buffer, subj.Position, subj.Length);
            if (matchlen > 0)
            {
                title = subj.Buffer.Substring(subj.Position, matchlen);
                title = CleanTitle(title);
                subj.Position += matchlen;
            }
            else
            {
                subj.Position = beforetitle;
                title = string.Empty;
            }

            char c;
            // parse final spaces and newline:
            while ((c = peek_char(subj)) == ' ') subj.Position++;

            if (c == '\n')
            {
                subj.Position++;
            }
            else if (c != '\0')
            {
                // something other than a newline or the end of the subject follows
                if (matchlen > 0)
                { // try rewinding before title
                    // NOTE(review): the title parsed above is still the one that gets stored
                    // when this rewind succeeds - confirm that this is intended
                    subj.Position = beforetitle;
                    while ((c = peek_char(subj)) == ' ') subj.Position++;
                    if (c == '\n')
                        subj.Position++;
                    else if (c != '\0')
                       goto INVALID;
                }
                else
                {
                    goto INVALID;
                }
            }

            // insert reference into refmap
            AddReference(subj.ReferenceMap, lab.Value, url, title, settings);

            return subj.Position;

        // shared failure exit: undo all position changes and report "no reference"
        INVALID:
            subj.Position = startPos;
            return 0;
        }
Example #19
0
        /// <summary>
        /// Handles a <c>[</c> at the current position: returns it as a text inline
        /// (<c>![</c> for images) and records it as a link opener on the inline stack.
        /// </summary>
        /// <param name="subj">The subject, positioned on the <c>[</c>.</param>
        /// <param name="isImage"><c>true</c> if the bracket is preceded by a <c>!</c>.</param>
        /// <param name="settings">Not used by this method.</param>
        private static Inline HandleLeftSquareBracket(Subject subj, bool isImage, CommonMarkSettings settings)
        {
            var bracketPos = subj.Position;

            // for images the text and its source span include the '!' in front of the bracket
            Inline openerText = isImage
                ? new Inline("![", bracketPos - 1, bracketPos + 1)
                : new Inline("[", bracketPos, bracketPos + 1);

            // move past the '['
            subj.Position = bracketPos + 1;

            var entry = new InlineStack
            {
                Delimeter = '[',
                StartingInline = openerText,
                StartPosition = subj.Position,
                Priority = InlineStack.InlineStackPriority.Links,
                Flags = InlineStack.InlineStackFlags.Opener
                      | (isImage ? InlineStack.InlineStackFlags.ImageLink : InlineStack.InlineStackFlags.None)
            };

            InlineStack.AppendStackEntry(entry, subj);

            return openerText;
        }
Example #20
0
 /// <summary>
 /// Returns the character at the current position of the subject without consuming it,
 /// or <c>'\0'</c> when the position is at or beyond the end of the subject.
 /// </summary>
 private static char peek_char(Subject subj)
 {
     if (subj.Position < subj.Length)
     {
         return subj.Buffer[subj.Position];
     }

     return '\0';
 }
Example #21
0
        /// <summary>
        /// Handles a <c>]</c> at the current position. If a matching <c>[</c> opener is on
        /// the inline stack, the link details are resolved and the opener is completed;
        /// otherwise the bracket is returned as plain text.
        /// </summary>
        /// <returns>
        /// A text inline containing <c>]</c>, or <c>null</c> when the bracket was passed on
        /// to <c>MatchSquareBracketStack</c>.
        /// </returns>
        /// <exception cref="NotSupportedException">
        /// No opener was found but the search reported that the bracket can close.
        /// </exception>
        private static Inline HandleRightSquareBracket(Subject subj, CommonMarkSettings settings)
        {
            // move past ']'
            subj.Position++;

            bool canClose;
            var opener = InlineStack.FindMatchingOpener(subj.LastPendingInline, InlineStack.InlineStackPriority.Links, '[', out canClose);

            if (opener == null)
            {
                var closerText = new Inline("]", subj.Position - 1, subj.Position);

                // note that the current implementation will not work if there are other inlines
                // with priority higher than Links. to fix this the parsed link details would have
                // to be added to a closer element (priority Links, flag Closer) on the stack.
                if (canClose)
                    throw new NotSupportedException("It is not supported to have inline stack priority higher than Links.");

                return closerText;
            }

            // an "inactive" opener means that there was a nested link: no link is created
            if (opener.DelimeterCount == -1)
            {
                InlineStack.RemoveStackEntry(opener, subj, opener, settings.InlineParserParameters);
                return new Inline("]", subj.Position - 1, subj.Position);
            }

            // position just after the ']', taken before the link details are consumed
            var afterCloser = subj.Position;

            // try parsing details for '[foo](/url "title")' or '[foo][bar]'
            var details = ParseLinkDetails(subj, settings);

            // no details, or a self reference: look up the text between the brackets itself
            if (details == null || details == Reference.SelfReference)
            {
                var labelStart = opener.StartPosition;
                var label = new StringPart(subj.Buffer, labelStart, afterCloser - labelStart - 1);

                details = LookupReference(subj.ReferenceMap, label, settings);
            }

            // an invalid reference is treated like a missing one
            if (details == Reference.InvalidReference)
                details = null;

            MatchSquareBracketStack(opener, subj, details, settings.InlineParserParameters);
            return null;
        }
Example #22
0
        /// <summary>
        /// Looks for a run of exactly <paramref name="openticklength"/> backticks, starting
        /// at the current position of the subject.
        /// </summary>
        /// <returns>
        /// The position just after the closing run, which is also stored as the new position
        /// of the subject; or <c>0</c> if there is no such run, in which case the position
        /// of the subject is left unchanged.
        /// </returns>
        private static int ScanToClosingBackticks(Subject subj, int openticklength)
        {
            // performance notes from the original implementation: searching with
            // string.IndexOf("````", ...) was ~2x slower than this loop, while jumping
            // to the next backtick with IndexOf('`') is ~1.5x faster than visiting every char.

            var buffer = subj.Buffer;
            var end = buffer.Length;
            var runLength = 0;
            var index = subj.Position;

            while (index < end)
            {
                if (buffer[index] == '`')
                {
                    runLength++;
                    index++;
                    continue;
                }

                // a run has just ended (or there was none): check if it has the wanted length
                if (runLength == openticklength)
                {
                    subj.Position = index;
                    return index;
                }

                // jump straight to the next backtick; without one there is nothing to close with
                var nextTick = buffer.IndexOf('`', index, end - index);
                if (nextTick == -1)
                    return 0;

                runLength = 0;
                index = nextTick;
            }

            // the buffer may end with the closing run
            if (runLength == openticklength)
            {
                subj.Position = end;
                return end;
            }

            return 0;
        }
Example #23
0
 /// <summary>
 /// Reads the entity located at the current subject position and returns a new string inline holding
 /// the text produced by <c>ParseEntity</c>, together with the subject positions from before and
 /// after the entity was parsed.
 /// The caller must make sure that the character at the current position is <c>&amp;</c>.
 /// </summary>
 private static Inline HandleEntity(Subject subj)
 {
     var start = subj.Position;
     var text = ParseEntity(subj);
     var end = subj.Position;

     return new Inline(text, start, end);
 }
Example #24
0
        // Parses either a backtick-delimited code span or, when no closing run exists, a literal
        // run of backticks. Returns the resulting inline.
        // The caller guarantees that the subject is positioned on a backtick.
        static Inline handle_backticks(Subject subj)
        {
            // count and consume the opening run of backticks
            var tickCount = 0;
            for (var limit = subj.Length; subj.Position < limit && subj.Buffer[subj.Position] == '`'; subj.Position++)
                tickCount++;

            var contentStart = subj.Position;
            var afterClosing = ScanToClosingBackticks(subj, tickCount);

            if (afterClosing != 0)
            {
                // the code text sits between the opening run and the closing run
                var codeLength = afterClosing - contentStart - tickCount;
                var code = new Inline(InlineTag.Code, NormalizeWhitespace(subj.Buffer, contentStart, codeLength));
                code.SourcePosition = contentStart - tickCount;
                code.SourceLastPosition = afterClosing;
                return code;
            }

            // no closing run: go back to right after the opening ticks and emit them as plain text
            subj.Position = contentStart;
            return new Inline(new string('`', tickCount), contentStart - tickCount, contentStart);
        }
Example #25
0
        /// <summary>
        /// Builds a single string <see cref="Inline"/> from the given text after decoding the HTML
        /// entities it contains. The returned inline never has nested elements.
        /// </summary>
        private static Inline ParseStringEntities(string s)
        {
            var subj = new Subject(s, null);

            // "pending" holds the most recent chunk; it is only copied into the builder once a
            // further chunk is known to follow, so single-chunk input needs no builder at all.
            string pending = null;
            StringBuilder assembled = null;

            for (var current = peek_char(subj); current != '\0'; current = peek_char(subj))
            {
                if (pending != null)
                {
                    if (assembled != null)
                        assembled.Append(pending);
                    else
                        assembled = new StringBuilder(pending, s.Length);
                }

                if (current != '&')
                {
                    // plain text: take everything up to the next ampersand (or the end of the subject)
                    var ampersandAt = subj.Buffer.IndexOf('&', subj.Position);
                    var chunkEnd = ampersandAt == -1 ? subj.Length : ampersandAt;

                    pending = subj.Buffer.Substring(subj.Position, chunkEnd - subj.Position);
                    subj.Position = chunkEnd;
                }
                else
                {
                    pending = ParseEntity(subj);
                }
            }

            if (assembled == null)
                return new Inline(pending);

            assembled.Append(pending);

            return new Inline(assembled.ToString());
        }
Example #26
0
        /// <summary>
        /// Counts the consecutive <paramref name="delimeter"/> characters found at the current subject
        /// position and works out whether that run is able to open and/or close an emphasis element.
        /// The subject position is moved past the run; it is left unchanged when the run is empty.
        /// </summary>
        /// <returns>The number of delimiter characters in the run.</returns>
        private static int ScanEmphasisDelimeters(Subject subj, char delimeter, out bool canOpen, out bool canClose)
        {
            var runStart = subj.Position;
            var limit = subj.Length;

            var runEnd = runStart;
            while (runEnd < limit && subj.Buffer[runEnd] == delimeter)
                runEnd++;

            var count = runEnd - runStart;
            if (count == 0)
            {
                canOpen = false;
                canClose = false;
                return 0;
            }

            // the start and the end of the subject are treated as if a newline was located there
            var preceding = runStart == 0 ? '\n' : subj.Buffer[runStart - 1];
            subj.Position = runEnd;
            var following = runEnd == limit ? '\n' : subj.Buffer[runEnd];

            bool spaceBefore, punctBefore, spaceAfter, punctAfter;
            Utilities.CheckUnicodeCategory(preceding, out spaceBefore, out punctBefore);
            Utilities.CheckUnicodeCategory(following, out spaceAfter, out punctAfter);

            // unrestricted flags, derived only from the characters around the run
            var opens = !spaceAfter && (!punctAfter || spaceBefore || punctBefore);
            var closes = !spaceBefore && (!punctBefore || spaceAfter || punctAfter);

            if (delimeter == '_')
            {
                // for underscores each flag is narrowed using the unrestricted value of the other one
                canOpen = opens && (!closes || punctBefore);
                canClose = closes && (!opens || punctAfter);
            }
            else
            {
                canOpen = opens;
                canClose = closes;
            }

            // a '$' run that is directly followed by a digit is never allowed to open
            if (delimeter == '$' && char.IsDigit(following))
                canOpen = false;

            return count;
        }
Example #27
0
        // Parses what follows a link label: either an inline "(url title)" section or a reference label.
        // Returns:
        //   - a new Reference holding the cleaned url and title when an inline section was matched;
        //   - Reference.SelfReference when a reference label was parsed but it is empty;
        //   - the looked-up Reference when the label is found in the reference map;
        //   - Reference.InvalidReference when the label is not found (subject position is restored);
        //   - null when nothing matched (subject position is restored).
        static Reference ParseLinkDetails(Subject subj, CommonMarkSettings settings)
        {
            var origin = subj.Position;
            var next = peek_char(subj);

            if (next == '(')
            {
                // try to parse an explicit link; the url scan only happens if the space scan succeeded
                var spaces = Scanner.scan_spacechars(subj.Buffer, subj.Position + 1, subj.Length);
                if (spaces > -1)
                {
                    var urlLength = Scanner.scan_link_url(subj.Buffer, subj.Position + 1 + spaces, subj.Length);
                    if (urlLength > -1)
                    {
                        var urlStart = subj.Position + 1 + spaces; // after (
                        var urlEnd = urlStart + urlLength;
                        var titleStart = urlEnd + Scanner.scan_spacechars(subj.Buffer, urlEnd, subj.Length);

                        // a title is only looked for when it is separated from the url by spaces
                        var titleEnd = titleStart;
                        if (titleStart != urlEnd)
                            titleEnd = titleStart + Scanner.scan_link_title(subj.Buffer, titleStart, subj.Length);

                        var closing = titleEnd + Scanner.scan_spacechars(subj.Buffer, titleEnd, subj.Length);
                        if (closing < subj.Length && subj.Buffer[closing] == ')')
                        {
                            subj.Position = closing + 1;

                            var url = CleanUrl(subj.Buffer.Substring(urlStart, urlEnd - urlStart));
                            var title = CleanTitle(subj.Buffer.Substring(titleStart, titleEnd - titleStart));

                            return new Reference() { Title = title, Url = url };
                        }
                    }
                }
            }
            else if (next == '[' || next == ' ' || next == '\n')
            {
                var label = ParseReferenceLabel(subj);
                if (label != null)
                {
                    if (label.Value.Length == 0)
                        return Reference.SelfReference;

                    var found = LookupReference(subj.ReferenceMap, label.Value, settings);
                    if (found != null)
                        return found;

                    // restore the subject position, but report InvalidReference so that the caller
                    // knows not to parse 'foo' from [foo][bar].
                    subj.Position = origin;
                    return Reference.InvalidReference;
                }
            }

            // nothing matched - restore the subject position.
            subj.Position = origin;
            return null;
        }
Example #28
0
        /// <summary>
        /// Matches the given opener stack entry against a run of closing delimiters and turns the matched
        /// range into an inline with either <paramref name="singleCharTag"/> or <paramref name="doubleCharTag"/>.
        /// </summary>
        /// <param name="opener">The stack entry of the opening delimiters.</param>
        /// <param name="subj">The subject being parsed. Only used directly when <paramref name="closer"/>
        /// is <c>null</c> (and it is not <c>null</c> itself); it is also passed on when the opener entry is removed.</param>
        /// <param name="closingDelimeterCount">The number of delimiter characters available in the closing run.</param>
        /// <param name="closer">The stack entry of the closing delimiters, or <c>null</c> when the closing
        /// characters are at the current position in the subject (see the comment inside the method).</param>
        /// <param name="singleCharTag">The tag applied when one delimiter is used; the value <c>0</c> is
        /// treated as "not available".</param>
        /// <param name="doubleCharTag">The tag applied when two delimiters are used; the value <c>0</c> is
        /// treated as "not available".</param>
        /// <param name="parameters">Passed through to <c>InlineStack.RemoveStackEntry</c>.</param>
        /// <returns>The number of delimiters used from the closer (1 or 2), or <c>0</c> when only a single
        /// delimiter could be used but <paramref name="singleCharTag"/> is not available.</returns>
        internal static int MatchInlineStack(InlineStack opener, Subject subj, int closingDelimeterCount, InlineStack closer, InlineTag singleCharTag, InlineTag doubleCharTag, InlineParserParameters parameters)
        {
            // calculate the actual number of delimeters used from this closer
            int useDelims;
            var openerDelims = opener.DelimeterCount;

            if (closingDelimeterCount < 3 || openerDelims < 3)
            {
                // at least one side has fewer than three delimiters: use the smaller of the two counts,
                // falling back from 2 to 1 when there is no double tag and giving up when there is no single tag.
                useDelims = closingDelimeterCount <= openerDelims ? closingDelimeterCount : openerDelims;
                if (useDelims == 2 && doubleCharTag == 0)
                    useDelims = 1;
                if (useDelims == 1 && singleCharTag == 0)
                    return 0;
            }
            else if (singleCharTag == 0)
                useDelims = 2;
            else if (doubleCharTag == 0)
                useDelims = 1;
            else
                // both tags are available and both sides have three or more delimiters:
                // an even closing count uses two delimiters, an odd one uses a single delimiter.
                useDelims = closingDelimeterCount % 2 == 0 ? 2 : 1;

            Inline inl = opener.StartingInline;
            InlineTag tag = useDelims == 1 ? singleCharTag : doubleCharTag;
            if (openerDelims == useDelims)
            {
                // the opener is completely used up - remove the stack entry and reuse the inline element
                // (the inlines that followed the opener become its children).
                inl.Tag = tag;
                inl.LiteralContent = null;
                inl.FirstChild = inl.NextSibling;
                inl.NextSibling = null;

                InlineStack.RemoveStackEntry(opener, subj, closer?.Previous, parameters);
            }
            else
            {
                // the opener will only partially be used - stack entry remains (truncated) and a new inline is added.
                opener.DelimeterCount -= useDelims;
                inl.LiteralContent = inl.LiteralContent.Substring(0, opener.DelimeterCount);
                inl.SourceLastPosition -= useDelims;

                // the new inline is created from the tag and the opener's former next sibling and is
                // linked in directly after the (shortened) opener inline.
                inl.NextSibling = new Inline(tag, inl.NextSibling);
                inl = inl.NextSibling;

                inl.SourcePosition = opener.StartingInline.SourcePosition + opener.DelimeterCount;
            }

            // there are two callers for this method, distinguished by the `closer` argument.
            // if closer == null it means the method is called during the initial subject parsing and the closer
            //   characters are at the current position in the subject. The main benefit is that there is nothing
            //   parsed that is located after the matched inline element.
            // if closer != null it means the method is called when the second pass for previously unmatched
            //   stack elements is done. The drawback is that there can be other elements after the closer.
            if (closer != null)
            {
                var clInl = closer.StartingInline;
                if ((closer.DelimeterCount -= useDelims) > 0)
                {
                    // a new inline element must be created because the old one has to be the one that
                    // finalizes the children of the emphasis
                    var newCloserInline = new Inline(clInl.LiteralContent.Substring(useDelims));
                    newCloserInline.SourcePosition = inl.SourceLastPosition = clInl.SourcePosition + useDelims;
                    newCloserInline.SourceLength = closer.DelimeterCount;
                    newCloserInline.NextSibling = clInl.NextSibling;

                    // the old closer inline is emptied and detached; the remaining delimiters live on
                    // in the new inline, which follows the matched element and replaces the closer's inline.
                    clInl.LiteralContent = null;
                    clInl.NextSibling = null;
                    inl.NextSibling = closer.StartingInline = newCloserInline;
                }
                else
                {
                    // the closer is completely used up: the matched element ends where the closer ended
                    // and is linked directly to whatever followed the closer.
                    inl.SourceLastPosition = clInl.SourceLastPosition;

                    clInl.LiteralContent = null;
                    inl.NextSibling = clInl.NextSibling;
                    clInl.NextSibling = null;
                }
            }
            else if (subj != null)
            {
                // the subject position is taken as the end of the closing run, so the matched element
                // ends after the delimiters that were actually used.
                // NOTE(review): assumes subj.Position is already past the whole closing run - confirm with the caller.
                inl.SourceLastPosition = subj.Position - closingDelimeterCount + useDelims;
                subj.LastInline = inl;
            }

            return useDelims;
        }
Example #29
0
        /// <summary>
        /// Visits the given block together with all of its descendants and following siblings and converts
        /// the string content of paragraphs and headers into inline content.
        /// </summary>
        /// <param name="block">The document level block from which to start the processing.</param>
        /// <param name="refmap">The reference mapping used when parsing links.</param>
        /// <param name="settings">The settings that influence how the inline parsing is performed.</param>
        public static void ProcessInlines(Block block, Dictionary<string, Reference> refmap, CommonMarkSettings settings)
        {
            Stack<Inline> inlineStack = null;

            // siblings that still have to be visited once the subtree currently being walked is done
            var pending = new Stack<Block>();
            var parsers = settings.InlineParsers;
            var specialCharacters = settings.InlineParserSpecialCharacters;
            var subj = new Subject(refmap);

            while (block != null)
            {
                switch (block.Tag)
                {
                    case BlockTag.Paragraph:
                    case BlockTag.AtxHeader:
                    case BlockTag.SETextHeader:
                        {
                            var content = block.StringContent;
                            if (content == null)
                                break;

                            content.FillSubject(subj);

                            // remember where the subject started so that the position tracker can be offset
                            var startOffset = subj.Position;

                            block.InlineContent = InlineMethods.parse_inlines(subj, refmap, parsers, specialCharacters);
                            block.StringContent = null;

                            var tracker = content.PositionTracker;
                            if (tracker != null)
                            {
                                tracker.AddBlockOffset(-startOffset);
                                AdjustInlineSourcePosition(block.InlineContent, tracker, ref inlineStack);
                            }

                            break;
                        }
                }

                // depth-first traversal: children first, then siblings, then whatever was deferred
                if (block.FirstChild != null)
                {
                    if (block.NextSibling != null)
                        pending.Push(block.NextSibling);

                    block = block.FirstChild;
                    continue;
                }

                if (block.NextSibling != null)
                {
                    block = block.NextSibling;
                    continue;
                }

                block = pending.Count > 0 ? pending.Pop() : null;
            }
        }
Example #30
0
 // Handles a '[' in the subject. Produces a link when a label is followed by either an explicit
 // "(url title)" section or a reference that can be resolved; otherwise produces literal text:
 // the bracketed label contents when a label was matched, or just "[" when there was no label.
 static Syntax.Inline handle_left_bracket(Subject subj)
 {
     string firstLabel = "";
     string secondLabel = "";

     bool hasLabel = link_label(subj, ref firstLabel);
     int afterLabel = subj.Position;

     if (!hasLabel)
     {
         // no label at all, so this is not a link: consume the '[' and emit it as text
         advance(subj);
         return make_str("[");
     }

     int spaces;
     int urlLength;
     if (peek_char(subj) == '(' &&
         (spaces = Scanner.scan_spacechars(subj.Buffer, subj.Position + 1)) > -1 &&
         (urlLength = Scanner.scan_link_url(subj.Buffer, subj.Position + 1 + spaces)) > -1)
     {
         // try to parse an explicit link:
         int urlStart = subj.Position + 1 + spaces; // after (
         int urlEnd = urlStart + urlLength;
         int titleStart = urlEnd + Scanner.scan_spacechars(subj.Buffer, urlEnd);

         // a title is only accepted when spaces separate it from the url
         int titleEnd = titleStart;
         if (titleStart != urlEnd)
         {
             titleEnd = titleStart + Scanner.scan_link_title(subj.Buffer, titleStart);
         }

         int closing = titleEnd + Scanner.scan_spacechars(subj.Buffer, titleEnd);
         if (BString.bchar(subj.Buffer, closing) != ')')
         {
             // a label was matched but the explicit part is malformed: emit the label literally
             subj.Position = afterLabel;
             Syntax.Inline literalLabel = parse_inlines(firstLabel, subj.ReferenceMap);
             Syntax.Inline openBracket = make_str("[");
             return append_inlines(openBracket, append_inlines(literalLabel, make_str("]")));
         }

         subj.Position = closing + 1;
         string url = CleanUrl(BString.bmidstr(subj.Buffer, urlStart, urlEnd - urlStart));
         string title = CleanTitle(BString.bmidstr(subj.Buffer, titleStart, titleEnd - titleStart));
         return make_link(parse_inlines(firstLabel, null), url, title);
     }

     // Check for a reference link.
     // First, see if another label follows (optionally after spaces):
     subj.Position += Scanner.scan_spacechars(subj.Buffer, afterLabel);

     string lookupLabel = firstLabel;
     if (peek_char(subj) == '[' && link_label(subj, ref secondLabel))
     {
         // a nonempty second label replaces the first one as the lookup key
         if (!string.IsNullOrEmpty(secondLabel))
         {
             lookupLabel = secondLabel;
         }
     }
     else
     {
         subj.Position = afterLabel;
     }

     // look the label up in the reference map of the subject:
     Reference target = lookup_reference(subj.ReferenceMap, lookupLabel);
     if (target == null)
     {
         // unknown reference: rewind to just after the first label and emit it literally
         subj.Position = afterLabel;
         Syntax.Inline literalLabel = parse_inlines(firstLabel, subj.ReferenceMap);
         Syntax.Inline openBracket = make_str("[");
         return append_inlines(openBracket, append_inlines(literalLabel, make_str("]")));
     }

     return make_link(parse_inlines(firstLabel, null), target.Url, target.Title);
 }