// Parse a [link], ![image] or (extra mode only) [^footnote] construct starting at the
// current position.
//
// Returns the token for the construct, or null when the input is not a valid
// link/image/footnote. On a null return the position may have been advanced; it is
// not restored here, so the caller has to rewind.
Token ProcessLinkOrImageOrFootnote()
{
    // A leading '!' makes it an image, otherwise it is a link
    TokenType kind;
    if (SkipChar('!'))
    {
        kind = TokenType.img;
    }
    else
    {
        kind = TokenType.link;
    }

    // Both forms require the opening '['
    if (!SkipChar('['))
    {
        return null;
    }

    // Footnote reference "[^id]" - only considered for links in extra mode
    int rewindTo = position;
    if (m_Markdown.ExtraMode && kind == TokenType.link && SkipChar('^'))
    {
        SkipLinespace();

        string footnoteId;
        if (SkipFootnoteID(out footnoteId) && SkipChar(']'))
        {
            // A non-negative index means the footnote was claimed successfully
            int footnoteIndex = m_Markdown.ClaimFootnote(footnoteId);
            if (footnoteIndex >= 0)
            {
                return CreateToken(TokenType.footnote, new FootnoteReference(footnoteIndex, footnoteId));
            }
        }

        // Not a footnote after all - go back to just after the '['
        position = rewindTo;
    }

    // Links (but not images) can be switched off on this formatter
    if (DisableLinks && kind == TokenType.link)
    {
        return null;
    }

    bool extra = m_Markdown.ExtraMode;

    // Scan for the ']' that balances the opening '[', counting nested brackets
    // and stepping over escapable characters
    Mark();
    int nesting = 1;
    bool closed = false;
    while (!closed && !eof)
    {
        switch (current)
        {
            case '[':
                nesting++;
                break;

            case ']':
                nesting--;
                closed = (nesting == 0);
                break;
        }

        if (!closed)
        {
            this.SkipEscapableChar(extra);
        }
    }

    // Ran out of input before the brackets balanced
    if (!closed)
    {
        return null;
    }

    // Everything since Mark() is the link text; unescape it
    string text = Utils.UnescapeString(Extract(), extra);

    // Step over the closing ']'
    SkipForward(1);

    // Remember this spot: an implicit reference link ends exactly here
    rewindTo = position;

    // Inline form: "(target)" must follow the ']' immediately
    if (SkipChar('('))
    {
        var inlineTarget = LinkDefinition.ParseLinkTarget(this, null, m_Markdown.ExtraMode);
        if (inlineTarget == null)
        {
            return null;
        }

        // Whitespace is tolerated before the closing ')'
        SkipWhitespace();
        if (!SkipChar(')'))
        {
            return null;
        }

        return CreateToken(kind, new LinkInfo(inlineTarget, text));
    }

    // Reference form: at most one space or tab may separate "[text]" and "[id]"...
    if (!SkipChar(' '))
    {
        SkipChar('\t');
    }

    // ...optionally followed by a line end and any amount of line space
    if (eol)
    {
        SkipEol();
        SkipLinespace();
    }

    string referenceId = null;
    if (current != '[')
    {
        // No explicit id - undo the optional whitespace we skipped
        position = rewindTo;
    }
    else
    {
        // Step over '[' and capture everything up to the matching ']'
        SkipForward(1);
        Mark();
        if (!Find(']'))
        {
            return null;
        }

        referenceId = Extract();

        // Step over the closing ']'
        SkipForward(1);
    }

    // Implicit reference link ("[text]" or "[text][]"): the text doubles as the id
    if (string.IsNullOrEmpty(referenceId))
    {
        referenceId = Utils.NormalizeLineEnds(text);

        // A different string instance coming back is taken to mean the text contained
        // line ends; in that case collapse each line break (and any spaces directly
        // before it) into a single space
        if (!object.ReferenceEquals(referenceId, text))
        {
            while (referenceId.Contains(" \n"))
            {
                referenceId = referenceId.Replace(" \n", "\n");
            }

            referenceId = referenceId.Replace("\n", " ");
        }
    }

    // An undefined reference is not a link
    var definition = m_Markdown.GetLinkDefinition(referenceId);
    return definition == null
        ? null
        : CreateToken(kind, new LinkInfo(definition, text));
}
// Scan str[start .. start+len) and fill m_Tokens with a token for every special
// construct found, plus Text tokens for the plain runs in between. Emphasis marks
// are collected while scanning and resolved in one pass at the end.
public void Tokenize(string str, int start, int len)
{
    // Reset the scanner and the token list
    base.Reset(str, start, len);
    m_Tokens.Clear();

    List<Token> pendingMarks = null;    // created lazily on the first emphasis mark
    List<Abbreviation> abbreviations = m_Markdown.GetAbbreviations();
    bool extraMode = m_Markdown.ExtraMode;

    // Start of the plain-text run that has not been emitted yet
    int textStart = position;
    while (!eof)
    {
        // If a token starts here, the pending plain text ends here
        int textEnd = position;

        Token token = null;
        switch (current)
        {
            case '*':
            case '_':
                // Emphasis mark; the mark types below are resolved after scanning
                token = CreateEmphasisMark();
                if (token != null)
                {
                    var markType = token.type;
                    bool needsResolving = markType == TokenType.internal_mark
                                       || markType == TokenType.opening_mark
                                       || markType == TokenType.closing_mark;
                    if (needsResolving)
                    {
                        if (pendingMarks == null)
                        {
                            pendingMarks = new List<Token>();
                        }
                        pendingMarks.Add(token);
                    }
                }
                break;

            case '`':
                token = ProcessCodeSpan();
                break;

            case '[':
            case '!':
            {
                // Link, image or footnote reference
                int constructStart = position;
                token = ProcessLinkOrImageOrFootnote();

                // Invalid syntax: rewind so the '[' or '!' is handled as an
                // ordinary character further down
                if (token == null)
                {
                    position = constructStart;
                }
                break;
            }

            case '<':
            {
                int tagStart = position;
                HtmlTag tag = HtmlTag.Parse(this);
                if (tag == null)
                {
                    // Not an html tag - maybe an autolink, eg: <google.com>
                    position = tagStart;
                    token = ProcessAutoLink();
                    if (token == null)
                    {
                        position = tagStart;
                    }
                }
                else if (m_Markdown.SafeMode && !tag.IsSafe())
                {
                    // Tag rejected in safe mode: rewind and let it fall through as text
                    position = tagStart;
                }
                else
                {
                    // Accepted tag, tokenized as-is
                    token = CreateToken(TokenType.HtmlTag, tagStart, position - tagStart);
                }
                break;
            }

            case '&':
            {
                // A valid html entity becomes an Html token
                int entityStart = position;
                string ignoredEntity = null;
                if (SkipHtmlEntity(ref ignoredEntity))
                {
                    token = CreateToken(TokenType.Html, entityStart, position - entityStart);
                }
                break;
            }

            case ' ':
            {
                // Two spaces right before a line end request a hard break
                if (CharAtOffset(1) == ' ' && IsLineEnd(CharAtOffset(2)))
                {
                    SkipForward(2);

                    // ...but never at the very end of the input
                    if (!eof)
                    {
                        SkipEol();
                        token = CreateToken(TokenType.br, textEnd, 0);
                    }
                }
                break;
            }

            case '\\':
            {
                // Special handling for escaping <autolinks> (disabled):
                /*
                if (CharAtOffset(1) == '<')
                {
                    // Is it an autolink?
                    int savepos = position;
                    SkipForward(1);
                    bool AutoLink = ProcessAutoLink() != null;
                    position = savepos;

                    if (AutoLink)
                    {
                        token = CreateToken(TokenType.Text, position + 1, 1);
                        SkipForward(2);
                    }
                }
                else
                */

                // Backslash + escapable character: emit only the escaped character
                if (Utils.IsEscapableChar(CharAtOffset(1), extraMode))
                {
                    token = CreateToken(TokenType.Text, position + 1, 1);
                    SkipForward(2);
                }
                break;
            }
        }

        // Abbreviations: only tried when nothing else matched and the previous
        // character is not a letter or digit
        if (token == null && abbreviations != null && !Char.IsLetterOrDigit(CharAtOffset(-1)))
        {
            int abbrStart = position;
            foreach (var candidate in abbreviations)
            {
                // The abbreviation must not be followed by a letter or digit either
                if (SkipString(candidate.Abbr) && !Char.IsLetterOrDigit(current))
                {
                    token = CreateToken(TokenType.abbreviation, candidate);
                    break;
                }

                position = abbrStart;
            }
        }

        if (token == null)
        {
            // Plain character - step over it and keep scanning
            SkipForward(1);
            continue;
        }

        // Emit the plain text preceding the token, if any
        if (textEnd > textStart)
        {
            m_Tokens.Add(CreateToken(TokenType.Text, textStart, textEnd - textStart));
        }

        // Emit the token itself; the next text run starts right after it
        m_Tokens.Add(token);
        textStart = position;
    }

    // Emit whatever plain text follows the last token
    if (position > textStart)
    {
        m_Tokens.Add(CreateToken(TokenType.Text, textStart, position - textStart));
    }

    // Pair up the collected emphasis marks, if there were any
    if (pendingMarks != null)
    {
        ResolveEmphasisMarks(m_Tokens, pendingMarks);
    }
}
// Process '*', '**' or '_', '__'
// This is horrible and probably much better done through regex, but I'm stubborn.
// For normal cases this routine works as expected.  For unusual cases (eg: overlapped
// strong and emphasis blocks), the behaviour is probably not the same as the original
// markdown scanner.
/*
public Token ProcessEmphasisOld(ref Token prev_single, ref Token prev_double)
{
    // Check whitespace before/after
    bool bSpaceBefore = !bof && IsLineSpace(CharAtOffset(-1));
    bool bSpaceAfter = IsLineSpace(CharAtOffset(1));

    // Ignore if surrounded by whitespace
    if (bSpaceBefore && bSpaceAfter)
    {
        return null;
    }

    // Save the current character and skip it
    char ch = current;
    Skip(1);

    // Do we have a previous matching single star?
    if (!bSpaceBefore && prev_single != null)
    {
        // Yes, match them...
        prev_single.type = TokenType.open_em;
        prev_single = null;
        return CreateToken(TokenType.close_em, position - 1, 1);
    }

    // Is this a double star/under
    if (current == ch)
    {
        // Skip second character
        Skip(1);

        // Space after?
        bSpaceAfter = IsLineSpace(current);

        // Space both sides?
        if (bSpaceBefore && bSpaceAfter)
        {
            // Ignore it
            return CreateToken(TokenType.Text, position - 2, 2);
        }

        // Do we have a previous matching double
        if (!bSpaceBefore && prev_double != null)
        {
            // Yes, match them
            prev_double.type = TokenType.open_strong;
            prev_double = null;
            return CreateToken(TokenType.close_strong, position - 2, 2);
        }

        if (!bSpaceAfter)
        {
            // Opening double star
            prev_double = CreateToken(TokenType.Text, position - 2, 2);
            return prev_double;
        }

        // Ignore it
        return CreateToken(TokenType.Text, position - 2, 2);
    }

    // If there's a space before, we can open em
    if (!bSpaceAfter)
    {
        // Opening single star
        prev_single = CreateToken(TokenType.Text, position - 1, 1);
        return prev_single;
    }

    // Ignore
    Skip(-1);
    return null;
}
*/

// Process auto links eg: <google.com>
//
// Expects the current character to be the opening '<'. Returns a link token when
// the text between the angle brackets is recognised by Utils.IsEmailAddress or
// Utils.IsWebAddress; returns null otherwise (including when links are disabled,
// when whitespace is hit before the closing '>', or when the input ends first).
// On a null return the position is NOT restored - the caller must rewind.
Token ProcessAutoLink()
{
    if (DisableLinks)
    {
        return null;
    }

    // Skip the angle bracket and remember the start
    SkipForward(1);
    Mark();

    bool ExtraMode = m_Markdown.ExtraMode;

    // Allow anything up to the closing angle, watch for escapable characters
    while (!eof)
    {
        char ch = current;

        // No whitespace allowed
        if (char.IsWhiteSpace(ch))
        {
            break;
        }

        // End found?
        if (ch == '>')
        {
            string url = Utils.UnescapeString(Extract(), ExtraMode);

            LinkInfo li = null;
            if (Utils.IsEmailAddress(url))
            {
                string link_text;

                // Ordinal comparison: the prefix test guards a fixed 7-character
                // Substring, so it must compare exact characters. The default
                // culture-sensitive StartsWith can ignore zero-width characters
                // and would let the test and the slice disagree.
                if (url.StartsWith("mailto:", StringComparison.Ordinal))
                {
                    // Show the address without the scheme
                    link_text = url.Substring(7);
                }
                else
                {
                    // Bare address: show it as written, link to it via mailto:
                    link_text = url;
                    url = "mailto:" + url;
                }

                li = new LinkInfo(new LinkDefinition("auto", url, null), link_text);
            }
            else if (Utils.IsWebAddress(url))
            {
                li = new LinkInfo(new LinkDefinition("auto", url, null), url);
            }

            if (li != null)
            {
                // Consume the closing '>' only once the link is accepted
                SkipForward(1);
                return CreateToken(TokenType.link, li);
            }

            // Neither an email nor a web address
            return null;
        }

        this.SkipEscapableChar(ExtraMode);
    }

    // Didn't work
    return null;
}
// Render the tokens currently held in m_Tokens to a destination string builder.
// 'str' is the text the tokens' startOffset/length refer to. Every token is handed
// to FreeToken once it has been processed, whether or not it produced any output.
private void Render(StringBuilder sb, string str)
{
    foreach (Token token in m_Tokens)
    {
        switch (token.type)
        {
            case TokenType.Text:
                // Plain text goes through the markdown html encoder
                m_Markdown.HtmlEncode(sb, str, token.startOffset, token.length);
                break;

            case TokenType.HtmlTag:
                // Html tags are written via SmartHtmlEncodeAmps rather than fully encoded
                Utils.SmartHtmlEncodeAmps(sb, str, token.startOffset, token.length);
                break;

            case TokenType.Html:
            case TokenType.opening_mark:
            case TokenType.closing_mark:
            case TokenType.internal_mark:
                // Copied through untouched
                sb.Append(str, token.startOffset, token.length);
                break;

            case TokenType.br:
                sb.Append("<br />\n");
                break;

            case TokenType.open_em:
                sb.Append("<em>");
                break;

            case TokenType.close_em:
                sb.Append("</em>");
                break;

            case TokenType.open_strong:
                sb.Append("<strong>");
                break;

            case TokenType.close_strong:
                sb.Append("</strong>");
                break;

            case TokenType.code_span:
                // Code span content is encoded and wrapped in <code>
                sb.Append("<code>");
                m_Markdown.HtmlEncode(sb, str, token.startOffset, token.length);
                sb.Append("</code>");
                break;

            case TokenType.link:
            {
                // The link text is formatted by a fresh formatter with links disabled
                LinkInfo link = (LinkInfo)token.data;
                var textFormatter = new SpanFormatter(m_Markdown) { DisableLinks = true };
                link.def.RenderLink(m_Markdown, sb, textFormatter.Format(link.link_text));
                break;
            }

            case TokenType.img:
            {
                // Images receive the link text unformatted
                LinkInfo image = (LinkInfo)token.data;
                image.def.RenderImg(m_Markdown, sb, image.link_text);
                break;
            }

            case TokenType.footnote:
            {
                // Superscript back-reference; the visible number is the index plus one
                FootnoteReference footnote = (FootnoteReference)token.data;
                sb.Append("<sup id=\"fnref:")
                  .Append(footnote.id)
                  .Append("\"><a href=\"#fn:")
                  .Append(footnote.id)
                  .Append("\" rel=\"footnote\">")
                  .Append(footnote.index + 1)
                  .Append("</a></sup>");
                break;
            }

            case TokenType.abbreviation:
            {
                // <abbr>, with a title attribute only when a title is present
                Abbreviation abbreviation = (Abbreviation)token.data;
                sb.Append("<abbr");
                if (!String.IsNullOrEmpty(abbreviation.Title))
                {
                    sb.Append(" title=\"");
                    m_Markdown.HtmlEncode(sb, abbreviation.Title, 0, abbreviation.Title.Length);
                    sb.Append("\"");
                }
                sb.Append(">");
                m_Markdown.HtmlEncode(sb, abbreviation.Abbr, 0, abbreviation.Abbr.Length);
                sb.Append("</abbr>");
                break;
            }
        }

        // Release the token now that it has been handled
        FreeToken(token);
    }
}