/*
 * Resolving emphasis tokens is a two part process
 *
 * 1. Find all valid sequences of * and _ and create `mark` tokens for them
 *    this is done by CreateEmphasisMark during the initial character scan
 *    done by Tokenize
 *
 * 2. Looks at all these emphasis marks and tries to pair them up
 *    to make the actual <em> and <strong> tokens
 *
 * Any unresolved emphasis marks are rendered unaltered as * or _
 */

// Create emphasis mark for sequences of '*' and '_' (part 1)
//
// Classifies the run of emphasis characters at the current position as an
// opening mark (space before), closing mark (space after) or internal mark
// (neither). A run with whitespace on BOTH sides can never be emphasis and
// is returned as a plain Html token. Returns null when the mark should be
// treated as literal text (ExtraMode '_' internal marks).
//
// `previous` optionally supplies the previously created mark token so a run
// adjacent to an existing mark is biased towards the complementary type;
// it may be null or omitted.
public Token CreateEmphasisMark(Token previous = null)
{
	// Capture current state
	char ch = current;
	int savepos = position;

	// Check for a consecutive sequence of just '_' and '*'
	if (bof || char.IsWhiteSpace(CharAtOffset(-1)))
	{
		while (IsEmphasisChar(current))
			SkipForward(1);

		// Whitespace on both sides - emit the whole run verbatim
		if (eof || char.IsWhiteSpace(current))
		{
			return new Token(TokenType.Html, savepos, position - savepos);
		}

		// Rewind
		position = savepos;
	}

	// Scan backwards and see if we have space before
	while (IsEmphasisChar(CharAtOffset(-1)))
		SkipForward(-1);
	bool bSpaceBefore = bof || char.IsWhiteSpace(CharAtOffset(-1));
	position = savepos;

	// Count how many matching emphasis characters
	while (current == ch)
	{
		SkipForward(1);
	}
	int count = position - savepos;

	// Scan forwards and see if we have space after
	while (IsEmphasisChar(CharAtOffset(1)))
		SkipForward(1);
	bool bSpaceAfter = eof || char.IsWhiteSpace(current);

	// Leave the scan positioned just past the counted run
	position = savepos + count;

	// This should have been stopped by check above
	System.Diagnostics.Debug.Assert(!bSpaceBefore || !bSpaceAfter);

	if (bSpaceBefore || (previous != null && previous.type == TokenType.closing_mark))
	{
		return CreateToken(TokenType.opening_mark, savepos, position - savepos);
	}

	if (bSpaceAfter || (previous != null && previous.type == TokenType.opening_mark))
	{
		return CreateToken(TokenType.closing_mark, savepos, position - savepos);
	}

	// In ExtraMode, '_' marks inside a word are not emphasis - render literally
	if (m_Markdown.ExtraMode && ch == '_')
		return null;

	return CreateToken(TokenType.internal_mark, savepos, position - savepos);
}
// FreeToken - return a token to the spare token pool
//
// Clears the token's data reference (so the pooled token doesn't keep the
// referenced object alive) and pushes it onto m_SpareTokens for reuse by
// CreateToken. Declared once, publicly; the file previously contained an
// identical `internal` duplicate of this method, which is a compile error
// (CS0111) - callers of either accessibility are satisfied by `public`.
public void FreeToken(Token token)
{
	token.data = null;
	m_SpareTokens.Push(token);
}
// Split an emphasis mark token in two at `position` characters in.
// The passed token is shrunk to cover only the left-hand part; a new token
// of the same type covering the right-hand part is created and inserted
// immediately after it in both the `marks` and `tokens` collections.
// Returns the newly created right-hand token.
public Token SplitMarkToken(List<Token> tokens, List<Token> marks, Token token, int position)
{
	// Build the right-hand token for everything past the split point
	var rhs = CreateToken(token.type, token.startOffset + position, token.length - position);

	// The original token keeps just the left-hand part
	token.length = position;

	// Slot the new token in directly after the original in both lists
	var markSlot = marks.IndexOf(token) + 1;
	marks.Insert(markSlot, rhs);
	var tokenSlot = tokens.IndexOf(token) + 1;
	tokens.Insert(tokenSlot, rhs);

	return rhs;
}
// Resolve emphasis marks (part 2)
//
// Classic Markdown.pl-style pairing: a first pass matches runs of two or
// more marks into <strong>, then a second pass pairs whatever remains into
// <em>. Marks longer than needed are split with SplitMarkToken so the
// leftover characters stay available for later pairing. Any marks still
// unpaired afterwards are rendered literally.
public void ResolveEmphasisMarks_classic(List<Token> tokens, List<Token> marks)
{
	// First pass, do <strong>
	for (int i = 0; i < marks.Count; i++)
	{
		// Get the next opening or internal mark
		Token opening_mark=marks[i];
		if (opening_mark.type!=TokenType.opening_mark && opening_mark.type!=TokenType.internal_mark)
			continue;
		// <strong> needs a run of at least two characters
		if (opening_mark.length < 2)
			continue;

		// Look for a matching closing mark
		for (int j = i + 1; j < marks.Count; j++)
		{
			// Get the next closing or internal mark
			Token closing_mark = marks[j];
			if (closing_mark.type != TokenType.closing_mark && closing_mark.type!=TokenType.internal_mark)
				continue;

			// Ignore if different type (ie: `*` vs `_`)
			if (input[opening_mark.startOffset] != input[closing_mark.startOffset])
				continue;

			// Must be at least two
			if (closing_mark.length < 2)
				continue;

			// Split the opening mark, keeping the LHS
			// (opening_mark still refers to the 2-char left part)
			if (opening_mark.length > 2)
			{
				SplitMarkToken(tokens, marks, opening_mark, 2);
			}

			// Split the closing mark, keeping the RHS
			// (re-point closing_mark at the new 2-char right part)
			if (closing_mark.length > 2)
			{
				closing_mark=SplitMarkToken(tokens, marks, closing_mark, closing_mark.length-2);
			}

			// Connect them
			opening_mark.type = TokenType.open_strong;
			closing_mark.type = TokenType.close_strong;

			// Continue after the closing mark
			// (re-find its index since the splits above shifted the list)
			i = marks.IndexOf(closing_mark);
			break;
		}
	}

	// Second pass, do <em>
	for (int i = 0; i < marks.Count; i++)
	{
		// Get the next opening or internal mark
		Token opening_mark = marks[i];
		if (opening_mark.type != TokenType.opening_mark && opening_mark.type != TokenType.internal_mark)
			continue;

		// Look for a matching closing mark
		for (int j = i + 1; j < marks.Count; j++)
		{
			// Get the next closing or internal mark
			Token closing_mark = marks[j];
			if (closing_mark.type != TokenType.closing_mark && closing_mark.type != TokenType.internal_mark)
				continue;

			// Ignore if different type (ie: `*` vs `_`)
			if (input[opening_mark.startOffset] != input[closing_mark.startOffset])
				continue;

			// Split the opening mark, keeping the LHS
			if (opening_mark.length > 1)
			{
				SplitMarkToken(tokens, marks, opening_mark, 1);
			}

			// Split the closing mark, keeping the RHS
			if (closing_mark.length > 1)
			{
				closing_mark = SplitMarkToken(tokens, marks, closing_mark, closing_mark.length - 1);
			}

			// Connect them
			opening_mark.type = TokenType.open_em;
			closing_mark.type = TokenType.close_em;

			// Continue after the closing mark
			i = marks.IndexOf(closing_mark);
			break;
		}
	}
}
// Resolve emphasis marks (part 2)
//
// Default (non-classic) pairing: repeatedly scans for the first opening (or
// internal) mark followed by a closing (or internal) mark of the same
// character, pairs them as <em> or <strong> depending on run lengths, and
// removes the matched marks. The outer while-loop restarts the scan after
// every successful pairing so leftover split marks get reconsidered.
public void ResolveEmphasisMarks(List<Token> tokens, List<Token> marks)
{
	bool bContinue = true;
	while (bContinue)
	{
		bContinue = false;
		for (int i = 0; i < marks.Count; i++)
		{
			// Get the next opening or internal mark
			Token opening_mark = marks[i];
			if (opening_mark.type != TokenType.opening_mark && opening_mark.type != TokenType.internal_mark)
				continue;

			// Look for a matching closing mark
			for (int j = i + 1; j < marks.Count; j++)
			{
				// Get the next closing or internal mark
				// (note: `break`, not `continue` - stop at the first mark
				// that can't close, unlike the classic resolver)
				Token closing_mark = marks[j];
				if (closing_mark.type != TokenType.closing_mark && closing_mark.type != TokenType.internal_mark)
					break;

				// Ignore if different type (ie: `*` vs `_`)
				if (input[opening_mark.startOffset] != input[closing_mark.startOffset])
					continue;

				// strong or em? 1 => <em>, 2 => <strong>
				int style = Math.Min(opening_mark.length, closing_mark.length);

				// Triple or more on both ends? Odd count pairs as <em>,
				// even count as <strong>
				if (style >= 3)
				{
					style = (style % 2)==1 ? 1 : 2;
				}

				// Split the opening mark, keeping the RHS
				// (i-- so the leftover LHS mark is revisited next iteration)
				if (opening_mark.length > style)
				{
					opening_mark = SplitMarkToken(tokens, marks, opening_mark, opening_mark.length - style);
					i--;
				}

				// Split the closing mark, keeping the LHS
				if (closing_mark.length > style)
				{
					SplitMarkToken(tokens, marks, closing_mark, style);
				}

				// Connect them
				opening_mark.type = style == 1 ? TokenType.open_em : TokenType.open_strong;
				closing_mark.type = style == 1 ? TokenType.close_em : TokenType.close_strong;

				// Remove the matched marks and rescan from the top
				marks.Remove(opening_mark);
				marks.Remove(closing_mark);
				bContinue = true;

				break;
			}
		}
	}
}
// Scan the input string, creating tokens for anything special
//
// Tokenizes `len` characters of `str` starting at `start` into m_Tokens:
// emphasis marks, code spans, links/images/footnotes, html tags, entities,
// hard line breaks, backslash escapes and abbreviations. Stretches between
// special tokens are emitted as plain Text tokens. Emphasis marks collected
// during the scan are paired up afterwards by ResolveEmphasisMarks.
public void Tokenize(string str, int start, int len)
{
	// Prepare
	base.Reset(str, start, len);
	m_Tokens.Clear();
	List<Token> emphasis_marks = null;
	List<Abbreviation> Abbreviations=m_Markdown.GetAbbreviations();
	bool ExtraMode = m_Markdown.ExtraMode;

	// Scan string
	int start_text_token = position;
	while (!eof)
	{
		int end_text_token=position;

		// Work out token
		Token token = null;
		switch (current)
		{
			case '*':
			case '_':
				// Create emphasis mark
				// NOTE(review): CreateEmphasisMark is declared above with a
				// `Token previous` parameter - confirm a parameterless
				// overload (or default value) exists for this call
				token = CreateEmphasisMark();
				if (token != null)
				{
					// Store marks in a separate list that we'll resolve later
					switch (token.type)
					{
						case TokenType.internal_mark:
						case TokenType.opening_mark:
						case TokenType.closing_mark:
							if (emphasis_marks == null)
							{
								emphasis_marks = new List<Token>();
							}
							emphasis_marks.Add(token);
							break;
					}
				}
				break;

			case '`':
				token = ProcessCodeSpan();
				break;

			case '[':
			case '!':
			{
				// Process link reference
				int linkpos = position;
				token = ProcessLinkOrImageOrFootnote();

				// Rewind if invalid syntax
				// (the '[' or '!' will be treated as a regular character and processed below)
				if (token == null)
					position = linkpos;
				break;
			}

			case '<':
			{
				// Is it a valid html tag?
				int save = position;
				HtmlTag tag = HtmlTag.Parse(this);
				if (tag != null)
				{
					if (!m_Markdown.SafeMode || tag.IsSafe())
					{
						// Yes, create a token for it
						token = CreateToken(TokenType.HtmlTag, save, position - save);
					}
					else
					{
						// No, rewind; with token left null the '<' falls
						// through and is emitted as encoded plain text
						position = save;
					}
				}
				else
				{
					// No, rewind and check if it's a valid autolink eg: <google.com>
					position = save;
					token = ProcessAutoLink();

					if (token == null)
						position = save;
				}
				break;
			}

			case '&':
			{
				// Is it a valid html entity
				int save=position;
				string unused=null;
				if (SkipHtmlEntity(ref unused))
				{
					// Yes, create a token for it
					token = CreateToken(TokenType.Html, save, position - save);
				}
				break;
			}

			case ' ':
			{
				// Check for double space at end of a line (hard line break)
				if (CharAtOffset(1)==' ' && IsLineEnd(CharAtOffset(2)))
				{
					// Yes, skip it
					SkipForward(2);

					// Don't put br's at the end of a paragraph
					if (!eof)
					{
						SkipEol();
						token = CreateToken(TokenType.br, end_text_token, 0);
					}
				}
				break;
			}

			case '\\':
			{
				// Special handling for escaping <autolinks>
				// (deliberately disabled; retained for reference)
				/*
				if (CharAtOffset(1) == '<')
				{
					// Is it an autolink?
					int savepos = position;
					SkipForward(1);
					bool AutoLink = ProcessAutoLink() != null;
					position = savepos;

					if (AutoLink)
					{
						token = CreateToken(TokenType.Text, position + 1, 1);
						SkipForward(2);
					}
				}
				else
				*/
				{
					// Check followed by an escapable character; if so emit
					// just the escaped character as a Text token
					if (Utils.IsEscapableChar(CharAtOffset(1), ExtraMode))
					{
						token = CreateToken(TokenType.Text, position + 1, 1);
						SkipForward(2);
					}
				}
				break;
			}
		}

		// Look for abbreviations.
		// Only at a word boundary (previous char not alphanumeric) and only
		// when the matched string is not followed by another alphanumeric.
		if (token == null && Abbreviations!=null && !Char.IsLetterOrDigit(CharAtOffset(-1)))
		{
			var savepos = position;
			foreach (var abbr in Abbreviations)
			{
				if (SkipString(abbr.Abbr) && !Char.IsLetterOrDigit(current))
				{
					token = CreateToken(TokenType.abbreviation, abbr);
					break;
				}

				// No match - rewind and try the next abbreviation
				position = savepos;
			}
		}

		// If token found, append any preceeding text and the new token to the token list
		if (token!=null)
		{
			// Create a token for everything up to the special character
			if (end_text_token > start_text_token)
			{
				m_Tokens.Add(CreateToken(TokenType.Text, start_text_token, end_text_token-start_text_token));
			}

			// Add the new token
			m_Tokens.Add(token);

			// Remember where the next text token starts
			start_text_token=position;
		}
		else
		{
			// Skip a single character and keep looking
			SkipForward(1);
		}
	}

	// Append a token for any trailing text after the last token.
	if (position > start_text_token)
	{
		m_Tokens.Add(CreateToken(TokenType.Text, start_text_token, position-start_text_token));
	}

	// Do we need to resolve any emphasis marks?
	if (emphasis_marks != null)
	{
		ResolveEmphasisMarks(m_Tokens, emphasis_marks);
	}

	// Done!
	return;
}