// Splits the given range of str into tokens. Every special construct recognised
// below becomes its own token; the plain text between two such constructs is
// emitted as a Text token. The resulting tokens are appended to m_Tokens, which
// is cleared first.
public void Tokenize(string str, int start, int len)
{
    // Point the scanner at the requested range and start from an empty token list
    base.Reset(str, start, len);
    m_Tokens.Clear();

    // Emphasis marks are collected separately and resolved once scanning is done.
    // The list is only allocated when the first mark is seen.
    List<Token> pendingMarks = null;

    List<Abbreviation> abbreviations = m_Markdown.GetAbbreviations();
    bool extraMode = m_Markdown.ExtraMode;

    // Offset where the not-yet-emitted run of plain text begins
    int textStart = position;

    while (!eof)
    {
        // Offset at which this iteration started. It marks the end of the pending
        // text run and is also the rewind point for branches that fail to match.
        int mark = position;
        Token token = null;

        switch (current)
        {
            case '*':
            case '_':
                token = CreateEmphasisMark();
                if (token != null)
                {
                    // Only real marks are queued for later resolution
                    var kind = token.type;
                    bool isMark = kind == TokenType.internal_mark
                        || kind == TokenType.opening_mark
                        || kind == TokenType.closing_mark;
                    if (isMark)
                    {
                        pendingMarks = pendingMarks ?? new List<Token>();
                        pendingMarks.Add(token);
                    }
                }
                break;

            case '`':
                token = ProcessCodeSpan();
                break;

            case '[':
            case '!':
                token = ProcessLinkOrImageOrFootnote();
                if (token == null)
                {
                    // Invalid syntax: rewind so the '[' or '!' is treated as a
                    // regular character below
                    position = mark;
                }
                break;

            case '<':
            {
                HtmlTag tag = HtmlTag.Parse(this);
                if (tag == null)
                {
                    // Not a tag: rewind and try an autolink instead, eg: <google.com>
                    position = mark;
                    token = ProcessAutoLink();
                    if (token == null)
                    {
                        position = mark;
                    }
                }
                else if (!m_Markdown.SafeMode || tag.IsSafe())
                {
                    // Valid (and permitted) html tag: tokenize the whole tag
                    token = CreateToken(TokenType.HtmlTag, mark, position - mark);
                }
                else
                {
                    // Tag rejected in safe mode: rewind, no token is produced for it
                    position = mark;
                }
                break;
            }

            case '&':
            {
                // A valid html entity is emitted as an Html token
                string unused = null;
                if (SkipHtmlEntity(ref unused))
                {
                    token = CreateToken(TokenType.Html, mark, position - mark);
                }
                break;
            }

            case ' ':
                // Two spaces directly before a line end
                if (CharAtOffset(1) == ' ' && IsLineEnd(CharAtOffset(2)))
                {
                    SkipForward(2);

                    // No br when the two spaces are the last thing in the input
                    if (!eof)
                    {
                        SkipEol();
                        token = CreateToken(TokenType.br, mark, 0);
                    }
                }
                break;

            case '\\':
                // Backslash followed by an escapable character: emit just the escaped
                // character as text. (Escaped <autolinks> get no special handling.)
                if (Utils.IsEscapableChar(CharAtOffset(1), extraMode))
                {
                    token = CreateToken(TokenType.Text, position + 1, 1);
                    SkipForward(2);
                }
                break;
        }

        // Nothing matched so far: try the abbreviations, but only when the
        // preceding character is not a letter or digit
        if (token == null && abbreviations != null && !Char.IsLetterOrDigit(CharAtOffset(-1)))
        {
            int abbrStart = position;
            foreach (Abbreviation candidate in abbreviations)
            {
                // The abbreviation must not be followed by a letter or digit either
                if (SkipString(candidate.Abbr) && !Char.IsLetterOrDigit(current))
                {
                    token = CreateToken(TokenType.abbreviation, candidate);
                    break;
                }

                position = abbrStart;
            }
        }

        if (token == null)
        {
            // Ordinary character: step over it and keep looking
            SkipForward(1);
            continue;
        }

        // Flush the text that preceded the special construct
        if (mark > textStart)
        {
            m_Tokens.Add(CreateToken(TokenType.Text, textStart, mark - textStart));
        }

        m_Tokens.Add(token);

        // The next text run starts right after the token just added
        textStart = position;
    }

    // Emit whatever text follows the last token
    if (position > textStart)
    {
        m_Tokens.Add(CreateToken(TokenType.Text, textStart, position - textStart));
    }

    // Resolve the emphasis marks, if any were collected
    if (pendingMarks != null)
    {
        ResolveEmphasisMarks(m_Tokens, pendingMarks);
    }
}
// Splits the given range of str into tokens. Every special construct recognised
// below becomes its own token; the plain text between two such constructs is
// emitted as a Text token. The resulting tokens are appended to m_Tokens, which
// is cleared first.
public void Tokenize(string str, int start, int len)
{
    // Point the scanner at the requested range and start from an empty token list
    base.Reset(str, start, len);
    m_Tokens.Clear();

    // Emphasis marks are collected separately and resolved once scanning is done.
    // The list is only allocated when the first mark is seen.
    List<Token> pendingMarks = null;

    List<Abbreviation> abbreviations = m_Markdown.GetAbbreviations();
    bool extraMode = m_Markdown.ExtraMode;

    // Offset where the not-yet-emitted run of plain text begins
    int textStart = Position;

    while (!Eof)
    {
        // Offset at which this iteration started. It marks the end of the pending
        // text run and is also the rewind point for branches that fail to match.
        int mark = Position;
        Token token = null;

        switch (Current)
        {
            case '*':
            case '_':
                token = CreateEmphasisMark();
                if (token != null)
                {
                    // Only real marks are queued for later resolution
                    var kind = token.type;
                    bool isMark = kind == TokenType.internal_mark
                        || kind == TokenType.opening_mark
                        || kind == TokenType.closing_mark;
                    if (isMark)
                    {
                        pendingMarks = pendingMarks ?? new List<Token>();
                        pendingMarks.Add(token);
                    }
                }
                break;

            case '~':
                // ~~text~~ is only scanned for in HnD mode
                if (m_Markdown.HnDMode)
                {
                    token = ProcessStrikeThrough();
                }
                break;

            case '`':
                token = ProcessCodeSpan();
                break;

            case '[':
            case '!':
                token = ProcessLinkOrImageOrFootnote();
                if (token == null)
                {
                    // Invalid syntax: rewind so the '[' or '!' is treated as a
                    // regular character below
                    Position = mark;
                }
                break;

            case '<':
            {
                HtmlTag tag = HtmlTag.Parse(this);
                if (tag == null)
                {
                    // Not a tag: rewind and try an autolink instead, eg: <google.com>
                    Position = mark;
                    token = ProcessAutoLink();
                    if (token == null)
                    {
                        Position = mark;
                    }
                }
                else if (!m_Markdown.SafeMode || tag.IsSafe())
                {
                    // Valid (and permitted) html tag: tokenize the whole tag
                    token = CreateToken(TokenType.HtmlTag, mark, Position - mark);
                }
                else
                {
                    // Tag rejected in safe mode: rewind, no token is produced for it
                    Position = mark;
                }
                break;
            }

            case '&':
            {
                // A valid html entity is emitted as an Html token
                string unused = null;
                if (SkipHtmlEntity(ref unused))
                {
                    token = CreateToken(TokenType.Html, mark, Position - mark);
                }
                break;
            }

            case ' ':
                // Two spaces directly before a line end
                if (CharAtOffset(1) == ' ' && IsLineEnd(CharAtOffset(2)))
                {
                    SkipForward(2);

                    // No br when the two spaces are the last thing in the input
                    if (!Eof)
                    {
                        SkipEol();
                        token = CreateToken(TokenType.br, mark, 0);
                    }
                }
                break;

            case '\\':
                // Backslash followed by an escapable character: emit just the escaped
                // character as text. (Escaped <autolinks> get no special handling.)
                if (Utils.IsEscapableChar(CharAtOffset(1), extraMode))
                {
                    token = CreateToken(TokenType.Text, Position + 1, 1);
                    SkipForward(2);
                }
                break;

            case '@':
                // "@fa-" introduces a FontAwesome icon in DocNet and HnD mode
                if ((m_Markdown.DocNetMode || m_Markdown.HnDMode) && this.DoesMatch("@fa-"))
                {
                    string iconName = string.Empty;
                    int iconEnd = 0;
                    if (Utils.SkipFontAwesome(this.Input, this.Position, out iconEnd, out iconName))
                    {
                        // The token spans only the icon name, i.e. the last
                        // iconName.Length characters before iconEnd
                        token = CreateToken(TokenType.font_awesome, iconEnd - iconName.Length, iconName.Length);
                        this.Position = iconEnd;
                    }
                }
                break;

            case ':':
                // NOTE(review): for ':' the smiley mapping is only attempted when HnD
                // mode is on and an emoji file is set, whereas every character handled
                // by the default branch always reaches ParseSmileyMapping - confirm
                // this is intended.
                if (m_Markdown.HnDMode && m_Markdown.EmojiFilePerName != null)
                {
                    string emojiName = string.Empty;
                    int emojiEnd = 0;
                    if (Utils.ParseEmoji(this.Input, this.Position, out emojiEnd, out emojiName))
                    {
                        token = CreateToken(TokenType.emoji, emojiName);
                        this.Position = emojiEnd;
                    }
                    else
                    {
                        // Not an emoji: fall back to the smiley mapping
                        token = ParseSmileyMapping(mark);
                    }
                }
                break;

            default:
                token = ParseSmileyMapping(mark);
                break;
        }

        // Nothing matched so far: try the abbreviations, but only when the
        // preceding character is not a letter or digit
        if (token == null && abbreviations != null && !Char.IsLetterOrDigit(CharAtOffset(-1)))
        {
            int abbrStart = Position;
            foreach (Abbreviation candidate in abbreviations)
            {
                // The abbreviation must not be followed by a letter or digit either
                if (SkipString(candidate.Abbr) && !Char.IsLetterOrDigit(Current))
                {
                    token = CreateToken(TokenType.abbreviation, candidate);
                    break;
                }

                Position = abbrStart;
            }
        }

        if (token == null)
        {
            // Ordinary character: step over it and keep looking
            SkipForward(1);
            continue;
        }

        // Flush the text that preceded the special construct
        if (mark > textStart)
        {
            m_Tokens.Add(CreateToken(TokenType.Text, textStart, mark - textStart));
        }

        m_Tokens.Add(token);

        // The next text run starts right after the token just added
        textStart = Position;
    }

    // Emit whatever text follows the last token
    if (Position > textStart)
    {
        m_Tokens.Add(CreateToken(TokenType.Text, textStart, Position - textStart));
    }

    // Resolve the emphasis marks, if any were collected
    if (pendingMarks != null)
    {
        ResolveEmphasisMarks(m_Tokens, pendingMarks);
    }
}