/// <summary>
/// Reads the next element from <paramref name="value"/> and resolves it to a delimiter symbol atom.
/// </summary>
/// <param name="value">The source being parsed.</param>
/// <param name="start">
/// Index in <paramref name="value"/> where the source span of the resulting atom begins.
/// </param>
/// <param name="position">Current read index; moved past the delimiter element by <c>ReadElement</c>.</param>
/// <returns>The symbol atom registered for the delimiter.</returns>
/// <exception cref="TexParseException">
/// The delimiter element is empty, or no delimiter symbol could be found for it.
/// </exception>
private SymbolAtom ParseDelimiter(SourceSpan value, int start, ref int position)
{
    var delimiter = ReadElement(value, ref position);

    // A braced group may be empty ("{}"), which yields a zero-length span. Reject it up front: the code below
    // indexes delimiter[0], which is not meaningful for an empty span.
    if (delimiter.Length == 0)
    {
        throw new TexParseException("Cannot find delimiter: the delimiter element is empty");
    }

    string delimiterName;
    if (delimiter.Length == 1)
    {
        delimiterName = GetDelimeterMapping(delimiter[0]);
    }
    else
    {
        if (delimiter[0] != escapeChar)
        {
            throw new Exception($"Incorrect parser state: delimiter should start from {escapeChar}: {delimiter}");
        }

        // Here goes the fancy business: for non-alphanumeric commands (e.g. \{, \\ etc.) we need to pass them
        // through GetDelimeterMapping, but for alphanumeric ones, we don't.
        delimiterName = delimiter.Segment(1).ToString(); // skip an escape character
        if (delimiterName.Length == 1 && !char.IsLetterOrDigit(delimiterName[0]))
        {
            delimiterName = GetDelimeterMapping(delimiterName[0]);
        }
    }

    // The atom's source covers everything from `start` up to the current position,
    // so the whole "\left(" maps to the created delimiter atom.
    var delimiterSource = value.Segment(start, position - start);
    if (delimiterName == null
        || !SymbolAtom.TryGetAtom(delimiterName, delimiterSource, out var atom)
        || !atom.IsDelimeter)
    {
        throw new TexParseException($"Cannot find delimiter {delimiter}");
    }

    return atom;
}
/// <summary>
/// Reads a group delimited by <paramref name="openChar"/> and <paramref name="closeChar"/>, honouring nested
/// groups, and returns the content between the outermost pair.
/// </summary>
/// <param name="value">The source being parsed.</param>
/// <param name="position">
/// Must point at <paramref name="openChar"/>; on return it points just after the matching
/// <paramref name="closeChar"/>.
/// </param>
/// <param name="openChar">Character that opens a group.</param>
/// <param name="closeChar">Character that closes a group.</param>
/// <returns>The span between the delimiters, excluding the delimiters themselves.</returns>
/// <exception cref="TexParseException">
/// The opening character is missing at <paramref name="position"/>, or the group is never closed.
/// </exception>
internal static SourceSpan ReadElementGroup(SourceSpan value, ref int position, char openChar, char closeChar)
{
    if (position == value.Length || value[position] != openChar)
    {
        throw new TexParseException("missing '" + openChar + "'!");
    }

    // Step over the opening character; the content starts right after it.
    position++;
    var contentStart = position;

    // `depth` counts groups opened inside the one being read.
    var depth = 0;
    for (; position < value.Length; position++)
    {
        var current = value[position];
        if (current == closeChar && depth == 0)
        {
            break;
        }

        if (current == openChar)
        {
            depth++;
        }
        else if (current == closeChar)
        {
            depth--;
        }
    }

    if (position == value.Length)
    {
        // Ran off the end of the input without finding the matching closing character.
        throw new TexParseException("Illegal end, missing '" + closeChar + "'!");
    }

    var content = value.Segment(contentStart, position - contentStart);
    position++; // consume the closing character
    return content;
}
/// <summary>
/// Builds a formula from <paramref name="value"/> character by character, without interpreting any commands:
/// whitespace characters become <c>SpaceAtom</c>s and every other character becomes a <c>CharAtom</c>.
/// </summary>
/// <param name="value">The raw text to convert.</param>
/// <param name="textStyle">Text style assigned to the formula and to each character atom.</param>
/// <returns>The formula containing one atom per input character.</returns>
private static TexFormula ConvertRawText(SourceSpan value, string textStyle)
{
    var result = new TexFormula { Source = value, TextStyle = textStyle };

    for (var index = 0; index < value.Length; index++)
    {
        var symbol = value[index];
        var symbolSource = value.Segment(index, 1);

        Atom atom;
        if (IsWhiteSpace(symbol))
        {
            atom = new SpaceAtom(symbolSource);
        }
        else
        {
            atom = new CharAtom(symbolSource, symbol, textStyle);
        }

        // The span passed alongside the atom covers everything consumed so far, including this character.
        result.Add(atom, value.Segment(0, index + 1));
    }

    return result;
}
/// <summary> /// Reads an element: typically, a curly brace-enclosed value group, a singular value or a character sequence /// prefixed by a backslash. /// </summary> /// <exception cref="TexParseException">Will be thrown for ill-formed groups.</exception> internal static SourceSpan ReadElement(SourceSpan value, ref int position) { SkipWhiteSpace(value, ref position); if (position == value.Length) { throw new TexParseException("An element is missing"); } return(value[position] switch { leftGroupChar => ReadElementGroup(value, ref position, leftGroupChar, rightGroupChar), escapeChar => ReadEscapeSequence(value, ref position), _ => value.Segment(position++, 1) });
/// <summary>
/// Skips leading whitespace and reads one element: either a whole brace-delimited group or a single character.
/// </summary>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Current read index; advanced past the element that was read.</param>
/// <returns>The group content (without braces) or a one-character span.</returns>
/// <exception cref="TexParseException">
/// Nothing is left to read, or the group is ill-formed.
/// </exception>
internal static SourceSpan ReadElement(SourceSpan value, ref int position)
{
    SkipWhiteSpace(value, ref position);

    var atEnd = position == value.Length;
    if (atEnd)
    {
        throw new TexParseException("An element is missing");
    }

    if (value[position] != leftGroupChar)
    {
        // Single-character element: consume it and return a span covering just that character.
        var elementStart = position;
        position++;
        return value.Segment(elementStart, 1);
    }

    return ReadElementGroup(value, ref position, leftGroupChar, rightGroupChar);
}
/// <summary>
/// Reads the argument of a script (or accent): a braced group, or the single next character, and parses it with
/// the text style of <paramref name="formula"/>.
/// </summary>
/// <param name="formula">Formula whose text style is used for the parsed script.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Current read index; advanced past the script argument.</param>
/// <returns>The parsed script formula.</returns>
/// <exception cref="TexParseException">The input ends before a script argument is found.</exception>
private TexFormula ReadScript(TexFormula formula, SourceSpan value, ref int position)
{
    SkipWhiteSpace(value, ref position);

    if (position == value.Length)
    {
        throw new TexParseException("illegal end, missing script!");
    }

    if (value[position] != leftGroupChar)
    {
        // Not a group: the script is exactly one character.
        var scriptStart = position;
        position++;
        return Parse(value.Segment(scriptStart, 1), formula.TextStyle);
    }

    var group = ReadGroup(formula, value, ref position, leftGroupChar, rightGroupChar);
    return Parse(group, formula.TextStyle);
}
/// <summary>
/// Reads an escape sequence starting at <paramref name="position"/>: the escape character followed by either a
/// run of letters or exactly one non-letter character.
/// </summary>
/// <param name="value">The source being parsed.</param>
/// <param name="position">
/// Must point at the escape character; on return it points just after the sequence.
/// </param>
/// <returns>The span of the whole sequence, including the leading escape character.</returns>
/// <exception cref="Exception">The character at <paramref name="position"/> is not the escape character.</exception>
/// <exception cref="TexParseException">Nothing follows the escape character.</exception>
private static SourceSpan ReadEscapeSequence(SourceSpan value, ref int position)
{
    var sequenceStart = position;
    if (value[sequenceStart] != escapeChar)
    {
        throw new Exception($"Invalid state: {nameof(ReadEscapeSequence)} called for a value without escape character ({value})");
    }

    position++; // step over the escape character

    if (position < value.Length)
    {
        if (char.IsLetter(value[position]))
        {
            // Alphabetic command name: take every consecutive letter.
            do
            {
                position++;
            }
            while (position < value.Length && char.IsLetter(value[position]));
        }
        else
        {
            // A symbol: assumed to consist of a single character only.
            position++;
        }
    }

    var sequenceLength = position - sequenceStart;
    if (sequenceLength <= 1)
    {
        throw new TexParseException($"Unfinished escape sequence (value: \"{value}\", index {position})");
    }

    return value.Segment(sequenceStart, sequenceLength);
}
/// <summary>
/// Parses <paramref name="value"/> from <paramref name="position"/> onwards into a new formula.
/// </summary>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Current read index; advanced as input is consumed.</param>
/// <param name="allowClosingDelimiter">
/// When <c>true</c>, parsing stops as soon as escape-sequence processing reports a closed delimiter.
/// </param>
/// <param name="textStyle">Text style assigned to the formula and used while parsing its content.</param>
/// <param name="environment">Command environment passed on to nested parsing.</param>
/// <returns>The parsed formula.</returns>
/// <exception cref="TexParseException">
/// A closing group character has no opening counterpart, or a script character appears where no base is
/// available for it.
/// </exception>
private TexFormula Parse(
    SourceSpan value,
    ref int position,
    bool allowClosingDelimiter,
    string textStyle,
    ICommandEnvironment environment)
{
    var formula = new TexFormula { Source = value, TextStyle = textStyle };
    var closedDelimiter = false;
    var skipWhiteSpace = ShouldSkipWhiteSpace(textStyle);
    var initialPosition = position;

    while (position < value.Length && !(allowClosingDelimiter && closedDelimiter))
    {
        char ch = value[position];
        var source = value.Segment(position, 1);

        if (IsWhiteSpace(ch))
        {
            // Whitespace only produces an atom when the text style does not ask for it to be skipped.
            if (!skipWhiteSpace)
            {
                formula.Add(new SpaceAtom(source), source);
            }

            position++;
        }
        else if (ch == escapeChar)
        {
            ProcessEscapeSequence(
                formula,
                value,
                ref position,
                allowClosingDelimiter,
                ref closedDelimiter,
                environment);
        }
        else if (ch == leftGroupChar)
        {
            // A braced group is parsed in a child environment and wrapped as an ordinary atom.
            var groupValue = ReadElement(value, ref position);
            var parsedGroup = Parse(groupValue, textStyle, environment.CreateChildEnvironment());
            var innerGroupAtom = parsedGroup.RootAtom ?? new RowAtom(groupValue);
            var groupAtom = new TypedAtom(
                innerGroupAtom.Source,
                innerGroupAtom,
                TexAtomType.Ordinary,
                TexAtomType.Ordinary);
            var scriptsAtom = this.AttachScripts(formula, value, ref position, groupAtom, true, environment);
            formula.Add(scriptsAtom, value.Segment(initialPosition, position - initialPosition));
        }
        else if (ch == rightGroupChar)
        {
            throw new TexParseException("Found a closing '" + rightGroupChar
                + "' without an opening '" + leftGroupChar + "'!");
        }
        else if (ch == superScriptChar || ch == subScriptChar || ch == primeChar)
        {
            // Script characters are consumed by AttachScripts right after their base; reaching one here
            // means there is nothing to attach it to.
            // NOTE(review): this compares against 0 rather than initialPosition — confirm which message is
            // intended when parsing starts in the middle of `value`.
            if (position == 0)
            {
                throw new TexParseException("Every script needs a base: \""
                    + superScriptChar + "\", \"" + subScriptChar + "\" and \""
                    + primeChar + "\" can't be the first character!");
            }
            else
            {
                throw new TexParseException("Double scripts found! Try using more braces.");
            }
        }
        else
        {
            var character = ConvertCharacter(formula, ref position, source, environment);
            if (character != null)
            {
                var scriptsAtom = AttachScripts(
                    formula,
                    value,
                    ref position,
                    character,
                    skipWhiteSpace,
                    environment);

                // Segment takes (start, length): pass the consumed length, not the end index, exactly as the
                // group branch above does. The two only coincide when initialPosition is 0.
                formula.Add(scriptsAtom, value.Segment(initialPosition, position - initialPosition));
            }
        }
    }

    return formula;
}
/// <summary>
/// Looks at the input following <paramref name="atom"/> and, if prime marks, a superscript and/or a subscript
/// are present, wraps the atom together with them.
/// </summary>
/// <param name="formula">Formula being built; passed on when reading script arguments.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Current read index; advanced past any primes and scripts that were consumed.</param>
/// <param name="atom">The base atom the scripts belong to.</param>
/// <param name="skipWhiteSpace">Whether whitespace directly after the base is skipped first.</param>
/// <param name="environment">Command environment passed on when reading script arguments.</param>
/// <returns>
/// <paramref name="atom"/> itself when nothing follows it; otherwise a <c>ScriptsAtom</c>, or a
/// <c>BigOperatorAtom</c> when the base's right type is <c>TexAtomType.BigOperator</c>.
/// </returns>
private Atom AttachScripts(
    TexFormula formula,
    SourceSpan value,
    ref int position,
    Atom atom,
    bool skipWhiteSpace,
    ICommandEnvironment environment)
{
    if (skipWhiteSpace)
    {
        SkipWhiteSpace(value, ref position);
    }

    var initialPosition = position;
    if (position == value.Length)
    {
        return atom;
    }

    // Check for prime marks. Whitespace between primes is tolerated.
    var primesRowAtom = new RowAtom(new SourceSpan(value.Source, position, 0));
    for (var i = position; i < value.Length; i++)
    {
        if (value[i] == primeChar)
        {
            primesRowAtom = primesRowAtom.Add(SymbolAtom.GetAtom("prime", value.Segment(i, 1)));

            // Move just past this prime. Incrementing by one instead would fall behind `i` whenever
            // whitespace was skipped between primes, leaving `position` on an already-consumed prime.
            position = i + 1;
        }
        else if (!IsWhiteSpace(value[i]))
        {
            break;
        }
    }

    var primesRowSource = new SourceSpan(
        value.Source,
        primesRowAtom.Source.Start,
        position - primesRowAtom.Source.Start);
    primesRowAtom = primesRowAtom.WithSource(primesRowSource);

    if (primesRowAtom.Elements.Count > 0)
    {
        atom = new ScriptsAtom(primesRowAtom.Source, atom, null, primesRowAtom);
    }

    if (position == value.Length)
    {
        return atom;
    }

    TexFormula superscriptFormula = null;
    TexFormula subscriptFormula = null;

    var ch = value[position];
    if (ch == superScriptChar)
    {
        // Attach superscript.
        position++;
        superscriptFormula = ReadScript(formula, value, ref position, environment);

        SkipWhiteSpace(value, ref position);
        if (position < value.Length && value[position] == subScriptChar)
        {
            // Attach subscript also.
            position++;
            subscriptFormula = ReadScript(formula, value, ref position, environment);
        }
    }
    else if (ch == subScriptChar)
    {
        // Attach subscript.
        position++;
        subscriptFormula = ReadScript(formula, value, ref position, environment);

        SkipWhiteSpace(value, ref position);
        if (position < value.Length && value[position] == superScriptChar)
        {
            // Attach superscript also.
            position++;
            superscriptFormula = ReadScript(formula, value, ref position, environment);
        }
    }

    if (superscriptFormula == null && subscriptFormula == null)
    {
        return atom;
    }

    // Check whether to return Big Operator or Scripts.
    var subscriptAtom = subscriptFormula?.RootAtom;
    var superscriptAtom = superscriptFormula?.RootAtom;
    if (atom.GetRightType() == TexAtomType.BigOperator)
    {
        var source = value.Segment(atom.Source.Start, position - atom.Source.Start);
        if (atom is BigOperatorAtom typedAtom)
        {
            // Rebuild the operator around its original base so the limits setting is kept.
            return new BigOperatorAtom(
                source,
                typedAtom.BaseAtom,
                subscriptAtom,
                superscriptAtom,
                typedAtom.UseVerticalLimits);
        }

        return new BigOperatorAtom(source, atom, subscriptAtom, superscriptAtom);
    }
    else
    {
        var source = new SourceSpan(value.Source, initialPosition, position - initialPosition);
        return new ScriptsAtom(source, atom, subscriptAtom, superscriptAtom);
    }
}
/// <summary>
/// Handles an escape sequence starting at <paramref name="position"/>: reads the command name and dispatches on
/// it as a symbol, a predefined formula, the <c>nbsp</c> space, a text style, or a command. Resulting atoms are
/// added to <paramref name="formula"/>.
/// </summary>
/// <param name="formula">Formula that receives the produced atoms.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Must point at the escape character; advanced past everything consumed.</param>
/// <param name="allowClosingDelimiter">Passed on to command processing.</param>
/// <param name="closedDelimiter">Passed on to command processing, which may set it.</param>
/// <param name="environment">Command environment used for lookups and nested parsing.</param>
/// <exception cref="TexParseException">The name matches no known symbol, formula, style or command.</exception>
private void ProcessEscapeSequence(
    TexFormula formula,
    SourceSpan value,
    ref int position,
    bool allowClosingDelimiter,
    ref bool closedDelimiter,
    ICommandEnvironment environment)
{
    var initialSrcPosition = position;
    position++; // step over the escape character
    var start = position;

    // The name is either a run of letters, or — for a symbol — assumed to be a single non-letter character.
    if (position < value.Length)
    {
        if (char.IsLetter(value[position]))
        {
            do
            {
                position++;
            }
            while (position < value.Length && char.IsLetter(value[position]));
        }
        else
        {
            position++;
        }
    }

    var commandSpan = value.Segment(start, position - start);
    var command = commandSpan.ToString();

    // NOTE(review): the third argument here is commandSpan.End, while other SourceSpan constructions look like
    // they pass a length — confirm which one is intended.
    var formulaSource = new SourceSpan(value.Source, initialSrcPosition, commandSpan.End);

    if (SymbolAtom.TryGetAtom(commandSpan, out var symbolAtom))
    {
        // Symbol was found.
        if (symbolAtom.Type == TexAtomType.Accent)
        {
            var helper = new TexFormulaHelper(formula, formulaSource);
            var accentFormula = ReadScript(formula, value, ref position, environment);
            helper.AddAccent(accentFormula, symbolAtom.Name);
            return;
        }

        Atom baseAtom = symbolAtom;
        if (symbolAtom.Type == TexAtomType.BigOperator)
        {
            baseAtom = new BigOperatorAtom(formulaSource, symbolAtom, null, null);
        }

        var symbolWithScripts = AttachScripts(formula, value, ref position, baseAtom, true, environment);
        formula.Add(symbolWithScripts, formulaSource);
        return;
    }

    if (predefinedFormulas.TryGetValue(command, out var factory))
    {
        // Predefined formula was found.
        var predefinedFormula = factory(formulaSource);
        var predefinedAtom = AttachScripts(
            formula,
            value,
            ref position,
            predefinedFormula.RootAtom,
            true,
            environment);
        formula.Add(predefinedAtom, formulaSource);
        return;
    }

    if (command == "nbsp")
    {
        // Space was found.
        var spaceAtom = AttachScripts(
            formula,
            value,
            ref position,
            new SpaceAtom(formulaSource),
            true,
            environment);
        formula.Add(spaceAtom, formulaSource);
        return;
    }

    if (textStyles.Contains(command))
    {
        // Text style was found: its argument is either taken verbatim or parsed in a child environment.
        SkipWhiteSpace(value, ref position);

        var styledContent = ReadElement(value, ref position);
        var styledFormula = command == TexUtilities.TextStyleName
            ? ConvertRawText(styledContent, command)
            : Parse(styledContent, command, environment.CreateChildEnvironment());

        var styledSource = value.Segment(start, position - start);
        var styledAtom = styledFormula.RootAtom ?? new NullAtom(styledSource);
        var styledWithScripts = AttachScripts(formula, value, ref position, styledAtom, true, environment);
        formula.Add(styledWithScripts, styledSource);
        return;
    }

    var isKnownCommand = embeddedCommands.Contains(command)
        || environment.AvailableCommands.ContainsKey(command)
        || _commandRegistry.ContainsKey(command);
    if (!isKnownCommand)
    {
        // Escape sequence is invalid.
        throw new TexParseException("Unknown symbol or command or predefined TeXFormula: '" + command + "'");
    }

    // Command was found.
    var commandAtom = ProcessCommand(
        formula,
        value,
        ref position,
        command,
        allowClosingDelimiter,
        ref closedDelimiter,
        environment);
    if (commandAtom == null)
    {
        // Nothing was produced, so nothing is added.
        return;
    }

    if (!allowClosingDelimiter)
    {
        commandAtom = AttachScripts(formula, value, ref position, commandAtom, true, environment);
    }

    var commandSource = new SourceSpan(formulaSource.Source, formulaSource.Start, commandAtom.Source.End);
    formula.Add(commandAtom, commandSource);
}
/// <summary>
/// Executes the command named <paramref name="command"/>: reads its arguments from <paramref name="value"/> and
/// builds the matching atom. Built-in commands are handled first; any other name is looked up in the
/// environment's commands and then in the command registry.
/// </summary>
/// <param name="formula">Formula being built; its text style is used for parsed arguments.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Points just after the command name; advanced past the command's arguments.</param>
/// <param name="command">Command name without the escape character.</param>
/// <param name="allowClosingDelimiter">Whether a <c>right</c> command is acceptable here.</param>
/// <param name="closedDelimiter">Set to <c>true</c> when a <c>right</c> command has been processed.</param>
/// <param name="environment">Command environment used for lookups and nested parsing.</param>
/// <returns>The atom produced by the command.</returns>
/// <remarks>May return <c>null</c> for commands that produce no atoms.</remarks>
/// <exception cref="TexParseException">
/// A required delimiter is missing or unknown, <c>right</c> is used where it is not allowed, a command parser
/// reports a position behind the current one, or the command is not recognised.
/// </exception>
private Atom ProcessCommand(
    TexFormula formula,
    SourceSpan value,
    ref int position,
    string command,
    bool allowClosingDelimiter,
    ref bool closedDelimiter,
    ICommandEnvironment environment)
{
    // Index of the first character of the command name.
    var commandStart = position - command.Length;

    switch (command)
    {
        case "frac":
        {
            var numerator = Parse(
                ReadElement(value, ref position),
                formula.TextStyle,
                environment.CreateChildEnvironment());
            var denominator = Parse(
                ReadElement(value, ref position),
                formula.TextStyle,
                environment.CreateChildEnvironment());
            var fractionSource = value.Segment(commandStart, position - commandStart);
            return new FractionAtom(fractionSource, numerator.RootAtom, denominator.RootAtom, true);
        }

        case "left":
        {
            SkipWhiteSpace(value, ref position);
            if (position == value.Length)
            {
                throw new TexParseException("`left` command should be passed a delimiter");
            }

            var openingChar = value[position];
            ++position;
            var afterOpening = position;

            // Everything up to the closing delimiter is parsed before the opening symbol is resolved.
            var fenced = ParseUntilDelimiter(value, ref position, formula.TextStyle, environment);

            var openingSymbol = GetDelimiterSymbol(
                GetDelimeterMapping(openingChar),
                value.Segment(commandStart, afterOpening - commandStart));
            if (openingSymbol == null)
            {
                throw new TexParseException($"Cannot find delimiter named {openingChar}");
            }

            var closingSymbol = fenced.ClosingDelimiter;
            var fencedSource = value.Segment(commandStart, position - commandStart);
            return new FencedAtom(fencedSource, fenced.Body, openingSymbol, closingSymbol);
        }

        case "overline":
        {
            var overlined = Parse(
                ReadElement(value, ref position),
                formula.TextStyle,
                environment.CreateChildEnvironment());
            var overlineSource = value.Segment(commandStart, position - commandStart);
            return new OverlinedAtom(overlineSource, overlined.RootAtom);
        }

        case "right":
        {
            if (!allowClosingDelimiter)
            {
                throw new TexParseException("`right` command is not allowed without `left`");
            }

            SkipWhiteSpace(value, ref position);
            if (position == value.Length)
            {
                throw new TexParseException("`right` command should be passed a delimiter");
            }

            var closingChar = value[position];
            ++position;

            var closingSymbol = GetDelimiterSymbol(
                GetDelimeterMapping(closingChar),
                value.Segment(commandStart, position - commandStart));
            if (closingSymbol == null)
            {
                throw new TexParseException($"Cannot find delimiter named {closingChar}");
            }

            // Signal the caller that the closing delimiter has been seen.
            closedDelimiter = true;
            return closingSymbol;
        }

        case "sqrt":
        {
            // Command is radical.
            SkipWhiteSpace(value, ref position);

            TexFormula degree = null;
            var hasDegree = value.Length > position && value[position] == leftBracketChar;
            if (hasDegree)
            {
                // Degree of radical is specified in brackets.
                degree = Parse(
                    ReadElementGroup(value, ref position, leftBracketChar, rightBracketChar),
                    formula.TextStyle,
                    environment.CreateChildEnvironment());
            }

            var radicand = Parse(
                ReadElement(value, ref position),
                formula.TextStyle,
                environment.CreateChildEnvironment());
            var radicalSource = value.Segment(commandStart, position - commandStart);
            return new Radical(radicalSource, radicand.RootAtom, degree?.RootAtom);
        }

        case "color":
        case "colorbox":
        {
            // Both commands read a colour and a body; they differ only in which brush slot receives it.
            var color = ReadColorModelData(value, ref position);
            var bodyValue = ReadElement(value, ref position);
            var body = Parse(bodyValue, formula.TextStyle, environment.CreateChildEnvironment());
            var styledSource = value.Segment(commandStart, position - commandStart);

            if (command == "color")
            {
                return new StyledAtom(styledSource, body.RootAtom, null, new SolidColorBrush(color));
            }

            return new StyledAtom(styledSource, body.RootAtom, new SolidColorBrush(color), null);
        }
    }

    // Not a built-in command: try the environment's commands first, then the registry.
    if (!(environment.AvailableCommands.TryGetValue(command, out var parser)
          || _commandRegistry.TryGetValue(command, out parser)))
    {
        throw new TexParseException("Invalid command.");
    }

    var context = new CommandContext(this, formula, environment, value, commandStart, position);
    var parseResult = parser.ProcessCommand(context);

    // A command parser must never report a position behind the one it was given.
    if (parseResult.NextPosition < position)
    {
        throw new TexParseException(
            $"Incorrect parser behavior for command {command}: NextPosition = {parseResult.NextPosition}, position = {position}. Parser did not made any progress.");
    }

    position = parseResult.NextPosition;
    return parseResult.Atom;
}
/// <summary>
/// Executes the built-in command named <paramref name="command"/>: reads its arguments from
/// <paramref name="value"/> and builds the matching atom.
/// </summary>
/// <param name="formula">Formula being built; its text style is used for parsed arguments.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Points just after the command name; advanced past the command's arguments.</param>
/// <param name="command">Command name without the escape character.</param>
/// <param name="allowClosingDelimiter">Whether a <c>right</c> command is acceptable here.</param>
/// <param name="closedDelimiter">Set to <c>true</c> when a <c>right</c> command has been processed.</param>
/// <returns>The atom produced by the command.</returns>
/// <exception cref="TexParseException">
/// A required delimiter is missing or unknown, <c>right</c> is used where it is not allowed, a colour name is
/// unknown, or the command is not recognised.
/// </exception>
private Atom ProcessCommand(
    TexFormula formula,
    SourceSpan value,
    ref int position,
    string command,
    bool allowClosingDelimiter,
    ref bool closedDelimiter)
{
    // Index of the first character of the command name.
    int start = position - command.Length;

    SourceSpan source;
    switch (command)
    {
        case "frac":
        {
            var numeratorFormula = Parse(ReadElement(value, ref position), formula.TextStyle);
            var denominatorFormula = Parse(ReadElement(value, ref position), formula.TextStyle);
            source = value.Segment(start, position - start);
            return new FractionAtom(source, numeratorFormula.RootAtom, denominatorFormula.RootAtom, true);
        }

        case "left":
        {
            SkipWhiteSpace(value, ref position);
            if (position == value.Length)
            {
                throw new TexParseException("`left` command should be passed a delimiter");
            }

            var delimiter = value[position];
            ++position;
            var left = position;

            // Everything up to the closing delimiter is parsed before the opening symbol is resolved.
            var internals = ParseUntilDelimiter(value, ref position, formula.TextStyle);

            var opening = GetDelimiterSymbol(
                GetDelimeterMapping(delimiter),
                value.Segment(start, left - start));
            if (opening == null)
            {
                throw new TexParseException($"Cannot find delimiter named {delimiter}");
            }

            var closing = internals.ClosingDelimiter;
            source = value.Segment(start, position - start);
            return new FencedAtom(source, internals.Body, opening, closing);
        }

        case "overline":
        {
            var overlineFormula = Parse(ReadElement(value, ref position), formula.TextStyle);
            source = value.Segment(start, position - start);
            return new OverlinedAtom(source, overlineFormula.RootAtom);
        }

        case "right":
        {
            if (!allowClosingDelimiter)
            {
                throw new TexParseException("`right` command is not allowed without `left`");
            }

            SkipWhiteSpace(value, ref position);
            if (position == value.Length)
            {
                throw new TexParseException("`right` command should be passed a delimiter");
            }

            var delimiter = value[position];
            ++position;

            var closing = GetDelimiterSymbol(
                GetDelimeterMapping(delimiter),
                value.Segment(start, position - start));
            if (closing == null)
            {
                throw new TexParseException($"Cannot find delimiter named {delimiter}");
            }

            // Signal the caller that the closing delimiter has been seen.
            closedDelimiter = true;
            return closing;
        }

        case "sqrt":
        {
            // Command is radical.
            SkipWhiteSpace(value, ref position);

            TexFormula degreeFormula = null;
            if (value.Length > position && value[position] == leftBracketChar)
            {
                // Degree of radical is specified.
                degreeFormula = Parse(
                    ReadElementGroup(value, ref position, leftBracketChar, rightBracketChar),
                    formula.TextStyle);
            }

            var sqrtFormula = Parse(ReadElement(value, ref position), formula.TextStyle);
            source = value.Segment(start, position - start);
            return new Radical(source, sqrtFormula.RootAtom, degreeFormula?.RootAtom);
        }

        case "underline":
        {
            var underlineFormula = Parse(ReadElement(value, ref position), formula.TextStyle);
            source = value.Segment(start, position - start);
            return new UnderlinedAtom(source, underlineFormula.RootAtom);
        }

        case "color":
        {
            var colorName = ReadElement(value, ref position);
            if (!predefinedColors.TryGetValue(colorName.ToString(), out var color))
            {
                throw new TexParseException($"Color {colorName} not found");
            }

            var bodyValue = ReadElement(value, ref position);
            var bodyFormula = Parse(bodyValue, formula.TextStyle);
            source = value.Segment(start, position - start);
            return new StyledAtom(source, bodyFormula.RootAtom, null, new SolidColorBrush(color));
        }

        case "colorbox":
        {
            // Validate the colour name before reading and parsing the body, the same way "color" does, so an
            // unknown colour is reported without first parsing a body that would be thrown away.
            var colorName = ReadElement(value, ref position);
            if (!predefinedColors.TryGetValue(colorName.ToString(), out var color))
            {
                throw new TexParseException($"Color {colorName} not found");
            }

            var bodyValue = ReadElement(value, ref position);
            var bodyFormula = Parse(bodyValue, formula.TextStyle);
            source = value.Segment(start, position - start);
            return new StyledAtom(source, bodyFormula.RootAtom, new SolidColorBrush(color), null);
        }
    }

    throw new TexParseException("Invalid command.");
}
/// <summary>
/// Handles an escape sequence starting at <paramref name="position"/>: reads the command name and dispatches on
/// it as a symbol, a predefined formula, the <c>nbsp</c> space, a text style, or a command. Resulting atoms are
/// added to <paramref name="formula"/>. An unrecognised name does not raise an error; see the fallback at the
/// end of the method.
/// </summary>
/// <param name="formula">Formula that receives the produced atoms.</param>
/// <param name="value">The source being parsed.</param>
/// <param name="position">Must point at the escape character; advanced past everything consumed.</param>
/// <param name="allowClosingDelimiter">Passed on to command processing.</param>
/// <param name="closedDelimiter">Passed on to command processing, which may set it.</param>
/// <exception cref="TexParseException">A text style is applied to empty content.</exception>
private void ProcessEscapeSequence(
    TexFormula formula,
    SourceSpan value,
    ref int position,
    bool allowClosingDelimiter,
    ref bool closedDelimiter)
{
    var initialSrcPosition = position;
    position++; // step over the escape character
    var start = position;

    // The name is either a run of letters, or — for a symbol — assumed to be a single non-letter character.
    if (position < value.Length)
    {
        if (char.IsLetter(value[position]))
        {
            do
            {
                position++;
            }
            while (position < value.Length && char.IsLetter(value[position]));
        }
        else
        {
            position++;
        }
    }

    var commandSpan = value.Segment(start, position - start);
    var command = commandSpan.ToString();
    var formulaSource = new SourceSpan(value.Source, initialSrcPosition, commandSpan.End);

    if (SymbolAtom.TryGetAtom(commandSpan, out var symbolAtom))
    {
        // Symbol was found.
        if (symbolAtom.Type == TexAtomType.Accent)
        {
            var helper = new TexFormulaHelper(formula, formulaSource);
            var accentFormula = ReadScript(formula, value, ref position);
            helper.AddAccent(accentFormula, symbolAtom.Name);
            return;
        }

        if (symbolAtom.Type == TexAtomType.BigOperator)
        {
            var opAtom = new BigOperatorAtom(formulaSource, symbolAtom, null, null);
            var opWithScripts = AttachScripts(formula, value, ref position, opAtom);
            formula.Add(opWithScripts, formulaSource);
            return;
        }

        var symbolWithScripts = AttachScripts(formula, value, ref position, symbolAtom);
        formula.Add(symbolWithScripts, formulaSource);
        return;
    }

    if (predefinedFormulas.TryGetValue(command, out var factory))
    {
        // Predefined formula was found.
        var predefinedFormula = factory(formulaSource);
        var predefinedAtom = AttachScripts(formula, value, ref position, predefinedFormula.RootAtom);
        formula.Add(predefinedAtom, formulaSource);
        return;
    }

    if (command == "nbsp")
    {
        // Space was found.
        var spaceAtom = AttachScripts(formula, value, ref position, new SpaceAtom(formulaSource));
        formula.Add(spaceAtom, formulaSource);
        return;
    }

    if (textStyles.Contains(command))
    {
        // Text style was found.
        SkipWhiteSpace(value, ref position);

        var styledGroup = ReadGroup(formula, value, ref position, leftGroupChar, rightGroupChar);
        var styledFormula = Parse(styledGroup, command);
        if (styledFormula.RootAtom == null)
        {
            throw new TexParseException("Styled text can't be empty!");
        }

        var styledAtom = AttachScripts(formula, value, ref position, styledFormula.RootAtom);
        var styledSource = new SourceSpan(formulaSource.Source, formulaSource.Start, position);
        formula.Add(styledAtom, styledSource);
        return;
    }

    if (commands.Contains(command))
    {
        // Command was found.
        var commandAtom = ProcessCommand(
            formula,
            value,
            ref position,
            command,
            allowClosingDelimiter,
            ref closedDelimiter);
        if (!allowClosingDelimiter)
        {
            commandAtom = AttachScripts(formula, value, ref position, commandAtom);
        }

        var commandSource = new SourceSpan(formulaSource.Source, formulaSource.Start, commandAtom.Source.End);
        formula.Add(commandAtom, commandSource);
        return;
    }

    // Unknown escape sequence: rewind to the escape character and run the character-level helpers on it.
    // NOTE(review): neither resulting atom is added to the formula (the Add call and the former
    // TexParseException("Unknown symbol or command or predefined TeXFormula: ...") are disabled in the
    // original) — confirm this is the intended behavior.
    position = initialSrcPosition;
    var fallbackSpan = new SourceSpan(value.Source, start, commandSpan.End);

    var escapeCharAtom = new CharAtom(new SourceSpan(value.Source, position, 1), '\\');
    AttachScripts(formula, value, ref position, escapeCharAtom, true);

    var fallbackAtom = ConvertCharacter(formula, ref position, fallbackSpan);
    AttachScripts(formula, value, ref position, fallbackAtom, true);
}