/// <summary>
/// Parses <paramref name="latexSource"/> as text-mode LaTeX into a <see cref="TextAtom"/>.
/// The source is split into sections by a <c>CustomBreaker</c>; the sections are then walked in order,
/// with runs of unescaped <c>$</c> opening and closing inline (<c>$</c>) or display (<c>$$</c>) math mode.
/// </summary>
/// <param name="latexSource">The LaTeX source. Null or empty input yields an empty <see cref="TextAtom.List"/>.</param>
/// <returns>The parsed atom, or an error message describing why parsing failed.</returns>
public static Result<TextAtom> TextAtomFromLaTeX(string latexSource) {
  if (string.IsNullOrEmpty(latexSource)) {
    return new TextAtom.List(Array.Empty<TextAtom>());
  }
  int endAt = 0;
  // null = not in math mode, false = inline math ($), true = display math ($$)
  bool? displayMath = null;
  // Accumulates the raw LaTeX seen while a math mode is open
  var mathLaTeX = new StringBuilder();
  bool backslashEscape = false;
  bool afterCommand = false; // ignore spaces after command
  bool afterNewline = false;
  // Number of consecutive unescaped $ seen and not yet resolved by CheckDollarCount
  int dollarCount = 0;
  var globalAtoms = new TextAtomListBuilder();
  List<BreakAtInfo> breakList = new List<BreakAtInfo>(); // Roslyn bug that assumes breakList is nullable resulting in warnings so var is not used
  var breaker = new CustomBreaker(v => breakList.Add(new BreakAtInfo(v.LatestBreakAt, v.LatestWordKind))) {
    BreakNumberAfterText = true,
    ThrowIfCharOutOfRange = false
  };
  breaker.BreakWords(latexSource);

  // Resolves the pending run of $ counted in dollarCount and resets it:
  // one $ opens/closes inline math, two open/close display math.
  // Closing a mode with the other mode's delimiter, or a run of more than two $, is an error.
  // On close, the accumulated mathLaTeX is handed to atoms.Math and then cleared.
  Result CheckDollarCount(int startAt, ref int endAt, TextAtomListBuilder atoms) {
    switch (dollarCount) {
      case 0:
        break;
      case 1:
        dollarCount = 0;
        switch (displayMath) {
          case true:
            return "Cannot close display math mode with $";
          case false:
            if (atoms.Math(mathLaTeX.ToString(), false, startAt, ref endAt).Error is string error) {
              return error;
            }
            mathLaTeX.Clear();
            displayMath = null;
            break;
          case null:
            displayMath = false;
            break;
        }
        break;
      case 2:
        dollarCount = 0;
        switch (displayMath) {
          case true:
            if (atoms.Math(mathLaTeX.ToString(), true, startAt - 1, ref endAt).Error is string error) {
              return error;
            }
            mathLaTeX.Clear();
            displayMath = null;
            break;
          case false:
            return "Cannot close inline math mode with $$";
          case null:
            displayMath = true;
            break;
        }
        break;
      default:
        return "Invalid number of $: " + dollarCount;
    }
    return Ok();
  }

  // Walks breakList starting at section index i, adding atoms to the given builder.
  Result<int> BuildBreakList(ReadOnlySpan<char> latex, TextAtomListBuilder atoms, int i, bool oneCharOnly, char stopChar) {
    // Emits a line break followed by a paragraph-indent space
    void ParagraphBreak() {
      atoms.Break();
      atoms.Space(Space.ParagraphIndent);
    }
    for (; i < breakList.Count; i++) {
      // Looks up section `index`: it spans from the previous break position
      // (0 for the first section) up to this section's break position.
      void ObtainSection(ReadOnlySpan<char> latexInput, int index, out int start, out int end, out ReadOnlySpan<char> section, out WordKind kind) {
        (start, end) = (index == 0 ? 0 : breakList[index - 1].breakAt, breakList[index].breakAt);
        section = latexInput.Slice(start, end - start);
        kind = breakList[index].wordKind;
      }
      ObtainSection(latex, i, out var startAt, out endAt, out var textSection, out var wordKind);

      // Steps i back by one and reloads startAt/endAt/section/wordKind.
      // Returns false (with i still decremented) when i was already 0.
      bool PreviousSection(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        bool success = i-- > 0;
        if (success) {
          ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
        }
        return success;
      }

      // Advances i by one and reloads startAt/endAt/section/wordKind.
      // Returns false (with i still incremented) when there is no further section.
      bool NextSection(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        bool success = ++i < breakList.Count;
        if (success) {
          ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
        }
        return success;
      }

      // Parses the following argument into its own atom by recursing into BuildBreakList
      // (oneCharOnly = true, no stop character) with a fresh builder,
      // then resumes the outer walk at the index the recursion returned.
      Result<TextAtom> ReadArgumentAtom(ReadOnlySpan<char> latexInput) {
        backslashEscape = false;
        var argAtoms = new TextAtomListBuilder();
        return BuildBreakList(latexInput, argAtoms, ++i, true, '\0')
          .Bind(index => { i = index; return argAtoms.Build(); });
      }

      // Reads the raw text of the following {...} argument without interpreting it.
      // Fails with "Missing argument" / "Missing {" / "Missing }" when the argument is absent or unterminated.
      // On success the current section has been advanced up to the closing brace.
      SpanResult<char> ReadArgumentString(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        afterCommand = false;
        if (!NextSection(latexInput, ref section)) {
          return Err("Missing argument");
        }
        if (section.IsNot('{')) {
          return Err("Missing {");
        }
        int endingIndex = -1;
        bool isEscape = false;
        // startAt + 1 to not start at the { we started at
        for (int j = startAt + 1, bracketDepth = 0; j < latexInput.Length; j++) {
          if (latexInput[j] == '\\') {
            // Toggle rather than set: in "\\}" the second backslash is itself escaped,
            // so the brace that follows is a real, unescaped brace.
            isEscape = !isEscape;
          } else if (latexInput[j] == '{' && !isEscape) {
            bracketDepth++;
          } else if (latexInput[j] == '}' && !isEscape) {
            if (bracketDepth > 0) {
              bracketDepth--;
            } else {
              endingIndex = j;
              break;
            }
          } else {
            // Any other character (including an escaped brace) consumes the pending escape
            isEscape = false;
          }
        }
        if (endingIndex == -1) {
          return Err("Missing }");
        }
        var resultText = latexInput.Slice(endAt, endingIndex - endAt);
        // Skip forward to the section containing the closing brace.
        // NextSection is expected to succeed here, but stop if the sections run out
        // instead of spinning forever on a stale startAt.
        while (startAt < endingIndex && NextSection(latexInput, ref section)) {
        }
        return Ok(resultText);
      }

      // Reads a {...} argument and converts it with Color.Create;
      // fails with "Invalid color: ..." when that does not produce a Color.
      Result<Color> ReadColor(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) =>
        ReadArgumentString(latexInput, ref section).Bind(color =>
          Color.Create(color, !NoEnhancedColors) is Color value
          ? Ok(value)
          : Err("Invalid color: " + color.ToString())
        );

      ///<summary>Get punctuation after current section</summary>
      ReadOnlySpan<char> NextSectionWhilePunc(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        int start = endAt;
        // Punctuation that has a special meaning in LaTeX must not be swallowed as trailing punctuation
        ReadOnlySpan<char> specialChars = stackalloc[] { '#', '$', '%', '&', '\\', '^', '_', '{', '}', '~' };
        while (NextSection(latexInput, ref section)) {
          if (wordKind != WordKind.Punc || specialChars.IndexOf(section[0]) != -1) {
            // We have overlooked by one when non-punctuation or special character is encountered
            PreviousSection(latexInput, ref section);
            break;
          }
        }
        return latexInput.Slice(start, endAt - start);
      }

      //Nothing should be before dollar sign checking -- dollar sign checking uses continue;
      atoms.TextLength = startAt;
      if (textSection.Is('$')) {
        if (backslashEscape) {
          // \$ is a literal dollar sign: kept verbatim inside math, emitted as text otherwise
          if (displayMath != null) {
            mathLaTeX.Append(@"\$");
          } else {
            atoms.Text("$", NextSectionWhilePunc(latex, ref textSection));
          }
        } else {
          // Unescaped $: just count it; the run is resolved once a non-$ section is reached
          dollarCount++;
          continue;
        }
        backslashEscape = false;
      } else {
        {
          // Extra braces keep this `error` out of the scope of the code that follows
          if (CheckDollarCount(startAt, ref endAt, atoms).Error is string error) {
            return error;
          }
        }
        switch (backslashEscape, displayMath) {
/// <summary>
/// Parses <paramref name="latexSource"/> as text-mode LaTeX into a <see cref="TextAtom"/>.
/// The source is split into sections by <c>breaker</c>; the sections are then walked in order,
/// with runs of unescaped <c>$</c> opening and closing inline (<c>$</c>) or display (<c>$$</c>) math mode.
/// </summary>
/// <param name="latexSource">The LaTeX source. Null or empty input yields an empty <see cref="TextAtom.List"/>.</param>
/// <returns>The parsed atom, or an error message describing why parsing failed.</returns>
public static Result<TextAtom> TextAtomFromLaTeX(string latexSource) {
  if (string.IsNullOrEmpty(latexSource)) {
    return new TextAtom.List(Array.Empty<TextAtom>(), 0);
  }
  // null = not in math mode, false = inline math ($), true = display math ($$)
  bool? displayMath = null;
  // Non-null only while a math mode is open; accumulates the raw math LaTeX
  StringBuilder mathLaTeX = null;
  bool backslashEscape = false;
  bool afterCommand = false; //ignore spaces after command
  bool afterNewline = false;
  // Number of consecutive unescaped $ seen and not yet resolved by CheckDollarCount
  int dollarCount = 0;
  var globalAtoms = new TextAtomListBuilder();
  var breakList = new List<BreakAtInfo>();
  // `breaker` is declared outside the code visible here
  breaker.SetNewBreakHandler(v => breakList.Add(new BreakAtInfo(v.LatestBreakAt, v.LatestWordKind)));
  breaker.BreakWords(latexSource);

  // Resolves the pending run of $ counted in dollarCount and resets it:
  // one $ opens/closes inline math, two open/close display math.
  // Closing a mode with the other mode's delimiter, or a run of more than two $, is an error.
  // On close, the accumulated mathLaTeX is handed to atoms.Math and then dropped.
  Result CheckDollarCount(TextAtomListBuilder atoms) {
    switch (dollarCount) {
      case 0:
        break;
      case 1:
        dollarCount = 0;
        switch (displayMath) {
          case true:
            return "Cannot close display math mode with $";
          case false:
            if (atoms.Math(mathLaTeX.ToString(), false).Error is string mathError) {
              return "[Math mode error] " + mathError;
            }
            mathLaTeX = null;
            displayMath = null;
            break;
          case null:
            mathLaTeX = new StringBuilder();
            displayMath = false;
            break;
        }
        break;
      case 2:
        dollarCount = 0;
        switch (displayMath) {
          case true:
            if (atoms.Math(mathLaTeX.ToString(), true).Error is string mathError) {
              return "[Math mode error] " + mathError;
            }
            mathLaTeX = null;
            displayMath = null;
            break;
          case false:
            return "Cannot close inline math mode with $$";
          case null:
            mathLaTeX = new StringBuilder();
            displayMath = true;
            break;
        }
        break;
      default:
        return "Invalid number of $: " + dollarCount;
    }
    return Ok();
  }

  // Walks breakList starting at section index i, adding atoms to the given builder.
  // When stopChar is non-zero, an unescaped section starting with it (outside math mode)
  // ends the walk and returns the current index.
  Result<int> BuildBreakList(ReadOnlySpan<char> latex, TextAtomListBuilder atoms, int i, bool oneCharOnly, char stopChar) {
    // Emits a line break followed by a paragraph-indent space; TextLength is wound back by 3
    // between the two calls (see the warning below about whether they should share a range).
    void ParagraphBreak() {
      atoms.Break(3);
#warning Should the newline and space occupy the same range?
      atoms.TextLength -= 3;
      atoms.Space(Space.ParagraphIndent, 3);
    }
    for (; i < breakList.Count; i++) {
      // Looks up section `index`: it spans from the previous break position
      // (0 for the first section) up to this section's break position.
      void ObtainSection(ReadOnlySpan<char> latexInput, int index, out int start, out int end, out ReadOnlySpan<char> section, out WordKind kind) {
        (start, end) = (index == 0 ? 0 : breakList[index - 1].breakAt, breakList[index].breakAt);
        section = latexInput.Slice(start, end - start);
        kind = breakList[index].wordKind;
      }
      ObtainSection(latex, i, out var startAt, out var endAt, out var textSection, out var wordKind);

      // Steps i back by one and reloads startAt/endAt/section/wordKind.
      // Returns false (with i still decremented) when i was already 0.
      bool PreviousSection(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        bool success = i-- > 0;
        if (success) {
          ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
        }
        return success;
      }

      // Advances i by one and reloads startAt/endAt/section/wordKind.
      // Returns false (with i still incremented) when there is no further section.
      bool NextSection(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        bool success = ++i < breakList.Count;
        if (success) {
          ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
        }
        return success;
      }

      // Parses the following argument into its own atom list by recursing into BuildBreakList
      // (oneCharOnly = true, no stop character) with a fresh builder,
      // then resumes the outer walk at the index the recursion returned.
      Result<TextAtom.List> ReadArgumentAtom(ReadOnlySpan<char> latexInput) {
        backslashEscape = false;
        var argAtoms = new TextAtomListBuilder();
        return BuildBreakList(latexInput, argAtoms, ++i, true, '\0')
          .Bind(index => { i = index; return argAtoms.Build(); });
      }

      // Reads the raw text of the following {...} argument without interpreting it.
      // Fails with "Missing argument" / "Missing {" / "Missing }" when the argument is absent or unterminated.
      // On success the current section has been advanced up to the closing brace.
      SpanResult<char> ReadArgumentString(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        afterCommand = false;
        if (!NextSection(latexInput, ref section)) {
          return Err("Missing argument");
        }
        if (section.IsNot('{')) {
          return Err("Missing {");
        }
        int endingIndex = -1;
        bool isEscape = false;
        // startAt + 1 to not start at the { we started at
        for (int j = startAt + 1, bracketDepth = 0; j < latexInput.Length; j++) {
          if (latexInput[j] == '\\') {
            // Toggle rather than set: in "\\}" the second backslash is itself escaped,
            // so the brace that follows is a real, unescaped brace.
            isEscape = !isEscape;
          } else if (latexInput[j] == '{' && !isEscape) {
            bracketDepth++;
          } else if (latexInput[j] == '}' && !isEscape) {
            if (bracketDepth > 0) {
              bracketDepth--;
            } else {
              endingIndex = j;
              break;
            }
          } else {
            // Any other character (including an escaped brace) consumes the pending escape
            isEscape = false;
          }
        }
        if (endingIndex == -1) {
          return Err("Missing }");
        }
        var resultText = latexInput.Slice(endAt, endingIndex - endAt);
        // Skip forward to the section containing the closing brace.
        // NextSection is expected to succeed here, but stop if the sections run out
        // instead of spinning forever on a stale startAt.
        while (startAt < endingIndex && NextSection(latexInput, ref section)) {
        }
        return Ok(resultText);
      }

      // Consumes the punctuation sections that directly follow the current one and returns their text.
      // Stops before the first section that is not punctuation or that starts with a LaTeX special character.
      ReadOnlySpan<char> NextSectionUntilPunc(ReadOnlySpan<char> latexInput, ref ReadOnlySpan<char> section) {
        int start = endAt;
        ReadOnlySpan<char> specialChars = stackalloc[] { '#', '$', '%', '&', '\\', '^', '_', '{', '}', '~' };
        while (NextSection(latexInput, ref section)) {
          if (wordKind != WordKind.Punc || specialChars.IndexOf(section[0]) != -1) {
            //We have overlooked by one
            PreviousSection(latexInput, ref section);
            break;
          }
        }
        return latexInput.Slice(start, endAt - start);
      }

      //Nothing should be before dollar sign checking -- dollar sign checking uses continue;
      atoms.TextLength = startAt;
      if (textSection.Is('$')) {
        if (backslashEscape) {
          // \$ is a literal dollar sign: kept verbatim inside math, emitted as text otherwise
          if (displayMath != null) {
            mathLaTeX.Append(@"\$");
          } else {
            atoms.Text("$", NextSectionUntilPunc(latex, ref textSection));
          }
        } else {
          // Unescaped $: just count it; the run is resolved once a non-$ section is reached
          dollarCount++;
          continue;
        }
        backslashEscape = false;
      } else {
        {
          // Extra braces keep this `error` out of the scope of the switch below, which declares its own
          if (CheckDollarCount(atoms).Error is string error) {
            return error;
          }
        }
        if (!backslashEscape) {
          //Unescaped text section, inside display/inline math mode
          if (displayMath != null) {
            switch (textSection) {
              case var _ when textSection.Is('$'):
                throw new InvalidCodePathException("The $ case should have been accounted for.");
              case var _ when textSection.Is('\\'):
                backslashEscape = true;
                continue;
              default:
                mathLaTeX.Append(textSection);
                break;
            }
          }
          //Unescaped text section, not inside display/inline math mode
          else {
            switch (textSection) {
              case var _ when stopChar > 0 && textSection[0] == stopChar:
                return Ok(i);
              case var _ when textSection.Is('$'):
                throw new InvalidCodePathException("The $ case should have been accounted for.");
              case var _ when textSection.Is('\\'):
                backslashEscape = true;
                continue;
              case var _ when textSection.Is('#'):
                return "Unexpected command argument reference character # outside of new command definition (currently unsupported)";
              case var _ when textSection.Is('^'):
              case var _ when textSection.Is('_'):
                return $"Unexpected script indicator {textSection[0]} outside of math mode";
              case var _ when textSection.Is('&'):
                return $"Unexpected alignment tab character & outside of table environments";
              case var _ when textSection.Is('~'):
                atoms.ControlSpace();
                break;
              case var _ when textSection.Is('%'):
                // Everything up to (not including) the next newline section is the comment text
                var comment = new StringBuilder();
                while (NextSection(latex, ref textSection) && wordKind != WordKind.NewLine) {
                  comment.Append(textSection);
                }
                atoms.Comment(comment.ToString());
                break;
              case var _ when textSection.Is('{'):
                // Parse the group recursively until its matching '}' and resume after it
                if (BuildBreakList(latex, atoms, ++i, false, '}').Bind(index => i = index).Error is string error) {
                  return error;
                }
                break;
              case var _ when textSection.Is('}'):
                return "Unexpected }, unbalanced braces";
              case var _ when wordKind == WordKind.NewLine:
                // Consume newlines after commands
                // Double newline == paragraph break
                if (afterNewline) {
                  ParagraphBreak();
                  afterNewline = false;
                  break;
                } else {
                  atoms.ControlSpace();
                  afterNewline = true;
                  continue;
                }
              case var _ when wordKind == WordKind.Whitespace:
                //Collapse spaces
                if (afterCommand) {
                  continue;
                } else {
                  atoms.ControlSpace();
                }
                break;
              default: //Just ordinary text
                if (oneCharOnly) {
                  if (startAt + 1 < endAt) //Only re-read if current break span is more than 1 long
                  {
                    // Move the preceding break one character forward and step back,
                    // so the remainder of this section is read again on the next iteration
                    i--;
                    breakList[i] = new BreakAtInfo(breakList[i].breakAt + 1, breakList[i].wordKind);
                  }
                  //Need to allocate in the end :(
                  //Don't look ahead for punc; we are looking for one char only
                  atoms.Text(textSection[0].ToString(), default);
                } else {
                  atoms.Text(textSection.ToString(), NextSectionUntilPunc(latex, ref textSection));
                }
                break;
            }
          }
          afterCommand = false;
        }
        //Escaped text section but in inline/display math mode
        else if (displayMath != null) {
          switch (textSection) {
            case var _ when textSection.Is('$'):
              throw new InvalidCodePathException("The $ case should have been accounted for.");
            case var _ when textSection.Is('('):
              return displayMath switch {
                true => "Cannot open inline math mode in display math mode",
                false => "Cannot open inline math mode in inline math mode",
                null => throw new InvalidCodePathException("displayMath is null. This switch should not be hit."),
              };