예제 #1
0
        public static Result <TextAtom> TextAtomFromLaTeX(string latexSource)
        {
            if (string.IsNullOrEmpty(latexSource))
            {
                return(new TextAtom.List(Array.Empty <TextAtom>()));
            }
            int  endAt                   = 0;
            bool?displayMath             = null;
            var  mathLaTeX               = new StringBuilder();
            bool backslashEscape         = false;
            bool afterCommand            = false; // ignore spaces after command
            bool afterNewline            = false;
            int  dollarCount             = 0;
            var  globalAtoms             = new TextAtomListBuilder();
            List <BreakAtInfo> breakList = new List <BreakAtInfo>(); // Roslyn bug that assumes breakList is nullable resulting in warnings so var is not used
            var breaker                  = new CustomBreaker(v => breakList.Add(new BreakAtInfo(v.LatestBreakAt, v.LatestWordKind)))
            {
                BreakNumberAfterText  = true,
                ThrowIfCharOutOfRange = false
            };

            breaker.BreakWords(latexSource);

            Result CheckDollarCount(int startAt, ref int endAt, TextAtomListBuilder atoms)
            {
                switch (dollarCount)
                {
                case 0:
                    break;

                case 1:
                    dollarCount = 0;
                    switch (displayMath)
                    {
                    case true:
                        return("Cannot close display math mode with $");

                    case false:
                        if (atoms.Math(mathLaTeX.ToString(), false, startAt, ref endAt).Error is string error)
                        {
                            return(error);
                        }
                        mathLaTeX.Clear();
                        displayMath = null;
                        break;

                    case null:
                        displayMath = false;
                        break;
                    }
                    break;

                case 2:
                    dollarCount = 0;
                    switch (displayMath)
                    {
                    case true:
                        if (atoms.Math(mathLaTeX.ToString(), true, startAt - 1, ref endAt).Error is string error)
                        {
                            return(error);
                        }
                        mathLaTeX.Clear();
                        displayMath = null;
                        break;

                    case false:
                        return("Cannot close inline math mode with $$");

                    case null:
                        displayMath = true;
                        break;
                    }
                    break;

                default:
                    return("Invalid number of $: " + dollarCount);
                }
                return(Ok());
            }

            Result <int> BuildBreakList(ReadOnlySpan <char> latex, TextAtomListBuilder atoms,
                                        int i, bool oneCharOnly, char stopChar)
            {
                void ParagraphBreak()
                {
                    atoms.Break();
                    atoms.Space(Space.ParagraphIndent);
                }

                for (; i < breakList.Count; i++)
                {
                    void ObtainSection(ReadOnlySpan <char> latexInput, int index,
                                       out int start, out int end, out ReadOnlySpan <char> section, out WordKind kind)
                    {
                        (start, end) = (index == 0 ? 0 : breakList[index - 1].breakAt, breakList[index].breakAt);
                        section      = latexInput.Slice(start, end - start);
                        kind         = breakList[index].wordKind;
                    }

                    ObtainSection(latex, i, out var startAt, out endAt, out var textSection, out var wordKind);
                    bool PreviousSection(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        bool success = i-- > 0;

                        if (success)
                        {
                            ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
                        }
                        return(success);
                    }

                    bool NextSection(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        bool success = ++i < breakList.Count;

                        if (success)
                        {
                            ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
                        }
                        return(success);
                    }

                    Result <TextAtom> ReadArgumentAtom(ReadOnlySpan <char> latexInput)
                    {
                        backslashEscape = false;
                        var argAtoms = new TextAtomListBuilder();

                        return(BuildBreakList(latexInput, argAtoms, ++i, true, '\0')
                               .Bind(index => { i = index; return argAtoms.Build(); }));
                    }

                    SpanResult <char> ReadArgumentString(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        afterCommand = false;
                        if (!NextSection(latexInput, ref section))
                        {
                            return(Err("Missing argument"));
                        }
                        if (section.IsNot('{'))
                        {
                            return(Err("Missing {"));
                        }
                        int endingIndex = -1;
                        //startAt + 1 to not start at the { we started at
                        bool isEscape = false;

                        for (int j = startAt + 1, bracketDepth = 0; j < latexInput.Length; j++)
                        {
                            if (latexInput[j] == '\\')
                            {
                                isEscape = true;
                            }
                            else if (latexInput[j] == '{' && !isEscape)
                            {
                                bracketDepth++;
                            }
                            else if (latexInput[j] == '}' && !isEscape)
                            {
                                if (bracketDepth > 0)
                                {
                                    bracketDepth--;
                                }
                                else
                                {
                                    endingIndex = j; break;
                                }
                            }
                            else
                            {
                                isEscape = false;
                            }
                        }
                        if (endingIndex == -1)
                        {
                            return(Err("Missing }"));
                        }
                        var resultText = latexInput.Slice(endAt, endingIndex - endAt);

                        while (startAt < endingIndex)
                        {
                            _ = NextSection(latexInput, ref section); //this never fails because the above check
                        }
                        return(Ok(resultText));
                    }

                    Result <Color> ReadColor(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section) =>
                    ReadArgumentString(latexInput, ref section).Bind(color =>
                                                                     Color.Create(color, !NoEnhancedColors) is Color value ?
                                                                     Ok(value) :
                                                                     Err("Invalid color: " + color.ToString())
                                                                     );

                    ///<summary>Get punctutation after current section</summary>
                    ReadOnlySpan <char> NextSectionWhilePunc(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        int start = endAt;
                        ReadOnlySpan <char> specialChars = stackalloc[] { '#', '$', '%', '&', '\\', '^', '_', '{', '}', '~' };

                        while (NextSection(latexInput, ref section))
                        {
                            if (wordKind != WordKind.Punc || specialChars.IndexOf(section[0]) != -1)
                            {
                                // We have overlooked by one when non-punctuation or special character is encountered
                                PreviousSection(latexInput, ref section);
                                break;
                            }
                        }
                        return(latexInput.Slice(start, endAt - start));
                    }

                    //Nothing should be before dollar sign checking -- dollar sign checking uses continue;
                    atoms.TextLength = startAt;
                    if (textSection.Is('$'))
                    {
                        if (backslashEscape)
                        {
                            if (displayMath != null)
                            {
                                mathLaTeX.Append(@"\$");
                            }
                            else
                            {
                                atoms.Text("$", NextSectionWhilePunc(latex, ref textSection));
                            }
                        }
                        else
                        {
                            dollarCount++;
                            continue;
                        }
                        backslashEscape = false;
                    }
                    else
                    {
                        { if (CheckDollarCount(startAt, ref endAt, atoms).Error is string error)
                          {
                              return(error);
                          }
                        }
                        switch (backslashEscape, displayMath)
                        {
예제 #2
0
        public static Result <TextAtom> TextAtomFromLaTeX(string latexSource)
        {
            if (string.IsNullOrEmpty(latexSource))
            {
                return(new TextAtom.List(Array.Empty <TextAtom>(), 0));
            }
            bool?         displayMath     = null;
            StringBuilder mathLaTeX       = null;
            bool          backslashEscape = false;
            bool          afterCommand    = false; //ignore spaces after command
            bool          afterNewline    = false;
            int           dollarCount     = 0;
            var           globalAtoms     = new TextAtomListBuilder();
            var           breakList       = new List <BreakAtInfo>();

            breaker.SetNewBreakHandler(v =>
                                       breakList.Add(new BreakAtInfo(v.LatestBreakAt, v.LatestWordKind)));
            breaker.BreakWords(latexSource);

            Result CheckDollarCount(TextAtomListBuilder atoms)
            {
                switch (dollarCount)
                {
                case 0:
                    break;

                case 1:
                    dollarCount = 0;
                    switch (displayMath)
                    {
                    case true:
                        return("Cannot close display math mode with $");

                    case false:
                        if (atoms.Math(mathLaTeX.ToString(), false).Error is string mathError)
                        {
                            return("[Math mode error] " + mathError);
                        }
                        mathLaTeX   = null;
                        displayMath = null;
                        break;

                    case null:
                        mathLaTeX   = new StringBuilder();
                        displayMath = false;
                        break;
                    }
                    break;

                case 2:
                    dollarCount = 0;
                    switch (displayMath)
                    {
                    case true:
                        if (atoms.Math(mathLaTeX.ToString(), true).Error is string mathError)
                        {
                            return("[Math mode error] " + mathError);
                        }
                        mathLaTeX   = null;
                        displayMath = null;
                        break;

                    case false:
                        return("Cannot close inline math mode with $$");

                    case null:
                        mathLaTeX   = new StringBuilder();
                        displayMath = true;
                        break;
                    }
                    break;

                default:
                    return("Invalid number of $: " + dollarCount);
                }
                return(Ok());
            }

            Result <int> BuildBreakList(ReadOnlySpan <char> latex, TextAtomListBuilder atoms,
                                        int i, bool oneCharOnly, char stopChar)
            {
                void ParagraphBreak()
                {
                    atoms.Break(3);
#warning Should the newline and space occupy the same range?
                    atoms.TextLength -= 3;
                    atoms.Space(Space.ParagraphIndent, 3);
                }

                for (; i < breakList.Count; i++)
                {
                    void ObtainSection(ReadOnlySpan <char> latexInput, int index,
                                       out int start, out int end, out ReadOnlySpan <char> section, out WordKind kind)
                    {
                        (start, end) = (index == 0 ? 0 : breakList[index - 1].breakAt, breakList[index].breakAt);
                        section      = latexInput.Slice(start, end - start);
                        kind         = breakList[index].wordKind;
                    }

                    ObtainSection(latex, i, out var startAt, out var endAt, out var textSection, out var wordKind);
                    bool PreviousSection(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        bool success = i-- > 0;

                        if (success)
                        {
                            ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
                        }
                        return(success);
                    }

                    bool NextSection(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        bool success = ++i < breakList.Count;

                        if (success)
                        {
                            ObtainSection(latexInput, i, out startAt, out endAt, out section, out wordKind);
                        }
                        return(success);
                    }

                    Result <TextAtom.List> ReadArgumentAtom(ReadOnlySpan <char> latexInput)
                    {
                        backslashEscape = false;
                        var argAtoms = new TextAtomListBuilder();

                        return(BuildBreakList(latexInput, argAtoms, ++i, true, '\0')
                               .Bind(index => { i = index; return argAtoms.Build(); }));
                    }

                    SpanResult <char> ReadArgumentString(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        afterCommand = false;
                        if (!NextSection(latexInput, ref section))
                        {
                            return(Err("Missing argument"));
                        }
                        if (section.IsNot('{'))
                        {
                            return(Err("Missing {"));
                        }
                        int endingIndex = -1;
                        //startAt + 1 to not start at the { we started at
                        bool isEscape = false;

                        for (int j = startAt + 1, bracketDepth = 0; j < latexInput.Length; j++)
                        {
                            if (latexInput[j] == '\\')
                            {
                                isEscape = true;
                            }
                            else if (latexInput[j] == '{' && !isEscape)
                            {
                                bracketDepth++;
                            }
                            else if (latexInput[j] == '}' && !isEscape)
                            {
                                if (bracketDepth > 0)
                                {
                                    bracketDepth--;
                                }
                                else
                                {
                                    endingIndex = j; break;
                                }
                            }
                            else
                            {
                                isEscape = false;
                            }
                        }
                        if (endingIndex == -1)
                        {
                            return(Err("Missing }"));
                        }
                        var resultText = latexInput.Slice(endAt, endingIndex - endAt);

                        while (startAt < endingIndex)
                        {
                            _ = NextSection(latexInput, ref section); //this never fails because the above check
                        }
                        return(Ok(resultText));
                    }

                    ReadOnlySpan <char> NextSectionUntilPunc(ReadOnlySpan <char> latexInput, ref ReadOnlySpan <char> section)
                    {
                        int start = endAt;
                        ReadOnlySpan <char> specialChars = stackalloc[] { '#', '$', '%', '&', '\\', '^', '_', '{', '}', '~' };

                        while (NextSection(latexInput, ref section))
                        {
                            if (wordKind != WordKind.Punc || specialChars.IndexOf(section[0]) != -1)
                            {
                                //We have overlooked by one
                                PreviousSection(latexInput, ref section);
                                break;
                            }
                        }
                        return(latexInput.Slice(start, endAt - start));
                    }

                    //Nothing should be before dollar sign checking -- dollar sign checking uses continue;
                    atoms.TextLength = startAt;
                    if (textSection.Is('$'))
                    {
                        if (backslashEscape)
                        {
                            if (displayMath != null)
                            {
                                mathLaTeX.Append(@"\$");
                            }
                            else
                            {
                                atoms.Text("$", NextSectionUntilPunc(latex, ref textSection));
                            }
                        }
                        else
                        {
                            dollarCount++;
                            continue;
                        }
                        backslashEscape = false;
                    }
                    else
                    {
                        { if (CheckDollarCount(atoms).Error is string error)
                          {
                              return(error);
                          }
                        }
                        if (!backslashEscape)
                        {
                            //Unescaped text section, inside display/inline math mode
                            if (displayMath != null)
                            {
                                switch (textSection)
                                {
                                case var _ when textSection.Is('$'):
                                    throw new InvalidCodePathException("The $ case should have been accounted for.");

                                case var _ when textSection.Is('\\'):
                                    backslashEscape = true;

                                    continue;

                                default:
                                    mathLaTeX.Append(textSection);
                                    break;
                                }
                            }
                            //Unescaped text section, not inside display/inline math mode
                            else
                            {
                                switch (textSection)
                                {
                                case var _ when stopChar > 0 && textSection[0] == stopChar:
                                    return(Ok(i));

                                case var _ when textSection.Is('$'):
                                    throw new InvalidCodePathException("The $ case should have been accounted for.");

                                case var _ when textSection.Is('\\'):
                                    backslashEscape = true;

                                    continue;

                                case var _ when textSection.Is('#'):
                                    return("Unexpected command argument reference character # outside of new command definition (currently unsupported)");

                                case var _ when textSection.Is('^'):
                                case var _ when textSection.Is('_'):
                                    return($"Unexpected script indicator {textSection[0]} outside of math mode");

                                case var _ when textSection.Is('&'):
                                    return($"Unexpected alignment tab character & outside of table environments");

                                case var _ when textSection.Is('~'):
                                    atoms.ControlSpace();

                                    break;

                                case var _ when textSection.Is('%'):
                                    var comment = new StringBuilder();

                                    while (NextSection(latex, ref textSection) && wordKind != WordKind.NewLine)
                                    {
                                        comment.Append(textSection);
                                    }
                                    atoms.Comment(comment.ToString());
                                    break;

                                case var _ when textSection.Is('{'):
                                    if (BuildBreakList(latex, atoms, ++i, false, '}').Bind(index => i = index).Error is string error)
                                    {
                                        return(error);
                                    }
                                    break;

                                case var _ when textSection.Is('}'):
                                    return("Unexpected }, unbalanced braces");

                                case var _ when wordKind == WordKind.NewLine:
                                    // Consume newlines after commands
                                    // Double newline == paragraph break
                                    if (afterNewline)
                                    {
                                        ParagraphBreak();
                                        afterNewline = false;
                                        break;
                                    }
                                    else
                                    {
                                        atoms.ControlSpace();
                                        afterNewline = true;
                                        continue;
                                    }

                                case var _ when wordKind == WordKind.Whitespace:
                                    //Collpase spaces
                                    if (afterCommand)
                                    {
                                        continue;
                                    }
                                    else
                                    {
                                        atoms.ControlSpace();
                                    }
                                    break;

                                default: //Just ordinary text
                                    if (oneCharOnly)
                                    {
                                        if (startAt + 1 < endAt) //Only re-read if current break span is more than 1 long
                                        {
                                            i--;
                                            breakList[i] = new BreakAtInfo(breakList[i].breakAt + 1, breakList[i].wordKind);
                                        }
                                        //Need to allocate in the end :(
                                        //Don't look ahead for punc; we are looking for one char only
                                        atoms.Text(textSection[0].ToString(), default);
                                    }
                                    else
                                    {
                                        atoms.Text(textSection.ToString(), NextSectionUntilPunc(latex, ref textSection));
                                    }
                                    break;
                                }
                            }
                            afterCommand = false;
                        }

                        //Escaped text section but in inline/display math mode
                        else if (displayMath != null)
                        {
                            switch (textSection)
                            {
                            case var _ when textSection.Is('$'):
                                throw new InvalidCodePathException("The $ case should have been accounted for.");

                            case var _ when textSection.Is('('):
                                return(displayMath switch
                                {
                                    true => "Cannot open inline math mode in display math mode",
                                    false => "Cannot open inline math mode in inline math mode",
                                    null => throw new InvalidCodePathException("displayMath is null. This switch should not be hit."),
                                });