Ejemplo n.º 1
0
 /// <summary>
 /// Produces a short, human-readable description of a grapheme cluster.
 /// </summary>
 /// <param name="gc">The grapheme cluster to describe.</param>
 /// <returns>
 /// <c>end of file</c> for the end-of-file marker; otherwise the cluster's text
 /// wrapped in quotes (single quotes when the text is itself a double quote,
 /// double quotes in every other case).
 /// </returns>
 public static string Description(this GraphemeCluster gc)
 {
     // The end-of-file marker is described by name rather than by its text.
     if (gc.endOfFile)
     {
         return "end of file";
     }

     // A lone double quote is shown between single quotes so the result stays readable.
     return gc.str == "\""
         ? "'\"'"
         : $"\"{gc.str}\"";
 }
Ejemplo n.º 2
0
Archivo: Lexer.cs Proyecto: envlang/env
    /// <summary>
    /// Builds (but does not throw) the exception describing an unexpected grapheme cluster
    /// encountered by the lexer.
    /// </summary>
    /// <param name="context">Enumerator from which the text preceding the error is collected.</param>
    /// <param name="rest">Enumerator positioned at the error; supplies the text up to the next newline.</param>
    /// <param name="state">Lexer state at the time of the error; interpolated into the message.</param>
    /// <param name="possibleNext">Rules that could have matched; their descriptions form the "expected" list.</param>
    /// <param name="gc">The offending grapheme cluster.</param>
    /// <returns>A <see cref="ParserErrorException"/> carrying the formatted diagnostic message.</returns>
    public static ParserErrorException ParserError(IImmutableEnumerator <GraphemeCluster> context, IImmutableEnumerator <GraphemeCluster> rest, S state, List <Rule> possibleNext, GraphemeCluster gc)
    {
        // Text taken from `context` until an element compares equal to `rest`.
        // NOTE(review): presumably this is everything consumed before the error position — confirm
        // against the ToIEnumerable/TakeUntil helpers, which are defined elsewhere.
        var strContext =
            context
            .ToIEnumerable()
            .TakeUntil(c => c.Equals(rest))
            .Select(c => c.str)
            .JoinWith("");

        // Text from the error position up to the next cluster that starts with a line feed.
        // The check is structural, not linguistic, so it is done ordinally: the culture-sensitive
        // overload depends on the current culture and the platform's collation data.
        var strRest =
            rest
            .TakeUntil(c => c.str.StartsWith("\n", StringComparison.Ordinal))
            .Select(c => c.str)
            .JoinWith("");

        var expected = ", ".Join(possibleNext.Select(p => p.description));

        // "end of file" reads fine on its own; real clusters get a "grapheme cluster" prefix.
        var actual   = (gc.endOfFile ? "" : "grapheme cluster ") + gc.Description();

        // Unicode category of the first code point, or a placeholder when there is none.
        var cat      = gc.codePoints
                       .First()
                       .Match(Some: x => x.UnicodeCategory(0).ToString(),
                              None: "None (empty string)");

        return(new ParserErrorException(
                   $"Unexpected {actual} (Unicode category {cat}) while the lexer was in state {state}: expected one of {expected}{Environment.NewLine}{strContext}  <--HERE  {strRest}"
                   ));
    }
Ejemplo n.º 3
0
Archivo: Lexer.cs Proyecto: envlang/env
    /// <summary>
    /// Applies a lexer rule to a single grapheme cluster, producing the next state, the lexeme
    /// text still being accumulated, and any lexemes completed along the way.
    /// </summary>
    /// <param name="state">State the lexer is in before the cluster is consumed.</param>
    /// <param name="lexeme">Text accumulated so far for the lexeme in progress.</param>
    /// <param name="c">The grapheme cluster being consumed.</param>
    /// <param name="rule">Rule supplying the intermediate (<c>throughState</c>) and final (<c>newState</c>) states.</param>
    /// <returns>
    /// A tuple of the resulting state, the pending lexeme text, and an enumerator over the
    /// lexemes emitted by this step (zero, one or two of them).
    /// </returns>
    private static ValueTuple <S, string, IImmutableEnumerator <Lexeme> > Transition(S state, string lexeme, GraphemeCluster c, Rule rule)
    {
        var emitted      = new List <Lexeme>();
        var currentState = state;
        var pendingText  = lexeme;

        // Entering a different intermediate state closes whatever was accumulated so far.
        var leavesCurrentState = rule.throughState != currentState;
        if (leavesCurrentState)
        {
            emitted.Add(new Lexeme(currentState, pendingText));
            currentState = rule.throughState;
            pendingText  = "";
        }

        // The cluster itself always belongs to the intermediate state's lexeme.
        pendingText = pendingText + c.str;

        // Moving on to a different final state closes the lexeme that now includes the cluster.
        var leavesThroughState = rule.newState != currentState;
        if (leavesThroughState)
        {
            emitted.Add(new Lexeme(currentState, pendingText));
            currentState = rule.newState;
            pendingText  = "";
        }

        return (currentState, pendingText, emitted.GetImmutableEnumerator());
    }
Ejemplo n.º 4
0
        /// <summary>
        /// Scans <c>Value</c> once, code unit by code unit, and populates the <c>Surrogates</c> and
        /// <c>Clusters</c> red-black trees. Calls made after the index has been built return
        /// immediately (guarded by <c>_hasIndexed</c>).
        /// </summary>
        /// <remarks>
        /// <c>Normalize()</c> is invoked before scanning. Lead surrogates are assumed to be followed
        /// by a trail surrogate: the code unit after a lead is read without a bounds or validity
        /// check, so an unpaired lead at the very end of <c>Value</c> would index out of range.
        /// NOTE(review): presumably <c>Normalize()</c> rejects or repairs such input — confirm.
        /// </remarks>
        public void Index()
        {
            if (_hasIndexed)
            {
                return;
            }

            Normalize();

            var codeUnitsIndex      = 0;
            var codeUnitsIndexUpper = Value.Length;

            // Most recently inserted surrogate node / currently open cluster node (if any).
            RedBlackTreeNode <GraphemeSurrogate> surrogate       = null;
            RedBlackTreeNode <GraphemeCluster>   graphemeCluster = null;

            var cpIndex = 0;
            var graphemeClusterIndex = 0;

            // Decodes the code point at codeUnitsIndex and advances past it. A lead surrogate
            // also inserts a new node into Surrogates; any other code unit extends the upper
            // code-point bound of the most recent surrogate node.
            uint Read()
            {
                var codeUnit = Value[codeUnitsIndex];

                if ((codeUnit & 0xfffffc00) == 0xd800) // U16_IS_LEAD
                {
                    surrogate = new RedBlackTreeNode <GraphemeSurrogate>()
                    {
                        Value = new GraphemeSurrogate()
                        {
                            CodePointsIndex = cpIndex++, CodePointsIndexUpper = cpIndex, CodeUnits = codeUnitsIndex
                        }
                    };

                    RedBlackTree.InsertRight(surrogate, ref Surrogates);
                    RedBlackTree.Balance(surrogate, ref Surrogates);

                    var cp = ((uint)codeUnit << 10) + Value[codeUnitsIndex + 1] - ((0xd800 << 10) + 0xdc00 - 0x10000); // U16_GET_SUPPLEMENTARY

                    codeUnitsIndex += 2;

                    return(cp);
                }

                if (surrogate != null)
                {
                    ++surrogate.Value.CodePointsIndexUpper;
                }

                ++cpIndex;

                return(Value[codeUnitsIndex++]);
            }

            // Advances past the code point at codeUnitsIndex without decoding it, doing the same
            // bookkeeping as Read().
            void Forward()
            {
                if ((Value[codeUnitsIndex] & 0xfffffc00) == 0xd800) // U16_IS_LEAD
                {
                    surrogate = new RedBlackTreeNode <GraphemeSurrogate>()
                    {
                        Value = new GraphemeSurrogate()
                        {
                            CodePointsIndex = cpIndex++, CodePointsIndexUpper = cpIndex, CodeUnits = codeUnitsIndex
                        }
                    };

                    RedBlackTree.InsertRight(surrogate, ref Surrogates);
                    RedBlackTree.Balance(surrogate, ref Surrogates);

                    codeUnitsIndex += 2;

                    // The pair is a single code point and both counters have already been advanced
                    // for it. Falling through would skip a third code unit, count a second code
                    // point and widen the node just created, unlike Read() and Peek().
                    return;
                }

                if (surrogate != null)
                {
                    ++surrogate.Value.CodePointsIndexUpper;
                }

                ++cpIndex;
                ++codeUnitsIndex;
            }

            // Decodes the code point starting at the given code-unit index without advancing.
            uint Peek(int index)
            {
                var codeUnit = Value[index];

                if ((codeUnit & 0xfffffc00) == 0xd800)                                                       // U16_IS_LEAD
                {
                    return(((uint)codeUnit << 10) + Value[index + 1] - ((0xd800 << 10) + 0xdc00 - 0x10000)); // U16_GET_SUPPLEMENTARY
                }
                return(codeUnit);
            }

            // Records a cluster made of the single code point that Read() just consumed.
            // Despite its name, codeUnitsStart receives the position just after that code point
            // and index receives the position where it began (see the call sites below).
            void IndexGraphemeCluster(int codeUnitsStart, int index)
            {
                // The code point occupied two code units: give it its own node and close any
                // open run.
                if (codeUnitsStart - index == 2)
                {
                    var gc = new RedBlackTreeNode <GraphemeCluster>()
                    {
                        Value = new GraphemeCluster()
                        {
                            Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsStart)
                        }
                    };

                    RedBlackTree.InsertRight(gc, ref Clusters);
                    RedBlackTree.Balance(gc, ref Clusters);

                    graphemeCluster = null;
                }

                // No run is open but the tree already has nodes: open a new run node here.
                else if (graphemeCluster == null && Clusters != null)
                {
                    graphemeCluster = new RedBlackTreeNode <GraphemeCluster>()
                    {
                        Value = new GraphemeCluster()
                        {
                            Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsStart)
                        }
                    };

                    RedBlackTree.InsertRight(graphemeCluster, ref Clusters);
                    RedBlackTree.Balance(graphemeCluster, ref Clusters);
                }

                // Otherwise just count the cluster, widening the open run node when there is one.
                else
                {
                    ++graphemeClusterIndex;

                    if (graphemeCluster != null)
                    {
                        ++graphemeCluster.Value.Interval.IndexUpper;
                        ++graphemeCluster.Value.CodeUnitsInterval.IndexUpper;
                    }
                }
            }

            // Re-entry point used after a multi-code-point cluster has been recorded inside the
            // inner for-loop below.
loop:

            while (codeUnitsIndex < codeUnitsIndexUpper)
            {
                var index = codeUnitsIndex;
                var a     = Read();

                if (codeUnitsIndex < codeUnitsIndexUpper)
                {
                    var b = Peek(codeUnitsIndex);

                    if (GraphemeCluster.Extends(a, b))
                    {
                        // b extends the cluster started by a: consume it, then keep consuming for
                        // as long as each following code point extends the previous one. a and b
                        // alternate roles as "previous" and "next".
                        for (Forward(); codeUnitsIndex < codeUnitsIndexUpper;)
                        {
                            a = Peek(codeUnitsIndex);

                            if (GraphemeCluster.Extends(b, a))
                            {
                                Forward();

                                if (codeUnitsIndex < codeUnitsIndexUpper)
                                {
                                    b = Peek(codeUnitsIndex);

                                    if (GraphemeCluster.Extends(a, b))
                                    {
                                        Forward();
                                        continue;
                                    }
                                }

                                else
                                {
                                    // Input exhausted while still inside the cluster; the final
                                    // cluster is recorded after the for-loop.
                                    break;
                                }
                            }

                            // The cluster ends before codeUnitsIndex: record [index, codeUnitsIndex)
                            // and resume the outer scan.
                            var gc = new RedBlackTreeNode <GraphemeCluster>()
                            {
                                Value = new GraphemeCluster()
                                {
                                    Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsIndex)
                                }
                            };

                            RedBlackTree.InsertRight(gc, ref Clusters);
                            RedBlackTree.Balance(gc, ref Clusters);

                            graphemeCluster = null;

                            goto loop;
                        }

                        // Reached only when the input ended inside the cluster: record it as the
                        // last cluster and finish.
                        graphemeCluster = new RedBlackTreeNode <GraphemeCluster>()
                        {
                            Value = new GraphemeCluster()
                            {
                                Interval = new UnicodeInterval(graphemeClusterIndex, graphemeClusterIndex + 1), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsIndex)
                            }
                        };

                        RedBlackTree.InsertRight(graphemeCluster, ref Clusters);
                        RedBlackTree.Balance(graphemeCluster, ref Clusters);

                        _hasIndexed = true;
                        return;
                    }

                    else
                    {
                        IndexGraphemeCluster(codeUnitsIndex, index);
                    }
                }

                else
                {
                    // a was the last code point in the input.
                    IndexGraphemeCluster(codeUnitsIndex, index);
                    _hasIndexed = true;
                    return;
                }
            }

            _hasIndexed = true;
        }