예제 #1
0
        public void Index()
        {
            if (_hasIndexed)
            {
                return;
            }

            Normalize();

            var codeUnitsIndex      = 0;
            var codeUnitsIndexUpper = Value.Length;

            RedBlackTreeNode <GraphemeSurrogate> surrogate       = null;
            RedBlackTreeNode <GraphemeCluster>   graphemeCluster = null;

            var cpIndex = 0;
            var graphemeClusterIndex = 0;

            uint Read()
            {
                var codeUnit = Value[codeUnitsIndex];

                if ((codeUnit & 0xfffffc00) == 0xd800) // U16_IS_LEAD
                {
                    surrogate = new RedBlackTreeNode <GraphemeSurrogate>()
                    {
                        Value = new GraphemeSurrogate()
                        {
                            CodePointsIndex = cpIndex++, CodePointsIndexUpper = cpIndex, CodeUnits = codeUnitsIndex
                        }
                    };

                    RedBlackTree.InsertRight(surrogate, ref Surrogates);
                    RedBlackTree.Balance(surrogate, ref Surrogates);

                    var cp = ((uint)codeUnit << 10) + Value[codeUnitsIndex + 1] - ((0xd800 << 10) + 0xdc00 - 0x10000); // U16_GET_SUPPLEMENTARY

                    codeUnitsIndex += 2;

                    return(cp);
                }

                if (surrogate != null)
                {
                    ++surrogate.Value.CodePointsIndexUpper;
                }

                ++cpIndex;

                return(Value[codeUnitsIndex++]);
            }

            void Forward()
            {
                if ((Value[codeUnitsIndex] & 0xfffffc00) == 0xd800) // U16_IS_LEAD
                {
                    surrogate = new RedBlackTreeNode <GraphemeSurrogate>()
                    {
                        Value = new GraphemeSurrogate()
                        {
                            CodePointsIndex = cpIndex++, CodePointsIndexUpper = cpIndex, CodeUnits = codeUnitsIndex
                        }
                    };

                    RedBlackTree.InsertRight(surrogate, ref Surrogates);
                    RedBlackTree.Balance(surrogate, ref Surrogates);

                    codeUnitsIndex += 2;
                }

                if (surrogate != null)
                {
                    ++surrogate.Value.CodePointsIndexUpper;
                }

                ++cpIndex;
                ++codeUnitsIndex;
            }

            uint Peek(int index)
            {
                var codeUnit = Value[index];

                if ((codeUnit & 0xfffffc00) == 0xd800)                                                       // U16_IS_LEAD
                {
                    return(((uint)codeUnit << 10) + Value[index + 1] - ((0xd800 << 10) + 0xdc00 - 0x10000)); // U16_GET_SUPPLEMENTARY
                }
                return(codeUnit);
            }

            void IndexGraphemeCluster(int codeUnitsStart, int index)
            {
                if (codeUnitsStart - index == 2)
                {
                    var gc = new RedBlackTreeNode <GraphemeCluster>()
                    {
                        Value = new GraphemeCluster()
                        {
                            Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsStart)
                        }
                    };

                    RedBlackTree.InsertRight(gc, ref Clusters);
                    RedBlackTree.Balance(gc, ref Clusters);

                    graphemeCluster = null;
                }

                else if (graphemeCluster == null && Clusters != null)
                {
                    graphemeCluster = new RedBlackTreeNode <GraphemeCluster>()
                    {
                        Value = new GraphemeCluster()
                        {
                            Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsStart)
                        }
                    };

                    RedBlackTree.InsertRight(graphemeCluster, ref Clusters);
                    RedBlackTree.Balance(graphemeCluster, ref Clusters);
                }

                else
                {
                    ++graphemeClusterIndex;

                    if (graphemeCluster != null)
                    {
                        ++graphemeCluster.Value.Interval.IndexUpper;
                        ++graphemeCluster.Value.CodeUnitsInterval.IndexUpper;
                    }
                }
            }

loop:

            while (codeUnitsIndex < codeUnitsIndexUpper)
            {
                var index = codeUnitsIndex;
                var a     = Read();

                if (codeUnitsIndex < codeUnitsIndexUpper)
                {
                    var b = Peek(codeUnitsIndex);

                    if (GraphemeCluster.Extends(a, b))
                    {
                        for (Forward(); codeUnitsIndex < codeUnitsIndexUpper;)
                        {
                            a = Peek(codeUnitsIndex);

                            if (GraphemeCluster.Extends(b, a))
                            {
                                Forward();

                                if (codeUnitsIndex < codeUnitsIndexUpper)
                                {
                                    b = Peek(codeUnitsIndex);

                                    if (GraphemeCluster.Extends(a, b))
                                    {
                                        Forward();
                                        continue;
                                    }
                                }

                                else
                                {
                                    break;
                                }
                            }

                            var gc = new RedBlackTreeNode <GraphemeCluster>()
                            {
                                Value = new GraphemeCluster()
                                {
                                    Interval = new UnicodeInterval(graphemeClusterIndex++, graphemeClusterIndex), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsIndex)
                                }
                            };

                            RedBlackTree.InsertRight(gc, ref Clusters);
                            RedBlackTree.Balance(gc, ref Clusters);

                            graphemeCluster = null;

                            goto loop;
                        }

                        graphemeCluster = new RedBlackTreeNode <GraphemeCluster>()
                        {
                            Value = new GraphemeCluster()
                            {
                                Interval = new UnicodeInterval(graphemeClusterIndex, graphemeClusterIndex + 1), CodeUnitsInterval = new UnicodeInterval(index, codeUnitsIndex)
                            }
                        };

                        RedBlackTree.InsertRight(graphemeCluster, ref Clusters);
                        RedBlackTree.Balance(graphemeCluster, ref Clusters);

                        _hasIndexed = true;
                        return;
                    }

                    else
                    {
                        IndexGraphemeCluster(codeUnitsIndex, index);
                    }
                }

                else
                {
                    IndexGraphemeCluster(codeUnitsIndex, index);
                    _hasIndexed = true;
                    return;
                }
            }

            _hasIndexed = true;
        }