Ejemplo n.º 1
0
        /// <summary>
        /// Scans <paramref name="text"/> in a single pass and records its text regions and tags
        /// in a new <see cref="TokenList"/>.
        /// </summary>
        /// <remarks>
        /// Text between tags is reported through <c>AddRegion</c>; opening and self-closing tags are
        /// reported through <c>AddTag</c> followed by <c>ParseAttributes</c>. Closing tags only
        /// decrease the nesting depth and add no token here. Markup starting with <c>&lt;!</c> or
        /// <c>&lt;?</c> is delegated to <c>HandleExclamationPoint</c> / <c>HandleQuestionMark</c>.
        /// </remarks>
        /// <param name="text">The markup text to tokenize.</param>
        /// <returns>The token list built from <paramref name="text"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        /// <exception cref="FormatException">
        /// The nesting depth is not zero once the whole input has been scanned.
        /// </exception>
        public static unsafe TokenList ParseTokens(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            var tokens = new TokenList(text);

            long depth = 0;

            fixed (char* pText = text)
            {
                char* p = pText;
                char* pEnd = pText + text.Length;

                while (p < pEnd)
                {
                    // Skip whitespace between tags. pStart stays on the first whitespace character,
                    // so a text region that follows includes its leading whitespace.
                    char* pStart = p;
                    while (p != pEnd && (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'))
                    {
                        ++p;
                    }

                    // Text region: everything up to the next '<' or the end of the input.
                    // A run that consists of whitespace only is not recorded as a region.
                    if (p != pEnd && *p != '<')
                    {
                        while (p != pEnd && *p != '<')
                        {
                            ++p;
                        }

                        tokens.AddRegion(pStart - pText, TokenType.TextNormal, p - pStart, depth);
                    }

                    // Tag region: p is on '<' here.
                    if (p != pEnd)
                    {
                        pStart = p;
                        ++p;

                        // The input may end right after '<', in which case p == pEnd. Peek with an
                        // explicit bounds check instead of relying on the string's null terminator;
                        // '\0' selects the "normal tag" branch, which then sees an empty name.
                        char marker = (p != pEnd) ? *p : '\0';

                        if (marker == '!')
                        {
                            // NOTE(review): the helper's result is used as the new scan position and
                            // is advanced by one below - presumably it points at the last character
                            // of the construct it consumed; confirm against the helper.
                            p = HandleExclamationPoint(pText, pStart, pEnd, depth, tokens);
                        }
                        else if (marker == '?')
                        {
                            p = HandleQuestionMark(pText, pStart, pEnd, depth, tokens);
                        }
                        else
                        {
                            // Normal tags (closing, opening, self-closing).
                            bool isClosing = (marker == '/');

                            // Advance to the terminating '>' (or the end of input if it is missing).
                            while (p != pEnd && *p != '>')
                            {
                                ++p;
                            }

                            if (isClosing)
                            {
                                // NOTE(review): depth may become negative for a stray closing tag and
                                // is only validated after the whole input has been scanned.
                                --depth;
                            }
                            else
                            {
                                // QName format: [prefix:]local
                                char* pFirstSymbol = pStart + 1;
                                char* pTmp = pFirstSymbol;
                                long namePrefixLength = 0;

                                // The name runs until the first whitespace character or the end of
                                // the tag; the offset of the first ':' is the prefix length.
                                // NOTE(review): for a tag such as "<br/>" the scan stops only at '>',
                                // so the reported name length includes the trailing '/'.
                                while (pTmp != p && (*pTmp != ' ' && *pTmp != '\t' && *pTmp != '\r' && *pTmp != '\n'))
                                {
                                    if (*pTmp == ':' && namePrefixLength == 0)
                                    {
                                        namePrefixLength = (pTmp - pFirstSymbol);
                                    }

                                    ++pTmp;
                                }

                                long length = pTmp - pFirstSymbol;

                                tokens.AddTag(pFirstSymbol - pText, length, namePrefixLength, depth);

                                // A tag whose last character before '>' is '/' is self-closing and
                                // does not open a new nesting level.
                                if (*(p - 1) != '/')
                                {
                                    ++depth;
                                }

                                // Attributes live between the end of the name and the end of the tag.
                                ParseAttributes(pFirstSymbol, pTmp, p, tokens);
                            }
                        }
                    }

                    // Step past the '>' (or past whatever position a helper returned).
                    ++p;
                }
            }

            if (depth != 0)
            {
                throw new FormatException("bad depth");
            }

            return tokens;
        }