/// <summary>
/// Scans <paramref name="text"/> in a single pass and records what it finds in a new
/// <see cref="TokenList"/>: text regions (via <c>AddRegion</c>), opening/self-closing tags
/// (via <c>AddTag</c>) and their attributes (via <c>ParseAttributes</c>).
/// Constructs starting with <c>&lt;!</c> or <c>&lt;?</c> are delegated to
/// <c>HandleExclamationPoint</c> and <c>HandleQuestionMark</c> respectively.
/// </summary>
/// <param name="text">The markup to tokenize. Must not be null.</param>
/// <returns>The token list built over <paramref name="text"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="Exception">
/// Thrown with the message "bad depth" when opening and closing tags do not balance:
/// either a closing tag appears with no open tag left to close, or tags are still open
/// when the end of the text is reached.
/// </exception>
public static unsafe TokenList ParseTokens(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    var tokens = new TokenList(text);

    // Nesting level of the tag currently being scanned; passed along with every token.
    long depth = 0;

    fixed (char* pText = text)
    {
        char* p = pText;
        char* pEnd = pText + text.Length;

        while (p < pEnd)
        {
            // Skip past whitespace between tags. pStart stays at the first whitespace
            // character, so a following text region includes its leading whitespace.
            char* pStart = p;
            while (p != pEnd && (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'))
            {
                ++p;
            }

            // Identify a text region (if there is one): everything up to the next '<'.
            // A run consisting only of whitespace is intentionally not recorded.
            if (p != pEnd && *p != '<')
            {
                while (p != pEnd && *p != '<')
                {
                    ++p;
                }

                tokens.AddRegion(pStart - pText, TokenType.TextNormal, p - pStart, depth);
            }

            // Identify a tag region; p is on '<' here unless the text is exhausted.
            if (p != pEnd)
            {
                pStart = p;
                ++p;

                // Character right after '<'. The bound is checked explicitly so a text that
                // ends in '<' is handled without relying on the string's null terminator.
                char marker = (p != pEnd) ? *p : '\0';

                if (marker == '!')
                {
                    p = HandleExclamationPoint(pText, pStart, pEnd, depth, tokens);
                }
                else if (marker == '?')
                {
                    p = HandleQuestionMark(pText, pStart, pEnd, depth, tokens);
                }
                else
                {
                    // Normal tags (closing, opening, self-closing).
                    bool isClosing = (marker == '/');

                    // Advance to the closing '>' (or to the end of the text if the tag
                    // is never terminated).
                    while (p != pEnd && *p != '>')
                    {
                        ++p;
                    }

                    if (isClosing)
                    {
                        --depth;

                        // A closing tag with nothing open is unbalanced input. Fail here
                        // rather than letting a later opening tag bring the counter back
                        // to zero and hide the problem from the final check.
                        if (depth < 0)
                        {
                            throw new Exception("bad depth");
                        }
                    }
                    else
                    {
                        // QName format: [prefix:]local
                        // The name runs from just after '<' to the first whitespace
                        // character or to the end of the tag, whichever comes first.
                        char* pFirstSymbol = pStart + 1;
                        char* pTmp = pFirstSymbol;
                        long namePrefixLength = 0;
                        while (pTmp != p && (*pTmp != ' ' && *pTmp != '\t' && *pTmp != '\r' && *pTmp != '\n'))
                        {
                            // Only the first ':' delimits the prefix.
                            if (*pTmp == ':' && namePrefixLength == 0)
                            {
                                namePrefixLength = (pTmp - pFirstSymbol);
                            }

                            ++pTmp;
                        }

                        long length = pTmp - pFirstSymbol;
                        tokens.AddTag(pFirstSymbol - pText, length, namePrefixLength, depth);

                        // Check for self-closing: a '/' immediately before the end of the
                        // tag means no new nesting level is opened.
                        if (*(p - 1) != '/')
                        {
                            ++depth;
                        }

                        // Attributes: handed the name start, the name end and the tag end.
                        ParseAttributes(pFirstSymbol, pTmp, p, tokens);
                    }
                }
            }

            // Step past the tag's final character (or past the end of the text, which
            // terminates the loop).
            ++p;
        }
    }

    // Every opened tag must have been closed by the end of the text.
    if (depth != 0)
    {
        throw new Exception("bad depth");
    }

    return tokens;
}