/// <summary>
/// Builds a document tree from <paramref name="text"/> using bit-packed tag descriptors.
/// </summary>
/// <param name="text">The source markup that the descriptors point into.</param>
/// <param name="tags">
/// Descriptors in document order. Each one is decoded as three fields:
/// bits 0-23 = start index into <paramref name="text"/>, bits 24-47 = length,
/// bits 48-63 = a discriminator ("other").
/// </param>
/// <returns>The root <c>DocumentNode</c> with all parsed nodes attached.</returns>
/// <remarks>
/// No validation is performed: the input is assumed to be well formed, so an unbalanced
/// closing tag or an out-of-range descriptor surfaces as a runtime exception.
/// </remarks>
public static DocumentNode ParseWellFormedXml(string text, List<long> tags)
{
    // Index and length are 24-bit unsigned fields. They are extracted by shift-and-mask
    // rather than by a shift-left/arithmetic-shift-right pair, because the latter
    // sign-extends and turns any value with bit 23 set into a negative number.
    const long Mask24 = 0xFFFFFF;

    var root = new DocumentNode();
    ElementNode current = root;

    foreach (long tag in tags)
    {
        int index = (int)(tag & Mask24);
        int length = (int)((tag >> 24) & Mask24);
        ushort other = (ushort)((tag >> 48) & 0xFFFF);

        if (other == 0)
        {
            // Discriminator 0: plain text run.
            var textNode = new TextNode(text.Substring(index, length));
            current.AppendChild(textNode);
            continue;
        }

        if (other == ushort.MaxValue)
        {
            // Discriminator 0xFFFF: comment. Skips 4 leading and 3 trailing characters,
            // presumably the "<!--" and "-->" delimiters.
            var commentNode = new CommentNode(text.SubstringTrim(index + 4, length - 7));
            current.AppendChild(commentNode);
            continue;
        }

        // Any other discriminator: an element tag, where "other" is the tag-name length.
        bool isClosingTag = text[index + 1] == '/';
        if (isClosingTag)
        {
            current = current.Parent;
            continue;
        }

        // The character just before the final one of the tag tells whether it is "<x ... />".
        bool isSelfClosingTag = text[index + length - 2] == '/';
        string tagName = text.Substring(index + 1, other);
        var element = new ElementNode(tagName, isSelfClosingTag);

        // Attributes span from right after the name up to (not including) the tag's last character.
        int attributeStart = index + other + 1;
        ParseAttributesFromWellFormedXml(element, text, attributeStart, length - (attributeStart - index) - 1);

        current.AppendChild(element);
        if (!isSelfClosingTag)
        {
            // Subsequent nodes become children of this element until its closing tag.
            current = element;
        }
    }

    return root;
}
/// <summary>
/// Builds a document tree from a token list and the tag groups that index into it.
/// </summary>
/// <param name="tokens">The token list; each tag group refers to it by token index.</param>
/// <param name="taglist">Tag groups in document order. Groups whose <c>TreeDepth</c> is -1 are skipped.</param>
/// <returns>The root <c>DocumentNode</c> with all created nodes attached.</returns>
/// <remarks>
/// Tree construction is deliberately separate from depth assignment so that errors can be
/// isolated in that earlier step. DTD entity/element/attlist groups currently produce no node.
/// </remarks>
public static DocumentNode ParseTagsToTree(TokenList tokens, List<TokenGroup> taglist) {
    var root = new DocumentNode();
    ElementNode current = root;

    foreach (var tag in taglist) {
        if (tag.TreeDepth == -1) {
            continue;
        }

        if (tag.Type != NodeType.ELEMENT_NODE) {
            Node node = CreateNonElementNode(tokens, tag);
            if (node != null) {
                current.AppendChild(node);
            }
            continue;
        }

        if (tag.IsClosing) {
            // A closing tag only moves the insertion point back up; it creates no node.
            current = current.Parent;
            continue;
        }

        bool isEmpty = tag.IsEmpty;
        var element = new ElementNode(tokens[tag.TokenStartIndex].Value, isEmpty);
        current.AppendChild(element);
        if (!isEmpty) {
            // Non-empty elements become the parent of whatever follows.
            current = element;
        }
        AppendAttributesFromTokens(element, tokens, tag);
    }

    return root;
}

// Reads the attribute tokens that follow an element's name token and adds them to the element.
private static void AppendAttributesFromTokens(ElementNode element, TokenList tokens, TokenGroup tag) {
    int end = tag.TokenEndIndex;

    // NOTE: j is advanced inside the body as well as by the loop header; the statement
    // order below is significant.
    for (int j = tag.TokenStartIndex + 1; j <= end; j++) {
        var tokenType = tokens[j].Type;

        if (tokenType == TokenType.CDATA) {
            var attribute = new NodeAttribute(tokens[j].Value, null);
            ++j;
            if (j < end && tokens[j].Type == TokenType.EQUALS_SIGN) {
                ++j;
                if (tokens[j].Type == TokenType.START_QUOTE) {
                    ++j;
                    if (j < end) {
                        if (tokens[j].Type == TokenType.END_QUOTE) {
                            // blank value; the loop increment steps over the end quote
                            attribute.Value = "";
                        } else {
                            // standard quoted value; step onto the end quote
                            attribute.Value = tokens[j].Value;
                            ++j;
                        }
                    } else {
                        // half-quoted value (trailing off the end of the tag/document?)
                        // TODO: report ATTRIBUTE_FORMAT / MISSING_END_QUOTE once logging exists.
                        // NOTE(review): j may be past TokenEndIndex here - confirm tokens[j] is valid.
                        attribute.Value = tokens[j].Value;
                        ++j;
                    }
                } else {
                    // unquoted value
                    attribute.Value = tokens[j].Value;
                }
            } else {
                // Name with no value. Step back so the loop increment re-examines this token.
                // TODO: report ATTRIBUTE_FORMAT / MISSING_VALUE once logging exists.
                --j;
            }
            element.Attributes.Add(attribute.Name, attribute.Value);
        } else if (tokenType != TokenType.FORWARD_SLASH) {
            // Unexpected token: skip ahead to the matching end quote, or to the next name token.
            // Both scans are bounded by the tag's last token so they cannot run off the list.
            // (Forward slashes are simply ignored for now.)
            if (tokenType == TokenType.START_QUOTE) {
                while (j <= end && tokens[j].Type != TokenType.END_QUOTE) {
                    ++j;
                }
            } else {
                while (j <= end && tokens[j].Type != TokenType.CDATA) {
                    ++j;
                }
            }
            // Step back so the loop increment lands on the token the scan stopped at.
            // TODO: report ATTRIBUTE_FORMAT / INVALID_TOKEN_POSITION once logging exists.
            --j;
        }
    }
}

// Creates the node for a non-element tag group, or returns null when the group type
// produces no node (DTD entity/element/attlist groups and unrecognized types).
private static Node CreateNonElementNode(TokenList tokens, TokenGroup tag) {
    var type = tag.Type;

    if (type == NodeType.TEXT_NODE) {
        return new TextNode(tokens[tag.TokenStartIndex].Value);
    }
    if (type == NodeType.COMMENT_NODE) {
        return new CommentNode(tokens[tag.TokenStartIndex].Value);
    }
    if (type == NodeType.CDATA_SECTION_NODE) {
        return new CDATASectionNode(tokens[tag.TokenStartIndex].Value);
    }
    if (type == NodeType.PROCESSING_INSTRUCTION_NODE) {
        return new ProcessingInstructionNode(tokens[tag.TokenStartIndex].Value);
    }
    if (type == NodeType.DOCUMENT_TYPE_NODE) {
        return CreateDocumentTypeNode(tokens, tag);
    }

    // DTD_ENTITY_NODE, DTD_ELEMENT_NODE and DTD_ATTLIST_NODE are not implemented yet.
    return null;
}

// Creates a document type node from tokens laid out as
//   name PUBLIC "public" "system"   or   name SYSTEM "system"
private static Node CreateDocumentTypeNode(TokenList tokens, TokenGroup tag) {
    var name = tokens[tag.TokenStartIndex].Value;
    var idKind = tokens[tag.TokenStartIndex + 1].Value;
    string publicId = null;
    string systemId = null;
    int j = tag.TokenStartIndex + 2;

    // Ordinal comparison: "PUBLIC" is a fixed keyword, so the match must not depend on the
    // current culture's casing rules (e.g. the Turkish dotted/dotless i).
    if (string.Equals(idKind, "PUBLIC", StringComparison.OrdinalIgnoreCase)) {
        if (tokens[j].Type == TokenType.START_QUOTE) {
            publicId = tokens[j + 1].Value;
            // skip start quote, value and end quote
            j += 3;
        }
    }

    if (tokens[j].Type == TokenType.START_QUOTE) {
        systemId = tokens[j + 1].Value;
    }

    return new DocumentTypeNode(name, publicId, systemId);
}
/// <summary>
/// Builds a document tree from <paramref name="text"/> using packed (index, length) descriptors.
/// </summary>
/// <param name="text">The source markup that the descriptors point into.</param>
/// <param name="tags">
/// Descriptors in document order: the low 32 bits hold the start index into
/// <paramref name="text"/> and the high 32 bits hold the length. Entries equal to 0 are skipped.
/// </param>
/// <returns>The root <c>DocumentNode</c> with all parsed nodes attached.</returns>
/// <remarks>
/// A descriptor whose first character is not '&lt;' becomes a text node; everything else is
/// treated as an element tag. Comment handling is disabled in this variant, so comment
/// descriptors must not be passed in. No validation is performed on the input.
/// </remarks>
public static unsafe DocumentNode ParseTreeFromWellFormedXml(string text, List<long> tags)
{
    var root = new DocumentNode();
    ElementNode current = root;

    fixed (char* pText = text)
    {
        for (int i = 0; i < tags.Count; i++)
        {
            long tag = tags[i];
            if (tag == 0)
            {
                continue;
            }

            int index = (int)tag;
            int length = (int)(tag >> 32);

            if (text[index] != '<')
            {
                var textNode = new TextNode(text.Substring(index, length));
                current.AppendChild(textNode);
                continue;
            }

            bool isClosingTag = text[index + 1] == '/';
            if (isClosingTag)
            {
                current = current.Parent;
                continue;
            }

            // The character just before the final one of the tag tells whether it is "<x ... />".
            bool isSelfClosingTag = text[index + length - 2] == '/';

            // Scan the tag name: it starts right after '<' and ends at the first whitespace,
            // or at the end of the tag content if there is none.
            char* pName = pText + index + 1;
            char* pEnd = pName + length - 2; // points at the tag's last character ('>')
            if (isSelfClosingTag)
            {
                // Stop before the '/' of "/>" so that "<br/>" yields "br" rather than "br/".
                --pEnd;
            }

            char* p = pName;
            while (p < pEnd && !char.IsWhiteSpace(*p))
            {
                ++p;
            }

            int tagNameLength = (int)(p - pName);
            string tagName = text.Substring(index + 1, tagNameLength);
            var element = new ElementNode(tagName, isSelfClosingTag);

            // Attributes span from right after the name up to (not including) the tag's last
            // character; for self-closing tags this span includes the trailing '/', exactly as
            // it does when whitespace precedes "/>".
            int attributeStart = index + tagNameLength + 1;
            ParseAttributesFromWellFormedXml(element, text, attributeStart, length - (attributeStart - index) - 1);

            current.AppendChild(element);
            if (!isSelfClosingTag)
            {
                // Subsequent nodes become children of this element until its closing tag.
                current = element;
            }
        }
    }

    return root;
}