Example #1
0
        /// <summary>
        /// Builds a node tree from <paramref name="text"/> using packed tag descriptors.
        /// </summary>
        /// <param name="text">Source text that the descriptors index into.</param>
        /// <param name="tags">
        /// Packed descriptors: bits 0-23 hold the start index, bits 24-47 the length and
        /// bits 48-63 a marker (0 = text, <c>ushort.MaxValue</c> = comment, any other value
        /// is used as the tag-name length).
        /// </param>
        /// <returns>The document root with every decoded node attached beneath it.</returns>
        public static DocumentNode ParseWellFormedXml(string text, List<long> tags)
        {
            const int FieldBits = 24;   // width of the index field and of the length field
            const int MarkerBits = 16;  // width of the marker field in the top bits

            var document = new DocumentNode();
            ElementNode cursor = document;

            foreach (long packed in tags)
            {
                // Shift pairs isolate each field; the right shifts are arithmetic,
                // so the two 24-bit fields are sign-extended.
                int start = ((int)packed << (32 - FieldBits)) >> (32 - FieldBits);
                int tagLength = (int)((packed << MarkerBits) >> (FieldBits + MarkerBits));
                ushort marker = (ushort)(packed >> (64 - MarkerBits));

                // Marker 0: plain text run.
                if (marker == 0)
                {
                    cursor.AppendChild(new TextNode(text.Substring(start, tagLength)));
                    continue;
                }

                // Marker 0xFFFF: comment; skip 4 leading and 3 trailing characters ("<!--" / "-->").
                if (marker == ushort.MaxValue)
                {
                    cursor.AppendChild(new CommentNode(text.SubstringTrim(start + 4, tagLength - 7)));
                    continue;
                }

                // Anything else is an element tag and the marker is its name length.
                if (text[start + 1] == '/')
                {
                    // Closing tag: step back up to the parent element.
                    cursor = cursor.Parent;
                    continue;
                }

                // The character just before the final one tells whether the tag closes itself.
                bool selfClosing = text[start + tagLength - 2] == '/';
                var element = new ElementNode(text.Substring(start + 1, marker), selfClosing);

                // Attributes occupy everything after the name up to (not including) the last character.
                int attributesAt = start + marker + 1;
                int attributesLength = tagLength - (attributesAt - start) - 1;
                ParseAttributesFromWellFormedXml(element, text, attributesAt, attributesLength);

                cursor.AppendChild(element);
                if (!selfClosing)
                {
                    // Subsequent nodes become children of this element until its closing tag.
                    cursor = element;
                }
            }

            return document;
        }
Example #2
0
        /// <summary>
        /// Builds a node tree under a new <see cref="DocumentNode"/> from a token list and
        /// the tag groups that index into it.
        /// </summary>
        /// <param name="tokens">Token list addressed by each group's TokenStartIndex / TokenEndIndex.</param>
        /// <param name="taglist">Tag groups, processed in list order; groups whose TreeDepth is -1 are skipped.</param>
        /// <returns>The root document node with the created nodes appended beneath it.</returns>
        public static DocumentNode ParseTagsToTree(TokenList tokens, List<TokenGroup> taglist)
        {
            // temporary variables
            var root = new DocumentNode();
            ElementNode current = root;
            Node node = null;

            // loop through final tag array and create the tree
            // this is separate from setting the depth so that errors can
            // be isolated in that section
            foreach(var tag in taglist){
                node = null;
                // TreeDepth == -1 marks a group that contributes nothing to the tree.
                if(tag.TreeDepth != -1) {
                    if(tag.Type == NodeType.ELEMENT_NODE) {
                        if(tag.IsClosing) {
                            // closing tag: no node is created, just step back up to the parent
                            current = current.Parent;
                        } else if(tag.IsEmpty) {
                            // empty element: appended but never becomes the insertion point
                            node = new ElementNode(tokens[tag.TokenStartIndex].Value, true);
                            current.AppendChild(node);
                        } else {
                            // opening tag: append, then descend so later nodes become its children
                            var elementNode = new ElementNode(tokens[tag.TokenStartIndex].Value, false);
                            node = elementNode;
                            current.AppendChild(node);
                            current = elementNode;
                        }
                        if(node != null) {
                            // add attributes
                            // j is advanced inside the loop body as well as by the for-increment;
                            // each branch leaves j on the last token it consumed.
                            for(int j = tag.TokenStartIndex + 1; j <= tag.TokenEndIndex; j++) {
                                if(tokens[j].Type == TokenType.CDATA) {
                                    // a CDATA token starts an attribute; the value stays null unless one is found below
                                    // (presumably the constructor arguments are (name, value) -- confirm against NodeAttribute)
                                    var attribute = new NodeAttribute(tokens[j].Value, null);
                                    // look ahead one token for '='
                                    ++j;
                                    if( j < tag.TokenEndIndex &&
                                        tokens[j].Type == TokenType.EQUALS_SIGN ) {
                                        // step past '=' onto the start quote or the bare value
                                        ++j;
                                        if(tokens[j].Type == TokenType.START_QUOTE) {
                                            ++j;
                                            if(j < tag.TokenEndIndex) {
                                                if(tokens[j].Type == TokenType.END_QUOTE) {
                                                    // blank value
                                                    attribute.Value = "";
                                                } else {
                                                    // standard quoted value
                                                    attribute.Value = tokens[j].Value;
                                                    // move onto the token after the value; the for-increment then steps past it
                                                    ++j;
                                                }
                                            } else {
                                                // half-quoted value (trailing off end of document?)
                                                //this->insertLogEntry(ERROR, ATTRIBUTE_FORMAT, MISSING_END_QUOTE, null);
                                                attribute.Value = tokens[j].Value;
                                                ++j;
                                            }
                                        } else {
                                            // unquoted value
                                            attribute.Value = tokens[j].Value;
                                        }
                                    } else {
                                        // no '=' follows: undo the look-ahead so the for-increment revisits that token
                                        //debugval = Parser::reassembleTokenString(array_slice(tokens, j - 1, 1));
                                        //this->insertLogEntry(WARNING, ATTRIBUTE_FORMAT, MISSING_VALUE, "after '".debugval."'");
                                        --j;
                                    }

                                    // NOTE(review): 'as' result is dereferenced without a null check; node is always
                                    // assigned an ElementNode in the branches above when it is non-null here.
                                    (node as ElementNode).Attributes.Add(attribute.Name, attribute.Value);

                                } else if(tokens[j].Type == TokenType.FORWARD_SLASH) {
                                    // ignore for now
                                } else {
                                    // unexpected token: skip ahead to the matching end quote, or to the next CDATA token
                                    // NOTE(review): neither scan below is bounded by tag.TokenEndIndex
                                    //int t = j;
                                    if(tokens[j].Type == TokenType.START_QUOTE) {
                                        while(tokens[j].Type != TokenType.END_QUOTE) {
                                            ++j;
                                        }
                                    } else {
                                        while(tokens[j].Type != TokenType.CDATA) {
                                            ++j;
                                        }
                                    }
                                    // step back one so the for-increment lands on the token the scan stopped at
                                    --j;
                                    //debugval = Parser::reassembleTokenString(array_slice(tokens, t, j - t + 1));
                                    //this->insertLogEntry(ERROR, ATTRIBUTE_FORMAT, INVALID_TOKEN_POSITION, "token sequence '".debugval."' removed");
                                }
                            }
                        }
                    } else {
                        // non-element groups: the node is built from the group's first token(s)
                        if(tag.Type == NodeType.TEXT_NODE) {
                            node = new TextNode(tokens[tag.TokenStartIndex].Value);
                        } else if(tag.Type == NodeType.COMMENT_NODE) {
                            node = new CommentNode(tokens[tag.TokenStartIndex].Value);
                        } else if(tag.Type == NodeType.CDATA_SECTION_NODE) {
                            node = new CDATASectionNode(tokens[tag.TokenStartIndex].Value);
                        } else if(tag.Type == NodeType.PROCESSING_INSTRUCTION_NODE) {
                            node = new ProcessingInstructionNode(tokens[tag.TokenStartIndex].Value);
                        } else if(tag.Type == NodeType.DOCUMENT_TYPE_NODE) {
                            // format: PUBLIC "public" "system"
                            // or      SYSTEM "system"
                            var name = tokens[tag.TokenStartIndex].Value;
                            var type = tokens[tag.TokenStartIndex + 1].Value;
                            string publicId = null;
                            string systemId = null;
                            int j = tag.TokenStartIndex + 2;
                            // case-insensitive match on the PUBLIC keyword
                            if(String.Compare(type, "PUBLIC", true) == 0) {
                                if(tokens[j].Type == TokenType.START_QUOTE) {
                                    publicId = tokens[j + 1].Value;
                                    // skip start quote, value and one more token (presumably the end quote)
                                    j += 3;
                                }
                            }
                            if(tokens[j].Type == TokenType.START_QUOTE) {
                                systemId = tokens[j + 1].Value;
                            }
                            node = new DocumentTypeNode(name, publicId, systemId);
                        } else if(tag.Type == NodeType.DTD_ENTITY_NODE) {
                            // no node is created for this type (construction is commented out)
                            //node = new DTDEntityNode();
                        } else if(tag.Type == NodeType.DTD_ELEMENT_NODE) {
                            // no node is created for this type (construction is commented out)
                            //node = new DTDElementNode();
                        } else if(tag.Type == NodeType.DTD_ATTLIST_NODE) {
                            // no node is created for this type (construction is commented out)
                            //node = new DTDAttListNode();
                        }
                        // node stays null for unhandled types, in which case nothing is appended
                        if(node != null) {
                            current.AppendChild(node);
                        }
                    }
                }
            }

            return root;
        }
Example #3
0
        /// <summary>
        /// Builds a node tree from <paramref name="text"/> using pre-computed spans.
        /// </summary>
        /// <param name="text">The source text that the spans in <paramref name="tags"/> index into.</param>
        /// <param name="tags">
        /// One packed value per span: the low 32 bits hold the start index into
        /// <paramref name="text"/> and the high 32 bits hold the span length.
        /// Entries equal to 0 are skipped.
        /// </param>
        /// <returns>The document root with every parsed node attached beneath it.</returns>
        public static unsafe DocumentNode ParseTreeFromWellFormedXml(string text, List<long> tags)
        {
            var root = new DocumentNode();
            ElementNode current = root;
            fixed (char* pText = text)
            {
                for (int i = 0; i < tags.Count; i++)
                {
                    long tag = tags[i];

                    // A zero entry carries no span.
                    if (tag == 0)
                    {
                        continue;
                    }

                    // The cast deliberately keeps only the low 32 bits, so make it explicit
                    // that it must not trap when the project is compiled with overflow checks.
                    int index = unchecked((int)tag);
                    int length = (int)(tag >> 32);

                    // A span that does not start with '<' is a text run.
                    if (text[index] != '<')
                    {
                        var textNode = new TextNode(text.Substring(index, length));
                        current.AppendChild(textNode);
                        continue;
                    }

                    // NOTE: comment spans ("<!-- ... -->") are not special-cased here; they start
                    // with '<' and therefore fall through to the element handling below.

                    bool isClosingTag = text[index + 1] == '/';
                    if (isClosingTag)
                    {
                        // Closing tag: step back up to the parent element.
                        current = current.Parent;
                        continue;
                    }

                    // The character just before the final '>' tells whether the tag closes itself.
                    bool isSelfClosingTag = text[index + length - 2] == '/';

                    // Scan the tag name: it starts right after '<' and ends at the first
                    // whitespace character or at the end of the tag.
                    char* pNameStart = pText + index + 1;
                    char* pNameEnd = pNameStart + length - 2;   // points at the closing '>'
                    if (isSelfClosingTag)
                    {
                        // Exclude the trailing '/' so that "<br/>" yields the name "br"
                        // instead of "br/" when no whitespace precedes the slash.
                        --pNameEnd;
                    }

                    char* p = pNameStart;
                    while (p < pNameEnd && !char.IsWhiteSpace(*p))
                    {
                        ++p;
                    }

                    int tagNameLength = (int)(p - pNameStart);
                    string tagName = text.Substring(index + 1, tagNameLength);
                    var node = new ElementNode(tagName, isSelfClosingTag);

                    // Attributes occupy everything after the name up to (not including) the
                    // closing '>'; for a self-closing tag that range still ends with the '/'.
                    int attributeStart = index + tagNameLength + 1;
                    ParseAttributesFromWellFormedXml(node, text, attributeStart, length - (attributeStart - index) - 1);

                    current.AppendChild(node);
                    if (!isSelfClosingTag)
                    {
                        // Subsequent nodes become children of this element until its closing tag.
                        current = node;
                    }
                }
            }

            return root;
        }