/// <summary>
/// Serializes a single XML node to a string.
/// </summary>
/// <param name="node">The node to serialize.</param>
/// <returns>The string formed by joining every part emitted while writing
/// <paramref name="node"/>.</returns>
internal string makeString(ASXML node) {
    _init();

    // Ancestor namespace declarations must be collected before the node is written.
    _fetchAncestorNamespaces(node);
    _writeNode(node);

    var parts = m_parts.getUnderlyingArray();
    int partCount = m_parts.length;

    // Only the first partCount entries of the backing array are joined.
    return String.Join("", parts, 0, partCount);
}
/// <summary>
/// Reads a comment node and adds it as a child to the element currently being processed.
/// </summary>
/// <remarks>
/// On entry <c>m_pos</c> is at the opening <c>&lt;!--</c>; on return it is just past the
/// closing <c>--&gt;</c>. The node's text is the content between the two delimiters. No node
/// is created when <c>FLAG_IGNORE_COMMENTS</c> is set. Throws (via <c>_error</c>) if the
/// comment is unterminated or contains <c>--</c> that is not followed by <c>&gt;</c>.
/// </remarks>
private void _readComment() {
    m_pos += 4;     // For opening '<!--'

    ReadOnlySpan<char> span = m_str.AsSpan(m_pos);
    int charsLeft = m_str.Length - m_pos;
    char newline = s_newLineChar;

    ReadOnlySpan<char> searchChars = stackalloc char[] {'-', newline};
    int charsRead;

    while (true) {
        int index = span.IndexOfAny(searchChars);
        if (index == -1) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_COMMENT);
        }

        char charAtIndex = span[index];

        if (charAtIndex == newline) {
            // Keep the line counter in sync and continue after the line break.
            m_curLine++;
            span = span.Slice(index + 1);
            continue;
        }

        // A '-' needs at least two following characters to be able to form '-->'.
        if ((uint)(index + 2) >= (uint)span.Length) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_COMMENT);
        }

        if (span[index + 1] == '-') {
            // '--' may only appear as part of the closing '-->'.
            if (span[index + 2] != '>') {
                throw _error(ErrorCode.MARIANA__XML_PARSER_COMMENT_INVALID_SEQUENCE);
            }

            // Characters consumed from the start of the content, including the '-->'.
            charsRead = charsLeft - span.Length + index + 3;
            break;
        }

        span = span.Slice(index + 1);
    }

    int startPos = m_pos;
    m_pos += charsRead;

    if ((m_parserFlags & FLAG_IGNORE_COMMENTS) != 0) {
        return;
    }

    // charsRead includes the 3-character '-->' terminator, which is not part of the text.
    m_nodeStack.add(ASXML.createCommentNode(m_str.Substring(startPos, charsRead - 3)));
}
/// <summary>
/// Reads a CDATA section and adds it as a text node child to the element currently being
/// processed.
/// </summary>
/// <remarks>
/// On entry <c>m_pos</c> is at the opening <c>&lt;![CDATA[</c>; on return it is just past the
/// closing <c>]]&gt;</c>. An empty section produces no node. Throws (via <c>_error</c>) if the
/// section is not terminated.
/// </remarks>
private void _readCDATA() {
    m_pos += 9;     // For opening '<![CDATA['

    ReadOnlySpan<char> span = m_str.AsSpan(m_pos);
    int charsLeft = m_str.Length - m_pos;
    char newline = s_newLineChar;

    ReadOnlySpan<char> searchChars = stackalloc char[] {']', newline};
    int charsRead;

    while (true) {
        int index = span.IndexOfAny(searchChars);
        if (index == -1) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_CDATA);
        }

        char charAtIndex = span[index];

        if (charAtIndex == newline) {
            m_curLine++;
        }
        else {
            // A ']' needs at least two following characters to be able to form ']]>'.
            if ((uint)(index + 2) >= (uint)span.Length) {
                throw _error(ErrorCode.XML_PARSER_UNTERMINATED_CDATA);
            }

            // The section ends only at the exact sequence ']]>'.
            if (span[index + 1] == ']' && span[index + 2] == '>') {
                // Characters consumed from the start of the content, including the ']]>'.
                charsRead = charsLeft - span.Length + index + 3;
                break;
            }
        }

        span = span.Slice(index + 1);
    }

    int startPos = m_pos;
    m_pos += charsRead;

    // Only the terminator was read: the section is empty, so no node is created.
    if (charsRead == 3) {
        return;
    }

    // Exclude the 3-character ']]>' terminator from the node text.
    ASXML node = ASXML.createCDATANode(m_str.Substring(startPos, charsRead - 3));
    m_nodeStack.add(node);
}
/// <summary>
/// Reads a processing instruction and pushes it onto the node stack as a child of the
/// element currently being processed.
/// </summary>
/// <remarks>
/// The target name must be a valid name without a prefix. No node is created when
/// <c>FLAG_IGNORE_PI</c> is set or when the target is exactly <c>xml</c>.
/// </remarks>
private void _readProcInstr() {
    // Skip the opening '<?'.
    m_pos += 2;

    bool nameOk = _readName(out string? prefix, out string? name);
    if (!nameOk || prefix != null) {
        throw _error(
            ErrorCode.MARIANA__XML_PARSER_INVALID_NAME,
            (prefix == null) ? name : prefix + ":" + name
        );
    }

    _goToNextNonSpace();

    ReadOnlySpan<char> remaining = m_str.AsSpan(m_pos);
    int totalAvailable = m_str.Length - m_pos;
    char newline = s_newLineChar;
    ReadOnlySpan<char> stopChars = stackalloc char[] { '?', newline };

    int consumed;

    while (true) {
        int hit = remaining.IndexOfAny(stopChars);
        if (hit == -1) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_PI);
        }

        if (remaining[hit] == newline) {
            m_curLine++;
        }
        else {
            // A '?' closes the instruction only when immediately followed by '>'.
            int following = hit + 1;
            if ((uint)following < (uint)remaining.Length && remaining[following] == '>') {
                consumed = totalAvailable - remaining.Length + hit + 2;
                break;
            }
        }

        remaining = remaining.Slice(hit + 1);
    }

    int contentStart = m_pos;
    m_pos += consumed;

    bool ignorePI = (m_parserFlags & FLAG_IGNORE_PI) != 0;
    if (ignorePI || name == "xml") {
        return;
    }

    // The trailing '?>' is not part of the instruction text.
    string text = m_str.Substring(contentStart, consumed - 2);
    m_nodeStack.add(ASXML.unsafeCreateProcessingInstruction(name, text));
}
/// <summary>
/// Builds the attribute nodes of the current element from the pending unresolved
/// attributes, resolving each attribute's prefix to a namespace.
/// </summary>
/// <param name="elementName">The name of the element (used in error messages).</param>
/// <returns>An array containing the created attributes; an empty array if the element
/// has none.</returns>
private ASXML[] _resolveAttributes(ASQName elementName) {
    int attrCount = m_unresolvedAttrs.length;
    if (attrCount == 0) {
        return Array.Empty<ASXML>();
    }

    var created = new ASXML[attrCount];

    for (int i = 0; i < attrCount; i++) {
        var pending = m_unresolvedAttrs[i];
        string? prefix = pending.prefix;

        // Attributes without a prefix are placed in the public namespace.
        ASNamespace? ns = (prefix != null) ? _resolvePrefix(prefix) : ASNamespace.@public;

        if (ns == null) {
            throw _error(
                ErrorCode.XML_PREFIX_NOT_BOUND,
                prefix,
                pending.localName,
                line: pending.lineNumber
            );
        }

        ASQName qualifiedName = new ASQName(ns, pending.localName);

        // Reject a name that matches any attribute already created for this element.
        for (int j = 0; j < i; j++) {
            if (!ASQName.AS_equals(qualifiedName, created[j].name())) {
                continue;
            }

            throw _error(
                ErrorCode.XML_ATTRIBUTE_DUPLICATE,
                qualifiedName.AS_toString(),
                elementName.AS_toString(),
                line: pending.lineNumber
            );
        }

        created[i] = ASXML.unsafeCreateAttribute(qualifiedName, pending.value);
    }

    m_unresolvedAttrs.clear();
    return created;
}
/// <summary>
/// Collects the namespace declarations of all ancestors of <paramref name="node"/> into
/// <c>m_nsInScope</c>. Does nothing unless the node is an element.
/// </summary>
/// <param name="node">The node whose ancestors are visited.</param>
private void _fetchAncestorNamespaces(ASXML node) {
    if (node.nodeType != XMLNodeType.ELEMENT) {
        return;
    }

    // Walk from the immediate parent up to the root.
    ASXML? ancestor = node.parent();
    while (ancestor != null) {
        ancestor.internalGetNamespaceDecls(ref m_nsInScope);
        ancestor = ancestor.parent();
    }

    if (m_nsInScope.length == 0) {
        return;
    }

    // Ancestors were visited nearest-first; reverse the collected list.
    m_nsInScope.asSpan().Reverse();
}
/// <summary>
/// Serializes every node of an XML list, in order, into a single string.
/// </summary>
/// <param name="list">The list whose nodes are serialized.</param>
/// <returns>The string formed by joining every part emitted for all nodes.</returns>
internal string makeString(ASXMLList list) {
    _init();

    int count = list.length();
    int i = 0;

    while (i < count) {
        ASXML item = list[i];

        _fetchAncestorNamespaces(item);
        _writeNode(item);

        // Reset the per-node state so that the next item starts fresh.
        m_nsInScope.clear();
        m_tagStack.clear();
        m_nextTempPrefixId = 0;

        i++;
    }

    var parts = m_parts.getUnderlyingArray();
    return String.Join("", parts, 0, m_parts.length);
}
/// <summary>
/// Parses the given XML string, which may contain at most one significant top-level
/// node, and returns that node.
/// </summary>
/// <remarks>
/// Once more than one node has been read, only elements and text nodes that are not purely
/// whitespace count as significant; other top-level nodes are discarded. A second
/// significant node raises <c>XML_MARKUP_AFTER_ROOT</c>.
/// </remarks>
/// <returns>The XML object parsed from <paramref name="str"/>, or an empty text node if
/// nothing significant was found.</returns>
/// <param name="str">The XML string to parse.</param>
public ASXML parseSingleElement(string str) {
    // True for elements and for text nodes that are not purely whitespace.
    static bool isSignificant(ASXML candidate) {
        return candidate.nodeType == XMLNodeType.ELEMENT
            || (candidate.nodeType == XMLNodeType.TEXT
                && !XMLHelper.isOnlyWhitespace(candidate.nodeText));
    }

    _init(str);

    ASXML? root = _readSingleNode();

    // The whole input was consumed by the first read: return it unfiltered.
    if (m_pos == m_str.Length) {
        return root ?? ASXML.createTextNode("");
    }

    if (root != null && !isSignificant(root)) {
        root = null;
    }

    while (m_pos < m_str.Length) {
        ASXML? next = _readSingleNode();

        if (next == null || !isSignificant(next)) {
            continue;
        }

        if (root != null) {
            throw _error(ErrorCode.XML_MARKUP_AFTER_ROOT);
        }

        root = next;
    }

    return root ?? ASXML.createTextNode("");
}
private void _enterElement(ASXML elem) { _writeIndent(); ASQName elemName = elem.internalGetName() !; var stackItem = new TagStackItem { tempPrefixIdStart = m_nextTempPrefixId, nsDeclBeginIndex = m_nsInScope.length, localName = elemName.localName, prefix = _getPrefix(elemName, isAttr: false) }; elem.internalGetNamespaceDecls(ref m_nsInScope); m_parts.add("<"); _writeName(stackItem.prefix, stackItem.localName); foreach (ASXML attr in elem.getAttributeEnumerator()) { ASQName attrName = attr.internalGetName() !; string attrValue = attr.nodeText !; m_parts.add(" "); _writeName(_getPrefix(attrName, isAttr: true), attrName.localName); m_parts.add("=\""); m_parts.add(XMLHelper.escape(attrValue, 0, attrValue.Length, ref m_escBuffer, isAttr: true)); m_parts.add("\""); } // If this is the root we must include the ancestor namespaces as well. int nsDeclStart = (m_tagStack.length == 0) ? 0 : stackItem.nsDeclBeginIndex; for (int i = nsDeclStart, n = m_nsInScope.length; i < n; i++) { ASNamespace nsDecl = m_nsInScope[i]; if (nsDecl.prefix !.Length != 0) { m_parts.add(" xmlns:"); m_parts.add(nsDecl.prefix); m_parts.add("=\""); }
/// <summary>
/// Reads an element's start tag, including its attributes.
/// </summary>
/// <remarks>
/// A self-closing tag produces a finished element on the node stack right away. Any other
/// start tag pushes a new entry onto the parser stack, to be completed by the matching
/// end tag.
/// </remarks>
private void _readStartTag() {
    // Skip the opening '<'.
    m_pos++;

    if (!_readName(out string? prefix, out string? localName)) {
        throw _error(
            ErrorCode.MARIANA__XML_PARSER_INVALID_NAME,
            (prefix == null) ? localName : prefix + ":" + localName
        );
    }

    if (m_pos == m_str.Length) {
        throw _error(ErrorCode.XML_PARSER_UNTERMINATED_ELEMENT);
    }

    // Remember where this element's namespace declarations begin.
    m_nsInScopePtrs.add(m_nsInScope.length);

    char current;
    while (true) {
        if (!_goToNextNonSpace()) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_ELEMENT);
        }

        current = m_str[m_pos];
        if (current == '/' || current == '>') {
            break;
        }

        // An attribute must be preceded by at least one whitespace character.
        if (!XMLHelper.isWhitespaceChar(m_str[m_pos - 1])) {
            throw _error(ErrorCode.XML_PARSER_ELEMENT_MALFORMED);
        }

        _readAttribute();
    }

    // The loop exits with 'current' holding the character at m_pos ('/' or '>').
    bool isSelfClosing = (current == '/');
    if (isSelfClosing) {
        m_pos++;
    }

    if (m_pos == m_str.Length || m_str[m_pos] != '>') {
        throw _error(ErrorCode.XML_PARSER_UNTERMINATED_ELEMENT);
    }
    m_pos++;

    ASNamespace? elementNS = _resolvePrefix(prefix);
    if (elementNS == null) {
        throw _error(ErrorCode.XML_PREFIX_NOT_BOUND, prefix, localName);
    }

    ASQName elementName = new ASQName(elementNS, localName);
    ASXML[] attributes = _resolveAttributes(elementName);

    // Namespace declarations added while reading this tag.
    int nsDeclStart = m_nsInScopePtrs[m_nsInScopePtrs.length - 1];
    int nsDeclCount = m_nsInScope.length - nsDeclStart;

    ASNamespace[] nsDecls;
    if (nsDeclCount == 0) {
        nsDecls = Array.Empty<ASNamespace>();
    }
    else {
        nsDecls = m_nsInScope.asSpan(nsDeclStart, nsDeclCount).ToArray();
    }

    if (!isSelfClosing) {
        // Children will be accumulated on the node stack above this index.
        StackItem openElement = new StackItem {
            elementName = elementName,
            attributes = attributes,
            nsDecls = nsDecls,
            childNodeStackBaseIndex = m_nodeStack.length
        };
        m_parserStack.add(openElement);
        return;
    }

    // A self-closing tag with no open elements is the root.
    if (m_parserStack.length == 0) {
        nsDecls = _addImplicitNSDeclsToRoot(nsDecls);
    }

    ASXML element = ASXML.unsafeCreateElement(
        elementName,
        attributes,
        ReadOnlySpan<ASXML>.Empty,
        nsDecls
    );
    m_nodeStack.add(element);

    // The element is complete, so its namespace declarations go out of scope.
    m_nsInScopePtrs.removeLast();
    m_nsInScope.removeRange(nsDeclStart, nsDeclCount);
}
/// <summary>
/// Reads a text node and adds it as a child to the element currently being processed.
/// </summary>
/// <remarks>
/// Reading stops at the next <c>&lt;</c> or at the end of the input. Text without any
/// <c>&amp;</c> is taken directly from the source string; otherwise entities are decoded
/// into <c>m_buffer</c>. No node is created if the resulting text is empty (after
/// whitespace stripping when <c>FLAG_IGNORE_SPACE</c> is set).
/// </remarks>
private void _readText() {
    ReadOnlySpan<char> span = m_str.AsSpan(m_pos);
    int charsLeft = m_str.Length - m_pos;
    char newline = s_newLineChar;

    ReadOnlySpan<char> searchChars = stackalloc char[3] {'<', '&', newline};
    bool mayHaveEntities = false;

    // Scan up to the first '<' or '&', counting lines on the way.
    while (!span.IsEmpty) {
        int index = span.IndexOfAny(searchChars);
        char charAtIndex = (index == -1) ? '\0' : span[index];

        if (charAtIndex == '<') {
            span = span.Slice(index);
            break;
        }
        else if (charAtIndex == '&') {
            span = span.Slice(index);
            mayHaveEntities = true;
            break;
        }
        else if (charAtIndex == newline) {
            m_curLine++;
            span = span.Slice(index + 1);
        }
        else {
            // Nothing of interest left: the text runs to the end of the input.
            span = default;
        }
    }

    int charsRead = charsLeft - span.Length;
    string text;

    if (!mayHaveEntities) {
        // No entities: the text can be taken directly from the source string.
        text = ((m_parserFlags & FLAG_IGNORE_SPACE) != 0)
            ? XMLHelper.stripWhitespace(m_str, m_pos, charsRead)
            : m_str.Substring(m_pos, charsRead);

        if (text.Length != 0) {
            m_nodeStack.add(ASXML.createTextNode(text));
        }

        m_pos += charsRead;
        return;
    }

    // Copy the text preceding the first '&' into the buffer, then decode from there.
    char[] textBuffer = m_buffer;
    if (charsRead > textBuffer.Length) {
        DataStructureUtil.resizeArray(ref textBuffer, textBuffer.Length, charsRead);
    }

    m_str.CopyTo(m_pos, textBuffer, 0, charsRead);
    int textBufPos = charsRead;
    m_pos += charsRead;

    while (true) {
        char ch = m_str[m_pos];

        if (ch == '&') {
            int entityCode = _readEntity();
            _writeCodePoint(ref textBuffer, ref textBufPos, entityCode);
        }
        else if (ch == '<') {
            break;
        }
        else if (ch == newline) {
            // The newline is part of the text: copy it to the buffer before moving past
            // it, so that line breaks are kept just as they are in the entity-free path.
            if (textBuffer.Length == textBufPos) {
                DataStructureUtil.resizeArray(ref textBuffer, textBufPos, textBufPos + 1);
            }
            textBuffer[textBufPos++] = ch;

            m_curLine++;
            m_pos++;
        }

        // Copy the run of ordinary characters up to the next special character.
        span = m_str.AsSpan(m_pos);
        int nextIndex = span.IndexOfAny(searchChars);
        int charsToCopy = (nextIndex == -1) ? span.Length : nextIndex;

        if (textBuffer.Length - textBufPos < charsToCopy) {
            DataStructureUtil.resizeArray(ref textBuffer, textBufPos, textBufPos + charsToCopy);
        }

        span.Slice(0, charsToCopy).CopyTo(textBuffer.AsSpan(textBufPos));
        textBufPos += charsToCopy;
        m_pos += charsToCopy;

        if (nextIndex == -1) {
            break;
        }
    }

    // Keep the (possibly reallocated) buffer for reuse.
    m_buffer = textBuffer;

    if (textBufPos == 0) {
        return;
    }

    text = ((m_parserFlags & FLAG_IGNORE_SPACE) != 0)
        ? XMLHelper.stripWhitespace(textBuffer, 0, textBufPos)
        : new string(textBuffer, 0, textBufPos);

    if (text.Length != 0) {
        m_nodeStack.add(ASXML.createTextNode(text));
    }
}
/// <summary>
/// Parses one top-level XML node starting at the current position in the string.
/// </summary>
/// <returns>The created node, or null if the end of the string was reached without
/// producing a node.</returns>
private ASXML? _readSingleNode() {
    // Takes the first node off the node stack and empties the stack.
    ASXML takeFinishedNode() {
        ASXML finished = m_nodeStack[0];
        m_nodeStack.clear();
        return finished;
    }

    m_parserFlags &= ~(FLAG_USES_XML_NS | FLAG_USES_DEFAULT_NS);

    while (true) {
        if ((m_parserFlags & FLAG_IGNORE_SPACE) != 0) {
            _goToNextNonSpace();
        }

        if (m_pos == m_str.Length) {
            // End of input: every opened element must have been closed.
            if (m_parserStack.length != 0) {
                throw _error(
                    ErrorCode.XML_ELEMENT_NOT_TERMINATED,
                    m_parserStack[m_parserStack.length - 1].elementName.AS_toString()
                );
            }

            if (m_nodeStack.length == 0) {
                return null;
            }
            return takeFinishedNode();
        }

        // No element is open and a node is available: a top-level node is complete.
        if (m_parserStack.length == 0 && m_nodeStack.length != 0) {
            return takeFinishedNode();
        }

        if (m_str[m_pos] != '<') {
            _readText();
            continue;
        }

        // The character after '<' selects the kind of markup, so it must exist.
        if (m_str.Length - m_pos < 2) {
            throw _error(ErrorCode.XML_PARSER_UNTERMINATED_ELEMENT);
        }

        switch (m_str[m_pos + 1]) {
            case '?':
                _readProcInstr();
                break;

            case '/':
                _readEndTag();
                break;

            case '!':
                if (String.CompareOrdinal(m_str, m_pos + 2, "[CDATA[", 0, 7) == 0) {
                    _readCDATA();
                }
                else if (m_str.Length - m_pos >= 4
                    && m_str[m_pos + 2] == '-'
                    && m_str[m_pos + 3] == '-')
                {
                    _readComment();
                }
                else {
                    _readDoctype();
                }
                break;

            default:
                _readStartTag();
                break;
        }
    }
}
/// <summary>
/// Reads an end tag and completes the element currently being processed.
/// </summary>
/// <remarks>
/// The end tag's local name and namespace URI must match those of the innermost open
/// element. The finished element replaces its children on the node stack, and the
/// namespace declarations it introduced are removed from scope.
/// </remarks>
private void _readEndTag() {
    if (m_parserStack.length == 0) {
        throw _error(ErrorCode.XML_MARKUP_AFTER_ROOT);
    }

    // Skip the opening '</'.
    m_pos += 2;

    if (!_readName(out string? prefix, out string? localName)) {
        throw _error(
            ErrorCode.MARIANA__XML_PARSER_INVALID_NAME,
            (prefix == null) ? localName : prefix + ":" + localName
        );
    }

    // Only whitespace may appear between the name and the closing '>'.
    bool properlyClosed = _goToNextNonSpace() && m_str[m_pos] == '>';
    if (!properlyClosed) {
        throw _error(ErrorCode.XML_PARSER_ELEMENT_MALFORMED);
    }
    m_pos++;

    StackItem openElement = m_parserStack[m_parserStack.length - 1];

    // The end tag must name the same element as the corresponding start tag.
    ASNamespace? endTagNS = _resolvePrefix(prefix);
    bool matchesStartTag =
        openElement.elementName.localName == localName
        && endTagNS != null
        && openElement.elementName.uri == endTagNS.uri;

    if (!matchesStartTag) {
        throw _error(
            ErrorCode.XML_ELEMENT_NOT_TERMINATED,
            openElement.elementName.AS_toString()
        );
    }

    // Only one open element left: this is the root being closed.
    if (m_parserStack.length == 1) {
        openElement.nsDecls = _addImplicitNSDeclsToRoot(openElement.nsDecls);
    }

    // Everything on the node stack from the base index upward is a child of this element.
    int firstChild = openElement.childNodeStackBaseIndex;
    int childCount = m_nodeStack.length - firstChild;

    ASXML element = ASXML.unsafeCreateElement(
        openElement.elementName,
        openElement.attributes,
        m_nodeStack.asSpan(firstChild, childCount),
        openElement.nsDecls
    );

    m_nodeStack.removeRange(firstChild, childCount);
    m_nodeStack.add(element);
    m_parserStack.removeLast();

    // Drop the namespace declarations that were introduced by this element.
    int nsDeclStart = m_nsInScopePtrs[m_nsInScopePtrs.length - 1];
    m_nsInScopePtrs.removeLast();
    m_nsInScope.removeRange(nsDeclStart, m_nsInScope.length - nsDeclStart);
}
/// <summary>
/// Writes <paramref name="node"/> and all of its descendants to the output parts.
/// </summary>
/// <param name="node">The node at which serialization starts.</param>
private void _writeNode(ASXML node) {
    m_iterator = node.getDescendantEnumerator(includeThis: true);

    while (m_iterator.MoveNext()) {
        // Close elements until the tag stack depth matches the iterator's depth.
        while (m_tagStack.length != m_iterator.currentDepth) {
            _exitCurrentElement();
        }

        ASXML current = m_iterator.Current;

        if (current.isElement) {
            _enterElement(current);
            continue;
        }

        _writeIndent();

        var type = current.nodeType;

        if (type == XMLNodeType.TEXT || type == XMLNodeType.ATTRIBUTE) {
            _writeText(current.nodeText!);
        }
        else if (type == XMLNodeType.COMMENT) {
            m_parts.add("<!--");
            m_parts.add(current.nodeText!);
            m_parts.add("-->");
        }
        else if (type == XMLNodeType.PROCESSING_INSTRUCTION) {
            m_parts.add("<?");
            m_parts.add(current.name()!.localName);
            m_parts.add(" ");
            m_parts.add(current.nodeText!);
            m_parts.add("?>");
        }
        else if (type == XMLNodeType.CDATA) {
            string text = current.nodeText!;
            bool containsTerminator = text.IndexOf("]]>", StringComparison.Ordinal) >= 0;

            if (containsTerminator) {
                // "]]>" inside a CDATA section would make the output invalid XML, so
                // the content is written as ordinary escaped text instead.
                _writeText(text);
            }
            else {
                m_parts.add("<![CDATA[");
                m_parts.add(text);
                m_parts.add("]]>");
            }
        }
    }

    // Close any elements that are still open.
    while (m_tagStack.length != 0) {
        _exitCurrentElement();
    }
}