/// <summary>
/// Handles one character of an XML name. Name characters are accumulated in
/// <c>context.KeywordBuilder</c>; when a terminating character arrives the accumulated
/// text is split at the first ':' and assigned to the named node on top of the node stack.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, keyword builder and error log.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> whenever this method returns <c>Parent</c>
/// (presumably so the parser replays the character in that state; confirm against the parser driver).
/// </param>
/// <returns><c>null</c> while the name is still being read; <c>Parent</c> once it has ended.</returns>
/// <exception cref="InvalidOperationException">
/// The node on top of the stack is not an <c>INamedXObject</c>, or its name already has a prefix.
/// </exception>
public override XmlParserState PushChar(char c, IXmlParserContext context, ref string rollback)
{
    var target = context.Nodes.Peek() as INamedXObject;
    if (target == null || target.Name.Prefix != null)
    {
        throw new InvalidOperationException("Invalid state");
    }

    Debug.Assert(context.CurrentStateLength > 1 || IsValidNameStart(c), "First character pushed to a XmlTagNameState must be a letter.");
    Debug.Assert(context.CurrentStateLength > 1 || context.KeywordBuilder.Length == 0, "Keyword builder must be empty when state begins.");

    bool terminatesName = XmlChar.IsWhitespace(c) || c == '<' || c == '>' || c == '/' || c == '=';

    if (!terminatesName)
    {
        if (c == ':')
        {
            // a second separator is reported, but still kept as part of the name text
            if (context.KeywordBuilder.ToString().IndexOf(':') > 0)
            {
                context.LogError("Unexpected ':' in name.");
            }
            context.KeywordBuilder.Append(c);
            return null;
        }

        if (XmlChar.IsNameChar(c))
        {
            context.KeywordBuilder.Append(c);
            return null;
        }

        // neither a terminator nor a name character: give up on the name
        rollback = string.Empty;
        context.LogError("Unexpected character '" + c + "' in name");
        return Parent;
    }

    rollback = string.Empty;

    if (context.KeywordBuilder.Length == 0)
    {
        context.LogError("Zero-length name.");
        return Parent;
    }

    string fullName = context.KeywordBuilder.ToString();
    int separator = fullName.IndexOf(':');
    target.Name = separator < 0
        ? new XName(fullName)
        : new XName(fullName.Substring(0, separator), fullName.Substring(separator + 1));

    return Parent;
}
/// <summary>
/// Handles one character of a text run. An <c>XText</c> node is pushed on the first
/// character; subsequent characters are buffered in <c>context.KeywordBuilder</c> until a
/// '&lt;' ends the run, at which point the node is ended with trailing whitespace excluded.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, position, state tag and keyword builder.</param>
/// <param name="rollback">Set to <see cref="string.Empty"/> when a '&lt;' ends the text run.</param>
/// <returns><c>null</c> while text is being accumulated; <c>Parent</c> when '&lt;' is seen.</returns>
public override XmlParserState PushChar(char c, XmlParserContext context, ref string rollback)
{
    if (context.CurrentStateLength == 0)
    {
        context.Nodes.Push(new XText(context.Position));
        // StateTag is tracking the last non-whitespace char
        context.StateTag = context.Position;
    }

    //FIXME: handle entities?

    if (c != '<')
    {
        if (!XmlChar.IsWhitespace(c))
        {
            context.StateTag = context.Position;
        }
        context.KeywordBuilder.Append(c);
        return null;
    }

    var textNode = (XText)context.Nodes.Pop();

    if (!context.BuildTree)
    {
        // no tree: only the end offset (just past the last non-whitespace char) is recorded
        textNode.End(context.StateTag + 1);
    }
    else
    {
        // trim the buffered text down to the node length and add it to the enclosing container
        int trimmedLength = context.StateTag - textNode.Span.Start + 1;
        context.KeywordBuilder.Length = trimmedLength;
        textNode.End(context.KeywordBuilder.ToString());

        var container = (XContainer)context.Nodes.Peek();
        container.AddChildNode(textNode);
    }

    rollback = string.Empty;
    return Parent;
}
/// <summary>
/// Gets the XML name at the parser's position.
/// </summary>
/// <param name="spine">A spine parser. It will not be modified.</param>
/// <param name="text">The text snapshot corresponding to the parser.</param>
/// <param name="maximumReadahead">
/// Maximum number of characters to scan past the parser's position when looking for the end of the name.
/// </param>
/// <returns>
/// The name running from the start of the current name state to the end of the name found by
/// read-ahead. When a ':' separator is found with at least one character after it, the result
/// has a prefix and a local name; otherwise the whole span is returned as an unprefixed name.
/// </returns>
public static XName GetCompleteName(this XmlSpineParser spine, ITextSource text, int maximumReadahead = DEFAULT_READAHEAD_LIMIT)
{
    Debug.Assert(spine.CurrentState is XmlNameState);

    int end = spine.Position;
    int start = end - spine.CurrentStateLength;
    int mid = -1;

    // The prefix separator may sit in the part of the name the parser has already consumed
    // (e.g. the parser is inside "x:Na|me"). Look for it there first; otherwise a prefixed
    // name would be returned as a single unprefixed name containing the ':'.
    int consumedLimit = Math.Min(end, text.Length);
    for (int i = start; i < consumedLimit; i++)
    {
        if (text[i] == ':')
        {
            mid = i;
            break;
        }
    }

    int limit = Math.Min(text.Length, end + maximumReadahead);

    //try to find the end of the name, but don't go too far
    for (; end < limit; end++)
    {
        char c = text[end];

        if (c == ':')
        {
            if (mid != -1)
            {
                // a second separator cannot be part of this name
                break;
            }
            mid = end;
        }
        else if (!XmlChar.IsNameChar(c))
        {
            break;
        }
    }

    // only split when there is at least one character after the separator
    if (mid > 0 && end > mid + 1)
    {
        return new XName(text.GetText(start, mid - start), text.GetText(mid + 1, end - mid - 1));
    }

    return new XName(text.GetText(start, end - start));
}
/// <summary>
/// Handles one character of a DOCTYPE declaration. The declaration is read in phases, chosen
/// by which parts of the <c>XDocType</c> on the node stack are already filled in: root element
/// name, SYSTEM/PUBLIC keyword and public identifier, URI, then the internal declaration region.
/// Any character that no phase consumes ends the doctype.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, state tag, keyword builder and error log.</param>
/// <param name="rollback">
/// Set to an empty string when handing over to the name state, and when a '&lt;' ends the doctype.
/// </param>
/// <returns>
/// <c>null</c> when the character was consumed by the current phase, <c>nameState</c> when the
/// root element name starts, or <c>Parent</c> when the doctype ends.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>context.StateTag</c> is not 0, 1 or 2 while reading the internal declaration region.
/// </exception>
public override XmlParserState PushChar(char c, IXmlParserContext context, ref string rollback)
{
    // Reuse the XDocType already on top of the stack, or create and push a new one.
    var doc = context.Nodes.Peek() as XDocType;
    if (doc == null) {
        // start location is backed up over "<!DOCTYPE" plus one more character
        doc = new XDocType(context.LocationMinus("<!DOCTYPE".Length + 1));
        context.Nodes.Push(doc);
    }

    // Phase 1: root element name not read yet. Skip whitespace, then delegate to the name state.
    // Any other character falls through to the end-of-doctype handling below.
    if (!doc.RootElement.IsValid) {
        if (XmlChar.IsWhitespace(c)) {
            return(null);
        } else if (XmlChar.IsFirstNameChar(c)) {
            rollback = "";
            return(nameState);
        }
    }
    // Phase 2: SYSTEM / PUBLIC keyword and, for PUBLIC, the quoted public identifier.
    // StateTag encodes keyword progress: 0 = nothing matched, 1..5 = characters of "SYSTEM"
    // matched, -1..-5 = characters of "PUBLIC" matched, -6 = "PUBLIC" complete.
    else if (doc.PublicFpi == null) {
        if (context.StateTag == 0) {
            if (c == 's' || c == 'S') {
                context.StateTag = 1;
                return(null);
            } else if (c == 'p' || c == 'P') {
                context.StateTag = -1;
                return(null);
            }
            if (XmlChar.IsWhitespace(c)) {
                return(null);
            }
            // NOTE(review): any other character here (including '[') falls through and ends the
            // doctype - confirm whether a doctype with only an internal subset is meant to be supported.
        } else if (Math.Abs(context.StateTag) < 6) {
            if (context.StateTag > 0) {
                // matching the rest of "SYSTEM", either all-upper or all-lower per character
                if ("YSTEM"[context.StateTag - 1] == c || "ystem"[context.StateTag - 1] == c) {
                    context.StateTag++;
                    if (context.StateTag == 6) {
                        // SYSTEM has no public identifier: mark it as read (empty) so the
                        // next character is handled by the URI phase
                        context.StateTag = 0;
                        doc.PublicFpi = "";
                    }
                    return(null);
                }
            } else {
                // matching the rest of "PUBLIC"; StateTag counts downwards
                int absState = Math.Abs(context.StateTag) - 1;
                if ("UBLIC"[absState] == c || "ublic"[absState] == c) {
                    context.StateTag--;
                    return(null);
                }
            }
        } else {
            // "PUBLIC" fully matched: read the double-quoted public identifier.
            // The opening quote is stored in the builder as an "inside quotes" marker.
            if (context.KeywordBuilder.Length == 0) {
                if (XmlChar.IsWhitespace(c)) {
                    return(null);
                } else if (c == '"') {
                    context.KeywordBuilder.Append(c);
                    return(null);
                }
            } else {
                if (c == '"') {
                    // closing quote: drop the marker quote and store the identifier
                    context.KeywordBuilder.Remove(0, 1);
                    doc.PublicFpi = context.KeywordBuilder.ToString();
                    context.KeywordBuilder.Length = 0;
                    context.StateTag = 0;
                } else {
                    context.KeywordBuilder.Append(c);
                }
                return(null);
            }
        }
    }
    // Phase 3: the double-quoted URI, read with the same quote-marker technique.
    // NOTE(review): only '"' is recognised as a delimiter in both quoted phases; a single quote
    // falls through and ends the doctype - confirm this is intended.
    else if (doc.Uri == null) {
        if (context.KeywordBuilder.Length == 0) {
            if (XmlChar.IsWhitespace(c)) {
                return(null);
            } else if (c == '"') {
                context.KeywordBuilder.Append(c);
                return(null);
            }
        } else {
            if (c == '"') {
                context.KeywordBuilder.Remove(0, 1);
                doc.Uri = context.KeywordBuilder.ToString();
                context.KeywordBuilder.Length = 0;
            } else {
                context.KeywordBuilder.Append(c);
            }
            return(null);
        }
    }
    // Phase 4: internal declaration region, while its end has not been recorded.
    // StateTag here: 0 = before '[', 1 = inside the region, 2 = inside a '<...>' within the region.
    else if (doc.InternalDeclarationRegion.EndLine <= 0) {
        if (XmlChar.IsWhitespace(c)) {
            return(null);
        }
        switch (context.StateTag) {
        case 0:
            if (c == '[') {
                // record the region start; the end is filled in when ']' is seen
                doc.InternalDeclarationRegion = new DomRegion(context.Location, TextLocation.Empty);
                context.StateTag = 1;
                return(null);
            }
            break;
        case 1:
            if (c == '<') {
                context.StateTag = 2;
                return(null);
            } else if (c == ']') {
                context.StateTag = 0;
                doc.InternalDeclarationRegion = new DomRegion(doc.InternalDeclarationRegion.Begin, context.Location);
                return(null);
            }
            break;
        case 2:
            // everything up to the next '>' is skipped
            if (c == '>') {
                context.StateTag = 1;
            }
            return(null);
        default:
            throw new InvalidOperationException();
        }
    }
    // NOTE(review): once the internal region's end is recorded no phase matches, so even
    // whitespace before the closing '>' reaches the error path below - confirm this is intended.

    // End of doctype: reached on '>' and on any character no phase consumed.
    doc = (XDocType)context.Nodes.Pop();
    if (c == '<') {
        rollback = string.Empty;
        context.LogError("Doctype ended prematurely.");
    } else if (c != '>') {
        context.LogError("Unexpected character '" + c + "' in doctype.");
    }
    if (context.BuildTree) {
        doc.End(context.Location);
        ((XContainer)context.Nodes.Peek()).AddChildNode(doc);
    }
    return(Parent);
}
/// <summary>
/// Handles one character inside an opening tag. Creates and pushes the <c>XElement</c> when
/// needed, hands over to the name and attribute states, tracks a possible self-closing '/',
/// and recovers from unexpected characters by skipping ahead to whitespace.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, location, state tag and error log.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> when a '&lt;' aborts the tag and when handing over to the
/// name or attribute state.
/// </param>
/// <returns>
/// <c>null</c> when the character was consumed here; <c>NameState</c> or <c>AttributeState</c>
/// when a name starts; <c>Parent</c> when the tag ends or is abandoned.
/// </returns>
public override XmlParserState PushChar(char c, IXmlParserContext context, ref string rollback)
{
    var element = context.Nodes.Peek() as XElement;
    if (element == null || element.IsComplete)
    {
        // the stack top (possibly null) becomes the parent of the element being started
        XElement enclosing = element;
        // 2 == < + current char
        element = new XElement(context.LocationMinus(2)) { Parent = enclosing };
        context.Nodes.Push(element);
    }

    if (c == '<')
    {
        if (!element.IsNamed)
        {
            context.LogError("Unexpected '<' in unnamed tag.");
        }
        else
        {
            context.LogError("Unexpected '<' in tag '" + element.Name.FullName + "'.");
            Close(element, context, context.LocationMinus(1));
        }

        rollback = string.Empty;
        return Parent;
    }

    Debug.Assert(!element.IsComplete);

    if (element.IsClosed && c != '>')
    {
        if (!char.IsWhiteSpace(c))
        {
            context.LogError("Unexpected character '" + c + "' after '/' in self-closing tag.");
            context.Nodes.Pop();
            return Parent;
        }

        context.LogWarning("Unexpected whitespace after '/' in self-closing tag.");
        return null;
    }

    switch (c)
    {
        case '>':
            // tag closed; a directly preceding '/' makes the element close itself
            if (context.StateTag == MAYBE_SELF_CLOSING)
            {
                element.Close(element);
            }

            if (element.IsNamed)
            {
                Close(element, context, context.Location);
            }
            else
            {
                context.LogError("Tag closed prematurely.");
            }
            return Parent;

        case '/':
            context.StateTag = MAYBE_SELF_CLOSING;
            return null;
    }

    // error recovery: swallow characters until whitespace has been seen...
    if (context.StateTag == ATTEMPT_RECOVERY)
    {
        if (XmlChar.IsWhitespace(c))
        {
            context.StateTag = RECOVERY_FOUND_WHITESPACE;
        }
        return null;
    }

    // ...and then until something that can start a name
    if (context.StateTag == RECOVERY_FOUND_WHITESPACE && !XmlChar.IsFirstNameChar(c))
    {
        return null;
    }

    context.StateTag = OK;

    if (XmlChar.IsFirstNameChar(c))
    {
        if (!element.IsNamed)
        {
            rollback = string.Empty;
            return NameState;
        }

        if (context.CurrentStateLength > 1)
        {
            rollback = string.Empty;
            return AttributeState;
        }
    }

    if (XmlChar.IsWhitespace(c))
    {
        return null;
    }

    context.LogError("Unexpected character '" + c + "' in tag.", context.LocationMinus(1));
    context.StateTag = ATTEMPT_RECOVERY;
    return null;
}
/// <summary>
/// Handles one character inside a closing tag. Creates and pushes the <c>XClosingTag</c> when
/// needed, hands over to the name state for the tag name, and on '&gt;' unwinds the node stack
/// to the nearest open element with the same name, reporting skipped elements as unclosed.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, position and error log.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> when handing over to the name state and when the closing
/// tag is abandoned because of an unexpected character.
/// </param>
/// <returns>
/// <c>null</c> for skipped whitespace; <c>NameState</c> when the tag name starts;
/// <c>Parent</c> when the closing tag ends or is abandoned.
/// </returns>
public override XmlParserState PushChar(char c, IXmlParserContext context, ref string rollback)
{
    var ct = context.Nodes.Peek() as XClosingTag;
    if (ct == null)
    {
        Debug.Assert(context.CurrentStateLength == 1, "IncompleteNode must not be an XClosingTag when CurrentStateLength is 1");
        //3 = </ and the current char
        ct = new XClosingTag(context.Position - 3);
        context.Nodes.Push(ct);
    }

    if (c != '>')
    {
        if (c == '<')
        {
            context.LogError("Unexpected '<' in tag.", context.Position - 1);
            context.Nodes.Pop();
            rollback = string.Empty;
            return Parent;
        }

        if (XmlChar.IsWhitespace(c))
        {
            return null;
        }

        bool startsName = char.IsLetter(c) || c == '_';
        if (!ct.IsNamed && startsName)
        {
            rollback = string.Empty;
            return NameState;
        }

        rollback = string.Empty;
        context.LogError("Unexpected character '" + c + "' in closing tag.", context.Position - 1);
        context.Nodes.Pop();
        return Parent;
    }

    // tag closed: take the closing tag itself off the stack
    context.Nodes.Pop();

    if (!ct.IsNamed)
    {
        context.LogError("Closing tag ended prematurely.");
        return Parent;
    }

    ct.End(context.Position);

    // walk up the stack looking for a matching open element;
    // matchDepth is its 1-based distance from the top, or 0 when there is none
    int depth = 0;
    int matchDepth = 0;
    foreach (XObject candidate in context.Nodes)
    {
        depth++;
        if (candidate is XElement open && open.Name == ct.Name)
        {
            matchDepth = depth;
            break;
        }
    }

    //clear the stack of intermediate unclosed tags
    for (int remaining = matchDepth; remaining > 1; remaining--)
    {
        if (context.Nodes.Pop() is XElement unclosed)
        {
            context.LogError(string.Format("Unclosed tag '{0}'", unclosed.Name.FullName), ct.Span);
        }
    }

    if (matchDepth == 0)
    {
        context.LogError(
            string.Format("Closing tag '{0}' does not match any currently open tag.", ct.Name.FullName),
            ct.Span
        );
        return Parent;
    }

    //close the start tag we found
    if (context.BuildTree)
    {
        var startTag = (XElement)context.Nodes.Pop();
        startTag.Close(ct);
    }
    else
    {
        context.Nodes.Pop();
    }

    return Parent;
}
/// <summary>
/// Handles one character at the root/content level and decides which child state to enter.
/// <c>context.StateTag</c> tracks how much of a tag opening has been seen: <c>FREE</c> (none),
/// <c>BRACKET</c> ("&lt;"), <c>BRACKET_EXCLAM</c> ("&lt;!"), <c>COMMENT</c> ("&lt;!-"),
/// <c>CDATA</c> ("&lt;![" followed by a partial "CDATA[") and <c>DOCTYPE</c> ("&lt;!D" followed by
/// a partial "OCTYPE"). For the last two, <c>context.KeywordBuilder</c> holds the keyword
/// characters matched so far.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the state tag, keyword builder, position and diagnostics.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> when handing over to the text, processing-instruction or tag state.
/// </param>
/// <returns>
/// The child state to enter, or <c>null</c> when the character was consumed by this state.
/// </returns>
public override XmlParserState PushChar(char c, XmlParserContext context, ref string rollback)
{
    if (c == '<')
    {
        if (context.StateTag != FREE)
        {
            context.Diagnostics?.LogError(
                "Incomplete tag opening; encountered unexpected '<'.",
                TextSpan.FromBounds(
                    context.Position - LengthFromOpenBracket(context) - 1,
                    context.Position - 1
                )
            );

            // Discard any partially matched "CDATA[" / "OCTYPE" keyword. Without this, the
            // next keyword match would index from the stale length and reject a well-formed
            // opening that follows the broken one. Done after logging in case the span
            // helper (not visible here) reads the builder.
            context.KeywordBuilder.Length = 0;
        }

        context.StateTag = BRACKET;
        return null;
    }

    switch (context.StateTag)
    {
        case FREE:
            // whitespace between nodes is skipped; anything else starts a text run
            if (!XmlChar.IsWhitespace(c))
            {
                rollback = string.Empty;
                return TextState;
            }
            return null;

        case BRACKET:
            switch (c)
            {
                case '?':
                    rollback = string.Empty;
                    return ProcessingInstructionState;
                case '!':
                    context.StateTag = BRACKET_EXCLAM;
                    return null;
                case '/':
                    return ClosingTagState;
            }
            if (char.IsLetter(c) || c == '_' || char.IsWhiteSpace(c))
            {
                rollback = string.Empty;
                return TagState;
            }
            break;

        case BRACKET_EXCLAM:
            switch (c)
            {
                case '[':
                    context.StateTag = CDATA;
                    return null;
                case '-':
                    context.StateTag = COMMENT;
                    return null;
                case 'D':
                    context.StateTag = DOCTYPE;
                    return null;
            }
            break;

        case COMMENT:
            // second '-' of "<!--"
            if (c == '-')
            {
                return CommentState;
            }
            break;

        case CDATA:
            // match the remainder of "<![CDATA[" one character at a time
            string cdataStr = "CDATA[";
            if (c == cdataStr[context.KeywordBuilder.Length])
            {
                context.KeywordBuilder.Append(c);
                if (context.KeywordBuilder.Length < cdataStr.Length)
                {
                    return null;
                }
                return CDataState;
            }
            context.KeywordBuilder.Length = 0;
            break;

        case DOCTYPE:
            // match the remainder of "<!DOCTYPE" one character at a time
            string docTypeStr = "OCTYPE";
            if (c == docTypeStr[context.KeywordBuilder.Length])
            {
                context.KeywordBuilder.Append(c);
                if (context.KeywordBuilder.Length < docTypeStr.Length)
                {
                    return null;
                }
                return DocTypeState;
            }
            else
            {
                context.KeywordBuilder.Length = 0;
            }
            break;
    }

    // the tag opening could not be completed: report it and go back to free content
    context.Diagnostics?.LogError(
        $"Incomplete tag opening; encountered unexpected character '{c}'.",
        TextSpan.FromBounds(
            context.Position - LengthFromOpenBracket(context),
            context.Position));

    context.StateTag = FREE;
    return null;
}
/// <summary>
/// Handles one character inside a closing tag. Creates and pushes the <c>XClosingTag</c> when
/// needed, hands over to the name state for the tag name, and on '&gt;' unwinds the node stack
/// to the nearest open element with the same name, reporting skipped elements as unclosed.
/// When building a tree, a closing tag with no matching element is still attached to a container.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, position and diagnostics.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> when handing over to the name state and when the closing
/// tag is abandoned because of an unexpected character.
/// </param>
/// <returns>
/// <c>null</c> for skipped whitespace; <c>NameState</c> when the tag name starts;
/// <c>Parent</c> when the closing tag ends or is abandoned.
/// </returns>
public override XmlParserState PushChar(char c, XmlParserContext context, ref string rollback)
{
    var ct = context.Nodes.Peek() as XClosingTag;
    if (ct == null)
    {
        Debug.Assert(context.CurrentStateLength == 0, "IncompleteNode must not be an XClosingTag when CurrentStateLength is 0");
        ct = new XClosingTag(context.Position - STARTOFFSET);
        context.Nodes.Push(ct);
    }

    if (c != '>')
    {
        if (c == '<')
        {
            context.Diagnostics?.LogError("Unexpected '<' in tag.", context.Position - 1);
            context.Nodes.Pop();
            rollback = string.Empty;
            return Parent;
        }

        if (XmlChar.IsWhitespace(c))
        {
            return null;
        }

        bool startsName = char.IsLetter(c) || c == '_';
        if (!ct.IsNamed && startsName)
        {
            rollback = string.Empty;
            return NameState;
        }

        rollback = string.Empty;
        context.Diagnostics?.LogError("Unexpected character '" + c + "' in closing tag.", context.Position - 1);
        context.Nodes.Pop();
        return Parent;
    }

    // tag closed: take the closing tag itself off the stack
    context.Nodes.Pop();

    if (!ct.IsNamed)
    {
        context.Diagnostics?.LogError("Closing tag ended prematurely.", context.Position);
        return Parent;
    }

    ct.End(context.Position + 1);

    // walk up the stack looking for a matching open element;
    // matchDepth is its 1-based distance from the top, or 0 when there is none
    int depth = 0;
    int matchDepth = 0;
    foreach (XObject candidate in context.Nodes)
    {
        depth++;
        if (candidate is XElement open && open.Name == ct.Name)
        {
            matchDepth = depth;
            break;
        }
    }

    //clear the stack of intermediate unclosed tags
    for (int remaining = matchDepth; remaining > 1; remaining--)
    {
        if (context.Nodes.Pop() is XElement unclosed)
        {
            context.Diagnostics?.LogError(string.Format("Unclosed tag '{0}'", unclosed.Name.FullName), unclosed.Span);
        }
    }

    if (matchDepth > 0)
    {
        // close it even if not in tree mode, as some spines may want to know whether an element was closed after advancing the parser
        var startTag = (XElement)context.Nodes.Pop();
        startTag.Close(ct);
    }
    else if (context.BuildTree)
    {
        context.Diagnostics?.LogError(
            $"Closing tag '{ct.Name.FullName}' does not match any currently open tag.",
            ct.Span
        );

        // add it into the tree anyway so it's accessible; when the container on top of the
        // stack has not ended, the next node down is used instead (if there is one)
        var container = context.Nodes.Peek() as XContainer;
        if (container != null && !container.IsEnded)
        {
            container = context.Nodes.Count > 1
                ? context.Nodes.Peek(1) as XContainer
                : null;
        }
        container?.AddChildNode(ct);
    }

    return Parent;
}
/// <summary>
/// Handles one character inside an opening tag. Creates and pushes the <c>XElement</c> when
/// needed (adding it to its container when a tree is being built), hands over to the attribute
/// and name states, tracks a possible self-closing '/', and recovers from unexpected characters
/// by skipping ahead to whitespace.
/// </summary>
/// <param name="c">The character being pushed.</param>
/// <param name="context">Parser context providing the node stack, position, state tag and error log.</param>
/// <param name="rollback">
/// Set to <see cref="string.Empty"/> when a '&lt;' aborts the tag and when handing over to the
/// attribute or name state.
/// </param>
/// <returns>
/// <c>null</c> when the character was consumed here; <c>AttributeState</c> or <c>NameState</c>
/// when a name starts; <c>Parent</c> when the tag ends or is abandoned.
/// </returns>
public override XmlParserState PushChar(char c, IXmlParserContext context, ref string rollback)
{
    var element = context.Nodes.Peek() as XElement;
    if (element == null || element.IsEnded)
    {
        // the stack top (possibly null) becomes the parent of the element being started
        XElement enclosing = element;
        // 2 == < + current char
        element = new XElement(context.Position - 2);
        element.Parent = enclosing;
        context.Nodes.Push(element);

        if (context.BuildTree)
        {
            // depth 0 is the element just pushed, depth 1 the node beneath it
            int containerDepth = element.IsClosed ? 0 : 1;
            var container = (XContainer)context.Nodes.Peek(containerDepth);
            container.AddChildNode(element);
        }
    }

    if (c == '<')
    {
        if (!element.IsNamed)
        {
            context.LogError("Tag has no name.", element.Span.Start);
        }
        else
        {
            context.LogError("Unexpected '<' in tag '" + element.Name.FullName + "'.");
            Close(element, context, context.Position - 1);
        }

        rollback = string.Empty;
        return Parent;
    }

    Debug.Assert(!element.IsEnded);

    if (element.IsClosed && c != '>')
    {
        if (!char.IsWhiteSpace(c))
        {
            context.LogError("Unexpected character '" + c + "' after '/' in self-closing tag.");
            context.Nodes.Pop();
            return Parent;
        }

        context.LogWarning("Unexpected whitespace after '/' in self-closing tag.");
        return null;
    }

    switch (c)
    {
        case '>':
            // tag closed; a directly preceding '/' makes the element close itself
            element.HasEndBracket = true;
            if (context.StateTag == MAYBE_SELF_CLOSING)
            {
                element.Close(element);
            }

            if (element.IsNamed)
            {
                Close(element, context, context.Position);
            }
            else
            {
                context.LogError("Tag closed prematurely.");
            }
            return Parent;

        case '/':
            context.StateTag = MAYBE_SELF_CLOSING;
            return null;
    }

    // error recovery: swallow characters until whitespace has been seen...
    if (context.StateTag == ATTEMPT_RECOVERY)
    {
        if (XmlChar.IsWhitespace(c))
        {
            context.StateTag = RECOVERY_FOUND_WHITESPACE;
        }
        return null;
    }

    // ...and then until something that can start a name
    if (context.StateTag == RECOVERY_FOUND_WHITESPACE && !XmlChar.IsFirstNameChar(c))
    {
        return null;
    }

    context.StateTag = FREE;

    if (XmlChar.IsFirstNameChar(c))
    {
        // the attribute check comes first: past the first character, a name start is
        // treated as an attribute even when the element has no name yet
        if (context.CurrentStateLength > 1)
        {
            rollback = string.Empty;
            return AttributeState;
        }

        if (!element.IsNamed)
        {
            rollback = string.Empty;
            return NameState;
        }
    }

    if (XmlChar.IsWhitespace(c))
    {
        return null;
    }

    context.LogError("Unexpected character '" + c + "' in tag.", context.Position - 1);
    context.StateTag = ATTEMPT_RECOVERY;
    return null;
}