/// <summary>
/// Produces a tag that carries a syntax error with the given description.
/// </summary>
/// <param name="description">Text passed to the created <see cref="InternalSyntaxError"/>.</param>
/// <returns>
/// This very instance when it already has at least one syntax error; otherwise a
/// member-wise clone whose <c>SyntaxErrors</c> holds exactly one new error
/// constructed with <c>(0, Length, description)</c>.
/// </returns>
public InternalTag AddSyntaxError(string description)
{
    // Only the first error is kept: a tag that already has one is returned untouched.
    bool alreadyHasError = SyntaxErrors != null && SyntaxErrors.Length > 0;
    if (alreadyHasError) {
        return this;
    }

    var copy = (InternalTag)MemberwiseClone();
    var error = new InternalSyntaxError(0, Length, description);
    copy.SyntaxErrors = new[] { error };
    return copy;
}
/// <summary>
/// Classifies the object the iterator currently points at and updates the reader
/// state (<c>elementNodeType</c>, <c>attributes</c>, <c>readState</c>) accordingly.
/// Objects that are not reported (plain tags) are skipped by moving the iterator on.
/// </summary>
/// <returns>
/// <c>true</c> when the reader is positioned on a node to report;
/// <c>false</c> when the iterator has no current object (end of file).
/// </returns>
/// <exception cref="NotSupportedException">
/// The current object is neither an element, a text nor a tag.
/// </exception>
bool ReadCurrentPosition()
{
    // Reset per-node attribute state before looking at the new position.
    inAttributeValue = false;
    attributeIndex = -1;
    attributes = null;

    for (;;) {
        var current = objectIterator.CurrentObject;

        if (current == null) {
            readState = ReadState.EndOfFile;
            elementNodeType = XmlNodeType.None;
            return false;
        }

        if (objectIterator.IsAtElementEnd) {
            elementNodeType = XmlNodeType.EndElement;
            return true;
        }

        if (current is InternalElement) {
            // Element start: the first nested object is the start tag, which owns the attributes.
            elementNodeType = XmlNodeType.Element;
            var startTag = (InternalTag)current.NestedObjects[0];
            var tagChildren = startTag.NestedObjects;
            if (tagChildren != null) {
                attributes = tagChildren.OfType<InternalAttribute>().ToList();
            }
            return true;
        }

        var text = current as InternalText;
        if (text != null) {
            elementNodeType = text.ContainsOnlyWhitespace
                ? XmlNodeType.Whitespace
                : XmlNodeType.Text;
            return true;
        }

        if (!(current is InternalTag)) {
            throw new NotSupportedException();
        }

        // Start/end tags can be skipped as the parent InternalElement already handles them.
        // TODO all other tags (xml decl, comments, ...)
        objectIterator.MoveInto();
    }
}
/// <summary>
/// Creates an element that takes its name from the given tag.
/// </summary>
/// <param name="tag">Tag whose <c>Name</c> is copied into the new element.</param>
public InternalElement(InternalTag tag)
{
    Name = tag.Name;
}
/// <summary>
/// Derives the next set of candidate configurations by feeding one object into
/// every finite-cost configuration of <paramref name="oldConfigurations"/>.
/// </summary>
/// <param name="obj">The object being processed.</param>
/// <param name="indentationLevel">
/// Indentation of <paramref name="obj"/>; only compared against the open tag's
/// indentation when both values are non-negative.
/// </param>
/// <param name="oldConfigurations">Configurations to extend.</param>
/// <param name="newConfigurations">Cleared first, then filled with the results.</param>
void ProcessObject(InternalObject obj, int indentationLevel, ConfigurationList oldConfigurations, ref ConfigurationList newConfigurations)
{
    newConfigurations.Clear();
    var tag = obj as InternalTag;

    for (int index = 0; index < oldConfigurations.count; index++) {
        Configuration config = oldConfigurations.configurations[index];
        if (config.Cost == InfiniteCost) {
            continue;
        }

        if (tag != null && tag.IsStartTag) {
            // Push start tag
            newConfigurations.Add(
                config.OpenTags.Push(tag.Name, indentationLevel),
                config.Document.Push(obj),
                config.Cost
            );
            continue;
        }

        if (tag == null || !tag.IsEndTag) {
            // Anything that is neither a start tag nor an end tag is appended as-is.
            newConfigurations.Add(
                config.OpenTags,
                config.Document.Push(obj),
                config.Cost
            );
            continue;
        }

        // Option 1: ignore this end tag (a start-tag placeholder is inserted in front of it).
        newConfigurations.Add(
            config.OpenTags,
            config.Document.Push(StartTagPlaceholder).Push(obj),
            config.Cost + IgnoreEndTagCost
        );

        // Option 2: match this end tag with one of the currently open tags.
        // Every open tag that is passed over gets an end-tag placeholder and adds MissingEndTagCost.
        var documentSoFar = config.Document;
        uint accumulatedCost = config.Cost;
        for (var remaining = config.OpenTags; !remaining.IsEmpty; remaining = remaining.Pop()) {
            uint matchCost = 0;
            if (remaining.IndentationLevel >= 0 && indentationLevel >= 0) {
                matchCost += (uint)Math.Abs(remaining.IndentationLevel - indentationLevel);
            }
            if (remaining.Name != tag.Name) {
                matchCost += MismatchedNameCost;
            }

            newConfigurations.Add(
                remaining.Pop(),
                documentSoFar.Push(obj),
                accumulatedCost + matchCost
            );

            accumulatedCost += MissingEndTagCost;
            documentSoFar = documentSoFar.Push(EndTagPlaceholder);
        }
    }
}
/// <summary>
/// Reads one tag at the current location into a new <see cref="InternalTag"/>:
/// opening bracket, optional name, then attributes / DTD content / text depending
/// on the kind of tag, and finally the closing bracket. Syntax problems are
/// reported through <c>OnSyntaxError</c>.
/// Context: "<"
/// </summary>
void ReadTag()
{
    AssertHasMoreData();

    int tagStart = this.CurrentLocation;
    InternalTag tag = new InternalTag();
    var frame = BeginInternalObject(tag);

    // Read the opening bracket
    // It identifies the type of tag and parsing behavior for the rest of it
    tag.OpeningBracket = ReadOpeningBracket();

    if (tag.IsUnknownBang && !TryPeekWhiteSpace()) {
        OnSyntaxError(tagStart, this.CurrentLocation, "Unknown tag");
    }

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
        // Read the name
        TryMoveToNonWhiteSpace();
        tag.RelativeNameStart = this.CurrentRelativeLocation;
        string name;
        if (TryReadName(out name)) {
            if (!IsValidName(name)) {
                OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
            }
        } else {
            OnSyntaxError("Element name expected");
        }
        // NOTE(review): presumably 'name' is null when TryReadName fails; the
        // 'tag.Name == null' check further down appears to rely on that - confirm.
        tag.Name = name;
    } else {
        tag.Name = string.Empty;
    }

    bool isXmlDeclr = tag.Name == "xml" && tag.IsProcessingInstruction;
    // Everything added to 'objects' from here on is treated as nested content of this tag.
    int oldObjectCount = objects.Count;

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) {
        // Read attributes for the tag
        while (HasMoreData()) {
            // Check for all forbidden 'name' characters first - see ReadName
            TryMoveToNonWhiteSpace();
            if (TryPeek('<')) {
                break;
            }
            string endBr;
            int endBrStart = this.CurrentLocation; // Just peek
            if (TryReadClosingBracket(out endBr)) // End tag
            {
                // The bracket was only probed here; go back so that the
                // TryReadClosingBracket call after the loop reads it for real.
                GoBack(endBrStart);
                break;
            }

            // We have "=\'\"" or name - read attribute
            int attrStartOffset = this.CurrentLocation;
            ReadAttribute();
            if (tag.IsEndTag) {
                OnSyntaxError(attrStartOffset, this.CurrentLocation, "Attribute not allowed in end tag.");
            }
        }
    } else if (tag.IsDocumentType) {
        ReadContentOfDTD();
    } else {
        int start = this.CurrentLocation;
        if (tag.IsComment) {
            ReadText(TextType.Comment);
        } else if (tag.IsCData) {
            ReadText(TextType.CData);
        } else if (tag.IsProcessingInstruction) {
            ReadText(TextType.ProcessingInstruction);
        } else if (tag.IsUnknownBang) {
            ReadText(TextType.UnknownBang);
        } else {
            throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
        }
        // Backtrack at complete start
        // (go back to 'start' and drop every object stored since oldObjectCount)
        if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
            GoBack(start);
            objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
        }
    }

    // Read closing bracket
    string bracket;
    TryReadClosingBracket(out bracket);
    tag.ClosingBracket = bracket;

    // Error check
    // brStart..brEnd is the span reported for a wrong or missing closing bracket;
    // it is empty when ClosingBracket is null.
    int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
    int brEnd = this.CurrentLocation;
    if (tag.Name == null) {
        // One error was reported already
    } else if (tag.IsStartOrEmptyTag) {
        if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") {
            OnSyntaxError(brStart, brEnd, "'>' or '/>' expected");
        }
    } else if (tag.IsEndTag) {
        if (tag.ClosingBracket != ">") {
            OnSyntaxError(brStart, brEnd, "'>' expected");
        }
    } else if (tag.IsComment) {
        if (tag.ClosingBracket != "-->") {
            OnSyntaxError(brStart, brEnd, "'-->' expected");
        }
    } else if (tag.IsCData) {
        if (tag.ClosingBracket != "]]>") {
            OnSyntaxError(brStart, brEnd, "']]>' expected");
        }
    } else if (tag.IsProcessingInstruction) {
        if (tag.ClosingBracket != "?>") {
            OnSyntaxError(brStart, brEnd, "'?>' expected");
        }
    } else if (tag.IsUnknownBang) {
        if (tag.ClosingBracket != ">") {
            OnSyntaxError(brStart, brEnd, "'>' expected");
        }
    } else if (tag.IsDocumentType) {
        if (tag.ClosingBracket != ">") {
            OnSyntaxError(brStart, brEnd, "'>' expected");
        }
    } else {
        throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
    }

    // Attribute name may not appear multiple times
    if (objects.Count > oldObjectCount) {
        // Move nested objects into tag.NestedObjects:
        // (tag.NestedObjects is left unassigned when nothing was read)
        tag.NestedObjects = new InternalObject[objects.Count - oldObjectCount];
        objects.CopyTo(oldObjectCount, tag.NestedObjects, 0, tag.NestedObjects.Length);
        objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);

        // Look for duplicate attributes:
        HashSet <string> attributeNames = new HashSet <string>();
        foreach (var obj in tag.NestedObjects) {
            InternalAttribute attr = obj as InternalAttribute;
            // HashSet.Add returns false when the name is already present.
            if (attr != null && !attributeNames.Add(attr.Name)) {
                int attrStart = tagStart + attr.StartRelativeToParent;
                OnSyntaxError(attrStart, attrStart + attr.Name.Length, "Attribute with name '{0}' already exists", attr.Name);
            }
        }
    }

    EndInternalObject(frame);
}
/// <summary>
/// Appends <paramref name="obj"/> to <c>objects</c> and, unless tag-soup parsing
/// is active (<c>elementNameStack == null</c>), tries to fold properly nested
/// tags into <see cref="InternalElement"/> instances:
/// an empty tag becomes its own element, a start tag pushes its name on
/// <c>elementNameStack</c>, and an end tag whose name matches the top of the stack
/// is combined with its start tag and everything in between.
/// </summary>
/// <param name="obj">The object that was just read.</param>
void StoreObject(InternalObject obj)
{
    objects.Add(obj);

    // Now combine properly-nested elements:
    if (elementNameStack == null) {
        return; // parsing tag soup
    }
    InternalTag tag = obj as InternalTag;
    if (tag == null) {
        return;
    }

    if (tag.IsEmptyTag) {
        // the tag is its own element
        objects[objects.Count - 1] = new InternalElement(tag) {
            Length = tag.Length,
            LengthTouched = tag.LengthTouched,
            IsPropertyNested = true,
            StartRelativeToParent = tag.StartRelativeToParent,
            NestedObjects = new [] { tag.SetStartRelativeToParent(0) }
        };
    } else if (tag.IsStartTag) {
        elementNameStack.Push(tag.Name);
    } else if (tag.IsEndTag && elementNameStack.Count > 0) {
        // Now look for the start element, scanning backwards from the object
        // just before the end tag we stored above.
        int startIndex = objects.Count - 2;
        bool ok = false;
        string expectedName = elementNameStack.Pop();
        if (tag.Name == expectedName) {
            // The scan has to include index 0: the matching start tag may be the
            // very first stored object (e.g. input that begins directly with the
            // element). Stopping at index 1 would miss it and needlessly clear
            // the whole name stack below.
            while (startIndex >= 0) {
                var startTag = objects[startIndex] as InternalTag;
                if (startTag != null) {
                    if (startTag.IsStartTag) {
                        ok = (startTag.Name == expectedName);
                        break;
                    } else if (startTag.IsEndTag) {
                        // An unconsumed end tag in between means the nesting is broken.
                        break;
                    }
                }
                startIndex--;
            }
        }

        if (ok) {
            // We found a correct nesting, let's create an element
            // out of objects[startIndex..end] (start tag, content, end tag):
            InternalObject[] nestedObjects = new InternalObject[objects.Count - startIndex];
            int oldStartRelativeToParent = objects[startIndex].StartRelativeToParent;
            int pos = 0;
            int maxLengthTouched = 0;
            for (int i = 0; i < nestedObjects.Length; i++) {
                // Children are re-based so that their offsets are relative to the new element.
                nestedObjects[i] = objects[startIndex + i].SetStartRelativeToParent(pos);
                maxLengthTouched = Math.Max(maxLengthTouched, pos + nestedObjects[i].LengthTouched);
                pos += nestedObjects[i].Length;
            }
            objects.RemoveRange(startIndex, nestedObjects.Length);
            objects.Add(
                new InternalElement((InternalTag)nestedObjects[0]) {
                    HasEndTag = true,
                    IsPropertyNested = true,
                    Length = pos,
                    LengthTouched = maxLengthTouched,
                    StartRelativeToParent = oldStartRelativeToParent,
                    NestedObjects = nestedObjects
                });
        } else {
            // Mismatched name - the nesting isn't proper;
            // clear the whole stack so that none of the currently open elements are closed as properly-nested.
            elementNameStack.Clear();
        }
    }
}
/// <summary>
/// Reads one tag at the current location into a new <see cref="InternalTag"/>:
/// opening bracket, optional name, then attributes / DTD content / text depending
/// on the kind of tag, and finally the closing bracket. Syntax problems are
/// reported through <c>OnSyntaxError</c>.
/// Context: "<"
/// </summary>
void ReadTag()
{
    AssertHasMoreData();

    int tagStart = this.CurrentLocation;
    InternalTag tag = new InternalTag();
    var frame = BeginInternalObject(tag);

    // Read the opening bracket
    // It identifies the type of tag and parsing behavior for the rest of it
    tag.OpeningBracket = ReadOpeningBracket();

    if (tag.IsUnknownBang && !TryPeekWhiteSpace())
        OnSyntaxError(tagStart, this.CurrentLocation, "Unknown tag");

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
        // Read the name
        TryMoveToNonWhiteSpace();
        tag.RelativeNameStart = this.CurrentRelativeLocation;
        string name;
        if (TryReadName(out name)) {
            if (!IsValidName(name)) {
                OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
            }
        } else {
            OnSyntaxError("Element name expected");
        }
        // NOTE(review): presumably 'name' is null when TryReadName fails; the
        // 'tag.Name == null' check further down appears to rely on that - confirm.
        tag.Name = name;
    } else {
        tag.Name = string.Empty;
    }

    bool isXmlDeclr = tag.Name == "xml" && tag.IsProcessingInstruction;
    // Everything added to 'objects' from here on is treated as nested content of this tag.
    int oldObjectCount = objects.Count;

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) {
        // Read attributes for the tag
        while (HasMoreData()) {
            // Check for all forbidden 'name' characters first - see ReadName
            TryMoveToNonWhiteSpace();
            if (TryPeek('<')) break;
            string endBr;
            int endBrStart = this.CurrentLocation; // Just peek
            if (TryReadClosingBracket(out endBr)) { // End tag
                // The bracket was only probed here; go back so that the
                // TryReadClosingBracket call after the loop reads it for real.
                GoBack(endBrStart);
                break;
            }

            // We have "=\'\"" or name - read attribute
            int attrStartOffset = this.CurrentLocation;
            ReadAttribute();
            if (tag.IsEndTag)
                OnSyntaxError(attrStartOffset, this.CurrentLocation, "Attribute not allowed in end tag.");
        }
    } else if (tag.IsDocumentType) {
        ReadContentOfDTD();
    } else {
        int start = this.CurrentLocation;
        if (tag.IsComment) {
            ReadText(TextType.Comment);
        } else if (tag.IsCData) {
            ReadText(TextType.CData);
        } else if (tag.IsProcessingInstruction) {
            ReadText(TextType.ProcessingInstruction);
        } else if (tag.IsUnknownBang) {
            ReadText(TextType.UnknownBang);
        } else {
            throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
        }
        // Backtrack at complete start
        // (go back to 'start' and drop every object stored since oldObjectCount)
        if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
            GoBack(start);
            objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
        }
    }

    // Read closing bracket
    string bracket;
    TryReadClosingBracket(out bracket);
    tag.ClosingBracket = bracket;

    // Error check
    // brStart..brEnd is the span reported for a wrong or missing closing bracket;
    // it is empty when ClosingBracket is null.
    int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
    int brEnd = this.CurrentLocation;
    if (tag.Name == null) {
        // One error was reported already
    } else if (tag.IsStartOrEmptyTag) {
        if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(brStart, brEnd, "'>' or '/>' expected");
    } else if (tag.IsEndTag) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else if (tag.IsComment) {
        if (tag.ClosingBracket != "-->") OnSyntaxError(brStart, brEnd, "'-->' expected");
    } else if (tag.IsCData) {
        if (tag.ClosingBracket != "]]>") OnSyntaxError(brStart, brEnd, "']]>' expected");
    } else if (tag.IsProcessingInstruction) {
        if (tag.ClosingBracket != "?>") OnSyntaxError(brStart, brEnd, "'?>' expected");
    } else if (tag.IsUnknownBang) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else if (tag.IsDocumentType) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else {
        throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
    }

    // Attribute name may not appear multiple times
    if (objects.Count > oldObjectCount) {
        // Move nested objects into tag.NestedObjects:
        // (tag.NestedObjects is left unassigned when nothing was read)
        tag.NestedObjects = new InternalObject[objects.Count - oldObjectCount];
        objects.CopyTo(oldObjectCount, tag.NestedObjects, 0, tag.NestedObjects.Length);
        objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);

        // Look for duplicate attributes:
        HashSet<string> attributeNames = new HashSet<string>();
        foreach (var obj in tag.NestedObjects) {
            InternalAttribute attr = obj as InternalAttribute;
            // HashSet.Add returns false when the name is already present.
            if (attr != null && !attributeNames.Add(attr.Name)) {
                int attrStart = tagStart + attr.StartRelativeToParent;
                OnSyntaxError(attrStart, attrStart + attr.Name.Length, "Attribute with name '{0}' already exists", attr.Name);
            }
        }
    }

    EndInternalObject(frame);
}
/// <summary>
/// Creates a tag node. All three arguments are forwarded to the base constructor;
/// <paramref name="internalObject"/> is additionally kept in this class's own
/// <c>internalObject</c> field.
/// </summary>
/// <param name="parent">Forwarded to the base constructor.</param>
/// <param name="startOffset">Forwarded to the base constructor.</param>
/// <param name="internalObject">The <see cref="InternalTag"/> this node is created for.</param>
internal AXmlTag(AXmlObject parent, int startOffset, InternalTag internalObject) : base(parent, startOffset, internalObject) { this.internalObject = internalObject; }
/// <summary>
/// Classifies the object the iterator currently points at and updates the reader
/// state (<c>elementNodeType</c>, <c>attributes</c>, <c>readState</c>, namespace
/// scopes) accordingly. Objects that are not reported are skipped by moving the
/// iterator on and looking again.
/// </summary>
/// <returns>
/// <c>true</c> when the reader is positioned on a node to report;
/// <c>false</c> when the iterator has no current object (end of file).
/// </returns>
/// <exception cref="NotSupportedException">
/// The current object is neither an element, a text nor a tag.
/// </exception>
bool ReadCurrentPosition()
{
    // Reset per-node attribute state before looking at the new position.
    attributes = null;
    attributeIndex = -1;
    inAttributeValue = false;

    while (true) {
        var obj = objectIterator.CurrentObject;
        if (obj == null) {
            readState = ReadState.EndOfFile;
            elementNodeType = XmlNodeType.None;
            return false;
        } else if (objectIterator.IsAtElementEnd) {
            if (IsEmptyElement) {
                // Don't report EndElement for empty elements
                nsManager.PopScope();
            } else {
                elementNodeType = XmlNodeType.EndElement;
                return true;
            }
        } else if (obj is InternalElement) {
            // element start
            elementNodeType = XmlNodeType.Element;
            InternalTag startTag = (InternalTag)obj.NestedObjects[0];
            nsManager.PushScope();
            if (startTag.NestedObjects != null) {
                attributes = startTag.NestedObjects.OfType<InternalAttribute>().ToList();
                // Register namespace declarations of this element in the scope pushed above.
                for (int i = 0; i < attributes.Count; i++) {
                    var attr = attributes[i];
                    if (attr.Name.StartsWith("xmlns:", StringComparison.Ordinal)) {
                        nsManager.AddNamespace(AXmlObject.GetLocalName(attr.Name), attr.Value);
                    } else if (attr.Name == "xmlns") {
                        nsManager.AddNamespace(string.Empty, attr.Value);
                    }
                }
            }
            return true;
        } else if (obj is InternalText) {
            InternalText text = (InternalText)obj;
            if (text.ContainsOnlyWhitespace) {
                elementNodeType = XmlNodeType.Whitespace;
            } else {
                elementNodeType = XmlNodeType.Text;
            }
            return true;
        } else if (obj is InternalTag) {
            InternalTag tag = (InternalTag)obj;
            if (tag.IsStartOrEmptyTag || tag.IsEndTag) {
                // start/end tags can be skipped as the parent InternalElement already handles them
            } else if (tag.IsComment && !settings.IgnoreComments) {
                elementNodeType = XmlNodeType.Comment;
                return true;
            } else if (tag.IsProcessingInstruction && !settings.IgnoreProcessingInstructions) {
                if (tag.Name == "xml") {
                    elementNodeType = XmlNodeType.XmlDeclaration;
                    // A declaration without any nested object has NestedObjects == null;
                    // OfType() on null would throw ArgumentNullException. In that case
                    // 'attributes' stays null, exactly as for an element without attributes.
                    if (tag.NestedObjects != null) {
                        attributes = tag.NestedObjects.OfType<InternalAttribute>().ToList();
                    }
                } else {
                    elementNodeType = XmlNodeType.ProcessingInstruction;
                }
                return true;
            } else if (tag.IsCData) {
                elementNodeType = XmlNodeType.CDATA;
                return true;
            } else {
                // TODO all other tags
            }
        } else {
            throw new NotSupportedException();
        }

        // Nothing to report at this position - advance and classify again.
        objectIterator.MoveInto();
    }
}