/// <summary>
/// Incrementally parses the given text and brings the user document up to date
/// with the result.
/// The caller must hold the write lock.
/// </summary>
/// <param name="input">
/// The full XML text of the new document.
/// </param>
/// <param name="changesSinceLastParse">
/// Changes since the last parse. Passing null invalidates every tracked segment
/// and therefore causes a full reparse.
/// </param>
/// <returns>
/// The user document, after it has been updated from the freshly parsed tree.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The write lock is not held by the caller.
/// </exception>
public AXmlDocument Parse(string input, IEnumerable<DocumentChangeEventArgs> changesSinceLastParse)
{
    if (!Lock.IsWriteLockHeld) {
        throw new InvalidOperationException("Lock needed!");
    }

    // Bring the cache in line with the edits; without edit information nothing can be reused.
    if (changesSinceLastParse == null) {
        this.TrackedSegments.InvalidateAll();
    } else {
        this.TrackedSegments.UpdateOffsetsAndInvalidate(changesSinceLastParse);
    }

    // Step 1: read the flat list of tags. Step 2: match them up into a document tree.
    TagReader reader = new TagReader(this, input);
    List<AXmlObject> flatTags = reader.ReadAllTags();
    TagMatchingHeuristics heuristics = new TagMatchingHeuristics(this, input, flatTags);
    AXmlDocument freshDocument = heuristics.ReadDocument();
    reader.PrintStringCacheStats();

    // Step 3: update the document handed out to users from the freshly parsed one.
    AXmlParser.Log("Updating main DOM tree...");
    userDocument.UpdateTreeFrom(freshDocument);
    userDocument.DebugCheckConsistency(true);
    Assert(userDocument.GetSelfAndAllChildren().Count() == freshDocument.GetSelfAndAllChildren().Count(), "Parsed document and updated document have different number of children");

    return userDocument;
}
/// <summary>
/// Splits an element that has a start tag but no end tag into the part that
/// stays nested and the trailing children, which are returned as separate objects.
/// </summary>
/// <remarks>
/// Starting after the start tag, children are kept nested as long as they are
/// text or have an indent level greater than that of <paramref name="elem"/>.
/// If every child is kept, or the element is not a candidate for splitting,
/// the element itself is returned unchanged as the only item.
/// </remarks>
/// <param name="elem">The element to examine.</param>
/// <returns>
/// Either <paramref name="elem"/> alone, or a newly built element holding the
/// kept children followed by each of the remaining children of <paramref name="elem"/>.
/// </returns>
IEnumerable<AXmlObject> Split(AXmlElement elem)
{
    int ownIndent = GetIndentLevel(elem);

    // Candidates have a real start tag (not an empty-element tag), no end tag,
    // and an indent level other than -1.
    // NOTE(review): -1 presumably means "no indent level available" - confirm in GetIndentLevel.
    bool splittable = elem.HasStartOrEmptyTag && elem.StartTag.IsStartTag && !elem.HasEndTag && ownIndent != -1;
    if (!splittable) {
        yield return elem;
        yield break;
    }

    // Child 0 is the start tag and is always kept.
    int lastKept = 0;
    for (int index = 1; index < elem.Children.Count; index++) {
        AXmlObject candidate = elem.Children[index];
        // Text is always kept; anything else only when it is indented deeper than the element.
        bool keep = candidate is AXmlText || GetIndentLevel(candidate) > ownIndent;
        if (!keep) {
            break;
        }
        lastKept = index;
    }

    // Nothing was rejected, so there is nothing to split off.
    if (lastKept + 1 == elem.Children.Count) {
        yield return elem;
        yield break;
    }

    AXmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastKept, elem.Children.Count - 1);

    // Build the replacement element from the start tag plus the kept children.
    AXmlElement topHalf = new AXmlElement {
        HasStartOrEmptyTag = elem.HasStartOrEmptyTag,
        HasEndTag = elem.HasEndTag
    };
    topHalf.AddChildren(elem.Children.Take(lastKept + 1));
    topHalf.StartOffset = topHalf.FirstChild.StartOffset;
    topHalf.EndOffset = topHalf.LastChild.EndOffset;
    // The missing end tag is reported as a zero-length error right after the last kept child.
    TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset, "Expected '</{0}>'", topHalf.StartTag.Name);

    AXmlParser.Log("Constructed {0}", topHalf);
    trackedSegments.AddParsedObject(topHalf, null);
    yield return topHalf;

    // Hand back the rejected children one by one.
    int rest = lastKept + 1;
    while (rest < elem.Children.Count) {
        yield return elem.Children[rest];
        rest++;
    }
}
/// <summary>
/// Builds a document from the tag list of this instance and reports
/// well-formedness problems found at the document root.
/// </summary>
/// <remarks>
/// The tags are first passed through <c>MatchTags</c>; the resulting stream is
/// then consumed with <c>ReadTextOrElement</c> until it is exhausted. Afterwards
/// the following root-level rules are checked and reported as syntax errors:
/// an XML declaration that is not at offset 0, a missing root element, more
/// than one root element, CDATA in the root, and non-whitespace text in the root.
/// </remarks>
/// <returns>The constructed document, registered with the tracked segments.</returns>
public AXmlDocument ReadDocument()
{
    AXmlDocument doc = new AXmlDocument() { Parser = parser };

    // AXmlParser.Log("Flat stream: {0}", PrintObjects(tags));
    List<AXmlObject> valid = MatchTags(tags);
    // AXmlParser.Log("Fixed stream: {0}", PrintObjects(valid));
    IEnumerator<AXmlObject> validStream = valid.GetEnumerator();
    validStream.MoveNext(); // Move to first
    while (true) {
        // End of stream?
        // NOTE(review): the InvalidCastException catch is presumably a workaround for an
        // enumerator that throws when Current is read past the end - confirm before removing.
        try {
            if (validStream.Current == null) {
                break;
            }
        } catch (InvalidCastException) {
            break;
        }
        doc.AddChild(ReadTextOrElement(validStream));
    }

    if (doc.Children.Count > 0) {
        doc.StartOffset = doc.FirstChild.StartOffset;
        doc.EndOffset = doc.LastChild.EndOffset;
    }

    // Check well formed

    // The name is matched case-insensitively with an ordinal comparison: this does not
    // depend on the current culture, allocates no lowered copy and tolerates a null name.
    foreach (AXmlTag xmlDeclaration in doc.Children.OfType<AXmlTag>().Where(t => t.IsProcessingInstruction && string.Equals(t.Name, "xml", StringComparison.OrdinalIgnoreCase))) {
        if (xmlDeclaration.StartOffset != 0) {
            // NOTE(review): the 5 presumably covers the "<?xml" prefix - confirm.
            TagReader.OnSyntaxError(doc, xmlDeclaration.StartOffset, xmlDeclaration.StartOffset + 5, "XML declaration must be at the start of document");
        }
    }

    // Collect the root-level elements once; both checks below work on this list.
    List<AXmlElement> rootElements = doc.Children.OfType<AXmlElement>().ToList();
    if (rootElements.Count == 0) {
        TagReader.OnSyntaxError(doc, doc.EndOffset, doc.EndOffset, "Root element is missing");
    }
    if (rootElements.Count > 1) {
        // The error is placed at the start of the second root-level element.
        AXmlElement next = rootElements[1];
        TagReader.OnSyntaxError(doc, next.StartOffset, next.StartOffset, "Only one root element is allowed");
    }

    foreach (AXmlTag tag in doc.Children.OfType<AXmlTag>()) {
        if (tag.IsCData) {
            TagReader.OnSyntaxError(doc, tag.StartOffset, tag.EndOffset, "CDATA not allowed in document root");
        }
    }

    foreach (AXmlText text in doc.Children.OfType<AXmlText>()) {
        if (!text.ContainsOnlyWhitespace) {
            TagReader.OnSyntaxError(doc, text.StartOffset, text.EndOffset, "Only whitespace is allowed in document root");
        }
    }

    AXmlParser.Log("Constructed {0}", doc);
    trackedSegments.AddParsedObject(doc, null);
    return doc;
}
/// <summary>
/// Reads one element from the object stream: its start tag (or the start tag
/// placeholder), then - unless the start tag is an empty-element tag - its
/// content up to and including the end tag (or the end tag placeholder).
/// </summary>
/// <remarks>
/// Placeholders stand in for a missing start or end tag; each one is reported as
/// a syntax error and marks the element as not properly nested. Nested elements
/// are passed through <c>Split</c> before being added, and an improperly nested
/// child makes this element improperly nested as well.
/// </remarks>
/// <param name="objStream">
/// The stream to read from. Its current item is expected to be the start tag or
/// the start tag placeholder.
/// </param>
/// <returns>The constructed element, registered with the tracked segments.</returns>
AXmlElement ReadElement(IEnumerator<AXmlObject> objStream)
{
    AXmlElement element = new AXmlElement { IsProperlyNested = true };

    // Read start tag
    AXmlTag startTag = ReadSingleObject(objStream) as AXmlTag;
    AXmlParser.DebugAssert(startTag != null, "Start tag expected");
    AXmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected");

    bool startTagMissing = startTag == StartTagPlaceholder;
    element.HasStartOrEmptyTag = !startTagMissing;
    if (startTagMissing) {
        element.IsProperlyNested = false;
        // The placeholder has been consumed, so the error is reported on the object that follows it.
        TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset, "Matching openning tag was not found");
    } else {
        element.AddChild(startTag);
    }

    // Read content and end tag.
    // The placeholder test comes first because element.StartTag is not available in that case.
    bool hasContent = startTagMissing || element.StartTag.IsStartTag;
    if (!hasContent) {
        element.HasEndTag = false;
    } else {
        bool closed = false;
        while (!closed) {
            // Peek at the next object without consuming it.
            AXmlTag upcoming = objStream.Current as AXmlTag;
            if (upcoming == EndTagPlaceholder) {
                // Missing end tag: report it right after the last child and consume the placeholder.
                TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset, "Expected '</{0}>'", element.StartTag.Name);
                ReadSingleObject(objStream);
                element.HasEndTag = false;
                element.IsProperlyNested = false;
                closed = true;
            } else if (upcoming != null && upcoming.IsEndTag) {
                if (element.HasStartOrEmptyTag && upcoming.Name != element.StartTag.Name) {
                    // Highlight just the name inside the end tag (offset + 2 skips past "</").
                    TagReader.OnSyntaxError(element, upcoming.StartOffset + 2, upcoming.StartOffset + 2 + upcoming.Name.Length, "Expected '{0}'.  End tag must have same name as start tag.", element.StartTag.Name);
                }
                element.AddChild(ReadSingleObject(objStream));
                element.HasEndTag = true;
                closed = true;
            } else {
                AXmlObject child = ReadTextOrElement(objStream);
                AXmlElement childElement = child as AXmlElement;
                if (childElement == null) {
                    element.AddChild(child);
                } else {
                    if (!childElement.IsProperlyNested) {
                        element.IsProperlyNested = false;
                    }
                    // A nested element may be split into several objects; add all of them.
                    element.AddChildren(Split(childElement).ToList());
                }
            }
        }
    }

    element.StartOffset = element.FirstChild.StartOffset;
    element.EndOffset = element.LastChild.EndOffset;

    AXmlParser.Assert(element.HasStartOrEmptyTag || element.HasEndTag, "Must have at least start or end tag");

    AXmlParser.Log("Constructed {0}", element);
    trackedSegments.AddParsedObject(element, null); // Need all elements in cache for offset tracking
    return element;
}