/// <summary>
/// Incrementally parses the given text and brings the user-visible document up to date.
/// The caller has to hold the write lock.
/// </summary>
/// <param name="input">
/// The full XML text of the new document.
/// </param>
/// <param name="changesSinceLastParse">
/// Changes since the last parse. Passing null invalidates the whole cache, causing a full reparse.
/// </param>
/// <returns>The user document after it has been updated from the newly parsed tree.</returns>
/// <exception cref="InvalidOperationException">The write lock is not held.</exception>
public AXmlDocument Parse(string input, IEnumerable<DocumentChangeEventArgs> changesSinceLastParse)
{
    if (!Lock.IsWriteLockHeld) {
        throw new InvalidOperationException("Lock needed!");
    }

    // Decide how much of the cache survives this parse
    if (changesSinceLastParse == null) {
        this.TrackedSegments.InvalidateAll();
    } else {
        this.TrackedSegments.UpdateOffsetsAndInvalidate(changesSinceLastParse);
    }

    // First pass: flat list of tags; second pass: nest them into a document
    TagReader reader = new TagReader(this, input);
    List<AXmlObject> flatTags = reader.ReadAllTags();
    TagMatchingHeuristics heuristics = new TagMatchingHeuristics(this, input, flatTags);
    AXmlDocument freshDocument = heuristics.ReadDocument();
    reader.PrintStringCacheStats();

    // Merge the fresh tree into the long-lived user document
    AXmlParser.Log("Updating main DOM tree...");
    userDocument.UpdateTreeFrom(freshDocument);
    userDocument.DebugCheckConsistency(true);
    Assert(userDocument.GetSelfAndAllChildren().Count() == freshDocument.GetSelfAndAllChildren().Count(), "Parsed document and updated document have different number of children");

    return userDocument;
}
/// <summary>
/// Applies document changes to the tracked segments: shifts the offsets of all
/// segments and invalidates cached objects that overlap a changed region or that
/// are recorded (through a <c>TouchedRange</c>) as having read from it.
/// </summary>
/// <param name="changes">The document changes to apply, processed one by one.</param>
public void UpdateOffsetsAndInvalidate(IEnumerable<DocumentChangeEventArgs> changes)
{
    foreach (DocumentChangeEventArgs change in changes) {
        // Update offsets of all items
        segments.UpdateOffsets(change);

        // Invalidate any items affected by the change
        AXmlParser.Log("Changed {0}-{1}", change.Offset, change.Offset + change.InsertionLength);
        // Removing will cause one of the ends to be set to change.Offset.
        // NOTE(review): the original comment relies on the overlap query also returning
        // segments that merely touch the range (taking care of the +1 byte) - confirm
        // against FindOverlappingSegments.
        var overlapping = segments.FindOverlappingSegments(change.Offset, change.InsertionLength);

        foreach (AXmlObject obj in overlapping.OfType<AXmlObject>().Where(o => o.IsCached)) {
            InvalidateCache(obj, false);
        }

        // Snapshot the touched ranges before removing them: the loop below mutates
        // 'segments', which must not happen while a query over it is still being enumerated.
        List<TouchedRange> touchedRanges = overlapping.OfType<TouchedRange>().ToList();
        foreach (TouchedRange range in touchedRanges) {
            AXmlParser.Log("Found that {0} dependeds on ({1}-{2})", range.TouchedByObject, range.StartOffset, range.EndOffset);
            // The object that read this range is stale, and so are its parents
            InvalidateCache(range.TouchedByObject, true);
            segments.Remove(range);
        }
    }
}
/// <summary>
/// Adds start or end tag placeholders so that the document is properly nested.
/// Every object is fed through <c>ProcessObject</c>, any start tags left open are closed
/// with end tag placeholders, and the document of the cheapest configuration is returned.
/// </summary>
List<AXmlObject> MatchTags(IEnumerable<AXmlObject> objs)
{
    // Start from a single configuration: nothing open, nothing emitted, no cost
    Configuration seed = new Configuration();
    seed.StartTags = ImmutableStack<AXmlTag>.Empty;
    seed.Document = ImmutableStack<AXmlObject>.Empty;
    seed.Cost = 0;

    Configurations candidates = new Configurations();
    candidates.Add(seed);

    foreach (AXmlObject current in objs) {
        candidates = ProcessObject(candidates, current);
    }

    // Each start tag still open costs one end tag placeholder
    foreach (Configuration candidate in candidates.Values) {
        while (!candidate.StartTags.IsEmpty) {
            candidate.StartTags = candidate.StartTags.Pop();
            candidate.Document = candidate.Document.Push(EndTagPlaceholder);
            candidate.Cost += 1;
        }
    }
    // AXmlParser.Log("Configurations after closing all remaining tags:" + configurations.ToString());

    Configuration cheapest = candidates.Values.OrderBy(c => c.Cost).First();
    AXmlParser.Log("Best configuration has cost {0}", cheapest.Cost);

    // The document was built by pushing onto a stack, so reverse it to restore input order
    return cheapest.Document.Reverse().ToList();
}
/// <summary>
/// Creates the heuristics for the given input and flat tag list, using the
/// tracked segments of <paramref name="parser"/>.
/// </summary>
/// <param name="parser">Parser whose tracked segments are used.</param>
/// <param name="input">The text that was parsed.</param>
/// <param name="tags">The objects to be matched.</param>
public TagMatchingHeuristics(AXmlParser parser, string input, List<AXmlObject> tags)
{
    this.input = input;
    this.tags = tags;
    this.parser = parser;
    this.trackedSegments = parser.TrackedSegments;
}
/// <summary>
/// Adds an object to the cache and, optionally, also tracks the range of input it touched.
/// </summary>
/// <param name="obj">Object to track. Anything other than a document is asserted to have non-zero length.</param>
/// <param name="maxTouchedLocation">
/// Furthest location that was read for the object, or null when no touched range should be recorded.
/// </param>
public void AddParsedObject(AXmlObject obj, int? maxTouchedLocation)
{
    bool hasValidLength = obj.Length > 0 || obj is AXmlDocument;
    if (!hasValidLength) {
        // The message is only formatted when the check actually fails
        AXmlParser.Assert(false, string.Format(CultureInfo.InvariantCulture, "Invalid object {0}. It has zero length.", obj));
    }

    // // Expensive check
    // if (obj is AXmlContainer) {
    //     int objStartOffset = obj.StartOffset;
    //     int objEndOffset = obj.EndOffset;
    //     foreach(AXmlObject child in ((AXmlContainer)obj).Children) {
    //         AXmlParser.Assert(objStartOffset <= child.StartOffset && child.EndOffset <= objEndOffset, "Wrong nesting");
    //     }
    // }

    segments.Add(obj);
    AddSyntaxErrorsOf(obj);
    obj.IsCached = true;

    if (!maxTouchedLocation.HasValue) {
        return;
    }

    // The location itself is assumed to have been read, so the range ends at (location + 1).
    // For example, for "a_" it is (0-2)
    TouchedRange touched = new TouchedRange();
    touched.StartOffset = obj.StartOffset;
    touched.EndOffset = maxTouchedLocation.Value + 1;
    touched.TouchedByObject = obj;
    segments.Add(touched);
    AXmlParser.Log("{0} touched range ({1}-{2})", obj, touched.StartOffset, touched.EndOffset);
}
/// <summary>
/// Splits an element that has a start tag but no end tag, based on indentation.
/// The start tag, all text and all children indented deeper than the element stay in a new
/// "top half" element; the remaining children are returned after it as separate objects.
/// Elements that do not qualify, or whose children are all accepted, are returned unchanged.
/// </summary>
IEnumerable<AXmlObject> Split(AXmlElement elem)
{
    int elemIndent = GetIndentLevel(elem);

    // Candidate: has a real start tag (not an empty-element tag), has no end tag,
    // and GetIndentLevel did not return -1
    bool isCandidate = elem.HasStartOrEmptyTag && elem.StartTag.IsStartTag && !elem.HasEndTag && elemIndent != -1;
    if (!isCandidate) {
        yield return elem;
        yield break;
    }

    // Index of the last child that stays in the element; child 0 is the start tag
    int lastKept = 0;
    for (int i = 1; i < elem.Children.Count; i++) {
        AXmlObject child = elem.Children[i];
        // Text is always accepted; anything else only when it is indented deeper
        if (!(child is AXmlText) && GetIndentLevel(child) <= elemIndent) {
            break;
        }
        lastKept = i;
    }

    // Accepted everything?
    if (lastKept + 1 == elem.Children.Count) {
        yield return elem;
        yield break;
    }

    AXmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastKept, elem.Children.Count - 1);

    // Build the replacement element out of the start tag and the accepted children
    AXmlElement topHalf = new AXmlElement();
    topHalf.HasStartOrEmptyTag = elem.HasStartOrEmptyTag;
    topHalf.HasEndTag = elem.HasEndTag;
    topHalf.AddChildren(elem.Children.Take(1 + lastKept));
    topHalf.StartOffset = topHalf.FirstChild.StartOffset;
    topHalf.EndOffset = topHalf.LastChild.EndOffset;
    TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset, "Expected '</{0}>'", topHalf.StartTag.Name);

    AXmlParser.Log("Constructed {0}", topHalf);
    trackedSegments.AddParsedObject(topHalf, null);
    yield return topHalf;

    // The rejected children follow as siblings
    for (int rest = lastKept + 1; rest < elem.Children.Count; rest++) {
        yield return elem.Children[rest];
    }
}
/// <summary>
/// Invalidates all objects. That is, the whole document has changed.
/// </summary>
/// <remarks>
/// The items are only marked as not cached; they stay tracked because they might still be in the document.
/// </remarks>
public void InvalidateAll()
{
    AXmlParser.Log("Invalidating all objects");
    IEnumerable<AXmlObject> trackedObjects = segments.OfType<AXmlObject>();
    foreach (AXmlObject tracked in trackedObjects) {
        tracked.IsCached = false;
    }
}
/// <summary>
/// Returns the input text between <paramref name="start"/> (inclusive) and
/// <paramref name="end"/> (exclusive), passed through the string cache.
/// </summary>
/// <param name="start">Offset of the first character.</param>
/// <param name="end">Offset just past the last character; asserted not to exceed the current location.</param>
protected string GetText(int start, int end)
{
    AXmlParser.Assert(end <= currentLocation, "Reading ahead of current location");

    // An empty range positioned exactly at the end of input needs no substring
    bool emptyAtEndOfInput = (start == inputLength) && (end == inputLength);
    if (emptyAtEndOfInput) {
        return string.Empty;
    }

    string raw = input.Substring(start, end - start);
    return GetCachedString(raw);
}
/// <summary>
/// To be used exclusively by the children update algorithm.
/// Inserts the child at the given index, updates parent pointers and notifies the document.
/// </summary>
void InsertChild(int index, AXmlObject item)
{
    AXmlParser.Log("Inserting {0} at index {1}", item, index);

    // Preconditions: this object belongs to a document and does not already own the item
    Assert(this.Document != null, "Can not insert to dangling object");
    Assert(item.Parent != this, "Can not own item twice");

    // Link first, then insert, then notify
    SetParentPointersInTree(item);
    this.Children.InsertItemAt(index, item);
    this.Document.OnObjectInserted(index, item);
}
/// <summary> Marks an item (and optionally its parents) as not cached while still tracking it </summary>
/// <remarks> May be called redundantly for the same item (from range tracking) </remarks>
void InvalidateCache(AXmlObject obj, bool includeParents)
{
    if (includeParents) {
        // Parents are invalidated before the object itself
        foreach (AXmlObject ancestor in FindParents(obj)) {
            ancestor.IsCached = false;
            AXmlParser.Log("Invalidating cached item {0} (it is parent)", ancestor);
        }
    }

    obj.IsCached = false;
    AXmlParser.Log("Invalidating cached item {0}", obj);
}
/// <summary>
/// Records a syntax error on the given object.
/// </summary>
/// <param name="obj">Object the error is attached to.</param>
/// <param name="start">Start offset of the error.</param>
/// <param name="end">End offset of the error; when it is not greater than <paramref name="start"/>, start + 1 is used instead.</param>
/// <param name="message">Composite format string of the error message.</param>
/// <param name="args">Arguments for <paramref name="message"/>.</param>
public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args)
{
    // Make sure the reported range covers at least one character
    if (end <= start) {
        end = start + 1;
    }

    // Format once, culture-invariantly, so that the logged text and the stored
    // message are identical and do not depend on the current thread culture
    string formattedMessage = string.Format(System.Globalization.CultureInfo.InvariantCulture, message, args);

    AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, formattedMessage);
    obj.AddSyntaxError(new SyntaxError() {
        Object = obj,
        StartOffset = start,
        EndOffset = end,
        Message = formattedMessage,
    });
}
/// <summary>
/// Creates a text object of type <c>TextType.Other</c> for the given input range
/// and registers it as parsed.
/// </summary>
/// <param name="start">Start offset of the text.</param>
/// <param name="end">End offset of the text; debug-asserted to be greater than <paramref name="start"/>.</param>
AXmlText MakeText(int start, int end)
{
    AXmlParser.DebugAssert(end > start, "Empty text");

    AXmlText fragment = new AXmlText();
    fragment.StartOffset = start;
    fragment.EndOffset = end;
    fragment.EscapedValue = GetText(start, end);
    fragment.Type = TextType.Other;

    OnParsed(fragment);
    return fragment;
}
/// <summary>
/// Records a syntax error on the given object, formatting the message with the invariant culture.
/// </summary>
/// <param name="obj">Object the error is attached to.</param>
/// <param name="start">Start offset of the error.</param>
/// <param name="end">End offset of the error; when it is not greater than <paramref name="start"/>, start + 1 is used instead.</param>
/// <param name="message">Composite format string of the error message.</param>
/// <param name="args">Arguments for <paramref name="message"/>.</param>
public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args)
{
    // An empty or inverted range is widened to one character
    int errorEnd = (end <= start) ? start + 1 : end;

    string text = string.Format(CultureInfo.InvariantCulture, message, args);
    AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, errorEnd, text);

    SyntaxError error = new SyntaxError();
    error.Object = obj;
    error.StartOffset = start;
    error.EndOffset = errorEnd;
    error.Message = text;
    obj.AddSyntaxError(error);
}
/// <summary>
/// Looks for a cached object of type <typeparamref name="T"/> at the current location.
/// On a hit the reader skips over the cached object; otherwise a new instance is created.
/// </summary>
/// <param name="res">The cached object, or a newly constructed one.</param>
/// <param name="condition">Condition passed on to the cache lookup.</param>
/// <returns>true when a cached object was found and skipped over; false when a new object was created.</returns>
bool TryReadFromCacheOrNew<T>(out T res, Predicate<T> condition) where T : AXmlObject, new()
{
    T hit = trackedSegments.GetCachedObject<T>(this.CurrentLocation, 0, condition);

    if (hit == null) {
        // Nothing reusable here - the caller has to parse into a fresh object
        res = new T();
        return false;
    }

    Skip(hit.Length);
    AXmlParser.Assert(hit.Length > 0, "cached elements must not have zero length");
    res = hit;
    return true;
}
/// <summary>
/// Parses the initial document text in full and then incrementally parses the final
/// document text with a single change, all under the parser's write lock.
/// </summary>
void RunTest1()
{
    AXmlParser parser = new AXmlParser();

    // Acquire the lock BEFORE entering the try block: if the acquisition itself fails,
    // the finally block must not try to release a lock that was never taken
    // (presumably Lock is a ReaderWriterLockSlim, whose ExitWriteLock throws in that case).
    parser.Lock.EnterWriteLock();
    try {
        parser.Parse(initialDocumentText, null); // full reparse

        IList<DocumentChangeEventArgs> changes = new List<DocumentChangeEventArgs>();
        changes.Add(new DocumentChangeEventArgs(offset, original, replacement));
        parser.Parse(finalDocumentText, changes);
    } finally {
        parser.Lock.ExitWriteLock();
    }
}
/// <summary>
/// Sets up the marker service, hooks the mouse-move and document-changed events,
/// creates the parser and starts a timer that calls Button_Click every half second.
/// </summary>
protected override void OnInitialized(EventArgs e)
{
    markerService = new TextMarkerService(editor.TextArea);
    editor.TextArea.TextView.MouseMove += editor_TextArea_TextView_MouseMove;

    // Remember every document change and flag the text as dirty
    editor.Document.Changed += (sender, e2) => {
        textDirty = true;
        changes.Add(e2);
    };

    parser = new AXmlParser();

    DispatcherTimer timer = new DispatcherTimer { Interval = TimeSpan.FromSeconds(0.5) };
    timer.Tick += (sender, tickArgs) => Button_Click(null, null);
    timer.Start();

    base.OnInitialized(e);
}
/// <summary> Stops tracking an object together with all of its non-cached children </summary>
public void RemoveParsedObject(AXmlObject obj)
{
    // Cached objects may be used in the future - leave them (and their subtree) alone
    if (obj.IsCached) {
        return;
    }

    segments.Remove(obj);
    RemoveSyntaxErrorsOf(obj);
    AXmlParser.Log("Stopped tracking {0}", obj);

    AXmlContainer container = obj as AXmlContainer;
    if (container == null) {
        return;
    }
    foreach (AXmlObject child in container.Children) {
        RemoveParsedObject(child);
    }
}
/// <summary>
/// To be used exclusively by the children update algorithm.
/// Removes the child at the given index, clears its parent pointer and notifies the document.
/// </summary>
void RemoveChild(int index)
{
    AXmlObject child = this.Children[index];
    AXmlParser.Log("Removing {0} at index {1}", child, index);

    // An object that is not cached can not be used again, so stop tracking it
    if (!child.IsCached) {
        this.Document.Parser.TrackedSegments.RemoveParsedObject(child);
    }

    // Unlink from this parent
    Assert(child.Parent == this, "Inconsistent child");
    child.Parent = null;

    this.Children.RemoveItemAt(index);
    this.Document.OnObjectRemoved(index, child);
}
/// <summary>
/// Raises the Changed event, notifies the owning document (if any) and, for start,
/// empty and end tags whose parent is an element, raises the change on the parent too.
/// </summary>
protected void OnChanged()
{
    AXmlParser.Log("Changed {0}", this);

    // Subscribers of this object
    if (Changed != null) {
        AXmlObjectEventArgs eventArgs = new AXmlObjectEventArgs();
        eventArgs.Object = this;
        Changed(this, eventArgs);
    }

    // Document-level notification
    AXmlDocument owner = this.Document;
    if (owner != null) {
        owner.OnObjectChanged(this);
    }

    // As a convenience, also raise the event for the parent element
    AXmlTag tag = this as AXmlTag;
    if (tag == null) {
        return;
    }
    if ((tag.IsStartOrEmptyTag || tag.IsEndTag) && tag.Parent is AXmlElement) {
        tag.Parent.OnChanged();
    }
}
/// <summary> Asserts that the reader has more data to read </summary>
protected void AssertHasMoreData()
{
    bool moreDataAvailable = HasMoreData();
    AXmlParser.Assert(moreDataAvailable, "Unexpected end of file");
}
/// <summary>
/// Registers a freshly parsed object with the tracked segments. The maximal touched
/// location is passed along only when it lies beyond the current location.
/// </summary>
void OnParsed(AXmlObject obj)
{
    AXmlParser.Log("Parsed {0}", obj);

    int? touchedLocation = null;
    if (this.MaxTouchedLocation > this.CurrentLocation) {
        touchedLocation = (int?)this.MaxTouchedLocation;
    }
    trackedSegments.AddParsedObject(obj, touchedLocation);
}
/// <summary>
/// Creates a tag reader over the given input, using the tracked segments of
/// <paramref name="parser"/>.
/// </summary>
/// <param name="parser">Parser whose tracked segments are used.</param>
/// <param name="input">The text to read; also passed to the base class.</param>
public TagReader(AXmlParser parser, string input) : base(input)
{
    this.input = input;
    this.parser = parser;
    this.trackedSegments = parser.TrackedSegments;
}
/// <summary>
/// Parses the initial document text in full and then incrementally parses the final
/// document text with a single change, all under the parser's write lock.
/// </summary>
void RunTest1()
{
    AXmlParser parser = new AXmlParser();

    // Acquire the lock BEFORE entering the try block: if the acquisition itself fails,
    // the finally block must not try to release a lock that was never taken
    // (presumably Lock is a ReaderWriterLockSlim, whose ExitWriteLock throws in that case).
    parser.Lock.EnterWriteLock();
    try {
        parser.Parse(initialDocumentText, null); // full reparse

        IList<DocumentChangeEventArgs> changes = new List<DocumentChangeEventArgs>();
        changes.Add(new DocumentChangeEventArgs(offset, original, replacement));
        parser.Parse(finalDocumentText, changes);
    } finally {
        parser.Lock.ExitWriteLock();
    }
}
/// <summary>
/// Moves the current location back to <paramref name="oldLocation"/>, first recording
/// the location being left as touched if it is the furthest one so far.
/// </summary>
/// <param name="oldLocation">Location to return to; asserted not to be ahead of the current location.</param>
protected void GoBack(int oldLocation)
{
    AXmlParser.Assert(oldLocation <= currentLocation, "Trying to move forward");

    // Keep the high-water mark of everything that has been looked at
    if (currentLocation > maxTouchedLocation) {
        maxTouchedLocation = currentLocation;
    }
    currentLocation = oldLocation;
}
// How far ahead to search for an already cached text fragment.
// More than maxTextFragmentSize so that we do not get small "what was inserted" fragments
const int lookAheadLength = (3 * maxTextFragmentSize) / 2; // More so that we do not get small "what was inserted" fragments

/// <summary>
/// Reads text and optionally separates it into fragments.
/// It can also return an empty set when there is no appropriate text input.
/// Make sure you enumerate it only once.
/// </summary>
/// <param name="type">
/// Kind of text to read; it selects which terminator ends the text
/// (see the branches on <c>TextType</c> below).
/// </param>
/// <returns>
/// Lazily produced text fragments - cached ones where available, newly parsed ones otherwise.
/// Newly parsed fragments with an empty escaped value are not returned.
/// </returns>
IEnumerable <AXmlObject> ReadText(TextType type)
{
    // Lookahead for cached fragments is enabled only after the first pass of the loop
    bool lookahead = false;
    while (true) {
        AXmlText text;
        if (TryReadFromCacheOrNew(out text, t => t.Type == type)) {
            // Cached text found
            yield return(text);
            continue; // Read next fragment; the method can handle "no text left"
        }
        text.Type = type;

        // Limit the reading to just a few characters
        // (the first character not to be read)
        int fragmentEnd = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength);

        // Look if some further text has been already processed and align so that
        // we hit that cache point. It is expensive so it is off for the first run
        if (lookahead) {
            // Note: Must fit entity
            AXmlObject nextFragment = trackedSegments.GetCachedObject <AXmlText>(this.CurrentLocation + maxEntityLength, lookAheadLength - maxEntityLength, t => t.Type == type);
            if (nextFragment != null) {
                fragmentEnd = Math.Min(nextFragment.StartOffset, this.InputLength);
                AXmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, fragmentEnd);
            }
        }
        lookahead = true;

        text.StartOffset = this.CurrentLocation;
        int start = this.CurrentLocation;

        // Whitespace would be skipped anyway by any operation
        TryMoveToNonWhiteSpace(fragmentEnd);
        // Remember where the leading whitespace ended; used for ContainsOnlyWhitespace below
        int wsEnd = this.CurrentLocation;

        // Try move to the terminator given by the context
        if (type == TextType.WhiteSpace) {
            TryMoveToNonWhiteSpace(fragmentEnd);
        } else if (type == TextType.CharacterData) {
            // Character data stops at '<'; a ']' is stepped over, reporting "]]>" as an error
            while (true) {
                if (!TryMoveToAnyOf(new char[] { '<', ']' }, fragmentEnd)) {
                    break; // End of fragment
                }
                if (TryPeek('<')) {
                    break;
                }
                if (TryPeek(']')) {
                    if (TryPeek("]]>")) {
                        OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
                    }
                    TryMoveNext();
                    continue;
                }
                // Reached only if the move succeeded but neither searched character is at the current location
                throw new Exception("Infinite loop");
            }
        } else if (type == TextType.Comment) {
            // Comment text stops at "-->"; a "--" inside is reported once
            // Do not report too many errors
            bool errorReported = false;
            while (true) {
                if (!TryMoveTo('-', fragmentEnd)) {
                    break; // End of fragment
                }
                if (TryPeek("-->")) {
                    break;
                }
                if (TryPeek("--") && !errorReported) {
                    OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
                    errorReported = true;
                }
                TryMoveNext();
            }
        } else if (type == TextType.CData) {
            // CDATA text stops at "]]>"
            while (true) {
                // We can not use TryMoveTo("]]>", fragmentEnd) because it may incorrectly accept "]" at the end of fragment
                if (!TryMoveTo(']', fragmentEnd)) {
                    break; // End of fragment
                }
                if (TryPeek("]]>")) {
                    break;
                }
                TryMoveNext();
            }
        } else if (type == TextType.ProcessingInstruction) {
            // Processing instruction text stops at "?>"
            while (true) {
                if (!TryMoveTo('?', fragmentEnd)) {
                    break; // End of fragment
                }
                if (TryPeek("?>")) {
                    break;
                }
                TryMoveNext();
            }
        } else if (type == TextType.UnknownBang) {
            TryMoveToAnyOf(new char[] { '<', '>' }, fragmentEnd);
        } else {
            throw new Exception("Uknown type " + type);
        }

        // Nothing was consumed after the leading whitespace
        text.ContainsOnlyWhitespace = (wsEnd == this.CurrentLocation);

        // Terminal found or real end was reached;
        bool finished = this.CurrentLocation < fragmentEnd || IsEndOfFile();

        if (!finished) {
            // We have to continue reading more text fragments

            // If there is an entity reference, make sure the next segment starts with it to prevent fragmentation
            int entitySearchStart = Math.Max(start + 1 /* data for us */, this.CurrentLocation - maxEntityLength);
            int entitySearchLength = this.CurrentLocation - entitySearchStart;
            if (entitySearchLength > 0) {
                // Note that LastIndexOf works backward
                int entityIndex = input.LastIndexOf('&', this.CurrentLocation - 1, entitySearchLength);
                if (entityIndex != -1) {
                    // End this fragment just before the '&'
                    GoBack(entityIndex);
                }
            }
        }

        text.EscapedValue = GetText(start, this.CurrentLocation);
        if (type == TextType.CharacterData) {
            // Normalize end of line first
            text.Value = Dereference(text, NormalizeEndOfLine(text.EscapedValue), start);
        } else {
            text.Value = text.EscapedValue;
        }
        text.EndOffset = this.CurrentLocation;

        // Empty fragments are neither registered nor returned
        if (text.EscapedValue.Length > 0) {
            OnParsed(text);
            yield return(text);
        }

        if (finished) {
            yield break;
        }
    }
}
/// <summary>
/// Logs the string cache statistics: requested and stored counts and sizes, and the
/// stored size as a percentage of the requested size (0 when nothing was requested).
/// </summary>
public void PrintStringCacheStats()
{
    // The percentage is computed in 64-bit arithmetic so that 'stored size * 100'
    // can not overflow if the counters are 32-bit integers.
    AXmlParser.Log("String cache: Requested {0} ({1} bytes); Actaully stored {2} ({3} bytes); {4}% stored",
        stringCacheRequestedCount,
        stringCacheRequestedSize,
        stringCacheStoredCount,
        stringCacheStoredSize,
        stringCacheRequestedSize == 0 ? 0 : (long)stringCacheStoredSize * 100 / stringCacheRequestedSize);
}
/// <summary>
/// Parses one test file and checks the result: the pretty-printed output must equal the
/// original content, the canonical output must equal the expected one, and syntax errors
/// must be reported exactly when the file is not well formed. A report is appended to
/// errorOutput for every failed check.
/// </summary>
/// <remarks>
/// If using DTD, canonical representation is not checked
/// If using DTD, unknown entity references are not error
/// </remarks>
/// <param name="testFile">The test file; its Name, Content, Canonical and Description are used.</param>
/// <param name="isWellFormed">Whether the file is expected to be well formed.</param>
/// <returns>true when all checks passed; otherwise false.</returns>
bool TestFile(TestFile testFile, bool isWellFormed)
{
    bool passed = true;

    string content = testFile.Content;
    Debug.WriteLine("Testing " + testFile.Name + "...");
    AXmlParser parser = new AXmlParser();

    bool usingDTD = content.Contains("<!DOCTYPE") && (content.Contains("<!ENTITY") || content.Contains(" SYSTEM "));
    if (usingDTD) {
        parser.UnknownEntityReferenceIsError = false;
    }

    AXmlDocument document;
    parser.Lock.EnterWriteLock();
    try {
        document = parser.Parse(content, null);
    } finally {
        parser.Lock.ExitWriteLock();
    }

    // Pretty printing must reproduce the input exactly
    string printed = PrettyPrintAXmlVisitor.PrettyPrint(document);
    if (content != printed) {
        errorOutput.AppendFormat("Output of pretty printed XML for \"{0}\" does not match the original.\n", testFile.Name);
        errorOutput.AppendFormat("Pretty printed:\n{0}\n", Indent(printed));
        passed = false;
    }

    // Canonical form is compared only for well formed files that do not use a DTD
    if (isWellFormed && !usingDTD) {
        string canonicalPrint = CanonicalPrintAXmlVisitor.Print(document);
        if (testFile.Canonical != null) {
            if (testFile.Canonical != canonicalPrint) {
                errorOutput.AppendFormat("Canonical XML for \"{0}\" does not match the excpected.\n", testFile.Name);
                errorOutput.AppendFormat("Expected:\n{0}\n", Indent(testFile.Canonical));
                errorOutput.AppendFormat("Seen:\n{0}\n", Indent(canonicalPrint));
                passed = false;
            }
        } else {
            // Terminate the line so that the suggestion below starts on its own line
            errorOutput.AppendFormat("Can not find canonical output for \"{0}\"\n", testFile.Name);
            errorOutput.AppendFormat("Suggested canonical output:\n{0}\n", Indent(canonicalPrint));
            passed = false;
        }
    }

    bool hasErrors = document.SyntaxErrors.Any();
    if (isWellFormed && hasErrors) {
        errorOutput.AppendFormat("Syntax error(s) in well formed file \"{0}\":\n", testFile.Name);
        foreach (var error in document.SyntaxErrors) {
            // Show up to 10 characters of context following the error position
            string followingText = content.Substring(error.StartOffset, Math.Min(10, content.Length - error.StartOffset));
            errorOutput.AppendFormat("Error ({0}-{1}): {2} (followed by \"{3}\")\n", error.StartOffset, error.EndOffset, error.Message, followingText);
        }
        passed = false;
    }

    if (!isWellFormed && !hasErrors) {
        errorOutput.AppendFormat("No syntax errors reported for mallformed file \"{0}\"\n", testFile.Name);
        passed = false;
    }

    // Epilog
    if (!passed) {
        if (testFile.Description != null) {
            errorOutput.AppendFormat("Test description:\n{0}\n", Indent(testFile.Description));
        }
        errorOutput.AppendFormat("File content:\n{0}\n", Indent(content));
        errorOutput.AppendLine();
    }

    return passed;
}
/// <summary>
/// Reads one element from the object stream: its start tag (or start tag placeholder),
/// its nested content and its end tag (or end tag placeholder), reporting syntax errors
/// for missing or mismatched tags. The element is registered with the tracked segments.
/// </summary>
/// <param name="objStream">
/// Stream of objects; its current item is expected to be a start/empty tag or the start tag placeholder.
/// </param>
/// <returns>The constructed element.</returns>
AXmlElement ReadElement(IEnumerator <AXmlObject> objStream)
{
    AXmlElement element = new AXmlElement();
    // Assume proper nesting until a placeholder or an improperly nested child proves otherwise
    element.IsProperlyNested = true;

    // Read start tag
    AXmlTag startTag = ReadSingleObject(objStream) as AXmlTag;
    AXmlParser.DebugAssert(startTag != null, "Start tag expected");
    AXmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected");
    if (startTag == StartTagPlaceholder) {
        // No real start tag; the error is reported at the object that is now current in the stream
        element.HasStartOrEmptyTag = false;
        element.IsProperlyNested = false;
        TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset, "Matching openning tag was not found");
    } else {
        element.HasStartOrEmptyTag = true;
        element.AddChild(startTag);
    }

    // Read content and end tag
    if (startTag == StartTagPlaceholder || // Check first in case the start tag is null
        element.StartTag.IsStartTag) {
        while (true) {
            AXmlTag currTag = objStream.Current as AXmlTag; // Peek
            if (currTag == EndTagPlaceholder) {
                // End tag is missing: report it right after the last child and consume the placeholder
                TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset, "Expected '</{0}>'", element.StartTag.Name);
                ReadSingleObject(objStream);
                element.HasEndTag = false;
                element.IsProperlyNested = false;
                break;
            } else if (currTag != null && currTag.IsEndTag) {
                if (element.HasStartOrEmptyTag && currTag.Name != element.StartTag.Name) {
                    // NOTE(review): the "+ 2" presumably skips the "</" so that the error covers the tag name - confirm
                    TagReader.OnSyntaxError(element, currTag.StartOffset + 2, currTag.StartOffset + 2 + currTag.Name.Length, "Expected '{0}'. End tag must have same name as start tag.", element.StartTag.Name);
                }
                element.AddChild(ReadSingleObject(objStream));
                element.HasEndTag = true;
                break;
            }
            // Anything else is nested content
            AXmlObject nested = ReadTextOrElement(objStream);
            if (nested is AXmlElement) {
                // Improper nesting propagates to the parent
                if (!((AXmlElement)nested).IsProperlyNested) {
                    element.IsProperlyNested = false;
                }
                // Split may return the nested element as several objects; materialize before adding
                element.AddChildren(Split((AXmlElement)nested).ToList());
            } else {
                element.AddChild(nested);
            }
        }
    } else {
        // The start tag is not a plain start tag, so there is no content or end tag to read
        element.HasEndTag = false;
    }

    element.StartOffset = element.FirstChild.StartOffset;
    element.EndOffset = element.LastChild.EndOffset;

    AXmlParser.Assert(element.HasStartOrEmptyTag || element.HasEndTag, "Must have at least start or end tag");

    AXmlParser.Log("Constructed {0}", element);
    trackedSegments.AddParsedObject(element, null); // Need all elements in cache for offset tracking
    return(element);
}
/// <summary>
/// Builds the document from the flat tag list: matches the tags, reads all top-level
/// text and elements, and reports well-formedness errors of the document root
/// (misplaced XML declaration, missing or multiple root elements, CDATA or
/// non-whitespace text in the root).
/// </summary>
/// <returns>The constructed document, registered with the tracked segments.</returns>
public AXmlDocument ReadDocument()
{
    AXmlDocument doc = new AXmlDocument() { Parser = parser };

    // AXmlParser.Log("Flat stream: {0}", PrintObjects(tags));
    List<AXmlObject> valid = MatchTags(tags);
    // AXmlParser.Log("Fixed stream: {0}", PrintObjects(valid));
    IEnumerator<AXmlObject> validStream = valid.GetEnumerator();
    validStream.MoveNext(); // Move to first
    while (true) {
        // End of stream? Detected by a null current item.
        // NOTE(review): the InvalidCastException handler is kept from the original;
        // which enumerator would throw it here is not visible in this code.
        try {
            if (validStream.Current == null) {
                break;
            }
        } catch (InvalidCastException) {
            break;
        }
        doc.AddChild(ReadTextOrElement(validStream));
    }

    if (doc.Children.Count > 0) {
        doc.StartOffset = doc.FirstChild.StartOffset;
        doc.EndOffset = doc.LastChild.EndOffset;
    }

    // Check well formed
    // The name is compared ordinally and case-insensitively so that the result does not
    // depend on the current culture and a missing name does not throw.
    foreach (AXmlTag xmlDeclaration in doc.Children.OfType<AXmlTag>().Where(t => t.IsProcessingInstruction && string.Equals(t.Name, "xml", StringComparison.OrdinalIgnoreCase))) {
        if (xmlDeclaration.StartOffset != 0) {
            TagReader.OnSyntaxError(doc, xmlDeclaration.StartOffset, xmlDeclaration.StartOffset + 5, "XML declaration must be at the start of document");
        }
    }

    int elemCount = doc.Children.OfType<AXmlElement>().Count();
    if (elemCount == 0) {
        TagReader.OnSyntaxError(doc, doc.EndOffset, doc.EndOffset, "Root element is missing");
    }
    if (elemCount > 1) {
        // Report the error at the second top-level element
        AXmlElement next = doc.Children.OfType<AXmlElement>().Skip(1).First();
        TagReader.OnSyntaxError(doc, next.StartOffset, next.StartOffset, "Only one root element is allowed");
    }

    foreach (AXmlTag tag in doc.Children.OfType<AXmlTag>()) {
        if (tag.IsCData) {
            TagReader.OnSyntaxError(doc, tag.StartOffset, tag.EndOffset, "CDATA not allowed in document root");
        }
    }
    foreach (AXmlText text in doc.Children.OfType<AXmlText>()) {
        if (!text.ContainsOnlyWhitespace) {
            TagReader.OnSyntaxError(doc, text.StartOffset, text.EndOffset, "Only whitespace is allowed in document root");
        }
    }

    AXmlParser.Log("Constructed {0}", doc);
    trackedSegments.AddParsedObject(doc, null);
    return doc;
}
/// <summary>
/// Gets the possible configurations after considering the given object.
/// For every old configuration one or more new configurations are produced, each with
/// the object (and any required placeholders) pushed onto its document; only the
/// maxConfigurationCount cheapest results are kept.
/// </summary>
/// <param name="oldConfigs">Configurations that existed before the object was considered.</param>
/// <param name="obj">The object to consider; asserted to be a tag, a text or a properly nested element.</param>
/// <returns>The best new configurations, ordered by cost before truncation.</returns>
Configurations ProcessObject(Configurations oldConfigs, AXmlObject obj)
{
    AXmlParser.Log("Processing {0}", obj);

    // Null when the object is not a tag (text or element)
    AXmlTag tag = obj as AXmlTag;
    AXmlParser.Assert(obj is AXmlTag || obj is AXmlText || obj is AXmlElement, obj.GetType().Name + " not expected");
    if (obj is AXmlElement) {
        AXmlParser.Assert(((AXmlElement)obj).IsProperlyNested, "Element not proprly nested");
    }

    Configurations newConfigs = new Configurations();

    foreach (var kvp in oldConfigs) {
        Configuration oldConfig = kvp.Value;
        var oldStartTags = oldConfig.StartTags;
        var oldDocument = oldConfig.Document;
        int oldCost = oldConfig.Cost;

        if (tag != null && tag.IsStartTag) {
            newConfigs.Add(new Configuration { // Push start-tag (cost 0)
                StartTags = oldStartTags.Push(tag),
                Document = oldDocument.Push(tag),
                Cost = oldCost,
            });
        } else if (tag != null && tag.IsEndTag) {
            // Option 1: leave the open start tags alone and put a start tag placeholder before the end tag
            newConfigs.Add(new Configuration { // Ignore (cost 1)
                StartTags = oldStartTags,
                Document = oldDocument.Push(StartTagPlaceholder).Push(tag),
                Cost = oldCost + 1,
            });
            // Option 2: close the innermost open start tag even though its name differs
            if (!oldStartTags.IsEmpty && oldStartTags.Peek().Name != tag.Name) {
                newConfigs.Add(new Configuration { // Pop 1 item (cost 1) - not matching
                    StartTags = oldStartTags.Pop(),
                    Document = oldDocument.Push(tag),
                    Cost = oldCost + 1,
                });
            }
            // Option 3: walk the open start tags; for each one with a matching name, close
            // everything walked over so far with end tag placeholders and match the tag with it
            int popedCount = 0;
            var startTags = oldStartTags;
            var doc = oldDocument;
            foreach (AXmlTag poped in oldStartTags) {
                popedCount++;
                if (poped.Name == tag.Name) {
                    newConfigs.Add(new Configuration { // Pop 'x' items (cost x-1) - last one is matching
                        StartTags = startTags.Pop(),
                        Document = doc.Push(tag),
                        Cost = oldCost + popedCount - 1,
                    });
                }
                // This start tag is skipped from now on: pop it and close it with a placeholder
                startTags = startTags.Pop();
                doc = doc.Push(EndTagPlaceholder);
            }
        } else {
            // Empty tag or other tag type or text or properly nested element
            newConfigs.Add(new Configuration { // Ignore (cost 0)
                StartTags = oldStartTags,
                Document = oldDocument.Push(obj),
                Cost = oldCost,
            });
        }
    }

    // Log("New configurations:" + newConfigs.ToString());

    // Keep only the cheapest configurations to bound the search
    Configurations bestNewConfigurations = new Configurations( newConfigs.Values.OrderBy(v => v.Cost).Take(maxConfigurationCount) );

    // AXmlParser.Log("Best new configurations:" + bestNewConfigurations.ToString());

    return(bestNewConfigurations);
}
/// <summary>
/// Advances the current location by <paramref name="count"/> characters.
/// </summary>
/// <param name="count">Number of characters to skip; the new location is asserted not to exceed the input length.</param>
protected void Skip(int count)
{
    int target = currentLocation + count;
    AXmlParser.Assert(target <= inputLength, "Skipping after the end of file");
    currentLocation = target;
}
/// <summary>
/// Parses one test file and checks the result: the pretty-printed output must equal the
/// original content, the canonical output must equal the expected one, and syntax errors
/// must be reported exactly when the file is not well formed. A report is appended to
/// errorOutput for every failed check.
/// </summary>
/// <remarks>
/// If using DTD, canonical representation is not checked
/// If using DTD, unknown entity references are not error
/// </remarks>
/// <param name="testFile">The test file; its Name, Content, Canonical and Description are used.</param>
/// <param name="isWellFormed">Whether the file is expected to be well formed.</param>
/// <returns>true when all checks passed; otherwise false.</returns>
bool TestFile(TestFile testFile, bool isWellFormed)
{
    bool passed = true;

    string content = testFile.Content;
    Debug.WriteLine("Testing " + testFile.Name + "...");
    AXmlParser parser = new AXmlParser();

    bool usingDTD = content.Contains("<!DOCTYPE") && (content.Contains("<!ENTITY") || content.Contains(" SYSTEM "));
    if (usingDTD) {
        parser.UnknownEntityReferenceIsError = false;
    }

    AXmlDocument document;
    parser.Lock.EnterWriteLock();
    try {
        document = parser.Parse(content, null);
    } finally {
        parser.Lock.ExitWriteLock();
    }

    // Pretty printing must reproduce the input exactly
    string printed = PrettyPrintAXmlVisitor.PrettyPrint(document);
    if (content != printed) {
        errorOutput.AppendFormat("Output of pretty printed XML for \"{0}\" does not match the original.\n", testFile.Name);
        errorOutput.AppendFormat("Pretty printed:\n{0}\n", Indent(printed));
        passed = false;
    }

    // Canonical form is compared only for well formed files that do not use a DTD
    if (isWellFormed && !usingDTD) {
        string canonicalPrint = CanonicalPrintAXmlVisitor.Print(document);
        if (testFile.Canonical != null) {
            if (testFile.Canonical != canonicalPrint) {
                errorOutput.AppendFormat("Canonical XML for \"{0}\" does not match the excpected.\n", testFile.Name);
                errorOutput.AppendFormat("Expected:\n{0}\n", Indent(testFile.Canonical));
                errorOutput.AppendFormat("Seen:\n{0}\n", Indent(canonicalPrint));
                passed = false;
            }
        } else {
            // Terminate the line so that the suggestion below starts on its own line
            errorOutput.AppendFormat("Can not find canonical output for \"{0}\"\n", testFile.Name);
            errorOutput.AppendFormat("Suggested canonical output:\n{0}\n", Indent(canonicalPrint));
            passed = false;
        }
    }

    bool hasErrors = document.SyntaxErrors.Any();
    if (isWellFormed && hasErrors) {
        errorOutput.AppendFormat("Syntax error(s) in well formed file \"{0}\":\n", testFile.Name);
        foreach (var error in document.SyntaxErrors) {
            // Show up to 10 characters of context following the error position
            string followingText = content.Substring(error.StartOffset, Math.Min(10, content.Length - error.StartOffset));
            errorOutput.AppendFormat("Error ({0}-{1}): {2} (followed by \"{3}\")\n", error.StartOffset, error.EndOffset, error.Message, followingText);
        }
        passed = false;
    }

    if (!isWellFormed && !hasErrors) {
        errorOutput.AppendFormat("No syntax errors reported for mallformed file \"{0}\"\n", testFile.Name);
        passed = false;
    }

    // Epilog
    if (!passed) {
        if (testFile.Description != null) {
            errorOutput.AppendFormat("Test description:\n{0}\n", Indent(testFile.Description));
        }
        errorOutput.AppendFormat("File content:\n{0}\n", Indent(content));
        errorOutput.AppendLine();
    }

    return passed;
}