Esempio n. 1
0
        /// <summary>
        /// Parses the given text, using the reported changes to decide which previously
        /// cached objects are still valid, and updates the user document from the result.
        /// The caller must hold the write lock.
        /// </summary>
        /// <param name="input">
        /// The complete XML text of the new document.
        /// </param>
        /// <param name="changesSinceLastParse">
        /// The changes made since the previous parse.  Passing null invalidates the
        /// whole cache and therefore causes a full reparse.
        /// </param>
        /// <returns>The user document after it has been updated from the parse result.</returns>
        /// <exception cref="InvalidOperationException">The write lock is not held.</exception>
        public AXmlDocument Parse(string input, IEnumerable <DocumentChangeEventArgs> changesSinceLastParse)
        {
            if (!Lock.IsWriteLockHeld)
            {
                throw new InvalidOperationException("Lock needed!");
            }

            // Bring the cache in line with the edits, or drop it entirely
            if (changesSinceLastParse == null)
            {
                this.TrackedSegments.InvalidateAll();
            }
            else
            {
                this.TrackedSegments.UpdateOffsetsAndInvalidate(changesSinceLastParse);
            }

            // Read the flat list of tags, then nest them into a document
            TagReader             reader        = new TagReader(this, input);
            List <AXmlObject>     flatTags      = reader.ReadAllTags();
            TagMatchingHeuristics matcher       = new TagMatchingHeuristics(this, input, flatTags);
            AXmlDocument          freshDocument = matcher.ReadDocument();

            reader.PrintStringCacheStats();
            AXmlParser.Log("Updating main DOM tree...");
            userDocument.UpdateTreeFrom(freshDocument);
            userDocument.DebugCheckConsistency(true);

            // Sanity check: both trees must contain the same number of objects
            int userObjectCount   = userDocument.GetSelfAndAllChildren().Count();
            int parsedObjectCount = freshDocument.GetSelfAndAllChildren().Count();
            Assert(userObjectCount == parsedObjectCount, "Parsed document and updated document have different number of children");
            return userDocument;
        }
        /// <summary>
        /// Applies each change to the tracked segments: shifts their offsets and marks the
        /// cached objects overlapping the changed range as no longer cached.
        /// </summary>
        /// <param name="changes">The document changes, processed in enumeration order.</param>
        public void UpdateOffsetsAndInvalidate(IEnumerable <DocumentChangeEventArgs> changes)
        {
            foreach (DocumentChangeEventArgs edit in changes)
            {
                // Move every tracked item to its position after the change
                segments.UpdateOffsets(edit);

                AXmlParser.Log("Changed {0}-{1}", edit.Offset, edit.Offset + edit.InsertionLength);

                // Original author's note: removing text collapses one end of a segment to the
                // change offset, and the overlap query also reports segments that merely touch
                // the range, which covers the extra character that was read ahead.
                var affected = segments.FindOverlappingSegments(edit.Offset, edit.InsertionLength);

                // Cached objects that overlap the change directly
                foreach (AXmlObject cachedObject in affected.OfType <AXmlObject>().Where(o => o.IsCached))
                {
                    InvalidateCache(cachedObject, false);
                }

                // Objects that read into the changed range while being parsed
                foreach (TouchedRange touched in affected.OfType <TouchedRange>())
                {
                    AXmlParser.Log("Found that {0} dependeds on ({1}-{2})", touched.TouchedByObject, touched.StartOffset, touched.EndOffset);
                    InvalidateCache(touched.TouchedByObject, true);
                    segments.Remove(touched);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Adds start or end tag placeholders so that the document is properly nested.
        /// </summary>
        /// <param name="objs">The parsed objects to process, in order.</param>
        /// <returns>The object list of the configuration with the lowest cost.</returns>
        List <AXmlObject> MatchTags(IEnumerable <AXmlObject> objs)
        {
            // Start from a single empty configuration with zero cost
            Configurations configurations = new Configurations();
            Configuration  initial        = new Configuration {
                StartTags = ImmutableStack <AXmlTag> .Empty,
                Document  = ImmutableStack <AXmlObject> .Empty,
                Cost      = 0,
            };

            configurations.Add(initial);

            foreach (AXmlObject current in objs)
            {
                configurations = ProcessObject(configurations, current);
            }

            // Every start tag still open gets an end tag placeholder, at a cost of 1 each
            foreach (Configuration config in configurations.Values)
            {
                while (!config.StartTags.IsEmpty)
                {
                    config.StartTags = config.StartTags.Pop();
                    config.Document  = config.Document.Push(EndTagPlaceholder);
                    config.Cost     += 1;
                }
            }

            Configuration cheapest = configurations.Values.OrderBy(c => c.Cost).First();

            AXmlParser.Log("Best configuration has cost {0}", cheapest.Cost);

            // The document stack holds the objects in reverse order
            return cheapest.Document.Reverse().ToList();
        }
Esempio n. 4
0
 /// <summary> Creates a tag matcher for the given input text and its parsed objects </summary>
 /// <param name="parser">The owning parser; its tracked segments are stored as well.</param>
 /// <param name="input">The text the objects were read from.</param>
 /// <param name="tags">The parsed objects to be matched.</param>
 public TagMatchingHeuristics(AXmlParser parser, string input, List <AXmlObject> tags)
 {
     this.parser = parser;
     this.input  = input;
     this.tags   = tags;

     trackedSegments = parser.TrackedSegments;
 }
        /// <summary>
        /// Starts tracking a parsed object and marks it as cached.  Optionally also records
        /// the range of text that was read while producing the object.
        /// </summary>
        /// <param name="obj">The parsed object.  Anything but a document must have non-zero length.</param>
        /// <param name="maxTouchedLocation">
        /// The furthest location that was read, or null when no touched range is to be recorded.
        /// </param>
        public void AddParsedObject(AXmlObject obj, int? maxTouchedLocation)
        {
            bool emptyNonDocument = obj.Length <= 0 && !(obj is AXmlDocument);

            if (emptyNonDocument)
            {
                string complaint = string.Format(CultureInfo.InvariantCulture, "Invalid object {0}.  It has zero length.", obj);
                AXmlParser.Assert(false, complaint);
            }

            // Note: a check that every child of a container lies within the container's
            // offsets used to live here; it was disabled by the original author as too expensive.

            segments.Add(obj);
            AddSyntaxErrorsOf(obj);
            obj.IsCached = true;

            if (!maxTouchedLocation.HasValue)
            {
                return;
            }

            // The location itself was read, so the range ends one past it.
            // For example, for "a_" the range is (0-2).
            TouchedRange readAhead = new TouchedRange();
            readAhead.StartOffset     = obj.StartOffset;
            readAhead.EndOffset       = maxTouchedLocation.Value + 1;
            readAhead.TouchedByObject = obj;

            segments.Add(readAhead);
            AXmlParser.Log("{0} touched range ({1}-{2})", obj, readAhead.StartOffset, readAhead.EndOffset);
        }
Esempio n. 6
0
        /// <summary>
        /// Splits an element that has a start tag but no end tag, using indentation as the hint.
        /// The element keeps its start tag and the following children that are either text or
        /// indented deeper than the element; the remaining children are returned after it.
        /// </summary>
        /// <param name="elem">The element to examine.</param>
        /// <returns>
        /// <paramref name="elem"/> alone when no split applies; otherwise a newly constructed
        /// element with the accepted children, followed by each rejected child.
        /// </returns>
        IEnumerable <AXmlObject> Split(AXmlElement elem)
        {
            int ownIndent = GetIndentLevel(elem);

            // Only an element with a real start tag (not an empty-element tag), no end tag
            // and a known indentation is a candidate for splitting
            bool startTagWithoutEnd = elem.HasStartOrEmptyTag && elem.StartTag.IsStartTag && !elem.HasEndTag;
            if (!startTagWithoutEnd || ownIndent == -1)
            {
                yield return elem;

                yield break;
            }

            // Index of the last child that stays in the element; index 0 is the start tag
            int lastKept = 0;
            while (lastKept + 1 < elem.Children.Count)
            {
                AXmlObject candidate = elem.Children[lastKept + 1];

                // Text is always kept; anything else only when indented deeper than the element
                bool keep = candidate is AXmlText || GetIndentLevel(candidate) > ownIndent;
                if (!keep)
                {
                    break;
                }
                lastKept++;
            }

            // Nothing was rejected, so there is nothing to split
            if (lastKept + 1 == elem.Children.Count)
            {
                yield return elem;

                yield break;
            }

            AXmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastKept, elem.Children.Count - 1);

            AXmlElement topHalf = new AXmlElement {
                HasStartOrEmptyTag = elem.HasStartOrEmptyTag,
                HasEndTag          = elem.HasEndTag
            };
            topHalf.AddChildren(elem.Children.Take(1 + lastKept));  // Start tag + kept children
            topHalf.StartOffset = topHalf.FirstChild.StartOffset;
            topHalf.EndOffset   = topHalf.LastChild.EndOffset;
            TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset,
                                    "Expected '</{0}>'", topHalf.StartTag.Name);

            AXmlParser.Log("Constructed {0}", topHalf);
            trackedSegments.AddParsedObject(topHalf, null);
            yield return topHalf;

            // The rejected children follow the new element
            for (int rest = lastKept + 1; rest < elem.Children.Count; rest++)
            {
                yield return elem.Children[rest];
            }
        }
 /// <summary>
 /// Marks every tracked object as not cached.  Used when the whole document has changed.
 /// </summary>
 /// <remarks> The items stay tracked because they might still be in the document </remarks>
 public void InvalidateAll()
 {
     AXmlParser.Log("Invalidating all objects");

     var trackedObjects = segments.OfType <AXmlObject>();
     foreach (AXmlObject tracked in trackedObjects)
     {
         tracked.IsCached = false;
     }
 }
Esempio n. 8
0
 /// <summary> Returns the input text between the two offsets, passed through the string cache </summary>
 /// <param name="start">Offset of the first character.</param>
 /// <param name="end">Offset one past the last character; asserted not to exceed the current location.</param>
 /// <returns>An empty string when both offsets equal the input length; otherwise the cached substring.</returns>
 protected string GetText(int start, int end)
 {
     AXmlParser.Assert(end <= currentLocation, "Reading ahead of current location");

     bool emptyAtEndOfInput = start == inputLength && end == inputLength;
     if (emptyAtEndOfInput)
     {
         return string.Empty;
     }

     string slice = input.Substring(start, end - start);
     return GetCachedString(slice);
 }
Esempio n. 9
0
        /// <summary>
        /// Inserts a child at the given index, sets the parent pointers for it and
        /// notifies the document.  Intended only for the children update algorithm.
        /// </summary>
        /// <param name="index">Position in the children collection.</param>
        /// <param name="item">The object to insert; it must not already be a child of this object.</param>
        void InsertChild(int index, AXmlObject item)
        {
            AXmlParser.Log("Inserting {0} at index {1}", item, index);

            // Preconditions
            Assert(Document != null, "Can not insert to dangling object");
            Assert(item.Parent != this, "Can not own item twice");

            SetParentPointersInTree(item);
            Children.InsertItemAt(index, item);
            Document.OnObjectInserted(index, item);
        }
 /// <summary> Marks an object as not cached while it continues to be tracked </summary>
 /// <remarks> Calling it repeatedly for the same object is harmless (range tracking does so) </remarks>
 /// <param name="obj">The object to invalidate.</param>
 /// <param name="includeParents">When true, the objects returned by FindParents are invalidated first.</param>
 void InvalidateCache(AXmlObject obj, bool includeParents)
 {
     if (includeParents)
     {
         foreach (AXmlObject ancestor in FindParents(obj))
         {
             ancestor.IsCached = false;
             AXmlParser.Log("Invalidating cached item {0} (it is parent)", ancestor);
         }
     }

     obj.IsCached = false;
     AXmlParser.Log("Invalidating cached item {0}", obj);
 }
Esempio n. 11
0
 /// <summary> Logs a syntax error and attaches it to the given object </summary>
 /// <param name="obj">The object the error is added to.</param>
 /// <param name="start">Start offset of the error.</param>
 /// <param name="end">End offset of the error; widened to <c>start + 1</c> when it is not past the start.</param>
 /// <param name="message">Composite format string for the error message.</param>
 /// <param name="args">Arguments for <paramref name="message"/>.</param>
 public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args)
 {
     // An error must cover at least one character
     if (end <= start)
     {
         end = start + 1;
     }

     // Format once, culture-independently, so the logged text and the stored text are
     // the same string and do not vary with the current thread culture.
     string formattedMessage = string.Format(System.Globalization.CultureInfo.InvariantCulture, message, args);

     AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, formattedMessage);
     obj.AddSyntaxError(new SyntaxError()
     {
         Object      = obj,
         StartOffset = start,
         EndOffset   = end,
         Message     = formattedMessage,
     });
 }
Esempio n. 12
0
        /// <summary>
        /// Creates a text object of type <c>TextType.Other</c> for the given range and
        /// reports it as parsed.
        /// </summary>
        /// <param name="start">Start offset of the text.</param>
        /// <param name="end">End offset of the text; expected to be greater than <paramref name="start"/>.</param>
        /// <returns>The new text object.</returns>
        AXmlText MakeText(int start, int end)
        {
            AXmlParser.DebugAssert(end > start, "Empty text");

            AXmlText fragment = new AXmlText();
            fragment.StartOffset  = start;
            fragment.EndOffset    = end;
            fragment.EscapedValue = GetText(start, end);
            fragment.Type         = TextType.Other;

            OnParsed(fragment);
            return fragment;
        }
Esempio n. 13
0
        /// <summary> Logs a syntax error and attaches it to the given object </summary>
        /// <param name="obj">The object the error is added to.</param>
        /// <param name="start">Start offset of the error.</param>
        /// <param name="end">End offset of the error; <c>start + 1</c> is used when it is not past the start.</param>
        /// <param name="message">Composite format string, formatted with the invariant culture.</param>
        /// <param name="args">Arguments for <paramref name="message"/>.</param>
        public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args)
        {
            // An error always covers at least one character
            end = end <= start ? start + 1 : end;

            string text = string.Format(CultureInfo.InvariantCulture, message, args);
            AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, text);

            SyntaxError error = new SyntaxError();
            error.Object      = obj;
            error.StartOffset = start;
            error.EndOffset   = end;
            error.Message     = text;
            obj.AddSyntaxError(error);
        }
Esempio n. 14
0
        /// <summary>
        /// Looks for a cached object of type <typeparamref name="T"/> at the current location.
        /// When found, the reader skips over it; otherwise a new empty instance is created.
        /// </summary>
        /// <param name="res">The cached object, or a newly constructed one.</param>
        /// <param name="condition">Condition passed on to the cache lookup.</param>
        /// <returns>True when <paramref name="res"/> came from the cache.</returns>
        bool TryReadFromCacheOrNew <T>(out T res, Predicate <T> condition) where T : AXmlObject, new()
        {
            T hit = trackedSegments.GetCachedObject <T>(this.CurrentLocation, 0, condition);

            if (hit == null)
            {
                res = new T();
                return false;
            }

            Skip(hit.Length);
            AXmlParser.Assert(hit.Length > 0, "cached elements must not have zero length");
            res = hit;
            return true;
        }
Esempio n. 15
0
        /// <summary>
        /// Parses the initial text in full, then parses the final text incrementally with a
        /// single recorded change, all while holding the parser's write lock.
        /// </summary>
        void RunTest1()
        {
            AXmlParser parser = new AXmlParser();

            // Take the lock before entering the try block: if acquiring it fails, the
            // finally block must not try to release a lock that was never taken.
            parser.Lock.EnterWriteLock();
            try {
                parser.Parse(initialDocumentText, null);                 // full reparse

                IList <DocumentChangeEventArgs> changes = new List <DocumentChangeEventArgs>();

                changes.Add(new DocumentChangeEventArgs(offset, original, replacement));

                parser.Parse(finalDocumentText, changes);
            } finally {
                parser.Lock.ExitWriteLock();
            }
        }
Esempio n. 16
0
		/// <summary>
		/// Creates the marker service and the parser, subscribes to mouse-move and
		/// document-change events, and starts a timer that calls Button_Click every half second.
		/// </summary>
		protected override void OnInitialized(EventArgs e)
		{
			markerService = new TextMarkerService(editor.TextArea);

			editor.TextArea.TextView.MouseMove += editor_TextArea_TextView_MouseMove;

			// Record every document change so it can be handed to the parser later
			editor.Document.Changed += (sender, e2) => {
				textDirty = true;
				changes.Add(e2);
			};
			parser = new AXmlParser();

			DispatcherTimer timer = new DispatcherTimer {
				Interval = TimeSpan.FromSeconds(0.5)
			};
			timer.Tick += (sender, args) => Button_Click(null, null);
			timer.Start();

			base.OnInitialized(e);
		}
        /// <summary>
        /// Stops tracking an object and, recursively, its children.  An object that is
        /// still cached is left alone, together with everything below it.
        /// </summary>
        /// <param name="obj">The object to stop tracking.</param>
        public void RemoveParsedObject(AXmlObject obj)
        {
            // A cached object may be reused by a later parse, so it stays tracked
            if (obj.IsCached)
            {
                return;
            }

            segments.Remove(obj);
            RemoveSyntaxErrorsOf(obj);
            AXmlParser.Log("Stopped tracking {0}", obj);

            AXmlContainer container = obj as AXmlContainer;
            if (container == null)
            {
                return;
            }
            foreach (AXmlObject child in container.Children)
            {
                RemoveParsedObject(child);
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Removes the child at the given index, clears its parent pointer and notifies
        /// the document.  Intended only for the children update algorithm.
        /// </summary>
        /// <param name="index">Position of the child in the children collection.</param>
        void RemoveChild(int index)
        {
            AXmlObject victim = Children[index];

            AXmlParser.Log("Removing {0} at index {1}", victim, index);

            // An object that is not cached can not be used again, so stop tracking it
            bool reusable = victim.IsCached;
            if (!reusable)
            {
                Document.Parser.TrackedSegments.RemoveParsedObject(victim);
            }

            // Detach from this parent
            Assert(victim.Parent == this, "Inconsistent child");
            victim.Parent = null;

            Children.RemoveItemAt(index);
            Document.OnObjectRemoved(index, victim);
        }
Esempio n. 19
0
        /// <summary>
        /// Raises the Changed event, notifies the owning document (if any) and, when this
        /// object is a start, empty-element or end tag of an element, also reports the
        /// parent element as changed.
        /// </summary>
        protected void OnChanged()
        {
            AXmlParser.Log("Changed {0}", this);

            if (Changed != null)
            {
                AXmlObjectEventArgs eventArgs = new AXmlObjectEventArgs();
                eventArgs.Object = this;
                Changed(this, eventArgs);
            }

            AXmlDocument owner = this.Document;
            if (owner != null)
            {
                owner.OnObjectChanged(this);
            }

            // Convenience: a change to an element's tag counts as a change to the element
            AXmlTag tag = this as AXmlTag;
            bool isTagOfElement = tag != null
                                  && (tag.IsStartOrEmptyTag || tag.IsEndTag)
                                  && tag.Parent is AXmlElement;

            if (isTagOfElement)
            {
                tag.Parent.OnChanged();
            }
        }
Esempio n. 20
0
 /// <summary> Asserts that the reader has not reached the end of the input </summary>
 protected void AssertHasMoreData()
 {
     bool moreAvailable = HasMoreData();

     AXmlParser.Assert(moreAvailable, "Unexpected end of file");
 }
Esempio n. 21
0
 /// <summary>
 /// Logs a freshly parsed object and adds it to the tracked segments.  The maximal touched
 /// location is passed along only when it lies beyond the current location.
 /// </summary>
 /// <param name="obj">The object that was just parsed.</param>
 void OnParsed(AXmlObject obj)
 {
     AXmlParser.Log("Parsed {0}", obj);

     int? touchedBeyond = null;
     if (this.MaxTouchedLocation > this.CurrentLocation)
     {
         touchedBeyond = (int?)this.MaxTouchedLocation;
     }
     trackedSegments.AddParsedObject(obj, touchedBeyond);
 }
Esempio n. 22
0
 /// <summary> Creates a tag reader over the given input text </summary>
 /// <param name="parser">The owning parser; its tracked segments are stored as well.</param>
 /// <param name="input">The text to read; also passed to the base constructor.</param>
 public TagReader(AXmlParser parser, string input) : base(input)
 {
     this.parser = parser;
     this.input  = input;

     trackedSegments = parser.TrackedSegments;
 }
Esempio n. 23
0
		/// <summary>
		/// Parses the initial text in full, then parses the final text incrementally with a
		/// single recorded change, all while holding the parser's write lock.
		/// </summary>
		void RunTest1()
		{
			AXmlParser parser = new AXmlParser();
			
			// Take the lock before entering the try block: if acquiring it fails, the
			// finally block must not try to release a lock that was never taken.
			parser.Lock.EnterWriteLock();
			try {
				parser.Parse(initialDocumentText, null); // full reparse
				
				IList<DocumentChangeEventArgs> changes = new List<DocumentChangeEventArgs>();
				
				changes.Add(new DocumentChangeEventArgs(offset, original, replacement));
				
				parser.Parse(finalDocumentText, changes);
			} finally {
				parser.Lock.ExitWriteLock();
			}
		}
Esempio n. 24
0
 /// <summary>
 /// Moves the reader back to an earlier location, first remembering the current
 /// location as touched if it is the furthest one seen so far.
 /// </summary>
 /// <param name="oldLocation">The location to return to; asserted not to lie ahead of the current one.</param>
 protected void GoBack(int oldLocation)
 {
     AXmlParser.Assert(oldLocation <= currentLocation, "Trying to move forward");

     if (currentLocation > maxTouchedLocation)
     {
         maxTouchedLocation = currentLocation;
     }
     currentLocation = oldLocation;
 }
Esempio n. 25
0
        // One and a half times the fragment size.  Original author's note: larger so that
        // we do not get small "what was inserted" fragments.
        const int lookAheadLength     = (3 * maxTextFragmentSize) / 2; // More so that we do not get small "what was inserted" fragments

        /// <summary>
        /// Reads text of the given type from the current location and optionally separates
        /// it into fragments.  Cached fragments are yielded as they are; new fragments are
        /// read up to a fragment limit and stop early at the terminator of the text type.
        /// It can also return an empty set when there is no appropriate text in the input.
        /// The iterator advances the reader as it goes, so make sure you enumerate it only once.
        /// </summary>
        /// <param name="type">The kind of text to read; it determines the terminator.</param>
        /// <returns>The text fragments in document order; fragments with empty text are not returned.</returns>
        /// <exception cref="Exception">The text type is not one of the handled types.</exception>
        IEnumerable <AXmlObject> ReadText(TextType type)
        {
            // The cache look-ahead below is skipped for the first newly read fragment
            bool lookahead = false;

            while (true)
            {
                AXmlText text;
                if (TryReadFromCacheOrNew(out text, t => t.Type == type))
                {
                    // Cached text found
                    yield return(text);

                    continue;                     // Read next fragment;  the method can handle "no text left"
                }
                // Not cached: 'text' is a new, empty object that is filled in below
                text.Type = type;

                // Limit the reading to just a few characters
                // (the first character not to be read)
                int fragmentEnd = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength);

                // Look if some further text has been already processed and align so that
                // we hit that cache point.  It is expensive so it is off for the first run
                if (lookahead)
                {
                    // Note: Must fit entity
                    AXmlObject nextFragment = trackedSegments.GetCachedObject <AXmlText>(this.CurrentLocation + maxEntityLength, lookAheadLength - maxEntityLength, t => t.Type == type);
                    if (nextFragment != null)
                    {
                        // End this fragment exactly where the cached one begins
                        fragmentEnd = Math.Min(nextFragment.StartOffset, this.InputLength);
                        AXmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, fragmentEnd);
                    }
                }
                lookahead = true;

                text.StartOffset = this.CurrentLocation;
                int start = this.CurrentLocation;

                // Whitespace would be skipped anyway by any operation
                TryMoveToNonWhiteSpace(fragmentEnd);
                // Remember where the leading whitespace ended; compared against the final location below
                int wsEnd = this.CurrentLocation;

                // Try move to the terminator given by the context
                if (type == TextType.WhiteSpace)
                {
                    TryMoveToNonWhiteSpace(fragmentEnd);
                }
                else if (type == TextType.CharacterData)
                {
                    // Stops at '<'; a ']' is stepped over, reporting an error when it starts "]]>"
                    while (true)
                    {
                        if (!TryMoveToAnyOf(new char[] { '<', ']' }, fragmentEnd))
                        {
                            break;                                                                              // End of fragment
                        }
                        if (TryPeek('<'))
                        {
                            break;
                        }
                        if (TryPeek(']'))
                        {
                            if (TryPeek("]]>"))
                            {
                                OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
                            }
                            TryMoveNext();
                            continue;
                        }
                        // Neither of the searched characters is at the current location
                        throw new Exception("Infinite loop");
                    }
                }
                else if (type == TextType.Comment)
                {
                    // Stops at "-->"; any other "--" is an error
                    // Do not report too many errors
                    bool errorReported = false;
                    while (true)
                    {
                        if (!TryMoveTo('-', fragmentEnd))
                        {
                            break;                                                       // End of fragment
                        }
                        if (TryPeek("-->"))
                        {
                            break;
                        }
                        if (TryPeek("--") && !errorReported)
                        {
                            OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
                            errorReported = true;
                        }
                        TryMoveNext();
                    }
                }
                else if (type == TextType.CData)
                {
                    // Stops at "]]>"
                    while (true)
                    {
                        // We can not use TryMoveTo("]]>", fragmentEnd) because it may incorrectly accept "]" at the end of fragment
                        if (!TryMoveTo(']', fragmentEnd))
                        {
                            break;                                                       // End of fragment
                        }
                        if (TryPeek("]]>"))
                        {
                            break;
                        }
                        TryMoveNext();
                    }
                }
                else if (type == TextType.ProcessingInstruction)
                {
                    // Stops at "?>"
                    while (true)
                    {
                        if (!TryMoveTo('?', fragmentEnd))
                        {
                            break;                                                       // End of fragment
                        }
                        if (TryPeek("?>"))
                        {
                            break;
                        }
                        TryMoveNext();
                    }
                }
                else if (type == TextType.UnknownBang)
                {
                    // Stops at the next '<' or '>'
                    TryMoveToAnyOf(new char[] { '<', '>' }, fragmentEnd);
                }
                else
                {
                    throw new Exception("Uknown type " + type);
                }

                // True when the reader did not move past the leading whitespace
                text.ContainsOnlyWhitespace = (wsEnd == this.CurrentLocation);

                // Terminal found or real end was reached;
                bool finished = this.CurrentLocation < fragmentEnd || IsEndOfFile();

                if (!finished)
                {
                    // We have to continue reading more text fragments

                    // If there is entity reference, make sure the next segment starts with it to prevent fragmentation
                    int entitySearchStart  = Math.Max(start + 1 /* data for us */, this.CurrentLocation - maxEntityLength);
                    int entitySearchLength = this.CurrentLocation - entitySearchStart;
                    if (entitySearchLength > 0)
                    {
                        // Note that LastIndexOf works backward
                        int entityIndex = input.LastIndexOf('&', this.CurrentLocation - 1, entitySearchLength);
                        if (entityIndex != -1)
                        {
                            // End this fragment just before the '&'
                            GoBack(entityIndex);
                        }
                    }
                }

                text.EscapedValue = GetText(start, this.CurrentLocation);
                if (type == TextType.CharacterData)
                {
                    // Normalize end of line first
                    text.Value = Dereference(text, NormalizeEndOfLine(text.EscapedValue), start);
                }
                else
                {
                    text.Value = text.EscapedValue;
                }
                text.EndOffset = this.CurrentLocation;

                // Empty fragments are neither tracked nor returned
                if (text.EscapedValue.Length > 0)
                {
                    OnParsed(text);
                    yield return(text);
                }

                if (finished)
                {
                    yield break;
                }
            }
        }
Esempio n. 26
0
 /// <summary>
 /// Logs the string cache counters: requested and stored counts and sizes, plus the
 /// stored size as a percentage of the requested size (0 when nothing was requested).
 /// </summary>
 public void PrintStringCacheStats()
 {
     var storedPercent = stringCacheRequestedSize == 0 ? 0 : stringCacheStoredSize * 100 / stringCacheRequestedSize;

     AXmlParser.Log("String cache: Requested {0} ({1} bytes);  Actaully stored {2} ({3} bytes); {4}% stored",
                    stringCacheRequestedCount, stringCacheRequestedSize,
                    stringCacheStoredCount, stringCacheStoredSize,
                    storedPercent);
 }
Esempio n. 27
0
		/// <summary>
		/// Parses one test file and checks the result: the pretty-printed document must equal
		/// the original content, the canonical print must equal the expected one, and syntax
		/// errors must be reported exactly when the file is not well formed.  Failures are
		/// appended to the error output.
		/// </summary>
		/// <remarks>
		/// If using DTD, canonical representation is not checked.
		/// If using DTD, unknown entity references are not an error.
		/// </remarks>
		/// <param name="testFile">The file under test.</param>
		/// <param name="isWellFormed">Whether the file is expected to be well-formed XML.</param>
		/// <returns>True when every check passed.</returns>
		bool TestFile(TestFile testFile, bool isWellFormed)
		{
			bool passed = true;
			
			string content = testFile.Content;
			Debug.WriteLine("Testing " + testFile.Name + "...");
			AXmlParser parser = new AXmlParser();
			
			// Treat the file as using a DTD when it declares entities or an external subset
			bool usingDTD = content.Contains("<!DOCTYPE") && (content.Contains("<!ENTITY") || content.Contains(" SYSTEM "));
			if (usingDTD) {
				parser.UnknownEntityReferenceIsError = false;
			}
			
			AXmlDocument document;
			
			parser.Lock.EnterWriteLock();
			try {
				document = parser.Parse(content, null);
			} finally {
				parser.Lock.ExitWriteLock();
			}
			
			// Check 1: pretty printing must reproduce the input exactly
			string printed = PrettyPrintAXmlVisitor.PrettyPrint(document);
			if (content != printed) {
				errorOutput.AppendFormat("Output of pretty printed XML for \"{0}\" does not match the original.\n", testFile.Name);
				errorOutput.AppendFormat("Pretty printed:\n{0}\n", Indent(printed));
				passed = false;
			}
			
			// Check 2: canonical form, only for well formed files without DTD
			if (isWellFormed && !usingDTD) {
				string canonicalPrint = CanonicalPrintAXmlVisitor.Print(document);
				if (testFile.Canonical != null) {
					if (testFile.Canonical != canonicalPrint) {
						errorOutput.AppendFormat("Canonical XML for \"{0}\" does not match the excpected.\n", testFile.Name);
						errorOutput.AppendFormat("Expected:\n{0}\n", Indent(testFile.Canonical));
						errorOutput.AppendFormat("Seen:\n{0}\n", Indent(canonicalPrint));
						passed = false;
					}
				} else {
					// Terminate the line so that the suggestion starts on a line of its own
					errorOutput.AppendFormat("Can not find canonical output for \"{0}\"\n", testFile.Name);
					errorOutput.AppendFormat("Suggested canonical output:\n{0}\n", Indent(canonicalPrint));
					passed = false;
				}
			}
			
			// Check 3: syntax errors must be present exactly when the file is malformed
			bool hasErrors = document.SyntaxErrors.Any();
			if (isWellFormed && hasErrors) {
				errorOutput.AppendFormat("Syntax error(s) in well formed file \"{0}\":\n", testFile.Name);
				foreach (var error in document.SyntaxErrors) {
					// Show up to 10 characters of the text that follows the error position
					string followingText = content.Substring(error.StartOffset, Math.Min(10, content.Length - error.StartOffset));
					errorOutput.AppendFormat("Error ({0}-{1}): {2} (followed by \"{3}\")\n", error.StartOffset, error.EndOffset, error.Message, followingText);
				}
				passed = false;
			}
			
			if (!isWellFormed && !hasErrors) {
				errorOutput.AppendFormat("No syntax errors reported for mallformed file \"{0}\"\n", testFile.Name);
				passed = false;
			}
			
			// Epilog: on failure, append the test description and the file content
			if (!passed) {
				if (testFile.Description != null) {
					errorOutput.AppendFormat("Test description:\n{0}\n", Indent(testFile.Description));
				}
				errorOutput.AppendFormat("File content:\n{0}\n", Indent(content));
				errorOutput.AppendLine();
			}
			
			return passed;
		}
Esempio n. 28
0
        /// <summary>
        /// Builds one element from the object stream: its start tag (or start tag placeholder),
        /// then nested content up to the end tag (or end tag placeholder).  Syntax errors are
        /// reported for placeholders and for an end tag whose name differs from the start tag.
        /// The finished element is added to the tracked segments.
        /// </summary>
        /// <param name="objStream">
        /// The stream of objects; its Current item is used as the next object to read.
        /// NOTE(review): ReadSingleObject presumably returns Current and advances the stream - confirm.
        /// </param>
        /// <returns>The constructed element.</returns>
        AXmlElement ReadElement(IEnumerator <AXmlObject> objStream)
        {
            AXmlElement element = new AXmlElement();

            // Assume proper nesting until a placeholder or a badly nested child is seen
            element.IsProperlyNested = true;

            // Read start tag
            AXmlTag startTag = ReadSingleObject(objStream) as AXmlTag;

            AXmlParser.DebugAssert(startTag != null, "Start tag expected");
            AXmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected");
            if (startTag == StartTagPlaceholder)
            {
                // The start tag is missing; the placeholder itself is not added as a child.
                // The error is reported on the object now at the head of the stream.
                element.HasStartOrEmptyTag = false;
                element.IsProperlyNested   = false;
                TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset,
                                        "Matching openning tag was not found");
            }
            else
            {
                element.HasStartOrEmptyTag = true;
                element.AddChild(startTag);
            }

            // Read content and end tag
            if (startTag == StartTagPlaceholder ||             // Check first in case the start tag is null
                element.StartTag.IsStartTag)
            {
                while (true)
                {
                    AXmlTag currTag = objStream.Current as AXmlTag;                     // Peek
                    if (currTag == EndTagPlaceholder)
                    {
                        // The end tag is missing: report it at the end of the last child and
                        // consume the placeholder without adding it
                        TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset,
                                                "Expected '</{0}>'", element.StartTag.Name);
                        ReadSingleObject(objStream);
                        element.HasEndTag        = false;
                        element.IsProperlyNested = false;
                        break;
                    }
                    else if (currTag != null && currTag.IsEndTag)
                    {
                        // A real end tag closes the element even when its name does not match
                        if (element.HasStartOrEmptyTag && currTag.Name != element.StartTag.Name)
                        {
                            // The error range is offset by 2 from the tag start and spans the name length
                            TagReader.OnSyntaxError(element, currTag.StartOffset + 2, currTag.StartOffset + 2 + currTag.Name.Length,
                                                    "Expected '{0}'.  End tag must have same name as start tag.", element.StartTag.Name);
                        }
                        element.AddChild(ReadSingleObject(objStream));
                        element.HasEndTag = true;
                        break;
                    }
                    // Anything else is nested content
                    AXmlObject nested = ReadTextOrElement(objStream);
                    if (nested is AXmlElement)
                    {
                        // Improper nesting of a child makes this element improperly nested too
                        if (!((AXmlElement)nested).IsProperlyNested)
                        {
                            element.IsProperlyNested = false;
                        }
                        // Split may return several objects for one nested element; all become children
                        element.AddChildren(Split((AXmlElement)nested).ToList());
                    }
                    else
                    {
                        element.AddChild(nested);
                    }
                }
            }
            else
            {
                // The start tag is not a plain start tag, so no content or end tag is read
                element.HasEndTag = false;
            }

            // The element spans from its first child to its last child
            element.StartOffset = element.FirstChild.StartOffset;
            element.EndOffset   = element.LastChild.EndOffset;

            AXmlParser.Assert(element.HasStartOrEmptyTag || element.HasEndTag, "Must have at least start or end tag");

            AXmlParser.Log("Constructed {0}", element);
            trackedSegments.AddParsedObject(element, null);             // Need all elements in cache for offset tracking
            return(element);
        }
Esempio n. 29
0
        /// <summary>
        /// Builds an <see cref="AXmlDocument"/> from the flat tag list and reports
        /// document-level well-formedness errors.
        /// </summary>
        /// <remarks>
        /// The tags are first passed through <c>MatchTags</c>; the resulting objects are then
        /// consumed with <c>ReadTextOrElement</c> and added as children of the document.
        /// Syntax errors are reported for: an XML declaration that does not start at offset 0,
        /// a missing root element, more than one root element, CDATA in the document root,
        /// and non-whitespace text in the document root.
        /// </remarks>
        /// <returns>The constructed document, which is also handed to <c>trackedSegments</c>.</returns>
        public AXmlDocument ReadDocument()
        {
            AXmlDocument doc = new AXmlDocument()
            {
                Parser = parser
            };

            // AXmlParser.Log("Flat stream: {0}", PrintObjects(tags));
            List<AXmlObject> valid = MatchTags(tags);
            // AXmlParser.Log("Fixed stream: {0}", PrintObjects(valid));

            // IEnumerator<T> is IDisposable, so release it deterministically.
            using (IEnumerator<AXmlObject> validStream = valid.GetEnumerator())
            {
                validStream.MoveNext();             // Move to first
                while (true)
                {
                    // End of stream?
                    // NOTE(review): the InvalidCastException guard is kept from the original;
                    // presumably defensive against enumerators whose Current fails past the end - confirm.
                    try {
                        if (validStream.Current == null)
                        {
                            break;
                        }
                    } catch (InvalidCastException) {
                        break;
                    }
                    doc.AddChild(ReadTextOrElement(validStream));
                }
            }

            if (doc.Children.Count > 0)
            {
                doc.StartOffset = doc.FirstChild.StartOffset;
                doc.EndOffset   = doc.LastChild.EndOffset;
            }

            // Check well formed
            // Ordinal, case-insensitive comparison: no culture dependence, no temporary string,
            // and a null Name simply does not match instead of throwing.
            foreach (AXmlTag xmlDeclaration in doc.Children.OfType<AXmlTag>().Where(t => t.IsProcessingInstruction && string.Equals(t.Name, "xml", StringComparison.OrdinalIgnoreCase)))
            {
                if (xmlDeclaration.StartOffset != 0)
                {
                    TagReader.OnSyntaxError(doc, xmlDeclaration.StartOffset, xmlDeclaration.StartOffset + 5,
                                            "XML declaration must be at the start of document");
                }
            }

            // Materialize once; both the count and the second element are needed below.
            List<AXmlElement> rootElements = doc.Children.OfType<AXmlElement>().ToList();

            if (rootElements.Count == 0)
            {
                TagReader.OnSyntaxError(doc, doc.EndOffset, doc.EndOffset,
                                        "Root element is missing");
            }
            if (rootElements.Count > 1)
            {
                // The error is anchored at the start of the second root element.
                AXmlElement next = rootElements[1];
                TagReader.OnSyntaxError(doc, next.StartOffset, next.StartOffset,
                                        "Only one root element is allowed");
            }
            foreach (AXmlTag tag in doc.Children.OfType<AXmlTag>())
            {
                if (tag.IsCData)
                {
                    TagReader.OnSyntaxError(doc, tag.StartOffset, tag.EndOffset,
                                            "CDATA not allowed in document root");
                }
            }
            foreach (AXmlText text in doc.Children.OfType<AXmlText>())
            {
                if (!text.ContainsOnlyWhitespace)
                {
                    TagReader.OnSyntaxError(doc, text.StartOffset, text.EndOffset,
                                            "Only whitespace is allowed in document root");
                }
            }


            AXmlParser.Log("Constructed {0}", doc);
            trackedSegments.AddParsedObject(doc, null);
            return(doc);
        }
Esempio n. 30
0
        /// <summary>
        /// Gets the possible configurations after considering the given object.
        /// </summary>
        /// <param name="oldConfigs">Configurations that were possible before <paramref name="obj"/>.</param>
        /// <param name="obj">The next object to consider: a tag, a text, or an element.</param>
        /// <returns>
        /// A new set built from up to <c>maxConfigurationCount</c> of the candidate
        /// configurations, selected by ascending cost.
        /// </returns>
        Configurations ProcessObject(Configurations oldConfigs, AXmlObject obj)
        {
            AXmlParser.Log("Processing {0}", obj);

            AXmlTag tag = obj as AXmlTag;

            AXmlParser.Assert(obj is AXmlTag || obj is AXmlText || obj is AXmlElement, obj.GetType().Name + " not expected");
            AXmlElement asElement = obj as AXmlElement;
            if (asElement != null)
            {
                AXmlParser.Assert(asElement.IsProperlyNested, "Element not proprly nested");
            }

            Configurations candidates = new Configurations();

            foreach (var entry in oldConfigs)
            {
                Configuration previous = entry.Value;
                var openTags = previous.StartTags;
                var emitted  = previous.Document;
                int baseCost = previous.Cost;

                // Start tag: push it on both stacks at no cost.
                if (tag != null && tag.IsStartTag)
                {
                    candidates.Add(new Configuration {
                        StartTags = openTags.Push(tag),
                        Document  = emitted.Push(tag),
                        Cost      = baseCost,
                    });
                    continue;
                }

                // Empty tag, other tag type, text, or properly nested element: pass through at no cost.
                if (tag == null || !tag.IsEndTag)
                {
                    candidates.Add(new Configuration {
                        StartTags = openTags,
                        Document  = emitted.Push(obj),
                        Cost      = baseCost,
                    });
                    continue;
                }

                // From here on the object is an end tag.

                // Option 1: treat the end tag as unmatched by inserting a start-tag placeholder (cost 1).
                candidates.Add(new Configuration {
                    StartTags = openTags,
                    Document  = emitted.Push(StartTagPlaceholder).Push(tag),
                    Cost      = baseCost + 1,
                });

                // Option 2: close the innermost open tag even though the names differ (cost 1).
                if (!openTags.IsEmpty && openTags.Peek().Name != tag.Name)
                {
                    candidates.Add(new Configuration {
                        StartTags = openTags.Pop(),
                        Document  = emitted.Push(tag),
                        Cost      = baseCost + 1,
                    });
                }

                // Option 3: for every open tag with the same name, close it after implicitly
                // closing everything above it; each implicitly closed tag costs 1.
                int skipped   = 0;
                var remaining = openTags;
                var patched   = emitted;
                foreach (AXmlTag open in openTags)
                {
                    var afterPop = remaining.Pop();
                    if (open.Name == tag.Name)
                    {
                        candidates.Add(new Configuration {
                            StartTags = afterPop,
                            Document  = patched.Push(tag),
                            Cost      = baseCost + skipped,
                        });
                    }
                    skipped++;
                    remaining = afterPop;
                    patched   = patched.Push(EndTagPlaceholder);
                }
            }

            // Keep only the cheapest candidates.
            var cheapest = candidates.Values.OrderBy(c => c.Cost).Take(maxConfigurationCount);

            return(new Configurations(cheapest));
        }
Esempio n. 31
0
 /// <summary>
 /// Moves <c>currentLocation</c> forward by <paramref name="count"/>.
 /// </summary>
 /// <param name="count">Amount added to the current location.</param>
 protected void Skip(int count)
 {
     int target = currentLocation + count;
     // The new location must not lie beyond the end of the input.
     AXmlParser.Assert(target <= inputLength, "Skipping after the end of file");
     currentLocation = target;
 }
Esempio n. 32
0
        /// <summary>
        /// Parses a single test file and checks the result against the expectations,
        /// appending a description of every failed check to <c>errorOutput</c>.
        /// </summary>
        /// <remarks>
        /// If using DTD, canonical representation is not checked.
        /// If using DTD, unknown entity references are not an error.
        /// </remarks>
        /// <param name="testFile">The test file; its name, content, canonical form and description are read.</param>
        /// <param name="isWellFormed">
        /// <c>true</c> if the file is expected to parse without syntax errors;
        /// <c>false</c> if at least one syntax error is expected.
        /// </param>
        /// <returns><c>true</c> if every check passed; otherwise <c>false</c>.</returns>
        bool TestFile(TestFile testFile, bool isWellFormed)
        {
            bool passed = true;

            string content = testFile.Content;

            Debug.WriteLine("Testing " + testFile.Name + "...");
            AXmlParser parser = new AXmlParser();

            bool usingDTD = content.Contains("<!DOCTYPE") && (content.Contains("<!ENTITY") || content.Contains(" SYSTEM "));

            if (usingDTD)
            {
                parser.UnknownEntityReferenceIsError = false;
            }

            AXmlDocument document;

            // Parse throws unless the write lock is held; null changes request a full parse.
            parser.Lock.EnterWriteLock();
            try {
                document = parser.Parse(content, null);
            } finally {
                parser.Lock.ExitWriteLock();
            }

            // Check 1: printing the parsed document must reproduce the input exactly.
            string printed = PrettyPrintAXmlVisitor.PrettyPrint(document);

            if (content != printed)
            {
                errorOutput.AppendFormat("Output of pretty printed XML for \"{0}\" does not match the original.\n", testFile.Name);
                errorOutput.AppendFormat("Pretty printed:\n{0}\n", Indent(printed));
                passed = false;
            }

            // Check 2: canonical form (only for well-formed files that do not use a DTD).
            if (isWellFormed && !usingDTD)
            {
                string canonicalPrint = CanonicalPrintAXmlVisitor.Print(document);
                if (testFile.Canonical != null)
                {
                    if (testFile.Canonical != canonicalPrint)
                    {
                        errorOutput.AppendFormat("Canonical XML for \"{0}\" does not match the excpected.\n", testFile.Name);
                        errorOutput.AppendFormat("Expected:\n{0}\n", Indent(testFile.Canonical));
                        errorOutput.AppendFormat("Seen:\n{0}\n", Indent(canonicalPrint));
                        passed = false;
                    }
                }
                else
                {
                    // Terminate the line so the suggestion starts on its own line,
                    // consistent with every other message in this method.
                    errorOutput.AppendFormat("Can not find canonical output for \"{0}\"\n", testFile.Name);
                    errorOutput.AppendFormat("Suggested canonical output:\n{0}\n", Indent(canonicalPrint));
                    passed = false;
                }
            }

            // Check 3: syntax errors must be present exactly when the file is not well formed.
            bool hasErrors = document.SyntaxErrors.Any();

            if (isWellFormed && hasErrors)
            {
                errorOutput.AppendFormat("Syntax error(s) in well formed file \"{0}\":\n", testFile.Name);
                foreach (var error in document.SyntaxErrors)
                {
                    // Clamp the offset so that an out-of-range error position cannot make
                    // Substring throw while the failure is being reported.
                    int previewStart = Math.Max(0, Math.Min(error.StartOffset, content.Length));
                    string followingText = content.Substring(previewStart, Math.Min(10, content.Length - previewStart));
                    errorOutput.AppendFormat("Error ({0}-{1}): {2} (followed by \"{3}\")\n", error.StartOffset, error.EndOffset, error.Message, followingText);
                }
                passed = false;
            }

            if (!isWellFormed && !hasErrors)
            {
                errorOutput.AppendFormat("No syntax errors reported for mallformed file \"{0}\"\n", testFile.Name);
                passed = false;
            }

            // Epilog: on failure, add the test description and the file content for context.
            if (!passed)
            {
                if (testFile.Description != null)
                {
                    errorOutput.AppendFormat("Test description:\n{0}\n", Indent(testFile.Description));
                }
                errorOutput.AppendFormat("File content:\n{0}\n", Indent(content));
                errorOutput.AppendLine();
            }

            return(passed);
        }