Ejemplo n.º 1
0
        // Verifies that a parsed token reports the index the parser exposed before parsing began.
        public void TokenShouldHaveHaveStartIndexHasIndex()
        {
            // Arrange: remember the parser position before any token is consumed.
            var tagParser = new TagParser(stream);
            TokenIndex indexBeforeParse = tagParser.Index;

            // Act
            TagToken parsedToken = tagParser.Parse();

            // Assert
            Assert.AreEqual(indexBeforeParse.Index, parsedToken.Index.Index);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TagExpression"/> class and stores the given token.
        /// </summary>
        /// <param name="token">The token to store; must not be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="token"/> is <c>null</c>.</exception>
        protected TagExpression(TagToken token)
        {
            if (token != null)
            {
                _token = token;
                return;
            }

            throw new ArgumentNullException("token");
        }
Ejemplo n.º 3
0
        // Parses a single tag from codeText and checks the token's value, its tag name,
        // and the stream index reached after parsing.
        public void Create(string codeText, string value, string tagName, int index)
        {
            // Arrange
            var source    = new ParseStream(codeText);
            var tagParser = new TagParser(source);

            // Act
            TagToken parsed = tagParser.Parse();

            // Assert
            Assert.AreEqual(value, parsed.Value);
            Assert.AreEqual(tagName, parsed.TagName);
            Assert.AreEqual(index, source.Index.Index);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Converts the elements of a segment into a flat token list.
        /// </summary>
        /// <remarks>
        /// Tags become <see cref="TagToken"/> instances, elements that are already tokens are reused
        /// as-is (their span is overwritten), and text elements are passed to <c>TokenizeInternal</c>.
        /// <c>ReclassifyAcronyms</c> is applied to the collected tokens before returning.
        /// </remarks>
        /// <param name="s">The segment whose elements are tokenized.</param>
        /// <param name="allowTokenBundles">Forwarded unchanged to <c>TokenizeInternal</c>.</param>
        /// <returns>The tokens collected from all elements, in element order.</returns>
        public List <Core.Tokenization.Token> Tokenize(Core.Segment s, bool allowTokenBundles)
        {
            // TODO check whether segment culture is compatible with tokenizer parameter's culture? Or accept junk-in-junk-out?

            List <Token> tokens = new List <Token>();

            int nextPosition = 0;

            foreach (SegmentElement element in s.Elements)
            {
                // Zero-based position of this element; null elements still consume a position.
                int position = nextPosition++;

                if (element == null)
                {
                    System.Diagnostics.Debug.Assert(false, "empty segment run!");
                    continue;
                }

                // Tags are checked first, then pre-built tokens, then plain text.
                Tag tagElement = element as Tag;
                if (tagElement != null)
                {
                    // TODO rather have a "markup token" type?
                    Token markup = new TagToken(tagElement);
                    markup.Span = new SegmentRange(position, 0, 0);
                    tokens.Add(markup);
                    continue;
                }

                Token existingToken = element as Token;
                if (existingToken != null)
                {
                    // partially pretokenized input
                    // TODO duplicate token/deep copy?
                    existingToken.Span = new SegmentRange(position, 0, 0);
                    tokens.Add(existingToken);
                    continue;
                }

                Text textElement = element as Text;
                if (textElement == null)
                {
                    System.Diagnostics.Debug.Assert(false, "Unknown segment run type");
                    continue;
                }

                List <Token> fromText = TokenizeInternal(textElement.Value, position, _Parameters.CreateWhitespaceTokens, allowTokenBundles);
                if (fromText != null && fromText.Count > 0)
                {
                    tokens.AddRange(fromText);
                }
            }

            ReclassifyAcronyms(tokens);

            return tokens;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Scans a line for paired tags and returns a token for each matched opening/closing pair.
        /// </summary>
        /// <param name="line">The line to scan; <c>null</c> yields an empty list.</param>
        /// <returns>
        /// The matched tokens. Each pair is appended (opening token, then closing token) at the moment
        /// its closing tag is found.
        /// </returns>
        public List <TagToken> ParseLine(string line)
        {
            var pairedTokens = new List <TagToken>();

            if (line == null)
            {
                return pairedTokens;
            }

            var pendingOpenings = new Stack <TagToken>();
            var position        = 0;

            while (position < line.Length)
            {
                var matchedTag = tags.FirstOrDefault(candidate => IsSuitableTag(candidate, line, position));
                if (matchedTag == null)
                {
                    // No tag here: move on by one character.
                    position++;
                    continue;
                }

                if (DoubleTagValidator.IsPossibleOpeningTag(line, position, matchedTag))
                {
                    pendingOpenings.Push(new TagToken(position, matchedTag, matchedTag.Opening, true));
                }
                else if (DoubleTagValidator.IsPossibleClosingTag(line, position, matchedTag))
                {
                    // Drop pending openings of other tags until one of the same tag is on top.
                    while (pendingOpenings.Count > 0 && pendingOpenings.Peek().Tag != matchedTag)
                    {
                        pendingOpenings.Pop();
                    }

                    if (pendingOpenings.Count > 0)
                    {
                        var openingToken = pendingOpenings.Pop();
                        var closingToken = new TagToken(position, matchedTag, matchedTag.Closing, false);

                        pairedTokens.Add(openingToken);
                        pairedTokens.Add(closingToken);
                    }
                    else
                    {
                        // Nothing left to pair with: advance a single character only.
                        position++;
                        continue;
                    }
                }

                // In every remaining case the cursor advances by the length of the opening marker.
                position += matchedTag.Opening.Length;
            }

            return pairedTokens;
        }
Ejemplo n.º 6
0
        // Lexes "exists(@(ProjectOutput)) != false or 12.5" and checks every token's type and value
        // in order, then checks that the lexer yields null once the input is exhausted.
        public void LexItemExists()
        {
            ParserState state = NewState("exists(@(ProjectOutput)) != false or 12.5");

            // Expected token stream, in lexing order; the two arrays are index-aligned.
            TagTokenType[] expectedTypes =
            {
                TagTokenType.Function,
                (TagTokenType)'(',
                TagTokenType.Item,
                (TagTokenType)')',
                TagTokenType.IsNot,
                TagTokenType.Literal,
                TagTokenType.Or,
                TagTokenType.Number
            };
            string[] expectedValues =
            {
                "exists",
                "(",
                "@(ProjectOutput)",
                ")",
                "!=",
                "false",
                "OR",
                "12.5"
            };

            for (int i = 0; i < expectedTypes.Length; i++)
            {
                TagToken current = Lexer.GetNextToken(state);

                Assert.That(current.TokenType, Is.EqualTo(expectedTypes[i]));
                Assert.That(current.Value, Is.EqualTo(expectedValues[i]));
            }

            TagToken afterEnd = Lexer.GetNextToken(state);

            Assert.That(afterEnd, Is.Null);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Looks for a single tag whose opening marker occurs near the start of the line.
        /// </summary>
        /// <param name="line">The line to inspect.</param>
        /// <returns>
        /// A list holding one opening <see cref="TagToken"/> for the earliest matching tag,
        /// or an empty list when no tag opens within the allowed prefix.
        /// </returns>
        public List <TagToken> ParseLine(string line)
        {
            // The opening marker must start before this offset (never more than 5).
            var searchLimit = Math.Min(5, GetCountSpaceAtBeginningLine(line) + tags.First().Opening.Length + 1);

            // Keep only tags found inside the allowed prefix, then take the earliest one
            // (the ordering is stable, so ties keep the order of the tags collection).
            var earliest = tags
                           .Select(tag => (tag, index: line.IndexOf(tag.Opening, StringComparison.Ordinal)))
                           .Where(found => found.index != -1 && found.index < searchLimit)
                           .OrderBy(found => found.index)
                           .FirstOrDefault();

            var tokens = new List <TagToken>();

            if (earliest.tag != null)
            {
                tokens.Add(new TagToken(earliest.index, earliest.tag, earliest.tag.Opening, true));
            }

            return tokens;
        }
        /// <summary>
        /// Creates an element for the given tag token, pushes it onto the stack of open elements
        /// and returns it.
        /// </summary>
        /// <remarks>
        /// Follows the outline of http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
        /// TODO - make sure the steps conform with the specs in the link above.
        /// </remarks>
        /// <param name="token">The tag token whose <c>TagName</c> names the element to create.</param>
        /// <param name="doc">The document handed to the element factory.</param>
        /// <returns>The newly created element.</returns>
        public INode InsertHtmlElement(TagToken token, IDocument doc)
        {
            // Step 1: determine the adjusted insertion location (the appropriate place for inserting a node).
            // The value is not consumed yet because step 3 below is still a TODO.
            InsertionLocation insertionLocation = GetAppropriatePlaceForInsertingANode();

            // Step 2: create an element for the token; per the spec the intended parent is the element
            // in which the adjusted insertion location finds itself.
            INode createdElement = HtmlElementFactory.Instance.CreateElement(token.TagName, doc);

            // Step 3: TODO - if it is possible to insert an element at the adjusted insertion location,
            //         insert the newly created element there.
            //         NOTE: If the adjusted insertion location cannot accept more elements, e.g. because
            //         it's a Document that already has an element child, the new element is dropped on the floor.

            // Step 4: the element becomes the new current node.
            TreeConstruction.Instance.StackOfOpenElements.Push(createdElement);

            // Step 5: hand the element back to the caller.
            return createdElement;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Pops the opening tag that matches <paramref name="closeTag"/> from the stack and builds the
        /// zone spanning from that opening tag to the end of the closing tag.
        /// </summary>
        /// <param name="closeTag">The closing tag token.</param>
        /// <param name="stackOfTags">Stack of pending tags; entries of other tag types above the match are discarded.</param>
        /// <param name="line">The line the tag positions refer to.</param>
        /// <param name="language">Supplies the tag definitions (start/end markers) by tag type.</param>
        /// <returns>A zone whose text is the part of the line between the two markers.</returns>
        private static TagZone TakeTagZone(TagToken closeTag, Stack <TagToken> stackOfTags, string line,
                                           ILanguage language)
        {
            // Discard everything above the nearest tag of the same type as the closing tag.
            while (stackOfTags.Count != 0 && stackOfTags.Peek().Tagtype != closeTag.Tagtype)
            {
                stackOfTags.Pop();
            }

            var opening = stackOfTags.Pop();

            // Inner text starts right after the opening marker and stops where the closing marker begins.
            var innerStart = opening.Position + language.Tags[opening.Tagtype].Start.Length;
            var innerEnd   = closeTag.Position;

            // The zone itself covers both markers.
            var closeDefinition = language.Tags[closeTag.Tagtype];
            var zoneStart       = opening.Position;
            var zoneEnd         = closeTag.Position + closeDefinition.End.Length;

            return new TagZone(
                new TagNode(opening.Tagtype),
                closeDefinition,
                zoneStart,
                zoneEnd,
                line.Substring(innerStart, innerEnd - innerStart));
        }
Ejemplo n.º 10
0
        // Lexes an item transform compared against a string and checks each token's type and value
        // in order, then checks that the lexer yields null once the input is exhausted.
        public void LexTagItem()
        {
            ParserState state = NewState("@(ProjectOutput=>'%(RelativePath)') != 'bin/$(Configuration)/test.txt'");

            // Expected token stream, in lexing order; the two arrays are index-aligned.
            TagTokenType[] expectedTypes =
            {
                TagTokenType.Item,
                TagTokenType.IsNot,
                TagTokenType.String
            };
            string[] expectedValues =
            {
                "@(ProjectOutput=>'%(RelativePath)')",
                "!=",
                "'bin/$(Configuration)/test.txt'"
            };

            for (int i = 0; i < expectedTypes.Length; i++)
            {
                TagToken current = Lexer.GetNextToken(state);

                Assert.That(current.TokenType, Is.EqualTo(expectedTypes[i]));
                Assert.That(current.Value, Is.EqualTo(expectedValues[i]));
            }

            TagToken afterEnd = Lexer.GetNextToken(state);

            Assert.That(afterEnd, Is.Null);
        }
Ejemplo n.º 11
0
        // Lexes a string-to-string equality comparison and checks each token's type and value
        // in order, then checks that the lexer yields null once the input is exhausted.
        public void LexVarCompare()
        {
            ParserState state = NewState("'$(banana)' == 'yellow'");

            // Expected token stream, in lexing order; the two arrays are index-aligned.
            TagTokenType[] expectedTypes =
            {
                TagTokenType.String,
                TagTokenType.IsEqual,
                TagTokenType.String
            };
            string[] expectedValues =
            {
                "'$(banana)'",
                "==",
                "'yellow'"
            };

            for (int i = 0; i < expectedTypes.Length; i++)
            {
                TagToken current = Lexer.GetNextToken(state);

                Assert.That(current.TokenType, Is.EqualTo(expectedTypes[i]));
                Assert.That(current.Value, Is.EqualTo(expectedValues[i]));
            }

            TagToken afterEnd = Lexer.GetNextToken(state);

            Assert.That(afterEnd, Is.Null);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Converts an HTML string into XML text by re-emitting the parsed tokens and balancing
        /// element start- and end-tags.
        /// </summary>
        /// <remarks>
        /// Leading spaces, newlines and doctype tokens are skipped. Element names listed in
        /// <c>EmptyElements</c> are written with empty-element syntax and their end-tags are ignored.
        /// Unbalanced end-tags are repaired either by closing the open elements above them (for names
        /// in <c>SingleElements</c> that are actually open) or by inserting a matching start-tag.
        /// An end-tag found while no element is open is dropped with a warning.
        /// </remarks>
        /// <param name="html">The HTML text to convert.</param>
        /// <returns>
        /// The converted text, or <c>null</c> when any exception is raised during conversion
        /// (the exception is logged).
        /// </returns>
        public static string ToXml(string html)
        {
            StringBuilder result = new StringBuilder();

            // Standard XML file header, including entities that are likely to be used.
            result.Append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");

            ParseReader    reader       = new ParseReader(html);
            TagParser      parser       = new TagParser(reader);
            Stack <string> nestingStack = new Stack <string>();

            try
            {
                ParseToken token = parser.GetNextToken();

                // Ignore leading white-space.
                while (token is SpacesToken || token is NewlineToken || token is DoctypeToken)
                {
                    token = parser.GetNextToken();
                }

                while (!(token is EOFToken))
                {
                    Log.DebugFormat("Token = {0}", token);
                    if (token is TagToken)
                    {
                        TagToken t = (TagToken)token;
                        if (!t.Tag.IsEndTag)
                        {
                            // Deal with start-tag. Typically this will be new element nesting.
                            Tag startTag = t.Tag;
                            if (startTag is EmptyElement)
                            {
                                result.Append(((EmptyElement)startTag).ToString());
                            }
                            else
                            {
                                // Tags that are always empty elements are converted to empty elements here.
                                // Element names are pushed onto the stack to balance elements with missing end-tag.
                                // Invariant lower-casing keeps tag-name matching independent of the current culture.
                                string startTagName = startTag.Name.ToLowerInvariant();
                                Log.DebugFormat("startTagName = {0}", startTagName);
                                if (EmptyElements.Contains(startTagName))
                                {
                                    result.Append((new EmptyElement(startTag)).ToString());
                                }
                                else
                                {
                                    result.Append(startTag.ToString());
                                    nestingStack.Push(startTagName);
                                }
                            }
                        }
                        else
                        {
                            // Deal with end-tag.
                            Tag endTag = t.Tag;

                            // Remove the '/' from beginning of the tag-name for comparison.
                            string endTagName = endTag.Name.Substring(1).ToLowerInvariant();
                            Log.DebugFormat("endTagName = {0}", endTagName);

                            // Ignore some end-tags for empty elements that are handled with or without empty element syntax.
                            if (EmptyElements.Contains(endTagName))
                            {
                                Log.InfoFormat("Ignoring redundant end-tag: {0}", endTagName);
                            }
                            else
                            {
                                // Keep element tags matched appropriately.
                                // Stack<T>.Peek() throws on an empty stack, so emptiness is mapped to null explicitly.
                                string peek = nestingStack.Count > 0 ? nestingStack.Peek() : null;
                                if (peek == null)
                                {
                                    Log.WarnFormat("Ignoring extra content at end of document! </{0}> ({1})", endTagName, parser.GetCharacterPosition());
                                }
                                else
                                {
                                    if (peek.Equals(endTagName))
                                    {
                                        nestingStack.Pop();
                                    }
                                    else
                                    {
                                        // Pair all the previous unmatched tags for these important structural elements.
                                        // These elements appear only once, so should never be automatically closed.
                                        // Unwinding is only done when the element is actually open; otherwise the
                                        // loop would drain the stack and fail, so the missing start-tag is inserted instead.
                                        if (SingleElements.Contains(endTagName) && nestingStack.Contains(endTagName))
                                        {
                                            while (peek != endTagName)
                                            {
                                                StringBuilder endtag = (new StringBuilder()).Append("</").Append(peek).Append('>');
                                                Log.WarnFormat("Adding a missing end-tag! {0} ({1})", endtag, parser.GetCharacterPosition());
                                                result.Append(endtag);
                                                nestingStack.Pop();
                                                peek = nestingStack.Peek();
                                            }

                                            // Remove the current item from the stack, as it has been paired now.
                                            nestingStack.Pop();
                                        }
                                        else
                                        {
                                            // Insert a matching start-tag before the unbalanced end-tag found.
                                            StringBuilder startTag = (new StringBuilder()).Append("<").Append(endTagName).Append('>');
                                            Log.WarnFormat("Adding a missing start-tag! {0} ({1})", startTag, parser.GetCharacterPosition());
                                            result.Append(startTag);
                                        }
                                    }

                                    // Write the current element end-tag.
                                    result.Append("</").Append(endTagName).Append('>');
                                }
                            }
                        }
                    }
                    else if (token is WordToken)
                    {
                        WordToken t = (WordToken)token;
                        result.Append(t.Word);
                    }
                    else if (token is SpacesToken)
                    {
                        SpacesToken t = (SpacesToken)token;
                        result.Append(t.Spaces);
                    }
                    else if (token is NumberToken)
                    {
                        NumberToken t = (NumberToken)token;
                        result.Append(t.Number);
                    }
                    else if (token is EntityReferenceToken)
                    {
                        EntityReferenceToken t = (EntityReferenceToken)token;
                        result.Append(XmlEntity(t.Name));
                    }
                    else if (token is PunctuationToken)
                    {
                        PunctuationToken t = (PunctuationToken)token;
                        result.Append(t.Character);
                    }
                    else if (token is CharacterEntityToken)
                    {
                        CharacterEntityToken t = (CharacterEntityToken)token;
                        result.Append(t.Character);
                    }
                    else if (token is NewlineToken)
                    {
                        result.Append('\n');
                    }
                    else if (token is ScriptToken)
                    {
                        ScriptToken t = (ScriptToken)token;
                        if (t.Script.Length > 0)
                        {
                            // Script element contents are often empty.
                            // NOTE: Removing any prior use of CDATA section in script, to avoid conflict.
                            string script = t.Script.Replace("<![CDATA[", "").Replace("]]>", "");
                            result.Append("/*<![CDATA[*/").Append(script).Append("/*]]>*/");
                        }
                    }
                    else if (token is CDataToken)
                    {
                        CDataToken t = (CDataToken)token;
                        result.Append("<![CDATA[").Append(t.Data).Append("]]>");
                    }
                    else if (token is CommentToken)
                    {
                        CommentToken t = (CommentToken)token;
                        result.Append("<!--").Append(t.Comment).Append("-->");
                    }
                    else if (token is DoctypeToken)
                    {
                        // Ignore.
                    }
                    else if (token is ProcessingInstructionToken)
                    {
                        // Ignore.
                    }
                    else
                    {
                        Log.WarnFormat("Unexpected token! {0}", token);
                    }
                    token = parser.GetNextToken();
                }

                Log.Info(parser.GetCompletionReport());
            }
            catch (Exception ex)
            {
                // Best-effort conversion: any failure is logged and reported to the caller as null.
                Log.Error("EXCEPTION", ex);
                result = null;
            }

            return(result == null ? null : result.ToString());
        }
Ejemplo n.º 13
0
 // Visitor callback for tag tokens. This implementation has an empty body,
 // so visiting a tag token has no effect.
 public void VisitTagToken(TagToken token)
 {
 }
Ejemplo n.º 14
0
 // Visitor callback for tag tokens: appends the token's text to the _sb buffer.
 public void VisitTagToken(TagToken token) => _sb.Append(token.Text);
Ejemplo n.º 15
0
 // Returns true when `tag` is a closing tag and the tag on top of the stack is an
 // open tag of the same tag type.
 private static bool IsCorrectNextCloseTag(TagToken tag, Stack <TagToken> stackOfTags)
 {
     // An opening tag, or a closing tag with nothing pending, can never match.
     if (tag.IsOpen || stackOfTags.Count == 0)
     {
         return false;
     }

     var pending = stackOfTags.Peek();

     return pending.Tagtype == tag.Tagtype && pending.IsOpen;
 }
Ejemplo n.º 16
0
 // Returns true when `tag` is an opening tag that may start here: either nothing is
 // pending, or the tag on top of the stack has a different type whose definition lists
 // this tag type among its children.
 private static bool IsCorrectNextOpenTag(TagToken tag, Stack <TagToken> stackOfTags, ILanguage language)
 {
     if (!tag.IsOpen)
     {
         return false;
     }

     if (stackOfTags.Count == 0)
     {
         return true;
     }

     var enclosingType = stackOfTags.Peek().Tagtype;

     // The children lookup only happens when the two tag types differ.
     return enclosingType != tag.Tagtype &&
            language.Tags[enclosingType].Children.Contains(tag.Tagtype);
 }
Ejemplo n.º 17
0
		// Visitor callback for tag tokens: appends the token's text to _plainText.
		public void VisitTagToken(TagToken token) => _plainText += token.Text;
Ejemplo n.º 18
0
        /// <summary>
        /// Walks <paramref name="input"/> character by character, building action tokens for
        /// <c>[</c>…<c>]</c> and tag tokens for <c>#</c>…<c>#</c>, and returns the accumulated output text.
        /// </summary>
        /// <param name="input">The text to parse.</param>
        /// <returns>
        /// <paramref name="input"/> itself when it is null or empty; otherwise the text accumulated in
        /// the local output string, which includes the <c>Resolved</c> text of every root token
        /// that became resolved during the walk.
        /// </returns>
        internal string ParseInner(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return(input);
            }

            var output  = "";
            // True while the previous character was a backslash.
            var escaped = false;

            // Root of the token tree currently being built; null while no token is open.
            GrammarToken token = null;

            for (var i = 0; i < input.Length; i++)
            {
                var ch = input[i];

                if (escaped)
                {
                    // The character after a backslash skips the switch below and goes straight to AddCharToToken.
                    // NOTE(review): the meaning of the final 'true' argument is defined by AddCharToToken, not visible here.
                    AddCharToToken(ch, ref output, token, true);
                    escaped = false;
                    continue;
                }

                if (ch == '\\')
                {
                    // The backslash itself is also handed to AddCharToToken before the escape state is set.
                    AddCharToToken(ch, ref output, token, true);
                    escaped = true;
                    continue;
                }

                switch (ch)
                {
                case '[':
                    // Start new action token.
                    var newToken = new ActionToken(this, i + 1, token);

                    if (token == null)
                    {
                        // No tree yet: the new action becomes the root.
                        token = newToken;
                    }
                    else
                    {
                        // Otherwise it is added as a child of the current root token.
                        token.AddChild(newToken);
                    }

                    newToken.AddChar(ch);
                    break;

                case ']':
                    // Close highest action token. If any inner tokens are unfinished, they resolve to their raw text.
                    var action = token?.FindLowestOpenOfType(TagType.Action);

                    if (action == null)
                    {
                        // No open action. Add ] to text as normal.
                        AddCharToToken(ch, ref output, token);
                        break;
                    }
                    else
                    {
                        action.AddChar(ch);
                    }

                    action.Resolve();
                    break;

                case '#':
                    // If lowest open node is a tag, close it. Otherwise, open a new tag.
                    if (token == null)
                    {
                        // No tree yet: the new tag becomes the root.
                        token = new TagToken(this, i + 1, null);
                        token.AddChar(ch);
                        break;
                    }

                    var lowest = token.FindLowestOpenToken();

                    if (lowest.Type == TagType.Tag)
                    {
                        // This '#' terminates the currently open tag.
                        lowest.AddChar(ch);
                        lowest.Resolve();
                        break;
                    }

                    // The lowest open token is not a tag: nest a new tag under it.
                    var newTag = new TagToken(this, i + 1, lowest);
                    lowest.AddChild(newTag);
                    newTag.AddChar(ch);
                    break;

                default:
                    AddCharToToken(ch, ref output, token);
                    break;
                }

                // Once the root token is resolved, its resolved text is appended and the tree is discarded.
                if (token != null && token.IsResolved)
                {
                    output += token.Resolved;
                    token   = null;
                }
            }

            // NOTE(review): a root token that is still unresolved when the input ends is not appended
            // here — confirm whether that is intended or handled elsewhere.
            return(output);
        }
 // Visitor callback for tag tokens. The body is empty, so visiting a tag token has no effect.
 public void VisitTagToken(TagToken token)
 {
     // not required with this implementation
 }