/// <summary>Verifies that the token produced by <c>Parse</c> carries the parser's starting index.</summary>
public void TokenShouldHaveHaveStartIndexHasIndex()
{
    TagParser parser = new TagParser(stream);
    TokenIndex indexBeforeParse = parser.Index;

    TagToken parsedToken = parser.Parse();

    Assert.AreEqual(indexBeforeParse.Index, parsedToken.Index.Index);
}
/// <summary>
/// Initializes a new instance of the <see cref="TagExpression"/> class.
/// </summary>
/// <param name="token">The token this expression wraps; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
protected TagExpression(TagToken token)
{
    // nameof keeps the parameter name refactor-safe (was the string literal "token").
    _token = token ?? throw new ArgumentNullException(nameof(token));
}
/// <summary>
/// Parses <paramref name="codeText"/> and checks the token's value, tag name,
/// and the stream position after parsing.
/// </summary>
public void Create(string codeText, string value, string tagName, int index)
{
    var stream = new ParseStream(codeText);
    var parsedToken = new TagParser(stream).Parse();

    Assert.AreEqual(value, parsedToken.Value);
    Assert.AreEqual(tagName, parsedToken.TagName);
    Assert.AreEqual(index, stream.Index.Index);
}
/// <summary>
/// Tokenizes a segment element-by-element: tags become single-position tag
/// tokens, already-tokenized runs are passed through, and text runs are
/// tokenized via <c>TokenizeInternal</c>.
/// </summary>
/// <param name="s">The segment whose elements are tokenized.</param>
/// <param name="allowTokenBundles">Forwarded to the internal text tokenizer.</param>
/// <returns>The tokens for all elements, after acronym reclassification.</returns>
public List <Core.Tokenization.Token> Tokenize(Core.Segment s, bool allowTokenBundles)
{
    // TODO check whether segment culture is compatible with tokenizer parameter's culture? Or accept junk-in-junk-out?
    var result = new List<Token>();
    int run = -1;

    foreach (SegmentElement element in s.Elements)
    {
        ++run;

        if (element == null)
        {
            System.Diagnostics.Debug.Assert(false, "empty segment run!");
            continue;
        }

        // Check order matters: Tag first, then Token, then Text (as in the original as-cast chain).
        if (element is Tag tag)
        {
            // TODO rather have a "markup token" type?
            Token wrapped = new TagToken(tag);
            wrapped.Span = new SegmentRange(run, 0, 0);
            result.Add(wrapped);
        }
        else if (element is Token pretokenized)
        {
            // partially pretokenized input
            // TODO duplicate token/deep copy?
            pretokenized.Span = new SegmentRange(run, 0, 0);
            result.Add(pretokenized);
        }
        else if (element is Text text)
        {
            var tokenized = TokenizeInternal(text.Value, run, _Parameters.CreateWhitespaceTokens, allowTokenBundles);
            if (tokenized != null && tokenized.Count > 0)
            {
                result.AddRange(tokenized);
            }
        }
        else
        {
            System.Diagnostics.Debug.Assert(false, "Unknown segment run type");
        }
    }

    ReclassifyAcronyms(result);
    return result;
}
/// <summary>
/// Scans <paramref name="line"/> for paired markup tags and returns tokens for
/// every opening/closing pair that could be matched; unmatched openings are
/// discarded and unmatched closings are skipped as plain text.
/// </summary>
/// <param name="line">The line to scan; null yields an empty list.</param>
/// <returns>Opening and closing tag tokens for each matched pair.</returns>
public List <TagToken> ParseLine(string line)
{
    var result = new List <TagToken>();
    if (line == null)
    {
        return(result);
    }
    // Stack of openings seen so far but not yet paired with a closing marker.
    var tagTokenStack = new Stack <TagToken>();
    var index = 0;
    while (index < line.Length)
    {
        // First configured tag whose markup matches the text at the current position.
        var currentTag = tags.FirstOrDefault(tag => IsSuitableTag(tag, line, index));
        if (currentTag == null)
        {
            index++;
            continue;
        }
        if (DoubleTagValidator.IsPossibleOpeningTag(line, index, currentTag))
        {
            var newOpeningTagToken = new TagToken(index, currentTag, currentTag.Opening, true);
            tagTokenStack.Push(newOpeningTagToken);
        }
        else if (DoubleTagValidator.IsPossibleClosingTag(line, index, currentTag))
        {
            // Discard interleaved openings of other tag kinds; they cannot be paired across this closing.
            while (tagTokenStack.Count > 0 && tagTokenStack.Peek().Tag != currentTag)
            {
                tagTokenStack.Pop();
            }
            if (tagTokenStack.Count == 0)
            {
                // Closing marker with no matching opening: treat it as plain text.
                index++;
                continue;
            }
            var newOpeningTagToken = tagTokenStack.Pop();
            var newClosingTagToken = new TagToken(index, currentTag, currentTag.Closing, false);
            result.Add(newOpeningTagToken);
            result.Add(newClosingTagToken);
        }
        // NOTE(review): the skip uses Opening.Length even when a closing marker was just
        // consumed — assumes Opening and Closing have the same length for every tag; confirm.
        index += currentTag.Opening.Length;
    }
    return(result);
}
/// <summary>Lexes a condition with a function call, an item reference, an inequality, and an OR clause.</summary>
public void LexItemExists()
{
    ParserState state = NewState("exists(@(ProjectOutput)) != false or 12.5");

    // Expected (token type, token value) pairs, in lexing order.
    var expected = new[]
    {
        (Type: TagTokenType.Function, Value: "exists"),
        (Type: (TagTokenType)'(', Value: "("),
        (Type: TagTokenType.Item, Value: "@(ProjectOutput)"),
        (Type: (TagTokenType)')', Value: ")"),
        (Type: TagTokenType.IsNot, Value: "!="),
        (Type: TagTokenType.Literal, Value: "false"),
        (Type: TagTokenType.Or, Value: "OR"),
        (Type: TagTokenType.Number, Value: "12.5"),
    };

    foreach (var (type, value) in expected)
    {
        TagToken tk = Lexer.GetNextToken(state);
        Assert.That(tk.TokenType, Is.EqualTo(type));
        Assert.That(tk.Value, Is.EqualTo(value));
    }

    // The token stream must be exhausted.
    Assert.That(Lexer.GetNextToken(state), Is.Null);
}
/// <summary>
/// Finds the earliest tag opening within the allowed leading window of the line
/// and returns it as a single opening token (or an empty list when none match).
/// </summary>
public List <TagToken> ParseLine(string line)
{
    // Openings are only recognized close to the start of the line.
    var searchLimit = Math.Min(5, GetCountSpaceAtBeginningLine(line) + tags.First().Opening.Length + 1);

    var earliestMatch = tags
        .Select(tag => (tag, index: line.IndexOf(tag.Opening, StringComparison.Ordinal)))
        .OrderBy(match => match.index)
        .FirstOrDefault(match => match.index != -1 && match.index < searchLimit);

    if (earliestMatch.tag == null)
    {
        return new List <TagToken>();
    }

    var openingToken = new TagToken(earliestMatch.index, earliestMatch.tag, earliestMatch.tag.Opening, true);
    return new List <TagToken> { openingToken };
}
// http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
/// <summary>
/// Creates an element for <paramref name="token"/> and pushes it onto the stack
/// of open elements, following (approximately) the HTML5 insertion algorithm.
/// </summary>
/// <param name="token">The start-tag token to create an element for.</param>
/// <param name="doc">The owner document for the new element.</param>
/// <returns>The newly created element.</returns>
public INode InsertHtmlElement(TagToken token, IDocument doc)
{
    // TODO - make sure the steps conform with the specs in the link above.

    // Step 1: determine the adjusted insertion location.
    InsertionLocation insertionLocation = GetAppropriatePlaceForInsertingANode();

    // Step 2: create an element for the token, with the intended parent being
    // the element in which the adjusted insertion location finds itself.
    INode created = HtmlElementFactory.Instance.CreateElement(token.TagName, doc);

    // Step 3: TODO - insert the newly created element at the adjusted insertion
    // location when possible. NOTE: if that location cannot accept more elements
    // (e.g. a Document that already has an element child), the element is dropped.

    // Step 4: push the element onto the stack of open elements so it becomes the current node.
    TreeConstruction.Instance.StackOfOpenElements.Push(created);

    // Step 5: return the newly created element.
    return created;
}
/// <summary>
/// Pops the open tag matching <paramref name="closeTag"/> off the stack and
/// builds the <c>TagZone</c> spanning from the opening markup to the closing markup.
/// </summary>
/// <param name="closeTag">The closing tag just encountered.</param>
/// <param name="stackOfTags">Stack of currently open tags; mutated by this call.</param>
/// <param name="line">The source line the positions refer to.</param>
/// <param name="language">Supplies the markup strings per tag type.</param>
private static TagZone TakeTagZone(TagToken closeTag, Stack <TagToken> stackOfTags, string line, ILanguage language)
{
    // Unwind until a tag of the matching type is on top, discarding others.
    while (stackOfTags.Count != 0)
    {
        if (stackOfTags.Peek().Tagtype == closeTag.Tagtype)
        {
            break;
        }
        stackOfTags.Pop();
    }
    // NOTE(review): if no open tag of this type exists, this Pop() throws
    // InvalidOperationException — presumably callers guarantee a match; confirm.
    var openTag = stackOfTags.Pop();
    // Value span: between the end of the opening markup and the closing position.
    var startValTag = openTag.Position + language.Tags[openTag.Tagtype].Start.Length;
    var endValTag = closeTag.Position;
    // Full span: includes both markup sequences.
    var startTag = openTag.Position;
    var endTag = closeTag.Position + language.Tags[closeTag.Tagtype].End.Length;
    return(new TagZone(new TagNode(openTag.Tagtype), language.Tags[closeTag.Tagtype], startTag, endTag, line.Substring(startValTag, endValTag - startValTag)));
}
/// <summary>Lexes an item transform compared against a quoted string.</summary>
public void LexTagItem()
{
    ParserState state = NewState("@(ProjectOutput=>'%(RelativePath)') != 'bin/$(Configuration)/test.txt'");

    // Expected (token type, token value) pairs, in lexing order.
    var expected = new[]
    {
        (Type: TagTokenType.Item, Value: "@(ProjectOutput=>'%(RelativePath)')"),
        (Type: TagTokenType.IsNot, Value: "!="),
        (Type: TagTokenType.String, Value: "'bin/$(Configuration)/test.txt'"),
    };

    foreach (var (type, value) in expected)
    {
        TagToken tk = Lexer.GetNextToken(state);
        Assert.That(tk.TokenType, Is.EqualTo(type));
        Assert.That(tk.Value, Is.EqualTo(value));
    }

    // The token stream must be exhausted.
    Assert.That(Lexer.GetNextToken(state), Is.Null);
}
/// <summary>Lexes an equality comparison between a quoted variable and a quoted literal.</summary>
public void LexVarCompare()
{
    ParserState state = NewState("'$(banana)' == 'yellow'");

    // Expected (token type, token value) pairs, in lexing order.
    var expected = new[]
    {
        (Type: TagTokenType.String, Value: "'$(banana)'"),
        (Type: TagTokenType.IsEqual, Value: "=="),
        (Type: TagTokenType.String, Value: "'yellow'"),
    };

    foreach (var (type, value) in expected)
    {
        TagToken tk = Lexer.GetNextToken(state);
        Assert.That(tk.TokenType, Is.EqualTo(type));
        Assert.That(tk.Value, Is.EqualTo(value));
    }

    // The token stream must be exhausted.
    Assert.That(Lexer.GetNextToken(state), Is.Null);
}
/// <summary>
/// Converts (possibly malformed) HTML text into well-formed XML: balances
/// missing start/end tags, rewrites always-empty HTML elements using
/// empty-element syntax, maps entity references, and wraps script bodies in
/// CDATA sections.
/// </summary>
/// <param name="html">The HTML text to convert.</param>
/// <returns>The resulting XML text, or null when parsing throws an exception.</returns>
public static string ToXml(string html)
{
    StringBuilder result = new StringBuilder();
    // Standard XML file header, including entities that are likely to be used.
    result.Append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    ParseReader reader = new ParseReader(html);
    TagParser parser = new TagParser(reader);
    // Names of the currently open elements, used to balance missing tags.
    Stack<string> nestingStack = new Stack<string>();
    try
    {
        ParseToken token = parser.GetNextToken();
        // Ignore leading white-space.
        while (token is SpacesToken || token is NewlineToken || token is DoctypeToken)
        {
            token = parser.GetNextToken();
        }
        while (!(token is EOFToken))
        {
            Log.DebugFormat("Token = {0}", token);
            if (token is TagToken)
            {
                TagToken t = (TagToken)token;
                if (!t.Tag.IsEndTag)
                {
                    // Deal with start-tag. Typically this will be new element nesting.
                    Tag startTag = t.Tag;
                    if (startTag is EmptyElement)
                    {
                        result.Append(((EmptyElement)startTag).ToString());
                    }
                    else
                    {
                        // Tags that are always empty elements are converted to empty elements here.
                        // Element names are pushed onto the stack to balance elements with missing end-tag.
                        string startTagName = startTag.Name.ToLower();
                        Log.DebugFormat("startTagName = {0}", startTagName);
                        if (EmptyElements.Contains(startTagName))
                        {
                            result.Append((new EmptyElement(startTag)).ToString());
                        }
                        else
                        {
                            result.Append(startTag.ToString());
                            nestingStack.Push(startTagName);
                        }
                    }
                }
                else
                {
                    // Deal with end-tag.
                    Tag endTag = t.Tag;
                    // Remove the '/' from beginning of the tag-name for comparison.
                    string endTagName = endTag.Name.Substring(1).ToLower();
                    Log.DebugFormat("endTagName = {0}", endTagName);
                    // Ignore some end-tags for empty elements that are handled with or without empty element syntax.
                    if (EmptyElements.Contains(endTagName))
                    {
                        Log.InfoFormat("Ignoring redundant end-tag: {0}", endTagName);
                    }
                    else
                    {
                        // Keep element tags matched appropriately.
                        // FIX: Stack<string>.Peek() throws InvalidOperationException on an
                        // empty stack and never returns null, so the previous
                        // "Peek() == null" check was dead code — an extra end-tag aborted
                        // the whole conversion via the outer catch (returning null)
                        // instead of being warned about and skipped. Test Count instead.
                        if (nestingStack.Count == 0)
                        {
                            Log.WarnFormat("Ignoring extra content at end of document! </{0}> ({1})", endTagName, parser.GetCharacterPosition());
                        }
                        else
                        {
                            string peek = nestingStack.Peek();
                            if (peek.Equals(endTagName))
                            {
                                nestingStack.Pop();
                            }
                            else
                            {
                                // Pair all the previous unmatched tags for these important structural elements.
                                // These elements appear only once, so should never be automatically closed.
                                if (SingleElements.Contains(endTagName))
                                {
                                    while (peek != endTagName)
                                    {
                                        StringBuilder endtag = (new StringBuilder()).Append("</").Append(peek).Append('>');
                                        Log.WarnFormat("Adding a missing end-tag! {0} ({1})", endtag, parser.GetCharacterPosition());
                                        result.Append(endtag);
                                        nestingStack.Pop();
                                        // NOTE(review): if endTagName is never found this Peek()
                                        // throws on an emptied stack (pre-existing behavior).
                                        peek = nestingStack.Peek();
                                    }
                                    // Remove the current item from the stack, as it has been paired now.
                                    nestingStack.Pop();
                                }
                                else
                                {
                                    // Insert a matching start-tag before the unbalanced end-tag found.
                                    StringBuilder startTag = (new StringBuilder()).Append("<").Append(endTagName).Append('>');
                                    Log.WarnFormat("Adding a missing start-tag! {0} ({1})", startTag, parser.GetCharacterPosition());
                                    result.Append(startTag);
                                }
                            }
                            // Write the current element end-tag.
                            result.Append("</").Append(endTagName).Append('>');
                        }
                    }
                }
            }
            else if (token is WordToken)
            {
                WordToken t = (WordToken)token;
                result.Append(t.Word);
            }
            else if (token is SpacesToken)
            {
                SpacesToken t = (SpacesToken)token;
                result.Append(t.Spaces);
            }
            else if (token is NumberToken)
            {
                NumberToken t = (NumberToken)token;
                result.Append(t.Number);
            }
            else if (token is EntityReferenceToken)
            {
                EntityReferenceToken t = (EntityReferenceToken)token;
                result.Append(XmlEntity(t.Name));
            }
            else if (token is PunctuationToken)
            {
                PunctuationToken t = (PunctuationToken)token;
                result.Append(t.Character);
            }
            else if (token is CharacterEntityToken)
            {
                CharacterEntityToken t = (CharacterEntityToken)token;
                result.Append(t.Character);
            }
            else if (token is NewlineToken)
            {
                result.Append('\n');
            }
            else if (token is ScriptToken)
            {
                ScriptToken t = (ScriptToken)token;
                if (t.Script.Length > 0)
                {
                    // Script element contents are often empty.
                    // NOTE: Removing any prior use of CDATA section in script, to avoid conflict.
                    string script = t.Script.Replace("<![CDATA[", "").Replace("]]>", "");
                    result.Append("/*<![CDATA[*/").Append(script).Append("/*]]>*/");
                }
            }
            else if (token is CDataToken)
            {
                CDataToken t = (CDataToken)token;
                result.Append("<![CDATA[").Append(t.Data).Append("]]>");
            }
            else if (token is CommentToken)
            {
                CommentToken t = (CommentToken)token;
                result.Append("<!--").Append(t.Comment).Append("-->");
            }
            else if (token is DoctypeToken)
            {
                // Ignore.
            }
            else if (token is ProcessingInstructionToken)
            {
                // Ignore.
            }
            else
            {
                Log.WarnFormat("Unexpected token! {0}", token);
            }
            token = parser.GetNextToken();
        }
        Log.Info(parser.GetCompletionReport());
    }
    catch (Exception ex)
    {
        Log.Error("EXCEPTION", ex);
        result = null;
    }
    return(result == null ? null : result.ToString());
}
/// <summary>Visitor callback for tag tokens; intentionally a no-op in this visitor.</summary>
public void VisitTagToken(TagToken token) { }
/// <summary>Appends the raw text of the tag token to the output buffer.</summary>
public void VisitTagToken(TagToken token)
{
    _sb.Append(token.Text);
}
/// <summary>
/// Determines whether <paramref name="tag"/> is a closing tag that matches the
/// open tag currently on top of <paramref name="stackOfTags"/>.
/// </summary>
private static bool IsCorrectNextCloseTag(TagToken tag, Stack <TagToken> stackOfTags)
{
    // Only a closing tag with something open above it can match.
    if (tag.IsOpen || stackOfTags.Count == 0)
    {
        return false;
    }

    var top = stackOfTags.Peek();
    return top.Tagtype == tag.Tagtype && top.IsOpen;
}
/// <summary>
/// Determines whether <paramref name="tag"/> may legally open here: any opening
/// tag is valid on an empty stack; otherwise the top tag must be of a different
/// type and allow this tag type as a child per the language rules.
/// </summary>
private static bool IsCorrectNextOpenTag(TagToken tag, Stack <TagToken> stackOfTags, ILanguage language)
{
    if (!tag.IsOpen)
    {
        return false;
    }
    if (stackOfTags.Count == 0)
    {
        return true;
    }

    var top = stackOfTags.Peek();
    return top.Tagtype != tag.Tagtype && language.Tags[top.Tagtype].Children.Contains(tag.Tagtype);
}
/// <summary>Visitor callback: accumulates the tag token's text into the plain-text result.</summary>
public void VisitTagToken(TagToken token) { _plainText += token.Text; }
/// <summary>
/// Parses grammar markup in <paramref name="input"/> character by character:
/// '[' opens an action token, ']' closes the lowest open action, '#' toggles a
/// tag token, and '\' escapes the next character. Resolved tokens are flushed
/// into the output string.
/// </summary>
/// <param name="input">The text to parse; null or empty is returned unchanged.</param>
/// <returns>The input with all resolved tokens substituted.</returns>
internal string ParseInner(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return(input);
    }
    var output = "";
    var escaped = false;
    // Root of the currently open token tree; null when no token is open.
    GrammarToken token = null;
    for (var i = 0; i < input.Length; i++)
    {
        var ch = input[i];
        // A character following '\' is always taken literally.
        if (escaped)
        {
            AddCharToToken(ch, ref output, token, true);
            escaped = false;
            continue;
        }
        if (ch == '\\')
        {
            AddCharToToken(ch, ref output, token, true);
            escaped = true;
            continue;
        }
        switch (ch)
        {
            case '[':
                // Start new action token.
                var newToken = new ActionToken(this, i + 1, token);
                if (token == null)
                {
                    token = newToken;
                }
                else
                {
                    token.AddChild(newToken);
                }
                newToken.AddChar(ch);
                break;
            case ']':
                // Close highest action token. If any inner tokens are unfinished, they resolve to their raw text.
                var action = token?.FindLowestOpenOfType(TagType.Action);
                if (action == null)
                {
                    // No open action. Add ] to text as normal.
                    AddCharToToken(ch, ref output, token);
                    break;
                }
                else
                {
                    action.AddChar(ch);
                }
                action.Resolve();
                break;
            case '#':
                // If lowest open node is a tag, close it. Otherwise, open a new tag.
                if (token == null)
                {
                    token = new TagToken(this, i + 1, null);
                    token.AddChar(ch);
                    break;
                }
                var lowest = token.FindLowestOpenToken();
                if (lowest.Type == TagType.Tag)
                {
                    lowest.AddChar(ch);
                    lowest.Resolve();
                    break;
                }
                var newTag = new TagToken(this, i + 1, lowest);
                lowest.AddChild(newTag);
                newTag.AddChar(ch);
                break;
            default:
                AddCharToToken(ch, ref output, token);
                break;
        }
        // Once the root token fully resolves, flush its text and start fresh.
        if (token != null && token.IsResolved)
        {
            output += token.Resolved;
            token = null;
        }
    }
    return(output);
}
/// <summary>Visitor callback for tag tokens; intentionally a no-op in this visitor.</summary>
public void VisitTagToken(TagToken token)
{
    // not required with this implementation
}