public void Tests()
        {
            // Walks a single StringScanner through one fixed input string.
            // Every call below depends on the position left behind by the previous
            // call, so the statements must stay in this order.
            var p = new StringScanner();

            p.Reset("This is a string with something [bracketed]");

            // Right after Reset the scanner is expected to be at the start and not at the end
            Assert.IsTrue(p.Bof);
            Assert.IsFalse(p.Eof);

            // Consuming the leading "This" moves off the start of the input
            Assert.IsTrue(p.SkipString("This"));
            Assert.IsFalse(p.Bof);
            Assert.IsFalse(p.Eof);

            // A string that is not at the current position is expected to be rejected
            Assert.IsFalse(p.SkipString("huh?"));

            // Step over " is " piece by piece: line space, 'i', 's', whitespace
            Assert.IsTrue(p.SkipLinespace());
            Assert.IsTrue(p.SkipChar('i'));
            Assert.IsTrue(p.SkipChar('s'));
            Assert.IsTrue(p.SkipWhitespace());

            // The scanner now sits on the 'a' of "a string", which is one of the candidates
            Assert.IsTrue(p.DoesMatchAny(new char[] { 'r', 'a', 't'} ));

            // Find is expected to fail for absent text and for "WITH" (input has lower-case "with");
            // FindI is expected to succeed for "WITH" — presumably the case-insensitive variant
            Assert.IsFalse(p.Find("Not here"));
            Assert.IsFalse(p.Find("WITH"));
            Assert.IsFalse(p.FindI("Not here"));
            Assert.IsTrue(p.FindI("WITH"));

            // Locate '[', step past it, and mark the start of the bracketed text
            Assert.IsTrue(p.Find('['));
            p.SkipForward(1);
            p.Mark();

            // Extract is expected to return the text between the mark and the ']' just found
            Assert.IsTrue(p.Find(']'));
            Assert.AreEqual("bracketed", p.Extract());

            // Consuming the closing ']' should leave the scanner at the end of the input
            Assert.IsTrue(p.SkipChar(']'));
            Assert.IsTrue(p.Eof);
        }
        // Parses a link definition of the form "[id]:" followed by a link target.
        // Returns null when the text at the scanner does not form a valid definition.
        // The scanner position is not restored on failure.
        internal static LinkDefinition ParseLinkDefinitionInternal(StringScanner p, bool extraMode)
        {
            p.SkipWhitespace();

            // The definition opens with '[' ...
            if (!p.SkipChar('['))
            {
                return null;
            }

            // ... and the id is everything up to the next ']'
            p.Mark();
            if (!p.Find(']'))
            {
                return null;
            }

            // Reject an empty id, and anything other than "]:" following the id
            var id = p.Extract();
            if (id.Length == 0 || !p.SkipString("]:"))
            {
                return null;
            }

            // The url and optional title are handled by ParseLinkTarget
            var definition = ParseLinkTarget(p, id, extraMode);

            // Only line space may follow the target; anything else invalidates the definition
            p.SkipLinespace();
            return p.EndOfLine ? definition : null;
        }
Beispiel #3
0
 // Parse a link definition, putting the scanner back where it started
 // when the text is not a valid definition
 internal static LinkDefinition ParseLinkDefinition(StringScanner p, bool ExtraMode)
 {
     int startPosition = p.position;

     var definition = ParseLinkDefinitionInternal(p, ExtraMode);
     if (definition != null)
     {
         return definition;
     }

     // Parsing failed: rewind so the caller can try something else
     p.position = startPosition;
     return null;
 }
Beispiel #4
0
		// Constructor: sets the default footnotes CSS class and creates the
		// working buffers, scanner, span formatter and lookup tables.
		public Markdown()
		{
			HtmlClassFootnotes = "footnotes";
			m_StringBuilder = new StringBuilder();
			m_StringScanner = new StringScanner();
			m_SpanFormatter = new SpanFormatter(this);

			// Link ids are looked up case-insensitively. An ordinal comparer keeps that
			// lookup independent of the thread's current culture (for example, under a
			// Turkish culture "ID" and "id" do not compare equal with a culture-sensitive
			// comparer), so the same document resolves the same way on every machine.
			m_LinkDefinitions = new Dictionary<string, LinkDefinition>(StringComparer.OrdinalIgnoreCase);

			m_Footnotes = new Dictionary<string, Block>();
			m_UsedFootnotes = new List<Block>();
			m_UsedHeaderIDs = new Dictionary<string, bool>();
		}
Beispiel #5
0
 // Constructor: sets the default footnotes CSS class and creates the
 // working buffers, scanner, span formatter, lookup tables and counters.
 public Markdown()
 {
     HtmlClassFootnotes = "footnotes";
     m_StringBuilder = new StringBuilder();
     m_StringBuilderFinal = new StringBuilder();
     m_StringScanner = new StringScanner();
     m_SpanFormatter = new SpanFormatter(this);

     // Link ids are looked up case-insensitively. An ordinal comparer keeps that
     // lookup independent of the thread's current culture (for example, under a
     // Turkish culture "ID" and "id" do not compare equal with a culture-sensitive
     // comparer), so the same document resolves the same way on every machine.
     m_LinkDefinitions = new Dictionary<string, LinkDefinition>(StringComparer.OrdinalIgnoreCase);

     m_Footnotes = new Dictionary<string, Block>();
     m_UsedFootnotes = new List<Block>();
     m_UsedHeaderIDs = new Dictionary<string, bool>();
     this.CreatedH2IdCollector = new List<Tuple<string, string>>();
     _tabIdCounter = 0;
 }
Beispiel #6
0
		// Parses one table row from the scanner into a list of trimmed cell texts.
		// Returns null for a blank line, for a row missing a required leading bar,
		// or for a line containing no vertical bar at all.
		public List<string> ParseRow(StringScanner p)
		{
			p.SkipLinespace();

			// A blank line terminates the table
			if (p.eol)
			{
				return null;
			}

			// A leading bar counts as "a bar was seen"; when one is required it must be present
			bool sawBar = LeadingBar;
			if (LeadingBar && !p.SkipChar('|'))
			{
				return null;
			}

			var cells = new List<string>();

			// Each pass collects the text up to the next bar (or end of line) as one cell
			while (!p.eol)
			{
				p.Mark();
				while (!p.eol && p.current != '|')
				{
					p.SkipForward(1);
				}

				string cellText = p.Extract().Trim();
				cells.Add(cellText);

				if (p.SkipChar('|'))
				{
					sawBar = true;
				}
			}

			// Without any bar this line is not a table row
			if (!sawBar)
			{
				return null;
			}

			// Pad short rows so every row has as many cells as the table has columns
			while (cells.Count < Columns.Count)
			{
				cells.Add("&nbsp;");
			}

			p.SkipEol();
			return cells;
		}
Beispiel #7
0
        // Scans the input for HTML tags and checks that any tag reported as safe
        // is one of the few tags in the test data that really is safe.
        public void TestAttacksAreBlocked(string input)
        {
            var scanner = new StringScanner(input);

            while (!scanner.Eof)
            {
                HtmlTag tag = HtmlTag.Parse(scanner);

                // No tag at this position: move on by one character
                if (tag == null)
                {
                    scanner.SkipForward(1);
                    continue;
                }

                // Tags flagged as unsafe need no further checking
                if (!tag.IsSafe())
                {
                    continue;
                }

                Assert.IsTrue(IsTagReallySafe(tag));
            }
        }
Beispiel #8
0
        // Scans the input for HTML tags and checks that every tag found is reported as safe.
        public void TestNonAttacksAreAllowed(string input)
        {
            var scanner = new StringScanner(input);

            while (!scanner.Eof)
            {
                HtmlTag tag = HtmlTag.Parse(scanner);

                // No tag at this position: move on by one character
                if (tag == null)
                {
                    scanner.SkipForward(1);
                    continue;
                }

                Assert.IsTrue(tag.IsSafe());
            }
        }
        // Parses a table specification line (column markers built from ':' and '-',
        // separated by '|') into a TableSpec. Returns null when the line does not
        // form a specification.
        public static TableSpec Parse(StringScanner p)
        {
            // Leading line space is allowed
            p.SkipLinespace();

            // Fast rejection: a spec can only start with one of these characters
            bool looksLikeSpec = p.current == '|' || p.current == ':' || p.current == '-';
            if (!looksLikeSpec)
            {
                return null;
            }

            // The spec object is only created once the line starts to look like one;
            // a leading bar is enough
            TableSpec spec = p.SkipChar('|') ? new TableSpec { LeadingBar = true } : null;

            for (;;)
            {
                p.SkipLinespace();

                // An empty column (a bar where a column marker should be) is invalid
                if (p.current == '|')
                {
                    return null;
                }

                // Column marker: optional ':', any number of '-', optional ':'
                bool colonBefore = p.SkipChar(':');
                while (p.current == '-')
                {
                    p.SkipForward(1);
                }
                bool colonAfter = p.SkipChar(':');
                p.SkipLinespace();

                // The colons on either side decide the alignment
                ColumnAlignment alignment;
                if (colonBefore)
                {
                    alignment = colonAfter ? ColumnAlignment.Center : ColumnAlignment.Left;
                }
                else
                {
                    alignment = colonAfter ? ColumnAlignment.Right : ColumnAlignment.NA;
                }

                // End of line right after a column: it is the last one, provided a bar
                // was seen somewhere (otherwise spec is still null and null is returned)
                if (p.eol)
                {
                    if (spec != null)
                    {
                        spec.Columns.Add(alignment);
                    }
                    return spec;
                }

                // Otherwise a bar must separate this column from the next
                if (!p.SkipChar('|'))
                {
                    return null;
                }

                if (spec == null)
                {
                    spec = new TableSpec();
                }
                spec.Columns.Add(alignment);

                // End of line directly after a bar means the spec has a trailing bar
                p.SkipLinespace();
                if (p.eol)
                {
                    spec.TrailingBar = true;
                    return spec;
                }
            }
        }
Beispiel #10
0
        // Walks the input looking for HTML tags; any tag reported as safe must
        // also pass the IsTagReallySafe check.
        public void TestAttacksAreBlocked(string input)
        {
            var scanner = new StringScanner(input);

            while (!scanner.EndOfFile)
            {
                HtmlTag parsed = HtmlTag.Parse(scanner);

                if (parsed == null)
                {
                    // Nothing parsed here, advance a single character
                    scanner.SkipForward(1);
                    continue;
                }

                bool reportedSafe = parsed.IsSafe();
                if (reportedSafe)
                {
                    Assert.IsTrue(IsTagReallySafe(parsed));
                }
            }
        }
Beispiel #11
0
		// Convenience overload for test cases: wraps the string in a scanner
		// and parses a link definition from it
		internal static LinkDefinition ParseLinkDefinition(string str, bool ExtraMode)
		{
			var scanner = new StringScanner(str);
			var definition = ParseLinkDefinitionInternal(scanner, ExtraMode);
			return definition;
		}
Beispiel #12
0
        // Tries to parse an HTML tag at the scanner's current position.
        // On failure the scanner is rewound to where it started and null is returned.
        public static HtmlTag Parse(StringScanner p)
        {
            int startPosition = p.Position;

            var tag = ParseHelper(p);
            if (tag == null)
            {
                // Undo whatever the helper consumed
                p.Position = startPosition;
                return null;
            }

            return tag;
        }
Beispiel #13
0
 // Helper: parses a table spec directly from a string
 TableSpec Parse(string str)
 {
     return TableSpec.Parse(new StringScanner(str));
 }
Beispiel #14
0
        // Parses a table specification line (column markers built from ':' and '-',
        // separated by '|') into a TableSpec, or returns null if the line is not one.
        public static TableSpec Parse(StringScanner p)
        {
            // Leading line space is allowed
            p.SkipLinespace();

            // Cheap rejection: a spec line can only begin with '|', ':' or '-'
            switch (p.current)
            {
                case '|':
                case ':':
                case '-':
                    break;

                default:
                    return null;
            }

            // Created lazily, once the line has shown at least one bar
            TableSpec result = null;
            if (p.SkipChar('|'))
            {
                result = new TableSpec();
                result.LeadingBar = true;
            }

            while (true)
            {
                p.SkipLinespace();

                // A bar where a column marker should be means an empty column: not a spec
                if (p.current == '|')
                {
                    return null;
                }

                // Column marker: optional ':', run of '-', optional ':'
                bool hasLeadingColon = p.SkipChar(':');
                while (p.current == '-')
                {
                    p.SkipForward(1);
                }
                bool hasTrailingColon = p.SkipChar(':');
                p.SkipLinespace();

                ColumnAlignment alignment = hasLeadingColon
                    ? (hasTrailingColon ? ColumnAlignment.Center : ColumnAlignment.Left)
                    : (hasTrailingColon ? ColumnAlignment.Right : ColumnAlignment.NA);

                // Line ends after this column: accept it only if a bar was seen earlier
                bool atLineEnd = p.eol;
                if (atLineEnd && result == null)
                {
                    return null;
                }
                if (atLineEnd)
                {
                    result.Columns.Add(alignment);
                    return result;
                }

                // More text follows, so it has to be a separating bar
                if (!p.SkipChar('|'))
                {
                    return null;
                }

                result = result ?? new TableSpec();
                result.Columns.Add(alignment);

                // Nothing after the bar: the spec ends with a trailing bar
                p.SkipLinespace();
                if (p.eol)
                {
                    result.TrailingBar = true;
                    return result;
                }
            }
        }
        // Test-case entry point: parses a link definition straight from a string
        internal static LinkDefinition ParseLinkDefinition(string str, bool ExtraMode)
        {
            var scanner = new StringScanner(str);
            var definition = ParseLinkDefinitionInternal(scanner, ExtraMode);
            return definition;
        }
Beispiel #16
0
        // Tries to parse an HTML tag from str starting at pos.
        // On success pos is moved to the scanner position after the tag;
        // on failure pos is left unchanged and null is returned.
        public static HtmlTag Parse(string str, ref int pos)
        {
            var scanner = new StringScanner(str, pos);

            var tag = Parse(scanner);
            if (tag == null)
            {
                return null;
            }

            pos = scanner.Position;
            return tag;
        }
        // Parse just the link target
        // For reference link definition, this is the bit after "[id]: thisbit"
        // For inline link, this is the bit in the parens: [link text](thisbit)
        //
        // p         - scanner positioned at the start of the target
        // id        - id stored on the resulting LinkDefinition; a null id is treated as
        //             an inline link (parentheses are balanced and the title must be
        //             followed by ')'), a non-null id as a reference definition (the
        //             title must be followed by end of line)
        // ExtraMode - passed through to SkipEscapableChar and Utils.UnescapeString
        //
        // Returns a LinkDefinition with url (and title, if present) filled in, or null
        // if the target is malformed. The scanner position is not restored on failure.
        internal static LinkDefinition ParseLinkTarget(StringScanner p, string id, bool ExtraMode)
        {
            // Skip whitespace
            p.SkipWhitespace();

            // End of string?
            if (p.eol)
            {
                return(null);
            }

            // Create the link definition
            var r = new LinkDefinition(id);

            // Is the url enclosed in angle brackets
            if (p.SkipChar('<'))
            {
                // Extract the url
                p.Mark();

                // Find end of the url
                while (p.current != '>')
                {
                    // Ran out of input before the closing '>'
                    if (p.eof)
                    {
                        return(null);
                    }
                    p.SkipEscapableChar(ExtraMode);
                }

                // Take the text between '<' and '>' before consuming the '>'
                string url = p.Extract();
                if (!p.SkipChar('>'))
                {
                    return(null);
                }

                // Unescape it
                r.url = Utils.UnescapeString(url.Trim(), ExtraMode);

                // Skip whitespace
                p.SkipWhitespace();
            }
            else
            {
                // Find end of the url
                p.Mark();

                // NOTE(review): depth starts at 1, presumably because the caller has already
                // consumed the opening '(' of an inline link — confirm against the caller
                int paren_depth = 1;
                while (!p.eol)
                {
                    char ch = p.current;

                    // An unbracketed url ends at the first whitespace character
                    if (char.IsWhiteSpace(ch))
                    {
                        break;
                    }

                    // Parentheses are only balanced for inline links (null id); the url
                    // ends at the ')' that brings the depth back to zero
                    if (id == null)
                    {
                        if (ch == '(')
                        {
                            paren_depth++;
                        }
                        else if (ch == ')')
                        {
                            paren_depth--;
                            if (paren_depth == 0)
                            {
                                break;
                            }
                        }
                    }

                    p.SkipEscapableChar(ExtraMode);
                }

                r.url = Utils.UnescapeString(p.Extract().Trim(), ExtraMode);
            }

            p.SkipLinespace();

            // End of inline target
            if (p.DoesMatch(')'))
            {
                return(r);
            }

            // Remember whether the url ended its line, and where, so the scanner can be
            // put back there if the following line turns out not to hold a title
            bool bOnNewLine = p.eol;
            int  posLineEnd = p.position;

            // The title is allowed to start on the line after the url
            if (p.eol)
            {
                p.SkipEol();
                p.SkipLinespace();
            }

            // Work out what the title is delimited with
            char delim;

            switch (p.current)
            {
            case '\'':
            case '\"':
                // Quoted title: closed by the same quote character
                delim = p.current;
                break;

            case '(':
                // Parenthesised title: closed by ')'
                delim = ')';
                break;

            default:
                // No title delimiter. If we moved to a new line to look for one, rewind
                // to the end of the url line and return the link without a title;
                // otherwise there is unexpected text after the url on the same line.
                if (bOnNewLine)
                {
                    p.position = posLineEnd;
                    return(r);
                }
                else
                {
                    return(null);
                }
            }

            // Skip the opening title delimiter
            p.SkipForward(1);

            // Find the end of the title
            p.Mark();
            while (true)
            {
                // Title must be closed before the end of the line
                if (p.eol)
                {
                    return(null);
                }

                if (p.current == delim)
                {
                    // The look-ahead below applies to quote delimiters only; a ')'
                    // delimiter ends the title immediately
                    if (delim != ')')
                    {
                        int savepos = p.position;

                        // Check for embedded quotes in title

                        // Skip the quote and any trailing whitespace
                        p.SkipForward(1);
                        p.SkipLinespace();

                        // Next we expect either the end of the line for a link definition
                        // or the close bracket for an inline link
                        if ((id == null && p.current != ')') ||
                            (id != null && !p.eol))
                        {
                            // The quote was embedded in the title: keep scanning from here
                            continue;
                        }

                        // It was the closing quote: rewind onto it so that Extract()
                        // below stops just before the quote
                        p.position = savepos;
                    }

                    // End of title
                    break;
                }

                p.SkipEscapableChar(ExtraMode);
            }

            // Store the title
            r.title = Utils.UnescapeString(p.Extract(), ExtraMode);

            // Skip closing quote
            p.SkipForward(1);

            // Done!
            return(r);
        }
Beispiel #18
0
        // Parses a single HTML tag or comment starting at the scanner's current position.
        //
        // Returns the parsed tag, or null when the text is not a well-formed tag.
        // The scanner position is NOT restored on failure; the caller must rewind.
        //
        // A repeated attribute name (e.g. <a href="x" href="y">) no longer throws from
        // the attribute collection: the first occurrence is kept and later ones are
        // ignored, which matches how HTML parsers treat duplicate attributes.
        private static HtmlTag ParseHelper(StringScanner p)
        {
            // Does it look like a tag?
            if (p.Current != '<')
                return null;

            // Skip '<'
            p.SkipForward(1);

            // Is it a comment?
            if (p.SkipString("!--"))
            {
                p.Mark();

                if (p.Find("-->"))
                {
                    // A comment is represented as a closed "!" tag whose text is
                    // stored in the "content" attribute
                    var t = new HtmlTag("!");
                    t.m_attributes.Add("content", p.Extract());
                    t.m_closed = true;
                    p.SkipForward(3);
                    return t;
                }
            }

            // Is it a closing tag eg: </div>
            bool bClosing = p.SkipChar('/');

            // Get the tag name
            string tagName = null;
            if (!p.SkipIdentifier(ref tagName))
                return null;

            // Probably a tag, create the HtmlTag object now
            HtmlTag tag = new HtmlTag(tagName);
            tag.m_closing = bClosing;

            // If it's a closing tag, no attributes
            if (bClosing)
            {
                if (p.Current != '>')
                    return null;

                p.SkipForward(1);
                return tag;
            }

            while (!p.Eof)
            {
                // Skip whitespace
                p.SkipWhitespace();

                // Check for closed tag eg: <hr />
                if (p.SkipString("/>"))
                {
                    tag.m_closed = true;
                    return tag;
                }

                // End of tag?
                if (p.SkipChar('>'))
                {
                    return tag;
                }

                // attribute name
                string attributeName = null;
                if (!p.SkipIdentifier(ref attributeName))
                    return null;

                // Skip whitespace
                p.SkipWhitespace();

                // Work out the attribute value; an attribute without '=' has an empty value
                string attributeValue;
                if (p.SkipChar('='))
                {
                    // Skip whitespace
                    p.SkipWhitespace();

                    // Optional quotes
                    if (p.SkipChar('\"'))
                    {
                        // Scan the value up to the closing quote
                        p.Mark();
                        if (!p.Find('\"'))
                            return null;

                        attributeValue = p.Extract();

                        // Skip closing quote
                        p.SkipForward(1);
                    }
                    else
                    {
                        // Unquoted value: runs up to whitespace, '>' or '/'
                        p.Mark();
                        while (!p.Eof && !char.IsWhiteSpace(p.Current) && p.Current != '>' && p.Current != '/')
                            p.SkipForward(1);

                        // Input ended inside the value, so the tag is never terminated
                        if (p.Eof)
                            return null;

                        attributeValue = p.Extract();
                    }
                }
                else
                {
                    attributeValue = "";
                }

                // Keep the first occurrence of a repeated attribute instead of letting
                // Add throw on the duplicate key
                if (!tag.m_attributes.ContainsKey(attributeName))
                    tag.m_attributes.Add(attributeName, attributeValue);
            }

            return null;
        }
Beispiel #19
0
        // Parses a single HTML tag or comment starting at the scanner's current position.
        //
        // Returns the parsed tag, or null when the text is not a well-formed tag.
        // The scanner position is NOT restored on failure; the caller must rewind.
        //
        // When an attribute name is repeated (e.g. <a href="x" href="y">) the first
        // occurrence is kept and later ones are ignored. HTML parsers drop duplicate
        // attributes the same way, so the values recorded here are the ones a browser
        // would use; letting a later duplicate overwrite an earlier one would allow a
        // safety check to inspect a different value than the browser sees.
        private static HtmlTag ParseHelper(StringScanner p)
        {
            // Does it look like a tag?
            if (p.Current != '<')
            {
                return(null);
            }

            // Skip '<'
            p.SkipForward(1);

            // Is it a comment?
            if (p.SkipString("!--"))
            {
                p.Mark();

                if (p.Find("-->"))
                {
                    // A comment is represented as a closed "!" tag whose text is
                    // stored in the "content" attribute
                    var t = new HtmlTag("!");
                    t.m_attributes["content"] = p.Extract();
                    t.m_closed = true;
                    p.SkipForward(3);
                    return(t);
                }
            }

            // Is it a closing tag eg: </div>
            bool bClosing = p.SkipChar('/');

            // Get the tag name
            string tagName = null;

            if (!p.SkipIdentifier(ref tagName))
            {
                return(null);
            }

            // Probably a tag, create the HtmlTag object now
            HtmlTag tag = new HtmlTag(tagName);

            tag.m_closing = bClosing;

            // If it's a closing tag, no attributes
            if (bClosing)
            {
                if (p.Current != '>')
                {
                    return(null);
                }

                p.SkipForward(1);
                return(tag);
            }

            while (!p.Eof)
            {
                // Skip whitespace
                p.SkipWhitespace();

                // Check for closed tag eg: <hr />
                if (p.SkipString("/>"))
                {
                    tag.m_closed = true;
                    return(tag);
                }

                // End of tag?
                if (p.SkipChar('>'))
                {
                    return(tag);
                }

                // attribute name
                string attributeName = null;
                if (!p.SkipIdentifier(ref attributeName))
                {
                    return(null);
                }

                // Skip whitespace
                p.SkipWhitespace();

                // Work out the attribute value; an attribute without '=' has an empty value
                string attributeValue;
                if (p.SkipChar('='))
                {
                    // Skip whitespace
                    p.SkipWhitespace();

                    // Optional quotes
                    if (p.SkipChar('\"'))
                    {
                        // Scan the value up to the closing quote
                        p.Mark();
                        if (!p.Find('\"'))
                        {
                            return(null);
                        }

                        attributeValue = p.Extract();

                        // Skip closing quote
                        p.SkipForward(1);
                    }
                    else
                    {
                        // Unquoted value: runs up to whitespace, '>' or '/'
                        p.Mark();
                        while (!p.Eof && !char.IsWhiteSpace(p.Current) && p.Current != '>' && p.Current != '/')
                        {
                            p.SkipForward(1);
                        }

                        // Input ended inside the value, so the tag is never terminated
                        if (p.Eof)
                        {
                            return(null);
                        }

                        attributeValue = p.Extract();
                    }
                }
                else
                {
                    attributeValue = string.Empty;
                }

                // First occurrence wins; a later duplicate must not replace it
                if (!tag.m_attributes.ContainsKey(attributeName))
                {
                    tag.m_attributes[attributeName] = attributeValue;
                }
            }

            return(null);
        }
Beispiel #20
0
		// Parse just the link target
		// For reference link definition, this is the bit after "[id]: thisbit"
		// For inline link, this is the bit in the parens: [link text](thisbit)
		//
		// p         - scanner positioned at the start of the target
		// id        - id stored on the resulting LinkDefinition; a null id is treated as
		//             an inline link (parentheses are balanced and the title must be
		//             followed by ')'), a non-null id as a reference definition (the
		//             title must be followed by end of line)
		// ExtraMode - passed through to SkipEscapableChar and Utils.UnescapeString
		//
		// Returns a LinkDefinition with url (and title, if present) filled in, or null
		// if the target is malformed. The scanner position is not restored on failure.
		internal static LinkDefinition ParseLinkTarget(StringScanner p, string id, bool ExtraMode)
		{
			// Skip whitespace
			p.SkipWhitespace();

			// End of string?
			if (p.eol)
				return null;

			// Create the link definition
			var r = new LinkDefinition(id);

			// Is the url enclosed in angle brackets
			if (p.SkipChar('<'))
			{
				// Extract the url
				p.Mark();

				// Find end of the url
				while (p.current != '>')
				{
					// Ran out of input before the closing '>'
					if (p.eof)
						return null;
					p.SkipEscapableChar(ExtraMode);
				}

				// Take the text between '<' and '>' before consuming the '>'
				string url = p.Extract();
				if (!p.SkipChar('>'))
					return null;

				// Unescape it
				r.url = Utils.UnescapeString(url.Trim(), ExtraMode);

				// Skip whitespace
				p.SkipWhitespace();
			}
			else
			{
				// Find end of the url
				p.Mark();

				// NOTE(review): depth starts at 1, presumably because the caller has already
				// consumed the opening '(' of an inline link — confirm against the caller
				int paren_depth = 1;
				while (!p.eol)
				{
					char ch=p.current;

					// An unbracketed url ends at the first whitespace character
					if (char.IsWhiteSpace(ch))
						break;

					// Parentheses are only balanced for inline links (null id); the url
					// ends at the ')' that brings the depth back to zero
					if (id == null)
					{
						if (ch == '(')
							paren_depth++;
						else if (ch == ')')
						{
							paren_depth--;
							if (paren_depth==0)
								break;
						}
					}

					p.SkipEscapableChar(ExtraMode);
				}

				r.url = Utils.UnescapeString(p.Extract().Trim(), ExtraMode);
			}

			p.SkipLinespace();

			// End of inline target
			if (p.DoesMatch(')'))
				return r;

			// Remember whether the url ended its line, and where, so the scanner can be
			// put back there if the following line turns out not to hold a title
			bool bOnNewLine = p.eol;
			int posLineEnd = p.position;

			// The title is allowed to start on the line after the url
			if (p.eol)
			{
				p.SkipEol();
				p.SkipLinespace();
			}

			// Work out what the title is delimited with
			char delim;
			switch (p.current)
			{
				// Quoted title: closed by the same quote character
				case '\'':  
				case '\"':
					delim = p.current;
					break;

				// Parenthesised title: closed by ')'
				case '(':
					delim = ')';
					break;

				// No title delimiter. If we moved to a new line to look for one, rewind
				// to the end of the url line and return the link without a title;
				// otherwise there is unexpected text after the url on the same line.
				default:
					if (bOnNewLine)
					{
						p.position = posLineEnd;
						return r;
					}
					else
						return null;
			}

			// Skip the opening title delimiter
			p.SkipForward(1);

			// Find the end of the title
			p.Mark();
			while (true)
			{
				// Title must be closed before the end of the line
				if (p.eol)
					return null;

				if (p.current == delim)
				{

					// The look-ahead below applies to quote delimiters only; a ')'
					// delimiter ends the title immediately
					if (delim != ')')
					{
						int savepos = p.position;

						// Check for embedded quotes in title

						// Skip the quote and any trailing whitespace
						p.SkipForward(1);
						p.SkipLinespace();

						// Next we expect either the end of the line for a link definition
						// or the close bracket for an inline link
						if ((id == null && p.current != ')') ||
							(id != null && !p.eol))
						{
							// The quote was embedded in the title: keep scanning from here
							continue;
						}

						// It was the closing quote: rewind onto it so that Extract()
						// below stops just before the quote
						p.position = savepos;
					}

					// End of title
					break;
				}

				p.SkipEscapableChar(ExtraMode);
			}

			// Store the title
			r.title = Utils.UnescapeString(p.Extract(), ExtraMode);

			// Skip closing quote
			p.SkipForward(1);

			// Done!
			return r;
		}
Beispiel #21
0
        // Walks the input looking for HTML tags; every tag that parses must be reported as safe.
        public void TestNonAttacksAreAllowed(string input)
        {
            var scanner = new StringScanner(input);

            while (!scanner.EndOfFile)
            {
                var parsed = HtmlTag.Parse(scanner);

                if (parsed == null)
                {
                    // Nothing parsed here, advance a single character
                    scanner.SkipForward(1);
                    continue;
                }

                bool reportedSafe = parsed.IsSafe();
                Assert.IsTrue(reportedSafe);
            }
        }