Méthode publique Extract()

public Extract ( ) : string
Résultat string
        // Walks a StringScanner across one sample string, checking each skip/find/extract
        // primitive in sequence. Every step depends on the position left by the previous one.
        public void Tests()
        {
            var scanner = new StringScanner();
            scanner.Reset("This is a string with something [bracketed]");

            // Right after Reset the scanner reports beginning-of-input, not end
            Assert.IsTrue(scanner.Bof);
            Assert.IsFalse(scanner.Eof);

            // Consuming the first word moves off the beginning without reaching the end
            Assert.IsTrue(scanner.SkipString("This"));
            Assert.IsFalse(scanner.Bof);
            Assert.IsFalse(scanner.Eof);

            // A non-matching string is expected to be rejected
            Assert.IsFalse(scanner.SkipString("huh?"));

            // Step through " is " one piece at a time
            Assert.IsTrue(scanner.SkipLinespace());
            Assert.IsTrue(scanner.SkipChar('i'));
            Assert.IsTrue(scanner.SkipChar('s'));
            Assert.IsTrue(scanner.SkipWhitespace());

            // The next character ('a') is one of the candidates
            var candidates = new char[] { 'r', 'a', 't' };
            Assert.IsTrue(scanner.DoesMatchAny(candidates));

            // Find is expected to be case-sensitive, FindI case-insensitive
            Assert.IsFalse(scanner.Find("Not here"));
            Assert.IsFalse(scanner.Find("WITH"));
            Assert.IsFalse(scanner.FindI("Not here"));
            Assert.IsTrue(scanner.FindI("WITH"));

            // Mark just inside the brackets, then extract up to the closing bracket
            Assert.IsTrue(scanner.Find('['));
            scanner.SkipForward(1);
            scanner.Mark();
            Assert.IsTrue(scanner.Find(']'));
            Assert.AreEqual("bracketed", scanner.Extract());

            // The closing bracket is the last character of the input
            Assert.IsTrue(scanner.SkipChar(']'));
            Assert.IsTrue(scanner.Eof);
        }
        // Parses a link reference definition of the form "[id]: target" from the scanner.
        // Returns the result of ParseLinkTarget when the definition is followed only by
        // linespace up to the end of the line; returns null for anything malformed.
        internal static LinkDefinition ParseLinkDefinitionInternal(StringScanner p, bool extraMode)
        {
            // Leading whitespace before the definition is ignored
            p.SkipWhitespace();

            // The id is whatever sits between '[' and the next ']'
            if (!p.SkipChar('['))
            {
                return null;
            }

            p.Mark();
            bool closingBracketFound = p.Find(']');
            if (!closingBracketFound)
            {
                return null;
            }

            // Reject an empty id, and require the "]:" separator right after it.
            // Short-circuiting keeps SkipString from running when the id is empty.
            var id = p.Extract();
            if (id.Length == 0 || !p.SkipString("]:"))
            {
                return null;
            }

            // The remainder of the definition is the url plus optional title
            var definition = ParseLinkTarget(p, id, extraMode);

            // Only linespace may follow the target on this line
            p.SkipLinespace();
            return p.EndOfLine ? definition : null;
        }
Exemple #3
0
		// Reads one table row from the scanner and returns its cells as trimmed strings,
		// padded with "&nbsp;" up to Columns.Count. Returns null when the line is blank,
		// when a required leading bar is missing, or when the line contains no bar at all.
		public List<string> ParseRow(StringScanner p)
		{
			p.SkipLinespace();

			// A blank line marks the end of the table
			if (p.eol)
			{
				return null;
			}

			// A table declared with leading bars counts as having a bar already,
			// but then every row has to open with one
			bool sawBar = LeadingBar;
			if (LeadingBar && !p.SkipChar('|'))
			{
				return null;
			}

			var cells = new List<string>();

			// Split the rest of the line on vertical bars
			while (!p.eol)
			{
				// Advance to the next bar, or to the end of the line
				p.Mark();
				while (!(p.eol || p.current == '|'))
				{
					p.SkipForward(1);
				}

				var cellText = p.Extract();
				cells.Add(cellText.Trim());

				// Consume the separator if present and remember that we saw one
				if (p.SkipChar('|'))
				{
					sawBar = true;
				}
			}

			// A line without any bar is not part of the table
			if (!sawBar)
			{
				return null;
			}

			// Pad short rows so every row has one cell per column
			while (cells.Count < Columns.Count)
			{
				cells.Add("&nbsp;");
			}

			p.SkipEol();
			return cells;
		}
Exemple #4
0
        // Parses a link reference definition of the form "[id]: target" from the scanner.
        // Yields the definition produced by ParseLinkTarget when nothing but linespace
        // follows it on the line, and null for any malformed input.
        internal static LinkDefinition ParseLinkDefinitionInternal(StringScanner p, bool ExtraMode)
        {
            // Ignore whitespace in front of the definition
            p.SkipWhitespace();

            // An opening square bracket introduces the id
            if (!p.SkipChar('['))
            {
                return null;
            }

            // Capture everything up to the closing bracket as the id
            p.Mark();
            if (!p.Find(']'))
            {
                return null;
            }

            string id = p.Extract();

            // The id must be non-empty and followed by "]:"; the separator is only
            // consumed when the id is non-empty (short-circuit evaluation)
            bool idIsUsable = id.Length != 0 && p.SkipString("]:");
            if (!idIsUsable)
            {
                return null;
            }

            // Url and optional title
            var definition = ParseLinkTarget(p, id, ExtraMode);

            // Allow trailing linespace, but nothing else, before the end of the line
            p.SkipLinespace();
            if (p.Eol)
            {
                return definition;
            }

            return null;
        }
		// Parse just the link target
		// For reference link definition, this is the bit after "[id]: thisbit"
		// For inline link, this is the bit in the parens: [link text](thisbit)
		//
		// p         - scanner positioned at the start of the target
		// id        - id stored in the resulting LinkDefinition; the code treats id == null
		//             as "inline link" (paren tracking, title must be followed by ')')
		//             and id != null as "reference definition" (title must end the line)
		// ExtraMode - passed through to SkipEscapableChar and Utils.UnescapeString
		//
		// Returns a LinkDefinition with url (and title, when one is present) filled in,
		// or null when the target is empty or malformed.
		internal static LinkDefinition ParseLinkTarget(StringScanner p, string id, bool ExtraMode)
		{
			// Skip whitespace
			p.SkipWhitespace();

			// End of string?
			if (p.eol)
				return null;

			// Create the link definition
			var r = new LinkDefinition(id);

			// Is the url enclosed in angle brackets
			if (p.SkipChar('<'))
			{
				// Extract the url
				p.Mark();

				// Find end of the url
				// (running out of input before the closing '>' makes the target invalid)
				while (p.current != '>')
				{
					if (p.eof)
						return null;
					// presumably steps over one character, or an escape sequence as a unit - confirm in StringScanner
					p.SkipEscapableChar(ExtraMode);
				}

				string url = p.Extract();
				if (!p.SkipChar('>'))
					return null;

				// Unescape it
				r.url = Utils.UnescapeString(url.Trim(), ExtraMode);

				// Skip whitespace
				p.SkipWhitespace();
			}
			else
			{
				// Find end of the url
				p.Mark();
				// Depth starts at 1; NOTE(review): presumably because the caller has already
				// consumed the opening '(' of an inline link - confirm against the caller
				int paren_depth = 1;
				while (!p.eol)
				{
					char ch=p.current;
					// Whitespace always terminates an unbracketed url
					if (char.IsWhiteSpace(ch))
						break;
					// Parentheses are only balanced for inline links (id == null);
					// the ')' that brings the depth to zero ends the url and is not consumed
					if (id == null)
					{
						if (ch == '(')
							paren_depth++;
						else if (ch == ')')
						{
							paren_depth--;
							if (paren_depth==0)
								break;
						}
					}

					p.SkipEscapableChar(ExtraMode);
				}

				r.url = Utils.UnescapeString(p.Extract().Trim(), ExtraMode);
			}

			p.SkipLinespace();

			// End of inline target
			// (no title present; the ')' itself is left for the caller)
			if (p.DoesMatch(')'))
				return r;

			// A title may sit on the following line. Remember where this line ended so the
			// scanner can be put back there if the next line turns out not to hold a title.
			bool bOnNewLine = p.eol;
			int posLineEnd = p.position;
			if (p.eol)
			{
				p.SkipEol();
				p.SkipLinespace();
			}

			// Work out what the title is delimited with
			char delim;
			switch (p.current)
			{
				case '\'':  
				case '\"':
					delim = p.current;
					break;

				case '(':
					delim = ')';
					break;

				default:
					// No title delimiter: acceptable if we moved onto a new line (rewind and
					// return the url-only definition), otherwise the text after the url is invalid
					if (bOnNewLine)
					{
						p.position = posLineEnd;
						return r;
					}
					else
						return null;
			}

			// Skip the opening title delimiter
			p.SkipForward(1);

			// Find the end of the title
			p.Mark();
			while (true)
			{
				// A title must close on the same line it opened
				if (p.eol)
					return null;

				if (p.current == delim)
				{

					// Quote delimiters need a look-ahead because quotes may also appear
					// inside the title; a ')' delimiter ends the title immediately
					if (delim != ')')
					{
						int savepos = p.position;

						// Check for embedded quotes in title

						// Skip the quote and any trailing whitespace
						p.SkipForward(1);
						p.SkipLinespace();

						// Next we expect either the end of the line for a link definition
						// or the close bracket for an inline link
						if ((id == null && p.current != ')') ||
							(id != null && !p.eol))
						{
							// Embedded quote: keep scanning from the current position, which is
							// already past the quote and any linespace that followed it
							continue;
						}

						// Genuine closing quote: step back onto it so Extract() stops before it
						p.position = savepos;
					}

					// End of title
					break;
				}

				p.SkipEscapableChar(ExtraMode);
			}

			// Store the title
			r.title = Utils.UnescapeString(p.Extract(), ExtraMode);

			// Skip closing quote
			p.SkipForward(1);

			// Done!
			return r;
		}
Exemple #6
0
        // Tries to read one HTML tag (or comment) starting at the scanner's current position.
        // Returns the populated HtmlTag, or null when the text does not form a complete tag.
        private static HtmlTag ParseHelper(StringScanner p)
        {
            // Every tag opens with '<'
            if (p.Current != '<')
            {
                return null;
            }

            p.SkipForward(1);

            // Comments come back as a closed "!" tag whose "content" attribute holds the text.
            // An unterminated comment falls through to ordinary tag parsing below.
            if (p.SkipString("!--"))
            {
                p.Mark();

                if (p.Find("-->"))
                {
                    var comment = new HtmlTag("!");
                    comment.m_attributes.Add("content", p.Extract());
                    comment.m_closed = true;
                    p.SkipForward(3);
                    return comment;
                }
            }

            // A '/' right after '<' marks a closing tag such as </div>
            bool isClosingTag = p.SkipChar('/');

            // The tag name must be an identifier
            string name = null;
            if (!p.SkipIdentifier(ref name))
            {
                return null;
            }

            // From here on we are fairly sure this is a tag
            var result = new HtmlTag(name);
            result.m_closing = isClosingTag;

            // Closing tags carry no attributes: the name must be followed directly by '>'
            if (isClosingTag)
            {
                if (p.Current == '>')
                {
                    p.SkipForward(1);
                    return result;
                }

                return null;
            }

            // Read attributes until the tag is terminated or the input runs out
            while (!p.Eof)
            {
                p.SkipWhitespace();

                // Self-closed tag, eg: <hr />
                if (p.SkipString("/>"))
                {
                    result.m_closed = true;
                    return result;
                }

                // Ordinary end of tag
                if (p.SkipChar('>'))
                {
                    return result;
                }

                // Otherwise an attribute name has to come next
                string attrName = null;
                if (!p.SkipIdentifier(ref attrName))
                {
                    return null;
                }

                p.SkipWhitespace();

                // No '=' means a bare attribute, stored with an empty value
                if (!p.SkipChar('='))
                {
                    result.m_attributes.Add(attrName, "");
                    continue;
                }

                p.SkipWhitespace();

                // Double-quoted value: everything up to the closing quote
                if (p.SkipChar('\"'))
                {
                    p.Mark();
                    if (!p.Find('\"'))
                    {
                        return null;
                    }

                    result.m_attributes.Add(attrName, p.Extract());

                    // Step over the closing quote
                    p.SkipForward(1);
                    continue;
                }

                // Unquoted value: runs until whitespace, '>' or '/'
                p.Mark();
                while (!p.Eof)
                {
                    char c = p.Current;
                    if (char.IsWhiteSpace(c) || c == '>' || c == '/')
                    {
                        break;
                    }

                    p.SkipForward(1);
                }

                // A value cut short by the end of input is not stored
                if (!p.Eof)
                {
                    result.m_attributes.Add(attrName, p.Extract());
                }
            }

            // Input ended before the tag was terminated
            return null;
        }
Exemple #7
0
        // Parse just the link target
        // For reference link definition, this is the bit after "[id]: thisbit"
        // For inline link, this is the bit in the parens: [link text](thisbit)
        //
        // p         - scanner positioned at the start of the target
        // id        - id stored in the resulting LinkDefinition; the code treats id == null
        //             as "inline link" (paren tracking, title must be followed by ')')
        //             and id != null as "reference definition" (title must end the line)
        // ExtraMode - passed through to SkipEscapableChar and Utils.UnescapeString
        //
        // Returns a LinkDefinition with url (and title, when one is present) filled in,
        // or null when the target is empty or malformed.
        internal static LinkDefinition ParseLinkTarget(StringScanner p, string id, bool ExtraMode)
        {
            // Skip whitespace
            p.SkipWhitespace();

            // End of string?
            if (p.eol)
            {
                return(null);
            }

            // Create the link definition
            var r = new LinkDefinition(id);

            // Is the url enclosed in angle brackets
            if (p.SkipChar('<'))
            {
                // Extract the url
                p.Mark();

                // Find end of the url
                // (running out of input before the closing '>' makes the target invalid)
                while (p.current != '>')
                {
                    if (p.eof)
                    {
                        return(null);
                    }
                    // presumably steps over one character, or an escape sequence as a unit - confirm in StringScanner
                    p.SkipEscapableChar(ExtraMode);
                }

                string url = p.Extract();
                if (!p.SkipChar('>'))
                {
                    return(null);
                }

                // Unescape it
                r.url = Utils.UnescapeString(url.Trim(), ExtraMode);

                // Skip whitespace
                p.SkipWhitespace();
            }
            else
            {
                // Find end of the url
                p.Mark();
                // Depth starts at 1; NOTE(review): presumably because the caller has already
                // consumed the opening '(' of an inline link - confirm against the caller
                int paren_depth = 1;
                while (!p.eol)
                {
                    char ch = p.current;
                    // Whitespace always terminates an unbracketed url
                    if (char.IsWhiteSpace(ch))
                    {
                        break;
                    }
                    // Parentheses are only balanced for inline links (id == null);
                    // the ')' that brings the depth to zero ends the url and is not consumed
                    if (id == null)
                    {
                        if (ch == '(')
                        {
                            paren_depth++;
                        }
                        else if (ch == ')')
                        {
                            paren_depth--;
                            if (paren_depth == 0)
                            {
                                break;
                            }
                        }
                    }

                    p.SkipEscapableChar(ExtraMode);
                }

                r.url = Utils.UnescapeString(p.Extract().Trim(), ExtraMode);
            }

            p.SkipLinespace();

            // End of inline target
            // (no title present; the ')' itself is left for the caller)
            if (p.DoesMatch(')'))
            {
                return(r);
            }

            // A title may sit on the following line. Remember where this line ended so the
            // scanner can be put back there if the next line turns out not to hold a title.
            bool bOnNewLine = p.eol;
            int  posLineEnd = p.position;

            if (p.eol)
            {
                p.SkipEol();
                p.SkipLinespace();
            }

            // Work out what the title is delimited with
            char delim;

            switch (p.current)
            {
            case '\'':
            case '\"':
                delim = p.current;
                break;

            case '(':
                delim = ')';
                break;

            default:
                // No title delimiter: acceptable if we moved onto a new line (rewind and
                // return the url-only definition), otherwise the text after the url is invalid
                if (bOnNewLine)
                {
                    p.position = posLineEnd;
                    return(r);
                }
                else
                {
                    return(null);
                }
            }

            // Skip the opening title delimiter
            p.SkipForward(1);

            // Find the end of the title
            p.Mark();
            while (true)
            {
                // A title must close on the same line it opened
                if (p.eol)
                {
                    return(null);
                }

                if (p.current == delim)
                {
                    // Quote delimiters need a look-ahead because quotes may also appear
                    // inside the title; a ')' delimiter ends the title immediately
                    if (delim != ')')
                    {
                        int savepos = p.position;

                        // Check for embedded quotes in title

                        // Skip the quote and any trailing whitespace
                        p.SkipForward(1);
                        p.SkipLinespace();

                        // Next we expect either the end of the line for a link definition
                        // or the close bracket for an inline link
                        if ((id == null && p.current != ')') ||
                            (id != null && !p.eol))
                        {
                            // Embedded quote: keep scanning from the current position, which is
                            // already past the quote and any linespace that followed it
                            continue;
                        }

                        // Genuine closing quote: step back onto it so Extract() stops before it
                        p.position = savepos;
                    }

                    // End of title
                    break;
                }

                p.SkipEscapableChar(ExtraMode);
            }

            // Store the title
            r.title = Utils.UnescapeString(p.Extract(), ExtraMode);

            // Skip closing quote
            p.SkipForward(1);

            // Done!
            return(r);
        }
        // Tries to read one HTML tag (or comment) starting at the scanner's current position.
        // Returns the populated HtmlTag, or null when the text does not form a complete tag.
        private static HtmlTag ParseHelper(StringScanner p)
        {
            // Every tag opens with '<'
            if (p.current != '<')
                return null;

            p.SkipForward(1);

            // Comments come back as a closed "!" tag whose "content" attribute holds the text.
            // An unterminated comment falls through to ordinary tag parsing below.
            if (p.SkipString("!--"))
            {
                p.Mark();

                if (p.Find("-->"))
                {
                    var comment = new HtmlTag("!");
                    comment.m_attributes.Add("content", p.Extract());
                    comment.m_closed = true;
                    p.SkipForward(3);
                    return comment;
                }
            }

            // A '/' right after '<' marks a closing tag such as </div>
            bool isClosingTag = p.SkipChar('/');

            // The tag name must be an identifier
            string name = null;
            if (!p.SkipIdentifier(ref name))
                return null;

            // From here on we are fairly sure this is a tag
            var result = new HtmlTag(name);
            result.m_closing = isClosingTag;

            // Closing tags carry no attributes: the name must be followed directly by '>'
            if (isClosingTag)
            {
                if (p.current == '>')
                {
                    p.SkipForward(1);
                    return result;
                }

                return null;
            }

            // Read attributes until the tag is terminated or the input runs out
            while (!p.eof)
            {
                p.SkipWhitespace();

                // Self-closed tag, eg: <hr />
                if (p.SkipString("/>"))
                {
                    result.m_closed = true;
                    return result;
                }

                // Ordinary end of tag
                if (p.SkipChar('>'))
                    return result;

                // Otherwise an attribute name has to come next
                string attrName = null;
                if (!p.SkipIdentifier(ref attrName))
                    return null;

                p.SkipWhitespace();

                // No '=' means a bare attribute, stored with an empty value
                if (!p.SkipChar('='))
                {
                    result.m_attributes.Add(attrName, "");
                    continue;
                }

                p.SkipWhitespace();

                // Double-quoted value: everything up to the closing quote
                if (p.SkipChar('\"'))
                {
                    p.Mark();
                    if (!p.Find('\"'))
                        return null;

                    result.m_attributes.Add(attrName, p.Extract());

                    // Step over the closing quote
                    p.SkipForward(1);
                    continue;
                }

                // Unquoted value: runs until whitespace, '>' or '/'
                p.Mark();
                while (!p.eof)
                {
                    char c = p.current;
                    if (char.IsWhiteSpace(c) || c == '>' || c == '/')
                        break;

                    p.SkipForward(1);
                }

                // A value cut short by the end of input is not stored
                if (!p.eof)
                    result.m_attributes.Add(attrName, p.Extract());
            }

            // Input ended before the tag was terminated
            return null;
        }