Пример #1
0
        // Advances the reader past any run of whitespace and '#' comments, leaving it
        // positioned on the first character that is neither (or at the end of the stream).
        private void ReadWhitespace(MyReader source)
        {
            for (;;)
            {
                // Consume blanks one character at a time.
                while (char.IsWhiteSpace((char)source.Peek()))
                {
                    source.Read();
                }

                // Anything other than a comment marker ends the skipping.
                if (source.Peek() != '#')
                {
                    return;
                }

                // A comment runs through the next LF (10) or CR (13), or to the end of the stream (-1).
                int ch;
                do
                {
                    ch = source.Read();
                }
                while (ch != -1 && ch != 10 && ch != 13);
            }
        }
Пример #2
0
        // Skips whitespace and comments, then consumes and returns the next character.
        // Reaching the end of the stream (Read() == -1) is reported through OnError.
        private char ReadPunc(MyReader source)
        {
            ReadWhitespace(source);

            int next = source.Read();
            if (next != -1)
            {
                return (char)next;
            }

            OnError("End of file expecting punctuation", new Location(source.Line, source.Col));
            return (char)next;
        }
Пример #3
0
        /// <summary>
        /// Reads the next token from <paramref name="source"/> after skipping whitespace and comments.
        /// </summary>
        /// <param name="source">Reader to consume characters from.</param>
        /// <param name="context">Passed to <c>ResolveQName</c> when a literal's datatype is written as a QName.</param>
        /// <returns>
        /// A <c>Literal</c> when the token is a quoted string; otherwise the token text as a string.
        /// An empty string is returned when the end of the stream is reached before any token.
        /// </returns>
        private object ReadToken(MyReader source, ParseContext context)
        {
            ReadWhitespace(source);

            // Remember where the token starts so errors are reported at its first character.
            Location loc = new Location(source.Line, source.Col);

            int firstchar = source.Read();

            if (firstchar == -1)
            {
                return("");
            }

            // readTokenBuffer is reused for every token rather than allocating a new builder.
            StringBuilder b = readTokenBuffer; readTokenBuffer.Length = 0;

            b.Append((char)firstchar);

            if (firstchar == '<')
            {
                // This is a URI or the <= verb.  URIs can have \'s and <'s escaped with a backslash, plus \u and \U notation.
                bool escaped = false;
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        OnError("Unexpected end of stream within a token beginning with <", loc);
                    }

                    // The buffer holds only the opening '<' (length 1) when the character directly
                    // after it is read, so that is the point at which "<=" must be recognised.
                    if (!escaped && b.Length == 1 && c == '=')
                    {
                        return("<=");                        // the <= verb
                    }
                    if (escaped)
                    {
                        ReadEscapedChar((char)c, b, source, loc, true, true, false);
                        // the first flag should be set true only if not NTriples, but we are flexible on reading.
                        // the second flag should be set true only if NTriples, but we are flexible on reading.
                        escaped = false;
                    }
                    else if (c == '\\')
                    {
                        escaped = true;
                    }
                    else
                    {
                        b.Append((char)c);
                        if (c == '>')                         // end of the URI
                        {
                            break;
                        }
                    }
                }
            }
            else if (firstchar == '"')
            {
                // This can either be a string "..." or a longString """...""", which additionally allows embedded \n, \r, and \t, and quotes.

                b.Length = 0;                 // get rid of the open quote
                bool escaped      = false;
                bool triplequoted = false;
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        OnError("Unexpected end of stream within a string", loc);
                    }

                    // Check if this is started by three quotes. If we've already read three quotes, don't keep checking or we can't read """""".
                    // A quote that follows a backslash is an escaped quote, never part of the opening delimiter.
                    if (!escaped && b.Length == 0 && c == (int)'"' && source.Peek() == (int)'"' && !triplequoted)
                    {
                        triplequoted = true;
                        source.Read();
                        continue;
                    }

                    if (escaped)
                    {
                        ReadEscapedChar((char)c, b, source, loc, false, true, true);                         // the last flag should be set true only if N3, but we are flexible on reading
                        escaped = false;
                    }
                    else if (c == '\\')
                    {
                        escaped = true;
                    }
                    else
                    {
                        // A plain string ends at the first unescaped quote.
                        if (c == '"' && !triplequoted)
                        {
                            break;
                        }
                        // A long string ends only at three consecutive quotes; lone quotes are content.
                        if (c == '"' && source.Peek() == '"' && source.Peek2() == '"' && triplequoted)
                        {
                            break;
                        }
                        b.Append((char)c);
                    }
                }

                if (triplequoted)                   // read the extra end quotes
                {
                    source.Read();
                    source.Read();
                }

                // Capture the value now: the buffer is reused below for the language tag,
                // and the nested ReadToken call for the datatype resets it as well.
                string litvalue = b.ToString();
                string litlang  = null;
                string litdt    = null;

                // Strings can be suffixed with @langcode or ^^symbol (but not both?).
                if (source.Peek() == '@')
                {
                    source.Read();
                    b.Length = 0;
                    while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
                    {
                        b.Append((char)source.Read());
                    }
                    litlang = b.ToString();
                }
                else if (source.Peek() == '^' && source.Peek2() == '^')
                {
                    loc = new Location(source.Line, source.Col);
                    source.Read();
                    source.Read();
                    litdt = ReadToken(source, context).ToString();                     // better be a string URI
                    if (litdt.StartsWith("<") && litdt.EndsWith(">"))
                    {
                        // <uri> form: strip the angle brackets.
                        litdt = litdt.Substring(1, litdt.Length - 2);
                    }
                    else if (litdt.IndexOf(":") != -1)
                    {
                        // prefix:name form: resolve it to a full URI.
                        Resource r = ResolveQName(litdt, context, loc);
                        if (r.Uri == null)
                        {
                            OnError("A literal datatype cannot be an anonymous entity", loc);
                        }
                        litdt = r.Uri;
                    }
                }

                Literal literal = new Literal(litvalue, litlang, litdt);
                ValidateLiteral(literal);
                return(literal);
            }
            else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_')
            {
                // Something starting with @
                // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
                // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
                while (true)
                {
                    // Peek first so the terminating character is left for the next token.
                    int c = source.Peek();
                    if (c == -1 || (!Entity.ValidateUriIsIUnreserved((char)c) && c != ':') || c == '.')
                    {
                        break;
                    }
                    b.Append((char)source.Read());
                }
            }
            else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-')
            {
                // A token that starts like a number: digits or a sign.
                while (true)
                {
                    int ci = source.Peek();
                    if (ci == -1)
                    {
                        break;
                    }
                    // Closing brackets always terminate the token.
                    if (ci == ']' || ci == ')' || ci == '}')
                    {
                        break;
                    }

                    // punctuation followed by a space means the punctuation is
                    // punctuation, and not part of this token
                    if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
                    {
                        break;
                    }

                    char c = (char)ci;
                    if (char.IsWhiteSpace(c))
                    {
                        break;
                    }

                    b.Append((char)source.Read());
                }
            }
            else if (firstchar == '=')
            {
                // "=" alone, or "=>" when followed by '>'.
                if (source.Peek() == (int)'>')
                {
                    b.Append((char)source.Read());
                }

                if (source.Peek() == (int)':' && source.Peek2() == (int)'>')                   // SPECIAL EXTENSION "=:>"
                {
                    b.Append((char)source.Read());
                    b.Append((char)source.Read());
                }
            }
            else if (firstchar == '[')
            {
                // The start of an anonymous node.
            }
            else if (firstchar == '{')
            {
                return("{");
            }
            else if (firstchar == '(')
            {
                return("(");
            }
            else if (firstchar == ')')
            {
                return(")");
            }
            else
            {
                // Unrecognised start character: gather the rest of the run up to whitespace
                // so the error message shows the whole offending token.
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        break;
                    }
                    if (char.IsWhiteSpace((char)c))
                    {
                        break;
                    }
                    b.Append((char)c);
                }
                OnError("Invalid token: " + b.ToString(), loc);
            }

            return(b.ToString());
        }
Пример #4
0
        /// <summary>
        /// Appends to <paramref name="b"/> the character denoted by a backslash escape.
        /// </summary>
        /// <param name="c">The character that followed the backslash.</param>
        /// <param name="b">Buffer that receives the unescaped text.</param>
        /// <param name="source">Reader from which the hex digits of a \u or \U escape are consumed.</param>
        /// <param name="loc">Location passed to <c>OnError</c> when the escape is rejected.</param>
        /// <param name="esc3E">When true, an escaped '&gt;' is accepted.</param>
        /// <param name="escSPQ">When true, \n, \r, \t and an escaped double quote are accepted.</param>
        /// <param name="escN3">When true, an escaped single quote and a backslash followed by a line break are accepted.</param>
        private void ReadEscapedChar(char c, StringBuilder b, MyReader source, Location loc, bool esc3E, bool escSPQ, bool escN3)
        {
            // AbsoluteURIs (NTriples), RelativeURIs (Turtle), strings, and longStrings all allow \'s to be escaped and u and U notation.
            if (c == '\\')
            {
                b.Append('\\');
            }
            else if (c == 'u' || c == 'U')
            {
                // \u carries four hex digits, \U carries eight.
                int digitCount = (c == 'u') ? 4 : 8;
                StringBuilder num = new StringBuilder(digitCount);
                for (int i = 0; i < digitCount; i++)
                {
                    int d = source.Read();
                    if (d == -1)
                    {
                        OnError("Unexpected end of stream within a \\" + c + " escape", loc);
                        return;
                    }
                    num.Append((char)d);
                }

                // Eight hex digits can wrap to a negative int, so the range is checked explicitly.
                int unicode;
                if (!int.TryParse(num.ToString(), System.Globalization.NumberStyles.AllowHexSpecifier, System.Globalization.CultureInfo.InvariantCulture, out unicode)
                    || unicode < 0 || unicode > 0x10FFFF)
                {
                    OnError("Invalid Unicode escape: \\" + c + num.ToString(), loc);
                    return;
                }

                if (unicode <= 0xFFFF)
                {
                    // Fits in a single UTF-16 code unit.
                    b.Append((char)unicode);
                }
                else
                {
                    // Supplementary code points need a surrogate pair; a plain char cast would truncate them.
                    b.Append(char.ConvertFromUtf32(unicode));
                }
            }

            // RelativeURis in Turtle (but not string or longString or NTriples Absolute URis) allow >'s to be escaped.
            else if (esc3E && c == '>')
            {
                b.Append(">");
            }

            // AbsoluteURIs (NTriples), string, and longString allow the three space escapes and the double quote.
            else if (escSPQ && c == 'n')
            {
                b.Append('\n');
            }
            else if (escSPQ && c == 'r')
            {
                b.Append('\r');
            }
            else if (escSPQ && c == 't')
            {
                b.Append('\t');
            }
            else if (escSPQ && c == '"')
            {
                b.Append('"');
            }

            // In Notation 3 strings, additional escapes are allowed.
            else if (escN3 && c == '\'')
            {
                b.Append('\'');
            }
            else if (escN3 && c == '\n')
            {
                // Backslash followed by LF: nothing is appended.
            }
            else if (escN3 && c == '\r')
            {
                // Backslash followed by CR: nothing is appended.
            }

            else if (char.IsDigit((char)c) || c == 'x' || c == 'a' || c == 'b' || c == 'f' || c == 'v')
            {
                OnError("Octal and hex byte-value escapes and other escapes are deprecated and not supported", loc);
            }
            else
            {
                OnError("Invalid escape character: " + (char)c, loc);
            }
        }
Пример #5
0
 // Skips whitespace and comments, then reports the upcoming character without
 // consuming it; the value is whatever source.Peek() yields.
 private int NextPunc(MyReader source)
 {
     ReadWhitespace(source);

     int upcoming = source.Peek();
     return upcoming;
 }
Пример #6
0
		/// <summary>
		/// Appends to <paramref name="b"/> the character denoted by a backslash escape.
		/// </summary>
		/// <param name="c">The character that followed the backslash.</param>
		/// <param name="b">Buffer that receives the unescaped text.</param>
		/// <param name="source">Reader from which the hex digits of a \u or \U escape are consumed.</param>
		/// <param name="loc">Location passed to <c>OnError</c> when the escape is rejected.</param>
		private void ReadEscapedChar(char c, StringBuilder b, MyReader source, Location loc) {
			if (c == 'n') b.Append('\n');
			else if (c == 'r') b.Append('\r');
			else if (c == 't') b.Append('\t');
			else if (c == '\\') b.Append('\\');
			else if (c == '"') b.Append('"');
			else if (c == '\'') b.Append('\'');
			else if (c == 'a') b.Append('\a');
			else if (c == 'b') b.Append('\b');
			else if (c == 'f') b.Append('\f');
			else if (c == 'v') b.Append('\v');
			else if (c == '\n') { } // backslash followed by LF appends nothing
			else if (c == '\r') { } // backslash followed by CR appends nothing
			else if (c == 'u' || c == 'U') {
				// \u carries four hex digits, \U carries eight.
				int digitCount = (c == 'u') ? 4 : 8;
				StringBuilder num = new StringBuilder(digitCount);
				for (int i = 0; i < digitCount; i++) {
					int d = source.Read();
					if (d == -1) {
						OnError("Unexpected end of stream within a \\" + c + " escape", loc);
						return;
					}
					num.Append((char)d);
				}

				// Eight hex digits can wrap to a negative int, so the range is checked explicitly.
				int unicode;
				if (!int.TryParse(num.ToString(), System.Globalization.NumberStyles.AllowHexSpecifier, System.Globalization.CultureInfo.InvariantCulture, out unicode)
					|| unicode < 0 || unicode > 0x10FFFF) {
					OnError("Invalid Unicode escape: \\" + c + num.ToString(), loc);
					return;
				}

				if (unicode <= 0xFFFF)
					b.Append((char)unicode); // fits in a single UTF-16 code unit
				else
					b.Append(char.ConvertFromUtf32(unicode)); // surrogate pair; a char cast would truncate

			} else if (char.IsDigit((char)c) || c == 'x')
				OnError("Octal and hex byte-value escapes are deprecated and not supported", loc);
			else
				OnError("Unrecognized escape character: " + (char)c, loc);
		}
Пример #7
0
		/// <summary>
		/// Reads the next token from <paramref name="source"/> after skipping whitespace and comments.
		/// </summary>
		/// <param name="source">Reader to consume characters from.</param>
		/// <param name="context">Passed to <c>ResolveQName</c> when a literal's datatype is written as a QName.</param>
		/// <returns>
		/// A <c>Literal</c> when the token is a quoted string; otherwise the token text as a string.
		/// An empty string is returned when the end of the stream is reached before any token.
		/// </returns>
		private object ReadToken(MyReader source, ParseContext context) {
			ReadWhitespace(source);
			
			// Remember where the token starts so errors are reported at its first character.
			Location loc = new Location(source.Line, source.Col);
			
			int firstchar = source.Read();
			if (firstchar == -1)
				return "";
			
			// readTokenBuffer is reused for every token rather than allocating a new builder.
			StringBuilder b = readTokenBuffer; readTokenBuffer.Length = 0;
			b.Append((char)firstchar);

			if (firstchar == '<') {
				// This is a URI or the <= verb.  URIs can be escaped like strings, at least in the NTriples spec.
				bool escaped = false;
				while (true) {
					int c = source.Read();
					if (c == -1) OnError("Unexpected end of stream within a token beginning with <", loc);
					
					// The buffer holds only the opening '<' (length 1) when the character directly
					// after it is read, so that is the point at which "<=" must be recognised.
					if (!escaped && b.Length == 1 && c == '=')
						return "<="; // the <= verb
					
					if (escaped) {
						ReadEscapedChar((char)c, b, source, loc);
						escaped = false;
					} else if (c == '\\') {
						escaped = true;
					} else {
						b.Append((char)c);
						if (c == '>') // end of the URI
							break;
					}
				}
				
			} else if (firstchar == '"') {
				// A string: ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
				// What kind of crazy regex is this??
				b.Length = 0; // get rid of the open quote
				bool escaped = false;
				bool triplequoted = false;
				while (true) {
					int c = source.Read();
					if (c == -1) OnError("Unexpected end of stream within a string", loc);
					
					// Detect the opening """ only once (otherwise """""" cannot be read), and never
					// on a quote that follows a backslash, which is an escaped quote.
					if (!escaped && !triplequoted && b.Length == 0 && c == (int)'"' && source.Peek() == (int)'"') {
						triplequoted = true;
						source.Read();
						continue;
					}
					
					if (!escaped && c == '\\')
						escaped = true;
					else if (escaped) {
						ReadEscapedChar((char)c, b, source, loc);
						escaped = false;
					} else {
						// A plain string ends at the first unescaped quote.
						if (c == '"' && !triplequoted)
							break;
						// A long string ends only at three consecutive quotes; lone quotes are content.
						if (c == '"' && source.Peek() == '"' && source.Peek2() == '"' && triplequoted)
							break;
						b.Append((char)c);
					}
				}
				
				if (triplequoted) { // read the extra end quotes
					source.Read();
					source.Read();
				}
				
				// Capture the value now: the buffer is reused below for the language tag,
				// and the nested ReadToken call for the datatype resets it as well.
				string litvalue = b.ToString();
				string litlang = null;
				string litdt = null;

				// Strings can be suffixed with @langcode or ^^symbol (but not both?).
				if (source.Peek() == '@') {
					source.Read();
					b.Length = 0;
					while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
						b.Append((char)source.Read());
					litlang = b.ToString();
				} else if (source.Peek() == '^' && source.Peek2() == '^') {
					loc = new Location(source.Line, source.Col);
					source.Read();
					source.Read();
					litdt = ReadToken(source, context).ToString(); // better be a string URI
					if (litdt.StartsWith("<") && litdt.EndsWith(">"))
						litdt = litdt.Substring(1, litdt.Length-2); // <uri> form: strip the angle brackets
					else if (litdt.IndexOf(":") != -1) {
						// prefix:name form: resolve it to a full URI.
						Resource r = ResolveQName(litdt, context, loc);
						if (r.Uri == null)
							OnError("A literal datatype cannot be an anonymous entity", loc);
						litdt = r.Uri;
					}
				}
				
				return new Literal(litvalue, litlang, litdt);

			} else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_') {
				// Something starting with @
				// A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
				// A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
				while (true) {
					// Peek first so the terminating character is left for the next token.
					int c = source.Peek();
					if (c == -1 || (!char.IsLetterOrDigit((char)c) && c != '-' && c != '_' && c != ':')) break;
					b.Append((char)source.Read());
				}
			
			} else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-') {
				// A token that starts like a number: digits or a sign.
				while (true) {
					int ci = source.Peek();
					if (ci == -1) break;
					
					// Closing brackets always terminate the token, even when not followed by a space.
					if (ci == ']' || ci == ')' || ci == '}') break;
					
					// punctuation followed by a space means the punctuation is
					// punctuation, and not part of this token
					if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
						break;
					
					char c = (char)ci;
					if (char.IsWhiteSpace(c)) break;
					
					b.Append((char)source.Read());
				}
				
			} else if (firstchar == '=') {
				// "=" alone, or "=>" when followed by '>'.
				if (source.Peek() == (int)'>')
					b.Append((char)source.Read());
			
			} else if (firstchar == '[') {
				// The start of an anonymous node.

			} else if (firstchar == '{') {
				return "{";

			} else if (firstchar == '(') {
				return "(";
			} else if (firstchar == ')') {
				return ")";

			} else {
				// Unrecognised start character: gather the rest of the run up to whitespace
				// so the error message shows the whole offending token.
				while (true) {
					int c = source.Read();
					if (c == -1) break;
					if (char.IsWhiteSpace((char)c)) break;
					b.Append((char)c);
				}
				OnError("Invalid token: " + b.ToString(), loc);
			}
			
			return b.ToString();
		}
Пример #8
0
		// Skips whitespace and comments, then consumes and returns the next character.
		// Reaching the end of the stream (Read() == -1) is reported through OnError.
		private char ReadPunc(MyReader source) {
			ReadWhitespace(source);

			int next = source.Read();
			if (next != -1) {
				return (char)next;
			}

			OnError("End of file expecting punctuation", new Location(source.Line, source.Col));
			return (char)next;
		}
Пример #9
0
		// Skips whitespace and comments, then reports the upcoming character without
		// consuming it; the value is whatever source.Peek() yields.
		private int NextPunc(MyReader source) {
			ReadWhitespace(source);

			int lookahead = source.Peek();
			return lookahead;
		}
Пример #10
0
		// Advances the reader past any run of whitespace and '#' comments, leaving it
		// positioned on the first character that is neither (or at the end of the stream).
		private void ReadWhitespace(MyReader source) {
			for (;;) {
				// Consume blanks one character at a time.
				while (char.IsWhiteSpace((char)source.Peek())) {
					source.Read();
				}

				// Anything other than a comment marker ends the skipping.
				if (source.Peek() != '#') {
					return;
				}

				// A comment runs through the next LF (10) or CR (13), or to the end of the stream (-1).
				int ch;
				do {
					ch = source.Read();
				} while (ch != -1 && ch != 10 && ch != 13);
			}
		}
Пример #11
0
        /// <summary>
        /// Reads the next token from <paramref name="source"/> after skipping whitespace and comments.
        /// </summary>
        /// <param name="source">Reader to consume characters from.</param>
        /// <param name="context">Passed to <c>ResolveQName</c> when a literal's datatype is written as a QName.</param>
        /// <returns>
        /// A <c>Literal</c> when the token is a quoted string; otherwise the token text as a string.
        /// An empty string is returned when the end of the stream is reached before any token.
        /// </returns>
        private object ReadToken(MyReader source, ParseContext context)
        {
            ReadWhitespace(source);

            // Remember where the token starts so errors are reported at its first character.
            Location loc = new Location(source.Line, source.Col);

            int firstchar = source.Read();

            if (firstchar == -1)
            {
                return("");
            }

            // readTokenBuffer is reused for every token rather than allocating a new builder.
            StringBuilder b = readTokenBuffer; readTokenBuffer.Length = 0;

            b.Append((char)firstchar);

            if (firstchar == '<')
            {
                // This is a URI or the <= verb.  URIs can be escaped like strings, at least in the NTriples spec.
                bool escaped = false;
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        OnError("Unexpected end of stream within a token beginning with <", loc);
                    }

                    // The buffer holds only the opening '<' (length 1) when the character directly
                    // after it is read, so that is the point at which "<=" must be recognised.
                    if (!escaped && b.Length == 1 && c == '=')
                    {
                        return("<=");                        // the <= verb
                    }
                    if (escaped)
                    {
                        ReadEscapedChar((char)c, b, source, loc);
                        escaped = false;
                    }
                    else if (c == '\\')
                    {
                        escaped = true;
                    }
                    else
                    {
                        b.Append((char)c);
                        if (c == '>')                         // end of the URI
                        {
                            break;
                        }
                    }
                }
            }
            else if (firstchar == '"')
            {
                // A string: ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
                // What kind of crazy regex is this??
                b.Length = 0;                 // get rid of the open quote
                bool escaped      = false;
                bool triplequoted = false;
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        OnError("Unexpected end of stream within a string", loc);
                    }

                    // Detect the opening """ only once (otherwise """""" cannot be read), and never
                    // on a quote that follows a backslash, which is an escaped quote.
                    if (!escaped && !triplequoted && b.Length == 0 && c == (int)'"' && source.Peek() == (int)'"')
                    {
                        triplequoted = true;
                        source.Read();
                        continue;
                    }

                    if (!escaped && c == '\\')
                    {
                        escaped = true;
                    }
                    else if (escaped)
                    {
                        ReadEscapedChar((char)c, b, source, loc);
                        escaped = false;
                    }
                    else
                    {
                        // A plain string ends at the first unescaped quote.
                        if (c == '"' && !triplequoted)
                        {
                            break;
                        }
                        // A long string ends only at three consecutive quotes; lone quotes are content.
                        if (c == '"' && source.Peek() == '"' && source.Peek2() == '"' && triplequoted)
                        {
                            break;
                        }
                        b.Append((char)c);
                    }
                }

                if (triplequoted)                   // read the extra end quotes
                {
                    source.Read();
                    source.Read();
                }

                // Capture the value now: the buffer is reused below for the language tag,
                // and the nested ReadToken call for the datatype resets it as well.
                string litvalue = b.ToString();
                string litlang  = null;
                string litdt    = null;

                // Strings can be suffixed with @langcode or ^^symbol (but not both?).
                if (source.Peek() == '@')
                {
                    source.Read();
                    b.Length = 0;
                    while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
                    {
                        b.Append((char)source.Read());
                    }
                    litlang = b.ToString();
                }
                else if (source.Peek() == '^' && source.Peek2() == '^')
                {
                    loc = new Location(source.Line, source.Col);
                    source.Read();
                    source.Read();
                    litdt = ReadToken(source, context).ToString();                     // better be a string URI
                    if (litdt.StartsWith("<") && litdt.EndsWith(">"))
                    {
                        // <uri> form: strip the angle brackets.
                        litdt = litdt.Substring(1, litdt.Length - 2);
                    }
                    else if (litdt.IndexOf(":") != -1)
                    {
                        // prefix:name form: resolve it to a full URI.
                        Resource r = ResolveQName(litdt, context, loc);
                        if (r.Uri == null)
                        {
                            OnError("A literal datatype cannot be an anonymous entity", loc);
                        }
                        litdt = r.Uri;
                    }
                }

                return(new Literal(litvalue, litlang, litdt));
            }
            else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_')
            {
                // Something starting with @
                // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
                // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
                while (true)
                {
                    // Peek first so the terminating character is left for the next token.
                    int c = source.Peek();
                    if (c == -1 || (!char.IsLetterOrDigit((char)c) && c != '-' && c != '_' && c != ':'))
                    {
                        break;
                    }
                    b.Append((char)source.Read());
                }
            }
            else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-')
            {
                // A token that starts like a number: digits or a sign.
                while (true)
                {
                    int ci = source.Peek();
                    if (ci == -1)
                    {
                        break;
                    }
                    // Closing brackets always terminate the token.
                    if (ci == ']' || ci == ')' || ci == '}')
                    {
                        break;
                    }

                    // punctuation followed by a space means the punctuation is
                    // punctuation, and not part of this token
                    if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
                    {
                        break;
                    }

                    char c = (char)ci;
                    if (char.IsWhiteSpace(c))
                    {
                        break;
                    }

                    b.Append((char)source.Read());
                }
            }
            else if (firstchar == '=')
            {
                // "=" alone, or "=>" when followed by '>'.
                if (source.Peek() == (int)'>')
                {
                    b.Append((char)source.Read());
                }
            }
            else if (firstchar == '[')
            {
                // The start of an anonymous node.
            }
            else if (firstchar == '{')
            {
                return("{");
            }
            else if (firstchar == '(')
            {
                return("(");
            }
            else if (firstchar == ')')
            {
                return(")");
            }
            else
            {
                // Unrecognised start character: gather the rest of the run up to whitespace
                // so the error message shows the whole offending token.
                while (true)
                {
                    int c = source.Read();
                    if (c == -1)
                    {
                        break;
                    }
                    if (char.IsWhiteSpace((char)c))
                    {
                        break;
                    }
                    b.Append((char)c);
                }
                OnError("Invalid token: " + b.ToString(), loc);
            }

            return(b.ToString());
        }
Пример #12
0
        /// <summary>
        /// Appends to <paramref name="b"/> the character denoted by a backslash escape.
        /// </summary>
        /// <param name="c">The character that followed the backslash.</param>
        /// <param name="b">Buffer that receives the unescaped text.</param>
        /// <param name="source">Reader from which the hex digits of a \u or \U escape are consumed.</param>
        /// <param name="loc">Location passed to <c>OnError</c> when the escape is rejected.</param>
        private void ReadEscapedChar(char c, StringBuilder b, MyReader source, Location loc)
        {
            if (c == 'n')
            {
                b.Append('\n');
            }
            else if (c == 'r')
            {
                b.Append('\r');
            }
            else if (c == 't')
            {
                b.Append('\t');
            }
            else if (c == '\\')
            {
                b.Append('\\');
            }
            else if (c == '"')
            {
                b.Append('"');
            }
            else if (c == '\'')
            {
                b.Append('\'');
            }
            else if (c == 'a')
            {
                b.Append('\a');
            }
            else if (c == 'b')
            {
                b.Append('\b');
            }
            else if (c == 'f')
            {
                b.Append('\f');
            }
            else if (c == 'v')
            {
                b.Append('\v');
            }
            else if (c == '\n')
            {
                // Backslash followed by LF: nothing is appended.
            }
            else if (c == '\r')
            {
                // Backslash followed by CR: nothing is appended.
            }
            else if (c == 'u' || c == 'U')
            {
                // \u carries four hex digits, \U carries eight.
                int digitCount = (c == 'u') ? 4 : 8;
                StringBuilder num = new StringBuilder(digitCount);
                for (int i = 0; i < digitCount; i++)
                {
                    int d = source.Read();
                    if (d == -1)
                    {
                        OnError("Unexpected end of stream within a \\" + c + " escape", loc);
                        return;
                    }
                    num.Append((char)d);
                }

                // Eight hex digits can wrap to a negative int, so the range is checked explicitly.
                int unicode;
                if (!int.TryParse(num.ToString(), System.Globalization.NumberStyles.AllowHexSpecifier, System.Globalization.CultureInfo.InvariantCulture, out unicode)
                    || unicode < 0 || unicode > 0x10FFFF)
                {
                    OnError("Invalid Unicode escape: \\" + c + num.ToString(), loc);
                    return;
                }

                if (unicode <= 0xFFFF)
                {
                    // Fits in a single UTF-16 code unit.
                    b.Append((char)unicode);
                }
                else
                {
                    // Supplementary code points need a surrogate pair; a plain char cast would truncate them.
                    b.Append(char.ConvertFromUtf32(unicode));
                }
            }
            else if (char.IsDigit((char)c) || c == 'x')
            {
                OnError("Octal and hex byte-value escapes are deprecated and not supported", loc);
            }
            else
            {
                OnError("Unrecognized escape character: " + (char)c, loc);
            }
        }
Пример #13
0
		/// <summary>
		/// Reads the next token from <paramref name="source"/>, after skipping leading whitespace via ReadWhitespace.
		/// </summary>
		/// <param name="source">The reader that characters are consumed from.</param>
		/// <param name="context">Passed to ResolveQName, and to the recursive ReadToken call, when a literal carries a ^^ datatype suffix.</param>
		/// <returns>
		/// An empty string at end of stream; a Literal for a quoted string; otherwise the token text as a string
		/// (a &lt;...&gt; URI including its angle brackets, the "&lt;=" verb, a name, a number, "=", "=&gt;", "=:&gt;",
		/// "[", "{", "(" or ")").
		/// </returns>
		private object ReadToken(MyReader source, ParseContext context) {
			ReadWhitespace(source);
			
			// Remember where the token starts so that errors point at its beginning.
			Location loc = new Location(source.Line, source.Col);
			
			int firstchar = source.Read();
			if (firstchar == -1)
				return "";
			
			// Reuse the shared token buffer; it always starts out holding just the first character.
			StringBuilder b = readTokenBuffer; readTokenBuffer.Length = 0;
			b.Append((char)firstchar);

			if (firstchar == '<') {
				// This is a URI or the <= verb.  URIs can have \'s and <'s escaped with a backslash, plus \u and \U notation.
				bool escaped = false;
				while (true) {
					int c = source.Read();
					if (c == -1) OnError("Unexpected end of stream within a token beginning with <", loc);
					
					// The buffer holds only the opening '<' at this point, so an '=' read now
					// is the second character of the <= verb. (Testing for a length of 2 never
					// matched "<=" and instead cut short URIs such as <a=b>.)
					if (b.Length == 1 && c == '=')
						return "<="; // the <= verb
					
					if (escaped) {
						ReadEscapedChar((char)c, b, source, loc, true, true, false);
							// the first flag should be set true only if not NTriples, but we are flexible on reading.
							// the second flag should be set true only if NTriples, but we are flexible on reading.
						escaped = false;
					} else if (c == '\\') {
						escaped = true;
					} else {
						b.Append((char)c);
						if (c == '>') // end of the URI
							break;
					}
				}
				
			} else if (firstchar == '"') {
				// This can either be a string "..." or a longString """...""", which additionally allows embedded \n, \r, and \t, and quotes.
				
				b.Length = 0; // get rid of the open quote
				bool escaped = false;
				bool triplequoted = false;
				while (true) {
					int c = source.Read();
					if (c == -1) OnError("Unexpected end of stream within a string", loc);
					
					// Check if this is started by three quotes. If we've already read three quotes, don't keep checking or we can't read """""".
					if (b.Length == 0 && c == (int)'"' && source.Peek() == (int)'"' && !triplequoted) {
						triplequoted = true;
						source.Read();
						continue;
					}
					
					if (escaped) {
						ReadEscapedChar((char)c, b, source, loc, false, true, true); // the last flag should be set true only if N3, but we are flexible on reading
						escaped = false;
					} else if (c == '\\') {
						escaped = true;
					} else {
						// A single quote ends a plain string; a long string ends only at three quotes in a row.
						if (c == '"' && !triplequoted)
							break;
						if (c == '"' && source.Peek() == '"' && source.Peek2() == '"' && triplequoted)
							break;
						b.Append((char)c);
					}
				}
				
				if (triplequoted) { // read the extra end quotes
					source.Read();
					source.Read();
				}
				
				string litvalue = b.ToString();
				string litlang = null;
				string litdt = null;

				// Strings can be suffixed with @langcode or ^^symbol (but not both?).
				if (source.Peek() == '@') {
					source.Read();
					b.Length = 0;
					while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
						b.Append((char)source.Read());
					litlang = b.ToString();
				} else if (source.Peek() == '^' && source.Peek2() == '^') {
					loc = new Location(source.Line, source.Col);
					source.Read();
					source.Read();
					litdt = ReadToken(source, context).ToString(); // better be a string URI
					if (litdt.StartsWith("<") && litdt.EndsWith(">"))
						litdt = litdt.Substring(1, litdt.Length-2); // strip the angle brackets
					else if (litdt.IndexOf(':') != -1) {
						// Anything else containing a colon is resolved as a QName.
						Resource r = ResolveQName(litdt, context, loc);
						if (r.Uri == null)
							OnError("A literal datatype cannot be an anonymous entity", loc);
						litdt = r.Uri;
					}
				}
				
				Literal literal = new Literal(litvalue, litlang, litdt);
				ValidateLiteral(literal);
				return literal;

			} else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_') {
				// Something starting with @
				// A QName: (([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
				// A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
				// Characters are only peeked here, so the terminating character stays in the stream.
				while (true) {
					int c = source.Peek();
					if (c == -1 || (!Entity.ValidateUriIsIUnreserved((char)c) && c != ':') || c == '.') break;
					b.Append((char)source.Read());
				}
			
			} else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-') {
				// A token starting with a digit or a sign: read up to whitespace, a closing bracket, or end of stream.
				while (true) {
					int ci = source.Peek();
					if (ci == -1) break;
					if (ci == ']' || ci == ')' || ci == '}') break;
					
					// punctuation followed by a space means the punctuation is
					// punctuation, and not part of this token
					if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
						break;
					
					char c = (char)ci;
					if (char.IsWhiteSpace(c)) break;
					
					b.Append((char)source.Read());
				}
				
			} else if (firstchar == '=') {
				// "=" alone, or "=>" when immediately followed by '>'.
				if (source.Peek() == (int)'>')
					b.Append((char)source.Read());
				
				if (source.Peek() == (int)':' && source.Peek2() == (int)'>') { // SPECIAL EXTENSION "=:>"
					b.Append((char)source.Read());
					b.Append((char)source.Read());
				}
			
			} else if (firstchar == '[') {
				// The start of an anonymous node.
				// Nothing more to read: the buffer already holds "[", which is returned below.

			} else if (firstchar == '{') {
				return "{";

			} else if (firstchar == '(') {
				return "(";
			} else if (firstchar == ')') {
				return ")";

			} else {
				// Unrecognized start character: collect the rest of the run up to whitespace
				// (the terminating whitespace is consumed) so the error can show the whole token.
				while (true) {
					int c = source.Read();
					if (c == -1) break;
					if (char.IsWhiteSpace((char)c)) break;
					b.Append((char)c);
				}
				OnError("Invalid token: " + b.ToString(), loc);
			}
			
			return b.ToString();
		}
Пример #14
0
		/// <summary>
		/// Appends to <paramref name="b"/> the character denoted by a backslash escape. The backslash itself
		/// has already been consumed by the caller; <paramref name="c"/> is the character that followed it.
		/// </summary>
		/// <param name="c">The character that followed the backslash.</param>
		/// <param name="b">Buffer that receives the decoded character(s).</param>
		/// <param name="source">Reader from which the hex digits of \u and \U escapes are consumed.</param>
		/// <param name="loc">Location passed to OnError when the escape is rejected.</param>
		/// <param name="esc3E">Whether \&gt; is accepted.</param>
		/// <param name="escSPQ">Whether \n, \r, \t and \" are accepted.</param>
		/// <param name="escN3">Whether \' and a backslash followed by a literal line break are accepted.</param>
		private void ReadEscapedChar(char c, StringBuilder b, MyReader source, Location loc, bool esc3E, bool escSPQ, bool escN3) {
			// AbsoluteURIs (NTriples), RelativeURIs (Turtle), strings, and longStrings all allow \'s to be escaped and u and U notation.
			if (c == '\\') b.Append('\\');
			else if (c == 'u' || c == 'U') {
				// \uXXXX carries four hex digits, \UXXXXXXXX carries eight. All of them are
				// read and validated here, so a truncated or malformed escape is reported
				// through OnError rather than surfacing as a FormatException.
				int digitCount = (c == 'u') ? 4 : 8;
				long codePoint = 0; // long: eight hex digits do not fit in a non-negative int
				for (int i = 0; i < digitCount; i++) {
					int h = source.Read();
					int digit;
					if (h >= '0' && h <= '9') digit = h - '0';
					else if (h >= 'a' && h <= 'f') digit = h - 'a' + 10;
					else if (h >= 'A' && h <= 'F') digit = h - 'A' + 10;
					else {
						if (h == -1)
							OnError("Unexpected end of stream within a \\" + c + " escape", loc);
						else
							OnError("Invalid hexadecimal digit in a \\" + c + " escape: " + (char)h, loc);
						return; // only reached if OnError does not throw
					}
					codePoint = (codePoint << 4) | (long)digit;
				}
				
				if (codePoint > 0x10FFFF) {
					OnError("Unicode escape is beyond the last valid code point U+10FFFF", loc);
					return; // only reached if OnError does not throw
				}
				
				if (codePoint <= 0xFFFF) {
					// A single UTF-16 code unit. Surrogate values are passed through unchanged so
					// that a pair written as two \u escapes still combines in the buffer.
					b.Append((char)codePoint);
				} else {
					// Supplementary-plane code points need a surrogate pair; a plain (char) cast would truncate them.
					b.Append(char.ConvertFromUtf32((int)codePoint));
				}
			}
			
			// RelativeURis in Turtle (but not string or longString or NTriples Absolute URis) allow >'s to be escaped.
			else if (esc3E && c == '>') b.Append(">");
			
			// AbsoluteURIs (NTriples), string, and longString allow the three space escapes and the double quote.
			else if (escSPQ && c == 'n') b.Append('\n');
			else if (escSPQ && c == 'r') b.Append('\r');
			else if (escSPQ && c == 't') b.Append('\t');
			else if (escSPQ && c == '"') b.Append('"');
			
			// In Notation 3 strings, additional escapes are allowed.
			else if (escN3 && c == '\'') b.Append('\'');
			// A backslash directly before a line break appends nothing.
			else if (escN3 && c == '\n') { }
			else if (escN3 && c == '\r') { }
			
			else if (char.IsDigit((char)c) || c == 'x' || c == 'a' || c == 'b' || c == 'f' || c == 'v')
				OnError("Octal and hex byte-value escapes and other escapes are deprecated and not supported", loc);
			else
				OnError("Invalid escape character: " + (char)c, loc);
		}