/// <summary>
/// Skips leading whitespace and reads the next token from <paramref name="source"/>.
/// </summary>
/// <param name="source">Reader to consume characters from.</param>
/// <param name="context">Passed to ResolveQName when a literal's datatype is written as a QName.</param>
/// <returns>
/// An empty string at end of stream; a <c>Literal</c> for a quoted string (with its optional
/// <c>@lang</c> or <c>^^datatype</c> suffix); otherwise the token text as a string. URI tokens
/// keep their surrounding angle brackets.
/// </returns>
private object ReadToken(MyReader source, ParseContext context)
{
    ReadWhitespace(source);

    Location loc = new Location(source.Line, source.Col);

    int firstchar = source.Read();
    if (firstchar == -1)
    {
        return "";
    }

    // Reset and reuse the readTokenBuffer member instead of creating a new builder.
    StringBuilder b = readTokenBuffer;
    b.Length = 0;
    b.Append((char)firstchar);

    if (firstchar == '<')
    {
        // This is a URI or the <= verb. URIs can have \'s and <'s escaped with a backslash,
        // plus \u and \U notation.
        bool escaped = false;
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                // NOTE(review): assumes OnError does not return; confirm.
                OnError("Unexpected end of stream within a token beginning with <", loc);
            }

            // The buffer holds only the leading '<' here, so an unescaped '=' right after it
            // is the <= verb. (The check used to test for length 2, which could never match
            // a real "<=" and instead truncated URIs such as <a=b>.)
            if (!escaped && b.Length == 1 && c == '=')
            {
                return "<=";
            }

            if (escaped)
            {
                // The first flag should be set true only if not NTriples, and the second only
                // if NTriples, but we are flexible on reading.
                ReadEscapedChar((char)c, b, source, loc, true, true, false);
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else
            {
                b.Append((char)c);
                if (c == '>')
                {
                    break; // end of the URI
                }
            }
        }
    }
    else if (firstchar == '"')
    {
        // This can either be a string "..." or a longString """...""", which additionally
        // allows embedded \n, \r, and \t, and quotes.
        b.Length = 0; // get rid of the open quote
        bool escaped = false;
        bool triplequoted = false;
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                OnError("Unexpected end of stream within a string", loc);
            }

            // Check if this is started by three quotes. Once three quotes have been read we
            // must stop checking, or """""" could not be read. A quote that follows a
            // backslash is an escaped character, not part of an opening delimiter, so that
            // "\"" is read as a one-character string.
            if (!escaped && !triplequoted && b.Length == 0 && c == '"' && source.Peek() == '"')
            {
                triplequoted = true;
                source.Read();
                continue;
            }

            if (escaped)
            {
                // The last flag should be set true only if N3, but we are flexible on reading.
                ReadEscapedChar((char)c, b, source, loc, false, true, true);
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else
            {
                if (c == '"')
                {
                    if (!triplequoted)
                    {
                        break;
                    }
                    if (source.Peek() == '"' && source.Peek2() == '"')
                    {
                        break;
                    }
                }
                b.Append((char)c);
            }
        }

        if (triplequoted)
        {
            // read the extra end quotes
            source.Read();
            source.Read();
        }

        // Capture the value now: the buffer is reused below and by the recursive call.
        string litvalue = b.ToString();
        string litlang = null;
        string litdt = null;

        // Strings can be suffixed with @langcode or ^^symbol (but not both?).
        if (source.Peek() == '@')
        {
            source.Read();
            b.Length = 0;
            while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
            {
                b.Append((char)source.Read());
            }
            litlang = b.ToString();
        }
        else if (source.Peek() == '^' && source.Peek2() == '^')
        {
            loc = new Location(source.Line, source.Col);
            source.Read();
            source.Read();
            litdt = ReadToken(source, context).ToString(); // better be a string URI
            if (litdt.StartsWith("<") && litdt.EndsWith(">"))
            {
                // Strip the angle brackets from a full URI.
                litdt = litdt.Substring(1, litdt.Length - 2);
            }
            else if (litdt.IndexOf(":") != -1)
            {
                Resource r = ResolveQName(litdt, context, loc);
                if (r.Uri == null)
                {
                    OnError("A literal datatype cannot be an anonymous entity", loc);
                }
                litdt = r.Uri;
            }
        }

        Literal literal = new Literal(litvalue, litlang, litdt);
        ValidateLiteral(literal);
        return literal;
    }
    else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_')
    {
        // Something starting with @
        // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
        // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
        while (true)
        {
            int c = source.Peek();
            if (c == -1 || (!Entity.ValidateUriIsIUnreserved((char)c) && c != ':') || c == '.')
            {
                break;
            }
            b.Append((char)source.Read());
        }
    }
    else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-')
    {
        while (true)
        {
            int ci = source.Peek();
            if (ci == -1)
            {
                break;
            }
            if (ci == ']' || ci == ')' || ci == '}')
            {
                break;
            }

            // punctuation followed by a space means the punctuation is
            // punctuation, and not part of this token
            if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
            {
                break;
            }

            if (char.IsWhiteSpace((char)ci))
            {
                break;
            }

            b.Append((char)source.Read());
        }
    }
    else if (firstchar == '=')
    {
        if (source.Peek() == (int)'>')
        {
            b.Append((char)source.Read());
        }

        if (source.Peek() == (int)':' && source.Peek2() == (int)'>')
        {
            // SPECIAL EXTENSION "=:>"
            b.Append((char)source.Read());
            b.Append((char)source.Read());
        }
    }
    else if (firstchar == '[')
    {
        // The start of an anonymous node; the token is just "[".
    }
    else if (firstchar == '{')
    {
        return "{";
    }
    else if (firstchar == '(')
    {
        return "(";
    }
    else if (firstchar == ')')
    {
        return ")";
    }
    else
    {
        // Gather the rest of the unrecognized run so the error message can show it.
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                break;
            }
            if (char.IsWhiteSpace((char)c))
            {
                break;
            }
            b.Append((char)c);
        }
        OnError("Invalid token: " + b.ToString(), loc);
    }

    return b.ToString();
}
/// <summary>
/// Skips leading whitespace and reads the next token from <paramref name="source"/>.
/// </summary>
/// <param name="source">Reader to consume characters from.</param>
/// <param name="context">Passed to ResolveQName when a literal's datatype is written as a QName.</param>
/// <returns>
/// An empty string at end of stream; a <c>Literal</c> for a quoted string (with its optional
/// <c>@lang</c> or <c>^^datatype</c> suffix); otherwise the token text as a string. URI tokens
/// keep their surrounding angle brackets.
/// </returns>
private object ReadToken(MyReader source, ParseContext context)
{
    ReadWhitespace(source);

    Location loc = new Location(source.Line, source.Col);

    int firstchar = source.Read();
    if (firstchar == -1)
    {
        return "";
    }

    // Reset and reuse the readTokenBuffer member instead of creating a new builder.
    StringBuilder b = readTokenBuffer;
    b.Length = 0;
    b.Append((char)firstchar);

    if (firstchar == '<')
    {
        // This is a URI or the <= verb. URIs can be escaped like strings, at least in the
        // NTriples spec.
        bool escaped = false;
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                // NOTE(review): assumes OnError does not return; confirm.
                OnError("Unexpected end of stream within a token beginning with <", loc);
            }

            // The buffer holds only the leading '<' here, so an unescaped '=' right after it
            // is the <= verb. (The check used to test for length 2, which could never match
            // a real "<=" and instead truncated URIs such as <a=b>.)
            if (!escaped && b.Length == 1 && c == '=')
            {
                return "<=";
            }

            if (escaped)
            {
                ReadEscapedChar((char)c, b, source, loc);
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else
            {
                b.Append((char)c);
                if (c == '>')
                {
                    break; // end of the URI
                }
            }
        }
    }
    else if (firstchar == '"')
    {
        // A string: ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
        b.Length = 0; // get rid of the open quote
        bool escaped = false;
        bool triplequoted = false;
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                OnError("Unexpected end of stream within a string", loc);
            }

            // Detect the """ opener. Only do this once: without the triplequoted guard the
            // empty long string """""" would re-trigger the opener on its closing quotes.
            // A quote that follows a backslash is an escaped character, not a delimiter,
            // so that "\"" is read as a one-character string.
            if (!escaped && !triplequoted && b.Length == 0 && c == '"' && source.Peek() == '"')
            {
                triplequoted = true;
                source.Read();
                continue;
            }

            if (escaped)
            {
                ReadEscapedChar((char)c, b, source, loc);
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else
            {
                if (c == '"')
                {
                    if (!triplequoted)
                    {
                        break;
                    }
                    if (source.Peek() == '"' && source.Peek2() == '"')
                    {
                        break;
                    }
                }
                b.Append((char)c);
            }
        }

        if (triplequoted)
        {
            // read the extra end quotes
            source.Read();
            source.Read();
        }

        // Capture the value now: the buffer is reused below and by the recursive call.
        string litvalue = b.ToString();
        string litlang = null;
        string litdt = null;

        // Strings can be suffixed with @langcode or ^^symbol (but not both?).
        if (source.Peek() == '@')
        {
            source.Read();
            b.Length = 0;
            while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
            {
                b.Append((char)source.Read());
            }
            litlang = b.ToString();
        }
        else if (source.Peek() == '^' && source.Peek2() == '^')
        {
            loc = new Location(source.Line, source.Col);
            source.Read();
            source.Read();
            litdt = ReadToken(source, context).ToString(); // better be a string URI
            if (litdt.StartsWith("<") && litdt.EndsWith(">"))
            {
                // Strip the angle brackets from a full URI.
                litdt = litdt.Substring(1, litdt.Length - 2);
            }
            else if (litdt.IndexOf(":") != -1)
            {
                Resource r = ResolveQName(litdt, context, loc);
                if (r.Uri == null)
                {
                    OnError("A literal datatype cannot be an anonymous entity", loc);
                }
                litdt = r.Uri;
            }
        }

        return new Literal(litvalue, litlang, litdt);
    }
    else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_')
    {
        // Something starting with @
        // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
        // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
        while (true)
        {
            int c = source.Peek();
            if (c == -1 || (!char.IsLetterOrDigit((char)c) && c != '-' && c != '_' && c != ':'))
            {
                break;
            }
            b.Append((char)source.Read());
        }
    }
    else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-')
    {
        while (true)
        {
            int ci = source.Peek();
            if (ci == -1)
            {
                break;
            }
            if (ci == ']' || ci == ')' || ci == '}')
            {
                break;
            }

            // punctuation followed by a space means the punctuation is
            // punctuation, and not part of this token
            if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
            {
                break;
            }

            if (char.IsWhiteSpace((char)ci))
            {
                break;
            }

            b.Append((char)source.Read());
        }
    }
    else if (firstchar == '=')
    {
        // "=" alone, or the "=>" verb.
        if (source.Peek() == (int)'>')
        {
            b.Append((char)source.Read());
        }
    }
    else if (firstchar == '[')
    {
        // The start of an anonymous node; the token is just "[".
    }
    else if (firstchar == '{')
    {
        return "{";
    }
    else if (firstchar == '(')
    {
        return "(";
    }
    else if (firstchar == ')')
    {
        return ")";
    }
    else
    {
        // Gather the rest of the unrecognized run so the error message can show it.
        while (true)
        {
            int c = source.Read();
            if (c == -1)
            {
                break;
            }
            if (char.IsWhiteSpace((char)c))
            {
                break;
            }
            b.Append((char)c);
        }
        OnError("Invalid token: " + b.ToString(), loc);
    }

    return b.ToString();
}
/// <summary>
/// Skips leading whitespace and reads the next token from <paramref name="source"/>.
/// </summary>
/// <param name="source">Reader to consume characters from.</param>
/// <param name="context">Passed to ResolveQName when a literal's datatype is written as a QName.</param>
/// <returns>
/// An empty string at end of stream; a <c>Literal</c> for a quoted string (with its optional
/// <c>@lang</c> or <c>^^datatype</c> suffix); otherwise the token text as a string. URI tokens
/// keep their surrounding angle brackets.
/// </returns>
private object ReadToken(MyReader source, ParseContext context) {
    ReadWhitespace(source);

    Location loc = new Location(source.Line, source.Col);

    int firstchar = source.Read();
    if (firstchar == -1)
        return "";

    // Reset and reuse the readTokenBuffer member instead of creating a new builder.
    StringBuilder b = readTokenBuffer;
    b.Length = 0;
    b.Append((char)firstchar);

    if (firstchar == '<') {
        // This is a URI or the <= verb. URIs can be escaped like strings, at least in the
        // NTriples spec.
        bool escaped = false;
        while (true) {
            int c = source.Read();
            if (c == -1) // NOTE(review): assumes OnError does not return; confirm.
                OnError("Unexpected end of stream within a token beginning with <", loc);

            // The buffer holds only the leading '<' here, so an unescaped '=' right after it
            // is the <= verb. (The check used to test for length 2, which could never match
            // a real "<=" and instead truncated URIs such as <a=b>.)
            if (!escaped && b.Length == 1 && c == '=')
                return "<=";

            if (escaped) {
                ReadEscapedChar((char)c, b, source, loc);
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else {
                b.Append((char)c);
                if (c == '>') // end of the URI
                    break;
            }
        }

    } else if (firstchar == '"') {
        // A string: ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
        b.Length = 0; // get rid of the open quote
        bool escaped = false;
        bool triplequoted = false;
        while (true) {
            int c = source.Read();
            if (c == -1)
                OnError("Unexpected end of stream within a string", loc);

            // Detect the """ opener. Only do this once: without the triplequoted guard the
            // empty long string """""" would re-trigger the opener on its closing quotes.
            // A quote that follows a backslash is an escaped character, not a delimiter,
            // so that "\"" is read as a one-character string.
            if (!escaped && !triplequoted && b.Length == 0 && c == '"' && source.Peek() == '"') {
                triplequoted = true;
                source.Read();
                continue;
            }

            if (escaped) {
                ReadEscapedChar((char)c, b, source, loc);
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else {
                if (c == '"') {
                    if (!triplequoted)
                        break;
                    if (source.Peek() == '"' && source.Peek2() == '"')
                        break;
                }
                b.Append((char)c);
            }
        }

        if (triplequoted) { // read the extra end quotes
            source.Read();
            source.Read();
        }

        // Capture the value now: the buffer is reused below and by the recursive call.
        string litvalue = b.ToString();
        string litlang = null;
        string litdt = null;

        // Strings can be suffixed with @langcode or ^^symbol (but not both?).
        if (source.Peek() == '@') {
            source.Read();
            b.Length = 0;
            while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
                b.Append((char)source.Read());
            litlang = b.ToString();
        } else if (source.Peek() == '^' && source.Peek2() == '^') {
            loc = new Location(source.Line, source.Col);
            source.Read();
            source.Read();
            litdt = ReadToken(source, context).ToString(); // better be a string URI
            if (litdt.StartsWith("<") && litdt.EndsWith(">")) {
                // Strip the angle brackets from a full URI.
                litdt = litdt.Substring(1, litdt.Length-2);
            } else if (litdt.IndexOf(":") != -1) {
                Resource r = ResolveQName(litdt, context, loc);
                if (r.Uri == null)
                    OnError("A literal datatype cannot be an anonymous entity", loc);
                litdt = r.Uri;
            }
        }

        return new Literal(litvalue, litlang, litdt);

    } else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_') {
        // Something starting with @
        // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
        // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
        while (true) {
            int c = source.Peek();
            if (c == -1 || (!char.IsLetterOrDigit((char)c) && c != '-' && c != '_' && c != ':'))
                break;
            b.Append((char)source.Read());
        }

    } else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-') {
        while (true) {
            int ci = source.Peek();
            if (ci == -1)
                break;

            // A closing bracket always ends a numeric token, even when no whitespace follows
            // it (e.g. "2);"); otherwise the bracket would be absorbed into the number.
            if (ci == ']' || ci == ')' || ci == '}')
                break;

            // punctuation followed by a space means the punctuation is
            // punctuation, and not part of this token
            if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
                break;

            if (char.IsWhiteSpace((char)ci))
                break;

            b.Append((char)source.Read());
        }

    } else if (firstchar == '=') {
        // "=" alone, or the "=>" verb.
        if (source.Peek() == (int)'>')
            b.Append((char)source.Read());

    } else if (firstchar == '[') {
        // The start of an anonymous node; the token is just "[".
    } else if (firstchar == '{') {
        return "{";
    } else if (firstchar == '(') {
        return "(";
    } else if (firstchar == ')') {
        return ")";
    } else {
        // Gather the rest of the unrecognized run so the error message can show it.
        while (true) {
            int c = source.Read();
            if (c == -1)
                break;
            if (char.IsWhiteSpace((char)c))
                break;
            b.Append((char)c);
        }
        OnError("Invalid token: " + b.ToString(), loc);
    }

    return b.ToString();
}
/// <summary>
/// Skips leading whitespace and reads the next token from <paramref name="source"/>.
/// </summary>
/// <param name="source">Reader to consume characters from.</param>
/// <param name="context">Passed to ResolveQName when a literal's datatype is written as a QName.</param>
/// <returns>
/// An empty string at end of stream; a <c>Literal</c> for a quoted string (with its optional
/// <c>@lang</c> or <c>^^datatype</c> suffix); otherwise the token text as a string. URI tokens
/// keep their surrounding angle brackets.
/// </returns>
private object ReadToken(MyReader source, ParseContext context) {
    ReadWhitespace(source);

    Location loc = new Location(source.Line, source.Col);

    int firstchar = source.Read();
    if (firstchar == -1)
        return "";

    // Reset and reuse the readTokenBuffer member instead of creating a new builder.
    StringBuilder b = readTokenBuffer;
    b.Length = 0;
    b.Append((char)firstchar);

    if (firstchar == '<') {
        // This is a URI or the <= verb. URIs can have \'s and <'s escaped with a backslash,
        // plus \u and \U notation.
        bool escaped = false;
        while (true) {
            int c = source.Read();
            if (c == -1) // NOTE(review): assumes OnError does not return; confirm.
                OnError("Unexpected end of stream within a token beginning with <", loc);

            // The buffer holds only the leading '<' here, so an unescaped '=' right after it
            // is the <= verb. (The check used to test for length 2, which could never match
            // a real "<=" and instead truncated URIs such as <a=b>.)
            if (!escaped && b.Length == 1 && c == '=')
                return "<=";

            if (escaped) {
                // The first flag should be set true only if not NTriples, and the second only
                // if NTriples, but we are flexible on reading.
                ReadEscapedChar((char)c, b, source, loc, true, true, false);
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else {
                b.Append((char)c);
                if (c == '>') // end of the URI
                    break;
            }
        }

    } else if (firstchar == '"') {
        // This can either be a string "..." or a longString """...""", which additionally
        // allows embedded \n, \r, and \t, and quotes.
        b.Length = 0; // get rid of the open quote
        bool escaped = false;
        bool triplequoted = false;
        while (true) {
            int c = source.Read();
            if (c == -1)
                OnError("Unexpected end of stream within a string", loc);

            // Check if this is started by three quotes. Once three quotes have been read we
            // must stop checking, or """""" could not be read. A quote that follows a
            // backslash is an escaped character, not part of an opening delimiter, so that
            // "\"" is read as a one-character string.
            if (!escaped && !triplequoted && b.Length == 0 && c == '"' && source.Peek() == '"') {
                triplequoted = true;
                source.Read();
                continue;
            }

            if (escaped) {
                // The last flag should be set true only if N3, but we are flexible on reading.
                ReadEscapedChar((char)c, b, source, loc, false, true, true);
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else {
                if (c == '"') {
                    if (!triplequoted)
                        break;
                    if (source.Peek() == '"' && source.Peek2() == '"')
                        break;
                }
                b.Append((char)c);
            }
        }

        if (triplequoted) { // read the extra end quotes
            source.Read();
            source.Read();
        }

        // Capture the value now: the buffer is reused below and by the recursive call.
        string litvalue = b.ToString();
        string litlang = null;
        string litdt = null;

        // Strings can be suffixed with @langcode or ^^symbol (but not both?).
        if (source.Peek() == '@') {
            source.Read();
            b.Length = 0;
            while (char.IsLetterOrDigit((char)source.Peek()) || source.Peek() == (int)'-')
                b.Append((char)source.Read());
            litlang = b.ToString();
        } else if (source.Peek() == '^' && source.Peek2() == '^') {
            loc = new Location(source.Line, source.Col);
            source.Read();
            source.Read();
            litdt = ReadToken(source, context).ToString(); // better be a string URI
            if (litdt.StartsWith("<") && litdt.EndsWith(">")) {
                // Strip the angle brackets from a full URI.
                litdt = litdt.Substring(1, litdt.Length-2);
            } else if (litdt.IndexOf(":") != -1) {
                Resource r = ResolveQName(litdt, context, loc);
                if (r.Uri == null)
                    OnError("A literal datatype cannot be an anonymous entity", loc);
                litdt = r.Uri;
            }
        }

        Literal literal = new Literal(litvalue, litlang, litdt);
        ValidateLiteral(literal);
        return literal;

    } else if (char.IsLetter((char)firstchar) || firstchar == '?' || firstchar == '@' || firstchar == ':' || firstchar == '_') {
        // Something starting with @
        // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
        // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
        while (true) {
            int c = source.Peek();
            if (c == -1 || (!Entity.ValidateUriIsIUnreserved((char)c) && c != ':') || c == '.')
                break;
            b.Append((char)source.Read());
        }

    } else if (char.IsDigit((char)firstchar) || firstchar == '+' || firstchar == '-') {
        while (true) {
            int ci = source.Peek();
            if (ci == -1)
                break;
            if (ci == ']' || ci == ')' || ci == '}')
                break;

            // punctuation followed by a space means the punctuation is
            // punctuation, and not part of this token
            if (!char.IsDigit((char)ci) && source.Peek2() != -1 && char.IsWhiteSpace((char)source.Peek2()))
                break;

            if (char.IsWhiteSpace((char)ci))
                break;

            b.Append((char)source.Read());
        }

    } else if (firstchar == '=') {
        if (source.Peek() == (int)'>')
            b.Append((char)source.Read());

        if (source.Peek() == (int)':' && source.Peek2() == (int)'>') { // SPECIAL EXTENSION "=:>"
            b.Append((char)source.Read());
            b.Append((char)source.Read());
        }

    } else if (firstchar == '[') {
        // The start of an anonymous node; the token is just "[".
    } else if (firstchar == '{') {
        return "{";
    } else if (firstchar == '(') {
        return "(";
    } else if (firstchar == ')') {
        return ")";
    } else {
        // Gather the rest of the unrecognized run so the error message can show it.
        while (true) {
            int c = source.Read();
            if (c == -1)
                break;
            if (char.IsWhiteSpace((char)c))
                break;
            b.Append((char)c);
        }
        OnError("Invalid token: " + b.ToString(), loc);
    }

    return b.ToString();
}