/// <summary>
/// Checks an IRI against the <c>IrirefMinusContainer</c> pattern and rejects it when the pattern does not match.
/// </summary>
/// <param name="state">Parser state; its <c>lineNumber</c> and <c>linePosition</c> are embedded in the error message.</param>
/// <param name="iri">The IRI text to check (callers pass it after unescaping/expansion).</param>
/// <exception cref="JsonLD.Core.JsonLdError">Thrown with <c>ParseError</c> when the pattern does not match <paramref name="iri"/>.</exception>
private void ValidateIRI(TurtleRDFParser.State state, string iri)
{
    bool wellFormed = IrirefMinusContainer.Matcher(iri).Matches();
    if (wellFormed)
    {
        return;
    }

    // Location suffix appended to the fixed message text.
    string location = "{line: " + state.lineNumber + "position: " + state.linePosition + "}";
    throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; invalid IRI after escaping. " + location);
}
/// <summary>
/// Parses a Turtle document, supplied as a string token, into an <see cref="RDFDataset"/>.
/// </summary>
/// <remarks>
/// The parser repeatedly inspects the start of <c>state.line</c> and consumes what it recognised with
/// <c>state.AdvanceLinePosition</c>, until <c>state.line</c> is the empty string. Each pass through the
/// loop handles, in order: a directive, a subject (if none is current), a predicate (if none is
/// current), one object / blank node / collection, and finally the closing or separating punctuation
/// (<c>)</c>, <c>]</c>, <c>,</c>, <c>;</c>, <c>.</c>). Nested blank nodes and collections save and restore
/// the current subject/predicate through <c>state.Push()</c> and <c>state.Pop()</c>.
/// </remarks>
/// <param name="input">Token whose <c>Type</c> must be <c>JTokenType.String</c>; its string value is the Turtle text.</param>
/// <returns>The dataset holding every triple and namespace added while parsing.</returns>
/// <exception cref="JsonLD.Core.JsonLdError">
/// <c>InvalidInput</c> when <paramref name="input"/> is not a string token; <c>ParseError</c> when an expected
/// subject, predicate, object or terminator is missing, a <c>)</c> is unexpected, or an IRI fails validation.
/// </exception>
public virtual RDFDataset Parse(JToken input)
{
    if (input.Type != JTokenType.String)
    {
        throw new JsonLdError(JsonLdError.Error.InvalidInput, "Invalid input; Triple RDF Parser requires a string input");
    }

    RDFDataset result = new RDFDataset();
    TurtleRDFParser.State state = new TurtleRDFParser.State(this, (string)input);

    while (!string.Empty.Equals(state.line))
    {
        // check if line is a directive
        Matcher match = TurtleRDFParser.Regex.Directive.Matcher(state.line);
        if (match.Find())
        {
            if (match.Group(1) != null || match.Group(4) != null)
            {
                // Namespace declaration: groups (1,2) or (4,5) carry the prefix and its IRI.
                // NOTE(review): presumably the two alternatives are the "@prefix" and "PREFIX"
                // spellings; confirm against Regex.Directive.
                string ns = match.Group(1) != null ? match.Group(1) : match.Group(4);
                string iri = match.Group(1) != null ? match.Group(2) : match.Group(5);
                if (!iri.Contains(":"))
                {
                    // no ':' in the IRI: treat it as relative to the current base
                    iri = state.baseIri + iri;
                }
                iri = RDFDatasetUtils.Unescape(iri);
                ValidateIRI(state, iri);
                state.namespaces[ns] = iri;
                result.SetNamespace(ns, iri);
            }
            else
            {
                // Base declaration: group 3 or group 6 carries the new base.
                string declaredBase = match.Group(3) != null ? match.Group(3) : match.Group(6);
                declaredBase = RDFDatasetUtils.Unescape(declaredBase);
                ValidateIRI(state, declaredBase);
                if (!declaredBase.Contains(":"))
                {
                    // no ':' in the new base: append it to the existing base
                    state.baseIri = state.baseIri + declaredBase;
                }
                else
                {
                    state.baseIri = declaredBase;
                }
            }
            state.AdvanceLinePosition(match.Group(0).Length);
            continue;
        }

        if (state.curSubject == null)
        {
            // we need to match a subject
            match = TurtleRDFParser.Regex.Subject.Matcher(state.line);
            if (match.Find())
            {
                string iri;
                if (match.Group(1) != null)
                {
                    // matched IRI
                    iri = RDFDatasetUtils.Unescape(match.Group(1));
                    if (!iri.Contains(":"))
                    {
                        iri = state.baseIri + iri;
                    }
                }
                else if (match.Group(2) != null)
                {
                    // matched NS:NAME
                    string ns = match.Group(2);
                    string name = UnescapeReserved(match.Group(3));
                    iri = state.ExpandIRI(ns, name);
                }
                else if (match.Group(4) != null)
                {
                    // match ns: only
                    iri = state.ExpandIRI(match.Group(4), string.Empty);
                }
                else if (match.Group(5) != null)
                {
                    // matched BNODE
                    iri = state.namer.GetName(match.Group(0).Trim());
                }
                else
                {
                    // matched anon node
                    iri = state.namer.GetName();
                }

                // make sure IRI still matches an IRI after escaping
                ValidateIRI(state, iri);
                state.curSubject = iri;
                state.AdvanceLinePosition(match.Group(0).Length);
            }
            else if (state.line.StartsWith("["))
            {
                // handle blank nodes
                string bnode = state.namer.GetName();
                state.AdvanceLinePosition(1);
                state.Push();
                state.curSubject = bnode;
            }
            else if (state.line.StartsWith("("))
            {
                // handle collections
                string bnode = state.namer.GetName();
                // so we know we want a predicate if the collection close
                // isn't followed by a subject end
                state.curSubject = bnode;
                state.AdvanceLinePosition(1);
                state.Push();
                state.curSubject = bnode;
                state.curPredicate = JSONLDConsts.RdfFirst;
            }
            else
            {
                // make sure we have a subject already
                throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; missing expected subject. {line: " + state.lineNumber + "position: " + state.linePosition + "}");
            }
        }

        if (state.curPredicate == null)
        {
            // match predicate
            match = TurtleRDFParser.Regex.Predicate.Matcher(state.line);
            if (match.Find())
            {
                string iri = string.Empty;
                if (match.Group(1) != null)
                {
                    // matched IRI
                    iri = RDFDatasetUtils.Unescape(match.Group(1));
                    if (!iri.Contains(":"))
                    {
                        iri = state.baseIri + iri;
                    }
                }
                else if (match.Group(2) != null)
                {
                    // matched NS:NAME
                    string ns = match.Group(2);
                    string name = UnescapeReserved(match.Group(3));
                    iri = state.ExpandIRI(ns, name);
                }
                else if (match.Group(4) != null)
                {
                    // matched ns:
                    iri = state.ExpandIRI(match.Group(4), string.Empty);
                }
                else
                {
                    // matched "a"
                    iri = JSONLDConsts.RdfType;
                }
                ValidateIRI(state, iri);
                state.curPredicate = iri;
                state.AdvanceLinePosition(match.Group(0).Length);
            }
            else
            {
                throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; missing expected predicate. {line: " + state.lineNumber + "position: " + state.linePosition + "}");
            }
        }

        // expecting bnode or object

        // match BNODE values
        if (state.line.StartsWith("["))
        {
            string bnode = state.namer.GetName();
            result.AddTriple(state.curSubject, state.curPredicate, bnode);
            state.AdvanceLinePosition(1);
            // check for anonymous objects
            if (state.line.StartsWith("]"))
            {
                state.AdvanceLinePosition(1);
                // next we expect a statement or object separator
            }
            else
            {
                // otherwise we're inside the blank node
                state.Push();
                state.curSubject = bnode;
                // next we expect a predicate
                continue;
            }
        }
        else if (state.line.StartsWith("("))
        {
            // match collections
            state.AdvanceLinePosition(1);
            // check for empty collection
            if (state.line.StartsWith(")"))
            {
                state.AdvanceLinePosition(1);
                result.AddTriple(state.curSubject, state.curPredicate, JSONLDConsts.RdfNil);
                // next we expect a statement or object separator
            }
            else
            {
                // otherwise we're inside the collection
                string bnode = state.namer.GetName();
                result.AddTriple(state.curSubject, state.curPredicate, bnode);
                state.Push();
                state.curSubject = bnode;
                state.curPredicate = JSONLDConsts.RdfFirst;
                continue;
            }
        }
        else
        {
            // match object
            match = TurtleRDFParser.Regex.Object.Matcher(state.line);
            if (match.Find())
            {
                string iri = null;
                if (match.Group(1) != null)
                {
                    // matched IRI
                    iri = RDFDatasetUtils.Unescape(match.Group(1));
                    if (!iri.Contains(":"))
                    {
                        iri = state.baseIri + iri;
                    }
                }
                else if (match.Group(2) != null)
                {
                    // matched NS:NAME
                    string ns = match.Group(2);
                    string name = UnescapeReserved(match.Group(3));
                    iri = state.ExpandIRI(ns, name);
                }
                else if (match.Group(4) != null)
                {
                    // matched ns:
                    iri = state.ExpandIRI(match.Group(4), string.Empty);
                }
                else if (match.Group(5) != null)
                {
                    // matched BNODE
                    iri = state.namer.GetName(match.Group(0).Trim());
                }

                if (iri != null)
                {
                    ValidateIRI(state, iri);
                    // we have a object
                    result.AddTriple(state.curSubject, state.curPredicate, iri);
                }
                else
                {
                    // we have a literal
                    string value = match.Group(6);
                    string lang = null;
                    string datatype = null;
                    if (value != null)
                    {
                        // we have a string literal
                        value = UnquoteString(value);
                        value = RDFDatasetUtils.Unescape(value);
                        lang = match.Group(7);
                        if (lang == null)
                        {
                            if (match.Group(8) != null)
                            {
                                // datatype given as an IRI
                                datatype = RDFDatasetUtils.Unescape(match.Group(8));
                                if (!datatype.Contains(":"))
                                {
                                    datatype = state.baseIri + datatype;
                                }
                                ValidateIRI(state, datatype);
                            }
                            else if (match.Group(9) != null)
                            {
                                // datatype given as NS:NAME
                                datatype = state.ExpandIRI(match.Group(9), UnescapeReserved(match.Group(10)));
                            }
                            else if (match.Group(11) != null)
                            {
                                // datatype given as ns: only
                                datatype = state.ExpandIRI(match.Group(11), string.Empty);
                            }
                        }
                        else
                        {
                            // a language tag is present
                            datatype = JSONLDConsts.RdfLangstring;
                        }
                    }
                    // NOTE(review): the labels inherited from the original code call group 12
                    // "integer" and group 14 "double", which disagrees with the datatypes assigned
                    // below. Regex.Object is not visible from here, so the assignments are kept
                    // exactly as they were; confirm the capture-group order before changing either.
                    else if (match.Group(12) != null)
                    {
                        value = match.Group(12);
                        datatype = JSONLDConsts.XsdDouble;
                    }
                    else if (match.Group(13) != null)
                    {
                        // decimal literal
                        value = match.Group(13);
                        datatype = JSONLDConsts.XsdDecimal;
                    }
                    else if (match.Group(14) != null)
                    {
                        value = match.Group(14);
                        datatype = JSONLDConsts.XsdInteger;
                    }
                    else if (match.Group(15) != null)
                    {
                        // boolean literal
                        value = match.Group(15);
                        datatype = JSONLDConsts.XsdBoolean;
                    }
                    result.AddTriple(state.curSubject, state.curPredicate, value, datatype, lang);
                }
                state.AdvanceLinePosition(match.Group(0).Length);
            }
            else
            {
                throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; missing expected object or blank node. {line: " + state.lineNumber + "position: " + state.linePosition + "}");
            }
        }

        // close collection
        bool collectionClosed = false;
        while (state.line.StartsWith(")"))
        {
            if (!JSONLDConsts.RdfFirst.Equals(state.curPredicate))
            {
                throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; unexpected ). {line: " + state.lineNumber + "position: " + state.linePosition + "}");
            }
            // terminate the list, then restore the state saved when the collection was opened
            result.AddTriple(state.curSubject, JSONLDConsts.RdfRest, JSONLDConsts.RdfNil);
            state.Pop();
            state.AdvanceLinePosition(1);
            collectionClosed = true;
        }

        bool expectDotOrPred = false;

        // match end of bnode
        if (state.line.StartsWith("]"))
        {
            string bnode = state.curSubject;
            state.Pop();
            state.AdvanceLinePosition(1);
            if (state.curSubject == null)
            {
                // this is a bnode as a subject and we
                // expect either a . or a predicate
                state.curSubject = bnode;
                expectDotOrPred = true;
            }
        }

        // match list separator
        if (!expectDotOrPred && state.line.StartsWith(","))
        {
            state.AdvanceLinePosition(1);
            // now we expect another object/bnode
            continue;
        }

        // match predicate end
        if (!expectDotOrPred)
        {
            while (state.line.StartsWith(";"))
            {
                state.curPredicate = null;
                state.AdvanceLinePosition(1);
                // now we expect another predicate, or a dot
                expectDotOrPred = true;
            }
        }

        if (state.line.StartsWith("."))
        {
            if (state.expectingBnodeClose)
            {
                throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; missing expected )\"]\". {line: " + state.lineNumber + "position: " + state.linePosition + "}");
            }
            state.curSubject = null;
            state.curPredicate = null;
            state.AdvanceLinePosition(1);
            // this can now be the end of the document.
            continue;
        }
        else if (expectDotOrPred)
        {
            // we're expecting another predicate since we didn't find a dot
            continue;
        }

        // if we're in a collection
        if (JSONLDConsts.RdfFirst.Equals(state.curPredicate))
        {
            // link the current list cell to a fresh one and keep reading items into it
            string bnode = state.namer.GetName();
            result.AddTriple(state.curSubject, JSONLDConsts.RdfRest, bnode);
            state.curSubject = bnode;
            continue;
        }

        if (collectionClosed)
        {
            // we expect another object
            // TODO: it's not clear yet if this is valid
            continue;
        }

        // if we get here, we're missing a close statement
        throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; missing expected \"]\" \",\" \";\" or \".\". {line: " + state.lineNumber + "position: " + state.linePosition + "}");
    }

    return result;
}