Beispiel #1
0
 /// <summary>
 /// Checks that <paramref name="iri"/> is accepted by the <c>IrirefMinusContainer</c> pattern
 /// and reports a parse error at the parser's current location when it is not.
 /// </summary>
 /// <param name="state">Parser state; only its <c>lineNumber</c> and <c>linePosition</c> are read, for the error message.</param>
 /// <param name="iri">The IRI to check, after unescaping / prefix expansion by the caller.</param>
 /// <exception cref="JsonLD.Core.JsonLdError">
 /// Thrown with <c>JsonLdError.Error.ParseError</c> when the pattern does not match.
 /// </exception>
 private void ValidateIRI(TurtleRDFParser.State state, string iri)
 {
     if (IrirefMinusContainer.Matcher(iri).Matches())
     {
         return;
     }

     // The separator before "position" was missing, which rendered e.g. "{line: 12position: 4}".
     throw new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; invalid IRI after escaping. {line: "
                           + state.lineNumber + ", position: " + state.linePosition + "}");
 }
Beispiel #2
0
        /// <summary>
        /// Parses Turtle text carried in a string token into an <see cref="RDFDataset"/>.
        /// </summary>
        /// <remarks>
        /// The text is consumed through a <c>TurtleRDFParser.State</c>: each loop iteration matches
        /// one construct at the start of <c>state.line</c> (directive, subject, predicate, object,
        /// blank node, collection or separator), advances the position past it, and repeats until
        /// <c>state.line</c> is empty. Triples and prefix mappings are added to the returned dataset
        /// as they are recognised.
        /// </remarks>
        /// <param name="input">A token whose <c>Type</c> is <c>JTokenType.String</c>; its string value is the Turtle document.</param>
        /// <returns>The dataset populated with every triple and namespace found in the input.</returns>
        /// <exception cref="JsonLD.Core.JsonLdError">
        /// <c>InvalidInput</c> when <paramref name="input"/> is null or not a string token;
        /// <c>ParseError</c> when the text does not follow the expected structure or yields an invalid IRI.
        /// </exception>
        public virtual RDFDataset Parse(JToken input)
        {
            // A null token is reported the same way as a non-string token instead of
            // surfacing as a NullReferenceException.
            if (input == null || input.Type != JTokenType.String)
            {
                throw new JsonLdError(JsonLdError.Error.InvalidInput, "Invalid input; Triple RDF Parser requires a string input"
                                      );
            }
            RDFDataset result = new RDFDataset();

            TurtleRDFParser.State state = new TurtleRDFParser.State(this, (string)input);
            while (!string.Empty.Equals(state.line))
            {
                // check if line is a directive
                Matcher match = TurtleRDFParser.Regex.Directive.Matcher(state.line);
                if (match.Find())
                {
                    if (match.Group(1) != null || match.Group(4) != null)
                    {
                        // prefix declaration: the prefix is in group 1 or 4, its IRI in group 2 or 5
                        bool firstForm = match.Group(1) != null;
                        string ns  = firstForm ? match.Group(1) : match.Group(4);
                        string iri = firstForm ? match.Group(2) : match.Group(5);

                        // no ':' means the IRI is relative to the current base
                        if (!iri.Contains(":"))
                        {
                            iri = state.baseIri + iri;
                        }
                        iri = RDFDatasetUtils.Unescape(iri);
                        ValidateIRI(state, iri);
                        state.namespaces[ns] = iri;
                        result.SetNamespace(ns, iri);
                    }
                    else
                    {
                        // base declaration: the IRI is in group 3 or 6
                        string @base = match.Group(3) != null ? match.Group(3) : match.Group(6);

                        @base = RDFDatasetUtils.Unescape(@base);
                        ValidateIRI(state, @base);
                        // a base without ':' is relative and extends the current base
                        if (!@base.Contains(":"))
                        {
                            state.baseIri = state.baseIri + @base;
                        }
                        else
                        {
                            state.baseIri = @base;
                        }
                    }
                    state.AdvanceLinePosition(match.Group(0).Length);
                    continue;
                }

                if (state.curSubject == null)
                {
                    // we need to match a subject
                    match = TurtleRDFParser.Regex.Subject.Matcher(state.line);
                    if (match.Find())
                    {
                        string iri;
                        if (match.Group(1) != null)
                        {
                            // matched IRI
                            iri = RDFDatasetUtils.Unescape(match.Group(1));
                            if (!iri.Contains(":"))
                            {
                                iri = state.baseIri + iri;
                            }
                        }
                        else if (match.Group(2) != null)
                        {
                            // matched NS:NAME
                            string ns   = match.Group(2);
                            string name = UnescapeReserved(match.Group(3));
                            iri = state.ExpandIRI(ns, name);
                        }
                        else if (match.Group(4) != null)
                        {
                            // matched ns: only
                            iri = state.ExpandIRI(match.Group(4), string.Empty);
                        }
                        else if (match.Group(5) != null)
                        {
                            // matched BNODE
                            iri = state.namer.GetName(match.Group(0).Trim());
                        }
                        else
                        {
                            // matched anon node
                            iri = state.namer.GetName();
                        }
                        // make sure IRI still matches an IRI after escaping
                        ValidateIRI(state, iri);
                        state.curSubject = iri;
                        state.AdvanceLinePosition(match.Group(0).Length);
                    }
                    else if (LineStartsWith(state, "["))
                    {
                        // handle blank nodes
                        string bnode = state.namer.GetName();
                        state.AdvanceLinePosition(1);
                        state.Push();
                        state.curSubject = bnode;
                    }
                    else if (LineStartsWith(state, "("))
                    {
                        // handle collections
                        string bnode = state.namer.GetName();
                        // so we know we want a predicate if the collection close
                        // isn't followed by a subject end
                        state.curSubject = bnode;
                        state.AdvanceLinePosition(1);
                        state.Push();
                        state.curSubject   = bnode;
                        state.curPredicate = JSONLDConsts.RdfFirst;
                    }
                    else
                    {
                        // make sure we have a subject already
                        throw CreateTurtleParseError(state, "missing expected subject");
                    }
                }

                if (state.curPredicate == null)
                {
                    // match predicate
                    match = TurtleRDFParser.Regex.Predicate.Matcher(state.line);
                    if (!match.Find())
                    {
                        throw CreateTurtleParseError(state, "missing expected predicate");
                    }

                    string iri;
                    if (match.Group(1) != null)
                    {
                        // matched IRI
                        iri = RDFDatasetUtils.Unescape(match.Group(1));
                        if (!iri.Contains(":"))
                        {
                            iri = state.baseIri + iri;
                        }
                    }
                    else if (match.Group(2) != null)
                    {
                        // matched NS:NAME
                        string ns   = match.Group(2);
                        string name = UnescapeReserved(match.Group(3));
                        iri = state.ExpandIRI(ns, name);
                    }
                    else if (match.Group(4) != null)
                    {
                        // matched ns:
                        iri = state.ExpandIRI(match.Group(4), string.Empty);
                    }
                    else
                    {
                        // matched "a"
                        iri = JSONLDConsts.RdfType;
                    }
                    ValidateIRI(state, iri);
                    state.curPredicate = iri;
                    state.AdvanceLinePosition(match.Group(0).Length);
                }

                // expecting bnode or object
                if (LineStartsWith(state, "["))
                {
                    // match BNODE values
                    string bnode = state.namer.GetName();
                    result.AddTriple(state.curSubject, state.curPredicate, bnode);
                    state.AdvanceLinePosition(1);
                    // check for anonymous objects
                    if (LineStartsWith(state, "]"))
                    {
                        state.AdvanceLinePosition(1);
                    }
                    else
                    {
                        // next we expect a statement or object separator
                        // otherwise we're inside the blank node
                        state.Push();
                        state.curSubject = bnode;
                        // next we expect a predicate
                        continue;
                    }
                }
                else if (LineStartsWith(state, "("))
                {
                    // match collections
                    state.AdvanceLinePosition(1);
                    // check for empty collection
                    if (LineStartsWith(state, ")"))
                    {
                        state.AdvanceLinePosition(1);
                        result.AddTriple(state.curSubject, state.curPredicate, JSONLDConsts.RdfNil);
                    }
                    else
                    {
                        // next we expect a statement or object separator
                        // otherwise we're inside the collection
                        string bnode = state.namer.GetName();
                        result.AddTriple(state.curSubject, state.curPredicate, bnode);
                        state.Push();
                        state.curSubject   = bnode;
                        state.curPredicate = JSONLDConsts.RdfFirst;
                        continue;
                    }
                }
                else
                {
                    // match object
                    match = TurtleRDFParser.Regex.Object.Matcher(state.line);
                    if (!match.Find())
                    {
                        throw CreateTurtleParseError(state, "missing expected object or blank node");
                    }

                    // stays null when the object is a literal rather than an IRI / blank node
                    string iri = null;
                    if (match.Group(1) != null)
                    {
                        // matched IRI
                        iri = RDFDatasetUtils.Unescape(match.Group(1));
                        if (!iri.Contains(":"))
                        {
                            iri = state.baseIri + iri;
                        }
                    }
                    else if (match.Group(2) != null)
                    {
                        // matched NS:NAME
                        string ns   = match.Group(2);
                        string name = UnescapeReserved(match.Group(3));
                        iri = state.ExpandIRI(ns, name);
                    }
                    else if (match.Group(4) != null)
                    {
                        // matched ns:
                        iri = state.ExpandIRI(match.Group(4), string.Empty);
                    }
                    else if (match.Group(5) != null)
                    {
                        // matched BNODE
                        iri = state.namer.GetName(match.Group(0).Trim());
                    }

                    if (iri != null)
                    {
                        ValidateIRI(state, iri);
                        // we have a object
                        result.AddTriple(state.curSubject, state.curPredicate, iri);
                    }
                    else
                    {
                        // we have a literal
                        string value    = match.Group(6);
                        string lang     = null;
                        string datatype = null;
                        if (value != null)
                        {
                            // we have a string literal
                            value = UnquoteString(value);
                            value = RDFDatasetUtils.Unescape(value);
                            lang  = match.Group(7);
                            if (lang != null)
                            {
                                datatype = JSONLDConsts.RdfLangstring;
                            }
                            else if (match.Group(8) != null)
                            {
                                // datatype given as an IRI
                                datatype = RDFDatasetUtils.Unescape(match.Group(8));
                                if (!datatype.Contains(":"))
                                {
                                    datatype = state.baseIri + datatype;
                                }
                                ValidateIRI(state, datatype);
                            }
                            else if (match.Group(9) != null)
                            {
                                // datatype given as NS:NAME
                                datatype = state.ExpandIRI(match.Group(9), UnescapeReserved(match.Group(10)));
                            }
                            else if (match.Group(11) != null)
                            {
                                // datatype given as ns:
                                datatype = state.ExpandIRI(match.Group(11), string.Empty);
                            }
                        }
                        else if (match.Group(12) != null)
                        {
                            // NOTE(review): the previous comment called this group "integer literal"
                            // yet it is typed XsdDouble; the Object regex is not visible here, so the
                            // datatype is left unchanged. Confirm the group order against the regex.
                            value    = match.Group(12);
                            datatype = JSONLDConsts.XsdDouble;
                        }
                        else if (match.Group(13) != null)
                        {
                            // decimal literal
                            value    = match.Group(13);
                            datatype = JSONLDConsts.XsdDecimal;
                        }
                        else if (match.Group(14) != null)
                        {
                            // NOTE(review): the previous comment called this group "double literal"
                            // yet it is typed XsdInteger; see the note on group 12.
                            value    = match.Group(14);
                            datatype = JSONLDConsts.XsdInteger;
                        }
                        else if (match.Group(15) != null)
                        {
                            // boolean literal
                            value    = match.Group(15);
                            datatype = JSONLDConsts.XsdBoolean;
                        }
                        result.AddTriple(state.curSubject, state.curPredicate, value, datatype, lang);
                    }
                    state.AdvanceLinePosition(match.Group(0).Length);
                }

                // close collection
                bool collectionClosed = false;
                while (LineStartsWith(state, ")"))
                {
                    if (!JSONLDConsts.RdfFirst.Equals(state.curPredicate))
                    {
                        throw CreateTurtleParseError(state, "unexpected )");
                    }
                    result.AddTriple(state.curSubject, JSONLDConsts.RdfRest, JSONLDConsts.RdfNil);
                    state.Pop();
                    state.AdvanceLinePosition(1);
                    collectionClosed = true;
                }

                bool expectDotOrPred = false;
                // match end of bnode
                if (LineStartsWith(state, "]"))
                {
                    string bnode = state.curSubject;
                    state.Pop();
                    state.AdvanceLinePosition(1);
                    if (state.curSubject == null)
                    {
                        // this is a bnode as a subject and we
                        // expect either a . or a predicate
                        state.curSubject = bnode;
                        expectDotOrPred  = true;
                    }
                }

                // match list separator
                if (!expectDotOrPred && LineStartsWith(state, ","))
                {
                    state.AdvanceLinePosition(1);
                    // now we expect another object/bnode
                    continue;
                }

                // match predicate end
                if (!expectDotOrPred)
                {
                    while (LineStartsWith(state, ";"))
                    {
                        state.curPredicate = null;
                        state.AdvanceLinePosition(1);
                        // now we expect another predicate, or a dot
                        expectDotOrPred = true;
                    }
                }

                if (LineStartsWith(state, "."))
                {
                    if (state.expectingBnodeClose)
                    {
                        // NOTE(review): the ')' before "]" in this message looks like a typo; left as is.
                        throw CreateTurtleParseError(state, "missing expected )\"]\"");
                    }
                    state.curSubject   = null;
                    state.curPredicate = null;
                    state.AdvanceLinePosition(1);
                    // this can now be the end of the document.
                    continue;
                }

                if (expectDotOrPred)
                {
                    // we're expecting another predicate since we didn't find a dot
                    continue;
                }

                // if we're in a collection
                if (JSONLDConsts.RdfFirst.Equals(state.curPredicate))
                {
                    string bnode = state.namer.GetName();
                    result.AddTriple(state.curSubject, JSONLDConsts.RdfRest, bnode);
                    state.curSubject = bnode;
                    continue;
                }

                if (collectionClosed)
                {
                    // we expect another object
                    // TODO: it's not clear yet if this is valid
                    continue;
                }

                // if we get here, we're missing a close statement
                throw CreateTurtleParseError(state, "missing expected \"]\" \",\" \";\" or \".\"");
            }
            return(result);
        }

        /// <summary>
        /// Tests, by ordinal comparison, whether the unparsed text in <paramref name="state"/> begins with <paramref name="token"/>.
        /// </summary>
        /// <remarks>
        /// Ordinal comparison keeps the check culture-independent, so the number of characters
        /// subsequently skipped always corresponds to the token that was actually matched.
        /// </remarks>
        private static bool LineStartsWith(TurtleRDFParser.State state, string token)
        {
            return state.line.StartsWith(token, System.StringComparison.Ordinal);
        }

        /// <summary>
        /// Builds the <c>ParseError</c> exception for <paramref name="problem"/>, tagged with the
        /// line number and position currently recorded in <paramref name="state"/>.
        /// </summary>
        private static JsonLdError CreateTurtleParseError(TurtleRDFParser.State state, string problem)
        {
            // ", " separates the two fields; they were previously run together ("{line: 12position: 4}").
            return new JsonLdError(JsonLdError.Error.ParseError, "Error while parsing Turtle; " + problem + ". {line: "
                                   + state.lineNumber + ", position: " + state.linePosition + "}");
        }