Esempio n. 1
0
        /// <summary>
        /// Deserializes the given TriX stream to a memory store.
        /// </summary>
        /// <param name="inputStream">Stream containing the TriX (XML) document. It is read as UTF-8 and is
        /// closed together with the reader wrapping it.</param>
        /// <returns>A store into which every graph found in the document has been merged.</returns>
        /// <exception cref="RDFModelException">Raised for any failure (XML loading or unexpected TriX structure);
        /// the original error is preserved as inner exception.</exception>
        internal static RDFMemoryStore Deserialize(Stream inputStream)
        {
            try
            {
                #region deserialize
                RDFMemoryStore result = new RDFMemoryStore();
                Dictionary<Int64, RDFGraph> graphs = new Dictionary<Int64, RDFGraph>();
                using (StreamReader streamReader = new StreamReader(inputStream, Encoding.UTF8))
                {
                    using (XmlTextReader trixReader = new XmlTextReader(streamReader))
                    {
                        trixReader.DtdProcessing = DtdProcessing.Parse;
                        trixReader.Normalization = false;
                        //Internal DTD subsets are still parsed, but external DTDs/entities referenced
                        //by the (possibly untrusted) document must never be fetched
                        trixReader.XmlResolver = null;

                        #region document
                        XmlDocument trixDoc = new XmlDocument();
                        trixDoc.Load(trixReader);
                        #endregion

                        #region graph
                        if (trixDoc.DocumentElement != null)
                        {
                            #region graphs extraction
                            foreach (XmlNode graphNode in GetTrixChildNodes(trixDoc.DocumentElement))
                            {
                                if (!graphNode.Name.Equals("graph", StringComparison.Ordinal))
                                {
                                    throw new RDFModelException(" a \"<graph>\" element was expected, instead of unrecognized \"<" + graphNode.Name + ">\".");
                                }

                                //Until a "<uri>" child is met, triples belong to the default-namespace graph
                                Uri   graphUri = RDFNamespaceRegister.DefaultNamespace.NamespaceUri;
                                Int64 graphID  = RDFNamespaceRegister.DefaultNamespace.NamespaceID;
                                if (!graphs.ContainsKey(graphID))
                                {
                                    graphs.Add(graphID, new RDFGraph().SetContext(graphUri));
                                }

                                Int32 encodedUris = 0;
                                foreach (XmlNode graphChild in GetTrixChildNodes(graphNode))
                                {
                                    //Terms are collected only for "<triple>" elements
                                    List<XmlNode> terms = graphChild.Name.Equals("triple", StringComparison.Ordinal)
                                                              ? GetTrixChildNodes(graphChild)
                                                              : null;

                                    #region uri
                                    if (graphChild.Name.Equals("uri", StringComparison.Ordinal))
                                    {
                                        encodedUris++;
                                        if (encodedUris > 1)
                                        {
                                            throw new RDFModelException(" given file encodes a graph with more than one \"<uri>\" element.");
                                        }

                                        //The element's own text is used (instead of its first child node),
                                        //so an empty "<uri/>" cannot cause a null dereference here
                                        graphUri = RDFModelUtilities.GetUriFromString(graphChild.InnerText);
                                        if (graphUri == null)
                                        {
                                            throw new RDFModelException(" given file encodes a graph with an empty or invalid \"<uri>\" element.");
                                        }
                                        graphID = RDFModelUtilities.CreateHash(graphUri.ToString());
                                        if (!graphs.ContainsKey(graphID))
                                        {
                                            graphs.Add(graphID, new RDFGraph().SetContext(graphUri));
                                        }
                                    }
                                    #endregion

                                    #region triple
                                    else if (terms != null && terms.Count == 3)
                                    {
                                        AddTrixTriple(graphs[graphID], terms);
                                    }
                                    #endregion

                                    #region exception
                                    else
                                    {
                                        throw new RDFModelException("found a TriX element (" + graphChild.Name + ") which is neither \"<uri>\" or \"<triple>\", or is a \"<triple>\" without the required 3 childs.");
                                    }
                                    #endregion
                                }
                            }
                            #endregion

                            #region graphs merging
                            foreach (RDFGraph collectedGraph in graphs.Values)
                            {
                                result.MergeGraph(collectedGraph);
                            }
                            #endregion
                        }
                        #endregion
                    }
                }
                return result;

                #endregion
            }
            catch (Exception ex)
            {
                throw new RDFModelException("Cannot deserialize TriX because: " + ex.Message, ex);
            }
        }

        /// <summary>
        /// Collects the child nodes of the given node, leaving out comments, processing instructions
        /// and whitespace nodes. Any other node kind (e.g. stray text) is kept, so that the caller
        /// still reports it as an unexpected TriX element.
        /// </summary>
        private static List<XmlNode> GetTrixChildNodes(XmlNode parent)
        {
            List<XmlNode> children = new List<XmlNode>();
            foreach (XmlNode child in parent.ChildNodes)
            {
                switch (child.NodeType)
                {
                    case XmlNodeType.Comment:
                    case XmlNodeType.ProcessingInstruction:
                    case XmlNodeType.Whitespace:
                    case XmlNodeType.SignificantWhitespace:
                        break;

                    default:
                        children.Add(child);
                        break;
                }
            }
            return children;
        }

        /// <summary>
        /// Tells if the given TriX term is a resource ("&lt;uri&gt;") or a blank node ("&lt;id&gt;").
        /// </summary>
        private static Boolean IsTrixResource(XmlNode term)
        {
            return term.Name.Equals("uri", StringComparison.Ordinal)
                       || term.Name.Equals("id", StringComparison.Ordinal);
        }

        /// <summary>
        /// Gets the textual value of the given resource term; the value of an "&lt;id&gt;" term
        /// not already starting with "bnode:" is given that prefix (any "_:" is dropped).
        /// </summary>
        private static String ReadTrixResource(XmlNode term)
        {
            String value = term.InnerText;
            if (term.Name.Equals("id", StringComparison.Ordinal) && !value.StartsWith("bnode:", StringComparison.Ordinal))
            {
                value = "bnode:" + value.Replace("_:", String.Empty);
            }
            return value;
        }

        /// <summary>
        /// Validates the 3 terms of a "&lt;triple&gt;" element and adds the corresponding triple to the given graph.
        /// </summary>
        /// <exception cref="RDFModelException">Raised when subject, predicate or object are not of an allowed kind,
        /// or when a typed literal lacks its "datatype" attribute.</exception>
        private static void AddTrixTriple(RDFGraph targetGraph, List<XmlNode> terms)
        {
            XmlNode subjNode = terms[0];
            XmlNode predNode = terms[1];
            XmlNode objNode  = terms[2];

            #region subj
            //Subject must be a resource ("<uri>") or a blank node ("<id>")
            if (!IsTrixResource(subjNode))
            {
                throw new RDFModelException("subject (" + subjNode.Name + ") of \"<triple>\" element is neither \"<uri>\" or \"<id>\".");
            }
            String subj = ReadTrixResource(subjNode);
            #endregion

            #region pred
            //Predicate must be a resource ("<uri>")
            if (!predNode.Name.Equals("uri", StringComparison.Ordinal))
            {
                throw new RDFModelException("predicate (" + predNode.Name + ") of \"<triple>\" element must be \"<uri>\".");
            }
            String pred = predNode.InnerText;
            #endregion

            Boolean hasAttributes = objNode.Attributes != null && objNode.Attributes.Count > 0;

            #region object
            if (IsTrixResource(objNode))
            {
                String obj = ReadTrixResource(objNode);
                targetGraph.AddTriple(new RDFTriple(new RDFResource(subj), new RDFResource(pred), new RDFResource(obj)));
            }
            #endregion

            #region plain literal
            else if (objNode.Name.Equals("plainLiteral", StringComparison.Ordinal))
            {
                XmlAttribute xmlLang = hasAttributes ? objNode.Attributes[RDFVocabulary.XML.PREFIX + ":lang"] : null;

                RDFResource subjRes = new RDFResource(subj);
                RDFResource predRes = new RDFResource(pred);
                String literalValue = RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(objNode.InnerText));
                if (xmlLang != null)
                {
                    //Plain literal with language
                    targetGraph.AddTriple(new RDFTriple(subjRes, predRes, new RDFPlainLiteral(literalValue, xmlLang.Value)));
                }
                else
                {
                    //Plain literal without language
                    targetGraph.AddTriple(new RDFTriple(subjRes, predRes, new RDFPlainLiteral(literalValue)));
                }
            }
            #endregion

            #region typed literal
            else if (objNode.Name.Equals("typedLiteral", StringComparison.Ordinal))
            {
                XmlAttribute rdfDtype = hasAttributes ? objNode.Attributes["datatype"] : null;
                if (rdfDtype == null)
                {
                    throw new RDFModelException(" found typed literal without required \"datatype\" attribute.");
                }

                RDFResource subjRes = new RDFResource(subj);
                RDFResource predRes = new RDFResource(pred);
                String literalValue = RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(objNode.InnerText));
                targetGraph.AddTriple(new RDFTriple(subjRes, predRes, new RDFTypedLiteral(literalValue, RDFModelUtilities.GetDatatypeFromString(rdfDtype.Value))));
            }
            #endregion

            #region exception
            //Object is not valid: exception must be raised
            else
            {
                throw new RDFModelException("object (" + objNode.Name + ") of \"<triple>\" element is neither \"<uri>\" or \"<id>\" or \"<plainLiteral>\" or \"<typedLiteral>\".");
            }
            #endregion
        }
Esempio n. 2
0
        /// <summary>
        /// Deserializes the given N-Quads stream to a memory store.
        /// </summary>
        /// <param name="inputStream">Stream containing one N-Quads statement per line. It is closed
        /// together with the reader wrapping it.</param>
        /// <returns>A store containing one quadruple for each non-empty, non-comment line.</returns>
        /// <exception cref="RDFModelException">Raised for any failure; the message reports the 1-based
        /// index of the offending line and the original error is preserved as inner exception.</exception>
        internal static RDFMemoryStore Deserialize(Stream inputStream)
        {
            Int64 nquadIndex = 0;

            try
            {
                #region deserialize
                using (StreamReader sr = new StreamReader(inputStream))
                {
                    Char[] spaceChars = new Char[] { ' ', '\t', '\r', '\n' };
                    RDFMemoryStore result = new RDFMemoryStore();
                    String nquad;
                    while ((nquad = sr.ReadLine()) != null)
                    {
                        nquadIndex++;

                        #region sanitize  & tokenize
                        //Preliminary sanitizations: clean trailing space-like chars
                        nquad = nquad.Trim(spaceChars);

                        //Skip empty or comment lines
                        if (nquad.Length == 0 || nquad.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        //Tokenizes the sanitized quad
                        String[] tokens = TokenizeNQuad(nquad);

                        //Per-line state: exactly one of O (resource object) and L (literal object) gets assigned
                        RDFResource O = null;
                        RDFLiteral  L = null;
                        RDFContext  C = new RDFContext();
                        #endregion

                        #region subj
                        String subj = ReplaceBlankNodePrefix(tokens[0].TrimStart('<').TrimEnd('>'));
                        RDFResource S = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(subj));
                        #endregion

                        #region pred
                        String pred = tokens[1].TrimStart('<').TrimEnd('>');
                        RDFResource P = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(pred));
                        #endregion

                        #region object
                        if (tokens[2].StartsWith("<", StringComparison.Ordinal) ||
                            tokens[2].StartsWith("bnode:", StringComparison.Ordinal) ||
                            tokens[2].StartsWith("_:", StringComparison.Ordinal))
                        {
                            String obj = ReplaceBlankNodePrefix(tokens[2].TrimStart('<').TrimEnd('>').Trim(spaceChars));
                            O = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(obj));
                        }
                        #endregion

                        #region literal
                        else
                        {
                            #region sanitize
                            tokens[2] = RDFNTriples.regexSqt.Replace(tokens[2], String.Empty);
                            tokens[2] = RDFNTriples.regexEqt.Replace(tokens[2], String.Empty);
                            tokens[2] = UnescapeLiteral(tokens[2]);
                            tokens[2] = RDFModelUtilities.ASCII_To_Unicode(tokens[2]);
                            #endregion

                            #region plain literal
                            //No datatype marker, or a "^^" which is not followed by "<": the literal is plain
                            if (!tokens[2].Contains("^^") ||
                                tokens[2].EndsWith("^^", StringComparison.Ordinal) ||
                                tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2, 1) != "<")
                            {
                                if (RDFNTriples.regexLPL.Match(tokens[2]).Success)
                                {
                                    tokens[2] = tokens[2].Replace("\"@", "@");
                                    Int32 langMarker = tokens[2].LastIndexOf("@", StringComparison.Ordinal);
                                    String pLitValue = tokens[2].Substring(0, langMarker);
                                    String pLitLang  = tokens[2].Substring(langMarker + 1);
                                    L = new RDFPlainLiteral(HttpUtility.HtmlDecode(pLitValue), pLitLang);
                                }
                                else
                                {
                                    L = new RDFPlainLiteral(HttpUtility.HtmlDecode(tokens[2]));
                                }
                            }
                            #endregion

                            #region typed literal
                            else
                            {
                                tokens[2] = tokens[2].Replace("\"^^", "^^");
                                Int32 typeMarker = tokens[2].LastIndexOf("^^", StringComparison.Ordinal);
                                String tLitValue    = tokens[2].Substring(0, typeMarker);
                                String tLitDatatype = tokens[2].Substring(typeMarker + 2).TrimStart('<').TrimEnd('>');
                                RDFModelEnums.RDFDatatype dt = RDFModelUtilities.GetDatatypeFromString(tLitDatatype);
                                L = new RDFTypedLiteral(HttpUtility.HtmlDecode(tLitValue), dt);
                            }
                            #endregion
                        }
                        #endregion

                        #region context
                        //The context is optional: when missing, the default RDFContext is kept
                        if (!String.IsNullOrEmpty(tokens[3]))
                        {
                            String ctx = tokens[3].TrimStart('<').TrimEnd('>');

                            Uri ctxUri = null;
                            if (Uri.TryCreate(ctx, UriKind.Absolute, out ctxUri))
                            {
                                C = new RDFContext(RDFModelUtilities.ASCII_To_Unicode(ctxUri.ToString()));
                            }
                            else
                            {
                                throw new RDFModelException("found context '" + ctx + "' which is not a well-formed absolute Uri");
                            }
                        }
                        #endregion

                        #region addquadruple
                        if (O != null)
                        {
                            result.AddQuadruple(new RDFQuadruple(C, S, P, O));
                        }
                        else
                        {
                            result.AddQuadruple(new RDFQuadruple(C, S, P, L));
                        }
                        #endregion
                    }
                    return result;
                }
                #endregion
            }
            catch (Exception ex)
            {
                throw new RDFModelException("Cannot deserialize N-Quads (line " + nquadIndex + ") because: " + ex.Message, ex);
            }
        }

        /// <summary>
        /// Turns a leading "_:" blank node prefix into "bnode:". Occurrences of "_:" elsewhere
        /// in the term (e.g. inside the path of an Uri) are left untouched.
        /// </summary>
        private static String ReplaceBlankNodePrefix(String term)
        {
            return term.StartsWith("_:", StringComparison.Ordinal)
                       ? "bnode:" + term.Substring(2)
                       : term;
        }

        /// <summary>
        /// Decodes, in a single left-to-right pass, the escapes \\ \" \n \t \r of a literal.
        /// Working in one pass keeps an escaped backslash followed by 'n' as backslash + 'n'
        /// instead of turning it into a newline. Any other escape is copied as is.
        /// </summary>
        private static String UnescapeLiteral(String value)
        {
            if (value.IndexOf('\\') < 0)
            {
                return value;
            }

            System.Text.StringBuilder decoded = new System.Text.StringBuilder(value.Length);
            for (Int32 i = 0; i < value.Length; i++)
            {
                Char current = value[i];
                if (current == '\\' && i + 1 < value.Length)
                {
                    Boolean known = true;
                    switch (value[i + 1])
                    {
                        case '\\': decoded.Append('\\'); break;
                        case '"':  decoded.Append('"');  break;
                        case 'n':  decoded.Append('\n'); break;
                        case 't':  decoded.Append('\t'); break;
                        case 'r':  decoded.Append('\r'); break;
                        //NOTE(review): other escapes (presumably \u and \U) are left for ASCII_To_Unicode - confirm
                        default:   known = false;        break;
                    }
                    if (known)
                    {
                        //Both chars of the escape have been consumed
                        i++;
                        continue;
                    }
                }
                decoded.Append(current);
            }
            return decoded.ToString();
        }
Esempio n. 3
0
        /// <summary>
        /// Deserializes the given N-Triples stream to a graph.
        /// </summary>
        /// <param name="inputStream">Stream containing one N-Triples statement per line. It is read as UTF-8
        /// (of which plain ASCII is a subset) and is closed together with the reader wrapping it.</param>
        /// <returns>A graph containing one triple for each non-empty, non-comment line.</returns>
        /// <exception cref="RDFModelException">Raised for any failure; the message reports the 1-based
        /// index of the offending line and the original error is preserved as inner exception.</exception>
        internal static RDFGraph Deserialize(Stream inputStream)
        {
            Int64 ntripleIndex = 0;

            try
            {
                #region deserialize
                //UTF-8 decodes ASCII documents unchanged, while not mangling non-ASCII characters
                using (StreamReader sr = new StreamReader(inputStream, Encoding.UTF8))
                {
                    Char[] spaceChars = new Char[] { ' ', '\t', '\r', '\n' };
                    RDFGraph result = new RDFGraph();
                    String ntriple;
                    while ((ntriple = sr.ReadLine()) != null)
                    {
                        ntripleIndex++;

                        #region sanitize  & tokenize
                        //Preliminary sanitizations: clean trailing space-like chars
                        ntriple = ntriple.Trim(spaceChars);

                        //Skip empty or comment lines
                        if (ntriple.Length == 0 || ntriple.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        //Tokenizes the sanitized triple
                        String[] tokens = TokenizeNTriple(ntriple);

                        //Per-line state: exactly one of O (resource object) and L (literal object) gets assigned
                        RDFResource O = null;
                        RDFLiteral  L = null;
                        #endregion

                        #region subj
                        String subj = ReplaceBlankNodePrefix(tokens[0].TrimStart('<').TrimEnd('>'));
                        RDFResource S = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(subj));
                        #endregion

                        #region pred
                        String pred = tokens[1].TrimStart('<').TrimEnd('>');
                        RDFResource P = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(pred));
                        #endregion

                        #region object
                        if (tokens[2].StartsWith("<", StringComparison.Ordinal) ||
                            tokens[2].StartsWith("bnode:", StringComparison.Ordinal) ||
                            tokens[2].StartsWith("_:", StringComparison.Ordinal))
                        {
                            String obj = ReplaceBlankNodePrefix(tokens[2].TrimStart('<').TrimEnd('>').Trim(spaceChars));
                            O = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(obj));
                        }
                        #endregion

                        #region literal
                        else
                        {
                            #region sanitize
                            tokens[2] = regexSqt.Replace(tokens[2], String.Empty);
                            tokens[2] = regexEqt.Replace(tokens[2], String.Empty);
                            tokens[2] = UnescapeLiteral(tokens[2]);
                            tokens[2] = RDFModelUtilities.ASCII_To_Unicode(tokens[2]);
                            #endregion

                            #region plain literal
                            //No datatype marker, or a "^^" which is not followed by "<": the literal is plain
                            if (!tokens[2].Contains("^^") ||
                                tokens[2].EndsWith("^^", StringComparison.Ordinal) ||
                                tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2, 1) != "<")
                            {
                                if (regexLPL.Match(tokens[2]).Success)
                                {
                                    tokens[2] = tokens[2].Replace("\"@", "@");
                                    Int32 langMarker = tokens[2].LastIndexOf("@", StringComparison.Ordinal);
                                    String pLitValue = tokens[2].Substring(0, langMarker);
                                    String pLitLang  = tokens[2].Substring(langMarker + 1);
                                    L = new RDFPlainLiteral(HttpUtility.HtmlDecode(pLitValue), pLitLang);
                                }
                                else
                                {
                                    L = new RDFPlainLiteral(HttpUtility.HtmlDecode(tokens[2]));
                                }
                            }
                            #endregion

                            #region typed literal
                            else
                            {
                                tokens[2] = tokens[2].Replace("\"^^", "^^");
                                Int32 typeMarker = tokens[2].LastIndexOf("^^", StringComparison.Ordinal);
                                String tLitValue    = tokens[2].Substring(0, typeMarker);
                                String tLitDatatype = tokens[2].Substring(typeMarker + 2).TrimStart('<').TrimEnd('>');
                                RDFModelEnums.RDFDatatypes dt = RDFModelUtilities.GetDatatypeFromString(tLitDatatype);
                                L = new RDFTypedLiteral(HttpUtility.HtmlDecode(tLitValue), dt);
                            }
                            #endregion
                        }
                        #endregion

                        #region addtriple
                        if (O != null)
                        {
                            result.AddTriple(new RDFTriple(S, P, O));
                        }
                        else
                        {
                            result.AddTriple(new RDFTriple(S, P, L));
                        }
                        #endregion
                    }
                    return result;
                }
                #endregion
            }
            catch (Exception ex)
            {
                throw new RDFModelException("Cannot deserialize N-Triples (line " + ntripleIndex + ") because: " + ex.Message, ex);
            }
        }

        /// <summary>
        /// Turns a leading "_:" blank node prefix into "bnode:". Occurrences of "_:" elsewhere
        /// in the term (e.g. inside the path of an Uri) are left untouched.
        /// </summary>
        private static String ReplaceBlankNodePrefix(String term)
        {
            return term.StartsWith("_:", StringComparison.Ordinal)
                       ? "bnode:" + term.Substring(2)
                       : term;
        }

        /// <summary>
        /// Decodes, in a single left-to-right pass, the escapes \\ \" \n \t \r of a literal.
        /// Working in one pass keeps an escaped backslash followed by 'n' as backslash + 'n'
        /// instead of turning it into a newline. Any other escape is copied as is.
        /// </summary>
        private static String UnescapeLiteral(String value)
        {
            if (value.IndexOf('\\') < 0)
            {
                return value;
            }

            StringBuilder decoded = new StringBuilder(value.Length);
            for (Int32 i = 0; i < value.Length; i++)
            {
                Char current = value[i];
                if (current == '\\' && i + 1 < value.Length)
                {
                    Boolean known = true;
                    switch (value[i + 1])
                    {
                        case '\\': decoded.Append('\\'); break;
                        case '"':  decoded.Append('"');  break;
                        case 'n':  decoded.Append('\n'); break;
                        case 't':  decoded.Append('\t'); break;
                        case 'r':  decoded.Append('\r'); break;
                        //NOTE(review): other escapes (presumably \u and \U) are left for ASCII_To_Unicode - confirm
                        default:   known = false;        break;
                    }
                    if (known)
                    {
                        //Both chars of the escape have been consumed
                        i++;
                        continue;
                    }
                }
                decoded.Append(current);
            }
            return decoded.ToString();
        }