/// <summary> /// Deserializes the given TriX stream to a memory store. /// </summary> internal static RDFMemoryStore Deserialize(Stream inputStream) { try { #region deserialize RDFMemoryStore result = new RDFMemoryStore(); Dictionary <Int64, RDFGraph> graphs = new Dictionary <Int64, RDFGraph>(); using (StreamReader streamReader = new StreamReader(inputStream, Encoding.UTF8)) { using (XmlTextReader trixReader = new XmlTextReader(streamReader)) { trixReader.DtdProcessing = DtdProcessing.Parse; trixReader.Normalization = false; #region document XmlDocument trixDoc = new XmlDocument(); trixDoc.Load(trixReader); #endregion #region graph if (trixDoc.DocumentElement != null) { #region graphs extraction var graphEnum = trixDoc.DocumentElement.ChildNodes.GetEnumerator(); while (graphEnum != null && graphEnum.MoveNext()) { XmlNode graph = (XmlNode)graphEnum.Current; if (!graph.Name.Equals("graph", StringComparison.Ordinal)) { throw new RDFModelException(" a \"<graph>\" element was expected, instead of unrecognized \"<" + graph.Name + ">\"."); } Uri graphUri = RDFNamespaceRegister.DefaultNamespace.NamespaceUri; Int64 graphID = RDFNamespaceRegister.DefaultNamespace.NamespaceID; if (!graphs.ContainsKey(graphID)) { graphs.Add(graphID, new RDFGraph().SetContext(graphUri)); } #region triple var encodedUris = 0; var tripleEnum = graph.ChildNodes.GetEnumerator(); while (tripleEnum != null && tripleEnum.MoveNext()) { XmlNode triple = (XmlNode)tripleEnum.Current; #region uri if (triple.Name.Equals("uri", StringComparison.Ordinal)) { encodedUris++; if (encodedUris > 1) { throw new RDFModelException(" given file encodes a graph with more than one \"<uri>\" element."); } graphUri = RDFModelUtilities.GetUriFromString(triple.ChildNodes[0].InnerText); graphID = RDFModelUtilities.CreateHash(graphUri.ToString()); if (!graphs.ContainsKey(graphID)) { graphs.Add(graphID, new RDFGraph().SetContext(graphUri)); } } #endregion #region triple else if (triple.Name.Equals("triple", StringComparison.Ordinal) && triple.ChildNodes.Count == 3) { #region subj //Subject is a resource ("<uri>") or a blank node ("<id>") if (triple.ChildNodes[0].Name.Equals("uri", StringComparison.Ordinal) || triple.ChildNodes[0].Name.Equals("id", StringComparison.Ordinal)) { //Sanitize eventual blank node value if (triple.ChildNodes[0].Name.Equals("id", StringComparison.Ordinal)) { if (!triple.ChildNodes[0].InnerText.StartsWith("bnode:")) { triple.ChildNodes[0].InnerText = "bnode:" + triple.ChildNodes[0].InnerText.Replace("_:", String.Empty); } } } //Subject is not valid: exception must be raised else { throw new RDFModelException("subject (" + triple.ChildNodes[0].Name + ") of \"<triple>\" element is neither \"<uri>\" or \"<id>\"."); } #endregion #region pred //Predicate is not valid: exception must be raised if (!triple.ChildNodes[1].Name.Equals("uri", StringComparison.Ordinal)) { throw new RDFModelException("predicate (" + triple.ChildNodes[1].Name + ") of \"<triple>\" element must be \"<uri>\"."); } #endregion #region object //Object is a resource ("<uri>") or a blank node ("<id>") if (triple.ChildNodes[2].Name.Equals("uri", StringComparison.Ordinal) || triple.ChildNodes[2].Name.Equals("id", StringComparison.Ordinal)) { //Sanitize eventual blank node value if (triple.ChildNodes[2].Name.Equals("id", StringComparison.Ordinal)) { if (!triple.ChildNodes[2].InnerText.StartsWith("bnode:")) { triple.ChildNodes[2].InnerText = "bnode:" + triple.ChildNodes[2].InnerText.Replace("_:", String.Empty); } } graphs[graphID].AddTriple(new RDFTriple(new RDFResource(triple.ChildNodes[0].InnerText), new RDFResource(triple.ChildNodes[1].InnerText), new RDFResource(triple.ChildNodes[2].InnerText))); } #endregion #region literal #region plain literal else if (triple.ChildNodes[2].Name.Equals("plainLiteral")) { if (triple.ChildNodes[2].Attributes != null && triple.ChildNodes[2].Attributes.Count > 0) { XmlAttribute xmlLang = triple.ChildNodes[2].Attributes[RDFVocabulary.XML.PREFIX + ":lang"]; if (xmlLang != null) { //Plain literal with language graphs[graphID].AddTriple(new RDFTriple(new RDFResource(triple.ChildNodes[0].InnerText), new RDFResource(triple.ChildNodes[1].InnerText), new RDFPlainLiteral(RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(triple.ChildNodes[2].InnerText)), xmlLang.Value))); } else { //Plain literal without language graphs[graphID].AddTriple(new RDFTriple(new RDFResource(triple.ChildNodes[0].InnerText), new RDFResource(triple.ChildNodes[1].InnerText), new RDFPlainLiteral(RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(triple.ChildNodes[2].InnerText))))); } } else { //Plain literal without language graphs[graphID].AddTriple(new RDFTriple(new RDFResource(triple.ChildNodes[0].InnerText), new RDFResource(triple.ChildNodes[1].InnerText), new RDFPlainLiteral(RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(triple.ChildNodes[2].InnerText))))); } } #endregion #region typed literal else if (triple.ChildNodes[2].Name.Equals("typedLiteral", StringComparison.Ordinal)) { if (triple.ChildNodes[2].Attributes != null && triple.ChildNodes[2].Attributes.Count > 0) { XmlAttribute rdfDtype = triple.ChildNodes[2].Attributes["datatype"]; if (rdfDtype != null) { graphs[graphID].AddTriple(new RDFTriple(new RDFResource(triple.ChildNodes[0].InnerText), new RDFResource(triple.ChildNodes[1].InnerText), new RDFTypedLiteral(RDFModelUtilities.ASCII_To_Unicode(HttpUtility.HtmlDecode(triple.ChildNodes[2].InnerText)), RDFModelUtilities.GetDatatypeFromString(rdfDtype.Value)))); } else { throw new RDFModelException(" found typed literal without required \"datatype\" attribute."); } } else { throw new RDFModelException(" found typed literal without required \"datatype\" attribute."); } } #endregion #endregion #region exception //Object is not valid: exception must be raised else { throw new RDFModelException("object (" + triple.ChildNodes[2].Name + ") of \"<triple>\" element is neither \"<uri>\" or \"<id>\" or \"<plainLiteral>\" or \"<typedLiteral>\"."); } #endregion } #endregion #region exception else { throw new RDFModelException("found a TriX element (" + triple.Name + ") which is neither \"<uri>\" or \"<triple>\", or is a \"<triple>\" without the required 3 childs."); } #endregion } #endregion } #endregion #region graphs merging foreach (var graph in graphs) { result.MergeGraph(graph.Value); } #endregion } #endregion } } return(result); #endregion } catch (Exception ex) { throw new RDFModelException("Cannot deserialize TriX because: " + ex.Message, ex); } }
/// <summary> /// Deserializes the given N-Quads stream to a memory store. /// </summary> internal static RDFMemoryStore Deserialize(Stream inputStream) { Int64 nquadIndex = 0; try { #region deserialize using (StreamReader sr = new StreamReader(inputStream)) { RDFMemoryStore result = new RDFMemoryStore(); String nquad = String.Empty; String[] tokens = new String[4]; RDFResource S = null; RDFResource P = null; RDFResource O = null; RDFLiteral L = null; RDFContext C = new RDFContext(); while ((nquad = sr.ReadLine()) != null) { nquadIndex++; #region sanitize & tokenize //Cleanup previous data S = null; tokens[0] = String.Empty; P = null; tokens[1] = String.Empty; O = null; L = null; tokens[2] = String.Empty; C = new RDFContext(); tokens[3] = String.Empty; //Preliminary sanitizations: clean trailing space-like chars nquad = nquad.Trim(new Char[] { ' ', '\t', '\r', '\n' }); //Skip empty or comment lines if (nquad == String.Empty || nquad.StartsWith("#")) { continue; } //Tokenizes the sanitized quad tokens = TokenizeNQuad(nquad); #endregion #region subj String subj = tokens[0].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }) .Replace("_:", "bnode:"); S = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(subj)); #endregion #region pred String pred = tokens[1].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }); P = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(pred)); #endregion #region object if (tokens[2].StartsWith("<") || tokens[2].StartsWith("bnode:") || tokens[2].StartsWith("_:")) { String obj = tokens[2].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }) .Replace("_:", "bnode:") .Trim(new Char[] { ' ', '\n', '\t', '\r' }); O = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(obj)); } #endregion #region literal else { #region sanitize tokens[2] = RDFNTriples.regexSqt.Replace(tokens[2], String.Empty); tokens[2] = RDFNTriples.regexEqt.Replace(tokens[2], String.Empty); tokens[2] = tokens[2].Replace("\\\"", "\"") .Replace("\\n", "\n") .Replace("\\t", "\t") .Replace("\\r", "\r"); tokens[2] = RDFModelUtilities.ASCII_To_Unicode(tokens[2]); #endregion #region plain literal if (!tokens[2].Contains("^^") || tokens[2].EndsWith("^^") || tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2, 1) != "<") { if (RDFNTriples.regexLPL.Match(tokens[2]).Success) { tokens[2] = tokens[2].Replace("\"@", "@"); String pLitValue = tokens[2].Substring(0, tokens[2].LastIndexOf("@", StringComparison.Ordinal)); String pLitLang = tokens[2].Substring(tokens[2].LastIndexOf("@", StringComparison.Ordinal) + 1); L = new RDFPlainLiteral(HttpUtility.HtmlDecode(pLitValue), pLitLang); } else { L = new RDFPlainLiteral(HttpUtility.HtmlDecode(tokens[2])); } } #endregion #region typed literal else { tokens[2] = tokens[2].Replace("\"^^", "^^"); String tLitValue = tokens[2].Substring(0, tokens[2].LastIndexOf("^^", StringComparison.Ordinal)); String tLitDatatype = tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2) .TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }); RDFModelEnums.RDFDatatype dt = RDFModelUtilities.GetDatatypeFromString(tLitDatatype); L = new RDFTypedLiteral(HttpUtility.HtmlDecode(tLitValue), dt); } #endregion } #endregion #region context if (!String.IsNullOrEmpty(tokens[3])) { String ctx = tokens[3].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }); Uri ctxUri = null; if (Uri.TryCreate(ctx, UriKind.Absolute, out ctxUri)) { C = new RDFContext(RDFModelUtilities.ASCII_To_Unicode(ctxUri.ToString())); } else { throw new RDFModelException("found context '" + ctx + "' which is not a well-formed absolute Uri"); } } #endregion #region addquadruple if (O != null) { result.AddQuadruple(new RDFQuadruple(C, S, P, O)); } else { result.AddQuadruple(new RDFQuadruple(C, S, P, L)); } #endregion } return(result); } #endregion } catch (Exception ex) { throw new RDFModelException("Cannot deserialize N-Quads (line " + nquadIndex + ") because: " + ex.Message, ex); } }
/// <summary> /// Deserializes the given N-Triples stream to a graph. /// </summary> internal static RDFGraph Deserialize(Stream inputStream) { Int64 ntripleIndex = 0; try { #region deserialize using (StreamReader sr = new StreamReader(inputStream, Encoding.ASCII)) { RDFGraph result = new RDFGraph(); String ntriple = String.Empty; String[] tokens = new String[3]; RDFResource S = null; RDFResource P = null; RDFResource O = null; RDFLiteral L = null; while ((ntriple = sr.ReadLine()) != null) { ntripleIndex++; #region sanitize & tokenize //Cleanup previous data S = null; tokens[0] = String.Empty; P = null; tokens[1] = String.Empty; O = null; L = null; tokens[2] = String.Empty; //Preliminary sanitizations: clean trailing space-like chars ntriple = ntriple.Trim(new Char[] { ' ', '\t', '\r', '\n' }); //Skip empty or comment lines if (ntriple == String.Empty || ntriple.StartsWith("#")) { continue; } //Tokenizes the sanitized triple tokens = TokenizeNTriple(ntriple); #endregion #region subj String subj = tokens[0].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }) .Replace("_:", "bnode:"); S = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(subj)); #endregion #region pred String pred = tokens[1].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }); P = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(pred)); #endregion #region object if (tokens[2].StartsWith("<") || tokens[2].StartsWith("bnode:") || tokens[2].StartsWith("_:")) { String obj = tokens[2].TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }) .Replace("_:", "bnode:") .Trim(new Char[] { ' ', '\n', '\t', '\r' }); O = new RDFResource(RDFModelUtilities.ASCII_To_Unicode(obj)); } #endregion #region literal else { #region sanitize tokens[2] = regexSqt.Replace(tokens[2], String.Empty); tokens[2] = regexEqt.Replace(tokens[2], String.Empty); tokens[2] = tokens[2].Replace("\\\\", "\\") .Replace("\\\"", "\"") .Replace("\\n", "\n") .Replace("\\t", "\t") .Replace("\\r", "\r"); tokens[2] = RDFModelUtilities.ASCII_To_Unicode(tokens[2]); #endregion #region plain literal if (!tokens[2].Contains("^^") || tokens[2].EndsWith("^^") || tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2, 1) != "<") { if (regexLPL.Match(tokens[2]).Success) { tokens[2] = tokens[2].Replace("\"@", "@"); String pLitValue = tokens[2].Substring(0, tokens[2].LastIndexOf("@", StringComparison.Ordinal)); String pLitLang = tokens[2].Substring(tokens[2].LastIndexOf("@", StringComparison.Ordinal) + 1); L = new RDFPlainLiteral(HttpUtility.HtmlDecode(pLitValue), pLitLang); } else { L = new RDFPlainLiteral(HttpUtility.HtmlDecode(tokens[2])); } } #endregion #region typed literal else { tokens[2] = tokens[2].Replace("\"^^", "^^"); String tLitValue = tokens[2].Substring(0, tokens[2].LastIndexOf("^^", StringComparison.Ordinal)); String tLitDatatype = tokens[2].Substring(tokens[2].LastIndexOf("^^", StringComparison.Ordinal) + 2) .TrimStart(new Char[] { '<' }) .TrimEnd(new Char[] { '>' }); RDFModelEnums.RDFDatatypes dt = RDFModelUtilities.GetDatatypeFromString(tLitDatatype); L = new RDFTypedLiteral(HttpUtility.HtmlDecode(tLitValue), dt); } #endregion } #endregion #region addtriple if (O != null) { result.AddTriple(new RDFTriple(S, P, O)); } else { result.AddTriple(new RDFTriple(S, P, L)); } #endregion } return(result); } #endregion } catch (Exception ex) { throw new RDFModelException("Cannot deserialize N-Triples (line " + ntripleIndex + ") because: " + ex.Message, ex); } }