/// <summary>Hashes all of the quads about a blank node.</summary>
/// <remarks>
/// The computed hash is stored under the <c>"hash"</c> key of the blank
/// node's entry in <paramref name="bnodes"/> and returned directly on
/// subsequent calls for the same <paramref name="id"/>.
/// </remarks>
/// <param name="id">the ID of the bnode to hash quads for.</param>
/// <param name="bnodes">the mapping of bnodes to quads.</param>
/// <param name="namer">the canonical bnode namer (not read by this method).</param>
/// <returns>the new hash.</returns>
private static string HashQuads(string id, IDictionary<string, IDictionary<string, object>> bnodes, UniqueNamer namer)
{
    IDictionary<string, object> info = bnodes[id];

    // return cached hash
    object cachedHash;
    if (info.TryGetValue("hash", out cachedHash))
    {
        return (string)cachedHash;
    }

    // serialize all of bnode's quads
    IList<RDFDataset.Quad> quads = (IList<RDFDataset.Quad>)info["quads"];
    IList<string> nquads = new List<string>();
    foreach (RDFDataset.Quad quad in quads)
    {
        // A quad may have no "name" entry or an explicit null one; in both
        // cases it is serialized without a graph name instead of failing
        // with a NullReferenceException.
        object name;
        string graphName = null;
        if (quad.TryGetValue("name", out name) && name != null)
        {
            graphName = (string)((IDictionary<string, object>)name)["value"];
        }
        nquads.Add(RDFDatasetUtils.ToNQuad(quad, graphName, id));
    }

    // sort serialized quads; ordinal ordering keeps the hash independent of
    // the current culture
    nquads.SortInPlace(StringComparer.Ordinal);

    // cache and return hashed quads
    string hash = Sha1hash(nquads);
    info["hash"] = hash;
    return hash;
}
/// <summary>Hashes all of the quads about a blank node.</summary>
/// <remarks>
/// The computed hash is stored under the <c>"hash"</c> property of the blank
/// node's entry in <paramref name="bnodes"/> and returned directly on
/// subsequent calls for the same <paramref name="id"/>.
/// </remarks>
/// <param name="id">the ID of the bnode to hash quads for.</param>
/// <param name="bnodes">the mapping of bnodes to quads.</param>
/// <param name="namer">the canonical bnode namer (not read by this method).</param>
/// <returns>the new hash.</returns>
private static string HashQuads(string id, JObject bnodes, UniqueNamer namer)
{
    JObject info = (JObject)bnodes[id];

    // return cached hash
    if (info.ContainsKey("hash"))
    {
        return (string)info["hash"];
    }

    // serialize all of bnode's quads
    JArray quads = (JArray)info["quads"];
    IList<string> nquads = new List<string>();
    for (int i = 0; i < quads.Count; ++i)
    {
        var quad = quads[i];
        var name = quad["name"];
        string graphName = name != null ? (string)((JObject)name)["value"] : null;
        nquads.Add(RDFDatasetUtils.ToNQuad((RDFDataset.Quad)quad, graphName, id));
    }

    // sort serialized quads; the comparer is spelled out so the resulting
    // hash does not depend on the current culture
    nquads.SortInPlace(StringComparer.Ordinal);

    // cache and return hashed quads
    string hash = Sha1hash(nquads);
    info["hash"] = hash;
    return hash;
}
/// <summary>
/// Generates unique and duplicate hashes for bnodes, names the bnodes through
/// <c>namer</c>, and serializes the renamed quads.
/// </summary>
/// <remarks>
/// Works in three phases:
/// 1. repeatedly hash the still-unnamed bnodes and name every bnode whose
///    hash is unique, until a pass names nothing;
/// 2. name the bnodes that share a hash, group by group in sorted hash
///    order, using the result of <c>HashPaths</c>;
/// 3. rewrite the bnode values of every quad with their names, serialize the
///    quads, sort them and produce the output.
/// </remarks>
/// <param name="unnamed_">the IDs of the bnodes that still have to be named.</param>
/// <returns>
/// the concatenated serialized quads as a string when <c>options.format</c>
/// is <c>"application/nquads"</c>; the result of
/// <c>RDFDatasetUtils.ParseNQuads</c> when no format is set.
/// </returns>
/// <exception cref="JsonLD.Core.JsonLdError">
/// <c>options.format</c> is set to anything other than
/// <c>"application/nquads"</c>.
/// </exception>
public virtual object HashBlankNodes(IEnumerable<string> unnamed_)
{
#if !PORTABLE
    IList<string> unnamed = new List<string>(unnamed_);
    IList<string> nextUnnamed = new List<string>();
    IDictionary<string, IList<string>> duplicates = new Dictionary<string, IList<string>>();
    IDictionary<string, string> unique = new Dictionary<string, string>();

    // NOTE: not using the same structure as javascript here to avoid
    // possible stack overflows

    // Phase 1: hash the unnamed bnodes and name the ones with a unique hash,
    // repeating until a pass does not name anything.
    while (true)
    {
        foreach (string candidate in unnamed)
        {
            string quadHash = HashQuads(candidate, bnodes, namer);

            // store hash as unique or a duplicate
            IList<string> sameHash;
            string firstSeen;
            if (duplicates.TryGetValue(quadHash, out sameHash))
            {
                sameHash.Add(candidate);
                nextUnnamed.Add(candidate);
            }
            else if (unique.TryGetValue(quadHash, out firstSeen))
            {
                // second bnode with this hash: move both into the duplicates
                IList<string> pair = new List<string>();
                pair.Add(firstSeen);
                pair.Add(candidate);
                duplicates[quadHash] = pair;
                nextUnnamed.Add(firstSeen);
                nextUnnamed.Add(candidate);
                JsonLD.Collections.Remove(unique, quadHash);
            }
            else
            {
                unique[quadHash] = candidate;
            }
        }

        if (unique.Count == 0)
        {
            // nothing was named in this pass; 'duplicates' now holds the
            // groups that phase 2 has to resolve
            break;
        }

        // name the uniquely-hashed bnodes in sorted hash order
        IList<string> uniqueHashes = new List<string>(unique.Keys);
        uniqueHashes.SortInPlace();
        foreach (string uniqueHash in uniqueHashes)
        {
            namer.GetName(unique[uniqueHash]);
        }

        // a bnode was assigned a name, so hash the remaining bnodes again
        unnamed = nextUnnamed;
        nextUnnamed = new List<string>();
        duplicates = new Dictionary<string, IList<string>>();
        unique = new Dictionary<string, string>();
    }

    // Phase 2: name the duplicate-hash bnodes, enumerating the groups in
    // sorted hash order.
    IList<string> groupHashes = new List<string>(duplicates.Keys);
    groupHashes.SortInPlace();
    foreach (string groupHash in groupHashes)
    {
        IList<NormalizeUtils.HashResult> results = new List<NormalizeUtils.HashResult>();
        foreach (string member in duplicates[groupHash])
        {
            // skip already-named bnodes
            if (namer.IsNamed(member))
            {
                continue;
            }

            // hash bnode paths
            UniqueNamer pathNamer = new UniqueNamer("_:b");
            pathNamer.GetName(member);
            results.Add(HashPaths(member, bnodes, namer, pathNamer));
        }

        // name bnodes in hash order
        results.SortInPlace(new _IComparer_145());
        foreach (NormalizeUtils.HashResult result in results)
        {
            // name all bnodes in path namer in key-entry order
            // Note: key-order is preserved in javascript
            foreach (string key in result.pathNamer.Existing().GetKeys())
            {
                namer.GetName(key);
            }
        }
    }

    // Phase 3: at this point all bnodes in the set of RDF quads have been
    // assigned canonical names, which have been stored in the 'namer'
    // object. Update the bnode names in each quad and serialize.
    IList<string> normalized = new List<string>();
    string[] renamable = new string[] { "subject", "object", "name" };
    for (int qi = 0; qi < quads.Count; ++qi)
    {
        RDFDataset.Quad quad = quads[qi];
        foreach (string attr in renamable)
        {
            if (!quad.ContainsKey(attr))
            {
                continue;
            }
            IDictionary<string, object> node = (IDictionary<string, object>)quad[attr];
            // Ordinal prefix test: values already carrying the canonical
            // prefix are left alone.
            if (node != null
                && (string)node["type"] == "blank node"
                && !((string)node["value"]).StartsWith("_:c14n", StringComparison.Ordinal))
            {
                node["value"] = namer.GetName((string)node["value"]);
            }
        }

        string graphName = null;
        if (quad.ContainsKey("name") && quad["name"] != null)
        {
            graphName = (string)((IDictionary<string, object>)((IDictionary<string, object>)quad)["name"])["value"];
        }
        normalized.Add(RDFDatasetUtils.ToNQuad(quad, graphName));
    }

    // sort normalized output
    normalized.SortInPlace();

    // handle output format
    bool asNQuads = options.format != null;
    if (asNQuads && !"application/nquads".Equals(options.format))
    {
        throw new JsonLdError(JsonLdError.Error.UnknownFormat, options.format);
    }

    // join once instead of growing a string inside a loop
    string serialized = string.Join(string.Empty, normalized);
    if (asNQuads)
    {
        return serialized;
    }
    return RDFDatasetUtils.ParseNQuads(serialized);
#else
    throw new PlatformNotSupportedException();
#endif
}
/// <summary>
/// Normalizes <c>this.dataset</c>: indexes the quads of every graph by the
/// blank nodes they mention, issues canonical blank node identifiers through
/// <c>this.canonicalIssuer</c> and serializes the relabelled quads.
/// </summary>
/// <remarks>
/// The numbered comments in the body are the step numbers the original code
/// was annotated with. All graphs of the dataset are collected before any
/// identifier is issued, so a dataset with several graphs (or with none) goes
/// through the same steps 3-8.
/// </remarks>
/// <returns>
/// When <c>options.format</c> is not set: the result of
/// <c>RDFDatasetUtils.ParseNQuads</c>, or the caught exception object if
/// parsing throws. When it is <c>"application/nquads"</c>: the sorted,
/// concatenated serialized quads as a string. For any other format:
/// <c>null</c>.
/// </returns>
public Object Normalize()
{
    this.quads = new List<IDictionary<string, IDictionary<string, string>>>();
    this.blankNodeInfo = new Dictionary<string, IDictionary<string, IList<object>>>();
    this.hashToBlankNodes = new Dictionary<string, IList<string>>();
    this.canonicalIssuer = new IdentifierIssuer("_:c14n");

    // 2) For every quad in input dataset:
    foreach (string graphKey in this.dataset.Keys)
    {
        IList<IDictionary<string, IDictionary<string, string>>> triples =
            (IList<IDictionary<string, IDictionary<string, string>>>)this.dataset[graphKey];

        // Quads of the default graph carry no graph name. (Strings are
        // immutable, so the name has to be held in a separate local.)
        string graphName = graphKey.Equals("@default") ? null : graphKey;

        foreach (IDictionary<string, IDictionary<string, string>> quad in triples)
        {
            if (graphName != null)
            {
                IDictionary<string, string> nameNode = new Dictionary<string, string>();
                nameNode["type"] = graphName.StartsWith("_:", StringComparison.Ordinal) ? "blank node" : "IRI";
                nameNode["value"] = graphName;
                quad["name"] = nameNode;
            }
            this.quads.Add(quad);

            // 2.1) For each blank node that occurs in the quad, add a
            // reference to the quad using the blank node identifier in the
            // blank node to quads map, creating a new entry if necessary.
            foreach (string key in quad.Keys)
            {
                IDictionary<string, string> component = quad[key];
                if (key.Equals("predicate") || !component["type"].Equals("blank node"))
                {
                    continue;
                }

                string bnodeId = component["value"];
                IDictionary<string, IList<Object>> entry;
                // The dictionary indexer throws on a missing key, so the
                // "first time seen" case is detected with TryGetValue.
                if (!this.blankNodeInfo.TryGetValue(bnodeId, out entry))
                {
                    entry = new Dictionary<string, IList<Object>>();
                    entry["quads"] = new List<Object>();
                    this.blankNodeInfo[bnodeId] = entry;
                }
                entry["quads"].Add(quad);
            }
        }
    }

    // 3) Non-normalized identifiers: every blank node seen in the dataset.
    List<string> nonNormalized = new List<string>(this.blankNodeInfo.Keys);

    // 4) Initialize simple, a boolean flag, to true.
    bool simple = true;

    // 5) While simple is true, issue canonical identifiers for blank nodes:
    while (simple)
    {
        // 5.1) Set simple to false.
        simple = false;

        // 5.2) Clear hash to blank nodes map.
        this.hashToBlankNodes.Clear();

        // 5.3) For each blank node identifier in non-normalized identifiers,
        // group the identifiers by their first degree hash.
        foreach (string pendingId in nonNormalized)
        {
            string firstDegreeHash = hashFirstDegreeQuads(pendingId);
            IList<string> bucket;
            if (!this.hashToBlankNodes.TryGetValue(firstDegreeHash, out bucket))
            {
                bucket = new List<string>();
                this.hashToBlankNodes.Add(firstDegreeHash, bucket);
            }
            bucket.Add(pendingId);
        }

        // 5.4) For each hash to identifier list mapping in hash to blank
        // nodes map, lexicographically-sorted by hash:
        // NOTE(review): entries are removed from hashToBlankNodes inside this
        // loop; this presumes sortMapKeys returns a copy of the keys - confirm.
        foreach (string uniqueHash in sortMapKeys(this.hashToBlankNodes))
        {
            IList<string> candidates = this.hashToBlankNodes[uniqueHash];
            if (candidates.Count > 1)
            {
                continue;
            }

            // 5.4.2) Issue a canonical replacement identifier for the single
            // blank node identifier in the list.
            string issuedFor = candidates[0];
            this.canonicalIssuer.getId(issuedFor);

            // 5.4.3) Remove identifier from non-normalized identifiers.
            nonNormalized.Remove(issuedFor);

            // 5.4.4) Remove hash from the hash to blank nodes map.
            this.hashToBlankNodes.Remove(uniqueHash);

            // 5.4.5) Set simple to true.
            simple = true;
        }
    }

    // 6) For each hash to identifier list mapping in hash to blank nodes
    // map, lexicographically-sorted by hash:
    foreach (string sharedHash in sortMapKeys(this.hashToBlankNodes))
    {
        IList<string> sharedIds = this.hashToBlankNodes[sharedHash];

        // 6.1) Create hash path list where each item will be a result of
        // running the Hash N-Degree Quads algorithm.
        var hashPathList = new List<IDictionary<string, object>>();

        // 6.2) For each blank node identifier in identifier list:
        foreach (string sharedId in sharedIds)
        {
            // 6.2.1) If a canonical identifier has already been issued for
            // identifier, continue to the next identifier.
            if (this.canonicalIssuer.hasID(sharedId))
            {
                continue;
            }

            // 6.2.2) Create temporary issuer, an identifier issuer
            // initialized with the prefix _:b.
            IdentifierIssuer issuer = new IdentifierIssuer("_:b");

            // 6.2.3) Issue a new temporary blank node identifier for
            // identifier.
            issuer.getId(sharedId);

            // 6.2.4) Run the Hash N-Degree Quads algorithm, passing
            // temporary issuer, and append the result to the hash path list.
            hashPathList.Add(hashNDegreeQuads(issuer, sharedId));
        }

        // 6.3) For each result in the hash path list,
        // lexicographically-sorted by the hash in result:
        sortMapList(hashPathList);
        foreach (var result in hashPathList)
        {
            if (result["issuer"] != null)
            {
                foreach (var existing in ((IdentifierIssuer)result["issuer"]).getOrder())
                {
                    this.canonicalIssuer.getId(existing);
                }
            }
        }
    }

    // Note: At this point all blank nodes in the set of RDF quads have been
    // assigned canonical identifiers, which have been stored in the
    // canonical issuer. Here each quad is updated by assigning each of its
    // blank nodes its new identifier.

    // 7) For each quad, quad, in input dataset:
    List<string> normalized = new List<string>();
    foreach (var quadMap in this.quads)
    {
        // Replace any existing blank node identifiers using the canonical
        // identifiers previously issued by canonical issuer. Note: the copy
        // is optimized away, the quad is updated in place.
        foreach (var key in quadMap.Keys)
        {
            if (key.Equals("predicate"))
            {
                continue;
            }

            var component = quadMap[key];
            if (component["type"].Equals("blank node")
                && !component["value"].StartsWith(this.canonicalIssuer.getPrefix(), StringComparison.Ordinal))
            {
                // Indexer assignment overwrites the existing "value" entry;
                // Add() would throw because the key is already present.
                component["value"] = this.canonicalIssuer.getId(component["value"]);
            }
        }

        // 7.2) Add quad copy to the normalized dataset.
        string graphLabel = quadMap.ContainsKey("name") && quadMap["name"] != null
            ? quadMap["name"]["value"]
            : null;
        RDFDataset.Quad relabelled = new RDFDataset.Quad(quadMap, graphLabel);
        normalized.Add(RDFDatasetUtils.ToNQuad(relabelled, graphLabel));
    }

    // 8) Return the normalized dataset.
    Collections.SortInPlace(normalized);

    bool asNQuads = false;
    if (this.options.format != null)
    {
        // "applications/nquads" is the misspelling this method used to test
        // for; it is still accepted so existing callers keep working.
        asNQuads = "application/nquads".Equals(this.options.format)
            || "applications/nquads".Equals(this.options.format);
        if (!asNQuads)
        {
            // will need to implement error handling
            return null;
        }
    }

    StringBuilder rval = new StringBuilder();
    foreach (var n in normalized)
    {
        rval.Append(n);
    }

    if (asNQuads)
    {
        return rval.ToString();
    }

    try
    {
        return RDFDatasetUtils.ParseNQuads(rval.ToString());
    }
    catch (Exception ex)
    {
        // NOTE(review): the exception is logged and handed back as the
        // result object rather than rethrown; kept because callers may
        // depend on it.
        Console.Out.WriteLine(ex);
        return ex;
    }
}
/// <summary>
/// Computes the first degree hash of a blank node: each quad recorded for
/// the blank node in <c>this.blankNodeInfo</c> is copied, serialized, and the
/// sorted serializations are hashed.
/// </summary>
/// <remarks>STATUS : working on it</remarks>
/// <param name="id">the reference blank node identifier.</param>
/// <returns>
/// the result of <c>NormalizeUtils.sha256HashnQuads</c> over the sorted
/// serialized quads, or the string form of an existing <c>"hash"</c> entry.
/// </returns>
private string hashFirstDegreeQuads(string id)
{
    IDictionary<string, IList<Object>> info = this.blankNodeInfo[id];

    // NOTE(review): nothing in this method stores a "hash" entry, and the
    // entry type is a list, so ToString() here would not yield a hash value
    // - confirm whether this branch is ever meant to be taken.
    IList<Object> cached;
    if (info.TryGetValue("hash", out cached))
    {
        return cached.ToString();
    }

    // 1) Initialize nquads to an empty list. It will be used to store quads
    // in N-Quads format.
    IList<string> nquads = new List<string>();

    // 2) Get the list of quads associated with the reference blank node
    // identifier in the blank node to quads map.
    IList<Object> quads = info["quads"];

    // 3) For each quad quad in quads:
    foreach (var quad in quads)
    {
        // 3.1) Serialize the quad in N-Quads format with the following
        // special rule:
        // 3.1.1) If any component in quad is an blank node, then serialize
        // it using a special identifier as follows:
        IDictionary<string, IDictionary<string, string>> copy =
            new Dictionary<string, IDictionary<string, string>>();

        // 3.1.2) If the blank node's existing blank node identifier matches
        // the reference blank node identifier then use the blank node
        // identifier _:a, otherwise, use the blank node identifier _:z.
        RDFDataset.Quad quadMap = (RDFDataset.Quad)quad;
        foreach (var entry in quadMap)
        {
            // NOTE(review): this builds a one-entry map holding the
            // stringified value under the component's own key rather than
            // copying the component's fields - verify this is what
            // modifyFirstDegreeComponent and ToNQuad expect.
            IDictionary<string, string> component = new Dictionary<string, string>();
            component.Add(entry.Key, entry.Value.ToString());

            // Compare the entry's key (not the key/value pair itself) so the
            // predicate is copied through unmodified.
            if (entry.Key.Equals("predicate"))
            {
                copy.Add(entry.Key, component);
                continue;
            }
            copy.Add(entry.Key, modifyFirstDegreeComponent(component, id));
        }

        RDFDataset.Quad copyQuad = new RDFDataset.Quad(
            copy,
            copy.ContainsKey("name") && copy["name"] != null ? (copy["name"])["value"] : null);
        nquads.Add(RDFDatasetUtils.ToNQuad(
            copyQuad,
            copyQuad.ContainsKey("name") && copyQuad["name"] != null
                ? (string)((IDictionary<string, object>)copyQuad["name"])["value"]
                : null));
    }

    // 4) Sort nquads in lexicographical order.
    Collections.SortInPlace(nquads);

    // 5) Return the hash that results from passing the sorted, joined
    // nquads through the hash algorithm.
    return NormalizeUtils.sha256HashnQuads(nquads);
}