예제 #1
0
        /// <summary>Hashes all of the quads about a blank node.</summary>
        /// <remarks>
        /// The hash is computed at most once per blank node: the result is stored
        /// under the <c>"hash"</c> key of the node's entry in
        /// <paramref name="bnodes"/> and returned directly on later calls.
        /// </remarks>
        /// <param name="id">the ID of the bnode to hash quads for.</param>
        /// <param name="bnodes">the mapping of bnodes to quads; each entry holds a
        /// <c>"quads"</c> list and, once computed, a <c>"hash"</c> string.</param>
        /// <param name="namer">the canonical bnode namer (not read by this method).</param>
        /// <returns>the new hash.</returns>
        private static string HashQuads(string id, IDictionary <string, IDictionary <string, object> > bnodes, UniqueNamer
                                        namer)
        {
            // Look the bnode entry up once instead of on every access.
            IDictionary <string, object> info = bnodes[id];

            // return cached hash
            object cached;
            if (info.TryGetValue("hash", out cached))
            {
                return((string)cached);
            }
            // serialize all of bnode's quads
            IList <RDFDataset.Quad> quads  = (IList <RDFDataset.Quad>)info["quads"];
            IList <string>          nquads = new List <string>();

            for (int i = 0; i < quads.Count; ++i)
            {
                RDFDataset.Quad quad = quads[i];

                // A quad may have no "name" entry at all, or one that is present
                // but null; both mean "no graph name" rather than an error.
                string graphName = null;
                object name;
                if (quad.TryGetValue("name", out name) && name != null)
                {
                    graphName = (string)((IDictionary <string, object>)name)["value"];
                }
                nquads.Add(RDFDatasetUtils.ToNQuad(quad, graphName, id));
            }
            // sort serialized quads (ordinal, so the order does not depend on culture)
            nquads.SortInPlace(StringComparer.Ordinal);
            // return hashed quads
            string hash = Sha1hash(nquads);

            info["hash"] = hash;
            return(hash);
        }
예제 #2
0
        /// <summary>Hashes all of the quads about a blank node.</summary>
        /// <remarks>
        /// The hash is computed at most once per blank node: the result is stored
        /// under the <c>"hash"</c> property of the node's entry in
        /// <paramref name="bnodes"/> and returned directly on later calls.
        /// </remarks>
        /// <param name="id">the ID of the bnode to hash quads for.</param>
        /// <param name="bnodes">the mapping of bnodes to quads; each entry holds a
        /// <c>"quads"</c> array and, once computed, a <c>"hash"</c> string.</param>
        /// <param name="namer">the canonical bnode namer (not read by this method).</param>
        /// <returns>the new hash.</returns>
        private static string HashQuads(string id, JObject bnodes, UniqueNamer
                                        namer)
        {
            // Look the bnode entry up once instead of on every access.
            JObject info = (JObject)bnodes[id];

            // return cached hash
            if (info.ContainsKey("hash"))
            {
                return((string)info["hash"]);
            }
            // serialize all of bnode's quads
            JArray         quads  = (JArray)info["quads"];
            IList <string> nquads = new List <string>();

            for (int i = 0; i < quads.Count; ++i)
            {
                JToken quad = quads[i];

                // A JSON null is a non-null token, so test the token type as well
                // before treating "name" as an object.
                JToken nameToken = quad["name"];
                string graphName = null;
                if (nameToken != null && nameToken.Type != JTokenType.Null)
                {
                    graphName = (string)((JObject)nameToken)["value"];
                }
                nquads.Add(RDFDatasetUtils.ToNQuad((RDFDataset.Quad)quad, graphName, id));
            }
            // sort serialized quads; ordinal comparison keeps the order (and
            // therefore the resulting hash) independent of the current culture
            nquads.SortInPlace(System.StringComparer.Ordinal);
            // return hashed quads
            string hash = Sha1hash(nquads);

            info["hash"] = hash;
            return(hash);
        }
예제 #3
0
        /// <summary>
        /// Generates unique and duplicate hashes for the given blank nodes, names
        /// them through <c>namer</c>, and returns the normalized output.
        /// </summary>
        /// <remarks>
        /// The method runs as a loop instead of recursing. Each pass hashes every
        /// still-unnamed bnode with <c>HashQuads</c>. Bnodes whose hash is unique
        /// are named in sorted hash order; if any were named, the pass is repeated
        /// for the bnodes that shared a hash. Once a pass names nothing, the
        /// remaining groups are named using <c>HashPaths</c>, every quad in
        /// <c>quads</c> is relabelled and serialized, and the result is returned.
        /// </remarks>
        /// <param name="unnamed_">the IDs of the bnodes that still need a name.</param>
        /// <returns>
        /// the concatenated, sorted N-Quads string when <c>options.format</c> is
        /// <c>"application/nquads"</c>; otherwise the result of
        /// <c>RDFDatasetUtils.ParseNQuads</c> on that string.
        /// </returns>
        /// <exception cref="JsonLD.Core.JsonLdError">
        /// <c>options.format</c> is set to a value other than <c>"application/nquads"</c>.
        /// </exception>
        /// <exception cref="System.PlatformNotSupportedException">
        /// the code was compiled with <c>PORTABLE</c> defined.
        /// </exception>
        public virtual object HashBlankNodes(IEnumerable <string> unnamed_)
        {
#if !PORTABLE
            IList <string> unnamed     = new List <string>(unnamed_);
            IList <string> nextUnnamed = new List <string>();
            IDictionary <string, IList <string> > duplicates = new Dictionary <string, IList <string> >();
            IDictionary <string, string> unique = new Dictionary <string, string>();
            // NOTE: not using the same structure as javascript here to avoid
            // possible stack overflows
            // hash quads for each unnamed bnode
            for (int hui = 0; ; hui++)
            {
                if (hui == unnamed.Count)
                {
                    // done, name blank nodes
                    bool           named  = false;
                    IList <string> hashes = new List <string>(unique.Keys);
                    // ordinal order keeps the naming independent of the current culture
                    hashes.SortInPlace(StringComparer.Ordinal);
                    foreach (string hash in hashes)
                    {
                        namer.GetName(unique[hash]);
                        named = true;
                    }
                    // continue to hash bnodes if a bnode was assigned a name
                    if (named)
                    {
                        // Equivalent of the recursive hashBlankNodes(nextUnnamed)
                        // call: reset the per-pass state and restart the loop.
                        // hui is set to -1 because the loop increment runs next.
                        hui         = -1;
                        unnamed     = nextUnnamed;
                        nextUnnamed = new List <string>();
                        duplicates  = new Dictionary <string, IList <string> >();
                        unique      = new Dictionary <string, string>();
                        continue;
                    }
                    else
                    {
                        // names duplicate hash bnodes
                        // enumerate duplicate hash groups in sorted order
                        hashes = new List <string>(duplicates.Keys);
                        hashes.SortInPlace(StringComparer.Ordinal);
                        // process each group; this loop only ends by returning
                        for (int pgi = 0; ; pgi++)
                        {
                            if (pgi == hashes.Count)
                            {
                                // done, create JSON-LD array
                                IList <string> normalized = new List <string>();
                                // Note: At this point all bnodes in the set of RDF
                                // quads have been assigned canonical names, which
                                // have been stored in the 'namer' object.
                                // Here each quad is updated by assigning each of
                                // its bnodes its new name via the 'namer' object.
                                // update bnode names in each quad and serialize
                                for (int cai = 0; cai < quads.Count; ++cai)
                                {
                                    RDFDataset.Quad quad = quads[cai];
                                    foreach (string attr in new string[] { "subject", "object", "name" })
                                    {
                                        if (quad.ContainsKey(attr))
                                        {
                                            IDictionary <string, object> qa = (IDictionary <string, object>)quad[attr];
                                            // skip bnodes that already carry a canonical name;
                                            // the prefix test is ordinal, not culture-sensitive
                                            if (qa != null && (string)qa["type"] == "blank node" &&
                                                !((string)qa["value"]).StartsWith("_:c14n", StringComparison.Ordinal))
                                            {
                                                qa["value"] = namer.GetName((string)qa["value"]);
                                            }
                                        }
                                    }
                                    string graphName = null;
                                    if (quad.ContainsKey("name") && quad["name"] != null)
                                    {
                                        graphName = (string)((IDictionary <string, object>)quad["name"])["value"];
                                    }
                                    normalized.Add(RDFDatasetUtils.ToNQuad(quad, graphName));
                                }
                                // sort normalized output
                                normalized.SortInPlace(StringComparer.Ordinal);
                                // handle output format: only "application/nquads" is
                                // accepted when a format is given
                                if (options.format != null && !"application/nquads".Equals(options.format))
                                {
                                    throw new JsonLdError(JsonLdError.Error.UnknownFormat, options.format);
                                }
                                // join the lines once (linear, instead of repeated +=)
                                System.Text.StringBuilder joined = new System.Text.StringBuilder();
                                foreach (string line in normalized)
                                {
                                    joined.Append(line);
                                }
                                string rval = joined.ToString();
                                if (options.format != null)
                                {
                                    return(rval);
                                }
                                return(RDFDatasetUtils.ParseNQuads(rval));
                            }
                            // name each group member
                            IList <string> group = duplicates[hashes[pgi]];
                            IList <NormalizeUtils.HashResult> results = new List <NormalizeUtils.HashResult>();
                            for (int n_2 = 0; ; n_2++)
                            {
                                if (n_2 == group.Count)
                                {
                                    // name bnodes in hash order
                                    results.SortInPlace(new _IComparer_145());
                                    foreach (NormalizeUtils.HashResult r in results)
                                    {
                                        // name all bnodes in path namer in
                                        // key-entry order
                                        // Note: key-order is preserved in
                                        // javascript
                                        foreach (string key in r.pathNamer.Existing().GetKeys())
                                        {
                                            namer.GetName(key);
                                        }
                                    }
                                    // move on to the next group (processGroup(i+1))
                                    break;
                                }
                                else
                                {
                                    // skip already-named bnodes
                                    string bnode = group[n_2];
                                    if (namer.IsNamed(bnode))
                                    {
                                        continue;
                                    }
                                    // hash bnode paths
                                    UniqueNamer pathNamer = new UniqueNamer("_:b");
                                    pathNamer.GetName(bnode);
                                    NormalizeUtils.HashResult result = HashPaths(bnode, bnodes, namer, pathNamer);
                                    results.Add(result);
                                }
                            }
                        }
                    }
                }
                // hash unnamed bnode
                string bnode_1 = unnamed[hui];
                string hash_1  = HashQuads(bnode_1, bnodes, namer);
                // store hash as unique or a duplicate
                if (duplicates.ContainsKey(hash_1))
                {
                    duplicates[hash_1].Add(bnode_1);
                    nextUnnamed.Add(bnode_1);
                }
                else
                {
                    if (unique.ContainsKey(hash_1))
                    {
                        // second bnode with this hash: move both into a duplicate
                        // group and queue both for the next pass
                        IList <string> tmp = new List <string>();
                        tmp.Add(unique[hash_1]);
                        tmp.Add(bnode_1);
                        duplicates[hash_1] = tmp;
                        nextUnnamed.Add(unique[hash_1]);
                        nextUnnamed.Add(bnode_1);
                        JsonLD.Collections.Remove(unique, hash_1);
                    }
                    else
                    {
                        unique[hash_1] = bnode_1;
                    }
                }
            }
#else
            throw new PlatformNotSupportedException();
#endif
        }
예제 #4
0
        /// <summary>
        /// Normalizes <c>this.dataset</c> by issuing canonical (<c>_:c14n</c>)
        /// identifiers to its blank nodes and serializing the relabelled quads
        /// in sorted order.
        /// </summary>
        /// <remarks>
        /// Quads from every graph in the dataset are collected first; the
        /// identifier-issuing and serialization steps then run once over all of
        /// them. The numbered comments follow the steps of the normalization
        /// algorithm this method implements.
        /// </remarks>
        /// <returns>
        /// <c>null</c> when the dataset contains no graphs, or when
        /// <c>options.format</c> is set to an unsupported value; the joined
        /// N-Quads string when <c>options.format</c> selects N-Quads; otherwise
        /// the result of <c>RDFDatasetUtils.ParseNQuads</c>, or the caught
        /// exception object if that parse throws.
        /// </returns>
        public Object Normalize()
        {
            this.quads            = new List <IDictionary <string, IDictionary <string, string> > >();
            this.blankNodeInfo    = new Dictionary <string, IDictionary <string, IList <object> > >();
            this.hashToBlankNodes = new Dictionary <string, IList <string> >();
            this.canonicalIssuer  = new IdentifierIssuer("_:c14n");

            /*
             * 2) For every quad in input dataset:
             */
            bool sawGraph = false;

            foreach (string graphKey in this.dataset.Keys)
            {
                sawGraph = true;
                IList <IDictionary <string, IDictionary <string, string> > > triples = (IList <IDictionary <string, IDictionary <string, string> > >) this.dataset[graphKey];

                // Quads of the default graph carry no graph name. (The loop
                // variable cannot be reassigned, hence the separate local.)
                string graphName = graphKey.Equals("@default") ? null : graphKey;

                foreach (IDictionary <string, IDictionary <string, string> > quad in triples)
                {
                    if (graphName != null)
                    {
                        IDictionary <string, string> name = new Dictionary <string, string>();
                        name["type"]  = graphName.StartsWith("_:", StringComparison.Ordinal) ? "blank node" : "IRI";
                        name["value"] = graphName;
                        quad["name"]  = name;
                    }
                    this.quads.Add(quad);

                    /* 2.1) For each blank node that occurs in the quad, add a
                     * reference to the quad using the blank node identifier in the
                     * blank node to quads map, creating a new entry if necessary.
                     * */

                    foreach (string key in quad.Keys)
                    {
                        IDictionary <string, string> component = quad[key];
                        if (key.Equals("predicate") || !component["type"].Equals("blank node"))
                        {
                            continue;
                        }
                        string id = component["value"];
                        // The indexer throws for an unknown key, so test for
                        // presence before reading the entry.
                        if (!this.blankNodeInfo.ContainsKey(id) || this.blankNodeInfo[id] == null)
                        {
                            IDictionary <string, IList <Object> > quadList = new Dictionary <string, IList <Object> >();
                            quadList["quads"] = new List <Object>();
                            quadList["quads"].Add(quad);
                            this.blankNodeInfo[id] = quadList;
                        }
                        else
                        {
                            this.blankNodeInfo[id]["quads"].Add(quad);
                        }
                    }
                }
            }

            // Kept from the previous behaviour: a dataset with no graphs yields null.
            if (!sawGraph)
            {
                return(null);
            }

            // 3) Create a list of non-normalized blank node identifiers and
            // populate it using the keys from the blank node to quads map.
            List <string> nonNormalized = new List <string>(this.blankNodeInfo.Keys);

            /* 4) Initialize simple, a boolean flag, to true. */
            bool simple = true;

            /*
             * 5) While simple is true, issue canonical identifiers for blank nodes:
             */
            while (simple)
            {
                // 5.1) Set simple to false.
                simple = false;

                // 5.2) Clear hash to blank nodes map.
                this.hashToBlankNodes.Clear();

                /*
                 * 5.3) For each blank node identifier identifier in non-normalized
                 * identifiers:
                 */
                foreach (string id in nonNormalized)
                {
                    string hash = hashFirstDegreeQuads(id);

                    if (this.hashToBlankNodes.ContainsKey(hash))
                    {
                        this.hashToBlankNodes[hash].Add(id);
                    }
                    else
                    {
                        List <string> idList = new List <string>();
                        idList.Add(id);
                        this.hashToBlankNodes.Add(hash, idList);
                    }
                }

                /*
                 * 5.4) For each hash to identifier list mapping in hash to blank
                 * nodes map, lexicographically-sorted by hash:
                 */

                // NOTE(review): entries are removed from hashToBlankNodes inside
                // this loop, so sortMapKeys is assumed to return a separate
                // collection rather than a live view of the map - confirm.
                foreach (string hash in sortMapKeys(this.hashToBlankNodes))
                {
                    IList <string> idList = this.hashToBlankNodes[hash];
                    // 5.4.1) Only hashes that identify exactly one blank node
                    // are handled here.
                    if (idList.Count > 1)
                    {
                        continue;
                    }

                    /* 5.4.2) Use the Issue Identifier algorithm, passing canonical
                     * issuer and the single blank node identifier in identifier
                     * list, identifier, to issue a canonical replacement identifier
                     * for identifier.
                     */

                    string id = idList[0];
                    this.canonicalIssuer.getId(id);

                    // 5.4.3) Remove identifier from non-normalized identifiers.
                    nonNormalized.Remove(id);

                    // 5.4.4) Remove hash from the hash to blank nodes map.
                    this.hashToBlankNodes.Remove(hash);

                    //  5.4.5) Set simple to true.
                    simple = true;
                }
            }

            /*
             * 6) For each hash to identifier list mapping in hash to blank nodes
             * map, lexicographically-sorted by hash:
             */
            foreach (string hash in sortMapKeys(this.hashToBlankNodes))
            {
                IList <string> idList = this.hashToBlankNodes[hash];

                /*
                 * 6.1) Create hash path list where each item will be a result of
                 * running the Hash N-Degree Quads algorithm.
                 */
                var hashPathList = new List <IDictionary <string, object> >();

                /*
                 * 6.2) For each blank node identifier identifier in identifier
                 * list:
                 */
                foreach (string id in idList)
                {
                    /*
                     * 6.2.1) If a canonical identifier has already been issued for
                     * identifier, continue to the next identifier.
                     */
                    if (this.canonicalIssuer.hasID(id))
                    {
                        continue;
                    }

                    /*
                     * 6.2.2) Create temporary issuer, an identifier issuer
                     * initialized with the prefix _:b.
                     */
                    IdentifierIssuer issuer = new IdentifierIssuer("_:b");

                    /*
                     * 6.2.3) Use the Issue Identifier algorithm, passing temporary
                     * issuer and identifier, to issue a new temporary blank node
                     * identifier for identifier.
                     */
                    issuer.getId(id);

                    /*
                     * 6.2.4) Run the Hash N-Degree Quads algorithm, passing
                     * temporary issuer, and append the result to the hash path
                     * list.
                     */
                    hashPathList.Add(hashNDegreeQuads(issuer, id));
                }

                /*
                 * 6.3) For each result in the hash path list,
                 * lexicographically-sorted by the hash in result:
                 */
                sortMapList(hashPathList);
                foreach (var result in hashPathList)
                {
                    if (result["issuer"] != null)
                    {
                        foreach (var existing in ((IdentifierIssuer)result["issuer"]).getOrder())
                        {
                            this.canonicalIssuer.getId(existing);
                        }
                    }
                }
            }

            /*
             * Note: At this point all blank nodes in the set of RDF quads have been
             * assigned canonical identifiers, which have been stored in the
             * canonical issuer. Here each quad is updated by assigning each of its
             * blank nodes its new identifier.
             */

            // 7) For each quad, quad, in input dataset:
            List <string> normalized = new List <string>();
            foreach (var quadMap in this.quads)
            {
                /*
                 * Create a copy, quad copy, of quad and replace any existing
                 * blank node identifiers using the canonical identifiers previously
                 * issued by canonical issuer. Note: We optimize away the copy here.
                 */
                foreach (var key in quadMap.Keys)
                {
                    if (key.Equals("predicate"))
                    {
                        continue;
                    }
                    var component = quadMap[key];
                    if (component["type"].Equals("blank node") && !component["value"].StartsWith(this.canonicalIssuer.getPrefix()))
                    {
                        // Overwrite in place: "value" already exists, so Add()
                        // would throw instead of replacing it.
                        component["value"] = this.canonicalIssuer.getId(component["value"]);
                    }
                }

                //  7.2) Add quad copy to the normalized dataset.
                string quadGraphName = quadMap.ContainsKey("name") && quadMap["name"] != null
                    ? (quadMap["name"])["value"] : null;
                RDFDataset.Quad quad = new RDFDataset.Quad(quadMap, quadGraphName);
                normalized.Add(RDFDatasetUtils.ToNQuad(quad, quadGraphName));
            }

            // 8) Return the normalized dataset.
            Collections.SortInPlace(normalized);
            StringBuilder rval = new StringBuilder();
            foreach (var n in normalized)
            {
                rval.Append(n);
            }
            string serialized = rval.ToString();

            if (this.options.format != null)
            {
                // "applications/nquads" is the misspelling this method used to
                // test for; it is still accepted so existing callers keep working.
                if ("application/nquads".Equals(this.options.format) ||
                    "applications/nquads".Equals(this.options.format))
                {
                    return(serialized);
                }
                // will need to implement error handling
                return(null);
            }

            try
            {
                return(RDFDatasetUtils.ParseNQuads(serialized));
            }
            catch (Exception ex)
            {
                // NOTE(review): the exception object itself is handed back as the
                // result; kept as-is because callers may rely on it.
                Console.Out.WriteLine(ex);
                return(ex);
            }
        }
예제 #5
0
        /// <summary>
        /// Computes the first-degree hash of a blank node: every quad that
        /// references the node is serialized with its blank node components
        /// passed through <c>modifyFirstDegreeComponent</c>, the serialized
        /// quads are sorted, and the list is hashed with
        /// <c>NormalizeUtils.sha256HashnQuads</c>.
        /// </summary>
        /// <param name="id">the blank node identifier to hash; must be a key of <c>blankNodeInfo</c>.</param>
        /// <returns>the hash of the sorted, serialized quads.</returns>
        private string hashFirstDegreeQuads(string id)
        {
            IDictionary <string, IList <Object> > info = this.blankNodeInfo[id];

            // NOTE(review): nothing in this method stores a "hash" entry, and the
            // entry type is a list, so ToString() would not yield a hash string -
            // confirm whether this cache path is ever populated elsewhere.
            if (info.ContainsKey("hash"))
            {
                return(info["hash"].ToString());
            }

            // 1) Initialize nquads to an empty list. It will be used to store quads
            // in N-Quads format.
            IList <string> nquads = new List <string>();

            // 2) Get the list of quads quads associated with the reference blank
            // node identifier in the blank node to quads map.
            IList <Object> quads = info["quads"];

            // 3) For each quad quad in quads:
            foreach (var quad in quads)
            {
                // 3.1) Serialize the quad in N-Quads format with the following
                // special rule:

                // 3.1.1) If any component in quad is an blank node, then serialize
                // it using a special identifier as follows:
                IDictionary <string, IDictionary <string, string> > copy = new Dictionary <string, IDictionary <string, string> >();

                /* 3.1.2) If the blank node's existing blank node identifier
                 * matches the reference blank node identifier then use the
                 * blank node identifier _:a, otherwise, use the blank node
                 * identifier _:z.
                 */
                RDFDataset.Quad quadMap = (RDFDataset.Quad)quad;
                foreach (var entry in quadMap)
                {
                    IDictionary <string, string> component;
                    IDictionary <string, string> source = (object)entry.Value as IDictionary <string, string>;
                    if (source != null)
                    {
                        // Work on a copy of the component map so the quad
                        // stored in the dataset is never modified from here.
                        component = new Dictionary <string, string>(source);
                    }
                    else
                    {
                        // Fallback for values that are not string maps: keep the
                        // single-entry representation this method built before.
                        component = new Dictionary <string, string>();
                        component.Add(entry.Key, entry.Value.ToString());
                    }

                    // Compare the entry's key; comparing the key/value pair
                    // itself to a string is never true.
                    if (entry.Key.Equals("predicate"))
                    {
                        copy.Add(entry.Key, component);
                        continue;
                    }
                    copy.Add(entry.Key, modifyFirstDegreeComponent(component, id));
                }

                // Resolve the graph name once and use it both to build the quad
                // and to serialize it.
                string graphName = copy.ContainsKey("name") && copy["name"] != null
                    ? (copy["name"])["value"] : null;
                RDFDataset.Quad copyQuad = new RDFDataset.Quad(copy, graphName);
                nquads.Add(RDFDatasetUtils.ToNQuad(copyQuad, graphName));
            }

            // 4) Sort nquads in lexicographical order.
            Collections.SortInPlace(nquads);

            // 5) Return the hash that results from passing the sorted, joined
            // nquads through the hash algorithm.
            return(NormalizeUtils.sha256HashnQuads(nquads));
        }