/// <summary>
        /// Creates a typed literal node for the given content, optionally applying a text transformation first.
        /// </summary>
        /// <param name="pDataGraph">Graph used to create the node.</param>
        /// <param name="pContenido">Node content. It is trimmed before use and must not be null.</param>
        /// <param name="pDatatype">URI of the content datatype; when null or empty, xsd:string is used.</param>
        /// <param name="pTransform">
        /// Optional template. Every "{value}" is replaced by the trimmed content, and a
        /// "{regex|PATTERN|endregex}" token is replaced by the first match of PATTERN in the trimmed content
        /// (an empty string when nothing matches). A template containing neither token leaves the content untouched.
        /// </param>
        /// <returns>Literal node built from the (possibly transformed) content and the datatype.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The transformation opens a "{regex|" token that is never closed with "|endregex}".
        /// </exception>
        private ILiteralNode CreateILiteralNodeType(RohGraph pDataGraph, string pContenido, string pDatatype, string pTransform = null)
        {
            const string valueToken = "{value}";
            const string regexOpen = "{regex|";
            const string regexClose = "|endregex}";
            const string defaultDatatype = "http://www.w3.org/2001/XMLSchema#string";

            pContenido = pContenido.Trim();

            if (!string.IsNullOrEmpty(pTransform))
            {
                // Both tokens are resolved against the raw (trimmed) content and applied to the same
                // working copy of the template, so a template that combines them keeps both substitutions.
                string rawContent = pContenido;
                bool hasValueToken = pTransform.Contains(valueToken);
                int regexStart = pTransform.IndexOf(regexOpen, StringComparison.Ordinal);

                if (hasValueToken || regexStart >= 0)
                {
                    string transformed = pTransform;

                    if (regexStart >= 0)
                    {
                        int patternStart = regexStart + regexOpen.Length;
                        int patternEnd = pTransform.IndexOf(regexClose, patternStart, StringComparison.Ordinal);
                        if (patternEnd < 0)
                        {
                            // Same exception type the unguarded Substring call used to raise, with a usable message.
                            throw new ArgumentOutOfRangeException(nameof(pTransform), pTransform, "The '{regex|' token has no closing '|endregex}' marker.");
                        }

                        string pattern = pTransform.Substring(patternStart, patternEnd - patternStart);
                        Match match = new Regex(pattern).Match(rawContent);

                        // match.Value is an empty string when the pattern does not match.
                        transformed = transformed.Replace(regexOpen + pattern + regexClose, match.Value);
                    }

                    if (hasValueToken)
                    {
                        transformed = transformed.Replace(valueToken, rawContent);
                    }

                    pContenido = transformed;
                }
            }

            string datatype = string.IsNullOrEmpty(pDatatype) ? defaultDatatype : pDatatype;
            return pDataGraph.CreateLiteralNode(pContenido, new Uri(datatype));
        }
        /// <summary>
        /// Publishes an RDF graph in Asio: removes the data it supersedes, loads its triples and, when
        /// discovery link data is supplied, stores one provenance activity per value and source.
        /// </summary>
        /// <param name="pDataGraph">Graph with the data to load.</param>
        /// <param name="pOntologyGraph">Graph with the ontology; when null, no inference is run and single-valued properties are not cleaned.</param>
        /// <param name="pAttributedTo">Subject URI (key) and name (value) of the agent the triples coming from external APIs are attributed to; may be null.</param>
        /// <param name="pActivityStartedAtTime">Start of the process.</param>
        /// <param name="pActivityEndedAtTime">End of the process.</param>
        /// <param name="pDiscoverLinkData">Link discovery data; provenance is only written when it and its <c>entitiesProperties</c> are not null.</param>
        /// <param name="pCallUrisFactoryApiService">Service used to call the Uris Factory methods.</param>
        public void PublishRDF(RohGraph pDataGraph, RohGraph pOntologyGraph, KeyValuePair<string, string>? pAttributedTo, DateTime pActivityStartedAtTime, DateTime pActivityEndedAtTime, DiscoverLinkData pDiscoverLinkData, CallUrisFactoryApiService pCallUrisFactoryApiService)
        {
            // The inferred copy is only consumed by step 2, so it is only built when an ontology is supplied.
            RohGraph inferenceDataGraph = null;
            if (pOntologyGraph != null)
            {
                inferenceDataGraph = pDataGraph.Clone();
                RohRdfsReasoner reasoner = new RohRdfsReasoner();
                reasoner.Initialise(pOntologyGraph);
                reasoner.Apply(inferenceDataGraph);
            }

            // 1. Remove from the database the primary entities that appear in the RDF.
            HashSet<string> graphs = RemovePrimaryTopics(ref pDataGraph);
            graphs.Add(_Graph);

            // 2. Remove from the database every triple whose subject and predicate are in the RDF to load
            //    and are marked as single-valued.
            if (inferenceDataGraph != null)
            {
                RemoveMonovaluatedProperties(pOntologyGraph, inferenceDataGraph);
            }

            // 3. Insert the triples in the database.
            if (pAttributedTo.HasValue)
            {
                // Describe the software agent (type and name) inside the graph being loaded.
                IUriNode agentNode = pDataGraph.CreateUriNode(UriFactory.Create(pAttributedTo.Value.Key));
                IUriNode rdfTypePredicate = pDataGraph.CreateUriNode(UriFactory.Create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
                IUriNode softwareAgentType = pDataGraph.CreateUriNode(UriFactory.Create("http://www.w3.org/ns/prov#SoftwareAgent"));
                pDataGraph.Assert(new Triple(agentNode, rdfTypePredicate, softwareAgentType));

                IUriNode namePredicate = pDataGraph.CreateUriNode(UriFactory.Create("http://purl.org/roh/mirror/foaf#name"));
                ILiteralNode agentName = pDataGraph.CreateLiteralNode(pAttributedTo.Value.Value, new Uri("http://www.w3.org/2001/XMLSchema#string"));
                pDataGraph.Assert(new Triple(agentNode, namePredicate, agentName));
            }
            SparqlUtility.LoadTriples(SparqlUtility.GetTriplesFromGraph(pDataGraph), _SPARQLEndpoint, _QueryParam, _Graph, _Username, _Password);

            // 4. Insert the provenance triples in the database.
            if (pDiscoverLinkData != null && pDiscoverLinkData.entitiesProperties != null)
            {
                // The timestamps do not change inside the loops; format them once and with the invariant
                // culture, because ':' in a custom format string is a culture-dependent separator.
                System.Globalization.CultureInfo invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
                string startedAt = pActivityStartedAtTime.ToString("yyyy-MM-ddTHH:mm:ss.fffzzz", invariantCulture);
                string endedAt = pActivityEndedAtTime.ToString("yyyy-MM-ddTHH:mm:ss.fffzzz", invariantCulture);

                Dictionary<string, List<string>> graphDeletes = new Dictionary<string, List<string>>();
                Dictionary<string, List<string>> graphTriples = new Dictionary<string, List<string>>();

                foreach (string subjectUri in pDiscoverLinkData.entitiesProperties.Keys)
                {
                    foreach (DiscoverLinkData.PropertyData property in pDiscoverLinkData.entitiesProperties[subjectUri])
                    {
                        string propertyUri = property.property;
                        foreach (var valueProvenance in property.valueProvenance)
                        {
                            string objectValue = valueProvenance.Key;
                            HashSet<string> sourceIds = valueProvenance.Value;

                            bool objectIsUri = Uri.IsWellFormedUriString(objectValue, UriKind.Absolute);

                            // Backslash, double quote, line feed and carriage return are the characters that
                            // cannot appear raw inside a double-quoted literal; the backslash goes first so the
                            // escapes added afterwards are not escaped again.
                            string escapedObject = objectValue
                                                   .Replace("\\", "\\\\")
                                                   .Replace("\"", "\\\"")
                                                   .Replace("\n", "\\n")
                                                   .Replace("\r", "\\r");

                            foreach (string sourceId in sourceIds)
                            {
                                string graph = pCallUrisFactoryApiService.GetUri("Graph", sourceId);

                                List<string> triples;
                                if (!graphTriples.TryGetValue(graph, out triples))
                                {
                                    triples = new List<string>();
                                    graphTriples.Add(graph, triples);
                                }

                                // One blank node per (subject, property, value, source) describes the activity.
                                string bNodeid = "_:" + Guid.NewGuid().ToString();
                                triples.Add($@"<{subjectUri}> <http://www.w3.org/ns/prov#wasUsedBy> {bNodeid} .");
                                triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Activity> .");
                                triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <{propertyUri}>.");
                                if (objectIsUri)
                                {
                                    triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> <{objectValue}>.");
                                }
                                else
                                {
                                    triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> ""{escapedObject}""^^<http://www.w3.org/2001/XMLSchema#string>.");
                                }

                                // NOTE(review): the XSD datatype is spelled 'dateTime'; the lowercase IRI is kept
                                // as it was because data already stored may rely on it — confirm before changing.
                                triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#startedAtTime> ""{startedAt}""^^<http://www.w3.org/2001/XMLSchema#datetime>.");
                                triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#endedAtTime> ""{endedAt}""^^<http://www.w3.org/2001/XMLSchema#datetime>.");

                                triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#wasAssociatedWith> <{pCallUrisFactoryApiService.GetUri("http://purl.org/roh/mirror/foaf#Organization", sourceId)}>.");

                                // The agent is optional: reading pAttributedTo.Value without this check throws
                                // when no agent was supplied.
                                if (pAttributedTo.HasValue)
                                {
                                    triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#wasAssociatedWith> <{pAttributedTo.Value.Key}>.");
                                }

                                List<string> deletePatterns;
                                if (!graphDeletes.TryGetValue(graph, out deletePatterns))
                                {
                                    deletePatterns = new List<string>();
                                    graphDeletes.Add(graph, deletePatterns);
                                }

                                // Only literal values get a delete pattern for previously loaded provenance.
                                if (!objectIsUri)
                                {
                                    string stringDelete = $@"   {{
                                                                ?s ?p ?o. 
                                                                ?o <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <{propertyUri}>.
                                                                ?o <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> ""{escapedObject}""^^<http://www.w3.org/2001/XMLSchema#string>.
                                                                FILTER(?s = <{subjectUri}>)
                                                            }}";
                                    deletePatterns.Add(stringDelete);
                                }
                            }
                        }
                    }
                }

                // Remove the provenance triples that are already loaded.
                foreach (KeyValuePair<string, List<string>> graphDelete in graphDeletes)
                {
                    graphs.Add(graphDelete.Key);

                    // A graph whose values were all URIs has no pattern to delete; skip the empty request.
                    if (graphDelete.Value.Count == 0)
                    {
                        continue;
                    }

                    string unionOfPatterns = string.Join("}UNION{", graphDelete.Value);
                    string queryDeleteProvenance = $@"  DELETE {{ ?s ?p ?o. }}
                                                        WHERE 
                                                        {{
                                                            {{{unionOfPatterns}}}
                                                        }}";
                    _SparqlUtility.SelectData(_SPARQLEndpoint, graphDelete.Key, queryDeleteProvenance, _QueryParam, _Username, _Password);
                }

                // Load the new triples.
                foreach (KeyValuePair<string, List<string>> graphTriple in graphTriples)
                {
                    SparqlUtility.LoadTriples(graphTriple.Value, _SPARQLEndpoint, _QueryParam, graphTriple.Key, _Username, _Password);
                }
            }

            // 5. Clean up blank nodes that are orphaned or have no triples.
            //TODO move to a task that runs continuously
            //DeleteOrphanNodes(graphs);
        }
        // Example no. 3
        // 0
        /// <summary>
        /// Applies discovery to the entities already loaded in the SGI. Every person is processed in random
        /// order: the external integration data is obtained, the values that the person's graph already
        /// contains are discarded, and whatever remains is published in the SGI.
        /// A failure while processing one person is logged and does not stop the remaining ones.
        /// </summary>
        /// <param name="pSecondsSleep">Seconds to sleep after processing each person (also applied when processing fails).</param>
        /// <param name="pCallUrisFactoryApiService">Service used to call the Uris Factory methods.</param>
        public void ApplyDiscoverLoadedEntities(int pSecondsSleep, CallUrisFactoryApiService pCallUrisFactoryApiService)
        {
            #region Load configuration
            ConfigSparql ConfigSparql             = new ConfigSparql();
            string       SGI_SPARQLEndpoint       = ConfigSparql.GetEndpoint();
            string       SGI_SPARQLGraph          = ConfigSparql.GetGraph();
            string       SGI_SPARQLQueryParam     = ConfigSparql.GetQueryParam();
            string       SGI_SPARQLUsername       = ConfigSparql.GetUsername();
            string       SGI_SPARQLPassword       = ConfigSparql.GetPassword();
            // The Unidata settings below are only referenced by the publication code that is commented out
            // at the end of this method.
            string       Unidata_SPARQLEndpoint   = ConfigSparql.GetUnidataEndpoint();
            string       Unidata_SPARQLGraph      = ConfigSparql.GetUnidataGraph();
            string       Unidata_SPARQLQueryParam = ConfigSparql.GetUnidataQueryParam();
            string       Unidata_SPARQLUsername   = ConfigSparql.GetUnidataUsername();
            string       Unidata_SPARQLPassword   = ConfigSparql.GetUnidataPassword();

            ConfigService ConfigService       = new ConfigService();
            string        UnidataDomain       = ConfigService.GetUnidataDomain();
            string        UnidataUriTransform = ConfigService.GetUnidataUriTransform();
            float         MaxScore            = ConfigService.GetMaxScore();
            float         MinScore            = ConfigService.GetMinScore();

            ConfigScopus   ConfigScopus      = new ConfigScopus();
            string         ScopusApiKey      = ConfigScopus.GetScopusApiKey();
            string         ScopusUrl         = ConfigScopus.GetScopusUrl();
            ConfigCrossref ConfigCrossref    = new ConfigCrossref();
            string         CrossrefUserAgent = ConfigCrossref.GetCrossrefUserAgent();
            ConfigWOS      ConfigWOS         = new ConfigWOS();
            string         WOSAuthorization  = ConfigWOS.GetWOSAuthorization();
            #endregion

            DiscoverUtility discoverUtility = new DiscoverUtility();

            // Load every person and shuffle the list.
            List<string> personList       = discoverUtility.GetPersonList(SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword);
            List<string> randomPersonList = GetRandomOrderList(personList);

            // The ETL service is only needed to fetch the ontology, so the scope it is resolved from is
            // disposed as soon as that call returns instead of being left undisposed.
            RohGraph ontologyGraph;
            using (var scope = _serviceScopeFactory.CreateScope())
            {
                CallEtlApiService callEtlApiService = scope.ServiceProvider.GetRequiredService<CallEtlApiService>();
                ontologyGraph = callEtlApiService.CallGetOntology();
            }

            foreach (string person in randomPersonList)
            {
                try
                {
                    // Start time of this execution.
                    DateTime startTime = DateTime.Now;

                    // Graph of a single person.
                    RohGraph dataGraph = discoverUtility.GetDataGraphPersonLoadedForDiscover(person, SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword);
                    // Keep an untouched copy to check later which values were already loaded.
                    RohGraph originalDataGraph = dataGraph.Clone();

                    RohRdfsReasoner reasoner = new RohRdfsReasoner();
                    reasoner.Initialise(ontologyGraph);
                    // NOTE(review): the inferred copy is not read again in this method — confirm whether
                    // applying the reasoner here is still needed.
                    RohGraph dataInferenceGraph = dataGraph.Clone();
                    reasoner.Apply(dataInferenceGraph);

                    bool hasChanges = false;
                    Dictionary<string, Dictionary<string, float>>              discoveredEntitiesProbability = new Dictionary<string, Dictionary<string, float>>();
                    Dictionary<string, ReconciliationData.ReconciliationScore> entidadesReconciliadasConIntegracionExternaAux;
                    Dictionary<string, HashSet<string>>                        discardDissambiguations = new Dictionary<string, HashSet<string>>();
                    DiscoverCache       discoverCache       = new DiscoverCache();
                    DiscoverCacheGlobal discoverCacheGlobal = new DiscoverCacheGlobal();

                    // Obtain the external integration data.
                    ReconciliationData reconciliationData = new ReconciliationData();
                    DiscoverLinkData   discoverLinkData   = new DiscoverLinkData();
                    Dictionary<string, List<DiscoverLinkData.PropertyData>> integration = discoverUtility.ExternalIntegration(
                        ref hasChanges, ref reconciliationData, ref discoverLinkData, ref discoveredEntitiesProbability, ref dataGraph,
                        reasoner, null, ontologyGraph, out entidadesReconciliadasConIntegracionExternaAux,
                        discardDissambiguations, discoverCache, discoverCacheGlobal,
                        ScopusApiKey, ScopusUrl, CrossrefUserAgent, WOSAuthorization, MinScore, MaxScore,
                        SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword,
                        pCallUrisFactoryApiService, false);

                    // Clean 'integration' so triples that are already loaded are not inserted again.
                    // Snapshots (ToList) are iterated because entries are removed along the way.
                    foreach (string entity in integration.Keys.ToList())
                    {
                        foreach (DiscoverLinkData.PropertyData propertyData in integration[entity].ToList())
                        {
                            string          p       = propertyData.property;
                            HashSet<string> objetos = new HashSet<string>(propertyData.valueProvenance.Keys.ToList());
                            foreach (string o in objetos)
                            {
                                // Escape the characters that cannot appear raw inside a single-quoted SPARQL
                                // literal; otherwise a value such as "O'Brien" yields an invalid query.
                                string escapedValue = o
                                                      .Replace("\\", "\\\\")
                                                      .Replace("'", "\\'")
                                                      .Replace("\n", "\\n")
                                                      .Replace("\r", "\\r");
                                string askQuery = $@"ASK
                                        WHERE 
                                        {{
                                            ?s ?p ?o.
                                            FILTER(?s=<{entity}>)
                                            FILTER(?p=<{p}>)
                                            FILTER(str(?o)='{escapedValue}')
                                        }}";
                                bool alreadyLoaded = ((SparqlResultSet)originalDataGraph.ExecuteQuery(askQuery)).Result;
                                if (alreadyLoaded)
                                {
                                    // Drop the value because it was already loaded.
                                    propertyData.valueProvenance.Remove(o);
                                }
                            }
                            if (propertyData.valueProvenance.Count == 0)
                            {
                                integration[entity].Remove(propertyData);
                            }
                        }
                        if (integration[entity].Count == 0)
                        {
                            integration.Remove(entity);
                        }
                    }

                    // Build a graph with the content of 'integration' plus the rdf:type and owl:sameAs of each entity.
                    RohGraph dataGraphIntegration = new RohGraph();
                    foreach (string sujeto in integration.Keys)
                    {
                        IUriNode s = dataGraphIntegration.CreateUriNode(UriFactory.Create(sujeto));

                        // Add the owl:sameAs and rdf:type of the entity. SPARQL variable names are
                        // case-sensitive, so the OPTIONAL must bind the same ?sameas that is projected.
                        SparqlResultSet typesAndSameAs = (SparqlResultSet)dataGraph.ExecuteQuery("select ?rdftype ?sameas where {?s a ?rdftype. OPTIONAL{?s <http://www.w3.org/2002/07/owl#sameAs> ?sameas} FILTER(?s=<" + sujeto + ">)}");
                        foreach (SparqlResult sparqlResult in typesAndSameAs.Results)
                        {
                            string   rdfType  = sparqlResult["rdftype"].ToString();
                            IUriNode pRdfType = dataGraphIntegration.CreateUriNode(UriFactory.Create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
                            IUriNode oRdfType = dataGraphIntegration.CreateUriNode(UriFactory.Create(rdfType));
                            dataGraphIntegration.Assert(new Triple(s, pRdfType, oRdfType));

                            // The OPTIONAL may leave ?sameas without a value for this row.
                            if (sparqlResult.Variables.Contains("sameas") && sparqlResult["sameas"] != null)
                            {
                                string   sameas  = sparqlResult["sameas"].ToString();
                                IUriNode pSameAs = dataGraphIntegration.CreateUriNode(UriFactory.Create("http://www.w3.org/2002/07/owl#sameAs"));
                                IUriNode oSameAs = dataGraphIntegration.CreateUriNode(UriFactory.Create(sameas));
                                dataGraphIntegration.Assert(new Triple(s, pSameAs, oSameAs));
                            }
                        }

                        foreach (DiscoverLinkData.PropertyData propertyData in integration[sujeto])
                        {
                            foreach (string valor in propertyData.valueProvenance.Keys)
                            {
                                IUriNode p = dataGraphIntegration.CreateUriNode(UriFactory.Create(propertyData.property));
                                if (Uri.IsWellFormedUriString(valor, UriKind.Absolute))
                                {
                                    // The object is the discovered value itself, not the property URI.
                                    IUriNode uriNode = dataGraphIntegration.CreateUriNode(UriFactory.Create(valor));
                                    dataGraphIntegration.Assert(new Triple(s, p, uriNode));
                                }
                                else
                                {
                                    ILiteralNode literalNode = dataGraphIntegration.CreateLiteralNode(valor, new Uri("http://www.w3.org/2001/XMLSchema#string"));
                                    dataGraphIntegration.Assert(new Triple(s, p, literalNode));
                                }

                                foreach (string org in propertyData.valueProvenance[valor])
                                {
                                    // Copy the triples that describe the source organization and its graph.
                                    SparqlResultSet sparqlResultSetOrgs = (SparqlResultSet)dataGraph.ExecuteQuery("select ?s ?p ?o where {?s ?p ?o. FILTER(?s in(<" + pCallUrisFactoryApiService.GetUri("http://purl.org/roh/mirror/foaf#Organization", org) + ">,<" + pCallUrisFactoryApiService.GetUri("Graph", org) + "> ))}");
                                    foreach (SparqlResult sparqlResult in sparqlResultSetOrgs.Results)
                                    {
                                        INode sOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["s"].ToString()));
                                        INode pOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["p"].ToString()));
                                        if (sparqlResult["o"] is UriNode)
                                        {
                                            INode oOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["o"].ToString()));
                                            dataGraphIntegration.Assert(new Triple(sOrg, pOrg, oOrg));
                                        }
                                        else if (sparqlResult["o"] is LiteralNode)
                                        {
                                            LiteralNode literal = (LiteralNode)sparqlResult["o"];
                                            INode       oOrg    = dataGraphIntegration.CreateLiteralNode(literal.Value, literal.DataType);
                                            dataGraphIntegration.Assert(new Triple(sOrg, pOrg, oOrg));
                                        }
                                    }
                                }
                            }
                        }
                    }

                    // End time of this execution.
                    DateTime endTime = DateTime.Now;
                    if (integration.Count > 0)
                    {
                        // There is new data, so load it.
                        string urlDiscoverAgent = pCallUrisFactoryApiService.GetUri("Agent", "discover");

                        // Publish in the SGI.
                        AsioPublication asioPublication = new AsioPublication(SGI_SPARQLEndpoint, SGI_SPARQLQueryParam, SGI_SPARQLGraph, SGI_SPARQLUsername, SGI_SPARQLPassword);
                        asioPublication.PublishRDF(dataGraphIntegration, null, new KeyValuePair<string, string>(urlDiscoverAgent, "Algoritmos de descubrimiento"), startTime, endTime, discoverLinkData, pCallUrisFactoryApiService);

                        // Prepare the data to load it in Unidata.
                        RohGraph unidataGraph = dataGraphIntegration.Clone();
                        #region Entities without a sameAs pointing to Unidata are removed, they must not be loaded
                        // As above, the OPTIONAL binds the same ?sameas variable that is projected.
                        SparqlResultSet          unidataEntities           = (SparqlResultSet)unidataGraph.ExecuteQuery("select ?s ?rdftype ?sameas where {?s a ?rdftype. OPTIONAL{?s <http://www.w3.org/2002/07/owl#sameAs> ?sameas} }");
                        Dictionary<string, bool> entidadesConSameAsUnidata = new Dictionary<string, bool>();
                        foreach (SparqlResult sparqlResult in unidataEntities.Results)
                        {
                            string s = sparqlResult["s"].ToString();
                            if (!entidadesConSameAsUnidata.ContainsKey(s))
                            {
                                entidadesConSameAsUnidata.Add(s, false);
                            }
                            if (sparqlResult.Variables.Contains("sameas") && sparqlResult["sameas"] != null)
                            {
                                if (sparqlResult["sameas"].ToString().StartsWith(UnidataDomain, StringComparison.Ordinal))
                                {
                                    entidadesConSameAsUnidata[s] = true;
                                }
                            }
                        }
                        TripleStore store = new TripleStore();
                        store.Add(unidataGraph);
                        foreach (string entity in entidadesConSameAsUnidata.Keys)
                        {
                            if (!entidadesConSameAsUnidata[entity])
                            {
                                // Delete every triple whose subject is the entity without a Unidata sameAs.
                                SparqlUpdateParser       parser    = new SparqlUpdateParser();
                                SparqlUpdateCommandSet   delete    = parser.ParseFromString(@"DELETE { ?s ?p ?o. }
                                                                                WHERE 
                                                                                {
                                                                                    ?s ?p ?o. 
                                                                                    FILTER(?s = <" + entity + @">)
                                                                                
                                                                                }");
                                LeviathanUpdateProcessor processor = new LeviathanUpdateProcessor(store);
                                processor.ProcessCommandSet(delete);
                            }
                        }
                        #endregion

                        //TODO uncomment when Unidata is enabled
                        ////If there are triples to load in Unidata, proceed
                        //if (unidataGraph.Triples.ToList().Count > 0)
                        //{
                        //    //Publish in UNIDATA
                        //    AsioPublication asioPublicationUnidata = new AsioPublication(Unidata_SPARQLEndpoint, Unidata_SPARQLQueryParam, Unidata_SPARQLGraph, Unidata_SPARQLUsername, Unidata_SPARQLPassword);
                        //    // Prepares the graph for Unidata: the Unidata URIs found in the sameAs become the subjects and the former subjects are added as sameAs
                        //    unidataGraph = AsioPublication.TransformUrisToUnidata(unidataGraph, UnidataDomain, UnidataUriTransform);
                        //    asioPublicationUnidata.PublishRDF(unidataGraph, null, new KeyValuePair<string, string>(urlDiscoverAgent, "Algoritmos de descubrimiento"), startTime, endTime, discoverLinkData,pCallUrisFactoryApiService);
                        //}
                    }
                }
                catch (Exception exception)
                {
                    // A failure with one person must not stop the remaining ones.
                    Logging.Error(exception);
                }
                Thread.Sleep(pSecondsSleep * 1000);
            }
        }