/// <summary>
/// Builds a literal node for the given content, typed with the supplied datatype URI.
/// </summary>
/// <param name="pDataGraph">Graph used to create the node.</param>
/// <param name="pContenido">Raw content of the node; it is trimmed before use.</param>
/// <param name="pDatatype">Datatype URI of the content; when null or empty, xsd:string is used.</param>
/// <param name="pTransform">
/// Optional template applied to the content. A <c>{value}</c> token is replaced with the content, and a
/// <c>{regex|PATTERN|endregex}</c> token is replaced with the first match of PATTERN against the content.
/// </param>
/// <returns>The literal node created in <paramref name="pDataGraph"/>.</returns>
private ILiteralNode CreateILiteralNodeType(RohGraph pDataGraph, string pContenido, string pDatatype, string pTransform = null)
{
    const string valueToken = "{value}";
    const string regexOpenToken = "{regex|";
    const string regexCloseToken = "|endregex}";

    string content = pContenido.Trim();

    if (!string.IsNullOrEmpty(pTransform))
    {
        if (pTransform.Contains(valueToken))
        {
            content = pTransform.Replace(valueToken, content);
        }

        if (pTransform.Contains(regexOpenToken))
        {
            // Extract the pattern that sits between the opening and closing regex tokens.
            string afterOpenToken = pTransform.Substring(pTransform.IndexOf(regexOpenToken) + regexOpenToken.Length);
            string pattern = afterOpenToken.Substring(0, afterOpenToken.IndexOf(regexCloseToken));

            // The regex runs against the content as it stands after the {value} substitution above.
            string matchedText = new Regex(pattern).Match(content).Value;
            content = pTransform.Replace(regexOpenToken + pattern + regexCloseToken, matchedText);
        }
    }

    string datatypeUri = string.IsNullOrEmpty(pDatatype)
        ? "http://www.w3.org/2001/XMLSchema#string"
        : pDatatype;

    return pDataGraph.CreateLiteralNode(content, new Uri(datatypeUri));
}
/// <summary>
/// Publishes an RDF graph to the configured SPARQL endpoint, applying the clean-up steps that precede the load
/// and, when link-discovery data is supplied, writing provenance triples to per-source graphs.
/// </summary>
/// <param name="pDataGraph">Graph with the data to load.</param>
/// <param name="pOntologyGraph">Ontology graph; when null, no inference is applied and the monovalued-property clean-up is skipped.</param>
/// <param name="pAttributedTo">Optional agent (key = subject URI, value = name) the triples coming from external APIs are attributed to.</param>
/// <param name="pActivityStartedAtTime">Start of the process, recorded as prov:startedAtTime.</param>
/// <param name="pActivityEndedAtTime">End of the process, recorded as prov:endedAtTime.</param>
/// <param name="pDiscoverLinkData">Link-discovery data used to build the provenance triples; may be null.</param>
/// <param name="pCallUrisFactoryApiService">Service used to obtain URIs from the Uris Factory.</param>
public void PublishRDF(RohGraph pDataGraph, RohGraph pOntologyGraph, KeyValuePair<string, string>? pAttributedTo, DateTime pActivityStartedAtTime, DateTime pActivityEndedAtTime, DiscoverLinkData pDiscoverLinkData, CallUrisFactoryApiService pCallUrisFactoryApiService)
{
    RohGraph inferenceDataGraph = null;
    if (pOntologyGraph != null)
    {
        inferenceDataGraph = pDataGraph.Clone();
        RohRdfsReasoner reasoner = new RohRdfsReasoner();
        reasoner.Initialise(pOntologyGraph);
        reasoner.Apply(inferenceDataGraph);
    }

    // Step 1: remove from the store the primary entities that appear in the RDF.
    HashSet<string> graphs = RemovePrimaryTopics(ref pDataGraph);
    graphs.Add(_Graph);

    // Step 2: remove from the store every triple whose subject and predicate are in the RDF to load
    // and whose predicate is marked as monovalued.
    if (pOntologyGraph != null && inferenceDataGraph != null)
    {
        RemoveMonovaluatedProperties(pOntologyGraph, inferenceDataGraph);
    }

    // Step 3: insert the triples into the store.
    if (pAttributedTo.HasValue)
    {
        // Add the triples that describe the software agent.
        IUriNode agentNode = pDataGraph.CreateUriNode(UriFactory.Create(pAttributedTo.Value.Key));
        IUriNode rdfTypePredicate = pDataGraph.CreateUriNode(UriFactory.Create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
        IUriNode softwareAgentClass = pDataGraph.CreateUriNode(UriFactory.Create("http://www.w3.org/ns/prov#SoftwareAgent"));
        pDataGraph.Assert(new Triple(agentNode, rdfTypePredicate, softwareAgentClass));

        IUriNode namePredicate = pDataGraph.CreateUriNode(UriFactory.Create("http://purl.org/roh/mirror/foaf#name"));
        ILiteralNode agentName = pDataGraph.CreateLiteralNode(pAttributedTo.Value.Value, new Uri("http://www.w3.org/2001/XMLSchema#string"));
        pDataGraph.Assert(new Triple(agentNode, namePredicate, agentName));
    }
    SparqlUtility.LoadTriples(SparqlUtility.GetTriplesFromGraph(pDataGraph), _SPARQLEndpoint, _QueryParam, _Graph, _Username, _Password);

    // Step 4: insert the provenance triples into the store.
    if (pDiscoverLinkData != null && pDiscoverLinkData.entitiesProperties != null)
    {
        Dictionary<string, List<string>> graphDeletes = new Dictionary<string, List<string>>();
        Dictionary<string, List<string>> graphTriples = new Dictionary<string, List<string>>();

        // The ':' custom format specifier is culture-sensitive, so format with the invariant culture
        // to always emit an ISO-8601 style timestamp. Both values are loop-invariant.
        const string timestampFormat = "yyyy-MM-ddTHH:mm:ss.fffzzz";
        string startedAt = pActivityStartedAtTime.ToString(timestampFormat, System.Globalization.CultureInfo.InvariantCulture);
        string endedAt = pActivityEndedAtTime.ToString(timestampFormat, System.Globalization.CultureInfo.InvariantCulture);

        foreach (string t_subject in pDiscoverLinkData.entitiesProperties.Keys)
        {
            foreach (DiscoverLinkData.PropertyData property in pDiscoverLinkData.entitiesProperties[t_subject])
            {
                string t_property = property.property;
                foreach (var prop in property.valueProvenance)
                {
                    string t_object = prop.Key;
                    HashSet<string> t_sourceids = prop.Value;

                    bool objectIsUri = Uri.IsWellFormedUriString(t_object, UriKind.Absolute);
                    // Same escaping for the inserted literal and for the delete pattern so that both match.
                    string escapedObject = t_object.Replace("\"", "\\\"").Replace("\n", "\\n");

                    foreach (string sourceId in t_sourceids)
                    {
                        string graph = pCallUrisFactoryApiService.GetUri("Graph", sourceId);

                        List<string> triples;
                        if (!graphTriples.TryGetValue(graph, out triples))
                        {
                            triples = new List<string>();
                            graphTriples.Add(graph, triples);
                        }

                        // One blank node (a prov:Activity) per subject/property/value/source combination.
                        string bNodeid = "_:" + Guid.NewGuid().ToString();
                        triples.Add($@"<{t_subject}> <http://www.w3.org/ns/prov#wasUsedBy> {bNodeid} .");
                        triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Activity> .");
                        triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <{t_property}>.");
                        if (objectIsUri)
                        {
                            triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> <{t_object}>.");
                        }
                        else
                        {
                            triples.Add($@"{bNodeid} <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> ""{escapedObject}""^^<http://www.w3.org/2001/XMLSchema#string>.");
                        }
                        // NOTE(review): the XSD datatype is spelled 'dateTime'; '#datetime' is kept here because
                        // already-stored data or consumers may depend on it - confirm before changing.
                        triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#startedAtTime> ""{startedAt}""^^<http://www.w3.org/2001/XMLSchema#datetime>.");
                        triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#endedAtTime> ""{endedAt}""^^<http://www.w3.org/2001/XMLSchema#datetime>.");
                        triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#wasAssociatedWith> <{pCallUrisFactoryApiService.GetUri("http://purl.org/roh/mirror/foaf#Organization", sourceId)}>.");
                        // The agent association is only written when an agent was supplied; reading
                        // pAttributedTo.Value without this guard throws when the nullable has no value.
                        if (pAttributedTo.HasValue)
                        {
                            triples.Add($@"{bNodeid} <http://www.w3.org/ns/prov#wasAssociatedWith> <{pAttributedTo.Value.Key}>.");
                        }

                        List<string> deletes;
                        if (!graphDeletes.TryGetValue(graph, out deletes))
                        {
                            deletes = new List<string>();
                            graphDeletes.Add(graph, deletes);
                        }

                        // Delete patterns are only generated for literal objects.
                        if (!objectIsUri)
                        {
                            string stringDelete = $@"
                                {{
                                    ?s ?p ?o.
                                    ?o <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <{t_property}>.
                                    ?o <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> ""{escapedObject}""^^<http://www.w3.org/2001/XMLSchema#string>.
                                    FILTER(?s = <{t_subject}>)
                                }}";
                            deletes.Add(stringDelete);
                        }
                    }
                }
            }
        }

        // Remove the provenance triples that are already loaded.
        foreach (KeyValuePair<string, List<string>> graphDelete in graphDeletes)
        {
            graphs.Add(graphDelete.Key);
            string unionBody = string.Join("}UNION{", graphDelete.Value);
            string queryDeleteProvenance = $@"
                DELETE {{ ?s ?p ?o. }}
                WHERE
                {{
                    {{{unionBody}}}
                }}";
            _SparqlUtility.SelectData(_SPARQLEndpoint, graphDelete.Key, queryDeleteProvenance, _QueryParam, _Username, _Password);
        }

        // Load the new provenance triples.
        foreach (KeyValuePair<string, List<string>> graphTriple in graphTriples)
        {
            SparqlUtility.LoadTriples(graphTriple.Value, _SPARQLEndpoint, _QueryParam, graphTriple.Key, _Username, _Password);
        }
    }

    // Step 5: clean up orphan blank nodes (or blank nodes without triples).
    // TODO: move to a task that runs continuously.
    //DeleteOrphanNodes(graphs);
}
/// <summary>
/// Applies the discovery process to the entities already loaded in the SGI: for each person (in random order)
/// it runs the external integration, drops the values that were already loaded, builds a graph with the new
/// data and publishes it to the SGI.
/// </summary>
/// <param name="pSecondsSleep">Seconds to sleep after processing each entity.</param>
/// <param name="pCallUrisFactoryApiService">Service used to obtain URIs from the Uris Factory.</param>
public void ApplyDiscoverLoadedEntities(int pSecondsSleep, CallUrisFactoryApiService pCallUrisFactoryApiService)
{
    #region Load configuration
    ConfigSparql ConfigSparql = new ConfigSparql();
    string SGI_SPARQLEndpoint = ConfigSparql.GetEndpoint();
    string SGI_SPARQLGraph = ConfigSparql.GetGraph();
    string SGI_SPARQLQueryParam = ConfigSparql.GetQueryParam();
    string SGI_SPARQLUsername = ConfigSparql.GetUsername();
    string SGI_SPARQLPassword = ConfigSparql.GetPassword();
    // The Unidata settings below are only consumed by the publication code that is still commented out (see TODO).
    string Unidata_SPARQLEndpoint = ConfigSparql.GetUnidataEndpoint();
    string Unidata_SPARQLGraph = ConfigSparql.GetUnidataGraph();
    string Unidata_SPARQLQueryParam = ConfigSparql.GetUnidataQueryParam();
    string Unidata_SPARQLUsername = ConfigSparql.GetUnidataUsername();
    string Unidata_SPARQLPassword = ConfigSparql.GetUnidataPassword();

    ConfigService ConfigService = new ConfigService();
    string UnidataDomain = ConfigService.GetUnidataDomain();
    string UnidataUriTransform = ConfigService.GetUnidataUriTransform();
    float MaxScore = ConfigService.GetMaxScore();
    float MinScore = ConfigService.GetMinScore();

    ConfigScopus ConfigScopus = new ConfigScopus();
    string ScopusApiKey = ConfigScopus.GetScopusApiKey();
    string ScopusUrl = ConfigScopus.GetScopusUrl();

    ConfigCrossref ConfigCrossref = new ConfigCrossref();
    string CrossrefUserAgent = ConfigCrossref.GetCrossrefUserAgent();

    ConfigWOS ConfigWOS = new ConfigWOS();
    string WOSAuthorization = ConfigWOS.GetWOSAuthorization();
    #endregion

    DiscoverUtility discoverUtility = new DiscoverUtility();

    // Load every person and shuffle the list.
    List<string> personList = discoverUtility.GetPersonList(SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword);
    List<string> randomPersonList = GetRandomOrderList(personList);

    // The service scope is disposable; release it as soon as the ontology has been fetched.
    RohGraph ontologyGraph;
    using (var scope = _serviceScopeFactory.CreateScope())
    {
        CallEtlApiService callEtlApiService = scope.ServiceProvider.GetRequiredService<CallEtlApiService>();
        ontologyGraph = callEtlApiService.CallGetOntology();
    }

    foreach (string person in randomPersonList)
    {
        try
        {
            // Start time of the run.
            DateTime startTime = DateTime.Now;

            // Get the graph of a single person.
            RohGraph dataGraph = discoverUtility.GetDataGraphPersonLoadedForDiscover(person, SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword);

            // Keep a copy of the original graph to check later which values were already loaded.
            RohGraph originalDataGraph = dataGraph.Clone();

            RohRdfsReasoner reasoner = new RohRdfsReasoner();
            reasoner.Initialise(ontologyGraph);
            RohGraph dataInferenceGraph = dataGraph.Clone();
            reasoner.Apply(dataInferenceGraph);

            bool hasChanges = false;
            Dictionary<string, Dictionary<string, float>> discoveredEntitiesProbability = new Dictionary<string, Dictionary<string, float>>();
            Dictionary<string, ReconciliationData.ReconciliationScore> entidadesReconciliadasConIntegracionExternaAux;
            Dictionary<string, HashSet<string>> discardDissambiguations = new Dictionary<string, HashSet<string>>();
            DiscoverCache discoverCache = new DiscoverCache();
            DiscoverCacheGlobal discoverCacheGlobal = new DiscoverCacheGlobal();

            // Run the external integration.
            ReconciliationData reconciliationData = new ReconciliationData();
            DiscoverLinkData discoverLinkData = new DiscoverLinkData();
            Dictionary<string, List<DiscoverLinkData.PropertyData>> integration = discoverUtility.ExternalIntegration(ref hasChanges, ref reconciliationData, ref discoverLinkData, ref discoveredEntitiesProbability, ref dataGraph, reasoner, null, ontologyGraph, out entidadesReconciliadasConIntegracionExternaAux, discardDissambiguations, discoverCache, discoverCacheGlobal, ScopusApiKey, ScopusUrl, CrossrefUserAgent, WOSAuthorization, MinScore, MaxScore, SGI_SPARQLEndpoint, SGI_SPARQLGraph, SGI_SPARQLQueryParam, SGI_SPARQLUsername, SGI_SPARQLPassword, pCallUrisFactoryApiService, false);

            // Clean 'integration' so that triples that are already loaded are not inserted again.
            // Snapshots (ToList) are iterated because the collections are modified inside the loops.
            foreach (string entity in integration.Keys.ToList())
            {
                foreach (DiscoverLinkData.PropertyData propertyData in integration[entity].ToList())
                {
                    string p = propertyData.property;
                    foreach (string o in propertyData.valueProvenance.Keys.ToList())
                    {
                        // Escape the value so quotes, backslashes or line breaks do not break the SPARQL literal.
                        string escapedValue = o.Replace("\\", "\\\\").Replace("'", "\\'").Replace("\n", "\\n").Replace("\r", "\\r");
                        SparqlResultSet alreadyLoaded = (SparqlResultSet)originalDataGraph.ExecuteQuery($@"ASK WHERE {{ ?s ?p ?o. FILTER(?s=<{entity}>) FILTER(?p=<{p}>) FILTER(str(?o)='{escapedValue}') }}");
                        if (alreadyLoaded.Result)
                        {
                            // Drop the value because it was already loaded.
                            propertyData.valueProvenance.Remove(o);
                        }
                    }
                    if (propertyData.valueProvenance.Count == 0)
                    {
                        integration[entity].Remove(propertyData);
                    }
                }
                if (integration[entity].Count == 0)
                {
                    integration.Remove(entity);
                }
            }

            // Build a graph with the content of 'integration' + rdf:type + owl:sameAs.
            RohGraph dataGraphIntegration = new RohGraph();
            foreach (string sujeto in integration.Keys)
            {
                IUriNode s = dataGraphIntegration.CreateUriNode(UriFactory.Create(sujeto));

                // Add the owl:sameAs and rdf:type of the entity.
                // SPARQL variable names are case-sensitive: the OPTIONAL must bind the projected ?sameas.
                SparqlResultSet sparqlResultSet = (SparqlResultSet)dataGraph.ExecuteQuery("select ?rdftype ?sameas where {?s a ?rdftype. OPTIONAL{?s <http://www.w3.org/2002/07/owl#sameAs> ?sameas} FILTER(?s=<" + sujeto + ">)}");
                foreach (SparqlResult sparqlResult in sparqlResultSet.Results)
                {
                    string rdfType = sparqlResult["rdftype"].ToString();
                    IUriNode pRdfType = dataGraphIntegration.CreateUriNode(UriFactory.Create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
                    IUriNode oRdfType = dataGraphIntegration.CreateUriNode(UriFactory.Create(rdfType));
                    dataGraphIntegration.Assert(new Triple(s, pRdfType, oRdfType));

                    // ?sameas comes from an OPTIONAL, so it may be absent or unbound in a given row.
                    if (sparqlResult.Variables.Contains("sameas") && sparqlResult["sameas"] != null)
                    {
                        string sameas = sparqlResult["sameas"].ToString();
                        IUriNode pSameAs = dataGraphIntegration.CreateUriNode(UriFactory.Create("http://www.w3.org/2002/07/owl#sameAs"));
                        IUriNode oSameAs = dataGraphIntegration.CreateUriNode(UriFactory.Create(sameas));
                        dataGraphIntegration.Assert(new Triple(s, pSameAs, oSameAs));
                    }
                }

                foreach (DiscoverLinkData.PropertyData propertyData in integration[sujeto])
                {
                    IUriNode p = dataGraphIntegration.CreateUriNode(UriFactory.Create(propertyData.property));
                    foreach (string valor in propertyData.valueProvenance.Keys)
                    {
                        if (Uri.IsWellFormedUriString(valor, UriKind.Absolute))
                        {
                            // The object of the triple is the discovered value, not the property.
                            IUriNode uriNode = dataGraphIntegration.CreateUriNode(UriFactory.Create(valor));
                            dataGraphIntegration.Assert(new Triple(s, p, uriNode));
                        }
                        else
                        {
                            ILiteralNode literalNode = dataGraphIntegration.CreateLiteralNode(valor, new Uri("http://www.w3.org/2001/XMLSchema#string"));
                            dataGraphIntegration.Assert(new Triple(s, p, literalNode));
                        }

                        foreach (string org in propertyData.valueProvenance[valor])
                        {
                            // Add the triples of the source organization and of its graph.
                            SparqlResultSet sparqlResultSetOrgs = (SparqlResultSet)dataGraph.ExecuteQuery("select ?s ?p ?o where {?s ?p ?o. FILTER(?s in(<" + pCallUrisFactoryApiService.GetUri("http://purl.org/roh/mirror/foaf#Organization", org) + ">,<" + pCallUrisFactoryApiService.GetUri("Graph", org) + "> ))}");
                            foreach (SparqlResult sparqlResult in sparqlResultSetOrgs.Results)
                            {
                                INode sOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["s"].ToString()));
                                INode pOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["p"].ToString()));
                                if (sparqlResult["o"] is UriNode)
                                {
                                    INode oOrg = dataGraphIntegration.CreateUriNode(UriFactory.Create(sparqlResult["o"].ToString()));
                                    dataGraphIntegration.Assert(new Triple(sOrg, pOrg, oOrg));
                                }
                                else if (sparqlResult["o"] is LiteralNode)
                                {
                                    INode oOrg = dataGraphIntegration.CreateLiteralNode(((LiteralNode)sparqlResult["o"]).Value, ((LiteralNode)sparqlResult["o"]).DataType);
                                    dataGraphIntegration.Assert(new Triple(sOrg, pOrg, oOrg));
                                }
                            }
                        }
                    }
                }
            }

            // End time of the run.
            DateTime endTime = DateTime.Now;
            if (integration.Count > 0)
            {
                // There is new data: load it.
                string urlDiscoverAgent = pCallUrisFactoryApiService.GetUri("Agent", "discover");

                // Publish to the SGI.
                AsioPublication asioPublication = new AsioPublication(SGI_SPARQLEndpoint, SGI_SPARQLQueryParam, SGI_SPARQLGraph, SGI_SPARQLUsername, SGI_SPARQLPassword);
                asioPublication.PublishRDF(dataGraphIntegration, null, new KeyValuePair<string, string>(urlDiscoverAgent, "Algoritmos de descubrimiento"), startTime, endTime, discoverLinkData, pCallUrisFactoryApiService);

                // Prepare the data to load into Unidata.
                RohGraph unidataGraph = dataGraphIntegration.Clone();

                #region Entities without a sameAs pointing to Unidata are removed; they must not be loaded
                // SPARQL variable names are case-sensitive: the OPTIONAL must bind the projected ?sameas.
                SparqlResultSet sparqlResultSet = (SparqlResultSet)unidataGraph.ExecuteQuery("select ?s ?rdftype ?sameas where {?s a ?rdftype. OPTIONAL{?s <http://www.w3.org/2002/07/owl#sameAs> ?sameas} }");
                Dictionary<string, bool> entidadesConSameAsUnidata = new Dictionary<string, bool>();
                foreach (SparqlResult sparqlResult in sparqlResultSet.Results)
                {
                    string s = sparqlResult["s"].ToString();
                    if (!entidadesConSameAsUnidata.ContainsKey(s))
                    {
                        entidadesConSameAsUnidata.Add(s, false);
                    }
                    if (sparqlResult.Variables.Contains("sameas") && sparqlResult["sameas"] != null)
                    {
                        if (sparqlResult["sameas"].ToString().StartsWith(UnidataDomain, StringComparison.Ordinal))
                        {
                            entidadesConSameAsUnidata[s] = true;
                        }
                    }
                }

                TripleStore store = new TripleStore();
                store.Add(unidataGraph);
                foreach (string entity in entidadesConSameAsUnidata.Keys)
                {
                    if (!entidadesConSameAsUnidata[entity])
                    {
                        // Delete every triple whose subject is the entity without a Unidata sameAs.
                        SparqlUpdateParser parser = new SparqlUpdateParser();
                        SparqlUpdateCommandSet delete = parser.ParseFromString(@"DELETE { ?s ?p ?o. } WHERE { ?s ?p ?o. FILTER(?s = <" + entity + @">) }");
                        LeviathanUpdateProcessor processor = new LeviathanUpdateProcessor(store);
                        processor.ProcessCommandSet(delete);
                    }
                }
                #endregion

                //TODO uncomment once Unidata is enabled
                ////If there are triples to load into Unidata, proceed
                //if (unidataGraph.Triples.ToList().Count > 0)
                //{
                //    //Publish to UNIDATA
                //    AsioPublication asioPublicationUnidata = new AsioPublication(Unidata_SPARQLEndpoint, Unidata_SPARQLQueryParam, Unidata_SPARQLGraph, Unidata_SPARQLUsername, Unidata_SPARQLPassword);
                //    // Prepares the graph for Unidata: takes the Unidata URIs from the sameAs, applies them to the subjects, and adds the old subjects to the sameAs
                //    unidataGraph = AsioPublication.TransformUrisToUnidata(unidataGraph, UnidataDomain, UnidataUriTransform);
                //    asioPublicationUnidata.PublishRDF(unidataGraph, null, new KeyValuePair<string, string>(urlDiscoverAgent, "Algoritmos de descubrimiento"), startTime, endTime, discoverLinkData,pCallUrisFactoryApiService);
                //}
            }
        }
        catch (Exception exception)
        {
            // A failure on one person is logged and must not stop the processing of the rest.
            Logging.Error(exception);
        }
        Thread.Sleep(pSecondsSleep * 1000);
    }
}