/// <summary>
/// Answers a SemWeb Graph Pattern query by evaluating it as SPARQL Algebra over the wrapped Store
/// </summary>
/// <param name="graph">Statement templates that together form the Graph Pattern</param>
/// <param name="options">Query Options</param>
/// <param name="sink">Sink that receives the resulting variable bindings</param>
/// <remarks>
/// <para>
/// The Statement templates are converted to SPARQL Algebra, that algebra is evaluated against an in-memory dataset wrapping the Store, and each resulting Set is converted into SemWeb VariableBindings
/// </para>
/// <para>
/// Of the Query Options only Limit is honoured
/// </para>
/// </remarks>
public void Query(Statement[] graph, SW.Query.QueryOptions options, SW.Query.QueryResultSink sink)
{
    ISparqlAlgebra pattern = this.ToAlgebra(graph);
    SparqlEvaluationContext evalContext = new SparqlEvaluationContext(null, new InMemoryDataset(this._store));
    BaseMultiset solutions = evalContext.Evaluate(pattern);

    //Announce the result variables before any bindings are delivered
    sink.Init(solutions.Variables.Select(name => new Variable(name)).ToArray());

    //Nothing matched so just close the sink
    if (solutions.Count == 0)
    {
        sink.Finished();
        return;
    }

    int delivered = 0;
    foreach (Set solution in solutions.Sets)
    {
        //Stop once the requested Limit (if any) has been reached
        if (options.Limit > 0 && delivered >= options.Limit) break;

        //Translate the Set into parallel arrays of SemWeb Variables and Resources
        var names = solution.Variables.ToArray();
        Variable[] vars = new Variable[names.Length];
        Resource[] values = new Resource[names.Length];
        for (int i = 0; i < names.Length; i++)
        {
            vars[i] = new Variable(names[i]);
        }
        for (int i = 0; i < names.Length; i++)
        {
            var node = solution[names[i]];
            values[i] = SemWebConverter.ToSemWeb(node, this.GetMapping(node.Graph));
        }

        //A false return from the sink means it wants no further results
        if (!sink.Add(new SW.Query.VariableBindings(vars, values))) break;

        delivered++;
    }

    sink.Finished();
}
/// <summary>
/// Runs a Graph Pattern query, given as a set of Statement Patterns, against the Store
/// </summary>
/// <param name="graph">Statement Patterns that together form the Graph Pattern</param>
/// <param name="options">Query Options</param>
/// <param name="sink">Sink that receives the resulting variable bindings</param>
/// <remarks>
/// <para>
/// The Statement Patterns are rendered as a parameterised SPARQL SELECT query: variables are written out directly while every fixed term is injected as a numbered parameter. The query is then handed to the underlying Store for execution
/// </para>
/// <para>
/// Of the Query Options only Limit is honoured
/// </para>
/// </remarks>
/// <exception cref="RdfQueryException">Thrown if the Store does not return a SPARQL Result Set</exception>
public void Query(Statement[] graph, SW.Query.QueryOptions options, SW.Query.QueryResultSink sink)
{
    //Build the SELECT one triple pattern per line
    SparqlParameterizedString select = new SparqlParameterizedString();
    select.QueryText = "SELECT * WHERE {";

    int nextParam = 0;
    foreach (Statement pattern in graph)
    {
        //Subject position
        if (pattern.Subject is Variable)
        {
            select.QueryText += "\n" + pattern.Subject.ToString();
        }
        else
        {
            string subjParam = "param" + nextParam;
            select.QueryText += "\n@" + subjParam;
            select.SetParameter(subjParam, SemWebConverter.FromSemWeb(pattern.Subject, this._mapping));
            nextParam++;
        }
        select.QueryText += " ";

        //Predicate position
        if (pattern.Predicate is Variable)
        {
            select.QueryText += pattern.Predicate.ToString();
        }
        else
        {
            string predParam = "param" + nextParam;
            select.QueryText += "@" + predParam;
            select.SetParameter(predParam, SemWebConverter.FromSemWeb(pattern.Predicate, this._mapping));
            nextParam++;
        }
        select.QueryText += " ";

        //Object position
        if (pattern.Object is Variable)
        {
            select.QueryText += pattern.Object.ToString();
        }
        else
        {
            string objParam = "param" + nextParam;
            select.QueryText += "@" + objParam;
            select.SetParameter(objParam, SemWebConverter.FromSemWeb(pattern.Object, this._mapping));
            nextParam++;
        }
        select.QueryText += " .";
    }
    select.QueryText += "}";

    //Run the query; anything other than a Result Set is an error
    Object raw = this._store.ExecuteQuery(select.ToString());
    SparqlResultSet rset = raw as SparqlResultSet;
    if (rset == null)
    {
        throw new RdfQueryException("Query returned an unexpected result where a SPARQL Result Set was expected");
    }

    //Announce the result variables before any bindings are delivered
    sink.Init(rset.Variables.Select(name => new Variable(name)).ToArray());

    if (rset.Count == 0)
    {
        sink.Finished();
        return;
    }

    int delivered = 0;
    foreach (SparqlResult row in rset)
    {
        //Stop once the requested Limit (if any) has been reached
        if (options.Limit > 0 && delivered >= options.Limit) break;

        //Translate the result row into parallel arrays of SemWeb Variables and Resources
        var names = row.Variables.ToArray();
        Variable[] vars = new Variable[names.Length];
        Resource[] values = new Resource[names.Length];
        for (int i = 0; i < names.Length; i++)
        {
            vars[i] = new Variable(names[i]);
        }
        for (int i = 0; i < names.Length; i++)
        {
            values[i] = SemWebConverter.ToSemWeb(row[names[i]], this._mapping);
        }

        //A false return from the sink means it wants no further results
        if (!sink.Add(new SW.Query.VariableBindings(vars, values))) break;

        delivered++;
    }

    sink.Finished();
}
/// <summary>
/// Answers a graph-pattern query by compiling the pattern into a single SQL SELECT over the
/// statements table (one table alias per pattern statement), running it, and streaming the
/// resulting variable bindings to <paramref name="sink"/>.
/// </summary>
/// <param name="graph">The statements of the pattern; must be non-empty and contain no null components.</param>
/// <param name="options">Query options. This method reads DistinguishedVariables, VariableKnownValues,
/// VariableLiteralFilters and Limit. The options are cloned first, so the caller's instance is not reassigned here.</param>
/// <param name="sink">Receives Init with the reported variables, then one Add per result row, then Finished.</param>
/// <exception cref="ArgumentException">The graph is empty or one of its statements has a null component.</exception>
/// <remarks>
/// Every early exit that detects "no possible results" calls sink.Finished() before returning.
/// NOTE(review): when sink.Add returns false at the very end, the method returns without calling
/// sink.Finished() -- confirm whether that is the intended sink contract.
/// </remarks>
public void Query(Statement[] graph, SemWeb.Query.QueryOptions options, SemWeb.Query.QueryResultSink sink) {
    if (graph.Length == 0) throw new ArgumentException("graph array must have at least one element");

    // This method translates the graph pattern into a single SQL statement. Each graph statement
    // corresponds to a new use of the _statements table in the FROM clause. For instance:
    //     ?a foaf:knows ?b . ?b foaf:name ?c .
    // translates to
    //     SELECT
    //         g0.subject, v0.value,
    //         g0.object, v1.value,
    //         g1.object, v2.value, v2lit.value, v2lit.language, v2lit.datatype
    //     FROM
    //         db_tables as g0 LEFT JOIN db_entities AS v0 ON g0.subject=v0.id LEFT JOIN db_entities AS v1 ON g0.object=v1.id,
    //         db_tables as g1 LEFT JOIN db_entities AS v2 ON g1.object=v2.id LEFT JOIN db_literals AS v2lit ON g1.object=v2lit.id
    //     WHERE
    //         g0.predicate = <the id of the foaf:knows entity> AND
    //         g1.predicate = <the id of the foaf:name entity> AND
    //         g0.object = g1.subject
    //
    // If any variable column is an *undistinguished* variable --- which is to say that the caller
    // says it is a variable, but is not concerned with its values --- then we want to apply
    // DISTINCT to the SELECT statement. This is because while in the normal case we may get
    // duplicates, we expect that to not occur more than the caller expects, but in the latter
    // case there will often be many duplicates. Consider the SPARQL query:
    //     SELECT DISTINCT ?p WHERE { ?s ?p ?o }
    // to get a list of predicates in the dataset, which corresponds to the graph query
    //     ?s ?p ?o
    // where only ?p is distinguished.
    // This normally translates to:
    //     SELECT
    //         g0.predicate, v0.value
    //     FROM
    //         db_tables as g0 LEFT JOIN db_entities AS v0 ON g0.predicate=v0.id
    // which of course is going to return a result for every triple in the database.
    // So we add DISTINCT to the beginning ("SELECT DISTINCT").
    // Unfortunately, MySQL performs the DISTINCT bit only after the LEFT JOINs (which makes sense normally).
    // That means that MySQL is repeatedly fetching the URI values of the predicates and checking
    // if a new unique row has been created, and this is very slow. What we want is to get the distinct
    // IDs of the predicates first, and then get their URIs.
    // I first tried implementing this with VIEWs, but it didn't always speed things up, and it was
    // difficult to manage the creation and deletion of VIEWs.
    // So instead, in this case, we do the query in two parts. First we get the IDs of the variables,
    // and then we get their URIs.

    options = options.Clone(); // because we modify the knownvalues array

    // Order the variables mentioned in the graph.
    // varOrder ends up holding only the variables that will be reported to the sink;
    // useDistinct is switched on when a non-meta variable is left out of that list.
    Variable[] varOrder;
    ResSet distinguishedVars = null;
    bool useDistinct = false;
    {
        if (options.DistinguishedVariables != null)
            distinguishedVars = new ResSet(options.DistinguishedVariables);
        else
            distinguishedVars = new ResSet();

        Hashtable seenvars = new Hashtable();
        foreach (Statement filter in graph) {
            // Components 0..3 are read in the same order as colnames below:
            // subject, predicate, object, meta.
            for (int i = 0; i < 4; i++) {
                Resource r = filter.GetComponent(i);

                if (r == null)
                    throw new ArgumentException("The graph may not have any null components. Use Variables instead.");

                if (r is Variable) {
                    if (options.DistinguishedVariables != null) {
                        if (!distinguishedVars.Contains(r)) {
                            // If we are omitting a column from the results because it is
                            // not distinguished, and it's not a meta column, then we'll
                            // use DISTINCT.
                            if (i != 3)
                                useDistinct = true;

                            // Don't put this into seenvars.
                            continue;
                        }
                    } else {
                        distinguishedVars.Add(r); // all variables are distinguished
                    }

                    seenvars[r] = r;
                }
            }
        }

        // The reported order is whatever order the Hashtable enumerates its keys in.
        varOrder = new Variable[seenvars.Count];
        int ctr = 0;
        foreach (Variable v in seenvars.Keys)
            varOrder[ctr++] = v;
    }

    // Set the initial bindings to the result sink
    sink.Init(varOrder);

    // NOTE(review): nothing in this method ever adds to varLitFilters, so the in-memory
    // literal filter check in the final loop never rejects a row as written. Together with
    // the "FilterToSQL returned null => skip" branch below, a filter that cannot be expressed
    // in SQL appears to be dropped silently -- confirm whether that is intended.
    Hashtable varLitFilters = new Hashtable();

    // Prefetch the IDs of all resources mentioned in the graph and in variable known values.
    // For Resources in the graph that are not in the store, the query immediately fails.
    {
        ArrayList graphResources = new ArrayList();
        foreach (Statement s in graph) {
            for (int i = 0; i < 4; i++) {
                Resource r = s.GetComponent(i);
                if (!(r is BNode)) // definitely exclude variables, but bnodes are useless too
                    graphResources.Add(r);
            }
        }
        if (options.VariableKnownValues != null)
            foreach (ICollection values in options.VariableKnownValues.Values)
                graphResources.AddRange(values);

        PrefetchResourceIds(graphResources);

        // Check resources in graph and fail fast if any is not in the store.
        foreach (Statement s in graph) {
            for (int i = 0; i < 4; i++) {
                Resource r = s.GetComponent(i);
                if (r is Variable) continue;
                // Statement.DefaultMeta is compared by reference and is exempt from the key lookup.
                if ((object)r != (object)Statement.DefaultMeta && GetResourceKey(r) == null) {
                    sink.AddComments("Resource " + r + " is not contained in the data model.");
                    sink.Finished();
                    return;
                }
            }
        }

        // Check variable known values and remove any values not in the store.
        // Don't do any fail-fasting here because there might be entries in this
        // dictionary that aren't even used in this query (yes, poor design).
        // We check later anyway.
        if (options.VariableKnownValues != null) {
            // The keys are copied into a fresh list first because the dictionary
            // entries are reassigned inside the loop.
            #if !DOTNET2
            foreach (Variable v in new ArrayList(options.VariableKnownValues.Keys)) {
            #else
            foreach (Variable v in new System.Collections.Generic.List<Variable>(options.VariableKnownValues.Keys)) {
            #endif
                #if !DOTNET2
                ArrayList newvalues = new ArrayList();
                #else
                System.Collections.Generic.List<Resource> newvalues = new System.Collections.Generic.List<Resource>();
                #endif

                foreach (Resource r in (ICollection)options.VariableKnownValues[v]) {
                    if ((object)r == (object)Statement.DefaultMeta || GetResourceKey(r) != null)
                        newvalues.Add(r);
                }

                options.VariableKnownValues[v] = newvalues;
            }
        }
    }

    // Helpers
    // Column names of the statements table, indexed by statement component (0..3).
    string[] colnames = { "subject", "predicate", "object", "meta" };

    // we initialize these things while locked, but use them after we release the lock
    ArrayList results = new ArrayList(); // one QueryResultRowVariable[] per result row
    Hashtable resourceCache = new Hashtable(); // map resource ID to Resource instances

    // We can either include JOINs to the entities and literals table for every variable
    // in the query, or we can delay fetching that information to separate SELECTs
    // after the main part of the query is done. If we are including DISTINCT, then we
    // don't want to do the JOINs because the JOINs happen before the DISTINCT and will
    // be unnecessarily repeated. Also if the query has many variables, say more than 6,
    // then it may slow down query planning (the MySQL optimizer) to include them all in
    // one query.
    bool joinEntitiesAndLiterals = true;
    if (useDistinct) joinEntitiesAndLiterals = false;
    if (varOrder.Length > 6) joinEntitiesAndLiterals = false;

    // Lock the store and make sure we are initialized and any pending add's have been committed.
    lock (syncroot) {
        Init();
        RunAddBuffer();

        // Compile the SQL statement.
        Hashtable varRef = new Hashtable(); // the column name representing the variable, as in "g0.subject"
        Hashtable varRef2 = new Hashtable(); // the index of the variable, for accessing the entities and literals joined tables
        Hashtable varSelectedLiteral = new Hashtable(); // whether the variable is in a literal column and a LEFT JOIN for the literals table was used for it
        Hashtable varCouldBeLiteral = new Hashtable(); // whether the variable is only in literal columns
        Hashtable varSelectedEntity = new Hashtable(); // whether a LEFT JOIN for the entities table was used for a variable

        StringBuilder fromClause = new StringBuilder();
        StringBuilder whereClause = new StringBuilder();

        for (int f = 0; f < graph.Length; f++) {
            // For each filter, we select FROM the statements table with an
            // alias: g#, where # is the filter's index.
            if (f > 0) fromClause.Append(',');
            fromClause.Append(table);
            fromClause.Append("_statements AS g");
            fromClause.Append(f);

            // For each component of the filter...
            for (int i = 0; i < 4; i++) {
                // This has the name of the column corresponding to this variable (i.e. "g1.predicate").
                string myRef = "g" + f + "." + colnames[i];

                Variable v = graph[f].GetComponent(i) as Variable;
                if (v != null) {
                    // If the component is a variable, then if this is
                    // the first time we're seeing the variable, we don't
                    // add any restrictions to the WHERE clause, but we
                    // note the variable's "name" in the world of SQL
                    // so we can refer back to it later and we add the
                    // necessary FROM tables so we can get its URI and
                    // literal value if it is a reported variable.
                    // If this isn't the first time, then we add a WHERE restriction so
                    // that the proper columns here and in a previous
                    // filter are forced to have the same value.

                    if (!varRef.ContainsKey(v)) {
                        // This is the first time we are seeing this variable.

                        // Record the column name for the variable (i.e. g0.subject).
                        varRef[v] = myRef;

                        // Record an index for the variable (i.e. 0, 1, 2, ...)
                        // Taken after the insert above, so the first variable seen gets index 1.
                        int vIndex = varRef.Count;
                        varRef2[v] = vIndex;

                        // Component 2 is the object column, the only position treated as possibly literal.
                        varCouldBeLiteral[v] = (i == 2);

                        // LEFT JOIN the entities table for this variable to get its URI
                        // only if it is a distinguished variable and we are not using DISTINCT.
                        varSelectedEntity[v] = false;
                        if (joinEntitiesAndLiterals && distinguishedVars.Contains(v)) {
                            varSelectedEntity[v] = true; // Record that we are selecting the entities table for this variable.
                            fromClause.Append(" LEFT JOIN ");
                            fromClause.Append(table);
                            fromClause.Append("_entities AS vent");
                            fromClause.Append(vIndex);
                            fromClause.Append(" ON ");
                            fromClause.Append(myRef);
                            fromClause.Append("=");
                            fromClause.Append("vent" + vIndex + ".id ");
                        }

                        // LEFT JOIN the literals table for this variable:
                        //    if it is in an object position
                        //    to get its value, language, and datatype only if it is a distinguished variable and we are not using DISTINCT
                        //    to apply a literal value filter (which will be done later)
                        #if !DOTNET2
                        bool hasLitFilter = (options.VariableLiteralFilters != null && options.VariableLiteralFilters[v] != null);
                        #else
                        bool hasLitFilter = (options.VariableLiteralFilters != null && options.VariableLiteralFilters.ContainsKey(v));
                        #endif
                        varSelectedLiteral[v] = false;
                        if (i == 2 && ((joinEntitiesAndLiterals && distinguishedVars.Contains(v)) || hasLitFilter)) {
                            varSelectedLiteral[v] = true; // Record that we are selecting the literals table for this variable.
                            fromClause.Append(" LEFT JOIN ");
                            fromClause.Append(table);
                            fromClause.Append("_literals AS vlit");
                            fromClause.Append(vIndex);
                            fromClause.Append(" ON ");
                            fromClause.Append(myRef);
                            fromClause.Append("=");
                            fromClause.Append("vlit" + vIndex + ".id ");
                        }

                        // If this variable has known values, then we must restrict what values can appear using a WHERE clause.
                        if (options.VariableKnownValues != null) {
                            ICollection values = null;
                            // In the DOTNET2 build the lookup below is guarded by ContainsKey;
                            // otherwise the indexer is called unconditionally.
                            #if DOTNET2
                            if (options.VariableKnownValues.ContainsKey(v))
                            #endif
                                values = (ICollection)options.VariableKnownValues[v];
                            if (values != null) {
                                // An empty list (e.g. after the pruning above) means the
                                // variable cannot be bound, so finish with no results.
                                if (values.Count == 0) {
                                    sink.Finished();
                                    return;
                                }
                                Resource r = ToMultiRes((Resource[])new ArrayList(values).ToArray(typeof(Resource)));
                                if (!WhereItem(myRef, r, whereClause, whereClause.Length != 0)) {
                                    // We know at this point that the query cannot return any results.
                                    sink.Finished();
                                    return;
                                }
                            }
                        }

                    } else {
                        // We've seen this variable before, so link up the column in this
                        // statement to the corresponding column in a previous (or this) statement.
                        if (whereClause.Length != 0) whereClause.Append(" AND ");
                        whereClause.Append('(');
                        whereClause.Append((string)varRef[v]);
                        whereClause.Append('=');
                        whereClause.Append(myRef);
                        whereClause.Append(')');
                        // Appearing anywhere other than the object column rules out a literal.
                        if (i != 2)
                            varCouldBeLiteral[v] = false;
                    }

                } else {
                    // If this is not a variable, then it is a resource.
                    // Append something into the WHERE clause to make sure this component gets
                    // the right fixed value. If we cannot add the component to the WHERE clause
                    // because the fixed value isn't even known in the data source, we can stop early.
                    if (!WhereItem(myRef, graph[f].GetComponent(i), whereClause, whereClause.Length != 0)) {
                        // We know at this point that the query cannot return any results.
                        sink.Finished();
                        return;
                    }
                }
            }

        } // graph filter 0...n

        // Add literal filters to the WHERE clause
        // (only variables in varOrder, i.e. the reported ones, are considered here)
        foreach (Variable v in varOrder) {
            // Is there a literal value filter?
            if (options.VariableLiteralFilters == null) continue;
            #if !DOTNET2
            if (options.VariableLiteralFilters[v] == null) continue;
            #else
            if (!options.VariableLiteralFilters.ContainsKey(v)) continue;
            #endif

            // If this variable was not used in a literal column, then
            // we cannot filter its value. Really, it will never be a literal.
            if (!(bool)varSelectedLiteral[v]) continue;

            foreach (LiteralFilter filter in (ICollection)options.VariableLiteralFilters[v]) {
                string s = FilterToSQL(filter, "vlit" + (int)varRef2[v] + ".value");
                // A null result is skipped: no SQL condition is added for this filter.
                if (s == null) continue;

                if (whereClause.Length != 0) whereClause.Append(" AND ");
                whereClause.Append(s);
            }
        }

        // Put the parts of the SQL statement together

        StringBuilder cmd = new StringBuilder();

        cmd.Append("SELECT ");

        if (useDistinct) cmd.Append("DISTINCT ");

        // The row limit is written as a TOP prefix when SupportsLimitClause is false,
        // and as a trailing LIMIT clause (further down) when it is true.
        if (!SupportsLimitClause && options.Limit > 0) {
            cmd.Append("TOP ");
            cmd.Append(options.Limit);
            cmd.Append(' ');
        }

        // Add all of the distinguished variables to the SELECT clause.
        // Per variable the column order is: id [, entity value] [, literal value, language, datatype];
        // the reader loop below consumes columns in exactly this order.
        bool firstvar = true;
        foreach (Variable v in varOrder) {
            if (!firstvar) cmd.Append(',');
            firstvar = false;

            cmd.Append((string)varRef[v]);

            if ((bool)varSelectedEntity[v]) {
                cmd.Append(", vent" + (int)varRef2[v] + ".value");
            }
            if ((bool)varSelectedLiteral[v]) {
                cmd.Append(", vlit" + (int)varRef2[v] + ".value");
                cmd.Append(", vlit" + (int)varRef2[v] + ".language");
                cmd.Append(", vlit" + (int)varRef2[v] + ".datatype");
            }
        }

        cmd.Append(" FROM ");
        cmd.Append(fromClause.ToString());

        if (whereClause.Length > 0)
            cmd.Append(" WHERE ");
        cmd.Append(whereClause.ToString());

        if (SupportsLimitClause && options.Limit > 0) {
            cmd.Append(" LIMIT ");
            cmd.Append(options.Limit);
        }

        cmd.Append(';');

        if (Debug) {
            string cmd2 = cmd.ToString();
            //if (cmd2.Length > 80) cmd2 = cmd2.Substring(0, 80);
            Console.Error.WriteLine(cmd2);
        }

        // Execute the query.

        // When we use DISTINCT and don't select URI and literal values at first,
        // we have to select them after. And since we can't maintain two IDataReaders
        // simultaneously, that means we have to pull the first set of results into
        // memory. It would be nice to not have to do that when we don't use DISTINCT,
        // but in practice it doesn't really matter since in SPARQL it's all sucked
        // into memory anyway.

        using (IDataReader reader = RunReader(cmd.ToString())) {
            while (reader.Read()) {
                QueryResultRowVariable[] row = new QueryResultRowVariable[varOrder.Length];
                results.Add(row);

                // col walks the result columns in the order they were appended to the SELECT list.
                int col = 0;
                for (int i = 0; i < varOrder.Length; i++) {
                    Variable v = varOrder[i];

                    row[i].id = reader.GetInt64(col++);
                    if ((bool)varSelectedEntity[v]) {
                        row[i].uri = AsString(reader[col++]);
                    }
                    if ((bool)varSelectedLiteral[v]) {
                        row[i].litvalue = AsString(reader[col++]);
                        row[i].litlanguage = AsString(reader[col++]);
                        row[i].litdatatype = AsString(reader[col++]);
                    }
                }
            }
        }

        // For any distinguished variable that we did not select URIs or literal values for,
        // select that information now.

        for (int i = 0; i < varOrder.Length; i++) {
            Variable v = varOrder[i];

            // Skip variables whose entity value was already joined in and which either
            // cannot be a literal or already had their literal columns joined in.
            if ((bool)varSelectedEntity[v] && (!(bool)varCouldBeLiteral[v] || (bool)varSelectedLiteral[v])) continue;

            // Get the list of resource IDs found for this variable.
            ArrayList rids = new ArrayList();
            foreach (QueryResultRowVariable[] row in results) {
                if (row[i].id <= 1) continue; // can't fetch for Statement.DefaultMeta
                if (resourceCache.ContainsKey(row[i].id)) continue; // we've already fetched it
                rids.Add(row[i].id); // probably no need to remove duplicates
            }

            if (rids.Count > 0) {
                // Fetch what we can for entities.
                if (!(bool)varSelectedEntity[v]) {
                    StringBuilder cmd2 = new StringBuilder();
                    cmd2.Append("SELECT id, value FROM ");
                    cmd2.Append(table);
                    cmd2.Append("_entities WHERE id IN (");
                    bool first = true;
                    foreach (Int64 id in rids) {
                        if (!first) cmd2.Append(',');
                        first = false;
                        cmd2.Append(id);
                    }
                    cmd2.Append(")");
                    if (Debug) {
                        Console.Error.WriteLine(cmd2.ToString());
                    }
                    using (IDataReader reader = RunReader(cmd2.ToString())) {
                        while (reader.Read()) {
                            Int64 id = reader.GetInt64(0);
                            string uri = AsString(reader[1]);
                            resourceCache[id] = MakeEntity(id, uri, null);
                        }
                    }
                }

                // Fetch what we can for literals.
                if ((bool)varCouldBeLiteral[v] && !(bool)varSelectedLiteral[v]) {
                    StringBuilder cmd2 = new StringBuilder();
                    cmd2.Append("SELECT id, value, language, datatype FROM ");
                    cmd2.Append(table);
                    cmd2.Append("_literals WHERE id IN (");
                    bool first = true;
                    foreach (Int64 id in rids) {
                        if (!first) cmd2.Append(',');
                        first = false;
                        cmd2.Append(id);
                    }
                    cmd2.Append(")");
                    if (Debug) {
                        Console.Error.WriteLine(cmd2.ToString());
                    }
                    using (IDataReader reader = RunReader(cmd2.ToString())) {
                        while (reader.Read()) {
                            Int64 id = reader.GetInt64(0);
                            string value = AsString(reader[1]);
                            string language = AsString(reader[2]);
                            string datatype = AsString(reader[3]);
                            Literal lit = new Literal(value, language, datatype);
                            SetResourceKey(lit, new ResourceKey(id));
                            resourceCache[id] = lit;
                        }
                    }
                }

                // Any ids not found so far are bnodes.
                foreach (Int64 id in rids) {
                    if (!resourceCache.ContainsKey(id)) {
                        BNode b = new BNode();
                        SetResourceKey(b, new ResourceKey(id));
                        resourceCache[id] = b;
                    }
                }
            }
        }

    } // lock

    // Now loop through the binding results.
    // This runs outside the lock and uses only results, resourceCache and varOrder built above.

    foreach (QueryResultRowVariable[] row in results) {
        bool match = true;
        Resource[] variableBindings = new Resource[varOrder.Length];

        for (int i = 0; i < varOrder.Length; i++) {
            Int64 id = row[i].id;
            if (resourceCache.ContainsKey(id)) {
                variableBindings[i] = (Resource)resourceCache[id];
            } else {
                // Not cached yet: build the resource from the columns joined into the main query.
                if (row[i].litvalue == null) {
                    variableBindings[i] = MakeEntity(id, row[i].uri, null);
                } else {
                    Literal lit = new Literal(row[i].litvalue, row[i].litlanguage, row[i].litdatatype);

                    // varLitFilters is never filled in this method, so litFilters is null here as written.
                    ArrayList litFilters = (ArrayList)varLitFilters[varOrder[i]];
                    if (litFilters != null && !LiteralFilter.MatchesFilters(lit, (LiteralFilter[])litFilters.ToArray(typeof(LiteralFilter)), this)) {
                        match = false;
                        break;
                    }

                    SetResourceKey(lit, new ResourceKey(id));
                    variableBindings[i] = lit;
                }

                // reuse this entity later
                resourceCache[id] = variableBindings[i];
            }
        }

        if (!match) continue;

        // A false return from the sink stops delivery immediately (without calling Finished).
        if (!sink.Add(new SemWeb.Query.VariableBindings(varOrder, variableBindings))) return;
    }

    sink.Finished();
}
/// <summary>
/// Answers a graph-pattern query by running the prover over the rules and the target model,
/// then reporting one set of variable bindings per piece of evidence found.
/// </summary>
/// <param name="graph">The statements of the pattern to prove.</param>
/// <param name="options">Query options (not read by this implementation).</param>
/// <param name="targetModel">The data source the prover works against.</param>
/// <param name="sink">Receives Init with the pattern's variables, then for each piece of evidence
/// a proof comment followed by the bindings, then Finished.</param>
/// <remarks>
/// If the prover yields no evidence list at all the method returns without touching the sink.
/// Delivery stops as soon as the sink's Add returns false; Finished is still called afterwards.
/// </remarks>
public override void Query(Statement[] graph, SemWeb.Query.QueryOptions options, SelectableSource targetModel, SemWeb.Query.QueryResultSink sink) {
    QueryCheckArg(graph);

    // Try to do the inferencing.
    ArrayList evidence = prove(rules, targetModel, graph, -1);
    if (evidence == null)
        return; // not provable (in max number of steps, if that were given)

    // Then send the possible bindings to the QueryResultSink.

    // Map variables to indexes, in order of first appearance (subject, predicate, object).
    Hashtable vars = new Hashtable();
    foreach (Statement s in graph) {
        if (s.Subject is Variable && !vars.ContainsKey(s.Subject)) vars[s.Subject] = vars.Count;
        if (s.Predicate is Variable && !vars.ContainsKey(s.Predicate)) vars[s.Predicate] = vars.Count;
        if (s.Object is Variable && !vars.ContainsKey(s.Object)) vars[s.Object] = vars.Count;
    }

    // Prepare the bindings array.
    Variable[] varOrder = new Variable[vars.Count];
    foreach (Variable v in vars.Keys)
        varOrder[(int)vars[v]] = v;

    // Initialize the sink.
    sink.Init(varOrder);

    // Send a binding set for each piece of evidence.
    foreach (EvidenceItem ei in evidence) {
        // Write a comment to the results with the actual proof. (nifty actually)
        sink.AddComments(ei.ToProof().ToString());

        // Create the binding array; variables the evidence does not bind stay null.
        Resource[] variableBindings = new Resource[varOrder.Length];
        for (int i = 0; i < varOrder.Length; i++) {
            if (ei.env.ContainsKey(varOrder[i]))
                variableBindings[i] = (Resource)ei.env[varOrder[i]];
        }

        // Honour the sink's request to stop: a false return means it wants no more
        // results, so do not keep generating proofs and bindings for it.
        if (!sink.Add(new SemWeb.Query.VariableBindings(varOrder, variableBindings)))
            break;
    }

    // Close the sink.
    sink.Finished();
}