// Records b in the ResSet stored under key a in table h, creating and
// registering that ResSet the first time a is seen.
void AddRelation(Entity a, Entity b, Hashtable h) {
	ResSet related = (ResSet)h[a];
	if (related != null) {
		related.Add(b);
		return;
	}

	// First relation for this key: register a fresh set, then fill it.
	related = new ResSet();
	h[a] = related;
	related.Add(b);
}
// Records b in the ResSet stored under key a in table h.  When the set has
// to be created and incself is true, a itself is added before b; an already
// existing set is never given the self entry.
void AddRelation(Entity a, Entity b, Hashtable h, bool incself) {
	ResSet related = (ResSet)h[a];
	bool created = (related == null);

	if (created) {
		related = new ResSet();
		h[a] = related;
	}
	if (created && incself) {
		related.Add(a);
	}

	related.Add(b);
}
// Removes from this set every resource that the given set does not contain.
public void RetainAll(ResSet set) {
	// Walk a copy so that Remove never runs against the collection
	// currently being enumerated.
	ArrayList snapshot = new ArrayList(this);
	foreach (Resource candidate in snapshot) {
		if (set.Contains(candidate))
			continue;
		Remove(candidate);
	}
}
// Adds every item of the given set to this set.  A null argument is
// accepted and treated as "nothing to add".
public void AddRange(ResSet set) {
	if (set != null) {
		foreach (Resource member in set.Items)
			Add(member);
	}
}
// Computes the set of entities reachable from starts by repeatedly following
// table (a map from a resource to the ResSet of its direct successors).
// The start entities themselves are part of the result; resources that are
// not entities are ignored and never expanded.
static Entity[] GetClosure(Resource[] starts, Hashtable table) {
	ResSet closure = new ResSet();
	ResSet frontier = new ResSet(starts);

	while (frontier.Count > 0) {
		ResSet next = new ResSet();

		foreach (Resource node in frontier) {
			// Only entities that have not been reached yet are expanded.
			bool expand = node is Entity && !closure.Contains(node);
			if (!expand)
				continue;

			closure.Add(node);
			if (table.ContainsKey(node))
				next.AddRange((ResSet)table[node]);
		}

		// The successors collected in this round form the next frontier.
		frontier.Clear();
		frontier.AddRange(next);
	}

	return closure.ToEntityArray();
}
// ResSet convenience overload: a null start set yields null, otherwise the
// set is converted to an array and handed to the array-based GetClosure.
static Entity[] GetClosure(ResSet starts, Hashtable table) {
	return starts == null ? null : GetClosure(starts.ToArray(), table);
}
// Answers a select against the underlying data source while applying the
// class / property hierarchy tables held by this object:
//   * the `type` predicate is expanded through the subclass / superclass
//     closures and through the domain / range tables,
//   * subClassOf / subPropertyOf are answered from the closure tables,
//   * every other predicate is expanded to its subproperties, and the
//     results are mapped back to the queried property through PredMap.
// Filters without predicates, or with literal filters, are passed to the
// underlying source unchanged.
public void Select(SelectFilter filter, StatementSink sink) {
	if (filter.Predicates == null || filter.LiteralFilters != null) {
		data.Select(filter, sink);
		return;
	}

	ResSet remainingPredicates = new ResSet();

	Entity[] subjects = filter.Subjects;
	Entity[] predicates = filter.Predicates;
	Resource[] objects = filter.Objects;
	Entity[] metas = filter.Metas;

	foreach (Entity p in predicates) {
		if (p == type) {
			if (objects != null) {
				// Do the subjects have any of the types listed in the objects,
				// or what things have those types?

				// Expand objects by the subclass closure of the objects
				data.Select(new SelectFilter(subjects, new Entity[] { p }, GetClosure(objects, subclasses), metas), sink);

				// Process domains and ranges.
				ResSet dom = new ResSet(), ran = new ResSet();
				Hashtable domPropToType = new Hashtable();
				Hashtable ranPropToType = new Hashtable();
				foreach (Resource obj in objects) {
					// objects is a Resource[] and may hold literals (or nulls);
					// only entities can be classes, so skip anything else rather
					// than failing with an InvalidCastException.
					Entity e = obj as Entity;
					if (e == null) continue;

					Entity[] dc = GetClosure((ResSet)domainof[e], subprops);
					if (dc != null)
						foreach (Entity c in dc) {
							dom.Add(c);
							AddRelation(c, e, domPropToType, false);
						}

					dc = GetClosure((ResSet)rangeof[e], subprops);
					if (dc != null)
						foreach (Entity c in dc) {
							ran.Add(c);
							AddRelation(c, e, ranPropToType, false);
						}
				}

				// If it's in the domain of any of these properties,
				// we know its type.
				if (subjects != null) {
					if (dom.Count > 0)
						data.Select(new SelectFilter(subjects, dom.ToEntityArray(), null, metas), new ExpandDomRan(0, domPropToType, sink));
					if (ran.Count > 0)
						data.Select(new SelectFilter(null, ran.ToEntityArray(), subjects, metas), new ExpandDomRan(1, ranPropToType, sink));
				}

			} else if (subjects != null) {
				// What types do these subjects have?

				// Expand the resulting types by the closure of their superclasses
				data.Select(new SelectFilter(subjects, new Entity[] { p }, objects, metas), new Expand(superclasses, sink));

				// Use domains and ranges to get type info
				data.Select(new SelectFilter(subjects, null, null, metas), new Expand3(0, domains, superclasses, sink));
				data.Select(new SelectFilter(null, null, subjects, metas), new Expand3(1, ranges, superclasses, sink));

			} else {
				// What has type what?  We won't answer that question.
				data.Select(filter, sink);
			}

		} else if ((p == subClassOf || p == subPropertyOf)
			&& (metas == null || metas[0] == Statement.DefaultMeta)) {

			Hashtable supers = (p == subClassOf) ? superclasses : superprops;
			Hashtable subs = (p == subClassOf) ? subclasses : subprops;

			if (subjects != null && objects != null) {
				// Expand objects by the subs closure of the objects.
				data.Select(new SelectFilter(subjects, new Entity[] { p }, GetClosure(objects, subs), metas), sink);
			} else if (subjects != null) {
				// get all of the supers of all of the subjects
				foreach (Entity s in subjects)
					foreach (Entity o in GetClosure(new Entity[] { s }, supers))
						sink.Add(new Statement(s, p, o));
			} else if (objects != null) {
				// get all of the subs of all of the objects
				foreach (Resource o in objects) {
					if (o is Literal) continue;
					foreach (Entity s in GetClosure(new Entity[] { (Entity)o }, subs))
						sink.Add(new Statement(s, p, (Entity)o));
				}
			} else {
				// What is a subclass/property of what?  We won't answer that.
				data.Select(filter, sink);
			}
		} else {
			remainingPredicates.Add(p);
		}
	}

	if (remainingPredicates.Count > 0) {
		// Also query the subproperties of any property
		// being queried, but remember which subproperties
		// came from which superproperties so we can map them
		// back to the properties actually queried.  The closures
		// contain the queried properties themselves too.
		ResSet qprops = new ResSet();
		Hashtable propfrom = new Hashtable();
		foreach (Entity p in remainingPredicates) {
			foreach (Entity sp in GetClosure(new Entity[] { p }, subprops)) {
				AddRelation(sp, p, propfrom, false);
				qprops.Add(sp);
			}
		}

		SelectFilter sf = new SelectFilter(subjects, qprops.ToEntityArray(), objects, metas);
		sf.LiteralFilters = filter.LiteralFilters;
		sf.Limit = filter.Limit;

		data.Select(sf, new PredMap(propfrom, sink));
	}
}
// Makes binding the one and only value bound at slot varIndex: an existing
// set is emptied and reused, a missing one is created.
void Set(int varIndex, Resource binding) {
	ResSet slot = Bindings[varIndex];
	if (slot != null) {
		slot.Clear();
	} else {
		slot = new ResSet();
		Bindings[varIndex] = slot;
	}
	slot.Add(binding);
}
// Stores the statement sink (msg) and the resource set (add) that this
// sink works with.
public Sink(StatementSink msg, ResSet add) {
	this.add = add;
	this.msg = msg;
}
// Treats startingnode as fixed and looks for the beginning of redundant
// parallel paths: two edges that leave (or enter) startingnode with the same
// predicate and direction, where one end is a blank node that may act as a
// variable (it is not used as a predicate and has not been removed yet) and
// the other end is the candidate node to map it onto.  Each such pair seeds
// a SyncPath that MakeLeanMSG3 then tries to extend and, when successful,
// removes from msg.
private static void MakeLeanMSG2(Store msg, ResSet predicates, StatementSink removed, ResSet nodesremoved, BNode startingnode) {
	// Find every pair of two distinct outgoing edges from startingnode
	// with the same predicate, targeting entities only.

	MultiMap edges = new MultiMap();

	foreach (Statement s in msg.Select(new Statement(startingnode, null, null)))
		if (s.Object is Entity)
			edges.Put(new Edge(true, startingnode, s.Predicate, null), s.Object);
	foreach (Statement s in msg.Select(new Statement(null, null, startingnode)))
		edges.Put(new Edge(false, startingnode, s.Predicate, null), s.Subject);

	foreach (Edge e in edges.Keys) {
		// Make sure we have a distinct set of targets.
		ResSet targets_set = new ResSet();
		foreach (Entity r in edges.Get(e))
			targets_set.Add(r);
		if (targets_set.Count == 1) continue;

		IList targets = targets_set.ToEntityArray();

		// Take every pair of targets, provided
		// one is a bnode that can be a variable.
		for (int i = 0; i < targets.Count; i++) {
			if (!(targets[i] is BNode) || predicates.Contains((BNode)targets[i])) continue;
			if (nodesremoved.Contains((BNode)targets[i])) continue;
			for (int j = 0; j < targets.Count; j++) {
				if (i == j) continue;

				// Create a new synchronous-path object.
				SyncPath p = new SyncPath();
				p.FixedNodes.Add((Resource)targets[j]);
				p.FrontierVariables.Add((Resource)targets[i]);
				p.Mapping[targets[i]] = targets[j];

				// Record the seeding edge as already on the path.  The value
				// stored is the edge itself, the same convention MakeLeanMSG3
				// uses, instead of the path table referencing itself.
				Edge seed = new Edge(e.Direction, e.Start, e.Predicate, (BNode)targets[i]);
				p.Path[seed] = seed;

				if (MakeLeanMSG3(msg, predicates, removed, nodesremoved, p))
					break; // the target was removed
			}
		}
	}
}
// Stores the three collections the comparer consults: setVars, fps and ifps.
// NOTE(review): fps / ifps presumably stand for (inverse-)functional
// properties -- confirm against the Compare implementation.
public QueryStatementComparer(Hashtable setVars, ResSet fps, ResSet ifps) {
	this.ifps = ifps;
	this.fps = fps;
	this.setVars = setVars;
}
// Computes the entities reachable from starts by repeatedly following table
// (a map from a resource to the ResSet of its direct successors).
// When includeStarts is false the start entities are expanded but not put in
// the result during the first round; a start can still appear in the result
// if it is reached again in a later round.  Non-entities are ignored.
static Entity[] GetClosure(Resource[] starts, Hashtable table, bool includeStarts) {
	ResSet reached = new ResSet();
	ResSet frontier = new ResSet(starts);
	bool atStarts = true;

	while (frontier.Count > 0) {
		ResSet next = new ResSet();

		foreach (Resource node in frontier) {
			if (!(node is Entity) || reached.Contains(node))
				continue;

			// Everything is recorded except the start nodes when the caller
			// asked for them to be left out.
			if (!atStarts || includeStarts)
				reached.Add(node);

			if (table.ContainsKey(node))
				next.AddRange((ResSet)table[node]);
		}

		frontier.Clear();
		frontier.AddRange(next);
		atStarts = false;
	}

	return reached.ToEntityArray();
}
ResSet GetQueryRes(Statement s, int i, SemWeb.Query.QueryOptions options) { ResSet ret = new ResSet(); Resource r = s.GetComponent(i); if (r == null) return ret; if (!(r is Variable)) ret.Add(r); if (options.VariableKnownValues != null && r is Variable #if !DOTNET2 && options.VariableKnownValues.Contains((Variable)r)) {
// ResSet convenience overload: a null start set yields null, otherwise the
// set is converted to an array and handed to the array-based overload.
static Entity[] GetClosure(ResSet starts, Hashtable table, bool includeStarts) {
	if (starts != null) {
		return GetClosure(starts.ToArray(), table, includeStarts);
	}
	return null;
}
// Stores the store (msg) and the resource set (add) that this sink works with.
public Sink(Store msg, ResSet add) {
	this.add = add;
	this.msg = msg;
}
// Translates a graph pattern into the text of a SELECT ... WHERE { ... } query
// and passes it to Load together with a QueryResultsWrapper around sink.
//
//   graph    the statements of the pattern; only components 0..2 of each
//            statement are used (meta is not handled, see the note below).
//   options  DistinguishedVariables restricts the selected variables (an
//            empty list is rejected), VariableKnownValues adds constraints
//            rendered by SL, and a positive Limit appends a LIMIT clause.
//   sink     receives the results; when the pattern contains a blank node
//            that is not a Variable the sink is initialized and finished
//            immediately with no rows.
//
// Throws ArgumentException when DistinguishedVariables is empty, or when no
// variable ends up being selected.
public void Query(Statement[] graph, QueryOptions options, QueryResultSink sink) {
	if (options.DistinguishedVariables != null && options.DistinguishedVariables.Count == 0)
		throw new ArgumentException("options.DistinguishedVariables cannot be an empty list.");

	StringBuilder query = new StringBuilder();

	query.Append("SELECT ");

	// Get a list of variables and map them to fresh names.
	// variableNames maps Variable -> "?vN" (used while writing the query);
	// variableNames2 maps "vN" -> Variable and is handed to the results wrapper.
	#if !DOTNET2
	Hashtable variableNames = new Hashtable();
	#else
	Dictionary<Variable,string> variableNames = new Dictionary<Variable,string>();
	#endif
	Hashtable variableNames2 = new Hashtable();
	foreach (Statement s in graph) {
		for (int j = 0; j < 3; j++) {
			Variable v = s.GetComponent(j) as Variable;
			if (v == null) continue;
			if (variableNames.ContainsKey(v)) continue;
			// Both maps use the same index: the count before this variable is added.
			variableNames2["v" + variableNames.Count] = v;
			variableNames[v] = "?v" + variableNames.Count;
		}
	}

	// What variables will we select on?
	ArrayList selectedVars = new ArrayList();
	foreach (Variable v in
		options.DistinguishedVariables != null
			? options.DistinguishedVariables
			: variableNames.Keys) {
		if (!variableNames.ContainsKey(v)) continue; // in case distinguished variables list
		                                             // has more than what actually appears in query
		if (selectedVars.Contains(v)) continue; // don't select more than once
		query.Append(variableNames[v]);
		query.Append(' ');
		selectedVars.Add(v);
	}

	if (selectedVars.Count == 0) {
		if (options.DistinguishedVariables == null)
			throw new ArgumentException("There were no variables in the query.");
		else
			throw new ArgumentException("None of the variables in the query were distinguished.");
	}

	// Bnodes are not allowed here -- we can't query on them.
	// The result is reported as empty: the sink is initialized with the
	// selected variables and finished right away.
	foreach (Statement s in graph) {
		for (int j = 0; j < 3; j++) {
			if (s.GetComponent(j) is BNode && !(s.GetComponent(j) is Variable)) {
				Variable[] varArray = (Variable[])selectedVars.ToArray(typeof(Variable));
				sink.Init(varArray);
				sink.Finished();
				return;
			}
		}
	}

	// Build the graph pattern.
	query.Append("WHERE {\n");

	// Resources whose known-value constraint has already been considered,
	// so that each one is handled only at its first appearance.
	ResSet firstVarUse = new ResSet();
	foreach (Statement s in graph) {
		for (int j = 0; j < 3; j++) {
			Resource r = s.GetComponent(j);
			// Variables are written with their fresh name; for anything else
			// S is given null as the name.
			query.Append(S(r, r is Variable && variableNames.ContainsKey((Variable)r) ? (string)variableNames[(Variable)r] : null));
			query.Append(" ");
		}
		query.Append(" . \n");
		if (options.VariableKnownValues != null) {
			for (int j = 0; j < 3; j++) {
				Resource r = s.GetComponent(j);
				if (firstVarUse.Contains(r)) continue;
				firstVarUse.Add(r);
				if (r is Variable && variableNames.ContainsKey((Variable)r) &&
				#if !DOTNET2
				options.VariableKnownValues.Contains(r)
				#else
				options.VariableKnownValues.ContainsKey((Variable)r)
				#endif
				)
					query.Append(SL(options.VariableKnownValues[(Variable)r], (string)variableNames[(Variable)r], true));
			}
		}
		// And what about meta...?
	}

	query.Append("}");

	if (options.Limit > 0) {
		query.Append(" LIMIT ");
		query.Append(options.Limit);
	}

	Load(query.ToString(), new QueryResultsWrapper(sink, variableNames2));
}
// Builds the per-position value tables from bindings.  A null entry gets a
// single slot whose value is left null; every other entry is copied into
// an array in the enumeration order of its ResSet.  The index array starts
// out as all zeros.
public Permutation(ResSet[] bindings) {
	int positions = bindings.Length;
	index = new int[positions];
	values = new Resource[positions][];

	for (int pos = 0; pos < positions; pos++) {
		ResSet candidates = bindings[pos];
		if (candidates == null) {
			values[pos] = new Resource[1];
			continue;
		}

		values[pos] = new Resource[candidates.Count];
		int next = 0;
		foreach (Resource r in candidates) {
			values[pos][next] = r;
			next++;
		}
	}
}
// Stores the variable set and the store that this sink works with.
public Sink(ResSet variables, Store store) {
	this.store = store;
	this.variables = variables;
}
// Makes the graph in msg lean by removing redundant paths through its blank
// nodes; removed statements are reported through `removed` by the helpers.
private static void MakeLeanMSG(Store msg, ICollection bnodecollection, StatementSink removed) {
	// Background.  A graph is made lean by eliminating duplicate paths,
	// where "duplicate" means: pick some subset of the bnodes, call them
	// variables, and relabel them as bnodes from the remaining (fixed)
	// set.  With N bnodes there are 2^N possible subsets, so enumerating
	// them is not reasonable.
	//
	// Simplifying assumption: bnodes used as predicates are always fixed.
	// That lets us treat the graph as a real graph of nodes and edges and
	// observe that if a variable node V belongs to a removable subgraph
	// and V connects directly to a fixed node F through an edge labeled P,
	// then F must also connect to a fixed node G through an edge labeled
	// P.  So the search can start at nodes that project two edges with
	// the same label.
	//
	// Only contiguous "paths" are considered -- subsets of the bnodes
	// connected only through those nodes -- and we check whether another
	// path exists in the MSG once the bnodes of the first path are mapped
	// to nodes of the MSG.
	//
	// Strategy: start at each node and consider it fixed.  If it has two
	// outgoing edges with the same property and one of them ends on a
	// bnode, that is the start of a possible pair of redundant paths (the
	// one through the bnode being the one that can be eliminated).  The
	// bnode path must then be extended with all of that bnode's outgoing
	// edges, and the other path extended in parallel along the same
	// predicates.  If that cannot be done the paths are not duplicates.
	// If the parallel predicates end on the very same nodes, the bnode and
	// its edges can be removed.
	//
	// From there, each node that the bnode's edges end on (other than the
	// initial node) can be considered fixed or variable.  A variable may
	// take one of many values, but then the path has to grow to include
	// all outgoing edges of that variable.

	// With a single bnode in the MSG there are no subgraphs to check.
	if (bnodecollection.Count == 1) return;

	// Bnodes removed along the way.
	ResSet nodesremoved = new ResSet();

	// Nodes used as predicates; these can never be considered variable.
	ResSet predicates = new ResSet();
	foreach (Statement stmt in msg.Select(Statement.All)) {
		predicates.Add(stmt.Predicate);
	}

	// Try each bnode that is still present as the fixed starting point.
	foreach (BNode start in bnodecollection) {
		if (!nodesremoved.Contains(start)) {
			MakeLeanMSG2(msg, predicates, removed, nodesremoved, start);
		}
	}
}
// Evaluates the first statement of statement group `groupindex` against
// targetModel and updates `bindings` (both the per-result list in
// bindings.Results and the aggregate in bindings.Union) accordingly.
//
// Returns true when the statement was applied to the bindings.  Whenever the
// statement cannot be matched the method returns qs.Optional, i.e. false for
// a required statement and true for an optional one, leaving the bindings as
// they were at that point.
//
// Throws QueryExecutionException when all three components are unbound.
//
// NOTE(review): only group[0] is consulted here; any further statements of
// the group are not touched by this method.
private bool Query(int groupindex, BindingSet bindings, SelectableSource targetModel) {
	QueryStatement[] group = statements[groupindex];

	QueryStatement qs = group[0];

	// Number of components (subject, predicate, object) that are
	// variables currently bound to more than one value.
	int numMultiplyBound = IsMultiplyBound(qs.Subject, bindings)
		+ IsMultiplyBound(qs.Predicate, bindings)
		+ IsMultiplyBound(qs.Object, bindings);

	if (numMultiplyBound >= 1) {
		// If there is one or more multiply-bound variable,
		// then we need to iterate through the permutations
		// of the variables in the statement.

		Debug(qs.ToString() + " Something Multiply Bound");

		// One combined select over the union of all current values; the
		// per-result pairing is restored below by filtering `matches`.
		MemoryStore matches = new MemoryStore();
		targetModel.Select(
			new SelectFilter(
				(Entity[])qs.Subject.GetValues(bindings.Union, true),
				(Entity[])qs.Predicate.GetValues(bindings.Union, true),
				qs.Object.GetValues(bindings.Union, false),
				QueryMeta == null ? null : new Entity[] { QueryMeta }
				),
			new ClearMetaDupCheck(matches));

		Debug("\t" + matches.StatementCount + " Matches");

		if (matches.StatementCount == 0) {
			// This statement doesn't match any of
			// the existing bindings.  If this was
			// optional, preserve the bindings.
			return qs.Optional;
		}

		// We need to preserve the pairings of
		// the multiply bound variable with the matching
		// statements.

		ArrayList newbindings = new ArrayList();

		// For a required statement the union is rebuilt from the matches
		// found below; for an optional one the existing union is kept.
		if (!qs.Optional) bindings.Union.Clear(qs);

		foreach (QueryResult binding in bindings.Results) {
			// Break apart the permutations in this binding.
			BindingEnumerator enumer2 = new BindingEnumerator(qs, binding);
			Entity s, p;
			Resource o;
			while (enumer2.MoveNext(out s, out p, out o)) {
				// Get the matching statements from the union query
				Statement bs = new Statement(s, p, o);
				MemoryStore innermatches = matches.Select(bs).Load();

				// If no matches, the binding didn't match the filter.
				if (innermatches.StatementCount == 0) {
					if (qs.Optional) {
						// Preserve the binding.
						QueryResult bc = binding.Clone();
						bc.Set(qs, bs);
						newbindings.Add(bc);
						continue;
					} else {
						// Toss out the binding.
						continue;
					}
				}

				for (int si = 0; si < innermatches.StatementCount; si++) {
					Statement m = innermatches[si];
					if (!MatchesFilters(m, qs, targetModel)) {
						// Filtered out: an optional statement still keeps the
						// binding, without marking the statement as matched.
						if (qs.Optional) {
							QueryResult bc = binding.Clone();
							bc.Set(qs, bs);
							newbindings.Add(bc);
						}
						continue;
					}
					bindings.Union.Add(qs, m);

					QueryResult r = binding.Clone();
					r.Set(qs, m);
					r.StatementMatched[groupindex] = true;
					newbindings.Add(r);
				}
			}
		}

		bindings.Results = newbindings;

	} else {
		// There are no multiply bound variables, but if
		// there are two or more unbound variables,
		// we need to be sure to preserve the pairings
		// of the matching values.

		int numUnbound = IsUnbound(qs.Subject, bindings)
			+ IsUnbound(qs.Predicate, bindings)
			+ IsUnbound(qs.Object, bindings);

		bool sunbound = IsUnbound(qs.Subject, bindings) == 1;
		bool punbound = IsUnbound(qs.Predicate, bindings) == 1;
		bool ounbound = IsUnbound(qs.Object, bindings) == 1;

		Statement s = GetStatement(qs, bindings);

		// If we couldn't get a statement out of this,
		// then if this was not an optional filter,
		// fail.  If this was optional, don't change
		// the bindings any.
		if (s == StatementFailed) return qs.Optional;

		if (numUnbound == 0) {
			Debug(qs.ToString() + " All bound");

			// All variables are singly bound already.
			// We can just test if the statement exists.
			if (targetModel.Contains(s)) {
				// Mark each binding that it matched this statement.
				foreach (QueryResult r in bindings.Results)
					r.StatementMatched[groupindex] = true;
			} else {
				return qs.Optional;
			}

		} else if (numUnbound == 1) {
			Debug(qs.ToString() + " 1 Unbound");

			// There is just one unbound variable.  The others
			// are not multiply bound, so they must be uniquely
			// bound (but they may not be bound in all results).
			// Run a combined select to find all possible values
			// of the unbound variable at once, and set these to
			// be the values of the variable for matching results.

			ResSet values = new ResSet();
			MemoryStore ms = new MemoryStore();
			targetModel.Select(s, ms);
			for (int si = 0; si < ms.StatementCount; si++) {
				Statement match = ms[si];
				if (!MatchesFilters(match, qs, targetModel)) continue;
				if (sunbound) values.Add(match.Subject);
				if (punbound) values.Add(match.Predicate);
				if (ounbound) values.Add(match.Object);
			}

			Debug("\t" + values.Count + " matches");

			if (values.Count == 0)
				return qs.Optional;

			// Exactly one of the three flags is set in this branch, so
			// exactly one of these assignments takes effect.
			int varIndex = -1;
			if (sunbound) varIndex = qs.Subject.VarIndex;
			if (punbound) varIndex = qs.Predicate.VarIndex;
			if (ounbound) varIndex = qs.Object.VarIndex;

			// Seed the result list with one empty result if there is none yet.
			if (bindings.Results.Count == 0)
				bindings.Results.Add(new QueryResult(this));

			// The union gets its own copy of the value set.
			bindings.Union.Bindings[varIndex] = new ResSet();
			foreach (Resource r in values)
				bindings.Union.Bindings[varIndex].Add(r);

			foreach (QueryResult r in bindings.Results) {
				// Check that the bound variables are bound in this result.
				// If it is bound, it will be bound to the correct resource,
				// but it might not be bound at all if an optional statement
				// failed to match -- in which case, don't modify the binding.
				if (qs.Subject.IsVariable && !sunbound && r.Bindings[qs.Subject.VarIndex] == null) continue;
				if (qs.Predicate.IsVariable && !punbound && r.Bindings[qs.Predicate.VarIndex] == null) continue;
				if (qs.Object.IsVariable && !ounbound && r.Bindings[qs.Object.VarIndex] == null) continue;

				// NOTE(review): every result receives the same ResSet
				// instance, so a later in-place change through one result
				// would be visible through the others -- confirm intended.
				r.Bindings[varIndex] = values;
				r.StatementMatched[groupindex] = true;
			}

		} else {
			// There are two or more unbound variables, the
			// third variable being uniquely bound, if bound.
			// Keep track of the pairing of unbound variables.

			if (numUnbound == 3)
				throw new QueryExecutionException("Query would select all statements in the store.");

			Debug(qs.ToString() + " 2 or 3 Unbound");

			// Seed the result list with one empty result if there is none yet.
			if (bindings.Results.Count == 0)
				bindings.Results.Add(new QueryResult(this));

			ArrayList newbindings = new ArrayList();
			MemoryStore ms = new MemoryStore();
			targetModel.Select(s, ms);
			for (int si = 0; si < ms.StatementCount; si++) {
				Statement match = ms[si];
				if (!MatchesFilters(match, qs, targetModel)) continue;
				bindings.Union.Add(qs, match);
				foreach (QueryResult r in bindings.Results) {
					if (numUnbound == 2) {
						// Check that the bound variable is bound in this result.
						// If it is bound, it will be bound to the correct resource,
						// but it might not be bound at all if an optional statement
						// failed to match -- in which case, preserve the binding if
						// this was an optional statement.
						bool matches = true;
						if (qs.Subject.IsVariable && !sunbound && r.Bindings[qs.Subject.VarIndex] == null) matches = false;
						if (qs.Predicate.IsVariable && !punbound && r.Bindings[qs.Predicate.VarIndex] == null) matches = false;
						if (qs.Object.IsVariable && !ounbound && r.Bindings[qs.Object.VarIndex] == null) matches = false;
						if (!matches) {
							// NOTE(review): this runs once per matching statement,
							// so the same result object can be appended to
							// newbindings several times -- confirm intended.
							if (qs.Optional)
								newbindings.Add(r);
							continue;
						}
					}

					// Each (result, match) pair yields its own extended result.
					QueryResult r2 = r.Clone();
					r2.Add(qs, match);
					r2.StatementMatched[groupindex] = true;
					newbindings.Add(r2);
				}
			}
			if (newbindings.Count == 0)
				return qs.Optional; // don't clear out bindings if this was optional and it failed
			bindings.Results = newbindings;
		}
	}

	return true;
}
// Tries to extend the synchronous path `path` by one step and, if the whole
// variable path turns out to be duplicated by a path over fixed nodes,
// removes the variable path's statements from msg.
//
//   msg           the graph being made lean; statements are removed from it
//   predicates    nodes used as predicates (never treated as variables)
//   removed       optional sink that receives every removed statement
//   nodesremoved  receives every node of the mapping whose path was removed
//   path          current state: fixed / variable nodes, frontier, the
//                 variable-to-value mapping and the edges already visited
//
// Returns true when a redundant path was found and removed, false when the
// path cannot be matched under any choice of values.
private static bool MakeLeanMSG3(Store msg, ResSet predicates, StatementSink removed, ResSet nodesremoved, SyncPath path) {
	// The variable path has to be expanded by including the statements
	// connected to the variables on the frontier.  Statements
	// mentioning a variable node have already been considered.
	// The target of each such statement can be considered fixed
	// or variable.  If a variable is considered fixed, the edge
	// must exist in the MSG substituting the variables for their
	// values.  If it's variable, it has to have at least one
	// match in the MSG but not as any of the variable nodes.
	// If all targets are considered fixed (and have matches),
	// then the variables so far (and their edges) can all be
	// removed and no more processing needs to be done.
	// There are (2^N)-1 other considerations.  For each of those,
	// the targets considered variables all become the new
	// frontier, and this is repeated.

	// First, get a list of edges from the frontier that we
	// haven't considered yet.

	ArrayList alledges = new ArrayList();
	foreach (BNode b in path.FrontierVariables) {
		// Make sure all edges are kept because even the ones
		// to literals have to be removed when duplication is found.
		foreach (Statement s in msg.Select(new Statement(b, null, null)))
			alledges.Add(new Edge(true, b, s.Predicate, s.Object));
		foreach (Statement s in msg.Select(new Statement(null, null, b)))
			alledges.Add(new Edge(false, b, s.Predicate, s.Subject));
	}

	ArrayList newedges = new ArrayList();
	ResSet alltargets = new ResSet();
	ResSet fixabletargetsset = new ResSet(); // can be fixed
	ResSet variabletargetsset = new ResSet(); // must be variable

	foreach (Edge e in alledges) {
		if (path.Path.ContainsKey(e)) continue;
		path.Path[e] = e;

		// This checks if we can keep the target of this edge
		// fixed, given the variable mappings we have so far.
		bool isTargetFixable =
			msg.Contains(e.AsStatement().Replace(path.Mapping));

		// If the target of e is any of the following, we
		// can check immediately if the edge is supported
		// by the MSG under the variable mapping we have so far:
		//    a named node, literal, fixed node, or predicate
		//    a variable we've seen already
		// If it's not supported, this path fails.  If it is
		// supported, we're done with this edge.
		if (!(e.End is BNode)
			|| path.FixedNodes.Contains(e.End)
			|| predicates.Contains(e.End)
			|| path.VariableNodes.Contains(e.End)) {
			if (!isTargetFixable) return false;
			continue; // this edge is supported, so we can continue
		}

		// The target of e is a new BNode.  Register it as fixable the
		// first time it is seen ...
		if (!alltargets.Contains(e.End)) {
			alltargets.Add(e.End);
			fixabletargetsset.Add(e.End);
		}

		// ... and then demote it: if this target is not fixable via this
		// edge, it's not fixable at all.  Demoting after the registration
		// keeps a target that is first reached through a non-fixable edge
		// from ending up in both sets.
		if (!isTargetFixable) {
			fixabletargetsset.Remove(e.End);
			variabletargetsset.Add(e.End);
		}

		newedges.Add(e);
	}

	// If all of the targets were fixable (trivially true also
	// if there simply were no new edges/targets), then we've reached
	// the end of this path.  We can immediately remove
	// the edges we've seen so far, under the variable mapping
	// we've chosen.
	if (variabletargetsset.Count == 0) {
		foreach (Edge e in path.Path.Keys) {
			Statement s = e.AsStatement();
			msg.Remove(s);
			if (removed != null) removed.Add(s);
		}
		foreach (Entity e in path.Mapping.Keys)
			nodesremoved.Add(e);
		return true;
	}

	// At this point, at least one target must be a variable
	// and we'll have to expand the path in that direction.
	// We might want to permute through the ways we can
	// take fixable nodes as either fixed or variable, but
	// we'll be greedy and assume everything fixable is
	// fixed and everything else is a variable.

	path.FixedNodes.AddRange(fixabletargetsset);
	path.VariableNodes.AddRange(variabletargetsset);

	// But we need to look at all the ways each variable target
	// can be mapped to a new value, which means intersecting
	// the possible matches for each relevant edge.

	Entity[] variables = variabletargetsset.ToEntityArray();
	ResSet[] values = new ResSet[variables.Length];
	Entity[][] values_array = new Entity[variables.Length][];
	int[] choices = new int[variables.Length];
	for (int i = 0; i < variables.Length; i++) {
		foreach (Edge e in newedges) {
			if (e.End != variables[i]) continue;

			// Get the possible values this edge allows
			Resource[] vr;
			if (e.Direction)
				vr = msg.SelectObjects((Entity)path.Mapping[e.Start], e.Predicate);
			else
				vr = msg.SelectSubjects(e.Predicate, (Entity)path.Mapping[e.Start]);

			// Filter out literals and any variables
			// on the path!  The two paths can't intersect
			// except at fixed nodes.
			ResSet v = new ResSet();
			foreach (Resource r in vr) {
				if (r is Literal) continue;
				if (path.Mapping.ContainsKey(r)) continue;
				v.Add(r);
			}

			// Intersect these with the values we have already.
			if (values[i] == null)
				values[i] = v;
			else
				values[i].RetainAll(v);

			// If no values are available for this variable,
			// we're totally done.
			if (values[i].Count == 0) return false;
		}

		choices[i] = values[i].Count;
		values_array[i] = values[i].ToEntityArray();
	}

	// Now we have to permute through the choice of values.
	// Make an array of the number of choices for each variable.
	Permutation p = new Permutation(choices);
	int[] pstate;
	while ((pstate = p.Next()) != null) {
		// Each choice of values gets its own copy of the path state, with
		// the new variable targets as the frontier for the next step.
		SyncPath newpath = new SyncPath();
		newpath.FixedNodes.AddRange(path.FixedNodes);
		newpath.VariableNodes.AddRange(path.VariableNodes);
		newpath.Mapping = (Hashtable)path.Mapping.Clone();
		newpath.Path = (Hashtable)path.Path.Clone();

		newpath.FrontierVariables = variabletargetsset;

		for (int i = 0; i < variables.Length; i++) {
			Entity value = values_array[i][pstate[i]];
			newpath.Mapping[variables[i]] = value;
			newpath.FixedNodes.Add(value);
		}

		if (MakeLeanMSG3(msg, predicates, removed, nodesremoved, newpath)) return true;
	}

	return false;
}
// Compares two arrays for equality.  Two nulls (or the same instance) are
// equal; a null and a non-null array, or arrays of different length, are
// not.  Otherwise the arrays are equal when they match element by element,
// or, failing that, when they contain the same set of distinct elements
// (order is ignored).
static bool eq(object[] a, object[] b) {
	if (a == b) return true;
	if (a == null || b == null) return false;
	if (a.Length != b.Length) return false;

	// Fast path: positional comparison.  object.Equals tolerates null
	// elements, and the scan stops at the first difference.
	bool alleq = true;
	for (int i = 0; i < a.Length; i++) {
		if (!object.Equals(a[i], b[i])) {
			alleq = false;
			break;
		}
	}
	if (alleq) return true;

	// Order-insensitive comparison.  The sets are equal only if intersecting
	// a's elements with b's loses nothing from a (a is contained in b) and
	// the result is as large as b (b is contained in a).  Checking only the
	// second condition would treat [x, y] and [x, x] as equal.
	ResSet xa = new ResSet(a);
	ResSet xb = new ResSet(b);
	int distinctInA = xa.Count;
	xa.RetainAll(xb);
	return xa.Count == distinctInA && xa.Count == xb.Count;
}
// Starting from the anonymous node `node`, repeatedly selects from `store`
// every statement that mentions a pending node as subject, predicate or
// object and passes it to a Sink built over `msg` and the set of nodes to
// visit next.  Each node is expanded at most once.
// NOTE(review): the Sink is presumably what forwards statements to msg and
// queues newly discovered nodes -- confirm against the Sink implementation.
// Throws ArgumentException when node has a URI.
public static void FindMSG(SelectableSource store, Entity node, StatementSink msg) {
	if (node.Uri != null)
		throw new ArgumentException("node must be anonymous");

	ResSet visited = new ResSet();
	ResSet pending = new ResSet();
	pending.Add(node);

	while (pending.Count > 0) {
		// Swap in a fresh pending set; the sink fills it while the
		// current batch is being processed.
		ResSet batch = pending;
		pending = new ResSet();
		Sink collector = new Sink(msg, pending);

		foreach (Entity n in batch) {
			if (!visited.Contains(n)) {
				visited.Add(n);
				store.Select(new Statement(n, null, null, null), collector);
				store.Select(new Statement(null, n, null, null), collector);
				store.Select(new Statement(null, null, n, null), collector);
			}
		}
	}
}
// The next few routines convert a set of axioms from a StatementSource
// into a data structure of use for the algorithm, with Sequents and things.

// Builds the table of cases from the statements in the default meta of
// `rules`.  An implication (predicate entLOGIMPLIES with an entity object)
// becomes one Sequent per head statement; every other statement becomes a
// Sequent with an empty body.
private static Hashtable RulesToCases(StatementSource rules) {
	Hashtable cases = new Hashtable();
	MemoryStore rules_store = new MemoryStore(rules);

	foreach (Statement p in rules_store) {
		// Only statements in the default meta are considered here.
		if (!(p.Meta == Statement.DefaultMeta))
			continue;

		bool isImplication = p.Predicate == entLOGIMPLIES && p.Object is Entity;
		if (!isImplication) {
			AddSequent(cases, new Sequent(p, new Statement[0], null));
			continue;
		}

		// Body and head are the statements whose meta is the
		// implication's subject and object, respectively.
		MemoryStore body = new MemoryStore();
		MemoryStore head = new MemoryStore();

		rules_store.Select(new Statement(null, null, null, (Entity)p.Subject), new RemoveMeta(body));
		rules_store.Select(new Statement(null, null, null, (Entity)p.Object), new RemoveMeta(head));

		// Any variables in the head not bound in the body represent existentially closed bnodes.
		// (Euler's OWL test case does this.  Wish they had used bnodes instead of vars...)
		ResSet bodyvars = new ResSet();
		foreach (Statement b in body) {
			if (b.Subject is Variable) {
				bodyvars.Add(b.Subject);
			}
			if (b.Predicate is Variable) {
				bodyvars.Add(b.Predicate);
			}
			if (b.Object is Variable) {
				bodyvars.Add(b.Object);
			}
		}
		foreach (Entity v in head.GetEntities()) {
			if (!(v is Variable) || bodyvars.Contains(v))
				continue;
			head.Replace(v, new BNode(((Variable)v).LocalName));
		}

		// Replace (...) lists in the body that are tied to the subjects
		// of user predicates with callArgs objects.
		Hashtable callArgs = new Hashtable();
		CollectCallArgs(body, callArgs);

		// Rules can't have more than one statement in their
		// consequent.  The best we can do is break up
		// the consequent into multiple rules.  (Since all head
		// variables are bound in body, it's equivalent...?)
		foreach (Statement h in head) {
			AddSequent(cases, new Sequent(h, body.ToArray(), callArgs));
		}
	}

	return cases;
}
// Copies every item of the given set into this set; a null set is ignored.
public void AddRange(ResSet set) {
	bool nothingToAdd = (set == null);
	if (nothingToAdd) {
		return;
	}

	foreach (Resource item in set.Items) {
		Add(item);
	}
}
// Keeps only the resources that are also contained in the given set.
public void RetainAll(ResSet set) {
	// Iterate over a copy of the current contents so that removals do
	// not disturb the enumeration.
	ArrayList current = new ArrayList(this);
	foreach (Resource item in current) {
		bool keep = set.Contains(item);
		if (!keep) {
			Remove(item);
		}
	}
}
/// <summary>
/// Evaluates a graph pattern against the store by compiling it into a single SQL SELECT
/// over the statements table, and reports each resulting row of variable bindings to
/// <paramref name="sink"/>.
/// </summary>
/// <param name="graph">
/// The statements making up the pattern. Must contain at least one statement, and no
/// statement component may be null (use Variables for unknowns).
/// </param>
/// <param name="options">
/// Query options; DistinguishedVariables, VariableKnownValues, VariableLiteralFilters and
/// Limit are consulted. The object is cloned before its known-values table is modified.
/// </param>
/// <param name="sink">
/// Receives Init() with the reported variables, then one Add() per result row, and
/// Finished() when the query completes. If Add() returns false the method returns
/// immediately without calling Finished().
/// </param>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
/// <exception cref="ArgumentException">The graph is empty or has a null component.</exception>
public void Query(Statement[] graph, SemWeb.Query.QueryOptions options, SemWeb.Query.QueryResultSink sink) {
    if (graph == null) throw new ArgumentNullException("graph");
    if (options == null) throw new ArgumentNullException("options");
    if (sink == null) throw new ArgumentNullException("sink");
    if (graph.Length == 0) throw new ArgumentException("graph array must have at least one element");

    // This method translates the graph pattern into a single SQL statement. Each graph statement
    // corresponds to a new use of the _statements table in the FROM clause. For instance:
    //     ?a foaf:knows ?b . ?b foaf:name ?c .
    // translates to
    //     SELECT
    //         g0.subject, v0.value,
    //         g0.object, v1.value,
    //         g1.object, v2.value, v2lit.value, v2lit.language, v2lit.datatype
    //     FROM
    //         db_tables as g0 LEFT JOIN db_entities AS v0 ON g0.subject=v0.id LEFT JOIN db_entities AS v1 ON g0.object=v1.id,
    //         db_tables as g1 LEFT JOIN db_entities AS v2 ON g1.object=v2.id LEFT JOIN db_literals AS v2lit ON g1.object=v2lit.id
    //     WHERE
    //         g0.predicate = <the id of the foaf:knows entity> AND
    //         g1.predicate = <the id of the foaf:name entity> AND
    //         g0.object = g1.subject
    //
    // If any variable column is an *undistinguished* variable --- which is to say that the caller
    // says it is a variable, but is not concerned with its values --- then we want to apply
    // DISTINCT to the SELECT statement. This is because while in the normal case we may get
    // duplicates, we expect that to not occur more than the caller expects, but in the latter
    // case there will often be many duplicates. Consider the SPARQL query:
    //     SELECT DISTINCT ?p WHERE { ?s ?p ?o }
    // to get a list of predicates in the dataset, which corresponds to the graph query
    //     ?s ?p ?o
    // where only ?p is distinguished.
    // This normally translates to:
    //     SELECT
    //         g0.predicate, v0.value
    //     FROM
    //         db_tables as g0 LEFT JOIN db_entities AS v0 ON g0.predicate=v0.id
    // which of course is going to return a result for every triple in the database.
    // So we add DISTINCT to the beginning ("SELECT DISTINCT").
    // Unfortunately, MySQL performs the DISTINCT bit only after the LEFT JOINs (which makes sense normally).
    // That means that MySQL is repeatedly fetching the URI values of the predicates and checking
    // if a new unique row has been created, and this is very slow. What we want is to get the distinct
    // IDs of the predicates first, and then get their URIs.
    // I first tried implementing this with VIEWs, but it didn't always speed things up, and it was
    // difficult to manage the creation and deletion of VIEWs.
    // So instead, in this case, we do the query in two parts. First we get the IDs of the variables,
    // and then we get their URIs.

    options = options.Clone(); // because we modify the knownvalues array

    // Order the variables mentioned in the graph.
    Variable[] varOrder;
    ResSet distinguishedVars = null;
    bool useDistinct = false;
    {
        if (options.DistinguishedVariables != null)
            distinguishedVars = new ResSet(options.DistinguishedVariables);
        else
            distinguishedVars = new ResSet();

        Hashtable seenvars = new Hashtable();
        foreach (Statement filter in graph) {
            for (int i = 0; i < 4; i++) {
                Resource r = filter.GetComponent(i);
                if (r == null)
                    throw new ArgumentException("The graph may not have any null components. Use Variables instead.");

                if (r is Variable) {
                    if (options.DistinguishedVariables != null) {
                        if (!distinguishedVars.Contains(r)) {
                            // If we are omitting a column from the results because it is
                            // not distinguished, and it's not a meta column, then we'll
                            // use DISTINCT.
                            if (i != 3)
                                useDistinct = true;

                            // Don't put this into seenvars.
                            continue;
                        }
                    } else {
                        distinguishedVars.Add(r); // all variables are distinguished
                    }

                    seenvars[r] = r;
                }
            }
        }

        varOrder = new Variable[seenvars.Count];
        int ctr = 0;
        foreach (Variable v in seenvars.Keys)
            varOrder[ctr++] = v;
    }

    // Set the initial bindings to the result sink
    sink.Init(varOrder);

    // Maps a variable to the ArrayList of its literal filters that could not be
    // translated to SQL and therefore have to be checked in memory below.
    Hashtable varLitFilters = new Hashtable();

    // Prefetch the IDs of all resources mentioned in the graph and in variable known values.
    // For Resources in the graph that are not in the store, the query immediately fails.
    {
        ArrayList graphResources = new ArrayList();
        foreach (Statement s in graph) {
            for (int i = 0; i < 4; i++) {
                Resource r = s.GetComponent(i);
                if (!(r is BNode)) // definitely exclude variables, but bnodes are useless too
                    graphResources.Add(r);
            }
        }
        if (options.VariableKnownValues != null)
            foreach (ICollection values in options.VariableKnownValues.Values)
                graphResources.AddRange(values);

        PrefetchResourceIds(graphResources);

        // Check resources in graph and fail fast if any is not in the store.
        foreach (Statement s in graph) {
            for (int i = 0; i < 4; i++) {
                Resource r = s.GetComponent(i);
                if (r is Variable) continue;
                if ((object)r != (object)Statement.DefaultMeta && GetResourceKey(r) == null) {
                    sink.AddComments("Resource " + r + " is not contained in the data model.");
                    sink.Finished();
                    return;
                }
            }
        }

        // Check variable known values and remove any values not in the store.
        // Don't do any fail-fasting here because there might be entries in this
        // dictionary that aren't even used in this query (yes, poor design).
        // We check later anyway.
        if (options.VariableKnownValues != null) {
            #if !DOTNET2
            foreach (Variable v in new ArrayList(options.VariableKnownValues.Keys)) {
            #else
            foreach (Variable v in new System.Collections.Generic.List<Variable>(options.VariableKnownValues.Keys)) {
            #endif
                #if !DOTNET2
                ArrayList newvalues = new ArrayList();
                #else
                System.Collections.Generic.List<Resource> newvalues = new System.Collections.Generic.List<Resource>();
                #endif

                foreach (Resource r in (ICollection)options.VariableKnownValues[v]) {
                    if ((object)r == (object)Statement.DefaultMeta || GetResourceKey(r) != null)
                        newvalues.Add(r);
                }

                options.VariableKnownValues[v] = newvalues;
            }
        }
    }

    // Helpers
    string[] colnames = { "subject", "predicate", "object", "meta" };

    // we initialize these things while locked, but use them after we release the lock
    ArrayList results = new ArrayList();
    Hashtable resourceCache = new Hashtable(); // map resource ID to Resource instances

    // We can either include JOINs to the entities and literals table for every variable
    // in the query, or we can delay fetching that information to separate SELECTs
    // after the main part of the query is done. If we are including DISTINCT, then we
    // don't want to do the JOINs because the JOINs happen before the DISTINCT and will
    // be unnecessarily repeated. Also if the query has many variables, say more than 6,
    // then it may slow down query planning (the MySQL optimizer) to include them all in
    // one query.
    bool joinEntitiesAndLiterals = true;
    if (useDistinct) joinEntitiesAndLiterals = false;
    if (varOrder.Length > 6) joinEntitiesAndLiterals = false;

    // Lock the store and make sure we are initialized and any pending add's have been committed.
    lock (syncroot) {
        Init();
        RunAddBuffer();

        // Compile the SQL statement.
        Hashtable varRef = new Hashtable(); // the column name representing the variable, as in "g0.subject"
        Hashtable varRef2 = new Hashtable(); // the index of the variable, for accessing the entities and literals joined tables
        Hashtable varSelectedLiteral = new Hashtable(); // whether the variable is in a literal column and a LEFT JOIN for the literals table was used for it
        Hashtable varCouldBeLiteral = new Hashtable(); // whether the variable is only in literal columns
        Hashtable varSelectedEntity = new Hashtable(); // whether a LEFT JOIN for the entities table was used for a variable

        StringBuilder fromClause = new StringBuilder();
        StringBuilder whereClause = new StringBuilder();

        for (int f = 0; f < graph.Length; f++) {
            // For each filter, we select FROM the statements table with an
            // alias: g#, where # is the filter's index.
            if (f > 0) fromClause.Append(',');
            fromClause.Append(table);
            fromClause.Append("_statements AS g");
            fromClause.Append(f);

            // For each component of the filter...
            for (int i = 0; i < 4; i++) {
                // This has the name of the column corresponding to this variable (i.e. "g1.predicate").
                string myRef = "g" + f + "." + colnames[i];

                Variable v = graph[f].GetComponent(i) as Variable;
                if (v != null) {
                    // If the component is a variable, then if this is
                    // the first time we're seeing the variable, we don't
                    // add any restrictions to the WHERE clause, but we
                    // note the variable's "name" in the world of SQL
                    // so we can refer back to it later and we add the
                    // necessary FROM tables so we can get its URI and
                    // literal value if it is a reported variable.
                    // If this isn't the first time, then we add a WHERE restriction so
                    // that the proper columns here and in a previous
                    // filter are forced to have the same value.
                    if (!varRef.ContainsKey(v)) {
                        // This is the first time we are seeing this variable.

                        // Record the column name for the variable (i.e. g0.subject).
                        varRef[v] = myRef;

                        // Record an index for the variable (i.e. 0, 1, 2, ...)
                        int vIndex = varRef.Count;
                        varRef2[v] = vIndex;

                        varCouldBeLiteral[v] = (i == 2);

                        // LEFT JOIN the entities table for this variable to get its URI
                        // only if it is a distinguished variable and we are not using DISTINCT.
                        varSelectedEntity[v] = false;
                        if (joinEntitiesAndLiterals && distinguishedVars.Contains(v)) {
                            varSelectedEntity[v] = true; // Record that we are selecting the entities table for this variable.
                            fromClause.Append(" LEFT JOIN ");
                            fromClause.Append(table);
                            fromClause.Append("_entities AS vent");
                            fromClause.Append(vIndex);
                            fromClause.Append(" ON ");
                            fromClause.Append(myRef);
                            fromClause.Append("=");
                            fromClause.Append("vent" + vIndex + ".id ");
                        }

                        // LEFT JOIN the literals table for this variable:
                        //    if it is in an object position
                        //    to get its value, language, and datatype only if it is a distinguished variable and we are not using DISTINCT
                        //    to apply a literal value filter (which will be done later)
                        #if !DOTNET2
                        bool hasLitFilter = (options.VariableLiteralFilters != null && options.VariableLiteralFilters[v] != null);
                        #else
                        bool hasLitFilter = (options.VariableLiteralFilters != null && options.VariableLiteralFilters.ContainsKey(v));
                        #endif
                        varSelectedLiteral[v] = false;
                        if (i == 2 && ((joinEntitiesAndLiterals && distinguishedVars.Contains(v)) || hasLitFilter)) {
                            varSelectedLiteral[v] = true; // Record that we are selecting the literals table for this variable.
                            fromClause.Append(" LEFT JOIN ");
                            fromClause.Append(table);
                            fromClause.Append("_literals AS vlit");
                            fromClause.Append(vIndex);
                            fromClause.Append(" ON ");
                            fromClause.Append(myRef);
                            fromClause.Append("=");
                            fromClause.Append("vlit" + vIndex + ".id ");
                        }

                        // If this variable has known values, then we must restrict what values can appear using a WHERE clause.
                        if (options.VariableKnownValues != null) {
                            ICollection values = null;
                            #if DOTNET2
                            if (options.VariableKnownValues.ContainsKey(v))
                            #endif
                                values = (ICollection)options.VariableKnownValues[v];
                            if (values != null) {
                                if (values.Count == 0) {
                                    sink.Finished();
                                    return;
                                }
                                Resource r = ToMultiRes((Resource[])new ArrayList(values).ToArray(typeof(Resource)));
                                if (!WhereItem(myRef, r, whereClause, whereClause.Length != 0)) {
                                    // We know at this point that the query cannot return any results.
                                    sink.Finished();
                                    return;
                                }
                            }
                        }

                    } else {
                        // We've seen this variable before, so link up the column in this
                        // statement to the corresponding column in a previous (or this) statement.
                        if (whereClause.Length != 0)
                            whereClause.Append(" AND ");
                        whereClause.Append('(');
                        whereClause.Append((string)varRef[v]);
                        whereClause.Append('=');
                        whereClause.Append(myRef);
                        whereClause.Append(')');
                        if (i != 2)
                            varCouldBeLiteral[v] = false;
                    }

                } else {
                    // If this is not a variable, then it is a resource.
                    // Append something into the WHERE clause to make sure this component gets
                    // the right fixed value. If we cannot add the component to the WHERE clause
                    // because the fixed value isn't even known in the data source, we can stop early.
                    if (!WhereItem(myRef, graph[f].GetComponent(i), whereClause, whereClause.Length != 0)) {
                        // We know at this point that the query cannot return any results.
                        sink.Finished();
                        return;
                    }
                }
            }

        } // graph filter 0...n

        // Add literal filters to the WHERE clause
        foreach (Variable v in varOrder) {
            // Is there a literal value filter?
            if (options.VariableLiteralFilters == null) continue;
            #if !DOTNET2
            if (options.VariableLiteralFilters[v] == null) continue;
            #else
            if (!options.VariableLiteralFilters.ContainsKey(v)) continue;
            #endif

            // If this variable was not used in a literal column, then
            // we cannot filter its value. Really, it will never be a literal.
            if (!(bool)varSelectedLiteral[v]) continue;

            foreach (LiteralFilter filter in (ICollection)options.VariableLiteralFilters[v]) {
                string s = FilterToSQL(filter, "vlit" + (int)varRef2[v] + ".value");
                if (s == null) {
                    // No SQL form was produced for this filter. Remember it so that
                    // it is applied in memory when the bindings are built, rather
                    // than being silently ignored.
                    ArrayList pendingFilters = (ArrayList)varLitFilters[v];
                    if (pendingFilters == null) {
                        pendingFilters = new ArrayList();
                        varLitFilters[v] = pendingFilters;
                    }
                    pendingFilters.Add(filter);
                    continue;
                }

                if (whereClause.Length != 0)
                    whereClause.Append(" AND ");
                whereClause.Append(s);
            }
        }

        // Put the parts of the SQL statement together
        StringBuilder cmd = new StringBuilder();

        cmd.Append("SELECT ");

        if (useDistinct) cmd.Append("DISTINCT ");

        if (!SupportsLimitClause && options.Limit > 0) {
            cmd.Append("TOP ");
            cmd.Append(options.Limit);
            cmd.Append(' ');
        }

        // Add all of the distinguished variables to the SELECT clause.
        bool firstvar = true;
        foreach (Variable v in varOrder) {
            if (!firstvar) cmd.Append(',');
            firstvar = false;

            cmd.Append((string)varRef[v]);

            if ((bool)varSelectedEntity[v]) {
                cmd.Append(", vent" + (int)varRef2[v] + ".value");
            }
            if ((bool)varSelectedLiteral[v]) {
                cmd.Append(", vlit" + (int)varRef2[v] + ".value");
                cmd.Append(", vlit" + (int)varRef2[v] + ".language");
                cmd.Append(", vlit" + (int)varRef2[v] + ".datatype");
            }
        }

        cmd.Append(" FROM ");
        cmd.Append(fromClause.ToString());

        if (whereClause.Length > 0)
            cmd.Append(" WHERE ");
        cmd.Append(whereClause.ToString());

        if (SupportsLimitClause && options.Limit > 0) {
            cmd.Append(" LIMIT ");
            cmd.Append(options.Limit);
        }

        cmd.Append(';');

        if (Debug) {
            Console.Error.WriteLine(cmd.ToString());
        }

        // Execute the query.

        // When we use DISTINCT and don't select URI and literal values at first,
        // we have to select them after. And since we can't maintain two IDataReaders
        // simultaneously, that means we have to pull the first set of results into
        // memory. It would be nice to not have to do that when we don't use DISTINCT,
        // but in practice it doesn't really matter since in SPARQL it's all sucked
        // into memory anyway.

        using (IDataReader reader = RunReader(cmd.ToString())) {
            while (reader.Read()) {
                QueryResultRowVariable[] row = new QueryResultRowVariable[varOrder.Length];
                results.Add(row);

                // The column layout mirrors the SELECT list built above: the id column,
                // then the entity value if joined, then the three literal columns if joined.
                int col = 0;
                for (int i = 0; i < varOrder.Length; i++) {
                    Variable v = varOrder[i];

                    row[i].id = reader.GetInt64(col++);
                    if ((bool)varSelectedEntity[v]) {
                        row[i].uri = AsString(reader[col++]);
                    }
                    if ((bool)varSelectedLiteral[v]) {
                        row[i].litvalue = AsString(reader[col++]);
                        row[i].litlanguage = AsString(reader[col++]);
                        row[i].litdatatype = AsString(reader[col++]);
                    }
                }
            }
        }

        // For any distinguished variable that we did not select URIs or literal values for,
        // select that information now.
        for (int i = 0; i < varOrder.Length; i++) {
            Variable v = varOrder[i];

            if ((bool)varSelectedEntity[v] && (!(bool)varCouldBeLiteral[v] || (bool)varSelectedLiteral[v])) continue;

            // Get the list of resource IDs found for this variable.
            ArrayList rids = new ArrayList();
            foreach (QueryResultRowVariable[] row in results) {
                if (row[i].id <= 1) continue; // can't fetch for Statement.DefaultMeta
                if (resourceCache.ContainsKey(row[i].id)) continue; // we've already fetched it

                // The main query already returned this literal's value (the literals
                // table was joined, e.g. for a literal filter). Leave it out of the
                // lookup: it is not in the entities table and the literals table is not
                // re-queried for this variable, so it would otherwise fall through to
                // the bnode fallback below and be reported as a blank node.
                if (row[i].litvalue != null) continue;

                rids.Add(row[i].id); // probably no need to remove duplicates
            }

            if (rids.Count > 0) {
                // Fetch what we can for entities.
                if (!(bool)varSelectedEntity[v]) {
                    StringBuilder cmd2 = new StringBuilder();
                    cmd2.Append("SELECT id, value FROM ");
                    cmd2.Append(table);
                    cmd2.Append("_entities WHERE id IN (");
                    bool first = true;
                    foreach (Int64 id in rids) {
                        if (!first) cmd2.Append(',');
                        first = false;
                        cmd2.Append(id);
                    }
                    cmd2.Append(")");
                    if (Debug) {
                        Console.Error.WriteLine(cmd2.ToString());
                    }
                    using (IDataReader reader = RunReader(cmd2.ToString())) {
                        while (reader.Read()) {
                            Int64 id = reader.GetInt64(0);
                            string uri = AsString(reader[1]);
                            resourceCache[id] = MakeEntity(id, uri, null);
                        }
                    }
                }

                // Fetch what we can for literals.
                if ((bool)varCouldBeLiteral[v] && !(bool)varSelectedLiteral[v]) {
                    StringBuilder cmd2 = new StringBuilder();
                    cmd2.Append("SELECT id, value, language, datatype FROM ");
                    cmd2.Append(table);
                    cmd2.Append("_literals WHERE id IN (");
                    bool first = true;
                    foreach (Int64 id in rids) {
                        if (!first) cmd2.Append(',');
                        first = false;
                        cmd2.Append(id);
                    }
                    cmd2.Append(")");
                    if (Debug) {
                        Console.Error.WriteLine(cmd2.ToString());
                    }
                    using (IDataReader reader = RunReader(cmd2.ToString())) {
                        while (reader.Read()) {
                            Int64 id = reader.GetInt64(0);
                            string value = AsString(reader[1]);
                            string language = AsString(reader[2]);
                            string datatype = AsString(reader[3]);
                            Literal lit = new Literal(value, language, datatype);
                            SetResourceKey(lit, new ResourceKey(id));
                            resourceCache[id] = lit;
                        }
                    }
                }

                // Any ids not found so far are bnodes.
                foreach (Int64 id in rids) {
                    if (!resourceCache.ContainsKey(id)) {
                        BNode b = new BNode();
                        SetResourceKey(b, new ResourceKey(id));
                        resourceCache[id] = b;
                    }
                }
            }
        }

    } // lock

    // Now loop through the binding results.
    foreach (QueryResultRowVariable[] row in results) {
        bool match = true;
        Resource[] variableBindings = new Resource[varOrder.Length];

        for (int i = 0; i < varOrder.Length; i++) {
            Int64 id = row[i].id;
            Resource binding;
            if (resourceCache.ContainsKey(id)) {
                binding = (Resource)resourceCache[id];
            } else {
                if (row[i].litvalue == null) {
                    binding = MakeEntity(id, row[i].uri, null);
                } else {
                    Literal lit = new Literal(row[i].litvalue, row[i].litlanguage, row[i].litdatatype);
                    SetResourceKey(lit, new ResourceKey(id));
                    binding = lit;
                }

                // reuse this resource later
                resourceCache[id] = binding;
            }

            // Apply the filters that had no SQL form. This is done for cached
            // resources too, since the cache is keyed by id and may have been
            // filled on behalf of a different variable that has no filters.
            ArrayList litFilters = (ArrayList)varLitFilters[varOrder[i]];
            Literal literalBinding = binding as Literal;
            if (litFilters != null && literalBinding != null
                && !LiteralFilter.MatchesFilters(literalBinding, (LiteralFilter[])litFilters.ToArray(typeof(LiteralFilter)), this)) {
                match = false;
                break;
            }

            variableBindings[i] = binding;
        }

        if (!match) continue;
        if (!sink.Add(new SemWeb.Query.VariableBindings(varOrder, variableBindings))) return;
    }

    sink.Finished();
}
// Adds binding to the set stored at Bindings[varIndex], creating that set
// first if the slot is still empty.
void Add(int varIndex, Resource binding) {
    ResSet slot = Bindings[varIndex];
    if (slot == null) {
        slot = new ResSet();
        Bindings[varIndex] = slot;
    }
    slot.Add(binding);
}