/// <summary>
/// Builds a Multiset representation of this Bindings Clause.
/// </summary>
/// <returns>
/// An <see cref="IdentityMultiset"/> when the clause declares no variables; otherwise a
/// <see cref="Multiset"/> with every declared variable registered and one Set per binding tuple.
/// </returns>
public BaseMultiset ToMultiset()
{
    // A clause without variables contributes nothing to a join
    if (!this._vars.Any())
    {
        return new IdentityMultiset();
    }

    Multiset result = new Multiset();

    // Register the variables first so they are present even when there are no tuples
    foreach (String name in this._vars)
    {
        result.AddVariable(name);
    }

    // Each tuple becomes one Set in the result
    foreach (BindingTuple row in this._tuples)
    {
        result.Add(new Set(row));
    }

    return result;
}
/// <summary>
/// Does an Exists Join of this Multiset to another Multiset where the Join is predicated on the existence/non-existence of a joinable solution on the RHS.
/// </summary>
/// <param name="other">Other Multiset.</param>
/// <param name="mustExist">Whether a solution must exist in the Other Multiset for the join to be made.</param>
/// <returns>
/// This Multiset when nothing is filtered out, a <see cref="NullMultiset"/> when everything is filtered out,
/// otherwise a new Multiset containing copies of the sets of this Multiset that pass the (NOT) EXISTS test.
/// </returns>
public virtual BaseMultiset ExistsJoin(BaseMultiset other, bool mustExist)
{
    // For EXISTS and NOT EXISTS if the other is the Identity then it has no effect
    if (other is IdentityMultiset)
    {
        return this;
    }

    if (mustExist)
    {
        // If an EXISTS then Null/Empty Other results in Null
        if (other is NullMultiset)
        {
            return other;
        }
        if (other.IsEmpty)
        {
            return new NullMultiset();
        }
    }
    else
    {
        // If a NOT EXISTS then Null/Empty results in this
        if (other is NullMultiset)
        {
            return this;
        }
        if (other.IsEmpty)
        {
            return this;
        }
    }

    // Find the Variables that are to be used for Joining
    List<String> joinVars = Variables.Where(v => other.Variables.Contains(v)).ToList();
    if (joinVars.Count == 0)
    {
        // All Disjoint Solutions are compatible
        if (mustExist)
        {
            // If an EXISTS and disjoint then result is this
            return this;
        }

        // If a NOT EXISTS and disjoint then result is null
        return new NullMultiset();
    }

    // Start building the Joined Set
    Multiset joinedSet = new Multiset();

    // Per join variable: an index from bound value to the IDs of the LHS sets carrying that value,
    // plus the IDs of the LHS sets in which the variable is unbound
    List<MultiDictionary<INode, List<int>>> values = new List<MultiDictionary<INode, List<int>>>();
    List<List<int>> nulls = new List<List<int>>();
    foreach (String var in joinVars)
    {
        joinedSet.AddVariable(var);
        values.Add(new MultiDictionary<INode, List<int>>(new FastVirtualNodeComparer()));
        nulls.Add(new List<int>());
    }

    // First do a pass over the LHS Result to find all possible values for joined variables
    foreach (ISet x in Sets)
    {
        int i = 0;
        foreach (String var in joinVars)
        {
            INode value = x[var];
            if (value == null)
            {
                nulls[i].Add(x.ID);
            }
            else if (values[i].TryGetValue(value, out List<int> ids))
            {
                ids.Add(x.ID);
            }
            else
            {
                values[i].Add(value, new List<int> { x.ID });
            }
            i++;
        }
    }

    // Then do a pass over the RHS and work out the intersections
    HashSet<int> exists = new HashSet<int>();
    foreach (ISet y in other.Sets)
    {
        IEnumerable<int> possMatches = null;
        int i = 0;
        foreach (String var in joinVars)
        {
            INode value = y[var];
            IEnumerable<int> candidates;
            if (value == null)
            {
                // Don't forget that a null will be potentially compatible with everything
                candidates = SetIDs;
            }
            else if (values[i].TryGetValue(value, out List<int> sameValueIds))
            {
                // LHS sets with the same value, plus LHS sets where the variable is unbound
                candidates = sameValueIds.Concat(nulls[i]);
            }
            else if (nulls[i].Count > 0)
            {
                // No LHS set carries this value, but LHS sets that leave the variable unbound
                // are still candidates. Previously these were dropped here, so whether such a
                // set was found depended on whether some other LHS set had the same value.
                candidates = nulls[i];
            }
            else
            {
                // Nothing on the LHS can possibly match this RHS set
                possMatches = Enumerable.Empty<int>();
                break;
            }

            possMatches = (possMatches == null ? candidates : possMatches.Intersect(candidates));
            i++;
        }
        if (possMatches == null)
        {
            continue;
        }

        // Look at possible matches, if is a valid match then mark the set as having an existing match
        // Don't reconsider sets which have already been marked as having an existing match
        foreach (int poss in possMatches)
        {
            if (exists.Contains(poss))
            {
                continue;
            }
            // The candidate lists are only a pre-filter; compatibility is always verified here
            if (this[poss].IsCompatibleWith(y, joinVars))
            {
                exists.Add(poss);
            }
        }
    }

    // Apply the actual exists
    if (exists.Count == Count)
    {
        // If number of sets that have a match is equal to number of sets then we're either returning everything or nothing
        if (mustExist)
        {
            return this;
        }
        return new NullMultiset();
    }

    // Otherwise keep the matched sets for EXISTS, or the unmatched sets for NOT EXISTS
    foreach (ISet x in Sets)
    {
        if (exists.Contains(x.ID) == mustExist)
        {
            joinedSet.Add(x.Copy());
        }
    }
    return joinedSet;
}
/// <summary>
/// Joins this Multiset to another Multiset.
/// </summary>
/// <param name="other">Other Multiset.</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity Multiset; a Null Multiset when
/// <paramref name="other"/> is the Null Multiset or is empty; the Product of the two when they
/// share no variables; otherwise a new Multiset populated by joining on the shared variables.
/// </returns>
public virtual BaseMultiset Join(BaseMultiset other)
{
    // Trivial cases: Identity leaves this unchanged, Null/Empty wipe out the result
    if (other is IdentityMultiset)
    {
        return this;
    }
    if (other is NullMultiset)
    {
        return other;
    }
    if (other.IsEmpty)
    {
        return new NullMultiset();
    }

    // Variables present in both Multisets drive the join; with none in common
    // the operation is a Product rather than a Join
    List<String> sharedVars = Variables.Where(v => other.Variables.Contains(v)).ToList();
    if (sharedVars.Count == 0)
    {
        return Product(other);
    }

    Multiset result = new Multiset();

    // For each shared variable keep an index from value to the IDs of the LHS sets
    // with that value, and a list of the IDs of the LHS sets where it is unbound.
    // This trades extra memory for not comparing every LHS set with every RHS set.
    List<MultiDictionary<INode, List<int>>> valueIndex = new List<MultiDictionary<INode, List<int>>>();
    List<List<int>> unboundIds = new List<List<int>>();
    for (int k = 0; k < sharedVars.Count; k++)
    {
        result.AddVariable(sharedVars[k]);
        valueIndex.Add(new MultiDictionary<INode, List<int>>(new FastVirtualNodeComparer()));
        unboundIds.Add(new List<int>());
    }

    // Pass over the LHS to populate the indexes
    foreach (ISet lhs in Sets)
    {
        for (int k = 0; k < sharedVars.Count; k++)
        {
            INode node = lhs[sharedVars[k]];
            if (node == null)
            {
                unboundIds[k].Add(lhs.ID);
            }
            else if (valueIndex[k].TryGetValue(node, out List<int> bucket))
            {
                bucket.Add(lhs.ID);
            }
            else
            {
                valueIndex[k].Add(node, new List<int> { lhs.ID });
            }
        }
    }

#if NET40
    if (Options.UsePLinqEvaluation)
    {
        // Parallel pass over the RHS
        other.Sets.AsParallel().ForAll(y => EvalJoin(y, sharedVars, valueIndex, unboundIds, result));
        return result;
    }
#endif

    // Serial pass over the RHS, working out the intersections against the indexes
    foreach (ISet rhs in other.Sets)
    {
        EvalJoin(rhs, sharedVars, valueIndex, unboundIds, result);
    }
    return result;
}
/// <summary>
/// Performs a Left Join of this Multiset to another Multiset, where a joined solution is only kept when the given Expression accepts it.
/// </summary>
/// <param name="other">Other Multiset.</param>
/// <param name="expr">Expression.</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity Multiset, the Null Multiset or is empty;
/// otherwise the Multiset holding the results of the left join.
/// </returns>
public virtual BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    // Identity, Null and Empty on the RHS all leave the LHS untouched
    if (other is IdentityMultiset || other is NullMultiset || other.IsEmpty)
    {
        return this;
    }

    Multiset result = new Multiset();
    LeviathanLeftJoinBinder binder = new LeviathanLeftJoinBinder(result);
    SparqlEvaluationContext subcontext = new SparqlEvaluationContext(binder);

    // Variables present in both Multisets drive the join
    List<String> sharedVars = Variables.Where(v => other.Variables.Contains(v)).ToList();

    if (sharedVars.Count == 0)
    {
        // Nothing in common, so every LHS set is paired with every RHS set
#if NET40
        if (Options.UsePLinqEvaluation && expr.CanParallelise)
        {
            PartitionedMultiset partitionedSet = new PartitionedMultiset(this.Count, other.Count + 1);
            this.Sets.AsParallel().ForAll(x => EvalLeftJoinProduct(x, other, partitionedSet, expr));
            return partitionedSet;
        }
#endif
        // Serial product, filtering with the expression as we go
        foreach (ISet lhs in Sets)
        {
            bool anyRejected = false;
            bool anyAccepted = false;
            foreach (ISet rhs in other.Sets)
            {
                ISet merged = lhs.Join(rhs);
                try
                {
                    // The merged set has to be in the result before the expression is
                    // evaluated against its ID; it is removed again if rejected
                    result.Add(merged);
                    if (expr.Evaluate(subcontext, merged.ID).AsSafeBoolean())
                    {
                        anyAccepted = true;
                    }
                    else
                    {
                        result.Remove(merged.ID);
                        anyRejected = true;
                    }
                }
                catch
                {
                    // Any failure while adding/evaluating is treated as a rejection
                    result.Remove(merged.ID);
                    anyRejected = true;
                }
            }

            // Keep the LHS set on its own only when no pairing was accepted
            if (anyRejected && !anyAccepted)
            {
                result.Add(lhs.Copy());
            }
        }
        return result;
    }

    // For each shared variable keep an index from value to the IDs of the RHS sets
    // with that value, and a list of the IDs of the RHS sets where it is unbound.
    // This trades extra memory for not comparing every LHS set with every RHS set.
    List<MultiDictionary<INode, List<int>>> valueIndex = new List<MultiDictionary<INode, List<int>>>();
    List<List<int>> unboundIds = new List<List<int>>();
    for (int k = 0; k < sharedVars.Count; k++)
    {
        result.AddVariable(sharedVars[k]);
        valueIndex.Add(new MultiDictionary<INode, List<int>>(new FastVirtualNodeComparer()));
        unboundIds.Add(new List<int>());
    }

    // Pass over the RHS to populate the indexes
    foreach (ISet rhs in other.Sets)
    {
        for (int k = 0; k < sharedVars.Count; k++)
        {
            INode node = rhs[sharedVars[k]];
            if (node == null)
            {
                unboundIds[k].Add(rhs.ID);
            }
            else if (valueIndex[k].TryGetValue(node, out List<int> bucket))
            {
                bucket.Add(rhs.ID);
            }
            else
            {
                valueIndex[k].Add(node, new List<int> { rhs.ID });
            }
        }
    }

#if NET40
    if (Options.UsePLinqEvaluation && expr.CanParallelise)
    {
        // Parallel pass over the LHS
        this.Sets.AsParallel().ForAll(x => EvalLeftJoin(x, other, sharedVars, valueIndex, unboundIds, result, subcontext, expr));
        return result;
    }
#endif

    // Serial pass over the LHS, working out the intersections against the indexes
    foreach (ISet lhs in Sets)
    {
        EvalLeftJoin(lhs, other, sharedVars, valueIndex, unboundIds, result, subcontext, expr);
    }
    return result;
}