/// <summary>
/// Does a Left Join of this Multiset to another Multiset where the Join is predicated on the given Expression
/// </summary>
/// <remarks>
/// A set from this Multiset is carried over on its own only when no compatible set from the other Multiset
/// produced a joined solution for which the expression evaluated to true. An error raised while evaluating
/// the expression for a joined solution is treated the same as the expression evaluating to false.
/// </remarks>
/// <param name="other">Other Multiset</param>
/// <param name="expr">Expression</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity/Null Multiset or is empty, otherwise a new
/// Multiset holding the joined solutions that satisfy the expression plus copies of the unmatched sets from this Multiset
/// </returns>
public override BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    //If the Other is the Identity/Null Multiset the result is this Multiset
    if (other is IdentityMultiset)
    {
        return this;
    }
    if (other is NullMultiset)
    {
        return this;
    }
    if (other.IsEmpty)
    {
        return this;
    }

    Multiset joinedSet = new Multiset();
    LeviathanLeftJoinBinder binder = new LeviathanLeftJoinBinder(joinedSet);
    SparqlEvaluationContext subcontext = new SparqlEvaluationContext(binder);

    //Find the Variables from this Multiset which also occur in the other Multiset
    List<String> joinVars = this._variables.Where(v => other.Variables.Contains(v)).ToList();
    if (joinVars.Count == 0)
    {
        //No shared variables so every pairing is a candidate - calculate a Product filtering as we go
        foreach (ISet x in this.Sets)
        {
            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in other.Sets)
            {
                ISet z = x.Join(y);
                try
                {
                    //The joined solution has to be in the Multiset for the expression to be evaluated against its ID
                    joinedSet.Add(z);
                    if (expr.Evaluate(subcontext, z.ID).AsSafeBoolean())
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Only preserve the LHS set on its own when nothing on the RHS extended it
            if (!matched)
            {
                joinedSet.Add(x.Copy());
            }
        }
    }
    else
    {
        //Hash based Join: index the LHS sets by the value they bind to each join variable and then
        //probe that index with each RHS set. This needs more memory than a nested loop over both sides.
        List<HashTable<INode, int>> values = new List<HashTable<INode, int>>();
        List<List<int>> nulls = new List<List<int>>();
        foreach (String var in joinVars)
        {
            values.Add(new HashTable<INode, int>(HashTableBias.Enumeration));
            nulls.Add(new List<int>());
        }

        //First do a pass over the LHS Result to find all possible values for joined variables
        foreach (ISet x in this.Sets)
        {
            int i = 0;
            foreach (String var in joinVars)
            {
                INode value = x[var];
                if (value != null)
                {
                    values[i].Add(value, x.ID);
                }
                else
                {
                    nulls[i].Add(x.ID);
                }
                i++;
            }
        }

        //IDs of the LHS sets that produced at least one joined solution satisfying the expression
        HashSet<int> matched = new HashSet<int>();

        //Then do a pass over the RHS and work out the intersections
        foreach (ISet y in other.Sets)
        {
            IEnumerable<int> possMatches = null;
            int i = 0;
            foreach (String var in joinVars)
            {
                INode value = y[var];
                if (value != null)
                {
                    if (values[i].ContainsKey(value))
                    {
                        IEnumerable<int> candidates = values[i].GetValues(value).Concat(nulls[i]);
                        possMatches = (possMatches == null ? candidates : possMatches.Intersect(candidates));
                    }
                    else
                    {
                        //NOTE(review): LHS sets with no value for this variable (nulls[i]) are not
                        //considered here even though a null is treated as compatible elsewhere - confirm intended
                        possMatches = Enumerable.Empty<int>();
                        break;
                    }
                }
                else
                {
                    //Don't forget that a null will be potentially compatible with everything
                    possMatches = (possMatches == null ? this.SetIDs : possMatches.Intersect(this.SetIDs));
                }
                i++;
            }
            if (possMatches == null)
            {
                continue;
            }

            //Now do the actual joins for the current set
            //Note - We access the dictionary directly here because the IDs came from our own sets
            foreach (int poss in possMatches)
            {
                if (this._sets[poss].IsCompatibleWith(y, joinVars))
                {
                    ISet z = this._sets[poss].Join(y);
                    joinedSet.Add(z);
                    try
                    {
                        if (expr.Evaluate(subcontext, z.ID).AsSafeBoolean())
                        {
                            matched.Add(poss);
                        }
                        else
                        {
                            joinedSet.Remove(z.ID);
                        }
                    }
                    catch
                    {
                        //An evaluation error means the solution does not satisfy the expression
                        joinedSet.Remove(z.ID);
                    }
                }
            }
        }

        //Finally add in the LHS sets which were not extended by any RHS set
        foreach (int id in this.SetIDs)
        {
            if (!matched.Contains(id))
            {
                joinedSet.Add(this._sets[id].Copy());
            }
        }
    }
    return joinedSet;
}
/// <summary>
/// Evaluates the filtered product
/// </summary>
/// <remarks>
/// The left and right hand sides are both evaluated against the Input Multiset the context had on entry.
/// While the product is being calculated the Binder of the context is temporarily replaced; the original
/// Binder is always put back, even if evaluating the expression or the timeout check throws.
/// </remarks>
/// <param name="context">Evaluation Context</param>
/// <returns>The Output Multiset of the context after evaluation</returns>
public BaseMultiset Evaluate(SparqlEvaluationContext context)
{
    BaseMultiset initialInput = context.InputMultiset;
    BaseMultiset lhsResults = context.Evaluate(this._lhs);

    if (lhsResults is NullMultiset || lhsResults.IsEmpty)
    {
        //If LHS Results are Null/Empty then end result will always be null so short circuit
        context.OutputMultiset = new NullMultiset();
    }
    else
    {
        //The RHS must see the same input as the LHS did
        context.InputMultiset = initialInput;
        BaseMultiset rhsResults = context.Evaluate(this._rhs);

        if (rhsResults is NullMultiset || rhsResults.IsEmpty)
        {
            //If RHS Results are Null/Empty then end results will always be null so short circuit
            context.OutputMultiset = new NullMultiset();
        }
        else if (rhsResults is IdentityMultiset)
        {
            //Apply Filter over LHS Results only - defer evaluation to filter implementation
            context.InputMultiset = lhsResults;
            UnaryExpressionFilter filter = new UnaryExpressionFilter(this._expr);
            filter.Evaluate(context);
            context.OutputMultiset = lhsResults;
        }
        else
        {
            //Calculate the product applying the filter as we go
            BaseMultiset result;
            SparqlResultBinder originalBinder = context.Binder;
            try
            {
#if NET40 && !SILVERLIGHT
                if (Options.UsePLinqEvaluation && this._expr.CanParallelise)
                {
                    //Partition by whichever side has more sets
                    PartitionedMultiset partitionedSet;
                    if (lhsResults.Count >= rhsResults.Count)
                    {
                        partitionedSet = new PartitionedMultiset(lhsResults.Count, rhsResults.Count);
                        context.Binder = new LeviathanLeftJoinBinder(partitionedSet);
                        lhsResults.Sets.AsParallel().ForAll(x => this.EvalFilteredProduct(context, x, rhsResults, partitionedSet));
                    }
                    else
                    {
                        partitionedSet = new PartitionedMultiset(rhsResults.Count, lhsResults.Count);
                        context.Binder = new LeviathanLeftJoinBinder(partitionedSet);
                        rhsResults.Sets.AsParallel().ForAll(y => this.EvalFilteredProduct(context, y, lhsResults, partitionedSet));
                    }
                    result = partitionedSet;
                }
                else
                {
#endif
                    BaseMultiset productSet = new Multiset();
                    context.Binder = new LeviathanLeftJoinBinder(productSet);
                    foreach (ISet x in lhsResults.Sets)
                    {
                        foreach (ISet y in rhsResults.Sets)
                        {
                            ISet z = x.Join(y);
                            //The solution has to be in the product for the expression to be evaluated against its ID
                            productSet.Add(z);
                            try
                            {
                                if (!this._expr.Evaluate(context, z.ID).AsSafeBoolean())
                                {
                                    //Means the expression evaluates to false so we discard the solution
                                    productSet.Remove(z.ID);
                                }
                            }
                            catch
                            {
                                //Means this solution does not meet the FILTER and can be discarded
                                productSet.Remove(z.ID);
                            }
                        }
                        //Remember to check for timeouts occasionally
                        context.CheckTimeout();
                    }
                    result = productSet;
#if NET40 && !SILVERLIGHT
                }
#endif
            }
            finally
            {
                //Never leave the temporary binder on the shared context, even on failure
                context.Binder = originalBinder;
            }
            context.OutputMultiset = result;
        }
    }
    return context.OutputMultiset;
}
/// <summary>
/// Performs a Left Join between this Multiset and another Multiset, keeping a joined solution only where the given Expression holds for it.
/// </summary>
/// <param name="other">Multiset on the right hand side of the join.</param>
/// <param name="expr">Expression that each joined solution is tested against.</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity/Null Multiset or is empty, otherwise a new Multiset containing the result of the join.
/// </returns>
public virtual BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    // Joining against the Identity/Null Multiset or an empty Multiset leaves this Multiset unchanged
    if (other is IdentityMultiset || other is NullMultiset || other.IsEmpty)
    {
        return this;
    }

    Multiset result = new Multiset();
    LeviathanLeftJoinBinder joinBinder = new LeviathanLeftJoinBinder(result);
    SparqlEvaluationContext evalContext = new SparqlEvaluationContext(joinBinder);

    // Variables of this Multiset which also occur in the other Multiset
    List<String> sharedVars = Variables.Where(v => other.Variables.Contains(v)).ToList();

    if (sharedVars.Count == 0)
    {
        // Nothing in common so the join degenerates into a filtered product
#if NET40
        if (Options.UsePLinqEvaluation && expr.CanParallelise)
        {
            PartitionedMultiset partitionedSet = new PartitionedMultiset(this.Count, other.Count + 1);
            this.Sets.AsParallel().ForAll(x => EvalLeftJoinProduct(x, other, partitionedSet, expr));
            return partitionedSet;
        }
#endif

        // Serial product, filtering each pairing as it is produced
        foreach (ISet lhs in Sets)
        {
            bool anyRejected = false;
            bool anyAccepted = false;

            foreach (ISet rhs in other.Sets)
            {
                ISet merged = lhs.Join(rhs);
                try
                {
                    result.Add(merged);
                    if (expr.Evaluate(evalContext, merged.ID).AsSafeBoolean())
                    {
                        anyAccepted = true;
                    }
                    else
                    {
                        result.Remove(merged.ID);
                        anyRejected = true;
                    }
                }
                catch
                {
                    // A failure while testing the pairing counts as a rejection
                    result.Remove(merged.ID);
                    anyRejected = true;
                }
            }

            // Keep the LHS set by itself only if every pairing attempted for it was rejected
            if (anyRejected && !anyAccepted)
            {
                result.Add(lhs.Copy());
            }
        }

        return result;
    }

    // One index of values and one list of null-valued set IDs per shared variable
    List<MultiDictionary<INode, List<int>>> valueIndex = new List<MultiDictionary<INode, List<int>>>();
    List<List<int>> nullIndex = new List<List<int>>();
    foreach (String sharedVar in sharedVars)
    {
        result.AddVariable(sharedVar);
        valueIndex.Add(new MultiDictionary<INode, List<int>>(new FastVirtualNodeComparer()));
        nullIndex.Add(new List<int>());
    }

    // Record, for every RHS set, which value (or null) it has for each shared variable
    foreach (ISet rhs in other.Sets)
    {
        for (int slot = 0; slot < sharedVars.Count; slot++)
        {
            INode bound = rhs[sharedVars[slot]];
            if (bound != null)
            {
                if (valueIndex[slot].TryGetValue(bound, out List<int> knownIds))
                {
                    knownIds.Add(rhs.ID);
                }
                else
                {
                    valueIndex[slot].Add(bound, new List<int> { rhs.ID });
                }
            }
            else
            {
                nullIndex[slot].Add(rhs.ID);
            }
        }
    }

    // Now walk the LHS, handing each set to the join helper together with the indexes built above
#if NET40
    if (Options.UsePLinqEvaluation && expr.CanParallelise)
    {
        this.Sets.AsParallel().ForAll(x => EvalLeftJoin(x, other, sharedVars, valueIndex, nullIndex, result, evalContext, expr));
        return result;
    }
#endif

    // Serial Left Join
    foreach (ISet lhs in Sets)
    {
        EvalLeftJoin(lhs, other, sharedVars, valueIndex, nullIndex, result, evalContext, expr);
    }

    return result;
}
/// <summary>
/// Does a Left Join of this Multiset to another Multiset where the Join is predicated on the given Expression
/// </summary>
/// <remarks>
/// A set from this Multiset is carried over on its own only when no compatible set from the other Multiset
/// produced a joined solution for which the expression was true. An error raised while evaluating the
/// expression for a joined solution is treated the same as the expression being false.
/// </remarks>
/// <param name="other">Other Multiset</param>
/// <param name="expr">Expression</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity/Null Multiset or is empty, otherwise a new
/// Multiset holding the joined solutions that satisfy the expression plus the unmatched sets from this Multiset
/// </returns>
public override BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    //If the Other is the Identity/Null Multiset the result is this Multiset
    if (other is IdentityMultiset)
    {
        return this;
    }
    if (other is NullMultiset)
    {
        return this;
    }
    if (other.IsEmpty)
    {
        return this;
    }

    Multiset joinedSet = new Multiset();
    LeviathanLeftJoinBinder binder = new LeviathanLeftJoinBinder(joinedSet);
    SparqlEvaluationContext subcontext = new SparqlEvaluationContext(binder);

    //Find the Variables from this Multiset which also occur in the other Multiset
    List<String> joinVars = this._variables.Where(v => other.Variables.Contains(v)).ToList();
    if (joinVars.Count == 0)
    {
        //No shared variables so every pairing is a candidate - calculate a Product filtering as we go
        foreach (ISet x in this.Sets)
        {
            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in other.Sets)
            {
                ISet z = x.Join(y);
                try
                {
                    //The joined solution has to be in the Multiset for the expression to be evaluated against its ID
                    joinedSet.Add(z);
                    if (expr.EffectiveBooleanValue(subcontext, z.ID))
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Only preserve the LHS set on its own when nothing on the RHS extended it
            //NOTE(review): x is added by reference rather than as a copy - confirm Add() does not alter x
            if (!matched)
            {
                joinedSet.Add(x);
            }
        }
    }
    else
    {
        foreach (ISet x in this.Sets)
        {
            //RHS sets are compatible when, for every join variable, either side is null or both values are equal
            IEnumerable<ISet> ys = other.Sets.Where(s => joinVars.All(v => x[v] == null || s[v] == null || x[v].Equals(s[v])));

            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in ys)
            {
                ISet z = x.Join(y);
                try
                {
                    joinedSet.Add(z);
                    if (expr.EffectiveBooleanValue(subcontext, z.ID))
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Covers both "no compatible RHS set" and "every compatible RHS set failed the expression"
            //NOTE(review): x is added by reference rather than as a copy - confirm Add() does not alter x
            if (!matched)
            {
                joinedSet.Add(x);
            }
        }
    }
    return joinedSet;
}
/// <summary>
/// Does a Left Join of this Multiset to another Multiset where the Join is predicated on the given Expression
/// </summary>
/// <remarks>
/// A set from this Multiset is carried over on its own only when no compatible set from the other Multiset
/// produced a joined solution for which the expression evaluated to true. An error raised while evaluating
/// the expression for a joined solution is treated the same as the expression evaluating to false.
/// </remarks>
/// <param name="other">Other Multiset</param>
/// <param name="expr">Expression</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity/Null Multiset or is empty, otherwise a new
/// Multiset holding the joined solutions that satisfy the expression plus copies of the unmatched sets from this Multiset
/// </returns>
public override BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    //If the Other is the Identity/Null Multiset the result is this Multiset
    if (other is IdentityMultiset)
    {
        return this;
    }
    if (other is NullMultiset)
    {
        return this;
    }
    if (other.IsEmpty)
    {
        return this;
    }

    Multiset joinedSet = new Multiset();
    LeviathanLeftJoinBinder binder = new LeviathanLeftJoinBinder(joinedSet);
    SparqlEvaluationContext subcontext = new SparqlEvaluationContext(binder);

    //Find the Variables from this Multiset which also occur in the other Multiset
    List<String> joinVars = this._variables.Where(v => other.Variables.Contains(v)).ToList();
    if (joinVars.Count == 0)
    {
        //No shared variables so every pairing is a candidate - calculate a Product filtering as we go
        foreach (ISet x in this.Sets)
        {
            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in other.Sets)
            {
                ISet z = x.Join(y);
                try
                {
                    //The joined solution has to be in the Multiset for the expression to be evaluated against its ID
                    joinedSet.Add(z);
                    if (expr.Evaluate(subcontext, z.ID).AsSafeBoolean())
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Only preserve the LHS set on its own when nothing on the RHS extended it
            if (!matched)
            {
                joinedSet.Add(x.Copy());
            }
        }
    }
    else
    {
        //Hash based Join: index the LHS sets by the value they bind to each join variable and then
        //probe that index with each RHS set. This needs more memory than a nested loop over both sides.
        List<HashTable<INode, int>> values = new List<HashTable<INode, int>>();
        List<List<int>> nulls = new List<List<int>>();
        foreach (String var in joinVars)
        {
            values.Add(new HashTable<INode, int>(HashTableBias.Enumeration));
            nulls.Add(new List<int>());
        }

        //First do a pass over the LHS Result to find all possible values for joined variables
        foreach (ISet x in this.Sets)
        {
            int i = 0;
            foreach (String var in joinVars)
            {
                INode value = x[var];
                if (value != null)
                {
                    values[i].Add(value, x.ID);
                }
                else
                {
                    nulls[i].Add(x.ID);
                }
                i++;
            }
        }

        //IDs of the LHS sets that produced at least one joined solution satisfying the expression
        HashSet<int> matched = new HashSet<int>();

        //Then do a pass over the RHS and work out the intersections
        foreach (ISet y in other.Sets)
        {
            IEnumerable<int> possMatches = null;
            int i = 0;
            foreach (String var in joinVars)
            {
                INode value = y[var];
                if (value != null)
                {
                    if (values[i].ContainsKey(value))
                    {
                        IEnumerable<int> candidates = values[i].GetValues(value).Concat(nulls[i]);
                        possMatches = (possMatches == null ? candidates : possMatches.Intersect(candidates));
                    }
                    else
                    {
                        //NOTE(review): LHS sets with no value for this variable (nulls[i]) are not
                        //considered here even though a null is treated as compatible elsewhere - confirm intended
                        possMatches = Enumerable.Empty<int>();
                        break;
                    }
                }
                else
                {
                    //Don't forget that a null will be potentially compatible with everything
                    possMatches = (possMatches == null ? this.SetIDs : possMatches.Intersect(this.SetIDs));
                }
                i++;
            }
            if (possMatches == null)
            {
                continue;
            }

            //Now do the actual joins for the current set
            //Note - We access the dictionary directly here because the IDs came from our own sets
            foreach (int poss in possMatches)
            {
                if (this._sets[poss].IsCompatibleWith(y, joinVars))
                {
                    ISet z = this._sets[poss].Join(y);
                    joinedSet.Add(z);
                    try
                    {
                        if (expr.Evaluate(subcontext, z.ID).AsSafeBoolean())
                        {
                            matched.Add(poss);
                        }
                        else
                        {
                            joinedSet.Remove(z.ID);
                        }
                    }
                    catch
                    {
                        //An evaluation error means the solution does not satisfy the expression
                        joinedSet.Remove(z.ID);
                    }
                }
            }
        }

        //Finally add in the LHS sets which were not extended by any RHS set
        foreach (int id in this.SetIDs)
        {
            if (!matched.Contains(id))
            {
                joinedSet.Add(this._sets[id].Copy());
            }
        }
    }
    return joinedSet;
}
/// <summary>
/// Does a Left Join of this Multiset to another Multiset where the Join is predicated on the given Expression
/// </summary>
/// <remarks>
/// A set from this Multiset is carried over on its own only when no compatible set from the other Multiset
/// produced a joined solution for which the expression was true. An error raised while evaluating the
/// expression for a joined solution is treated the same as the expression being false.
/// </remarks>
/// <param name="other">Other Multiset</param>
/// <param name="expr">Expression</param>
/// <returns>
/// This Multiset when <paramref name="other"/> is the Identity/Null Multiset or is empty, otherwise a new
/// Multiset holding the joined solutions that satisfy the expression plus the unmatched sets from this Multiset
/// </returns>
public override BaseMultiset LeftJoin(BaseMultiset other, ISparqlExpression expr)
{
    //If the Other is the Identity/Null Multiset the result is this Multiset
    if (other is IdentityMultiset)
    {
        return this;
    }
    if (other is NullMultiset)
    {
        return this;
    }
    if (other.IsEmpty)
    {
        return this;
    }

    Multiset joinedSet = new Multiset();
    LeviathanLeftJoinBinder binder = new LeviathanLeftJoinBinder(joinedSet);
    SparqlEvaluationContext subcontext = new SparqlEvaluationContext(binder);

    //Find the Variables from this Multiset which also occur in the other Multiset
    List<String> joinVars = this._variables.Where(v => other.Variables.Contains(v)).ToList();
    if (joinVars.Count == 0)
    {
        //No shared variables so every pairing is a candidate - calculate a Product filtering as we go
        foreach (ISet x in this.Sets)
        {
            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in other.Sets)
            {
                ISet z = x.Join(y);
                try
                {
                    //The joined solution has to be in the Multiset for the expression to be evaluated against its ID
                    joinedSet.Add(z);
                    if (expr.EffectiveBooleanValue(subcontext, z.ID))
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Only preserve the LHS set on its own when nothing on the RHS extended it
            //NOTE(review): x is added by reference rather than as a copy - confirm Add() does not alter x
            if (!matched)
            {
                joinedSet.Add(x);
            }
        }
    }
    else
    {
        foreach (ISet x in this.Sets)
        {
            //RHS sets are compatible when, for every join variable, either side is null or both values are equal
            IEnumerable<ISet> ys = other.Sets.Where(s => joinVars.All(v => x[v] == null || s[v] == null || x[v].Equals(s[v])));

            //Becomes true once any joined solution for x satisfies the expression
            bool matched = false;
            foreach (ISet y in ys)
            {
                ISet z = x.Join(y);
                try
                {
                    joinedSet.Add(z);
                    if (expr.EffectiveBooleanValue(subcontext, z.ID))
                    {
                        matched = true;
                    }
                    else
                    {
                        joinedSet.Remove(z.ID);
                    }
                }
                catch
                {
                    //An evaluation error means the solution does not satisfy the expression
                    joinedSet.Remove(z.ID);
                }
            }

            //Covers both "no compatible RHS set" and "every compatible RHS set failed the expression"
            //NOTE(review): x is added by reference rather than as a copy - confirm Add() does not alter x
            if (!matched)
            {
                joinedSet.Add(x);
            }
        }
    }
    return joinedSet;
}