/// <summary>
/// Optimises the algebra so that all Node terms are virtualised
/// </summary>
/// <remarks>
/// Joins and unary operators are transformed recursively. For a BGP every constant term of every
/// triple pattern is replaced with a virtual node created from the ID the provider holds for it.
/// Embedded FILTER and BIND patterns are lifted into Filter and Extend operators, splitting the
/// BGP around them.
/// </remarks>
/// <param name="algebra">Algebra</param>
/// <returns>
/// The virtualised algebra; a NullOperator over the BGP's variables when the provider has no ID for
/// one of the constant terms; or the original algebra when nothing needed to change or the BGP
/// contains a pattern type this optimiser does not handle
/// </returns>
public ISparqlAlgebra Optimise(ISparqlAlgebra algebra)
{
    if (algebra is IAbstractJoin)
    {
        return ((IAbstractJoin)algebra).Transform(this);
    }
    else if (algebra is IUnaryOperator)
    {
        return ((IUnaryOperator)algebra).Transform(this);
    }
    else if (algebra is IBgp)
    {
        IBgp current = (IBgp)algebra;
        if (current.PatternCount == 0)
        {
            return current;
        }

        ISparqlAlgebra result = new Bgp();
        List<ITriplePattern> patterns = new List<ITriplePattern>();
        List<ITriplePattern> ps = new List<ITriplePattern>(current.TriplePatterns.ToList());
        TNodeID nullID = this._provider.NullID;

        //Tracks whether anything differs from the original BGP so an untouched BGP can be returned as-is
        bool changed = false;

        for (int i = 0; i < current.PatternCount; i++)
        {
            if (ps[i] is FilterPattern || ps[i] is BindPattern)
            {
                //First ensure that if we've found any other Triple Patterns up to this point
                //we dump this into a BGP and join with the result so far
                if (patterns.Count > 0)
                {
                    result = Join.CreateJoin(result, new Bgp(patterns));
                    patterns.Clear();
                }

                if (ps[i] is FilterPattern)
                {
                    result = new Filter(result, ((FilterPattern)ps[i]).Filter);
                }
                else
                {
                    BindPattern bind = (BindPattern)ps[i];
                    result = new Extend(result, bind.AssignExpression, bind.VariableName);
                }
                changed = true;
            }
            else if (ps[i] is TriplePattern)
            {
                //Convert Terms in the Pattern into Virtual Nodes
                TriplePattern tp = (TriplePattern)ps[i];
                PatternItem subj, pred, obj;
                if (!this.TryVirtualise(tp.Subject, nullID, out subj)
                    || !this.TryVirtualise(tp.Predicate, nullID, out pred)
                    || !this.TryVirtualise(tp.Object, nullID, out obj))
                {
                    //A constant term has no ID so the whole BGP is replaced by a null operator
                    return new NullOperator(current.Variables);
                }

                if (!ReferenceEquals(subj, tp.Subject) || !ReferenceEquals(pred, tp.Predicate) || !ReferenceEquals(obj, tp.Object))
                {
                    changed = true;
                }
                patterns.Add(new TriplePattern(subj, pred, obj));
            }
            else
            {
                //Any other kind of pattern cannot be treated as a TriplePattern (the cast would throw)
                //so leave the BGP unoptimised rather than failing
                return current;
            }
        }

        if (!changed)
        {
            //No term was virtualised and nothing was lifted out so there was no optimisation to do
            return current;
        }

        if (patterns.Count > 0)
        {
            //If any patterns left at end join as a BGP with result so far
            //This includes the case where every pattern was a Triple Pattern, previously the
            //virtualised patterns were discarded in that case and the original BGP returned
            result = Join.CreateJoin(result, new Bgp(patterns));
        }
        return result;
    }
    else
    {
        //Terminal operators and any other algebra are returned unchanged
        return algebra;
    }
}

/// <summary>
/// Converts a single pattern item into its virtualised form
/// </summary>
/// <param name="item">Pattern item taken from a triple pattern</param>
/// <param name="nullID">The provider's null ID</param>
/// <param name="virtualised">
/// The item itself when it is not a NodeMatchPattern, otherwise a new NodeMatchPattern wrapping a virtual node;
/// null when the method returns false
/// </param>
/// <returns>False if the item is a constant term for which the provider returns null or the null ID, true otherwise</returns>
private bool TryVirtualise(PatternItem item, TNodeID nullID, out PatternItem virtualised)
{
    if (!(item is NodeMatchPattern))
    {
        virtualised = item;
        return true;
    }

    NodeMatchPattern match = (NodeMatchPattern)item;
    TNodeID id = this._provider.GetID(match.Node);
    if (id == null || id.Equals(nullID))
    {
        virtualised = null;
        return false;
    }

    virtualised = new NodeMatchPattern(this.CreateVirtualNode(id, match.Node));
    return true;
}
/// <summary>
/// Lazily evaluates the pattern at the given index of this BGP and, unless it is the last pattern,
/// recurses into the next pattern for the solutions found so far
/// </summary>
/// <param name="context">Evaluation context; its InputMultiset and OutputMultiset are read and reassigned during evaluation</param>
/// <param name="pattern">Zero-based index into <c>_triplePatterns</c> of the pattern to evaluate</param>
/// <param name="halt">
/// Set to false on entry. Set to true when the last pattern in the BGP produces its output, or by a recursive call.
/// </param>
/// <returns>
/// The multiset produced by this level of evaluation; a NullMultiset when no results were found at this level
/// </returns>
/// <exception cref="RdfQueryException">
/// Thrown when a BIND targets a variable already present in the input, or when the pattern is not a
/// TriplePattern, FilterPattern or BindPattern
/// </exception>
private BaseMultiset StreamingEvaluate(SparqlEvaluationContext context, int pattern, out bool halt)
{
    halt = false;

    //Handle Empty BGPs
    if (pattern == 0 && this._triplePatterns.Count == 0)
    {
        context.OutputMultiset = new IdentityMultiset();
        return(context.OutputMultiset);
    }

    BaseMultiset initialInput, localOutput, results = null;

    //Determine whether the Pattern modifies the existing Input rather than joining to it
    //modifies => this pattern is a FILTER, extended/modified => the previous pattern was a BIND/FILTER
    bool modifies = (this._triplePatterns[pattern] is FilterPattern);
    bool extended = (pattern > 0 && this._triplePatterns[pattern - 1] is BindPattern);
    bool modified = (pattern > 0 && this._triplePatterns[pattern - 1] is FilterPattern);

    //Set up the Input and Output Multiset appropriately
    switch (pattern)
    {
        case 0:
            //Input is as given and Output is new empty multiset
            if (!modifies)
            {
                initialInput = context.InputMultiset;
            }
            else
            {
                //If the Pattern will modify the Input and is the first thing in the BGP then it actually modifies a new empty input
                //This takes care of FILTERs being out of scope
                initialInput = new Multiset();
            }
            localOutput = new Multiset();
            break;

        case 1:
            //Input becomes current Output and Output is new empty multiset
            initialInput = context.OutputMultiset;
            localOutput = new Multiset();
            break;

        default:
            if (!extended && !modified)
            {
                //Input is join of previous input and output and Output is new empty multiset
                if (context.InputMultiset.IsDisjointWith(context.OutputMultiset))
                {
                    //Disjoint so do a Product
                    initialInput = context.InputMultiset.ProductWithTimeout(context.OutputMultiset, context.RemainingTimeout);
                }
                else
                {
                    //Normal Join
                    initialInput = context.InputMultiset.Join(context.OutputMultiset);
                }
            }
            else
            {
                //Previous pattern was a BIND or FILTER so its output is used directly as the input
                initialInput = context.OutputMultiset;
            }
            localOutput = new Multiset();
            break;
    }
    context.InputMultiset = initialInput;
    context.OutputMultiset = localOutput;

    //Get the Triple Pattern we're evaluating
    ITriplePattern temp = this._triplePatterns[pattern];
    int resultsFound = 0;
    //Result count returned by the previous recursion for this pattern, -1 until one has been recorded
    int prevResults = -1;

    if (temp is TriplePattern)
    {
        //Find the first Triple which matches the Pattern
        TriplePattern tp = (TriplePattern)temp;
        IEnumerable <Triple> ts = tp.GetTriples(context);

        //In the case that we're lazily evaluating an optimisable ORDER BY then
        //we need to apply OrderBy()'s to our enumeration
        //This only applies to the 1st pattern
        if (pattern == 0)
        {
            if (context.Query != null)
            {
                if (context.Query.OrderBy != null && context.Query.IsOptimisableOrderBy)
                {
                    IComparer <Triple> comparer = context.Query.OrderBy.GetComparer(tp);
                    if (comparer != null)
                    {
                        ts = ts.OrderBy(t => t, comparer);
                    }
                    else
                    {
                        //Can't get a comparer so can't optimise
                        //NOTE(review): -1 disables every early return below (each checks _requiredResults != -1),
                        //presumably meaning "no result limit" - confirm
                        this._requiredResults = -1;
                    }
                }
            }
        }

        foreach (Triple t in ts)
        {
            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            if (tp.Accepts(context, t))
            {
                resultsFound++;
                if (tp.IndexType == TripleIndexType.NoVariables)
                {
                    //Pattern has no variables so a match is represented by the identity multiset
                    localOutput = new IdentityMultiset();
                    context.OutputMultiset = localOutput;
                }
                else
                {
                    context.OutputMultiset.Add(tp.CreateResult(t));
                }

                //Recurse unless we're the last pattern
                if (pattern < this._triplePatterns.Count - 1)
                {
                    results = this.StreamingEvaluate(context, pattern + 1, out halt);

                    //If recursion leads to a halt then we halt and return immediately
                    if (halt && results.Count >= this._requiredResults && this._requiredResults != -1)
                    {
                        return(results);
                    }
                    else if (halt)
                    {
                        if (results.Count == 0)
                        {
                            //If recursing leads to no results then eliminate all outputs
                            //Also reset prevResults to -1
                            resultsFound = 0;
                            localOutput = new Multiset();
                            prevResults = -1;
                        }
                        else if (prevResults > -1)
                        {
                            if (results.Count == prevResults)
                            {
                                //If the amount of results found hasn't increased then this match does not
                                //generate any further solutions further down the recursion so we can eliminate
                                //this from the results (removes the set with the highest ID)
                                localOutput.Remove(localOutput.SetIDs.Max());
                            }
                        }
                        prevResults = results.Count;

                        //If we're supposed to halt but not reached the number of required results then continue
                        //The recursion reassigned the context multisets so restore this level's input and output
                        context.InputMultiset = initialInput;
                        context.OutputMultiset = localOutput;
                    }
                    else
                    {
                        //Otherwise we need to keep going here
                        //So must reset our input and outputs before continuing
                        //The match is also removed from the count of results found at this level
                        context.InputMultiset = initialInput;
                        context.OutputMultiset = new Multiset();
                        resultsFound--;
                    }
                }
                else
                {
                    //If we're at the last pattern and we've found a match then we can halt
                    halt = true;

                    //Generate the final output and return it
                    if (context.InputMultiset.IsDisjointWith(context.OutputMultiset))
                    {
                        //Disjoint so do a Product
                        results = context.InputMultiset.ProductWithTimeout(context.OutputMultiset, context.RemainingTimeout);
                    }
                    else
                    {
                        //Normal Join
                        results = context.InputMultiset.Join(context.OutputMultiset);
                    }

                    //If not reached required number of results continue
                    if (results.Count >= this._requiredResults && this._requiredResults != -1)
                    {
                        context.OutputMultiset = results;
                        return(context.OutputMultiset);
                    }
                }
            }
        }
        //Falling out of the loop continues at the final section of the method
    }
    else if (temp is FilterPattern)
    {
        FilterPattern filter = (FilterPattern)temp;
        ISparqlExpression filterExpr = filter.Filter.Expression;

        if (filter.Variables.IsDisjoint(context.InputMultiset.Variables))
        {
            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            //Filter is Disjoint so determine whether it has any effect or not
            if (filter.Variables.Any())
            {
                //Has Variables but disjoint from input => not in scope so gets ignored

                //Do we recurse or not?
                if (pattern < this._triplePatterns.Count - 1)
                {
                    //Recurse and return
                    results = this.StreamingEvaluate(context, pattern + 1, out halt);
                    return(results);
                }
                else
                {
                    //We don't affect the input in any way so just return it
                    //NOTE(review): halt is left false here, unlike the other last-pattern branches - confirm this is intended
                    return(context.InputMultiset);
                }
            }
            else
            {
                //No Variables so have to evaluate it to see if it gives true
                //If it gives false nothing is returned here, control falls through to the end of the method
                //where resultsFound is still 0 and so a NullMultiset is returned
                try
                {
                    if (filterExpr.EffectiveBooleanValue(context, 0))
                    {
                        if (pattern < this._triplePatterns.Count - 1)
                        {
                            //Recurse and return
                            results = this.StreamingEvaluate(context, pattern + 1, out halt);
                            return(results);
                        }
                        else
                        {
                            //Last Pattern and we evaluate to true so can return the input as-is
                            halt = true;
                            return(context.InputMultiset);
                        }
                    }
                }
                catch (RdfQueryException)
                {
                    //An error is treated as false so eliminates all solutions (use an empty Multiset)
                    return(new Multiset());
                }
            }
        }
        else
        {
            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            //Test each solution found so far against the Filter and eliminate those that evaluate to false/error
            foreach (int id in context.InputMultiset.SetIDs.ToList())
            {
                try
                {
                    if (filterExpr.EffectiveBooleanValue(context, id))
                    {
                        //If evaluates to true then add to output
                        context.OutputMultiset.Add(context.InputMultiset[id]);
                    }
                }
                catch (RdfQueryException)
                {
                    //Error means we ignore the solution
                }
            }

            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            //Decide whether to recurse or not
            resultsFound = context.OutputMultiset.Count;
            if (pattern < this._triplePatterns.Count - 1)
            {
                //Recurse then return
                //We can never decide whether to recurse again at this point as we are not capable of deciding
                //which solutions should be dumped (that is the job of an earlier pattern in the BGP)
                results = this.StreamingEvaluate(context, pattern + 1, out halt);
                return(results);
            }
            else
            {
                halt = true;

                //However many results we need we'll halt - previous patterns can call us again if they find more potential solutions
                //for us to filter
                return(context.OutputMultiset);
            }
        }
    }
    else if (temp is BindPattern)
    {
        BindPattern bind = (BindPattern)temp;
        ISparqlExpression bindExpr = bind.AssignExpression;
        String bindVar = bind.VariableName;

        if (context.InputMultiset.ContainsVariable(bindVar))
        {
            throw new RdfQueryException("Cannot use a BIND assigment to BIND to a variable that has previously been used in the Query");
        }
        else
        {
            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            //Compute the Binding for every value
            context.OutputMultiset.AddVariable(bindVar);
            foreach (ISet s in context.InputMultiset.Sets)
            {
                //Work on a copy of the input set, the copy is added to the output whether or not the expression evaluates
                ISet x = s.Copy();
                try
                {
                    INode val = bindExpr.Value(context, s.ID);
                    x.Add(bindVar, val);
                }
                catch (RdfQueryException)
                {
                    //Equivalent to no assignment but the solution is preserved
                }
                context.OutputMultiset.Add(x);
            }

            //Remember to check for Timeouts during Lazy Evaluation
            context.CheckTimeout();

            //Decide whether to recurse or not
            resultsFound = context.OutputMultiset.Count;
            if (pattern < this._triplePatterns.Count - 1)
            {
                //Recurse then return
                results = this.StreamingEvaluate(context, pattern + 1, out halt);
                return(results);
            }
            else
            {
                halt = true;

                //However many results we need we'll halt - previous patterns can call us again if they find more potential solutions
                //for us to extend
                return(context.OutputMultiset);
            }
        }
    }
    else
    {
        throw new RdfQueryException("Encountered a " + temp.GetType().FullName + " which is not a lazily evaluable Pattern");
    }

    //Reached when a Triple Pattern exhausted its candidate triples without an early return, or when a
    //variable-free FILTER evaluated to false

    //If we found no possibles we return the null multiset
    if (resultsFound == 0)
    {
        return(new NullMultiset());
    }
    else
    {
        //Generate the final output and return it
        if (!modifies)
        {
            if (context.InputMultiset.IsDisjointWith(context.OutputMultiset))
            {
                //Disjoint so do a Product
                results = context.InputMultiset.ProductWithTimeout(context.OutputMultiset, context.RemainingTimeout);
            }
            else
            {
                //Normal Join
                results = context.InputMultiset.Join(context.OutputMultiset);
            }
            context.OutputMultiset = results;
        }
        return(context.OutputMultiset);
    }
}
/// <summary>
/// Rewrites BGPs in the Algebra so that embedded FILTER, BIND, LET, sub-query and property path
/// patterns become explicit algebra operators instead of members of the BGP
/// </summary>
/// <param name="algebra">Algebra to optimise</param>
/// <returns>
/// The transformed algebra; the original algebra when it is a BGP made up solely of triple patterns,
/// an empty BGP, or an operator that is neither a join, a unary operator nor a BGP
/// </returns>
public ISparqlAlgebra Optimise(ISparqlAlgebra algebra)
{
    //Joins and unary operators simply have this optimiser applied to their inner algebra
    IAbstractJoin joinOp = algebra as IAbstractJoin;
    if (joinOp != null)
    {
        return joinOp.Transform(this);
    }

    IUnaryOperator unaryOp = algebra as IUnaryOperator;
    if (unaryOp != null)
    {
        return unaryOp.Transform(this);
    }

    //Anything that is not a BGP (terminal operators included) is passed through untouched
    IBgp current = algebra as IBgp;
    if (current == null)
    {
        return algebra;
    }
    if (current.PatternCount == 0)
    {
        return current;
    }

    ISparqlAlgebra optimised = new Bgp();
    List<ITriplePattern> pending = new List<ITriplePattern>();
    List<ITriplePattern> all = new List<ITriplePattern>(current.TriplePatterns.ToList());

    for (int index = 0; index < current.PatternCount; index++)
    {
        ITriplePattern candidate = all[index];

        //Plain triple patterns are collected until something else interrupts the run
        if (candidate is TriplePattern)
        {
            pending.Add(candidate);
            continue;
        }

        //Flush the triple patterns gathered so far into their own BGP before handling this pattern
        if (pending.Count > 0)
        {
            optimised = Join.CreateJoin(optimised, new Bgp(pending));
            pending.Clear();
        }

        //Map the pattern onto the matching strict algebra operator, first matching type wins
        FilterPattern filter = candidate as FilterPattern;
        if (filter != null)
        {
            optimised = new Filter(optimised, filter.Filter);
            continue;
        }

        BindPattern bind = candidate as BindPattern;
        if (bind != null)
        {
            optimised = new Extend(optimised, bind.AssignExpression, bind.VariableName);
            continue;
        }

        LetPattern let = candidate as LetPattern;
        if (let != null)
        {
            optimised = new Extend(optimised, let.AssignExpression, let.VariableName);
            continue;
        }

        SubQueryPattern subQuery = candidate as SubQueryPattern;
        if (subQuery != null)
        {
            optimised = Join.CreateJoin(optimised, new SubQuery(subQuery.SubQuery));
            continue;
        }

        PropertyPathPattern path = candidate as PropertyPathPattern;
        if (path != null)
        {
            optimised = Join.CreateJoin(optimised, new PropertyPath(path.Subject, path.Path, path.Object));
        }
    }

    //Every pattern was a triple pattern so nothing was rewritten, hand back the original BGP
    if (pending.Count == current.PatternCount)
    {
        return current;
    }

    //Trailing triple patterns are joined on as a final BGP
    if (pending.Count > 0)
    {
        optimised = Join.CreateJoin(optimised, new Bgp(pending));
    }
    return optimised;
}
/// <summary>
/// Produces the SPARQL syntax for a single Triple Pattern
/// </summary>
/// <param name="tp">Triple Pattern</param>
/// <returns>SPARQL text for the pattern</returns>
/// <exception cref="RdfOutputException">Thrown if the pattern is not one of the pattern types this method knows how to format</exception>
public virtual String Format(ITriplePattern tp)
{
    StringBuilder output = new StringBuilder();

    //Simple match pattern: subject predicate object .
    TriplePattern match = tp as TriplePattern;
    if (match != null)
    {
        output.Append(this.Format(match.Subject, TripleSegment.Subject))
              .Append(' ')
              .Append(this.Format(match.Predicate, TripleSegment.Predicate))
              .Append(' ')
              .Append(this.Format(match.Object, TripleSegment.Object))
              .Append(" .");
        return output.ToString();
    }

    FilterPattern filter = tp as FilterPattern;
    if (filter != null)
    {
        output.Append("FILTER(")
              .Append(this.FormatExpression(filter.Filter.Expression))
              .Append(")");
        return output.ToString();
    }

    //Sub-queries are written as an indented block inside braces
    SubQueryPattern subquery = tp as SubQueryPattern;
    if (subquery != null)
    {
        output.AppendLine("{");
        output.AppendLineIndented(this.Format(subquery.SubQuery), 2);
        output.AppendLine("}");
        return output.ToString();
    }

    //Property paths look like a match pattern with a path in the predicate position
    PropertyPathPattern path = tp as PropertyPathPattern;
    if (path != null)
    {
        output.Append(this.Format(path.Subject, TripleSegment.Subject))
              .Append(' ')
              .Append(this.FormatPath(path.Path))
              .Append(' ')
              .Append(this.Format(path.Object, TripleSegment.Object))
              .Append(" .");
        return output.ToString();
    }

    LetPattern let = tp as LetPattern;
    if (let != null)
    {
        output.Append("LET(?")
              .Append(let.VariableName)
              .Append(" := ")
              .Append(this.FormatExpression(let.AssignExpression))
              .Append(")");
        return output.ToString();
    }

    BindPattern bind = tp as BindPattern;
    if (bind != null)
    {
        output.Append("BIND (")
              .Append(this.FormatExpression(bind.AssignExpression))
              .Append(" AS ?")
              .Append(bind.VariableName)
              .Append(")");
        return output.ToString();
    }

    throw new RdfOutputException("Unable to Format an unknown ITriplePattern implementation as a String");
}
/// <summary>
/// Parses the remainder of a BIND assignment, i.e. <c>(expr AS ?var)</c>, and adds it to the given Graph Pattern
/// </summary>
/// <param name="context">Parser Context supplying the tokens, the syntax mode and the query being built</param>
/// <param name="p">Graph Pattern the BIND is added to</param>
private void TryParseBindAssignment(SparqlQueryParserContext context, GraphPattern p)
{
    if (context.SyntaxMode == SparqlQuerySyntax.Sparql_1_0)
    {
        throw new RdfParseException("BIND assignment is not supported in SPARQL 1.0");
    }

    //The BIND keyword must be followed by an opening bracket which is discarded
    IToken opening = context.Tokens.Dequeue();
    if (opening.TokenType != Token.LEFTBRACKET)
    {
        throw ParserHelper.Error("Unexpected Token '" + opening.GetType().ToString() + "' encountered, expected a ( to start a BIND assignment after a BIND keyword", opening);
    }

    //Parse the expression, the last token consumed while doing so must be the AS keyword
    ISparqlExpression expr = this.TryParseExpression(context, false, true);
    if (context.Tokens.LastTokenType != Token.AS)
    {
        throw ParserHelper.Error("A BIND assignment did not end with an AS ?var as expected, BIND assignment must be of the general form BIND(expr AS ?var)", opening);
    }

    //The AS keyword must be followed by the variable being bound
    IToken varToken = context.Tokens.Dequeue();
    if (varToken.TokenType != Token.VARIABLE)
    {
        throw ParserHelper.Error("Unexpected Token '" + varToken.GetType().ToString() + "' encountered, expected a Variable after the AS in a BIND assignment", varToken);
    }

    //Substring(1) drops the first character of the token value when naming the variable
    BindPattern bind = new BindPattern(varToken.Value.Substring(1), expr);

    //The variable must not already be used in the query or earlier in this Graph Pattern
    bool usedInQuery = context.Query.RootGraphPattern != null && context.Query.RootGraphPattern.Variables.Contains(bind.VariableName);
    if (usedInQuery)
    {
        throw ParserHelper.Error("A BIND assignment is attempting to bind to the variable ?" + bind.VariableName + " but this variable is already in use in the query", varToken);
    }
    if (p.Variables.Contains(bind.VariableName))
    {
        throw ParserHelper.Error("A BIND assignment is attempting to bind to the variable ?" + bind.VariableName + " but this variable is already in use earlier in the Graph pattern", varToken);
    }

    if (!Options.QueryOptimisation)
    {
        //With optimisation off there is no pattern reordering or assignment/FILTER placement so the
        //BIND stays where it was written; AddTriplePattern does not break the BGP so do it explicitly
        p.AddTriplePattern(bind);
        p.BreakBGP();
    }
    else
    {
        p.AddAssignment(bind);
    }

    //Finally the assignment must be closed with a )
    IToken closing = context.Tokens.Dequeue();
    if (closing.TokenType != Token.RIGHTBRACKET)
    {
        throw ParserHelper.Error("Unexpected Token '" + closing.GetType().ToString() + "' encountered, expected a ) to terminate a BIND assignment", closing);
    }
}
/// <summary>
/// Attempts to do variable substitution within the given algebra
/// </summary>
/// <remarks>
/// Occurrences of the variable being searched for are replaced with the replacement item in triple patterns,
/// with the expression transform in FILTER/BIND/LET patterns, and with the replacement token in GRAPH specifiers.
/// </remarks>
/// <param name="algebra">Algebra</param>
/// <returns>The algebra with the substitution applied; terminal operators are returned unchanged</returns>
/// <exception cref="RdfQueryException">
/// Thrown when the substitution cannot be made: an object position replacement that is not permitted in the scope,
/// a sub-query, property path or SERVICE clause is present, a GRAPH specifier uses the variable but no
/// replacement token is available, or the algebra/pattern type is unknown
/// </exception>
public ISparqlAlgebra Optimise(ISparqlAlgebra algebra)
{
    //By default we are only safe to replace objects in a scope if we are replacing with a constant
    //Note that if we also make a replace in a subject/predicate position for a variable replace then
    //that makes object replacement safe for that scope only
    bool canReplaceObjects = (this._canReplaceCustom ? this._canReplaceObjects : this._replaceItem is NodeMatchPattern);

    if (algebra is IBgp)
    {
        IBgp bgp = (IBgp)algebra;
        if (bgp.PatternCount == 0)
        {
            return bgp;
        }

        //Do variable substitution on the patterns
        List<ITriplePattern> ps = new List<ITriplePattern>();
        foreach (ITriplePattern p in bgp.TriplePatterns)
        {
            if (p is TriplePattern)
            {
                TriplePattern tp = (TriplePattern)p;

                //A replacement in the subject or predicate position permits object replacement for the
                //rest of this BGP (unless a custom setting overrides it)
                PatternItem subj = this.SubstituteItem(tp.Subject);
                if (ReferenceEquals(subj, this._replaceItem))
                {
                    canReplaceObjects = (this._canReplaceCustom ? this._canReplaceObjects : true);
                }
                PatternItem pred = this.SubstituteItem(tp.Predicate);
                if (ReferenceEquals(pred, this._replaceItem))
                {
                    canReplaceObjects = (this._canReplaceCustom ? this._canReplaceObjects : true);
                }
                PatternItem obj = this.SubstituteItem(tp.Object);
                if (ReferenceEquals(obj, this._replaceItem) && !canReplaceObjects)
                {
                    //Thrown as a RdfQueryException (rather than a bare Exception) so that it is reported
                    //in the same way as every other substitution failure in this method
                    throw new RdfQueryException("Unable to substitute a variable into the object position in this scope");
                }

                ps.Add(new TriplePattern(subj, pred, obj));
            }
            else if (p is FilterPattern)
            {
                FilterPattern fp = (FilterPattern)p;
                ps.Add(new FilterPattern(new UnaryExpressionFilter(this.Transform(fp.Filter.Expression))));
            }
            else if (p is BindPattern)
            {
                BindPattern bp = (BindPattern)p;
                ps.Add(new BindPattern(bp.VariableName, this.Transform(bp.AssignExpression)));
            }
            else if (p is LetPattern)
            {
                LetPattern lp = (LetPattern)p;
                ps.Add(new LetPattern(lp.VariableName, this.Transform(lp.AssignExpression)));
            }
            else if (p is SubQueryPattern)
            {
                throw new RdfQueryException("Cannot do variable substitution when a sub-query is present");
            }
            else if (p is PropertyPathPattern)
            {
                throw new RdfQueryException("Cannot do variable substitution when a property path is present");
            }
            else
            {
                throw new RdfQueryException("Cannot do variable substitution on unknown triple patterns");
            }
        }
        return new Bgp(ps);
    }
    else if (algebra is Service)
    {
        throw new RdfQueryException("Cannot do variable substitution when a SERVICE clause is present");
    }
    else if (algebra is SubQuery)
    {
        throw new RdfQueryException("Cannot do variable substitution when a sub-query is present");
    }
    else if (algebra is IPathOperator)
    {
        throw new RdfQueryException("Cannot do variable substitution when a property path is present");
    }
    else if (algebra is Algebra.Graph)
    {
        //Substitute inside the GRAPH clause first, then deal with the graph specifier itself
        Algebra.Graph g = (Algebra.Graph)((IUnaryOperator)algebra).Transform(this);
        if (g.GraphSpecifier is VariableToken && g.GraphSpecifier.Value.Equals("?" + this._findVar))
        {
            if (this._replaceToken != null)
            {
                return new Algebra.Graph(g.InnerAlgebra, this._replaceToken);
            }
            else
            {
                throw new RdfQueryException("Cannot do a variable substitution when the variable is used for a GRAPH specifier and the replacement term is not a URI");
            }
        }
        else
        {
            return g;
        }
    }
    else if (algebra is IUnaryOperator)
    {
        return ((IUnaryOperator)algebra).Transform(this);
    }
    else if (algebra is IAbstractJoin)
    {
        return ((IAbstractJoin)algebra).Transform(this);
    }
    else if (algebra is ITerminalOperator)
    {
        return algebra;
    }
    else
    {
        throw new RdfQueryException("Cannot do variable substitution on unknown algebra");
    }
}

/// <summary>
/// Gets the replacement item if the given pattern item is the variable being searched for, otherwise the item itself
/// </summary>
/// <param name="item">Pattern item from a triple pattern</param>
/// <returns>The replacement item or the unchanged item</returns>
private PatternItem SubstituteItem(PatternItem item)
{
    return (item.VariableName != null && item.VariableName.Equals(this._findVar)) ? this._replaceItem : item;
}