/// <summary>
/// Creates a copy of this join expression.
/// </summary>
/// <returns>
/// A newly constructed <see cref="JoinExpression"/> after it has been handed
/// to <c>CloneThis</c>.
/// </returns>
public override object Clone() {
    JoinExpression copy = new JoinExpression();
    // NOTE(review): CloneThis presumably copies this instance's state into
    // 'copy' - confirm against its definition.
    CloneThis(copy);
    return copy;
}
/// <summary>
/// Joins <paramref name="left"/> and <paramref name="right"/> as described by
/// the join expression <paramref name="op"/>.
/// </summary>
/// <remarks>
/// If <paramref name="op"/> is not marked as a simple relation, the result is
/// a <c>NaturalJoinedTable</c> over both tables, filtered by a scan when the
/// expression carries a filter. Otherwise the left table is scanned row by row
/// and each row's key is looked up in an index over the right table (either
/// the index named by the <c>use_right_index</c> argument, or a temporary index
/// built here). For <c>JoinType.OuterLeft</c>, left rows with no match are
/// emitted paired with a null right row id.
/// </remarks>
/// <param name="left">The table that is scanned.</param>
/// <param name="right">The table that is searched through an index.</param>
/// <param name="op">
/// The join expression; for simple relations it must carry the
/// <c>!left_var_exps</c>, <c>!right_var_exps</c> and <c>!function_types</c>
/// arguments.
/// </param>
/// <returns>The joined table.</returns>
/// <exception cref="ApplicationException">
/// If <c>use_right_index</c> names an index that cannot be found on the right
/// table.
/// </exception>
private ITable Join(ITable left, ITable right, JoinExpression op) {
    // Get the type of join,
    JoinType joinType = op.JoinType;
    // The filter expression
    Expression filterExp = op.Filter;

    // If the join is not a simple relation, then we need to naturally join
    // and scan
    if (!op.IsSimpleRelation) {
        JoinedTableBase result = new NaturalJoinedTable(left, right);
        result.SetOrderCompositeIsChild();
        if (filterExp != null)
            // return the scan over the cartesian product
            return FilterByScan(result, filterExp);
        return result;
    }

    // This is a simple relation so we may not need to scan over the
    // cartesian join. A simple relation is of the type '[something1]
    // [comparison] [something2]' where something1 and 2 reference terms
    // in the right and left tables exclusively, or a multi variable
    // equivalence comparison such as 't1.a = t2.a and t1.b = t2.b'.
    // A join of this type should always be a scan on the left and lookup
    // on the right.

    // NOTE, these are marked up by the cost model.
    IList<Expression> leftVarExps = (IList<Expression>)op.GetArgument("!left_var_exps");
    IList<Expression> rightVarExps = (IList<Expression>)op.GetArgument("!right_var_exps");
    IList<string> functionTypes = (IList<string>)op.GetArgument("!function_types");

    // Right index, if applicable
    string rIndexStr = (string)op.GetArgument("use_right_index");

    // If the right index is defined, then we know the cost model has
    // determined the right table has a single index we can use.
    IIndexSetDataSource rightIndex;
    if (rIndexStr != null) {
        // Fetch the index
        rightIndex = GetIndex(right, rIndexStr);
        // If no index, we screwed up somewhere. Error in cost model most
        // likely.
        if (rightIndex == null)
            throw new ApplicationException("Right index '" + rIndexStr + "' not found.");
    } else {
        // No right index, so we need to prepare a temporary index
        rightIndex = BuildTemporaryRightIndex(right, rightVarExps);
    }

    // Now we have a rightIndex that describes the keys we are searching
    // for. Scan the left table and lookup values in the right.

    // The join function
    string joinFunctionName = functionTypes[0];

    // Work out the maximum number of elements needed to perform this join
    long maxSize;
    long leftSize = left.RowCount;
    long rightSize = right.RowCount;
    // Make sure to account for the possibility of overflow: when both
    // counts are below Int32.MaxValue the product fits in a long.
    if (leftSize < Int32.MaxValue && rightSize < Int32.MaxValue) {
        maxSize = leftSize * rightSize;
    } else {
        // This is a poor estimate, but it meets the requirements of the
        // contract of 'createTemporaryIndex'.
        // Idea: use a BigDecimal here?
        maxSize = Int64.MaxValue;
    }

    // Allocate the indexes
    IIndex<RowId> leftSet = transaction.CreateTemporaryIndex<RowId>(maxSize);
    IIndex<RowId> rightSet = transaction.CreateTemporaryIndex<RowId>(maxSize);

    // Create a resolver for the left terms
    Expression[] lops = new Expression[leftVarExps.Count];
    leftVarExps.CopyTo(lops, 0);
    IndexResolver leftResolver = CreateResolver(left, lops);

    // Cursor over the left table, wrapped in a forward prefetch cursor
    IRowCursor leftCursor = new PrefetchRowCursor(left.GetRowCursor(), left);

    while (leftCursor.MoveNext()) {
        // The left rowid
        RowId leftRowid = leftCursor.Current;

        // TODO: Need to change this to support multi-column join
        //   conditions,

        // Fetch it into a SqlObject
        SqlObject[] value = leftResolver.GetValue(leftRowid);

        // lookup in the right
        SelectableRange joinRange = SelectableRange.Full;
        joinRange = joinRange.Intersect(SelectableRange.GetOperatorFromFunction(joinFunctionName), value);
        IRowCursor matchedResult = rightIndex.Select(joinRange);

        if (matchedResult.Count > 0) {
            // For each matched element, add a left rowid and right rowid
            while (matchedResult.MoveNext()) {
                RowId rightRowid = matchedResult.Current;
                leftSet.Add(leftRowid);
                rightSet.Add(rightRowid);
            }
        } else if (joinType == JoinType.OuterLeft) {
            // No match on an outer join, so add left with a null entry,
            leftSet.Add(leftRowid);
            rightSet.Add(null);
        }
    }

    // Return the joined table.
    JoinedTableBase joinTable = new JoinedTable(left, right, leftSet, rightSet);
    joinTable.SetOrderCompositeIsChild();
    return joinTable;
}

/// <summary>
/// Builds a temporary index over every row of <paramref name="right"/>, keyed
/// by the values of <paramref name="rightVarExps"/>.
/// </summary>
/// <param name="right">The table to index.</param>
/// <param name="rightVarExps">
/// The right-side key expressions (not necessarily plain variable references;
/// they may be complex expressions).
/// </param>
/// <returns>An index data source over the freshly built temporary index.</returns>
private IIndexSetDataSource BuildTemporaryRightIndex(ITable right, IList<Expression> rightVarExps) {
    // Create the resolver for the term(s) on the right table
    Expression[] rops = new Expression[rightVarExps.Count];
    rightVarExps.CopyTo(rops, 0);
    IndexResolver rightResolver = CreateResolver(right, rops);

    // The working set,
    IIndex<RowId> workingSet = transaction.CreateTemporaryIndex<RowId>(right.RowCount);

    // Iterate over the right table through a forward prefetch cursor
    IRowCursor rightCursor = new PrefetchRowCursor(right.GetRowCursor(), right);
    while (rightCursor.MoveNext()) {
        // The rowid
        RowId rowid = rightCursor.Current;
        // Fetch the key value and index it
        SqlObject[] value = rightResolver.GetValue(rowid);
        workingSet.Insert(value, rowid, rightResolver);
    }

    // Map this into an index data source,
    return new IndexBasedIndexSetDataSource(right, rightResolver, workingSet);
}
/// <summary>
/// Merges <paramref name="toAdd"/> into the filter of
/// <paramref name="joinExpression"/>.
/// </summary>
/// <remarks>
/// When both the existing filter and <paramref name="toAdd"/> are present the
/// new filter is <c>@and_sql(toAdd, existing)</c>. When only one of them is
/// present that one becomes the filter; when neither is, the filter is set to
/// null.
/// </remarks>
/// <param name="joinExpression">The join whose filter is replaced.</param>
/// <param name="toAdd">The additional condition; may be null.</param>
private void AddToJoinFilter(JoinExpression joinExpression, Expression toAdd) {
    Expression existing = joinExpression.Filter;
    Expression combined;

    if (existing == null) {
        // Nothing to merge with; the addition (possibly null) is the filter.
        combined = toAdd;
    } else if (toAdd == null) {
        // Nothing to add; keep what was already there.
        combined = existing;
    } else {
        // Both present: conjunction with the new term first.
        FunctionExpression conjunction = new FunctionExpression("@and_sql");
        conjunction.Parameters.Add(toAdd);
        conjunction.Parameters.Add(existing);
        combined = conjunction;
    }

    joinExpression.Filter = combined;
}
/// <summary>
/// Applies mark-ups and rewrites to <paramref name="expression"/> and returns
/// the expression that should take its place (the same instance unless it was
/// rewritten).
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>SELECT: flags the select as aggregated when it has GROUP BY elements
/// or any output expression is reported as aggregated, then marks its source
/// tables.</item>
/// <item>JOIN: a right outer join is replaced by a left outer join with the
/// operands swapped.</item>
/// <item>Function: the name is qualified through the function manager unless
/// it is exempt or already starts with '@'; a simple comparison whose second
/// parameter is a <c>@nested_list</c> is expanded into an OR tree
/// (<c>@any...</c>) or an AND tree (<c>@all...</c>).</item>
/// </list>
/// If the resulting expression is a SELECT, the top entry of the forward
/// reference stack is popped.
/// </remarks>
/// <param name="expression">The expression being processed.</param>
/// <returns>The original or the replacement expression.</returns>
/// <exception cref="SqlParseException">
/// If a function name cannot be qualified.
/// </exception>
public Expression OnAfterWalk(Expression expression) {
    SelectExpression select = expression as SelectExpression;
    JoinExpression join = expression as JoinExpression;
    FunctionExpression function = expression as FunctionExpression;

    if (select != null) {
        // -- Aggregation mark up --
        // Group by elements imply aggregation
        bool isAggregated = select.GroupBy.Count > 0;

        // Every output is checked (no short-circuit) because the check also
        // marks up the expression it inspects.
        int outputCount = select.Output.Count;
        for (int n = 0; n < outputCount; ++n) {
            SelectOutput output = select.Output[n];
            isAggregated |= optimizer.CheckAndMarkupExpressionAggregated(output.Expression);
        }
        select.IsAggregated = isAggregated;

        // Source check
        MarkSourceTables(select.Join);
    } else if (join != null) {
        // -- Make all outer joins left outer --
        JoinType kind = join.JoinType;
        if (kind == JoinType.OuterRight) {
            // Swap the operands so the right outer join becomes a left outer
            // join.
            Expression formerLeft = join.Left;
            Expression formerRight = join.Right;
            expression = new JoinExpression(formerRight, formerLeft, JoinType.OuterLeft, join.Filter);
        }
    } else if (function != null) {
        // -- Qualify all function operations --
        // Parsed functions become system functions, eg. '+' turns into
        // 'add_sql'. Exempt names and names already starting with '@' are
        // left alone.
        string parsedName = function.Name;
        if (!NonQualifiedFunctions.Contains(parsedName) && !parsedName.StartsWith("@")) {
            string qualified = optimizer.transaction.FunctionManager.QualifyName(parsedName);
            if (qualified == null) {
                throw new SqlParseException("Unable to translate system function " + parsedName, expression);
            }
            function.Name = qualified;
        }

        // Set functions over a nested list (eg. '@anyeq_sql') are collapsed
        // into a logical function tree.
        string functionName = function.Name;
        if (QueryPlanner.IsSimpleComparison(functionName)) {
            Expression rhs = (Expression) function.Parameters[1];
            FunctionExpression rhsFunction = rhs as FunctionExpression;
            if (rhsFunction != null && rhsFunction.Name.Equals("@nested_list")) {
                // The plain comparison (eg. '@eq_sql'): drop the 4 character
                // prefix and put the '@' back.
                String setComparison = "@" + functionName.Substring(4);
                Expression lhs = (Expression) function.Parameters[0];

                if (functionName.StartsWith("@any")) {
                    // ANY is turned into an OR tree
                    expression = ComposeSetLogicalGraph("@or_sql", setComparison, lhs, rhsFunction);
                } else if (functionName.StartsWith("@all")) {
                    // ALL is turned into a group of AND
                    expression = ComposeSetLogicalGraph("@and_sql", setComparison, lhs, rhsFunction);
                }
            }
        }
    }

    if (expression is SelectExpression) {
        // Leaving SELECT so pop the forward reference list from the stack
        IList<FetchVariableExpression> leavingRefs = refStack[refStack.Count - 1];
        refStack.RemoveAt(refStack.Count - 1);
    }

    return expression;
}
/// <summary>
/// Randomly schedules the predicates that depend only on the tables found in
/// <paramref name="joinGraph"/>, rewriting <paramref name="danglingExps"/> in
/// place by wrapping entries in filters or merging them into joins.
/// </summary>
/// <param name="expressions">All candidate predicates.</param>
/// <param name="danglingExps">
/// The current list of not-yet-joined expressions; entries are replaced,
/// removed and appended as predicates are scheduled.
/// </param>
/// <param name="joinGraph">
/// The join graph (or single table source) that defines the domain of tables.
/// </param>
/// <returns>The table names that make up the domain of this call.</returns>
/// <exception cref="SystemException">
/// If a table source in the graph has no alias.
/// </exception>
/// <exception cref="ApplicationException">
/// If a chosen predicate matches none of the dangling expressions.
/// </exception>
/// <exception cref="NotImplementedException">
/// If both sides of a two-way merge belong to the predicate's right
/// dependency.
/// </exception>
private List<TableName> RandomPlanSchedule(IList<QueryPredicate> expressions, IList<Expression> danglingExps, Expression joinGraph) {
    // Collect the set of table sources around the left branch,
    List<Expression> expList = new List<Expression>();
    if (joinGraph is JoinExpression) {
        LeftDeepTableSources(expList, joinGraph);
    } else {
        expList.Add(joinGraph);
    }

    // Recurse on any right branches first (outer joins) and create a list of
    // sources
    List<TableName> sources = new List<TableName>();
    int sz = expList.Count;
    for (int i = 0; i < sz; ++i) {
        Expression expression = expList[i];
        if (expression is JoinExpression) {
            sources.AddRange(RandomPlanSchedule(expressions, danglingExps, expression));
        } else {
            TableName tn = ((AliasTableNameExpression) expression).Alias;
            if (tn == null)
                throw new SystemException();
            sources.Add(tn);
        }
    }

    // Now 'sources' is our domain of tables to work with

    // By the time this method returns, the domain 'sources' must be entirely
    // joined together in the danglingExps list.

    // The list of predicates that are entirely dependant on tables in this
    // source.
    List<QueryPredicate> predicates = new List<QueryPredicate>();
    foreach (QueryPredicate expr in expressions) {
        // Predicates with no dependants are never scheduled here
        if (expr.dependant_on.Count > 0) {
            bool touchAll = true;
            foreach (TableName src in expr.dependant_on) {
                if (!sources.Contains(src)) {
                    touchAll = false;
                    break;
                }
            }
            // Add if the dependants of this expression are all contained within
            // the source domain.
            if (touchAll) {
                predicates.Add(expr);
            }
        }
    }

    while (true) {
        // Find a random predicate that can be scheduled in this domain.
        // A result of -1 means no predicate could be found in the set.
        int ri = RandomPredicate(predicates, danglingExps);
        // If no predicates found, break the main loop and return
        if (ri == -1) {
            break;
        }

        // Remove the predicate from the list
        QueryPredicate predicate = predicates[ri];
        predicates.RemoveAt(ri);

        // Pick the operations from the source this query is dependant on.
        // srcIds holds positions in danglingExps, srcExps the matching
        // expressions; each dangling expression is recorded at most once.
        List<Expression> srcExps = new List<Expression>();
        List<int> srcIds = new List<int>();
        foreach (TableName tname in predicate.dependant_on) {
            int id = 0;
            foreach (Expression op in danglingExps) {
                if (IsASourceOf(tname, op)) {
                    if (!srcIds.Contains(id)) {
                        srcIds.Add(id);
                        srcExps.Add(op);
                    }
                    break;
                }
                ++id;
            }
        }

        // Error condition
        if (srcExps.Count == 0)
            throw new ApplicationException("Unable to schedule predicate: " + predicate);

        // If we only found 1 source, we simply merge the predicate
        // expression as a scan operation.
        if (srcExps.Count <= 1) {
            int expPos = srcIds[0];
            Expression oldExp = danglingExps[expPos];
            // Make a filter with the old expression as the child and
            // predicate.expression as the filter
            danglingExps[expPos] = new FilterExpression("single_filter", oldExp, predicate.expression);
        } else {
            // If 2 or more

            // If more than 2, we randomly pick sources to merge as a cartesian
            // product while still maintaining the right requirement of the join
            // if there is one.
            if (srcExps.Count > 2) {
                // Randomize the list
                Util.CollectionsUtil.Shuffle(srcIds);

                // If the predicate has a right dependancy, put it on the end.
                // This is an index loop on purpose: writing to srcIds through
                // its indexer while a foreach enumerates it invalidates the
                // enumerator and throws InvalidOperationException.
                if (predicate.right_dependancy != null) {
                    TableName farRight = predicate.right_dependancy[0];
                    int lastI = srcIds.Count - 1;
                    for (int i = 0; i <= lastI; ++i) {
                        if (IsASourceOf(farRight, danglingExps[srcIds[i]])) {
                            // swap with last element; the source is now in
                            // place so there is nothing more to look for
                            int lid = srcIds[lastI];
                            srcIds[lastI] = srcIds[i];
                            srcIds[i] = lid;
                            break;
                        }
                    }
                }

                // Cartesian join the terms, left to right until we get to the last
                // element.
                Expression procExp = danglingExps[srcIds[0]];
                for (int i = 1; i < srcIds.Count - 1; ++i) {
                    procExp = new JoinExpression(procExp, danglingExps[srcIds[i]], JoinType.Cartesian, null);
                }

                // Remember the merged left side and the expression on the right
                Expression leftExp1 = procExp;
                Expression rightExp1 = danglingExps[srcIds[srcIds.Count - 1]];

                // Remove the terms from the current layout list, highest
                // position first so the remaining positions stay valid
                int[] idSet = srcIds.ToArray();
                Array.Sort(idSet);
                for (int i = idSet.Length - 1; i >= 0; --i) {
                    danglingExps.RemoveAt(idSet[i]);
                }

                // Reset the source id and source expression lists
                srcIds.Clear();
                srcExps.Clear();

                // Add the left and right expression
                danglingExps.Add(leftExp1);
                danglingExps.Add(rightExp1);
                srcIds.Add(danglingExps.Count - 2);
                srcIds.Add(danglingExps.Count - 1);
                srcExps.Add(leftExp1);
                srcExps.Add(rightExp1);
            }

            // Ok, down to 2 to merge; 'li' is the position (0 or 1) of the
            // source that becomes the left side.
            int li;
            // Do we have a right requirement?
            if (predicate.right_dependancy != null) {
                // Yes, so either one src is part of the right dependancy or they
                // are both part of the right dependancy.
                Expression exp1 = srcExps[0];
                Expression exp2 = srcExps[1];
                int op1_c = 0;
                int op2_c = 0;
                foreach (TableName rightName in predicate.right_dependancy) {
                    if (IsASourceOf(rightName, exp1)) {
                        ++op1_c;
                    }
                    if (IsASourceOf(rightName, exp2)) {
                        ++op2_c;
                    }
                }

                // If they are both part of the right dependancy, we cartesian join
                if (op1_c > 0 && op2_c > 0) {
                    // TODO:
                    throw new NotImplementedException();
                }

                if (op1_c > 0) {
                    // exp1 is part of the right dependancy, so exp2 is the left
                    li = 1;
                } else {
                    // exp2 is part of the right dependancy, so exp1 is the left
                    li = 0;
                }
            } else {
                // No right dependancy,
                // Heuristic - If one of the sources is not a fetch table command
                // then we have a greater chance to pick that as our left. This
                // encourages left deep scan graphs which are the sorts of graphs
                // we are interested in.
                ExpressionType type0 = srcExps[0].Type;
                ExpressionType type1 = srcExps[1].Type;
                if (type0 != ExpressionType.AliasTableName &&
                    type1 == ExpressionType.AliasTableName) {
                    li = (random.Next(10) >= 2) ? 0 : 1;
                } else if (type1 != ExpressionType.AliasTableName &&
                           type0 == ExpressionType.AliasTableName) {
                    li = (random.Next(10) >= 2) ? 1 : 0;
                } else {
                    // Randomly pick if both are fetch table operations
                    li = random.Next(2);
                }
            }

            Expression leftExp = srcExps[li];
            int leftId = srcIds[li];
            Expression rightExp = srcExps[(li + 1) % 2];
            int rightId = srcIds[(li + 1) % 2];

            // Schedule the join operation,
            // FIXME: check this ...
            JoinType jtype = !predicate.joinTypeSet ? JoinType.Inner : predicate.JoinType;
            JoinExpression join_op = new JoinExpression(leftExp, rightExp, jtype, predicate.expression);

            // Remove the left and right id from the list, highest position
            // first so the second removal still targets the right entry
            if (leftId > rightId) {
                danglingExps.RemoveAt(leftId);
                danglingExps.RemoveAt(rightId);
            } else {
                danglingExps.RemoveAt(rightId);
                danglingExps.RemoveAt(leftId);
            }

            // Add the new join
            danglingExps.Add(join_op);
        }
    }

    return sources;
}
/// <summary>
/// Estimates the row count and time cost of <paramref name="joinExpression"/>
/// from the costs already recorded on its operands, and stores the result in
/// the join's <c>CostRows</c> and <c>CostTime</c>.
/// </summary>
/// <remarks>
/// For inner and left outer joins this also marks up the join: either with
/// <c>cartesian_scan</c>, or as a simple relation carrying the
/// <c>!left_var_exps</c>, <c>!right_var_exps</c> and <c>!function_types</c>
/// arguments and, when an index on the right table was selected,
/// <c>use_right_index</c> and <c>use_right_index_table_name</c>.
/// </remarks>
/// <param name="left">The left operand; its cost must already be set.</param>
/// <param name="right">The right operand; its cost must already be set.</param>
/// <param name="joinExpression">The join to cost and mark up.</param>
/// <exception cref="ApplicationException">
/// If the join type is not cartesian, inner or left outer.
/// </exception>
private void CostJoinExpression(Expression left, Expression right, JoinExpression joinExpression) {
    // Row counts of both operands and the time spent producing them
    double leftRows = left.CostRows;
    double rightRows = right.CostRows;
    double costTime = left.CostTime + right.CostTime;

    JoinType joinType = joinExpression.JoinType;

    if (joinType == JoinType.Cartesian) {
        // A cartesian join costs nothing on top of its operands (it is a
        // simple mapping), but the row counts multiply.
        joinExpression.CostRows = (leftRows * rightRows);
        joinExpression.CostTime = costTime;
        return;
    }

    if (joinType != JoinType.Inner && joinType != JoinType.OuterLeft)
        throw new ApplicationException("Unknown join type " + joinType);

    Expression filter = joinExpression.Filter;

    // The join can be run as "scan left, search right" only when the filter
    // is a simple relation: a single simple comparison, or the logical AND
    // of equivalence comparisons, whose operands source to the left and the
    // right branch respectively. Anything else costs as a cartesian join
    // plus a scan of the result.
    bool isSimpleRelation = false;
    List<Expression> leftVarExps = new List<Expression>(4);
    List<Expression> rightVarExps = new List<Expression>(4);
    List<String> functionTypes = new List<string>(4);

    if (filter is FunctionExpression) {
        // Split the filter's operands by the branch they source to. Given
        // branches T1/T2 and 'T2.a=T1.a', T1.a lands in the left list and
        // T2.a in the right list; equi groups such as
        // (T1.a = T2.a AND T1.b = T2.b) work too. The lists may hold
        // anything, including statics and nested queries.
        List<TableName> leftSources = new List<TableName>();
        List<TableName> rightSources = new List<TableName>();
        QueryPlanner.PopulateSourceList(leftSources, new List<Expression>(), left);
        QueryPlanner.PopulateSourceList(rightSources, new List<Expression>(), right);

        bool valid = LeftRightComparisonPairs(filter, functionTypes,
                                              leftVarExps, rightVarExps,
                                              leftSources, rightSources);

        // Several comparisons are only acceptable when they are all the
        // same equivalence function.
        if (valid && functionTypes.Count > 1) {
            valid = QueryPlanner.IsSimpleEquivalence(functionTypes[0]);
            for (int n = 1; n < functionTypes.Count; ++n) {
                string otherType = functionTypes[n];
                if (!otherType.Equals(functionTypes[0]))
                    valid = false;
            }
        }

        isSimpleRelation = valid;
    }

    double rowResult;

    if (!isSimpleRelation) {
        // Mark the join up as needing a scan over the cartesian product
        joinExpression.SetArgument("cartesian_scan", "true");

        // These joins are nasty - the cartesian product has to be produced
        // and then scanned.
        double complexJoinCost = (leftRows * rightRows) * 1.1d;
        costTime = costTime + complexJoinCost;

        // Assume worst case for the number of rows passing the filter
        rowResult = leftRows * rightRows;
    } else {
        // Mark the join as a simple relation and record the state the join
        // processor needs.
        joinExpression.IsSimpleRelation = true;
        joinExpression.SetArgument("!left_var_exps", leftVarExps.AsReadOnly());
        joinExpression.SetArgument("!right_var_exps", rightVarExps.AsReadOnly());
        joinExpression.SetArgument("!function_types", functionTypes.AsReadOnly());

        // Whether an existing index on the right table was chosen; if so the
        // right side needs no preparation.
        bool rightIndexChosen = false;

        // Only a plain table fetch on the right is a candidate for an index
        if (right is AliasTableNameExpression) {
            // Look for the multi-column index
            IndexKey idxKey = FindIndexOn(rightVarExps, right);
            TableName rightIndexTableName = idxKey.IndexTable;
            string rightIndexName = idxKey.IndexName;

            // If there is none and this is multi-column, fall back to the
            // first fetch variable's index candidate (not a very good
            // heuristic).
            if (rightVarExps.Count > 1) {
                foreach (Expression candidate in rightVarExps) {
                    if (rightIndexName != null)
                        break;
                    if (candidate is FetchVariableExpression) {
                        rightIndexName = candidate.IndexCandidate;
                        rightIndexTableName = candidate.IndexTableName;
                    }
                }
            }

            if (rightIndexName != null) {
                rightIndexChosen = true;
                // The index to use
                joinExpression.SetArgument("use_right_index", rightIndexName);
                joinExpression.SetArgument("use_right_index_table_name", rightIndexTableName);
            }
        }

        // TODO: Is right sorted by 'rightVarExps'?

        // A scan join always scans the left table and looks values up in
        // the right table. With no usable index the right table has to be
        // sorted or hashed first, which is the extra preparation cost.
        double joinCost = (leftRows * 1.1) + (leftRows * BTreeLookupCost * 2.0d);
        double rightPrepareCost = rightRows * BTreeLookupCost;

        costTime = costTime + joinCost;
        if (!rightIndexChosen) {
            costTime = costTime + rightPrepareCost;
        }

        // TODO: Estimate the result size statistically when the left and
        //   right operations are sufficiently simple.

        // Negative means "no estimate yet"
        rowResult = -1d;

        // Does the filter have a fact id with historical data?
        string factId = (string)filter.GetArgument("fact_id");
        if (factId != null) {
            FactStatistics facts = transaction.FactStatistics;
            if (facts.FactSampleCount(factId) > 0) {
                // Scale the cartesian product by the truth probability of
                // the fact, with a minimum of 3 rows.
                double prob = facts.ProbabilityEstimate(factId);
                rowResult = ((leftRows * rightRows) * prob);
                if (rowResult < 3.0d)
                    rowResult = 3.0d;
            }
        }

        // No statistics: fall back to a general heuristic where equi joins
        // produce fewer results.
        if (rowResult < 0d) {
            string relationType = functionTypes[0];
            if (QueryPlanner.IsSimpleEquivalence(relationType)) {
                // Somewhere between left rows and right rows: the smaller
                // count plus a third of the difference.
                double spread = rightRows - leftRows;
                spread = (spread < 0) ? -spread : spread;
                spread = spread / 3;
                rowResult = System.Math.Min(leftRows, rightRows) + spread;
            } else {
                // Not an equivalence, so assume a little less than cartesian.
                rowResult = (leftRows * rightRows) * 0.85d;
            }
        }
    }

    joinExpression.CostRows = rowResult;
    joinExpression.CostTime = costTime;
}