private static void CostAggregateFilterOp(Expression child, FilterExpression expression) { // The child cost values double childRows = child.CostRows; double childTime = child.CostTime; // TODO: We should check for full range aggregate, in which case we // know there will only be 1 row result. // Set the costs expression.CostTime = childTime + (childRows * 1); expression.CostRows = childRows; }
private static void CostSortFilterExpression(Expression child, FilterExpression expression) { // The child cost values double childRows = child.CostRows; double childTime = child.CostTime; // If child has an index we can use for the sort or is already // sorted by the filter terms, we don't need to incur the cost of the // sort. string indexName; TableName indexTableName; // The filter operation Expression filter = expression.Filter; FunctionExpression functionExp = filter as FunctionExpression; // Filter must be a composite function if (functionExp == null || !functionExp.Name.Equals("composite")) throw new ApplicationException("Expected composite function."); // Get the terms, etc int paramCount = functionExp.Parameters.Count; int termCount = paramCount / 2; // If 1 sort term, if (termCount == 1) { Expression sortExp = (Expression)functionExp.Parameters[0]; // Get the index candidate indexName = sortExp.IndexCandidate; indexTableName = sortExp.IndexTableName; } else { // Multiple terms, // Get the index candidate if there is one indexName = filter.IndexCandidate; indexTableName = filter.IndexTableName; } bool indexLookup = false; // If we have an index candidate, if (indexName != null) { // Index found, // Is the child operation a table where the index is available? TableName indexedTable = FetchFirstIndexedTable(child); indexLookup = indexedTable != null && indexedTable.Equals(indexTableName); } // If no index candidate or index not available, check if the child // is already ordered by this composite, if (!indexLookup) indexLookup = GraphCollatedByComposite(child, filter); // Cost of index lookup if (indexLookup) { expression.CostTime = childTime + (BTreeLookupCost * 2.0d); } else { // Cost of sort operation with no index involved in the operation expression.CostTime = childTime + (childRows * BTreeLookupCost); } // Set the costs expression.CostRows = childRows; }
private static void CostNoCostFilterExpression(Expression child, FilterExpression expression) { // Set the costs expression.CostTime = child.CostTime; expression.CostRows = child.CostRows; }
private void CostSingleFilterExpression(Expression child, FilterExpression expression) { // The child cost values double childRows = child.CostRows; double childTime = child.CostTime; // The filter operation Expression filter = expression.Filter; // If the filter is a range_set function, and the child is a table // alias then we check for index candidates. string funType = (string) filter.GetArgument("name"); // We can work out an estimate of the time cost now, bool indexApplicable = false; string indexName = null; TableName tableName = null; Expression compositeIndexExp = null; // Fetch the first table to which index information is applicable TableName firstIndexedTable = FetchFirstIndexedTable(child); if (firstIndexedTable != null) { // Ok, child of filter is a fetch table, so look for clauses that we // can use an index for // Get the index candidate if (filter.Type == ExpressionType.Function) { Expression param0 = (Expression) filter.GetArgument("arg0"); // Check if we can use an index for a range set function Expression varExp = null; if (funType.Equals("range_set")) { varExp = param0; } else { // Index is still applicable for parameter queries. The operator // must be sufficiently simple and contain 1 variable that is an // index candidate. if (QueryPlanner.IsSimpleComparison(funType)) { // Does is contain 1 variable that is an index candidate? Expression param1 = (Expression) filter.GetArgument("arg1"); if (param0.Type == ExpressionType.FetchVariable && param1.Type != ExpressionType.FetchVariable) { varExp = param0; } else if (param0.Type != ExpressionType.FetchVariable && param1.Type == ExpressionType.FetchVariable) { varExp = param1; } } } if (varExp != null) { indexName = varExp.IndexCandidate; tableName = varExp.IndexTableName; if (indexName != null) { indexApplicable = true; // Set the indexed ops field, which is an array of operations // representing the term of the index compositeIndexExp = FunctionExpression.Composite(varExp, true); } } } } // We use the index to predict worst case cost in an accurate way if (indexApplicable && funType.Equals("range_set")) { // If we have an index, and the filter is a range set, we query the index // directly to get worst case probability. // Get the variable. SelectableRange rangeSet = (SelectableRange) filter.GetArgument("arg1"); // The time to perform this operation is selectable range set // elements * (2 * LOOKUP_COST) long filterTimeCost; if (indexApplicable) { // Index time cost filterTimeCost = rangeSet.Count() * (BTreeLookupCost * 2); // Notify the graph that this filter must be ordered by the terms of // the expression regardless of how the processor decides to solve the // operation. expression.OrderRequired = compositeIndexExp; } else { // Scan time cost filterTimeCost = (long)((double)childRows * 1.1); } // Have we done a size estimate already on this filter? long? resultSize = (long?)filter.GetArgument("result_size_lookup"); if (resultSize == null) { // Fetch the index on the table IIndexSetDataSource rowIndex = transaction.GetIndex(tableName, indexName); // Do the index lookup and cost appropriately IRowCursor result = rowIndex.Select(rangeSet); resultSize = result.Count; filter.SetArgument("result_size_lookup", resultSize.Value); } // Row count is the worst case, either the child rows or the number of // elements in the index, whichever is smaller. double newRowCount = System.Math.Min((double)resultSize, childRows); // Note, this information is a very precise worst case expression.CostRows = newRowCount; expression.CostTime = childTime + filterTimeCost; return; } else { // This is a parameter operation eg. 'a = ?', '? > b' // We know we if we have an index to resolve this which we use for // time costing, but we don't know anything specific about the value // being searched. We always assume that something will be matched. // The time cost of this operation double filterTimeCost; double newRowCount; if (indexApplicable) { // Index lookup filterTimeCost = BTreeLookupCost * 2.0d; // Notify the graph that this filter must be ordered by the terms of // the expression regardless of how the processor decides to solve the // operation. expression.OrderRequired = compositeIndexExp; } else { // Complete scan of child filterTimeCost = childRows * 1.1d; } // If we are a simple function if (QueryPlanner.IsSimpleComparison(funType)) { // Fetch the first variable that is locally referencable from the // arguments Variable var = null; Expression varExp = (Expression) filter.GetArgument("arg0"); if (varExp.Type == ExpressionType.FetchVariable) { var = Dereference(expression, (Variable)varExp.GetArgument("var")); if (var == null) { varExp = (Expression) filter.GetArgument("arg1"); if (varExp.Type == ExpressionType.FetchVariable) var = Dereference(expression, (Variable)varExp.GetArgument("var")); } } // If we can't dereference it, assume worst case if (var == null) { newRowCount = childRows; } else { // No index, so defer to a probability estimate, double? cachedProbability = (double?)filter.GetArgument("result_probability"); if (cachedProbability == null) { // Get the column statistics object for this ColumnStatistics col_stats = transaction.GetColumnStatistics(var); // Estimated probability of the given function truth over a sample // of the data. cachedProbability = col_stats.ProbabilityEstimate(funType); filter.SetArgument("result_probability", cachedProbability.Value); } double predictedRowCount = childRows * cachedProbability.Value; // Round up. newRowCount = predictedRowCount + 1; } } else if (funType.Equals("range_set")) { // If we are a range_set // Get the variable. Expression varExp = (Expression) filter.GetArgument("arg0"); SelectableRange rangeSet = (SelectableRange)filter.GetArgument("arg1"); // Get the var, Variable var = (Variable) varExp.GetArgument("var"); // Dereference this variable var = Dereference(expression, var); // If we can't dereference it, assume worst case if (var == null) { newRowCount = childRows; } else { double probability; // If the var is an index candidate, indexName = varExp.IndexCandidate; tableName = varExp.IndexTableName; if (indexName != null) { // There's an index we can use! // Fetch the index on the table IIndexSetDataSource rowIndex = transaction.GetIndex(tableName, indexName); // Have we done a size estimate already on this filter? long? resultSize = (long?)filter.GetArgument("result_size_lookup"); if (resultSize == null) { // Do the index lookup and cost appropriately IRowCursor result = rowIndex.Select(rangeSet); resultSize = result.Count; filter.SetArgument("result_size_lookup", resultSize.Value); } // Calculate the probability, long indexSize = rowIndex.Select(SelectableRange.Full).Count; if (indexSize > 0) { probability = (double)resultSize / indexSize; } else { probability = 0; } } else { // No index, so defer to a probability estimate, double? cached_probability = (double?)filter.GetArgument("result_probability"); if (cached_probability == null) { // Get the column statistics object for this ColumnStatistics col_stats = transaction.GetColumnStatistics(var); // Estimated probability of the given function truth over a // sample of the data. cached_probability = col_stats.ProbabilityEstimate(rangeSet); filter.SetArgument("result_probability", cached_probability.Value); } probability = cached_probability.Value; } double predictedRowCount = childRows * probability; // Round up. newRowCount = predictedRowCount + 1; } } else { // Otherwise not a simple function, and can't really predict anything // about this. // Assume worst case, newRowCount = childRows; } // Set the costs expression.CostRows = newRowCount; expression.CostTime = childTime + filterTimeCost; return; } }
private ITable StaticFilter(ITable child, FilterExpression expression) { // The filter operation Expression filterExp = expression.Filter; // Execute the static expression SqlObject[] result = Result(DoExecute(filterExp)); // If true, if (result.Length == 1 && SqlObject.Compare(result[0], new SqlObject(true)) == 0) return child; return new SubsetTable(child); }
private ITable SortFilter(ITable table, FilterExpression expression) { // The filter operation which is a function that describes the sort terms Expression filterExp = expression.Filter; if (!(filterExp is FunctionExpression)) throw new ArgumentException("Expected a function as argument to the filter."); ITable resultTable = table; // If there's something to sort, if (table.RowCount > 1) { // Get the composite function representing the sort collation, FunctionExpression compositeExp = (FunctionExpression) filterExp; if (!compositeExp.Name.Equals("composite")) throw new ArgumentException("Invalid composite function for sorting."); // The natural ordering of the child Expression naturalChildOrder = GetTableOrderComposite(table); if (naturalChildOrder != null) { if (naturalChildOrder.Equals(compositeExp)) // No sort necessary, already sorted return table; // TODO: test for the reverse condition, which we can optimize // with a reverse row iterator. } int paramCount = compositeExp.Parameters.Count; int termCount = paramCount / 2; IIndexSetDataSource rowIndex; bool naturalOrder = true; // If 1 sort term, if (termCount == 1) { Expression sortExp = (Expression) compositeExp.Parameters[0]; naturalOrder = SqlObject.Equals((SqlObject)compositeExp.Parameters[1], SqlObject.True); // Get the index candidate string indexName = sortExp.IndexCandidate; TableName indexTableName = sortExp.IndexTableName; // Index available? rowIndex = GetIndex(table, indexName); } else { // Multiple terms, // Get the index candidate if there is one string indexName = compositeExp.IndexCandidate; TableName indexTableame = compositeExp.IndexTableName; // Index available? rowIndex = GetIndex(table, indexName); } // If we have an index, if (rowIndex != null) { IRowCursor sortedCursor = rowIndex.Select(SelectableRange.Full); if (!naturalOrder) // Reverse iterator, sortedCursor = new ReverseRowCursor(sortedCursor); SubsetTable sortedTable = new SubsetTable(table, sortedCursor); sortedTable.IndexRequestFallthrough = true; // Set the order composite function the describes how the subset is // naturally sorted. sortedTable.OrderComposite = (Expression) compositeExp.Clone(); resultTable = sortedTable; } else { // NOTE: There's lots of directions we can take for optimizing this // sort. For example, if the number of values being sorted meets some // criteria (such as all integers and less than 2 millions values) // then the values being sorted could be read onto the heap and sorted // in memory with a quick sort. // Scan sort, // The working set, IIndex<RowId> workingSet = transaction.CreateTemporaryIndex<RowId>(table.RowCount); // Create the resolver IndexResolver resolver = CreateResolver(table, compositeExp); // Iterator over the source table IRowCursor tableCursor = table.GetRowCursor(); // Wrap in a forward prefetch iterator tableCursor = new PrefetchRowCursor(tableCursor, table); // Use a buffer, RowId[] rowIds = new RowId[128]; while (tableCursor.MoveNext()) { int count = 0; while (tableCursor.MoveNext() && count < 128) { rowIds[count] = tableCursor.Current; ++count; } for (int i = 0; i < count; ++i) { RowId rowid = rowIds[i]; // Get the value, SqlObject[] values = resolver.GetValue(rowid); // Insert the record into sorted order in the working_set workingSet.Insert(values, rowid, resolver); } } // TODO: record 'workingSet' for future resolution. // The result, IRowCursor sortedCursor = new DefaultRowCursor(workingSet.GetCursor()); SubsetTable sortedTable = new SubsetTable(table, sortedCursor); sortedTable.IndexRequestFallthrough = true; // Set the order composite function the describes how the subset is // naturally sorted. sortedTable.OrderComposite = (Expression) compositeExp.Clone(); resultTable = sortedTable; } } return resultTable; }
private ITable SingleFilter(ITable table, FilterExpression expression) { // The filter operation Expression filterExp = expression.Filter; // A range set is eligible for indexes, etc if (filterExp.Type == ExpressionType.Function) { FunctionExpression functionExp = (FunctionExpression) filterExp; string funType = functionExp.Name; // Is this a range set? if (funType.Equals("range_set")) { // Get the var and the range set FetchVariableExpression varExp = (FetchVariableExpression) functionExp.Parameters[0]; Variable var = varExp.Variable; SelectableRange rangeSet = (SelectableRange) functionExp.Parameters[1]; // Is the var an index candidate? TableName indexTname = varExp.IndexTableName; string indexName = varExp.IndexCandidate; if (indexName != null) { // Try and get this index in the parent IIndexSetDataSource rowIndex = GetIndex(table, indexName); if (rowIndex != null) { // The order composite Expression orderComposite = expression.OrderRequired; return FilterByIndex(table, rowIndex, orderComposite, rangeSet); } } // Here if; no index found we can use, so we fall through to the scan } else { // Check to see if we have a simple expression we can use an index // for. For example, 'a = ?' // Check the function is a simple comparison if (QueryPlanner.IsSimpleComparison(funType)) { // Do we have a var on one side and a simple static expression on the // other? Expression p0 = (Expression) functionExp.Parameters[0]; Expression p1 = (Expression)functionExp.Parameters[1]; Expression var_op = null; Expression static_op = null; String comparison = null; ExpressionType p0type = p0.Type; ExpressionType p1type = p1.Type; if (p0type == ExpressionType.FetchVariable && p1type != ExpressionType.FetchVariable) { var_op = p0; static_op = p1; comparison = funType; } else if (p1type == ExpressionType.FetchVariable && p0type != ExpressionType.FetchVariable) { var_op = p1; static_op = p0; comparison = QueryPlanner.ReverseSimpleComparison(funType); } // Did we find an expression that is eligible? if (comparison != null) { // If the var_op is an index candidate. string indexName = var_op.IndexCandidate; TableName indexTableName = var_op.IndexTableName; if (indexName != null) { // We have an index, // Try and get this index in the parent IIndexSetDataSource rowIndex = GetIndex(table, indexName); if (rowIndex != null) { // Is the static locally static? if (IsLocallyStatic(table, static_op)) { // Resolve the static, ITable staticResult = DoExecute(static_op); // Assert we have 1 column and 1 row if (staticResult.RowCount != 1 && staticResult.Columns.Count != 1) throw new ApplicationException("Static operation gave incorrectly formatted result"); // Get the value IRowCursor rowCursor = staticResult.GetRowCursor(); if (!rowCursor.MoveNext()) throw new ApplicationException(); RowId rowId = rowCursor.Current; SqlObject staticVal = staticResult.GetValue(0, rowId); // The order composite Expression orderComposite = expression.OrderRequired; // Perform the index lookup, return FilterByIndex(table, rowIndex, orderComposite, comparison, staticVal); } else { // Here if; the static is not locally static } } else { // Here if; the parent isn't base table with the index // definition } } else { // Here if; the var isn't an index candidate } } else { // Here if; there isn't a fetch var on a side or both sides are // fetch vars } } else { // TODO: Check if it's a logical operator, so we can check for // constructs like 'a = 3 or a = ? or a = ?' which we can // potentially convert into a pointer union between 3 index // lookups. // Currently, fall through to a scan operation... } } } else { // Here if; the filter is not based on a function operation. } // We filter by a full scan on the table with the operation. return FilterByScan(table, filterExp); }
/// <summary> /// Prepares the given SearchExpression object. /// </summary> /// <param name="db"></param> /// <param name="fromSet"></param> /// <param name="expression"></param> /// <remarks> /// This goes through each element of the expression. If the /// element is a variable it is qualified. /// If the element is a <see cref="TableSelectExpression"/> it's /// converted to a <see cref="SelectStatement"/> object and prepared. /// </remarks> private static FilterExpression PrepareSearchExpression(IDatabaseConnection db, TableExpressionFromSet fromSet, FilterExpression expression) { // first check the expression is not null if (expression == null) return null; // This is used to prepare sub-queries and qualify variables in a // search expression such as WHERE or HAVING. // Prepare the sub-queries first expression = expression.Prepare(new ExpressionPreparerImpl(db, fromSet)); // Then qualify all the variables. Note that this will not qualify // variables in the sub-queries. expression = expression.Prepare(fromSet.ExpressionQualifier); return expression; }
private ITable ExpressionTableFilter(ITable table, FilterExpression op) { // The filter operation which is a function that describes the output // columns Expression filterExp = op.Filter; if (filterExp.Type != ExpressionType.Function) throw new ApplicationException("Expected a function."); FunctionExpression functionExp = (FunctionExpression)filterExp; // Function name and parameter count, string funName = functionExp.Name; if (!funName.Equals("table_out")) throw new ArgumentException(); int paramCount = functionExp.Parameters.Count; // Create the expression table data source ExpressionTable expressionTable = new ExpressionTable(table, this); // The number of parameters, for (int i = 0; i < paramCount; ++i) { Expression outExp = (Expression) functionExp.Parameters[i]; // This will always be an aliasvarname with an operation child which is // the expression we perform for the column. if (!(outExp is AliasVariableNameExpression)) throw new ApplicationException("Expected ALIASVARNAME."); // The label, Variable v = ((AliasVariableNameExpression)outExp).Alias; string label = v.Name; // The actual function Expression funExp = ((AliasVariableNameExpression) outExp).Child; // Work out the type, SqlType expType = GetExpressionType(table, funExp); // Add the column expressionTable.AddColumn(label, expType, funExp); } // Return the operation table return expressionTable; }
private ITable AggregateFilter(ITable child, FilterExpression expression) { // The filter operation FunctionExpression filterExp = (FunctionExpression) expression.Filter; // The filter operation is the sort composite AggregateTable aggregate = new AggregateTable(child, filterExp); // Create an empty index for the aggregate table and initialize // Note: Time cost of this is a scan on 'child' IIndex<long> emptyIndexContainer = transaction.CreateTemporaryIndex<long>(System.Math.Max(2, child.RowCount * 2)); aggregate.InitGroups(this, emptyIndexContainer); // Set the order composite aggregate.SetOrderCompositeIsChild(); // Return the table return aggregate; }
/// <summary> /// Evalutes the WHERE clause of the table expression. /// </summary> /// <param name="searchExpression"></param> /// <returns></returns> public IQueryPlanNode PlanSearchExpression(FilterExpression searchExpression) { // First perform all outer tables. PlanAllOuterJoins(); Expression exp = searchExpression == null ? null : searchExpression.Expression; return LogicalEvaluate(exp); }
private Expression CostAnalysisAndTransform(SelectExpression selectExpression) { Expression joinGraph = selectExpression.Join; Expression filterGraph = selectExpression.Filter; // The required final ordering of the select expression if necessary. // This is either the 'group by' ordering for an aggregate statement, // or 'order by' if it's not an aggregate. // Are we an aggregate? Expression[] resultOrderExps; bool[] resultOrderAsc; Expression sortComposite = null; bool aggregateExpression = false; if (selectExpression.IsAggregated) { // Yes, do we have group by clause? int groupbyCount = selectExpression.GroupBy.Count; resultOrderExps = new Expression[groupbyCount]; resultOrderAsc = new bool[groupbyCount]; for (int i = 0; i < groupbyCount; ++i) { resultOrderExps[i] = selectExpression.GroupBy[i]; resultOrderAsc[i] = true; // All group by ordering is ascending } // Note the aggregate, aggregateExpression = true; } else { // Not an aggregate statement, do we have a order by clause? int orderbyCount = selectExpression.OrderBy.Count; resultOrderExps = new Expression[orderbyCount]; resultOrderAsc = new bool[orderbyCount]; for (int i = 0; i < orderbyCount; ++i) { resultOrderExps[i] = selectExpression.OrderBy[i].Expression; resultOrderAsc[i] = selectExpression.OrderBy[i].IsAscending; } } // The sort composite if (resultOrderExps.Length > 0) { sortComposite = FunctionExpression.Composite(resultOrderExps, resultOrderAsc); } // Create a new query transform object QueryPlanner planner = new QueryPlanner(transaction); planner.SetFilterGraph(filterGraph); planner.SetJoinGraph(joinGraph); planner.SetResultSortComposite(sortComposite); Expression cheapResolution = planner.FindCheapResolution(); // If this is an aggregate query, we apply the aggregate filter to the // query plan. if (aggregateExpression) { FilterExpression aggregateFilter = new FilterExpression("aggregate", cheapResolution, sortComposite); cheapResolution = aggregateFilter; // Is there a having clause? Expression havingExp = selectExpression.Having; if (havingExp != null) { FilterExpression havingFilter = new FilterExpression("single_filter", cheapResolution, havingExp); cheapResolution = havingFilter; } // Is there an order by clause? int orderbyCount = selectExpression.OrderBy.Count; if (orderbyCount > 0) { Expression[] orderExps = new Expression[orderbyCount]; bool[] order_asc = new bool[orderbyCount]; for (int i = 0; i < orderbyCount; ++i) { orderExps[i] = selectExpression.OrderBy[i].Expression; order_asc[i] = selectExpression.OrderBy[i].IsAscending; } Expression aggrSortComposite = FunctionExpression.Composite(orderExps, order_asc); FilterExpression sortFilter = new FilterExpression("sort", cheapResolution, aggrSortComposite); cheapResolution = sortFilter; } } // cheap_resolution is the best plan found, now add decoration such as // filter terms, etc int outCount = selectExpression.Output.Count; FunctionExpression outFunction = new FunctionExpression("table_out"); for (int i = 0; i < outCount; ++i) { outFunction.Parameters.Add(selectExpression.Output[i].Expression); } // Set the filter, Expression outFilter = new FilterExpression("expression_table", cheapResolution, outFunction); QueryCostModel costModel = new QueryCostModel(transaction); costModel.ClearGraph(outFilter); costModel.Cost(outFilter, Double.PositiveInfinity, new int[1]); return outFilter; }
private List<TableName> RandomPlanSchedule(IList<QueryPredicate> expressions, IList<Expression> danglingExps, Expression joinGraph) { // Collect the set of table sources around the left branch, List<Expression> expList = new List<Expression>(); if (joinGraph is JoinExpression) { LeftDeepTableSources(expList, joinGraph); } else { expList.Add(joinGraph); } // Recurse on any right branches first (outer joins) and create a list of // sources List<TableName> sources = new List<TableName>(); int sz = expList.Count; for (int i = 0; i < sz; ++i) { Expression expression = expList[i]; if (expression is JoinExpression) { sources.AddRange(RandomPlanSchedule(expressions, danglingExps, expression)); } else { TableName tn = ((AliasTableNameExpression) expression).Alias; if (tn == null) throw new SystemException(); sources.Add(tn); } } // Now 'sources' is our domain of tables to work with // By the time this method returns, the domain 'sources' must be entirely // joined together in the danglingExps list. // The list of predicates that are entirely dependant on tables in this // source. List<QueryPredicate> predicates = new List<QueryPredicate>(); foreach(QueryPredicate expr in expressions) { int dependOnCount = expr.dependant_on.Count; // Some dependants if (dependOnCount > 0) { bool touchAll = true; foreach(TableName src in expr.dependant_on) { if (!sources.Contains(src)) { touchAll = false; break; } } // Add if the dependants of this expression are all contained within // the source domain. if (touchAll) { predicates.Add(expr); } } } while (true) { // Find a random predicate that can be scheduled in this domain. // Returns -1 if no predicate can be found in the set. If this is the // case and the source domain isn't entirely joined, then we perform a // cartesian join on any dangling tables. int ri = RandomPredicate(predicates, danglingExps); // If no predicates found, if (ri == -1) { // Break the main loop and return break; } // Remove the predicate from the list QueryPredicate predicate = predicates[ri]; predicates.RemoveAt(ri); // Pick the operations from the source this query is dependant on List<Expression> srcExps = new List<Expression>(); List<int> srcIds = new List<int>(); foreach(TableName tname in predicate.dependant_on) { int id = 0; foreach(Expression op in danglingExps) { if (IsASourceOf(tname, op)) { if (!srcIds.Contains(id)) { srcIds.Add(id); srcExps.Add(op); } break; } ++id; } } // Error condition if (srcExps.Count == 0) throw new ApplicationException("Unable to schedule predicate: " + predicate); // If we only found 1 predicate, we simply merge the predicate // expression as a scan operation. if (srcExps.Count <= 1) { int expPos = srcIds[0]; Expression oldExp = danglingExps[expPos]; // Make a filter with 'old_op' as the child and predicate.expression // as the filter danglingExps[expPos] = new FilterExpression("single_filter", oldExp, predicate.expression); } else { // If 2 or more // If more than 2, we randomly pick sources to merge as a cartesian // product while still maintaining the right requirement of the join // if there is one. if (srcExps.Count > 2) { // Randomize the list Util.CollectionsUtil.Shuffle(srcIds); // If the predicate has a right dependancy, put it on the end if (predicate.right_dependancy != null) { TableName farRight = predicate.right_dependancy[0]; int i = 0; foreach(int expId in srcIds) { Expression op = danglingExps[expId]; if (IsASourceOf(farRight, op)) { // swap with last element int lastI = srcIds.Count - 1; int lid = srcIds[lastI]; srcIds[lastI] = srcIds[i]; srcIds[i] = lid; } ++i; } } // Cartesian join the terms, left to right until we get to the last // element. Expression procExp = danglingExps[srcIds[0]]; for (int i = 1; i < srcIds.Count - 1; ++i) { procExp = new JoinExpression(procExp, danglingExps[srcIds[i]], JoinType.Cartesian, null); } // Remove the terms from the current layout list // Remember the expression on the right Expression leftExp1 = procExp; Expression rightExp1 = danglingExps[srcIds[srcIds.Count - 1]]; // Sort the id list int[] idSet = srcIds.ToArray(); Array.Sort(idSet); // Remove the values for (int i = idSet.Length - 1; i >= 0; --i) { danglingExps.RemoveAt(idSet[i]); } // Reset the src_ids and src_ops list srcIds.Clear(); srcExps.Clear(); // Add the left and right expression danglingExps.Add(leftExp1); danglingExps.Add(rightExp1); srcIds.Add(danglingExps.Count - 2); srcIds.Add(danglingExps.Count - 1); srcExps.Add(leftExp1); srcExps.Add(rightExp1); } // Ok, down to 2 to merge, int li; // Do we have a right requirement? if (predicate.right_dependancy != null) { // Yes, so either one src is part of the right dependancy or they // are both part of the right dependancy. Expression exp1 = srcExps[0]; Expression exp2 = srcExps[1]; int op1_c = 0; int op2_c = 0; foreach(TableName tname in predicate.right_dependancy) { if (IsASourceOf(tname, exp1)) { ++op1_c; } if (IsASourceOf(tname, exp2)) { ++op2_c; } } // If they are both part of the right dependancy, we cartesian join if (op1_c > 0 && op2_c > 0) { // TODO: throw new NotImplementedException(); } // If op1 is part of the right dependancy, if (op1_c > 0) { li = 1; } else { // If exp2 is part of the right dependancy, li = 0; } } else { // No right dependancy, // Heuristic - If one of the sources is not a fetch table command // then we have a greater chance to pick that as our left. This // encourages left deep scan graphs which are the sorts of graphs // we are interested in. ExpressionType type0 = srcExps[0].Type; ExpressionType type1 = srcExps[1].Type; if (type0 != ExpressionType.AliasTableName && type1 == ExpressionType.AliasTableName) { li = (random.Next(10) >= 2) ? 0 : 1; } else if (type1 != ExpressionType.AliasTableName && type0 == ExpressionType.AliasTableName) { li = (random.Next(10) >= 2) ? 1 : 0; } else { // Randomly pick if both are fetch table operations li = random.Next(2); } } Expression leftExp = srcExps[li]; int leftId = srcIds[li]; Expression rightExp = srcExps[(li + 1)%2]; int rightId = srcIds[(li + 1)%2]; // Schedule the join operation, // For 'join_inner', 'join_outer', etc // FIXME: check this ... JoinType jtype = !predicate.joinTypeSet ? JoinType.Inner : predicate.JoinType; // string join_type = "scan-" + jtype; JoinExpression join_op = new JoinExpression(leftExp, rightExp, jtype, predicate.expression); // Remove the left and right id from the list if (leftId > rightId) { danglingExps.RemoveAt(leftId); danglingExps.RemoveAt(rightId); } else { danglingExps.RemoveAt(rightId); danglingExps.RemoveAt(leftId); } // Add the new join danglingExps.Add(join_op); } } return sources; }
private Expression ProduceRandomPlan(long planSeed, IList<QueryPredicate> expressions, IList<Expression> sourceListExps, Expression joinGraph, Expression sortFunction, Expression staticExpression) { // The current list of dangling nodes - operations that have not yet been // joined by the query plan List<Expression> danglingExps = new List<Expression>(); // Populate it with our source tables for (int i = 0; i < sourceListExps.Count; i++) { Expression sourceExp = sourceListExps[i]; // Apply the static filter if necessary, if (staticExpression != null) { sourceExp = new FilterExpression("static_filter", sourceExp, staticExpression); } danglingExps.Add(sourceExp); } random = new Random((int)planSeed); RandomPlanSchedule(expressions, danglingExps, joinGraph); // Connect any remaining dangling operations to a cartesian product. while (danglingExps.Count > 1) { // We randomly connect the remaining elements together int p1 = 0; int p2 = 1; if (danglingExps.Count > 2) { p1 = random.Next(danglingExps.Count); p2 = p1; while (p2 == p1) { p2 = random.Next(danglingExps.Count); } } Expression left = danglingExps[p1]; Expression right = danglingExps[p2]; if (p1 > p2) { danglingExps.RemoveAt(p1); danglingExps.RemoveAt(p2); } else { danglingExps.RemoveAt(p2); danglingExps.RemoveAt(p1); } danglingExps.Add(new JoinExpression(left, right, JoinType.Cartesian, null)); } // The remaining operation is the query plan, Expression plan = danglingExps[0]; // Put the final ordering filter on the plan if the ordering is important if (sortFunction != null) plan = new FilterExpression("sort", plan, sortFunction); return plan; }
/// <summary> /// Forms a command plan <see cref="IQueryPlanNode"/> from the given /// <see cref="TableSelectExpression"/> and <see cref="TableExpressionFromSet"/>. /// </summary> /// <param name="db"></param> /// <param name="expression">Describes the <i>SELECT</i> command /// (or sub-command).</param> /// <param name="fromSet">Used to resolve expression references.</param> /// <param name="orderBy">A list of <see cref="ByColumn"/> objects /// that represent an optional <i>ORDER BY</i> clause. If this is null /// or the list is empty, no ordering is done.</param> /// <returns></returns> public static IQueryPlanNode FormQueryPlan(IDatabaseConnection db, TableSelectExpression expression, TableExpressionFromSet fromSet, IList<ByColumn> orderBy) { IQueryContext context = new DatabaseQueryContext(db); // ----- Resolve the SELECT list // If there are 0 columns selected, then we assume the result should // show all of the columns in the result. bool doSubsetColumn = (expression.Columns.Count != 0); // What we are selecting var columnSet = BuildColumnSet(expression, fromSet); // Prepare the column_set, columnSet.Prepare(context); ResolveOrderByRefs(columnSet, orderBy); // ----- // Set up plans for each table in the from clause of the command. For // sub-queries, we recurse. var tablePlanner = SetupPlanners(db, fromSet); // ----- // The WHERE and HAVING clauses FilterExpression whereClause = expression.Where; FilterExpression havingClause = expression.Having; whereClause = PrepareJoins(tablePlanner, expression, fromSet, whereClause); // Prepare the WHERE and HAVING clause, qualifies all variables and // prepares sub-queries. whereClause = PrepareSearchExpression(db, fromSet, whereClause); havingClause = PrepareSearchExpression(db, fromSet, havingClause); // Any extra Aggregate functions that are part of the HAVING clause that // we need to add. This is a list of a name followed by the expression // that contains the aggregate function. var extraAggregateFunctions = new List<Expression>(); if (havingClause != null && havingClause.Expression != null) { Expression newHavingClause = FilterHavingClause(havingClause.Expression, extraAggregateFunctions, context); havingClause = new FilterExpression(newHavingClause); } // Any GROUP BY functions, ObjectName[] groupByList; IList<Expression> groupByFunctions; var gsz = ResolveGroupBy(expression, fromSet, context, out groupByList, out groupByFunctions); // Resolve GROUP MAX variable to a reference in this from set ObjectName groupmaxColumn = ResolveGroupMax(expression, fromSet); // ----- // Now all the variables should be resolved and correlated variables set // up as appropriate. // If nothing in the FROM clause then simply evaluate the result of the // select if (fromSet.SetCount == 0) return EvaluateSingle(columnSet); // Plan the where clause. The returned node is the plan to evaluate the // WHERE clause. IQueryPlanNode node = tablePlanner.PlanSearchExpression(whereClause); Expression[] defFunList; string[] defFunNames; var fsz = MakeupFunctions(columnSet, extraAggregateFunctions, out defFunList, out defFunNames); node = PlanGroup(node, columnSet, groupmaxColumn, gsz, groupByList, groupByFunctions, fsz, defFunNames, defFunList); // The result column list List<SelectColumn> selectColumns = columnSet.SelectedColumns; int sz = selectColumns.Count; // Evaluate the having clause if necessary if (havingClause != null && havingClause.Expression != null) { // Before we evaluate the having expression we must substitute all the // aliased variables. Expression havingExpr = havingClause.Expression; havingExpr = SubstituteAliasedVariables(havingExpr, selectColumns); havingClause = new FilterExpression(havingExpr); PlanTableSource source = tablePlanner.SingleTableSource; source.UpdatePlan(node); node = tablePlanner.PlanSearchExpression(havingClause); } // Do we have a composite select expression to process? IQueryPlanNode rightComposite = null; if (expression.NextComposite != null) { TableSelectExpression compositeExpr = expression.NextComposite; // Generate the TableExpressionFromSet hierarchy for the expression, TableExpressionFromSet compositeFromSet = GenerateFromSet(compositeExpr, db); // Form the right plan rightComposite = FormQueryPlan(db, compositeExpr, compositeFromSet, null); } // Do we do a final subset column? ObjectName[] aliases = null; if (doSubsetColumn) { // Make up the lists ObjectName[] subsetVars = new ObjectName[sz]; aliases = new ObjectName[sz]; for (int i = 0; i < sz; ++i) { SelectColumn scol = selectColumns[i]; subsetVars[i] = scol.InternalName.Clone(); aliases[i] = scol.Alias.Clone(); } // If we are distinct then add the DistinctNode here if (expression.Distinct) node = new DistinctNode(node, subsetVars); // Process the ORDER BY? // Note that the ORDER BY has to occur before the subset call, but // after the distinct because distinct can affect the ordering of the // result. if (rightComposite == null && orderBy != null) node = PlanForOrderBy(node, orderBy, fromSet, selectColumns); // Rename the columns as specified in the SELECT node = new SubsetNode(node, subsetVars, aliases); } else { // Process the ORDER BY? if (rightComposite == null && orderBy != null) node = PlanForOrderBy(node, orderBy, fromSet, selectColumns); } // Do we have a composite to merge in? if (rightComposite != null) { // For the composite node = new CompositeNode(node, rightComposite, expression.CompositeFunction, expression.IsCompositeAll); // Final order by? if (orderBy != null) { node = PlanForOrderBy(node, orderBy, fromSet, selectColumns); } // Ensure a final subset node if (!(node is SubsetNode) && aliases != null) { node = new SubsetNode(node, aliases, aliases); } } return node; }
private ITable Filter(ITable table, FilterExpression op) { // The filter name string filter_name = op.Name; if (filter_name.Equals("single_filter")) return SingleFilter(table, op); if (filter_name.Equals("expression_table")) return ExpressionTableFilter(table, op); if (filter_name.Equals("sort")) return SortFilter(table, op); if (filter_name.Equals("static_filter")) return StaticFilter(table, op); if (filter_name.Equals("aggregate")) return AggregateFilter(table, op); throw new ApplicationException("Unknown filter type " + filter_name); }
private static void CostStaticFilterExpression(Expression child, FilterExpression expression) { // The child cost values double childRows = child.CostRows; double childTime = child.CostTime; // The filter operation Expression filter = expression.Filter; double estimatedChildRows = childRows; // If it's a fetch static, if (filter is FetchStaticExpression) { SqlObject[] val = ((FetchStaticExpression)filter).Values; // If not true, the filter will filter all, bool isTrue = false; if (val[0].Type.IsBoolean) { bool? b = val[0].Value.ToBoolean(); if (b.HasValue && b.Value.Equals(true)) isTrue = true; } if (!isTrue) { estimatedChildRows = 0.0d; } } // Set the time cost expression.CostRows = estimatedChildRows; expression.CostTime = childTime; }
private static FilterExpression PrepareJoins(QueryTableSetPlanner tablePlanner, TableSelectExpression expression, TableExpressionFromSet fromSet, FilterExpression whereClause) { // Look at the join set and resolve the ON Expression to this statement JoiningSet joinSet = expression.From.JoinSet; var result = whereClause; // Perform a quick scan and see if there are any outer joins in the // expression. bool allInnerJoins = true; for (int i = 0; i < joinSet.TableCount - 1; ++i) { JoinType type = joinSet.GetJoinType(i); if (type != JoinType.Inner) allInnerJoins = false; } // Prepare the joins for (int i = 0; i < joinSet.TableCount - 1; ++i) { JoinType type = joinSet.GetJoinType(i); Expression onExpression = joinSet.GetOnExpression(i); if (allInnerJoins) { // If the whole join set is inner joins then simply move the on // expression (if there is one) to the WHERE clause. if (onExpression != null) { result = result.Append(onExpression); } } else { // Not all inner joins, if (type == JoinType.Inner && onExpression == null) { // Regular join with no ON expression, so no preparation necessary } else { // Either an inner join with an ON expression, or an outer join with // ON expression if (onExpression == null) throw new Exception("No ON expression in join."); // Resolve the on_expression onExpression = onExpression.Prepare(fromSet.ExpressionQualifier); // And set it in the planner tablePlanner.SetJoinInfoBetweenSources(i, type, onExpression); } } } return result; }