private ITable Join(ITable left, ITable right, JoinExpression op) { // Get the type of join, JoinType joinType = op.JoinType; // The filter expression Expression filterExp = op.Filter; // If it's a simple relation bool simpleRelation = op.IsSimpleRelation; // If the join is not a simple relation, then we need to naturally join // and scan if (!simpleRelation) { JoinedTableBase result = new NaturalJoinedTable(left, right); result.SetOrderCompositeIsChild(); if (filterExp != null) // return the scan over the cartesian product return FilterByScan(result, filterExp); return result; } // This is a simple relation so we may not need to scan over the // cartesian join. A simple relation is of the type '[something1] // [comparison] [something2]' where something1 and 2 reference terms // in the right and left tables exclusively, or a multi variable // equivalence comparison such as 't1.a = t2.a and t1.b = t2.b'. // A join of this type should always be a scan on the left and lookup // on the right. // The process cost (roughly) long processCost = 0; // NOTE, these are marked up by the QueryCostModel (perhaps should move // this markup functionality in the planner. IList<Expression> leftVarExps = (IList<Expression>)op.GetArgument("!left_var_exps"); IList<Expression> rightVarExps = (IList<Expression>)op.GetArgument("!right_var_exps"); IList<string> functionTypes = (IList<string>)op.GetArgument("!function_types"); // Right index, if applicable string rIndexStr = (string)op.GetArgument("use_right_index"); TableName rIndexTableName = (TableName)op.GetArgument("use_right_index_table_name"); // If the right index is defined, then we know the cost model has // determined the right table has a single index we can use. IIndexSetDataSource rightIndex; IndexResolver rightResolver; if (rIndexStr != null) { // Fetch the index rightIndex = GetIndex(right, rIndexStr); // If no index, we screwed up somewhere. Error in cost model most // likely. if (rightIndex == null) throw new ApplicationException("Right index '" + rIndexStr + "' not found."); // Create a resolver for the right table IndexCollation rcollation = rightIndex.Collation; rightResolver = new CollationIndexResolver(right, rcollation); } else { // No right index, so we need to prepare a temporary index // We index on the right var ops (note that 'right_var_ops' will not // necessarily be a variable reference, it may be a complex expression). // Create the resolver for the term(s) on the right table Expression[] rops = new Expression[rightVarExps.Count]; rightVarExps.CopyTo(rops, 0); rightResolver = CreateResolver(right, rops); // The working set, IIndex<RowId> workingSet = transaction.CreateTemporaryIndex<RowId>(right.RowCount); // Iterate over the right table IRowCursor rightCursor = right.GetRowCursor(); // Wrap in a forward prefetch cursor rightCursor = new PrefetchRowCursor(rightCursor, right); while (rightCursor.MoveNext()) { // The rowid RowId rowid = rightCursor.Current; // Fetch the SqlObject SqlObject[] value = rightResolver.GetValue(rowid); // Index it workingSet.Insert(value, rowid, rightResolver); } // Map this into a RowIndex object, rightIndex = new IndexBasedIndexSetDataSource(right, rightResolver, workingSet); // Rough cost estimate of a sort on the right elements processCost += rightCursor.Count * 5; } // Now we have a rightIndex and rightResolver that describes the keys // we are searching for. Scan the left table and lookup values in the // right. // The join function string joinFunctionName = functionTypes[0]; // Work out the maximum number of elements needed to perform this join long maxSize; long leftSize = left.RowCount; long rightSize = right.RowCount; // Make sure to account for the possibility of overflow if (leftSize < Int32.MaxValue && rightSize < Int32.MaxValue) { maxSize = leftSize * rightSize; } else { // This is a poor estimate, but it meets the requirements of the // contract of 'createTemporaryIndex'. Idea: use a BigDecimal here? maxSize = Int64.MaxValue; } // Allocate the indexes IIndex<RowId> leftSet = transaction.CreateTemporaryIndex<RowId>(maxSize); IIndex<RowId> rightSet = transaction.CreateTemporaryIndex<RowId>(maxSize); // Create a resolver for the left terms Expression[] lops = new Expression[leftVarExps.Count]; leftVarExps.CopyTo(lops, 0); IndexResolver leftResolver = CreateResolver(left, lops); // Cursor over the left table IRowCursor leftCursor = left.GetRowCursor(); // Wrap in a forward prefetch cursor leftCursor = new PrefetchRowCursor(leftCursor, left); while (leftCursor.MoveNext()) { // The left rowid RowId leftRowid = leftCursor.Current; // TODO: Need to change this to support multi-column join // conditions, // Fetch it into a SqlObject SqlObject[] value = leftResolver.GetValue(leftRowid); // lookup in the right SelectableRange joinRange = SelectableRange.Full; joinRange = joinRange.Intersect(SelectableRange.GetOperatorFromFunction(joinFunctionName), value); IRowCursor matchedResult = rightIndex.Select(joinRange); // If there are elements if (matchedResult.Count > 0) { // For each matched element, add a left rowid and right rowid while (matchedResult.MoveNext()) { RowId rightRowid = matchedResult.Current; leftSet.Add(leftRowid); rightSet.Add(rightRowid); } } else { // If there are no elements, is this an outer join? if (joinType == JoinType.OuterLeft) { // Yes, so add left with a null entry, leftSet.Add(leftRowid); rightSet.Add(null); } } } // Rough cost estimate on the scan/lookup processCost += (left.RowCount + (left.RowCount * 5)); // Return the joined table. JoinedTableBase joinTable = new JoinedTable(left, right, leftSet, rightSet); joinTable.SetOrderCompositeIsChild(); return joinTable; }
public void PerformSample(SystemTransaction transaction) { // Translate into tables and column names ITable tableSource = transaction.GetTable(var.TableName); // DOn't bother unless the table has 64 or more values if (tableSource.RowCount < (DivisionPointCount * 2)) { sampleCount = 0; totalSize = 0; return; } // The number of elements in total totalSize = tableSource.RowCount; // The actual number of samples, sampleCount = (int)System.Math.Min(tableSource.RowCount / 2, MaxSampleCount); String col_name = var.Name; int colId = tableSource.Columns.IndexOf(var.Name); // Work out the size long size = tableSource.RowCount; // The sample point difference double sampleDiff = (double)size / sampleCount; // The index of the tables used in sampling IIndex<RowId> sampleIndex = transaction.CreateTemporaryIndex<RowId>(sampleCount); // Create a RowIndexCollation for this SqlType type; type = tableSource.Columns[colId].Type; IndexCollation collation = new IndexCollation(type, col_name); // Create the collation object, CollationIndexResolver resolver = new CollationIndexResolver(tableSource, collation); // The row cursor IRowCursor rowCursor = tableSource.GetRowCursor(); RowId[] sampleRowset = new RowId[sampleCount]; // First read in the row_ids we are sampling, { // The current sample point double p = 0; // The number read, int samplesRead = 0; // Make a sorted sample index of the dataset while (samplesRead < sampleCount) { long pos = ((long)p) - 1; pos = System.Math.Min(pos, tableSource.RowCount - 2); rowCursor.MoveTo(pos); if (!rowCursor.MoveNext()) throw new SystemException(); RowId rowId = rowCursor.Current; sampleRowset[samplesRead] = rowId; // Should this be Math.random(sample_diff * 2) for random distribution // of the samples? p += sampleDiff; ++samplesRead; } } // Now read the samples, { int samplePoint = 0; foreach (RowId rowId in sampleRowset) { // Hint ahead the samples we are picking, if ((samplePoint % 24) == 0) { for (int i = samplePoint; i < samplePoint + 24 && i < sampleRowset.Length; ++i) { tableSource.PrefetchValue(-1, sampleRowset[i]); } } // Pick the sample and sort it, SqlObject[] sample = new SqlObject[] { tableSource.GetValue(colId, rowId) }; sampleIndex.Insert(sample, rowId, resolver); ++samplePoint; } } // Now extract the interesting sample points from the sorted set IIndexCursor<RowId> samplesCursor = sampleIndex.GetCursor(); long sampleIndexSize = sampleIndex.Count; double divisionDiff = sampleIndexSize / (DivisionPointCount - 1); for (int i = 0; i < DivisionPointCount; ++i) { long samplePoint = (long)(divisionDiff * i); if (samplePoint >= sampleIndexSize) { samplePoint = sampleIndexSize - 1; } samplesCursor.Position = samplePoint - 1; if (!samplesCursor.MoveNext()) throw new SystemException(); RowId rowId = samplesCursor.Current; divisionPoints[i] = tableSource.GetValue(colId, rowId); } // Clear the temporary index sampleIndex.Clear(); }