/// <summary>
/// Builds the join predicate that attaches a candidate join unit to this component and
/// updates the component's bookkeeping while doing so: <c>Nodes</c>,
/// <c>MaterializedNodeSplitCount</c>, <c>EdgeMaterilizedDict</c>,
/// <c>UnmaterializedNodeMapping</c>, <c>SinkNodeStatisticsDict</c> and <c>TableRef</c>.
/// </summary>
/// <param name="candidateTree">Candidate unit whose root, materialized edges and unmaterialized edges are processed.</param>
/// <param name="statisticsCalculator">Supplies the selectivity of a leaf-to-leaf join between two edges.</param>
/// <param name="metaData">Passed through to <c>SpanTableRef</c> when an unmaterialized edge gets materialized.</param>
/// <param name="joinSelectivity">Starts at 1.0 and receives the product of the selectivities gathered for each generated predicate.</param>
/// <param name="sqlEstimatedJoinSelectivity">Starts at 1.0 and is folded over the collected densities (see the final loop).</param>
/// <returns>The accumulated join predicate, or <c>null</c> when no predicate was generated.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    out double joinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    joinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update Nodes
    if (MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already part of the component: bump its split count first, then
        // resolve the reference name (the order of these two statements is kept as-is)
        // and tie the new reference to the existing one by GlobalNodeId.
        MaterializedNodeSplitCount[root]++;
        nodeName = GetNodeRefName(root);
        joinCondition = CreateColumnEqualityCondition(
            root.RefAlias, "GlobalNodeId",
            nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        if (!Nodes.Contains(root))
        {
            Nodes.Add(root);
        }
        MaterializedNodeSplitCount[root] = 0;
    }

    List<double> densityList = new List<double>();

    List<MatchEdge> inEdges;
    if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        // The materialization flag of the first incoming edge decides the branch for all of them.
        var firstEdge = inEdges.First();
        bool materialized = EdgeMaterilizedDict[firstEdge];
        UnmaterializedNodeMapping.Remove(root);
        joinSelectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                CreateColumnEqualityCondition(
                    firstEdge.EdgeAlias, "Sink",
                    nodeName, "GlobalNodeId"));
            densityList.Add(root.GlobalNodeIdDensity);
        }
        else
        {
            // Component unmaterialized edge to root
            Statistics statistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);
                EdgeMaterilizedDict[edge] = true;

                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    CreateColumnEqualityCondition(
                        edge.EdgeAlias, "Sink",
                        nodeName, "GlobalNodeId"));

                double selectivity;
                statistics = Statistics.UpdateHistogram(statistics, edge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                densityList.Add(root.GlobalNodeIdDensity);
            }
            SinkNodeStatisticsDict[root] = statistics;
        }
    }

    foreach (var jointEdge in candidateTree.MaterializedEdges)
    {
        EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                CreateColumnEqualityCondition(
                    jointEdge.EdgeAlias, "Sink",
                    sinkNode.RefAlias, "GlobalNodeId"));

            Statistics sinkNodeStatistics;
            if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
            {
                // No statistics recorded for this sink yet: start from nothing and
                // additionally scale by the sink table's row count.
                sinkNodeStatistics = null;
                joinSelectivity *= 1.0 / sinkNode.TableRowCount;
            }

            double selectivity;
            var statistics = Statistics.UpdateHistogram(sinkNodeStatistics, jointEdge.Statistics, out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = statistics;
            densityList.Add(sinkNode.GlobalNodeIdDensity);
        }
        else
        {
            // Leaf to component unmaterialized node
            inEdges = UnmaterializedNodeMapping[sinkNode];
            var firstEdge = inEdges.First();
            bool firstEdgeMaterialized = EdgeMaterilizedDict[firstEdge];

            if (firstEdgeMaterialized)
            {
                // Leaf to materialized leaf
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    CreateColumnEqualityCondition(
                        jointEdge.EdgeAlias, "Sink",
                        firstEdge.EdgeAlias, "Sink"));
                densityList.Add(Statistics.DefaultDensity);

                double selectivity;
                var statistics = Statistics.UpdateHistogram(
                    SinkNodeStatisticsDict[sinkNode], jointEdge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                SinkNodeStatisticsDict[sinkNode] = statistics;
            }
            else
            {
                // Leaf to unmaterialized leaf
                Statistics compSinkNodeStatistics = null;
                foreach (var inEdge in inEdges)
                {
                    TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                    EdgeMaterilizedDict[inEdge] = true;

                    joinCondition = WBooleanBinaryExpression.Conjunction(
                        joinCondition,
                        CreateColumnEqualityCondition(
                            jointEdge.EdgeAlias, "Sink",
                            inEdge.EdgeAlias, "Sink"));
                    densityList.Add(Statistics.DefaultDensity);

                    // Only the selectivity of the leaf-to-leaf join is consumed here;
                    // the statistics object returned by the calculator is not used.
                    double selectivity;
                    statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge, out selectivity);
                    joinSelectivity *= selectivity;

                    // The selectivity written by this call is intentionally not applied.
                    compSinkNodeStatistics = Statistics.UpdateHistogram(
                        compSinkNodeStatistics, inEdge.Statistics, out selectivity);
                }
                SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;
            }
        }
    }

    // Register the candidate's unmaterialized edges so a later join can pick them up.
    foreach (var unmatEdge in candidateTree.UnmaterializedEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        if (!Nodes.Contains(unmatEdge.SinkNode))
        {
            Nodes.Add(unmatEdge.SinkNode);
        }
        var sinkNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        sinkNodeInEdges.Add(unmatEdge);
    }

    // Calculate Estimated Join Selectivity & Estimated Node Size.
    // Densities are visited from largest to smallest; each step multiplies the running
    // value by its own square root and by the current density.
    densityList.Sort();
    for (int i = densityList.Count - 1; i >= 0; i--)
    {
        sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
    }

    return joinCondition;
}

/// <summary>
/// Creates the predicate <c>firstTable.firstColumn = secondTable.secondColumn</c>
/// over two regular two-part column references.
/// </summary>
private static WBooleanComparisonExpression CreateColumnEqualityCondition(
    string firstTable,
    string firstColumn,
    string secondTable,
    string secondColumn)
{
    return new WBooleanComparisonExpression
    {
        FirstExpr = new WColumnReferenceExpression
        {
            ColumnType = ColumnType.Regular,
            MultiPartIdentifier = new WMultiPartIdentifier(
                new Identifier { Value = firstTable },
                new Identifier { Value = firstColumn })
        },
        SecondExpr = new WColumnReferenceExpression
        {
            ColumnType = ColumnType.Regular,
            MultiPartIdentifier = new WMultiPartIdentifier(
                new Identifier { Value = secondTable },
                new Identifier { Value = secondColumn })
        },
        ComparisonType = BooleanComparisonType.Equals
    };
}
/// <summary>
/// Calculates the cost of joining a candidate unit into this component and applies the join:
/// chooses between a loop join and a hash join (including build/probe order for the hash
/// join), rewrites <c>TableRef</c> accordingly, and updates the size, memory and cost
/// estimates of the component.
/// </summary>
/// <param name="nodeUnitCandidate">The candidate join unit being joined into the component.</param>
/// <param name="joinCondition">Join predicate placed on the generated qualified join.</param>
/// <param name="joinSelectivity">Selectivity multiplied into <c>Cardinality</c>.</param>
/// <param name="estimatedSelectivity">Selectivity used when computing the new SQL-estimated component size.</param>
/// <param name="metaData">Passed through to the calls that build the candidate's table reference.</param>
private void ConstructPhysicalJoinAndUpdateCost(
    CandidateJoinUnit nodeUnitCandidate,
    WBooleanExpression joinCondition,
    double joinSelectivity,
    double estimatedSelectivity,
    GraphMetaData metaData)
{
    var nodeDegrees = nodeUnitCandidate.EdgeDegrees;
    var nodeUnitSize = nodeUnitCandidate.TreeRoot.EstimatedRows * nodeDegrees;
    var estimatedNodeUnitSize = nodeUnitCandidate.TreeRoot.EstimatedRows * nodeUnitCandidate.SqlEstimatedEdgeDegrees;
    var componentSize = Cardinality;
    var estimatedCompSize = SqlEstimatedSize;
    var node = nodeUnitCandidate.TreeRoot;

    // If the node is already in the component, then only multiply the degree to get the size
    double nodeUnitActualSize;
    double newCompEstSize;
    if (MaterializedNodeSplitCount[node] > 0)
    {
        nodeUnitActualSize = nodeDegrees;

        // NOTE(review): 1000 looks like an assumed size factor per unmaterialized edge — confirm.
        var cEstEdge = Math.Pow(1000, EdgeMaterilizedDict.Count(e => !e.Value));
        var cSize = SqlEstimatedSize / cEstEdge;
        var nSize = node.EstimatedRows;
        if (nSize > cSize)
        {
            newCompEstSize = estimatedNodeUnitSize * cEstEdge * estimatedSelectivity;
        }
        else
        {
            newCompEstSize = SqlEstimatedSize
                             * Math.Pow(1000, nodeUnitCandidate.UnmaterializedEdges.Count)
                             * estimatedSelectivity;
        }
    }
    else
    {
        nodeUnitActualSize = nodeUnitSize;
        newCompEstSize = SqlEstimatedSize * estimatedNodeUnitSize * estimatedSelectivity;
    }

    // Never let the estimated size drop below one row.
    newCompEstSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;

    // The first join is the one where exactly two nodes are materialized and neither was split.
    bool firstJoin = MaterializedNodeSplitCount.Count == 2 &&
                     MaterializedNodeSplitCount.All(e => e.Value == 0);

    // Tuning thresholds for the join method / order decision.
    const double sizeFactor = 5;
    const double maxMemory = 1e8;

    double loopCost = componentSize * Math.Log(nodeUnitCandidate.TreeRoot.EstimatedRows, 512) * 0.20;
    double hashCost = componentSize + nodeUnitSize;
    double cost;

    // A loop join is only considered when the joins are purely leaf-to-sink joins, and then
    // only if it is cheaper than hashing or memory is under pressure for both possible build sides.
    bool useLoopJoin =
        nodeUnitCandidate.MaterializedEdges.Count == 0 &&
        (
            loopCost < hashCost ||
            (DeltaMemory + componentSize > maxMemory && DeltaMemory + nodeUnitSize > maxMemory)
        );

    if (useLoopJoin)
    {
        // Loop Join
        if (firstJoin)
        {
            RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
            RightestTableAlias = GetNodeRefName(node);
        }

        TotalMemory = DeltaMemory;
        SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory;
        SqlEstimatedSize = estimatedCompSize * estimatedNodeUnitSize /
                           nodeUnitCandidate.TreeRoot.TableRowCount;

        cost = loopCost;

        // Update TableRef
        TableRef = new WParenthesisTableReference
        {
            Table = new WQualifiedJoin
            {
                FirstTableRef = TableRef,
                SecondTableRef = nodeUnitCandidate.ToTableReference(
                    GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData),
                JoinCondition = joinCondition,
                QualifiedJoinType = QualifiedJoinType.Inner,
                JoinHint = JoinHint.Loop
            }
        };
    }
    else
    {
        // Hash Join
        cost = hashCost;

        WBooleanExpression adjustedJoincondition;
        double adjustedSqlEstimatedSize;
        WTableReference buildTableReference;
        WTableReference probeTableReference;

        if (firstJoin)
        {
            var nodeInComp = MaterializedNodeSplitCount.Keys.First(e => e != node);

            if (nodeUnitSize < componentSize)
            {
                // The candidate unit is the smaller input: build on it, probe with the component.
                buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData,
                    out adjustedJoincondition, out adjustedSqlEstimatedSize);
                probeTableReference = TableRef;

                TotalMemory = DeltaMemory = nodeUnitSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = estimatedNodeUnitSize;

                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);
            }
            else
            {
                // The Rightest* members are set to the node already in the component before
                // AdjustEstimation(this, ...) runs and are reassigned to the new node after it.
                // NOTE(review): presumably AdjustEstimation reads them — keep this order.
                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);

                buildTableReference = AdjustEstimation(this,
                    out adjustedJoincondition, out adjustedSqlEstimatedSize);
                probeTableReference = nodeUnitCandidate.ToTableReference(
                    GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData);

                TotalMemory = DeltaMemory = componentSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = SqlEstimatedSize;

                RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
                RightestTableAlias = GetNodeRefName(node);
            }
        }
        else if (componentSize * sizeFactor < nodeUnitSize)
        {
            // Left Deep: the component is much smaller than the unit, so build on the component.
            // Adjust estimation in sql server
            buildTableReference = AdjustEstimation(this,
                out adjustedJoincondition, out adjustedSqlEstimatedSize);
            probeTableReference = nodeUnitCandidate.ToTableReference(
                GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData);

            var curDeltaMemory = componentSize;
            TotalMemory = DeltaMemory + curDeltaMemory;
            DeltaMemory = curDeltaMemory;

            var curDeltaEstimateMemory = SqlEstimatedSize;
            SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory + curDeltaEstimateMemory;
            SqlEstimatedDeltaMemory = curDeltaEstimateMemory;

            RightestTableAlias = GetNodeRefName(node);
            RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
        }
        else
        {
            // Right Deep: build on the candidate unit and keep probing with the component.
            buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData,
                out adjustedJoincondition, out adjustedSqlEstimatedSize);
            probeTableReference = TableRef;

            TotalMemory += nodeUnitSize;
            DeltaMemory = TotalMemory;
            SqlEstimatedTotalMemory += estimatedNodeUnitSize;
            SqlEstimatedDeltaMemory = SqlEstimatedTotalMemory;
        }

        newCompEstSize *= adjustedSqlEstimatedSize;

        // Update TableRef
        TableRef = new WParenthesisTableReference
        {
            Table = new WQualifiedJoin
            {
                FirstTableRef = buildTableReference,
                SecondTableRef = probeTableReference,
                JoinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, adjustedJoincondition),
                QualifiedJoinType = QualifiedJoinType.Inner,
                JoinHint = JoinHint.Hash
            }
        };

        SqlEstimatedSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;
    }

    // Update Size
    Cardinality *= nodeUnitActualSize * joinSelectivity;

    // Update Cost
    Cost += cost;
}