/// <summary>
/// Builds the table reference for a candidate join unit and, when its actual edge degree
/// and its SQL-estimated edge degree differ by more than a factor of ten, produces the
/// pieces used to adjust the SQL Server estimation (an up-sized table reference and/or a
/// down-size join condition).
/// </summary>
/// <param name="candidateJoinUnit">Join unit whose EdgeDegrees, SqlEstimatedEdgeDegrees and TreeRoot.EstimatedRows are read.</param>
/// <param name="nodeAlias">Alias forwarded to ToTableReference and ConstructDownSizeJoinCondition.</param>
/// <param name="metaData">Graph metadata forwarded to ToTableReference.</param>
/// <param name="joinCondition">Receives the down-size join condition when the estimate is more than ten times the actual degree; otherwise null.</param>
/// <param name="affectedSqlEstimatedSize">Receives the accumulated adjustment factor: starts at 1.0, is divided by the shrink factor and multiplied by the factor reported by ConstructUpSizeTableReference.</param>
/// <returns>The unit's table reference, replaced by the up-sized reference when the computed factor is greater than one.</returns>
private static WTableReference AdjustEstimation(CandidateJoinUnit candidateJoinUnit, string nodeAlias, GraphMetaData metaData, out WBooleanExpression joinCondition, out double affectedSqlEstimatedSize)
{
    const int toleranceFactor = 10;

    double actualDegree = candidateJoinUnit.EdgeDegrees;
    double sqlDegree = candidateJoinUnit.SqlEstimatedEdgeDegrees;
    double rootRows = candidateJoinUnit.TreeRoot.EstimatedRows;
    WTableReference result = candidateJoinUnit.ToTableReference(nodeAlias, metaData);

    affectedSqlEstimatedSize = 1.0;
    joinCondition = null;

    int upSizeFactor = 0;
    if (actualDegree > toleranceFactor * sqlDegree)
    {
        // Estimate is too small: scale it up by the ratio of actual to estimated degree.
        upSizeFactor = (int)Math.Ceiling(actualDegree / sqlDegree);
    }
    else if (toleranceFactor * actualDegree < sqlDegree)
    {
        // Estimate is too large: shrink it first, then recompute the remaining ratio.
        double shrinkFactor = 1.0 / (1 - Math.Pow((1 - 1.0 / rootRows), 1.5));
        affectedSqlEstimatedSize /= shrinkFactor;
        sqlDegree /= shrinkFactor;
        upSizeFactor = (int)Math.Ceiling(actualDegree / sqlDegree);
        joinCondition = ConstructDownSizeJoinCondition(nodeAlias);
    }

    if (upSizeFactor <= 1)
    {
        return result;
    }

    double upSizeEffect;
    result = ConstructUpSizeTableReference(result, upSizeFactor, out upSizeEffect);
    affectedSqlEstimatedSize *= upSizeEffect;
    return result;
}
/// <summary>
/// Grows a match component over the connected sub-graph one candidate join unit at a time
/// and returns the traversal order generated from the traversal chain of the first
/// component that covers all active nodes and all non-dangling edges.
/// </summary>
/// <param name="subGraph">Connected component whose Nodes, Edges and ActiveNodeCount are read.</param>
/// <returns>
/// The traversal order produced by GenerateTraversalOrderFromTraversalChain, or null when
/// the list of component states becomes empty before a complete component is found.
/// </returns>
/// <exception cref="GraphViewException">Thrown when the number of next states has reached MaxStates.</exception>
public override List<Tuple<MatchNode, MatchEdge, List<MatchEdge>, List<MatchEdge>, List<MatchEdge>>> GetOptimizedTraversalOrder(ConnectedComponent subGraph)
{
    // A single-node sub-graph has a one-step chain with no edges.
    if (subGraph.Nodes.Count == 1)
    {
        var onlyStep = new Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>(
            subGraph.Nodes.First().Value, null, null, null, null);
        var onlyChain = new List<Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>> { onlyStep };
        return this.GenerateTraversalOrderFromTraversalChain(onlyChain);
    }

    // Start from the first node without incoming edges, if there is one.
    List<MatchComponent> frontier = new List<MatchComponent>();
    foreach (var entry in subGraph.Nodes)
    {
        if (entry.Value.ReverseNeighbors.Count == 0)
        {
            frontier.Add(new MatchComponent(entry.Value));
            break;
        }
    }

    // Otherwise fall back to the first node of the sub-graph.
    if (frontier.Count == 0)
    {
        frontier.Add(new MatchComponent(subGraph.Nodes.First().Value));
    }

    while (frontier.Any())
    {
        List<MatchComponent> successors = new List<MatchComponent>();

        foreach (MatchComponent state in frontier)
        {
            OneHeightTree nodeUnits = this.GetNodeUnits(subGraph, state);

            // No node units left: the state is final if it covers every active node
            // and has materialized every non-dangling edge.
            if (nodeUnits == null)
            {
                if (state.ActiveNodeCount == subGraph.ActiveNodeCount)
                {
                    if (state.EdgeMaterilizedDict.Count(e => e.Value == true) ==
                        subGraph.Edges.Count(e => e.Value.IsDanglingEdge == false))
                    {
                        return this.GenerateTraversalOrderFromTraversalChain(state.TraversalChain);
                    }
                }
            }

            CandidateJoinUnit candidateUnit = this.GetCandidateUnits2(nodeUnits, state);

            // Extend the current state with the candidate to obtain the next state.
            MatchComponent successor = this.GetNextState(state, candidateUnit);

            if (successors.Count >= MaxStates)
            {
                throw new GraphViewException("This graph pattern is not supported yet.");
            }

            successors.Add(successor);
        }

        frontier = successors;
    }

    return null;
}
/// <summary>
/// Produces the next component state: copy-constructs the current component, applies the
/// candidate join unit to the copy, and appends the corresponding traversal-chain step.
/// </summary>
/// <param name="curComponent">Component to copy; passed to the MatchComponent copy constructor.</param>
/// <param name="candidateTree">Candidate join unit applied to the copy.</param>
/// <returns>The new component; <paramref name="curComponent"/> is not passed to the update helpers.</returns>
internal MatchComponent GetNextState2(MatchComponent curComponent, CandidateJoinUnit candidateTree)
{
    MatchComponent successor = new MatchComponent(curComponent);

    // Fold the candidate unit into the copy first, then record the traversal step.
    UpdateComponent(successor, candidateTree);
    ConstructTraversalChainAndUpdateCost2(successor, candidateTree);

    return successor;
}
/// <summary>
/// Transitions from this component to the component of the next state by joining the
/// given candidate join unit onto a copy of this component.
/// </summary>
/// <param name="candidateTree">Candidate join unit to join onto the copy.</param>
/// <param name="statisticsCalculator">Statistics calculator forwarded to ConstructJoinCondition.</param>
/// <param name="metaData">Graph metadata forwarded to both construction steps.</param>
/// <returns>The copy of this component after the join condition and physical join have been constructed on it.</returns>
public MatchComponent GetNextState(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData)
{
    var successor = new MatchComponent(this);

    // Step 1: build the join condition; the two selectivities come back through out parameters.
    double actualSelectivity;
    double sqlSelectivity;
    var condition = successor.ConstructJoinCondition(
        candidateTree,
        statisticsCalculator,
        metaData,
        out actualSelectivity,
        out sqlSelectivity);

    // Step 2: choose the physical join, rebuild the table reference and update the cost.
    successor.ConstructPhysicalJoinAndUpdateCost(
        candidateTree,
        condition,
        actualSelectivity,
        sqlSelectivity,
        metaData);

    return successor;
}
/// <summary>
/// Builds the table reference for a candidate join unit and, when the product of the
/// average degrees of its pre-materialized outgoing edges differs from the assumed
/// estimate (1000 raised to the number of those edges) by more than a factor of ten,
/// produces the pieces used to adjust the SQL Server estimation.
/// </summary>
/// <param name="candidateJoinUnit">Join unit whose PreMatOutgoingEdges and TreeRoot.EstimatedRows are read.</param>
/// <param name="nodeAlias">Alias passed twice to ToTableReference and once to ConstructDownSizeJoinCondition.</param>
/// <param name="metaData">Graph metadata forwarded to ToTableReference.</param>
/// <param name="joinCondition">Receives the down-size join condition when the estimate is more than ten times the actual size; otherwise null.</param>
/// <param name="affectedSqlEstimatedSize">Receives the accumulated adjustment factor: starts at 1.0, is divided by the shrink factor and multiplied by the factor reported by ConstructUpSizeTableReference.</param>
/// <returns>The unit's table reference, replaced by the up-sized reference when the computed factor is greater than one.</returns>
private static WTableReference AdjustEstimation(CandidateJoinUnit candidateJoinUnit, string nodeAlias, GraphMetaData metaData, out WBooleanExpression joinCondition, out double affectedSqlEstimatedSize)
{
    const int toleranceFactor = 10;

    List<MatchEdge> preMatOutEdges = candidateJoinUnit.PreMatOutgoingEdges;

    // Actual size: product of the edges' average degrees (1.0 for no edges).
    double actualSize = preMatOutEdges.Aggregate(1.0, (product, edge) => product * edge.AverageDegree);
    double sqlSize = Math.Pow(1000, preMatOutEdges.Count);
    double rootRows = candidateJoinUnit.TreeRoot.EstimatedRows;
    WTableReference result = candidateJoinUnit.ToTableReference(nodeAlias, nodeAlias, metaData);

    affectedSqlEstimatedSize = 1.0;
    joinCondition = null;

    int upSizeFactor = 0;
    if (actualSize > toleranceFactor * sqlSize)
    {
        // Estimate is too small: scale it up by the ratio of actual to estimated size.
        upSizeFactor = (int)Math.Ceiling(actualSize / sqlSize);
    }
    else if (toleranceFactor * actualSize < sqlSize)
    {
        // Estimate is too large: shrink it first, then recompute the remaining ratio.
        double shrinkFactor = 1.0 / (1 - Math.Pow((1 - 1.0 / rootRows), 1.5));
        affectedSqlEstimatedSize /= shrinkFactor;
        sqlSize /= shrinkFactor;
        upSizeFactor = (int)Math.Ceiling(actualSize / sqlSize);
        joinCondition = ConstructDownSizeJoinCondition(nodeAlias);
    }

    if (upSizeFactor <= 1)
    {
        return result;
    }

    double upSizeEffect;
    result = ConstructUpSizeTableReference(result, upSizeFactor, out upSizeEffect);
    affectedSqlEstimatedSize *= upSizeEffect;
    return result;
}
/// <summary>
/// Transitions from this component to the component of the next state by joining the
/// given candidate join unit onto a copy of this component.
/// </summary>
/// <param name="candidateTree">Candidate join unit to join onto the copy.</param>
/// <param name="statisticsCalculator">Statistics calculator forwarded to ConstructJoinCondition.</param>
/// <param name="metaData">Graph metadata forwarded to both construction steps.</param>
/// <param name="srcNodeStatisticsDict">Statistics dictionary keyed by (string, bool) tuples, forwarded to ConstructJoinCondition.</param>
/// <returns>The copy of this component after the join condition and physical join have been constructed on it.</returns>
public MatchComponent GetNextState(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    Dictionary<Tuple<string, bool>, Statistics> srcNodeStatisticsDict)
{
    var successor = new MatchComponent(this);

    // Step 1: build the join condition; the three selectivities come back through out parameters.
    double preSelectivity;
    double postSelectivity;
    double sqlSelectivity;
    var condition = successor.ConstructJoinCondition(
        candidateTree,
        statisticsCalculator,
        metaData,
        srcNodeStatisticsDict,
        out preSelectivity,
        out postSelectivity,
        out sqlSelectivity);

    // Step 2: choose the physical join, rebuild the table reference and update the cost.
    successor.ConstructPhysicalJoinAndUpdateCost(
        candidateTree,
        condition,
        preSelectivity,
        postSelectivity,
        sqlSelectivity,
        metaData);

    return successor;
}
/// <summary>
/// Builds the join condition that attaches a candidate join unit to this component and
/// updates the component's bookkeeping (materialized nodes and edges, unmaterialized node
/// mapping, sink-node statistics, and the component table reference) along the way.
/// </summary>
/// <param name="candidateTree">Candidate join unit; its TreeRoot, MaterializedEdges and UnmaterializedEdges are read.</param>
/// <param name="statisticsCalculator">Used for leaf-to-leaf selectivity when both leaves were unmaterialized.</param>
/// <param name="metaData">Graph metadata forwarded to SpanTableRef.</param>
/// <param name="joinSelectivity">Receives the product of the selectivities gathered while building the condition (starts at 1.0).</param>
/// <param name="sqlEstimatedJoinSelectivity">Receives the selectivity derived from the collected densities, folded from the largest density to the smallest.</param>
/// <returns>The conjunction of all equality predicates built, or null when none was added.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    out double joinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    joinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    var root = candidateTree.TreeRoot;
    WBooleanExpression condition = null;
    string rootRefName = "";

    // Register the root, or add a self-join predicate when it is already materialized.
    if (MaterializedNodeSplitCount.ContainsKey(root))
    {
        MaterializedNodeSplitCount[root]++;
        rootRefName = GetNodeRefName(root);
        condition = BuildColumnEqualityCondition(root.RefAlias, "GlobalNodeId", rootRefName, "GlobalNodeId");
    }
    else
    {
        rootRefName = root.RefAlias;
        if (!Nodes.Contains(root))
        {
            Nodes.Add(root);
        }
        MaterializedNodeSplitCount[root] = 0;
    }

    List<double> densities = new List<double>();

    // Edges of the component that already point at the root.
    List<MatchEdge> inEdges;
    if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var headEdge = inEdges.First();
        bool headEdgeMaterialized = EdgeMaterilizedDict[headEdge];
        UnmaterializedNodeMapping.Remove(root);
        joinSelectivity *= 1.0 / root.TableRowCount;

        if (headEdgeMaterialized)
        {
            // Component materialized edge to root.
            condition = WBooleanBinaryExpression.Conjunction(
                condition,
                BuildColumnEqualityCondition(headEdge.EdgeAlias, "Sink", rootRefName, "GlobalNodeId"));
            densities.Add(root.GlobalNodeIdDensity);
        }
        else
        {
            // Component unmaterialized edges to root: materialize each of them now.
            Statistics rootStatistics = null;
            foreach (var edge in inEdges)
            {
                TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);
                EdgeMaterilizedDict[edge] = true;
                condition = WBooleanBinaryExpression.Conjunction(
                    condition,
                    BuildColumnEqualityCondition(edge.EdgeAlias, "Sink", rootRefName, "GlobalNodeId"));

                double edgeSelectivity;
                rootStatistics = Statistics.UpdateHistogram(rootStatistics, edge.Statistics, out edgeSelectivity);
                joinSelectivity *= edgeSelectivity;
                densities.Add(root.GlobalNodeIdDensity);
            }
            SinkNodeStatisticsDict[root] = rootStatistics;
        }
    }

    // Materialized edges of the candidate unit that land inside the component.
    foreach (var jointEdge in candidateTree.MaterializedEdges)
    {
        EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node.
            condition = WBooleanBinaryExpression.Conjunction(
                condition,
                BuildColumnEqualityCondition(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));

            Statistics knownSinkStatistics;
            if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out knownSinkStatistics))
            {
                knownSinkStatistics = null;
                joinSelectivity *= 1.0 / sinkNode.TableRowCount;
            }

            double edgeSelectivity;
            var mergedStatistics = Statistics.UpdateHistogram(knownSinkStatistics, jointEdge.Statistics, out edgeSelectivity);
            joinSelectivity *= edgeSelectivity;
            SinkNodeStatisticsDict[sinkNode] = mergedStatistics;
            densities.Add(sinkNode.GlobalNodeIdDensity);
            continue;
        }

        // Leaf to component unmaterialized node.
        inEdges = UnmaterializedNodeMapping[sinkNode];
        var headEdge = inEdges.First();
        bool headEdgeMaterialized = EdgeMaterilizedDict[headEdge];

        if (headEdgeMaterialized)
        {
            // Leaf to materialized leaf.
            condition = WBooleanBinaryExpression.Conjunction(
                condition,
                BuildColumnEqualityCondition(jointEdge.EdgeAlias, "Sink", headEdge.EdgeAlias, "Sink"));
            densities.Add(Statistics.DefaultDensity);

            double edgeSelectivity;
            var mergedStatistics = Statistics.UpdateHistogram(SinkNodeStatisticsDict[sinkNode], jointEdge.Statistics, out edgeSelectivity);
            joinSelectivity *= edgeSelectivity;
            SinkNodeStatisticsDict[sinkNode] = mergedStatistics;
        }
        else
        {
            // Leaf to unmaterialized leaf: materialize every pending edge into the sink.
            Statistics componentSinkStatistics = null;
            foreach (var inEdge in inEdges)
            {
                TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                EdgeMaterilizedDict[inEdge] = true;
                condition = WBooleanBinaryExpression.Conjunction(
                    condition,
                    BuildColumnEqualityCondition(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                densities.Add(Statistics.DefaultDensity);

                // Only the selectivity reported by the calculator feeds the join selectivity;
                // the value reported by UpdateHistogram below is not used.
                double edgeSelectivity;
                statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge, out edgeSelectivity);
                joinSelectivity *= edgeSelectivity;
                componentSinkStatistics = Statistics.UpdateHistogram(componentSinkStatistics, inEdge.Statistics, out edgeSelectivity);
            }
            SinkNodeStatisticsDict[sinkNode] = componentSinkStatistics;
        }
    }

    // Record the unit's unmaterialized edges as pending edges of their sink nodes.
    foreach (var unmatEdge in candidateTree.UnmaterializedEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        if (!Nodes.Contains(unmatEdge.SinkNode))
        {
            Nodes.Add(unmatEdge.SinkNode);
        }
        var pendingInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        pendingInEdges.Add(unmatEdge);
    }

    // Fold the densities from largest to smallest into the SQL-estimated selectivity.
    densities.Sort();
    densities.Reverse();
    foreach (double density in densities)
    {
        sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * density;
    }

    return condition;
}

/// <summary>
/// Creates the predicate <c>leftQualifier.leftColumn = rightQualifier.rightColumn</c>
/// over two regular column references.
/// </summary>
private static WBooleanComparisonExpression BuildColumnEqualityCondition(
    string leftQualifier,
    string leftColumn,
    string rightQualifier,
    string rightColumn)
{
    var leftReference = new WColumnReferenceExpression
    {
        ColumnType = ColumnType.Regular,
        MultiPartIdentifier = new WMultiPartIdentifier(
            new Identifier { Value = leftQualifier },
            new Identifier { Value = leftColumn })
    };
    var rightReference = new WColumnReferenceExpression
    {
        ColumnType = ColumnType.Regular,
        MultiPartIdentifier = new WMultiPartIdentifier(
            new Identifier { Value = rightQualifier },
            new Identifier { Value = rightColumn })
    };

    return new WBooleanComparisonExpression
    {
        FirstExpr = leftReference,
        SecondExpr = rightReference,
        ComparisonType = BooleanComparisonType.Equals
    };
}
/// <summary>
/// Chooses between a loop join and a hash join for attaching the candidate join unit to
/// this component, rebuilds <c>TableRef</c> as the chosen join, and updates the component's
/// cardinality, SQL-estimated size, memory counters, right-most table info and cost.
/// </summary>
/// <param name="nodeUnitCandidate">Candidate join unit being joined; its TreeRoot, edge degrees and edge lists are read.</param>
/// <param name="joinCondition">Join condition placed on the new qualified join (conjoined with the adjustment condition for hash joins).</param>
/// <param name="joinSelectivity">Selectivity multiplied into <c>Cardinality</c>.</param>
/// <param name="estimatedSelectivity">Selectivity multiplied into the new SQL-estimated component size.</param>
/// <param name="metaData">Graph metadata forwarded to ToTableReference and AdjustEstimation.</param>
private void ConstructPhysicalJoinAndUpdateCost(
    CandidateJoinUnit nodeUnitCandidate,
    WBooleanExpression joinCondition,
    double joinSelectivity,
    double estimatedSelectivity,
    GraphMetaData metaData)
{
    // Snapshot the sizes before any member below is modified.
    var nodeDegrees = nodeUnitCandidate.EdgeDegrees;
    var nodeUnitSize = nodeUnitCandidate.TreeRoot.EstimatedRows * nodeDegrees;
    var estimatedNodeUnitSize = nodeUnitCandidate.TreeRoot.EstimatedRows*
                                nodeUnitCandidate.SqlEstimatedEdgeDegrees;
    var componentSize = Cardinality;
    var estimatedCompSize = SqlEstimatedSize;

    var node = nodeUnitCandidate.TreeRoot;

    // If the node is already in the component, then only multiply the degree to get the size
    double nodeUnitActualSize;
    double newCompEstSize;
    if (MaterializedNodeSplitCount[node] > 0)
    {
        nodeUnitActualSize = nodeDegrees;
        // 1000 per unmaterialized edge currently recorded in the component.
        var cEstEdge = Math.Pow(1000, EdgeMaterilizedDict.Count(e => !e.Value));
        var cSize = SqlEstimatedSize / cEstEdge;
        var nSize = node.EstimatedRows;
        if (nSize > cSize)
        {
            newCompEstSize = estimatedNodeUnitSize * cEstEdge * estimatedSelectivity;
        }
        else
        {
            newCompEstSize = SqlEstimatedSize * Math.Pow(1000, nodeUnitCandidate.UnmaterializedEdges.Count) * estimatedSelectivity;
        }
    }
    else
    {
        nodeUnitActualSize = nodeUnitSize;
        newCompEstSize = SqlEstimatedSize * estimatedNodeUnitSize * estimatedSelectivity;
    }
    // Clamp the estimate to at least one row.
    newCompEstSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;

    // First join: exactly two materialized nodes, neither of them split yet.
    bool firstJoin = MaterializedNodeSplitCount.Count == 2 &&
                     MaterializedNodeSplitCount.All(e => e.Value == 0);

    // Update TableRef
    // NOTE(review): loopJoinOuterThreshold is only referenced by the commented-out
    // condition below, so it is currently unused.
    double loopJoinOuterThreshold = 1e4;//1e6;
    double sizeFactor = 5;//1000;
    double maxMemory = 1e8;
    double loopCost = componentSize*Math.Log(nodeUnitCandidate.TreeRoot.EstimatedRows, 512);
    double hashCost = componentSize + nodeUnitSize;
    double cost;

    // Loop Join
    if (
        nodeUnitCandidate.MaterializedEdges.Count == 0 && // the joins are purely leaf to sink join
        (
            //componentSize < loopJoinOuterThreshold ||     // the outer table is relatively small
            loopCost < hashCost ||
            (DeltaMemory + componentSize > maxMemory && DeltaMemory + nodeUnitSize > maxMemory) // memory is in pressure
        )
       )
    {
        if (firstJoin)
        {
            RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
            RightestTableAlias = GetNodeRefName(node);
        }
        TotalMemory = DeltaMemory;
        SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory;
        //joinTable.JoinHint = JoinHint.Loop;
        // The loop-join branch sets SqlEstimatedSize directly; newCompEstSize is not used here.
        SqlEstimatedSize = estimatedCompSize * estimatedNodeUnitSize /
                             nodeUnitCandidate.TreeRoot.TableRowCount;

        cost = loopCost; //componentSize*Math.Log(nodeUnitCandidate.TreeRoot.EstimatedRows, 512);
        TableRef = new WParenthesisTableReference
        {
            Table = new WQualifiedJoin
            {
                FirstTableRef = TableRef,
                SecondTableRef =
                    nodeUnitCandidate.ToTableReference(GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData),
                JoinCondition = joinCondition,
                QualifiedJoinType = QualifiedJoinType.Inner,
                JoinHint = JoinHint.Loop
            }
        };
    }
    // Hash Join
    else
    {
        cost = hashCost;//componentSize + nodeUnitSize;
        WBooleanExpression adjustedJoincondition;
        double adjustedSqlEstimatedSize;
        WTableReference buildTableReference;
        WTableReference probeTableReference;
        if (firstJoin)
        {
            // The other materialized node, i.e. the one already in the component.
            var nodeInComp = MaterializedNodeSplitCount.Keys.First(e => e != node);

            if (nodeUnitSize < componentSize)
            {
                // The smaller node unit becomes the build side.
                buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData, out adjustedJoincondition,
                    out adjustedSqlEstimatedSize);
                probeTableReference = TableRef;
                TotalMemory = DeltaMemory = nodeUnitSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = estimatedNodeUnitSize;
                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);

            }
            else
            {
                // NOTE(review): RightestTableRefSize and RightestTableAlias are assigned here
                // and again at the end of this branch; the later assignments are the ones
                // that remain after the branch. The first pair is in place while
                // AdjustEstimation(this, ...) runs.
                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);
                buildTableReference = AdjustEstimation(this, out adjustedJoincondition, out adjustedSqlEstimatedSize);
                probeTableReference =
                    nodeUnitCandidate.ToTableReference(GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData);
                TotalMemory = DeltaMemory = componentSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = SqlEstimatedSize;
                RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
                RightestTableAlias = GetNodeRefName(node);
            }
        }
        // Left Deep
        else if (componentSize*sizeFactor < nodeUnitSize)
        {
            // Adjust estimation in sql server
            // The component is the build side; the node unit is probed.
            buildTableReference = AdjustEstimation(this, out adjustedJoincondition, out adjustedSqlEstimatedSize);
            probeTableReference =
               nodeUnitCandidate.ToTableReference(GetNodeRefName(nodeUnitCandidate.TreeRoot), metaData);
            var curDeltaMemory = componentSize;
            TotalMemory = DeltaMemory + curDeltaMemory;
            DeltaMemory = curDeltaMemory;
            var curDeltaEstimateMemory = SqlEstimatedSize;
            SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory + curDeltaEstimateMemory;
            SqlEstimatedDeltaMemory = curDeltaEstimateMemory;

            RightestTableAlias = GetNodeRefName(node);
            RightestTableRefSize = nodeUnitCandidate.TreeRoot.EstimatedRows;
        }
        // Right Deep
        else
        {
            // The node unit is the build side; the component table reference is probed.
            buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData, out adjustedJoincondition,
                   out adjustedSqlEstimatedSize);
            probeTableReference = TableRef;

            TotalMemory += nodeUnitSize;
            DeltaMemory = TotalMemory;
            SqlEstimatedTotalMemory += estimatedNodeUnitSize;
            SqlEstimatedDeltaMemory = SqlEstimatedTotalMemory;
        }
        // Apply the factor reported by AdjustEstimation to the new estimated size.
        newCompEstSize *= adjustedSqlEstimatedSize;
        TableRef = new WParenthesisTableReference
        {
            Table =
                new WQualifiedJoin
                {
                    FirstTableRef = buildTableReference,
                    SecondTableRef = probeTableReference,
                    JoinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,adjustedJoincondition),
                    QualifiedJoinType = QualifiedJoinType.Inner,
                    JoinHint = JoinHint.Hash
                }
        };
        // Clamp again after the adjustment.
        SqlEstimatedSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;
    }

    //Update Size
    Cardinality *= nodeUnitActualSize * joinSelectivity;

    // Debug
#if DEBUG
    //foreach (var item in MaterializedNodeSplitCount.Where(e => e.Key != node))
    //{
    //    Trace.Write(item.Key.RefAlias + ",");
    //}
    //Trace.Write(node.RefAlias);
    //Trace.Write(" Size:" + Cardinality + " Cost:" + cost);
    //Trace.Write(" Method:" + ((TableRef as WParenthesisTableReference).Table as WQualifiedJoin).JoinHint);
    //Trace.WriteLine(" --> Total Cost:" + Cost);
#endif

    // Update Cost
    Cost += cost;
}
/// <summary>
/// Builds the table reference for a candidate join unit and, when its actual edge degree
/// and its SQL-estimated edge degree differ by more than a factor of ten, produces the
/// pieces used to adjust the SQL Server estimation (an up-sized table reference and/or a
/// down-size join condition).
/// </summary>
/// <param name="candidateJoinUnit">Join unit whose EdgeDegrees, SqlEstimatedEdgeDegrees and TreeRoot.EstimatedRows are read.</param>
/// <param name="nodeAlias">Alias forwarded to ToTableReference and ConstructDownSizeJoinCondition.</param>
/// <param name="metaData">Graph metadata forwarded to ToTableReference.</param>
/// <param name="joinCondition">Receives the down-size join condition when the estimate is more than ten times the actual degree; otherwise null.</param>
/// <param name="affectedSqlEstimatedSize">Receives the accumulated adjustment factor: starts at 1.0, is divided by the shrink factor and multiplied by the factor reported by ConstructUpSizeTableReference.</param>
/// <returns>The unit's table reference, replaced by the up-sized reference when the computed factor is greater than one.</returns>
private static WTableReference AdjustEstimation(CandidateJoinUnit candidateJoinUnit, string nodeAlias, GraphMetaData metaData, out WBooleanExpression joinCondition, out double affectedSqlEstimatedSize)
{
    const int toleranceFactor = 10;

    double actualDegree = candidateJoinUnit.EdgeDegrees;
    double sqlDegree = candidateJoinUnit.SqlEstimatedEdgeDegrees;
    double rootRows = candidateJoinUnit.TreeRoot.EstimatedRows;
    WTableReference result = candidateJoinUnit.ToTableReference(nodeAlias, metaData);

    affectedSqlEstimatedSize = 1.0;
    joinCondition = null;

    int upSizeFactor = 0;
    if (actualDegree > toleranceFactor * sqlDegree)
    {
        // Estimate is too small: scale it up by the ratio of actual to estimated degree.
        upSizeFactor = (int)Math.Ceiling(actualDegree / sqlDegree);
    }
    else if (toleranceFactor * actualDegree < sqlDegree)
    {
        // Estimate is too large: shrink it first, then recompute the remaining ratio.
        double shrinkFactor = 1.0 / (1 - Math.Pow((1 - 1.0 / rootRows), 1.5));
        affectedSqlEstimatedSize /= shrinkFactor;
        sqlDegree /= shrinkFactor;
        upSizeFactor = (int)Math.Ceiling(actualDegree / sqlDegree);
        joinCondition = ConstructDownSizeJoinCondition(nodeAlias);
    }

    if (upSizeFactor <= 1)
    {
        return result;
    }

    double upSizeEffect;
    result = ConstructUpSizeTableReference(result, upSizeFactor, out upSizeEffect);
    affectedSqlEstimatedSize *= upSizeEffect;
    return result;
}
/// <summary>
/// Appends one step to the component's <c>TraversalChain2</c>. The step is built from the
/// first pre-materialized incoming edge of the candidate unit (its source node, the edge
/// itself and its sink node) together with the unit's post-materialized outgoing and
/// incoming edge lists.
/// </summary>
/// <param name="curComponent">Component whose traversal chain receives the new step.</param>
/// <param name="nodeUnitCandidate">Candidate unit; its PreMatIncomingEdges must contain at least one edge because index 0 is read.</param>
private void ConstructTraversalChainAndUpdateCost2(MatchComponent curComponent, CandidateJoinUnit nodeUnitCandidate)
{
    var preIncoming = nodeUnitCandidate.PreMatIncomingEdges;
    var postIncoming = nodeUnitCandidate.PostMatIncomingEdges;
    var postOutgoing = nodeUnitCandidate.PostMatOutgoingEdges;

    var chain = curComponent.TraversalChain2;

    // The step is anchored on the first pre-materialized incoming edge.
    var entryEdge = preIncoming[0];
    var step = new Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>(
        entryEdge.SourceNode,
        entryEdge,
        entryEdge.SinkNode,
        postOutgoing,
        postIncoming);

    chain.Add(step);
}
/// <summary>
/// Applies a candidate join unit to a component: registers the unit's root node, marks
/// the unit's pre- and post-materialized incoming and outgoing edges as materialized,
/// adds each such edge's adjacency-list properties to its source node in the component,
/// and records the unit's unmaterialized edges as pending edges of their sink nodes.
/// </summary>
/// <param name="curComponent">Component that is modified in place.</param>
/// <param name="candidateTree">Candidate join unit whose root and edge lists are read.</param>
private void UpdateComponent(MatchComponent curComponent, CandidateJoinUnit candidateTree)
{
    var componentNodes = curComponent.Nodes;
    var materializedFlags = curComponent.EdgeMaterilizedDict;
    var pendingInEdgesByNode = curComponent.UnmaterializedNodeMapping;
    MatchNode root = candidateTree.TreeRoot;

    // Register a copy of the root under its alias if the component does not have it yet.
    if (!componentNodes.ContainsKey(root.NodeAlias))
    {
        componentNodes.Add(root.NodeAlias, new MatchNode(root));
    }
    curComponent.MaterializedNodeSplitCount[root] = 0;

    // Tag every edge with its materialization order; Union drops duplicate (order, edge) pairs.
    List<Tuple<MaterializedOrder, MatchEdge>> incoming =
        candidateTree.PreMatIncomingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
            .Union(candidateTree.PostMatIncomingEdges
                .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
            .ToList();
    List<Tuple<MaterializedOrder, MatchEdge>> outgoing =
        candidateTree.PreMatOutgoingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
            .Union(candidateTree.PostMatOutgoingEdges
                .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
            .ToList();

    // The root stops being an unmaterialized node once any incoming edge is joined.
    if (incoming.Count > 0)
    {
        pendingInEdgesByNode.Remove(root);
    }

    // Incoming edges are processed first, then outgoing edges.
    foreach (Tuple<MaterializedOrder, MatchEdge> tagged in incoming.Concat(outgoing))
    {
        MatchEdge edge = tagged.Item2;
        materializedFlags[edge] = true;

        List<string> adjacencyProperties = this.PopulateAdjacencyListProperties(edge);
        MatchNode sourceInComponent = curComponent.Nodes[edge.SourceNode.NodeAlias];
        foreach (string property in adjacencyProperties)
        {
            sourceInComponent.Properties.Add(property);
        }
    }

    // Unmaterialized edges are flagged and queued on their sink nodes.
    foreach (MatchEdge pendingEdge in candidateTree.UnmaterializedEdges)
    {
        materializedFlags[pendingEdge] = false;
        List<MatchEdge> sinkInEdges = pendingInEdgesByNode.GetOrCreate(pendingEdge.SinkNode);
        sinkInEdges.Add(pendingEdge);
    }
}
/// <summary>
/// Applies a candidate join unit to a component: registers the unit's root node, marks
/// the unit's pre- and post-materialized incoming and outgoing edges as materialized,
/// and records the unit's unmaterialized edges as pending edges of their sink nodes.
/// </summary>
/// <param name="curComponent">Component that is modified in place.</param>
/// <param name="candidateTree">Candidate join unit whose root and edge lists are read.</param>
private void UpdateComponent(MatchComponent curComponent, CandidateJoinUnit candidateTree)
{
    var componentNodes = curComponent.Nodes;
    var materializedFlags = curComponent.EdgeMaterilizedDict;
    var pendingInEdgesByNode = curComponent.UnmaterializedNodeMapping;
    var root = candidateTree.TreeRoot;

    // Register the root if the component does not have it yet.
    if (!componentNodes.Contains(root))
    {
        componentNodes.Add(root);
    }
    curComponent.MaterializedNodeSplitCount[root] = 0;

    // Tag every edge with its materialization order; Union drops duplicate (order, edge) pairs.
    var incoming =
        candidateTree.PreMatIncomingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
            .Union(candidateTree.PostMatIncomingEdges
                .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
            .ToList();
    var outgoing =
        candidateTree.PreMatOutgoingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
            .Union(candidateTree.PostMatOutgoingEdges
                .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
            .ToList();

    // The root stops being an unmaterialized node once any incoming edge is joined.
    if (incoming.Count > 0)
    {
        pendingInEdgesByNode.Remove(root);
    }

    // Incoming edges are flagged first, then outgoing edges.
    foreach (var tagged in incoming.Concat(outgoing))
    {
        materializedFlags[tagged.Item2] = true;
    }

    // Unmaterialized edges are flagged and queued on their sink nodes.
    foreach (var pendingEdge in candidateTree.UnmaterializedEdges)
    {
        materializedFlags[pendingEdge] = false;
        var sinkInEdges = pendingInEdgesByNode.GetOrCreate(pendingEdge.SinkNode);
        sinkInEdges.Add(pendingEdge);
    }
}
/// <summary>
/// Builds the join predicate that attaches <paramref name="candidateTree"/> to this component and
/// updates the component state touched along the way (node set, split counts, edge
/// materialization flags, pending unmaterialized edges, sink-node statistics and, when
/// unmaterialized edges have to be spanned, <c>TableRef</c>).
/// </summary>
/// <param name="candidateTree">Join unit whose root, materialized edges and unmaterialized edges are merged in.</param>
/// <param name="statisticsCalculator">Supplies the selectivity used when a materialized edge meets a sink node that so far is only reached by unmaterialized edges.</param>
/// <param name="metaData">Passed through to <c>SpanTableRef</c>.</param>
/// <param name="joinSelectivity">Receives the product of all selectivities accumulated while building the predicate.</param>
/// <param name="sqlEstimatedJoinSelectivity">Receives the value folded from the collected density values, largest density first.</param>
/// <returns>The conjunction of every generated comparison, or <c>null</c> when no comparison was generated.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    out double joinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    joinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update Nodes
    if (MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already part of the component: reference it again under the name
        // returned by GetNodeRefName and tie the two references together by GlobalNodeId.
        MaterializedNodeSplitCount[root]++;
        nodeName = GetNodeRefName(root);
        joinCondition = JoinColumnsEqual(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        if (!Nodes.Contains(root))
        {
            Nodes.Add(root);
        }
        MaterializedNodeSplitCount[root] = 0;
    }

    List<double> densityList = new List<double>();
    List<MatchEdge> inEdges;
    if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var firstEdge = inEdges.First();
        // The flag of the first pending edge decides the branch for the whole list.
        bool materialized = EdgeMaterilizedDict[firstEdge];
        UnmaterializedNodeMapping.Remove(root);
        joinSelectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                JoinColumnsEqual(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
            densityList.Add(root.GlobalNodeIdDensity);
        }
        else
        {
            // Component unmaterialized edge to root
            Statistics statistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);
                EdgeMaterilizedDict[edge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    JoinColumnsEqual(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));

                double selectivity;
                statistics = Statistics.UpdateHistogram(statistics, edge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                densityList.Add(root.GlobalNodeIdDensity);
            }
            SinkNodeStatisticsDict[root] = statistics;
        }
    }

    foreach (var jointEdge in candidateTree.MaterializedEdges)
    {
        EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                JoinColumnsEqual(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));

            Statistics sinkNodeStatistics;
            if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
            {
                sinkNodeStatistics = null;
                joinSelectivity *= 1.0 / sinkNode.TableRowCount;
            }

            double selectivity;
            var statistics = Statistics.UpdateHistogram(sinkNodeStatistics, jointEdge.Statistics, out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = statistics;
            densityList.Add(sinkNode.GlobalNodeIdDensity);
        }
        else
        {
            // Leaf to component unmaterialized node
            inEdges = UnmaterializedNodeMapping[sinkNode];
            var firstEdge = inEdges.First();
            bool materlizedEdge = EdgeMaterilizedDict[firstEdge];

            if (materlizedEdge)
            {
                // Leaf to materialized leaf
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    JoinColumnsEqual(jointEdge.EdgeAlias, "Sink", firstEdge.EdgeAlias, "Sink"));
                densityList.Add(Statistics.DefaultDensity);

                double selectivity;
                var statistics = Statistics.UpdateHistogram(
                    SinkNodeStatisticsDict[sinkNode], jointEdge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                SinkNodeStatisticsDict[sinkNode] = statistics;
            }
            else
            {
                // Leaf to unmaterialized leaf
                Statistics compSinkNodeStatistics = null;
                foreach (var inEdge in inEdges)
                {
                    TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                    EdgeMaterilizedDict[inEdge] = true;
                    joinCondition = WBooleanBinaryExpression.Conjunction(
                        joinCondition,
                        JoinColumnsEqual(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                    densityList.Add(Statistics.DefaultDensity);

                    // Only the selectivity of the leaf-to-leaf estimate is consumed; the returned
                    // statistics object was never used by the original code either.
                    double selectivity;
                    statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge, out selectivity);
                    joinSelectivity *= selectivity;

                    // The selectivity written by this call is intentionally not applied.
                    compSinkNodeStatistics = Statistics.UpdateHistogram(
                        compSinkNodeStatistics, inEdge.Statistics, out selectivity);
                }
                SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;
            }
        }
    }

    foreach (var unmatEdge in candidateTree.UnmaterializedEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        if (!Nodes.Contains(unmatEdge.SinkNode))
        {
            Nodes.Add(unmatEdge.SinkNode);
        }
        UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode).Add(unmatEdge);
    }

    // Calculate Estimated Join Selectivity: fold the densities from largest to smallest,
    // each step multiplying by the density and by the square root of the running value.
    densityList.Sort();
    for (int i = densityList.Count - 1; i >= 0; i--)
    {
        sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
    }

    return joinCondition;
}

/// <summary>Builds a regular two-part column reference <c>table.column</c>.</summary>
private static WColumnReferenceExpression JoinColumn(string table, string column)
{
    return new WColumnReferenceExpression
    {
        ColumnType = ColumnType.Regular,
        MultiPartIdentifier = new WMultiPartIdentifier(
            new Identifier { Value = table },
            new Identifier { Value = column })
    };
}

/// <summary>Builds the comparison <c>leftTable.leftColumn = rightTable.rightColumn</c>.</summary>
private static WBooleanComparisonExpression JoinColumnsEqual(
    string leftTable, string leftColumn, string rightTable, string rightColumn)
{
    return new WBooleanComparisonExpression
    {
        FirstExpr = JoinColumn(leftTable, leftColumn),
        SecondExpr = JoinColumn(rightTable, rightColumn),
        ComparisonType = BooleanComparisonType.Equals
    };
}
/// <summary>
/// Joins <paramref name="nodeUnitCandidate"/> onto this component's table reference with either
/// a loop join or a hash join, and updates the component's memory figures, SQL-estimated size,
/// cardinality and accumulated cost accordingly.
/// </summary>
/// <param name="nodeUnitCandidate">Join unit (root node plus edges) to attach.</param>
/// <param name="joinCondition">Predicate connecting the unit to the component.</param>
/// <param name="joinSelectivity">Selectivity applied to the component cardinality.</param>
/// <param name="estimatedSelectivity">Selectivity applied to the SQL-estimated size.</param>
/// <param name="metaData">Passed through when the unit or the component is turned into a table reference.</param>
private void ConstructPhysicalJoinAndUpdateCost(
    CandidateJoinUnit nodeUnitCandidate,
    WBooleanExpression joinCondition,
    double joinSelectivity,
    double estimatedSelectivity,
    GraphMetaData metaData)
{
    const double sizeFactor = 5;
    const double maxMemory = 1e8;

    var node = nodeUnitCandidate.TreeRoot;
    var nodeDegrees = nodeUnitCandidate.EdgeDegrees;
    var nodeUnitSize = node.EstimatedRows * nodeDegrees;
    var estimatedNodeUnitSize = node.EstimatedRows * nodeUnitCandidate.SqlEstimatedEdgeDegrees;
    var componentSize = Cardinality;
    var estimatedCompSize = SqlEstimatedSize;

    // If the node is already in the component, then only multiply the degree to get the size
    double nodeUnitActualSize;
    double newCompEstSize;
    if (MaterializedNodeSplitCount[node] > 0)
    {
        nodeUnitActualSize = nodeDegrees;

        // Every edge still flagged as not materialized contributes a factor of 1000.
        var cEstEdge = Math.Pow(1000, EdgeMaterilizedDict.Count(e => !e.Value));
        var cSize = SqlEstimatedSize / cEstEdge;
        var nSize = node.EstimatedRows;
        if (nSize > cSize)
        {
            newCompEstSize = estimatedNodeUnitSize * cEstEdge * estimatedSelectivity;
        }
        else
        {
            newCompEstSize = SqlEstimatedSize
                * Math.Pow(1000, nodeUnitCandidate.UnmaterializedEdges.Count)
                * estimatedSelectivity;
        }
    }
    else
    {
        nodeUnitActualSize = nodeUnitSize;
        newCompEstSize = SqlEstimatedSize * estimatedNodeUnitSize * estimatedSelectivity;
    }
    newCompEstSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;

    bool firstJoin = MaterializedNodeSplitCount.Count == 2
        && MaterializedNodeSplitCount.All(e => e.Value == 0);

    double loopCost = componentSize * Math.Log(node.EstimatedRows, 512) * 0.20;
    double hashCost = componentSize + nodeUnitSize;
    double cost;

    // A loop join is only considered when the unit has no materialized edges, and then only
    // when it is cheaper than hashing or when both possible build sides exceed the memory cap.
    bool memoryUnderPressure = DeltaMemory + componentSize > maxMemory
        && DeltaMemory + nodeUnitSize > maxMemory;
    if (nodeUnitCandidate.MaterializedEdges.Count == 0 && (loopCost < hashCost || memoryUnderPressure))
    {
        // Loop Join
        if (firstJoin)
        {
            RightestTableRefSize = node.EstimatedRows;
            RightestTableAlias = GetNodeRefName(node);
        }
        TotalMemory = DeltaMemory;
        SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory;
        SqlEstimatedSize = estimatedCompSize * estimatedNodeUnitSize / node.TableRowCount;
        cost = loopCost;

        TableRef = new WParenthesisTableReference
        {
            Table = new WQualifiedJoin
            {
                FirstTableRef = TableRef,
                SecondTableRef = nodeUnitCandidate.ToTableReference(GetNodeRefName(node), metaData),
                JoinCondition = joinCondition,
                QualifiedJoinType = QualifiedJoinType.Inner,
                JoinHint = JoinHint.Loop
            }
        };
    }
    else
    {
        // Hash Join
        cost = hashCost;
        WBooleanExpression adjustedJoincondition;
        double adjustedSqlEstimatedSize;
        WTableReference buildTableReference;
        WTableReference probeTableReference;

        if (firstJoin)
        {
            var nodeInComp = MaterializedNodeSplitCount.Keys.First(e => e != node);
            if (nodeUnitSize < componentSize)
            {
                // The smaller node unit is the build side; the component is probed.
                buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData,
                    out adjustedJoincondition, out adjustedSqlEstimatedSize);
                probeTableReference = TableRef;
                TotalMemory = DeltaMemory = nodeUnitSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = estimatedNodeUnitSize;
                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);
            }
            else
            {
                // The rightmost-table fields are set BEFORE the component-level AdjustEstimation
                // call and overwritten afterwards; this ordering is preserved from the original.
                // NOTE(review): presumably AdjustEstimation(this, ...) reads them - confirm.
                RightestTableRefSize = nodeInComp.EstimatedRows;
                RightestTableAlias = GetNodeRefName(nodeInComp);
                buildTableReference = AdjustEstimation(this, out adjustedJoincondition,
                    out adjustedSqlEstimatedSize);
                probeTableReference = nodeUnitCandidate.ToTableReference(GetNodeRefName(node), metaData);
                TotalMemory = DeltaMemory = componentSize;
                SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory = SqlEstimatedSize;
                RightestTableRefSize = node.EstimatedRows;
                RightestTableAlias = GetNodeRefName(node);
            }
        }
        else if (componentSize * sizeFactor < nodeUnitSize)
        {
            // Left Deep: the component is the build side.
            // Adjust estimation in sql server
            buildTableReference = AdjustEstimation(this, out adjustedJoincondition,
                out adjustedSqlEstimatedSize);
            probeTableReference = nodeUnitCandidate.ToTableReference(GetNodeRefName(node), metaData);

            var curDeltaMemory = componentSize;
            TotalMemory = DeltaMemory + curDeltaMemory;
            DeltaMemory = curDeltaMemory;

            var curDeltaEstimateMemory = SqlEstimatedSize;
            SqlEstimatedTotalMemory = SqlEstimatedDeltaMemory + curDeltaEstimateMemory;
            SqlEstimatedDeltaMemory = curDeltaEstimateMemory;

            RightestTableAlias = GetNodeRefName(node);
            RightestTableRefSize = node.EstimatedRows;
        }
        else
        {
            // Right Deep: the node unit is the build side.
            buildTableReference = AdjustEstimation(nodeUnitCandidate, GetNodeRefName(node), metaData,
                out adjustedJoincondition, out adjustedSqlEstimatedSize);
            probeTableReference = TableRef;
            TotalMemory += nodeUnitSize;
            DeltaMemory = TotalMemory;
            SqlEstimatedTotalMemory += estimatedNodeUnitSize;
            SqlEstimatedDeltaMemory = SqlEstimatedTotalMemory;
        }

        newCompEstSize *= adjustedSqlEstimatedSize;
        TableRef = new WParenthesisTableReference
        {
            Table = new WQualifiedJoin
            {
                FirstTableRef = buildTableReference,
                SecondTableRef = probeTableReference,
                JoinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, adjustedJoincondition),
                QualifiedJoinType = QualifiedJoinType.Inner,
                JoinHint = JoinHint.Hash
            }
        };
        SqlEstimatedSize = newCompEstSize < 1.0 ? 1.0 : newCompEstSize;
    }

    // Update Size
    Cardinality *= nodeUnitActualSize * joinSelectivity;

    // Update Cost
    Cost += cost;
}
/// <summary>
/// Appends one step to the traversal chain of <paramref name="curComponent"/>. The step follows
/// the first pre-materialized incoming edge of <paramref name="nodeUnitCandidate"/>.
/// </summary>
/// <remarks>
/// Layout of the appended tuple:
/// Item1 = source node, Item2 = traversal edge, Item3 = sink node,
/// Item4 = backwarding edges (the unit's post-materialized outgoing edges),
/// Item5 = forwarding edges (the unit's post-materialized incoming edges).
/// Both nodes are taken from the component's node map by alias. No cost field is modified here.
/// </remarks>
/// <param name="curComponent">Component whose traversal chain is extended.</param>
/// <param name="nodeUnitCandidate">Join unit supplying the traversal edge and the post-materialized edge lists.</param>
private void ConstructTraversalChainAndUpdateCost(MatchComponent curComponent, CandidateJoinUnit nodeUnitCandidate)
{
    List<MatchEdge> preMatIncoming = nodeUnitCandidate.PreMatIncomingEdges;
    List<MatchEdge> forwardingEdges = nodeUnitCandidate.PostMatIncomingEdges;
    List<MatchEdge> backwardingEdges = nodeUnitCandidate.PostMatOutgoingEdges;

    MatchEdge traversalEdge = preMatIncoming[0];
    MatchNode sourceNode = curComponent.Nodes[traversalEdge.SourceNode.NodeAlias];
    MatchNode sinkNode = curComponent.Nodes[traversalEdge.SinkNode.NodeAlias];

    var step = new Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>(
        sourceNode,
        traversalEdge,
        sinkNode,
        backwardingEdges,
        forwardingEdges);

    curComponent.TraversalChain.Add(step);
}
/// <summary>
/// Builds the join predicate that attaches <paramref name="candidateTree"/> to this component.
/// Comparisons for pre-materialized edges go into the returned join condition; comparisons for
/// post-materialized edges are AND-ed into the component's <c>WhereCondition</c>. The component's
/// node set, split counts, edge flags, pending-edge map and sink-node statistics are updated
/// along the way, and the candidate's join hint may be switched from loop to hash.
/// </summary>
/// <param name="candidateTree">Join unit being attached; its <c>JoinHint</c> may be rewritten.</param>
/// <param name="statisticsCalculator">Not read by this method.</param>
/// <param name="metaData">Not read by this method.</param>
/// <param name="srcNodeStatisticsDict">Statistics keyed by (edge alias, is-reversed); consulted only on the first join.</param>
/// <param name="preJoinSelectivity">Receives the product of the selectivities of the pre-materialized edges.</param>
/// <param name="postJoinSelectivity">Receives the product of the selectivities of the post-materialized edges.</param>
/// <param name="sqlEstimatedJoinSelectivity">Receives the value folded from the collected density values, largest density first.</param>
/// <returns>The conjunction of the comparisons generated for pre-materialized edges, or <c>null</c> when there were none.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    Dictionary<Tuple<string, bool>, Statistics> srcNodeStatisticsDict,
    out double preJoinSelectivity,
    out double postJoinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    const double sizeThreshold = 1e8;
    const int loopJoinFactorThreshold = 20;

    preJoinSelectivity = 1.0;
    postJoinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    // Must be evaluated before the root is registered below.
    var firstJoin = MaterializedNodeSplitCount.Count == 1;
    MatchNode firstNode = null;
    if (firstJoin)
    {
        firstNode = Nodes.First();
    }

    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    WBooleanExpression whereCondition = null;
    string nodeName = root.RefAlias;

    if (!Nodes.Contains(root))
    {
        Nodes.Add(root);
    }
    MaterializedNodeSplitCount[root] = 0;

    var inEdges = candidateTree.PreMatIncomingEdges
        .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
        .Union(candidateTree.PostMatIncomingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
        .ToList();
    var outEdges = candidateTree.PreMatOutgoingEdges
        .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
        .Union(candidateTree.PostMatOutgoingEdges
            .Select(e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))
        .ToList();

    var densityList = new List<double>();
    var inPostCount = 0;
    var outPostCount = 0;

    if (inEdges.Any())
    {
        UnmaterializedNodeMapping.Remove(root);

        Statistics statistics = null;
        Statistics srcNodeStat = null;
        foreach (var t in inEdges)
        {
            var order = t.Item1;
            var edge = t.Item2;

            // Every post-materialized incoming edge after the first compares against "GlobalNodeId + 0".
            var newCondition = EdgeSinkEqualsGlobalNodeId(
                edge.EdgeAlias, nodeName, order == MaterializedOrder.Post && inPostCount > 0);
            EdgeMaterilizedDict[edge] = true;

            double selectivity;
            statistics = Statistics.UpdateHistogram(statistics, edge.Statistics, out selectivity);
            if (order == MaterializedOrder.Pre)
            {
                preJoinSelectivity *= selectivity;
                joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, newCondition);
            }
            else
            {
                ++inPostCount;
                postJoinSelectivity *= selectivity;
                whereCondition = WBooleanBinaryExpression.Conjunction(whereCondition, newCondition);
            }

            if (firstJoin)
            {
                // The selectivity produced here is not used; only the merged statistics are kept.
                double srcNodeSelectivity;
                srcNodeStat = Statistics.UpdateHistogram(
                    srcNodeStat,
                    srcNodeStatisticsDict[new Tuple<string, bool>(edge.EdgeAlias, edge.IsReversedEdge)],
                    out srcNodeSelectivity);
            }

            densityList.Add(root.GlobalNodeIdDensity);
        }

        if (firstJoin)
        {
            SinkNodeStatisticsDict[firstNode] = srcNodeStat;
        }
        SinkNodeStatisticsDict[root] = statistics;
    }

    // Demote a loop join to a hash join when the estimated outer input is both at least
    // sizeThreshold rows and more than loopJoinFactorThreshold times the root's estimated rows.
    if (candidateTree.JoinHint == JoinHint.Loop)
    {
        var size = Cardinality
            * candidateTree.PreMatIncomingEdges.Select(e => e.AverageDegree)
                .Aggregate(1.0, (cur, next) => cur * next)
            * preJoinSelectivity;
        if (size >= sizeThreshold && size > root.EstimatedRows * loopJoinFactorThreshold)
        {
            candidateTree.JoinHint = JoinHint.Hash;
        }
    }

    foreach (var t in outEdges)
    {
        var order = t.Item1;
        var edge = t.Item2;
        var sinkNode = edge.SinkNode;

        // Every post-materialized outgoing edge after the first compares against "GlobalNodeId + 0".
        var newCondition = EdgeSinkEqualsGlobalNodeId(
            edge.EdgeAlias, sinkNode.RefAlias, order == MaterializedOrder.Post && outPostCount > 0);
        EdgeMaterilizedDict[edge] = true;

        // TryGetValue leaves null behind when the sink node has no statistics yet.
        Statistics sinkNodeStatistics;
        SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics);

        double selectivity;
        var statistics = Statistics.UpdateHistogram(sinkNodeStatistics, edge.Statistics, out selectivity);
        if (order == MaterializedOrder.Pre)
        {
            preJoinSelectivity *= selectivity;
            joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, newCondition);
        }
        else
        {
            ++outPostCount;
            postJoinSelectivity *= selectivity;
            whereCondition = WBooleanBinaryExpression.Conjunction(whereCondition, newCondition);
        }

        SinkNodeStatisticsDict[sinkNode] = statistics;
        densityList.Add(sinkNode.GlobalNodeIdDensity);
    }

    foreach (var unmatEdge in candidateTree.UnmaterializedEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode).Add(unmatEdge);
    }

    // Fold the densities from largest to smallest, each step multiplying by the density
    // and by the square root of the running value.
    densityList.Sort();
    for (int i = densityList.Count - 1; i >= 0; i--)
    {
        sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
    }

    WhereCondition = WBooleanBinaryExpression.Conjunction(WhereCondition, whereCondition);
    return joinCondition;
}

/// <summary>
/// Builds <c>edgeAlias.Sink = nodeAlias.GlobalNodeId</c>, or
/// <c>edgeAlias.Sink = nodeAlias.GlobalNodeId + 0</c> when <paramref name="addZeroToNodeId"/> is set.
/// </summary>
/// <remarks>NOTE(review): the "+ 0" form presumably steers the SQL optimizer - confirm the intent.</remarks>
private static WBooleanComparisonExpression EdgeSinkEqualsGlobalNodeId(
    string edgeAlias, string nodeAlias, bool addZeroToNodeId)
{
    var globalNodeIdRef = new WColumnReferenceExpression
    {
        ColumnType = ColumnType.Regular,
        MultiPartIdentifier = new WMultiPartIdentifier(
            new Identifier { Value = nodeAlias },
            new Identifier { Value = "GlobalNodeId" })
    };

    WScalarExpression nodeIdExpression = globalNodeIdRef;
    if (addZeroToNodeId)
    {
        nodeIdExpression = new WBinaryExpression
        {
            ExpressionType = BinaryExpressionType.Add,
            FirstExpr = globalNodeIdRef,
            SecondExpr = new WValueExpression
            {
                SingleQuoted = false,
                Value = "0"
            }
        };
    }

    return new WBooleanComparisonExpression
    {
        FirstExpr = new WColumnReferenceExpression
        {
            ColumnType = ColumnType.Regular,
            MultiPartIdentifier = new WMultiPartIdentifier(
                new Identifier { Value = edgeAlias },
                new Identifier { Value = "Sink" })
        },
        SecondExpr = nodeIdExpression,
        ComparisonType = BooleanComparisonType.Equals
    };
}
/// <summary>
/// Joins <paramref name="nodeUnitCandidate"/> onto this component's table reference, as a loop
/// join when the candidate's join hint is <c>JoinHint.Loop</c> and as a hash join otherwise.
/// Updates the table reference, the "last join" bookkeeping, the SQL-estimated size, the
/// cardinality and the accumulated cost. Where the SQL-side estimate differs from the computed
/// size by at least <c>scaleFactor</c>, up-size table references or down-size join conditions
/// are added.
/// </summary>
/// <param name="nodeUnitCandidate">Join unit (root node plus pre-/post-materialized edges) to attach.</param>
/// <param name="joinCondition">Predicate connecting the unit to the component.</param>
/// <param name="preJoinSelectivity">Selectivity of the pre-materialized edges, applied to the cardinality.</param>
/// <param name="postJoinSelectivity">Selectivity of the post-materialized edges, applied to the cardinality.</param>
/// <param name="estimatedSelectivity">Not read by this method.</param>
/// <param name="metaData">Passed through when edges or the unit are turned into table references.</param>
private void ConstructPhysicalJoinAndUpdateCost(
    CandidateJoinUnit nodeUnitCandidate,
    WBooleanExpression joinCondition,
    double preJoinSelectivity,
    double postJoinSelectivity,
    double estimatedSelectivity,
    GraphMetaData metaData)
{
    const double scaleFactor = 1.5;
    const int sqlInPreMatEdgeSelectivityThreshold = 5;

    // Smallest integer up-size factor that is worth applying.
    var minUpsizeFactor = (int)Math.Ceiling(scaleFactor);

    // Product of the average degrees of a set of edges (1.0 for an empty set).
    Func<IEnumerable<MatchEdge>, double> degreeProduct =
        edges => edges.Select(e => e.AverageDegree).Aggregate(1.0, (cur, next) => cur * next);

    var firstJoin = MaterializedNodeSplitCount.Count == 2;
    var inPreMatEdges = nodeUnitCandidate.PreMatIncomingEdges;
    var inPostMatEdges = nodeUnitCandidate.PostMatIncomingEdges;
    var outPreMatEdges = nodeUnitCandidate.PreMatOutgoingEdges;
    var outPostMatEdges = nodeUnitCandidate.PostMatOutgoingEdges;

    // Post-materialized edges of both directions, ordered by ascending average degree.
    var postMatEdges = inPostMatEdges.Select(e => new Tuple<MatchEdge, EdgeDir>(e, EdgeDir.In))
        .Union(outPostMatEdges.Select(e => new Tuple<MatchEdge, EdgeDir>(e, EdgeDir.Out)))
        .OrderBy(t => t.Item1.AverageDegree)
        .ToList();

    var compDegrees = degreeProduct(inPreMatEdges);
    var nodeDegrees = degreeProduct(outPreMatEdges);
    var root = nodeUnitCandidate.TreeRoot;
    var componentSize = Cardinality;

    var loopCost = nodeUnitCandidate.JoinHint == JoinHint.Loop
        ? componentSize * compDegrees * Math.Log(root.EstimatedRows, 512)
        : double.MaxValue;

    // only calc the size of table used to join
    var matCompSizeWhenJoin = componentSize * compDegrees;
    var matUnitSizeWhenJoin = root.EstimatedRows * nodeDegrees;
    var hashCost = matCompSizeWhenJoin + matUnitSizeWhenJoin;

    // Chains one CROSS APPLY per post-materialized edge onto a joined table. LastTableAlias is
    // read when the delegate runs, i.e. at the same point as in the original inline code.
    Func<WTableReference, WTableReference> crossApplyPostMatEdges = joined =>
        postMatEdges.Aggregate(joined, (current, next) => new WUnqualifiedJoin
        {
            FirstTableRef = current,
            SecondTableRef = next.Item1.ToSchemaObjectFunction(
                next.Item1.SourceNode.RefAlias,
                next.Item2 == EdgeDir.In ? root.RefAlias : LastTableAlias,
                metaData),
            UnqualifiedJoinType = UnqualifiedJoinType.CrossApply,
        });

    double cost;

    if (nodeUnitCandidate.JoinHint == JoinHint.Loop)
    {
        // loop join
        if (firstJoin)
        {
            RightestTableRefSize = root.EstimatedRows;
            RightestTableAlias = root.RefAlias;
            LastTable = Nodes.First(n => n.NodeAlias != root.NodeAlias);
            LastJoinHint = JoinHint.Loop;
            LastJoinSqlEstCardinality = LastTable.TableRowCount;
        }
        cost = loopCost;

        // One damping step per additional pre-materialized incoming edge, capped by the threshold.
        var sqlInPreMatEdgesSelectivity = 1.0;
        for (var i = 1; i < inPreMatEdges.Count && i <= sqlInPreMatEdgeSelectivityThreshold; ++i)
        {
            sqlInPreMatEdgesSelectivity = Math.Sqrt(sqlInPreMatEdgesSelectivity) / 10;
        }

        var sqlEstPreJoinEdgeSize = Math.Pow(100, LastJoinPostMatEdgesCount)
            * Math.Pow(1000, inPreMatEdges.Count)
            * sqlInPreMatEdgesSelectivity;
        var sqlEstPreJoinInputSize = LastJoinSqlEstCardinality
            * (LastJoinHint == JoinHint.Loop ? LastTable.EstimatedRows / LastTable.TableRowCount : 1.0)
            * sqlEstPreJoinEdgeSize;

        // > 0: SQL under-estimates by that factor; -1: SQL over-estimates; 0: close enough.
        var estimateFactor = 0;
        if (matCompSizeWhenJoin >= sqlEstPreJoinInputSize * scaleFactor)
        {
            estimateFactor = (int)Math.Ceiling(matCompSizeWhenJoin / sqlEstPreJoinInputSize);
        }
        else if (matCompSizeWhenJoin * scaleFactor < sqlEstPreJoinInputSize)
        {
            estimateFactor = -1;
        }

        var affectedUpsize = 1.0;
        if (estimateFactor >= minUpsizeFactor)
        {
            if (LastJoinHint == JoinHint.Loop)
            {
                TableRef = ConstructUpSizeTableReference(TableRef, estimateFactor, LastTableAlias,
                    DumbType.Node, out affectedUpsize);
            }
            else if (LastJoinPostMatEdgesCount > 0)
            {
                TableRef = ConstructUpSizeTableReference(TableRef, estimateFactor, LastPostMatEdgeAlias,
                    DumbType.Edge, out affectedUpsize);
            }
            else
            {
                TableRef = ConstructUpSizeTableReference(TableRef, estimateFactor, out affectedUpsize);
            }
        }
        else if (estimateFactor == -1 && LastJoinHint == JoinHint.Loop)
        {
            // Re-estimate for the case where the down-size condition is added below; the result
            // may now be too small, in which case the table is up-sized instead.
            sqlEstPreJoinInputSize = LastJoinSqlEstCardinality
                * Math.Sqrt(LastTable.EstimatedRows / LastTable.TableRowCount)
                / LastTable.TableRowCount * 1.5 * sqlEstPreJoinEdgeSize;
            if (matCompSizeWhenJoin >= sqlEstPreJoinInputSize * scaleFactor)
            {
                TableRef = ConstructUpSizeTableReference(TableRef,
                    (int)Math.Ceiling(matCompSizeWhenJoin / sqlEstPreJoinInputSize),
                    LastTableAlias, DumbType.Node, out affectedUpsize);
            }
        }

        foreach (var edge in inPreMatEdges)
        {
            TableRef = SpanTableRef(TableRef, edge, edge.SourceNode.RefAlias, LastTableAlias, metaData);
        }

        WTableReference table = new WQualifiedJoin
        {
            FirstTableRef = TableRef,
            SecondTableRef = nodeUnitCandidate.ToTableReference(root.RefAlias, root.RefAlias, metaData),
            JoinCondition = estimateFactor == -1
                ? WBooleanBinaryExpression.Conjunction(joinCondition,
                    ConstructDownSizeJoinCondition(LastTableAlias))
                : joinCondition,
            QualifiedJoinType = QualifiedJoinType.Inner,
            JoinHint = JoinHint.Loop
        };
        table = crossApplyPostMatEdges(table);

        TableRef = new WParenthesisTableReference
        {
            Table = table,
        };

        LastTable = root;
        LastTableAlias = root.RefAlias;
        LastJoinHint = JoinHint.Loop;
        LastJoinSqlEstCardinality = sqlEstPreJoinInputSize * affectedUpsize;
        LastJoinPostMatEdgesCount = postMatEdges.Count;
        LastPostMatEdgeAlias = LastJoinPostMatEdgesCount > 0 ? postMatEdges.Last().Item1.EdgeAlias : null;

        SqlEstimatedSize = sqlEstPreJoinInputSize * root.EstimatedRows / root.TableRowCount
            * Math.Pow(100, postMatEdges.Count);
        SqlEstimatedSize = SqlEstimatedSize < 1.0 ? 1.0 : SqlEstimatedSize;

        Cardinality = matCompSizeWhenJoin
            * degreeProduct(inPostMatEdges)
            * matUnitSizeWhenJoin
            * degreeProduct(outPostMatEdges)
            * preJoinSelectivity / root.TableRowCount * postJoinSelectivity;
    }
    else
    {
        // hash join
        cost = hashCost;
        WBooleanExpression adjustedJoincondition = null;
        WTableReference buildTableReference =
            nodeUnitCandidate.ToTableReference(root.RefAlias, root.RefAlias, metaData);
        double affectedUpsize = 1.0, sqlEstPreJoinEdgeSize;
        int estimateFactor;
        var sqlEstPreJoinInputSize = root.EstimatedRows;

        if (firstJoin)
        {
            LastTable = Nodes.First(n => n.NodeAlias != root.NodeAlias);
            LastJoinHint = JoinHint.Loop;
            LastJoinSqlEstCardinality = LastTable.TableRowCount;
        }

        // Build table adjustment
        if (outPreMatEdges.Any())
        {
            // One damping step per additional edge that shares a sink node, capped by the threshold.
            var sqlOutPreMatEdgesSelectivity = 1.0;
            foreach (var group in outPreMatEdges.GroupBy(e => e.SinkNode))
            {
                var selectivity = 1.0;
                for (var i = 1; i < group.Count() && i <= sqlInPreMatEdgeSelectivityThreshold; ++i)
                {
                    selectivity = Math.Sqrt(selectivity) / 10;
                }
                sqlOutPreMatEdgesSelectivity *= selectivity;
            }

            sqlEstPreJoinEdgeSize = Math.Pow(1000, outPreMatEdges.Count) * sqlOutPreMatEdgesSelectivity;
            sqlEstPreJoinInputSize *= sqlEstPreJoinEdgeSize;

            estimateFactor = 0;
            if (matUnitSizeWhenJoin >= sqlEstPreJoinInputSize * scaleFactor)
            {
                estimateFactor = (int)Math.Ceiling(matUnitSizeWhenJoin / sqlEstPreJoinInputSize);
            }
            else if (matUnitSizeWhenJoin * scaleFactor < sqlEstPreJoinInputSize)
            {
                estimateFactor = -1;
                adjustedJoincondition = ConstructDownSizeJoinCondition(root.RefAlias);
            }

            if (estimateFactor >= minUpsizeFactor)
            {
                buildTableReference = ConstructUpSizeTableReference(buildTableReference, estimateFactor,
                    root.RefAlias, DumbType.Node, out affectedUpsize);
            }
            else if (estimateFactor == -1)
            {
                sqlEstPreJoinInputSize = Math.Sqrt(root.EstimatedRows / root.TableRowCount)
                    * 1.5 * sqlEstPreJoinEdgeSize;
                if (matUnitSizeWhenJoin >= sqlEstPreJoinInputSize * scaleFactor)
                {
                    buildTableReference = ConstructUpSizeTableReference(buildTableReference,
                        (int)Math.Ceiling(matUnitSizeWhenJoin / sqlEstPreJoinInputSize),
                        root.RefAlias, DumbType.Node, out affectedUpsize);
                }
            }
        }
        sqlEstPreJoinInputSize *= affectedUpsize;

        // Cardinality update (the comparisons below read the updated value)
        Cardinality = matCompSizeWhenJoin
            * degreeProduct(inPostMatEdges)
            * matUnitSizeWhenJoin
            * degreeProduct(outPostMatEdges)
            * preJoinSelectivity / root.TableRowCount * postJoinSelectivity;

        // Output adjustment
        var postJoinUpsizeFactor = -1;
        var sqlInPreMatEdgesSelectivity = 1.0;
        // NOTE(review): unlike the loop-join branch, this loop is not capped by
        // sqlInPreMatEdgeSelectivityThreshold - confirm whether that is intentional.
        for (var i = 1; i < inPreMatEdges.Count; ++i)
        {
            sqlInPreMatEdgesSelectivity = Math.Sqrt(sqlInPreMatEdgesSelectivity) / 10;
        }

        sqlEstPreJoinEdgeSize = Math.Pow(100, LastJoinPostMatEdgesCount)
            * Math.Pow(1000, inPreMatEdges.Count)
            * sqlInPreMatEdgesSelectivity;
        var probeSqlEstCardinality = LastJoinSqlEstCardinality
            * (LastJoinHint == JoinHint.Loop ? LastTable.EstimatedRows / LastTable.TableRowCount : 1.0)
            * sqlEstPreJoinEdgeSize;
        var hashJoinSqlSelectivity = outPreMatEdges.Any()
            ? Math.Pow(Math.Sqrt(0.001), outPreMatEdges.GroupBy(e => e.SinkNode).Count())
            : (root.EstimatedRows / root.TableRowCount) / root.EstimatedRows;
        var sqlEstHashCardinality = sqlEstPreJoinInputSize * probeSqlEstCardinality * hashJoinSqlSelectivity
            * Math.Pow(100, postMatEdges.Count);

        estimateFactor = 0;
        if (Cardinality >= sqlEstHashCardinality * scaleFactor)
        {
            estimateFactor = (int)Math.Ceiling(Cardinality / sqlEstHashCardinality);
        }
        else if (Cardinality * scaleFactor < sqlEstHashCardinality)
        {
            estimateFactor = -1;
            adjustedJoincondition = WBooleanBinaryExpression.Conjunction(adjustedJoincondition,
                ConstructDownSizeJoinCondition(LastTableAlias));
        }

        if (estimateFactor >= minUpsizeFactor)
        {
            postJoinUpsizeFactor = estimateFactor;
        }
        else if (estimateFactor == -1 && LastJoinHint == JoinHint.Loop)
        {
            probeSqlEstCardinality = LastJoinSqlEstCardinality
                * Math.Sqrt(LastTable.EstimatedRows / LastTable.TableRowCount)
                / LastTable.TableRowCount * 1.5 * sqlEstPreJoinEdgeSize;
            sqlEstHashCardinality = sqlEstPreJoinInputSize * probeSqlEstCardinality * hashJoinSqlSelectivity
                * Math.Pow(100, postMatEdges.Count);
            if (Cardinality >= sqlEstHashCardinality * scaleFactor)
            {
                postJoinUpsizeFactor = (int)Math.Ceiling(Cardinality / sqlEstHashCardinality);
            }
        }

        foreach (var edge in inPreMatEdges)
        {
            TableRef = SpanTableRef(TableRef, edge, edge.SourceNode.RefAlias, LastTableAlias, metaData);
        }

        WTableReference table = new WQualifiedJoin
        {
            FirstTableRef = buildTableReference,
            SecondTableRef = TableRef,
            JoinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, adjustedJoincondition),
            QualifiedJoinType = QualifiedJoinType.Inner,
            JoinHint = JoinHint.Hash
        };
        table = crossApplyPostMatEdges(table);

        if (postJoinUpsizeFactor != -1)
        {
            if (postMatEdges.Any())
            {
                table = ConstructUpSizeTableReference(table, postJoinUpsizeFactor,
                    postMatEdges.Last().Item1.EdgeAlias, DumbType.Edge, out affectedUpsize);
            }
            else
            {
                table = ConstructUpSizeTableReference(table, postJoinUpsizeFactor, out affectedUpsize);
            }
        }

        TableRef = new WParenthesisTableReference
        {
            Table = table,
        };

        LastJoinHint = JoinHint.Hash;
        // NOTE(review): when no post-join up-size ran, affectedUpsize still holds the build-side
        // factor that was already folded into sqlEstPreJoinInputSize above - confirm that
        // applying it again here is intended.
        LastJoinSqlEstCardinality = sqlEstHashCardinality * affectedUpsize;
        LastJoinPostMatEdgesCount = 0;
        LastPostMatEdgeAlias = null;

        SqlEstimatedSize = LastJoinSqlEstCardinality;
        SqlEstimatedSize = SqlEstimatedSize < 1.0 ? 1.0 : SqlEstimatedSize;
    }

    Cost += cost;
}
/// <summary>
/// Appends one step to the traversal chain of <paramref name="curComponent"/>: the first
/// pre-materialized incoming edge of <paramref name="nodeUnitCandidate"/> paired with that
/// edge's source node.
/// </summary>
/// <remarks>No cost field is modified here.</remarks>
/// <param name="curComponent">Component whose traversal chain is extended.</param>
/// <param name="nodeUnitCandidate">Join unit supplying the traversal edge.</param>
private void ConstructTraversalChainAndUpdateCost(MatchComponent curComponent, CandidateJoinUnit nodeUnitCandidate)
{
    MatchEdge traversalEdge = nodeUnitCandidate.PreMatIncomingEdges[0];
    var step = new Tuple<MatchNode, MatchEdge>(traversalEdge.SourceNode, traversalEdge);
    curComponent.TraversalChain.Add(step);
}