/// <summary>
/// Computes a traversal order for a connected component of the match graph by repeatedly
/// extending a component state with the next node unit until the state covers the
/// sub-graph's active nodes and all of its non-dangling edges.
/// </summary>
/// <param name="subGraph">The connected component to produce a traversal order for.</param>
/// <returns>
/// The result of <c>GenerateTraversalOrderFromTraversalChain</c> for the completed component,
/// or <c>null</c> if the list of states becomes empty before a complete component is found.
/// </returns>
/// <exception cref="GraphViewException">
/// Thrown when no further node unit is available for a component that is not yet complete,
/// or when the number of generated states reaches <c>MaxStates</c>.
/// </exception>
public override List<Tuple<MatchNode, MatchEdge, List<MatchEdge>, List<MatchEdge>, List<MatchEdge>>> GetOptimizedTraversalOrder(ConnectedComponent subGraph)
{
    // A single-node component has nothing to search: its chain is just that node.
    if (subGraph.Nodes.Count == 1)
    {
        return this.GenerateTraversalOrderFromTraversalChain(
            new List<Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>>
            {
                new Tuple<MatchNode, MatchEdge, MatchNode, List<MatchEdge>, List<MatchEdge>>(
                    subGraph.Nodes.First().Value, null, null, null, null)
            });
    }

    // If it exists, pick a node without incoming edges as the start point
    List<MatchComponent> componentStates = subGraph.Nodes
        .Where(node => node.Value.ReverseNeighbors.Count == 0)
        .Select(node => new MatchComponent(node.Value))
        .Take(1)
        .ToList();

    // Otherwise, fall back to the first node of the sub-graph as the start point
    if (!componentStates.Any())
    {
        componentStates.Add(new MatchComponent(subGraph.Nodes.First().Value));
    }

    // DP
    while (componentStates.Any())
    {
        List<MatchComponent> nextComponentStates = new List<MatchComponent>();

        // Iterate on current components
        foreach (MatchComponent curComponent in componentStates)
        {
            OneHeightTree nodeUnits = this.GetNodeUnits(subGraph, curComponent);

            if (nodeUnits == null)
            {
                if (curComponent.ActiveNodeCount == subGraph.ActiveNodeCount &&
                    curComponent.EdgeMaterilizedDict.Count(e => e.Value == true) ==
                    subGraph.Edges.Count(e => e.Value.IsDanglingEdge == false))
                {
                    return this.GenerateTraversalOrderFromTraversalChain(curComponent.TraversalChain);
                }

                // No node unit is left but the component is incomplete. Passing the null tree on
                // would only fail later with a NullReferenceException, so report it explicitly.
                throw new GraphViewException("This graph pattern is not supported yet.");
            }

            CandidateJoinUnit candidateUnit = this.GetCandidateUnits2(nodeUnits, curComponent);

            // Add it to the current component to generate next states
            MatchComponent newComponent = this.GetNextState(curComponent, candidateUnit);

            if (nextComponentStates.Count >= MaxStates)
            {
                throw new GraphViewException("This graph pattern is not supported yet.");
            }

            nextComponentStates.Add(newComponent);
        }

        componentStates = nextComponentStates;
    }

    return null;
}
/// <summary>
/// Builds the next-state component obtained by joining the node unit rooted at
/// <paramref name="candidateTree"/>'s tree root into a copy of this component.
/// </summary>
/// <param name="candidateTree">The node unit (root plus its materialized and unmaterialized edges) to add.</param>
/// <param name="densityDict">Density per table name; only entries whose table took part in a counted join are used.</param>
/// <param name="statisticsCalculator">Supplies leaf-to-leaf statistics for joins between two edge sinks.</param>
/// <returns>The new component, with its table reference replaced by the result of <c>GetPlanAndUpdateCost</c>.</returns>
public MatchComponent GetNextState(
    OneHeightTree candidateTree,
    Dictionary<string, double> densityDict,
    IMatchJoinStatisticsCalculator statisticsCalculator)
{
    const string nodeIdColumn = "GlobalNodeId";
    const string sinkColumn = "Sink";

    // Builds "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn }),
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn }),
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    MatchComponent nextComponent = new MatchComponent(this);
    var treeRoot = candidateTree.TreeRoot;
    WBooleanExpression joinPredicate = null;
    string rootRefName;

    // Register the root: either another split of an already materialized node, or a new node.
    if (nextComponent.MaterializedNodeSplitCount.ContainsKey(treeRoot))
    {
        nextComponent.MaterializedNodeSplitCount[treeRoot]++;
        rootRefName = nextComponent.GetNodeRefName(treeRoot);
        joinPredicate = columnsEqual(treeRoot.RefAlias, nodeIdColumn, rootRefName, nodeIdColumn);
    }
    else
    {
        rootRefName = treeRoot.RefAlias;
        nextComponent.Nodes.Add(treeRoot);
        nextComponent.MaterializedNodeSplitCount[treeRoot] = 0;
        nextComponent.StatisticsDict[treeRoot] = new ColumnStatistics
        {
            Selectivity = 1.0 / treeRoot.TableRowCount
        };
    }

    // Table references for the two join inputs.
    WTableReference nodeTable = new WNamedTableReference
    {
        Alias = new Identifier { Value = rootRefName },
        TableObjectName = treeRoot.TableObjectName
    };
    WTableReference componentTable = nextComponent.TableRef;

    double joinSelectivity = 1.0;
    double degreeProduct = 1.0;

    // Number of counted joins per table name.
    var joinCountByTable = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);
    Action<string, int> countJoins = (tableName, amount) =>
    {
        int current;
        if (joinCountByTable.TryGetValue(tableName, out current))
        {
            joinCountByTable[tableName] = current + amount;
        }
        else
        {
            joinCountByTable[tableName] = amount;
        }
    };

    // Edges of the component that already point at the root.
    List<MatchEdge> rootIncoming;
    if (nextComponent.UnmaterializedNodeMapping.TryGetValue(treeRoot, out rootIncoming))
    {
        MatchEdge headEdge = rootIncoming.First();
        bool headMaterialized = nextComponent.EdgeMaterilizedDict[headEdge];
        nextComponent.UnmaterializedNodeMapping.Remove(treeRoot);
        joinSelectivity *= 1.0 / treeRoot.TableRowCount;

        if (headMaterialized)
        {
            // Component materialized edge to root
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(headEdge.EdgeAlias, sinkColumn, rootRefName, nodeIdColumn));
            countJoins(treeRoot.TableObjectName.ToString(), 1);
        }
        else
        {
            // Component unmaterialized edge to root: materialize every incoming edge.
            ColumnStatistics rootStatistics = null;
            foreach (MatchEdge edge in rootIncoming)
            {
                componentTable = SpanTableRef(componentTable, edge, nextComponent.GetNodeRefName(edge.SourceNode));
                nextComponent.EdgeMaterilizedDict[edge] = true;
                joinPredicate = WBooleanBinaryExpression.Conjunction(
                    joinPredicate,
                    columnsEqual(edge.EdgeAlias, sinkColumn, rootRefName, nodeIdColumn));
                rootStatistics = ColumnStatistics.UpdateHistogram(
                    rootStatistics,
                    nextComponent.Context.GetEdgeStatistics(edge));
                joinSelectivity *= rootStatistics.Selectivity;
            }

            nextComponent.StatisticsDict[treeRoot] = rootStatistics;
            countJoins(treeRoot.TableObjectName.ToString(), rootIncoming.Count);
        }
    }

    // Materialized edges of the node unit that land on nodes already in the component.
    int sinkToSinkJoins = 0;
    foreach (var unitEdge in candidateTree.MaterializedEdges)
    {
        nodeTable = SpanTableRef(nodeTable, unitEdge, rootRefName);
        degreeProduct *= unitEdge.AverageDegree;
        nextComponent.EdgeMaterilizedDict[unitEdge] = true;
        var sink = unitEdge.SinkNode;

        // Leaf to component materialized node
        if (nextComponent.MaterializedNodeSplitCount.ContainsKey(sink))
        {
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sink.RefAlias, nodeIdColumn));
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                nextComponent.StatisticsDict[sink],
                nextComponent.Context.GetEdgeStatistics(unitEdge));
            joinSelectivity *= sinkStatistics.Selectivity;
            nextComponent.StatisticsDict[sink] = sinkStatistics;
            countJoins(sink.TableObjectName.ToString(), 1);
            continue;
        }

        // Leaf to component unmaterialized node
        List<MatchEdge> sinkIncoming = nextComponent.UnmaterializedNodeMapping[sink];
        MatchEdge sinkHeadEdge = sinkIncoming.First();

        // Leaf to materialized leaf
        if (nextComponent.EdgeMaterilizedDict[sinkHeadEdge])
        {
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sinkHeadEdge.EdgeAlias, sinkColumn));
            sinkToSinkJoins++;
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                nextComponent.StatisticsDict[sink],
                nextComponent.Context.GetEdgeStatistics(unitEdge));
            joinSelectivity *= sinkStatistics.Selectivity;
            nextComponent.StatisticsDict[sink] = sinkStatistics;
            continue;
        }

        // Leaf to unmaterialized leaf
        ColumnStatistics mergedSinkStatistics = null;
        foreach (MatchEdge sinkEdge in sinkIncoming)
        {
            componentTable = SpanTableRef(componentTable, sinkEdge, nextComponent.GetNodeRefName(sinkEdge.SourceNode));
            nextComponent.EdgeMaterilizedDict[sinkEdge] = true;
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sinkEdge.EdgeAlias, sinkColumn));
            sinkToSinkJoins++;
            var leafToLeaf = statisticsCalculator.GetLeafToLeafStatistics(unitEdge, sinkEdge);
            joinSelectivity *= leafToLeaf.Selectivity;
            mergedSinkStatistics = ColumnStatistics.UpdateHistogram(
                mergedSinkStatistics,
                nextComponent.Context.GetEdgeStatistics(sinkEdge));
        }

        nextComponent.StatisticsDict[sink] = mergedSinkStatistics;
    }

    // Unmaterialized edges of the node unit: record them and their sinks as pending.
    foreach (var pendingEdge in candidateTree.UnmaterializedEdges)
    {
        nextComponent.EdgeMaterilizedDict[pendingEdge] = false;
        nextComponent.Nodes.Add(pendingEdge.SinkNode);
        nextComponent.UnmaterializedNodeMapping.GetOrCreate(pendingEdge.SinkNode).Add(pendingEdge);
        degreeProduct *= pendingEdge.AverageDegree;
    }

    // Calculate Estimated Join Selectivity & Estimated Node Size
    double estimatedSelectivity = 1.0;
    int joinsSoFar = 0;
    bool sinkJoinApplied = false;
    foreach (var densityEntry in densityDict)
    {
        int tableJoinCount;
        if (!joinCountByTable.TryGetValue(densityEntry.Key, out tableJoinCount))
        {
            continue;
        }

        var density = densityEntry.Value;
        var tableSelectivity = Math.Pow(density, 2 - Math.Pow(2, 1 - tableJoinCount));

        // The sink-to-sink term is folded in once, before the first table denser than the default.
        if (!sinkJoinApplied && ColumnStatistics.DefaultDensity < density)
        {
            var sinkSelectivity = Math.Pow(
                ColumnStatistics.DefaultDensity,
                2 - Math.Pow(2, 1 - sinkToSinkJoins));
            estimatedSelectivity *= Math.Pow(sinkSelectivity, Math.Pow(2, -joinsSoFar));
            joinsSoFar += sinkToSinkJoins;
            sinkJoinApplied = true;
        }

        estimatedSelectivity *= Math.Pow(tableSelectivity, Math.Pow(2, -joinsSoFar));
        joinsSoFar += tableJoinCount;
    }

    var unitEdgeCount = candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count;
    var estimatedNodeUnitSize = treeRoot.EstimatedRows * Math.Pow(1000, unitEdgeCount);

    // Update Table Reference
    nextComponent.TableRef = GetPlanAndUpdateCost(
        candidateTree,
        nextComponent,
        nodeTable,
        componentTable,
        joinPredicate,
        degreeProduct,
        joinSelectivity,
        estimatedNodeUnitSize,
        estimatedSelectivity);

    return nextComponent;
}
/// <summary>
/// Builds the join between the component and the node unit, choosing the join hint and input
/// order from their sizes, and updates the component's size, cost, memory and estimate fields.
/// </summary>
/// <param name="nodeUnitCandidate">The node unit being joined into the component.</param>
/// <param name="component">The component to update; it is mutated in place.</param>
/// <param name="nodeTable">Table reference for the node unit.</param>
/// <param name="componentTable">Table reference for the component before the join.</param>
/// <param name="joinCondition">Join predicate between the two inputs.</param>
/// <param name="nodeDegrees">Product of the average degrees of the node unit's edges.</param>
/// <param name="joinSelectivity">Selectivity applied to the component's size.</param>
/// <param name="estimatedNodeUnitSize">Estimated size of the node unit.</param>
/// <param name="estimatedSelectivity">Selectivity applied to the component's estimated size.</param>
/// <returns>A parenthesized table reference wrapping the constructed join.</returns>
private static WTableReference GetPlanAndUpdateCost(
    OneHeightTree nodeUnitCandidate,
    MatchComponent component,
    WTableReference nodeTable,
    WTableReference componentTable,
    WBooleanExpression joinCondition,
    double nodeDegrees,
    double joinSelectivity,
    double estimatedNodeUnitSize,
    double estimatedSelectivity)
{
    const double leftDeepSizeFactor = 5;
    const double memoryBudget = 1e8;
    const double loopJoinInnerLimit = 10000;
    const double loopJoinOuterLimit = 1000000;

    var root = nodeUnitCandidate.TreeRoot;
    var unitSize = root.EstimatedRows * nodeDegrees;

    // Snapshot the component's figures before they are updated below.
    var sizeBeforeJoin = component.Size;
    var estimateBeforeJoin = component.EstimateSize;

    // Default plan: hash join with the component as the first input.
    var join = new WQualifiedJoin
    {
        FirstTableRef = componentTable,
        SecondTableRef = nodeTable,
        JoinCondition = joinCondition,
        QualifiedJoinType = QualifiedJoinType.Inner,
        JoinHint = JoinHint.Hash
    };

    // If the node is already in the component, then only multiply the degree to get the size
    bool rootAlreadyJoined = component.MaterializedNodeSplitCount[root] > 0;
    if (rootAlreadyJoined)
    {
        var pendingEdgeFactor = Math.Pow(1000, component.EdgeMaterilizedDict.Count(e => !e.Value));
        var estimateWithoutPendingEdges = component.EstimateSize / pendingEdgeFactor;
        if (root.EstimatedRows > estimateWithoutPendingEdges)
        {
            component.EstimateSize = estimatedNodeUnitSize * pendingEdgeFactor * estimatedSelectivity;
        }
        else
        {
            component.EstimateSize = component.EstimateSize *
                                     Math.Pow(1000, nodeUnitCandidate.UnmaterializedEdges.Count) *
                                     estimatedSelectivity;
        }
    }
    else
    {
        component.EstimateSize *= estimatedNodeUnitSize * estimatedSelectivity;
    }

    // Update size and cost.
    component.Size *= (rootAlreadyJoined ? nodeDegrees : unitSize) * joinSelectivity;
    component.Cost += unitSize + sizeBeforeJoin;

    // Update TableRef
    bool isFirstJoin = component.MaterializedNodeSplitCount.Count == 2 &&
                       component.MaterializedNodeSplitCount.All(e => e.Value == 0);
    if (isFirstJoin)
    {
        // Only consider the size in the first join
        var otherNode = component.MaterializedNodeSplitCount.Keys.First(e => e != root);
        if (unitSize < sizeBeforeJoin)
        {
            join.FirstTableRef = nodeTable;
            join.SecondTableRef = componentTable;
            component.TotalMemory = component.DeltaMemory = unitSize;
            component.EstimateTotalMemory = component.EstimateDeltaMemory = estimatedNodeUnitSize;
            component.RightestTableRefSize = otherNode.EstimatedRows;
            component.FatherOfRightestTableRef =
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(otherNode));
            AdjustEstimation(
                component,
                nodeTable,
                join,
                unitSize,
                estimatedNodeUnitSize,
                root.EstimatedRows,
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root)));
        }
        else
        {
            component.TotalMemory = component.DeltaMemory = sizeBeforeJoin;
            component.EstimateTotalMemory = component.EstimateDeltaMemory = component.EstimateSize;
            component.RightestTableRefSize = root.EstimatedRows;
            component.FatherOfRightestTableRef =
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));
            AdjustEstimation(
                component,
                componentTable,
                join,
                sizeBeforeJoin,
                estimateBeforeJoin,
                otherNode.EstimatedRows,
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(otherNode)));
        }
    }
    else if (sizeBeforeJoin * leftDeepSizeFactor < unitSize)
    {
        // Left Deep
        var deltaMemory = sizeBeforeJoin;
        component.TotalMemory = component.DeltaMemory + deltaMemory;
        component.DeltaMemory = deltaMemory;

        var deltaEstimateMemory = component.EstimateSize;
        component.EstimateTotalMemory = component.EstimateDeltaMemory + deltaEstimateMemory;
        component.EstimateDeltaMemory = deltaEstimateMemory;

        // Adjust estimation in sql server
        AdjustEstimation(
            component,
            componentTable,
            join,
            sizeBeforeJoin,
            estimateBeforeJoin,
            component.RightestTableRefSize,
            component.FatherOfRightestTableRef);
        component.FatherOfRightestTableRef =
            new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));
        component.RightestTableRefSize = root.EstimatedRows;
    }
    else if (((unitSize < loopJoinInnerLimit && sizeBeforeJoin < loopJoinOuterLimit) ||
              component.DeltaMemory + sizeBeforeJoin > memoryBudget) &&
             nodeUnitCandidate.MaterializedEdges.Count == 0)
    {
        // Loop Join
        component.TotalMemory = component.DeltaMemory;
        component.EstimateTotalMemory = component.EstimateDeltaMemory;
        join.JoinHint = JoinHint.Loop;
        component.EstimateSize = estimateBeforeJoin * estimatedNodeUnitSize / root.TableRowCount;
    }
    else
    {
        // Right Deep
        join.FirstTableRef = nodeTable;
        join.SecondTableRef = componentTable;
        AdjustEstimation(
            component,
            nodeTable,
            join,
            unitSize,
            estimatedNodeUnitSize,
            root.EstimatedRows,
            new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root)));
        component.TotalMemory += unitSize;
        component.DeltaMemory = component.TotalMemory;
        component.EstimateTotalMemory += estimatedNodeUnitSize;
        component.EstimateDeltaMemory = component.EstimateTotalMemory;
    }

    return new WParenthesisTableReference
    {
        Table = join
    };
}
/// <summary>
/// Builds the candidate join unit for a node unit: one edge is chosen to be materialized
/// before the join, and the remaining edges touching the component are split into
/// post-materialized incoming and outgoing edges.
/// </summary>
/// <param name="tree">The node unit whose root is being joined.</param>
/// <param name="component">The component the node unit is joined into.</param>
/// <returns>The candidate join unit describing how each edge of the unit is handled.</returns>
/// <exception cref="GraphViewException">Thrown when no edge connects the node unit to the component.</exception>
private CandidateJoinUnit GetCandidateUnits2(OneHeightTree tree, MatchComponent component)
{
    var reversedEdges = Graph.ReversedEdgeDict;
    var treeRoot = tree.TreeRoot;

    // Edges of the component that already point at the root (null when there are none).
    List<MatchEdge> pendingIncoming;
    component.UnmaterializedNodeMapping.TryGetValue(treeRoot, out pendingIncoming);

    // Split the unit's own edges by whether their sink is already in the component.
    var edgesIntoComponent = new List<MatchEdge>();
    var edgesToNewNodes = new List<MatchEdge>();
    foreach (var edge in tree.Edges)
    {
        if (component.Nodes.Contains(edge.SinkNode))
        {
            edgesIntoComponent.Add(edge);
        }
        else
        {
            edgesToNewNodes.Add(edge);
        }
    }

    // directedEdges keeps each connecting edge with its direction relative to the root;
    // incomingCandidates holds the same edges expressed as incoming (out-edges are reversed).
    Dictionary<string, Tuple<MatchEdge, EdgeDir>> directedEdges;
    Dictionary<string, MatchEdge> incomingCandidates;
    if (pendingIncoming == null)
    {
        directedEdges = new Dictionary<string, Tuple<MatchEdge, EdgeDir>>();
        incomingCandidates = new Dictionary<string, MatchEdge>();
    }
    else
    {
        directedEdges = pendingIncoming.ToDictionary(
            edge => edge.EdgeAlias,
            edge => new Tuple<MatchEdge, EdgeDir>(edge, EdgeDir.In));
        incomingCandidates = pendingIncoming.ToDictionary(edge => edge.EdgeAlias);
    }

    foreach (var edge in edgesIntoComponent)
    {
        var alias = edge.EdgeAlias;
        directedEdges.Add(alias, new Tuple<MatchEdge, EdgeDir>(edge, EdgeDir.Out));
        incomingCandidates.Add(alias, reversedEdges[alias]);
    }

    if (!incomingCandidates.Any())
    {
        throw new GraphViewException("This graph pattern is not yet supported.");
    }

    // Prefer a non-reversed edge as the pre-materialized one; otherwise take the first.
    var chosen = incomingCandidates.FirstOrDefault(e => e.Value.IsReversed == false);
    if (chosen.Value == null)
    {
        chosen = incomingCandidates.First();
    }

    var postIncoming = new List<MatchEdge>();
    var postOutgoing = new List<MatchEdge>();
    foreach (var entry in directedEdges)
    {
        if (entry.Key == chosen.Key)
        {
            continue;
        }

        var edge = entry.Value.Item1;
        var direction = entry.Value.Item2;
        if (direction == EdgeDir.In)
        {
            postIncoming.Add(edge);
        }
        else if (direction == EdgeDir.Out)
        {
            // Both edge will be forced to choose the incoming direction type
            if (edge.EdgeType == WEdgeType.BothEdge)
            {
                postIncoming.Add(reversedEdges[edge.EdgeAlias]);
            }
            else
            {
                postOutgoing.Add(edge);
            }
        }
    }

    return new CandidateJoinUnit
    {
        TreeRoot = treeRoot,
        PreMatIncomingEdges = new List<MatchEdge> { chosen.Value },
        PreMatOutgoingEdges = new List<MatchEdge>(),
        PostMatIncomingEdges = postIncoming,
        PostMatOutgoingEdges = postOutgoing,
        UnmaterializedEdges = edgesToNewNodes,
    };
}
/// <summary>
/// Builds the candidate join unit for a node unit and records, per source node alias in the
/// component's node-to-materialized-edges dictionary, how each connecting edge is used
/// (traversal edge, remaining edge or reverse-check edge).
/// </summary>
/// <param name="tree">The node unit whose root is being joined.</param>
/// <param name="component">The component the node unit is joined into; its dictionary is updated.</param>
/// <returns>The candidate join unit describing how each edge of the unit is handled.</returns>
/// <exception cref="GraphViewException">Thrown when no edge connects the node unit to the component.</exception>
private CandidateJoinUnit GetCandidateUnits(OneHeightTree tree, MatchComponent component)
{
    var edgesByNode = component.NodeToMaterializedEdgesDict;
    var reversedEdges = Graph.ReversedEdgeDict;
    var treeRoot = tree.TreeRoot;

    // Start the root with an empty edge list before anything else is recorded.
    edgesByNode[treeRoot.NodeAlias] = new List<Tuple<MatchEdge, MaterializedEdgeType>>();

    // Edges of the component that already point at the root (null when there are none).
    List<MatchEdge> pendingIncoming;
    component.UnmaterializedNodeMapping.TryGetValue(treeRoot, out pendingIncoming);

    // Split the unit's own edges by whether their sink is already in the component.
    var edgesIntoComponent = new List<MatchEdge>();
    var edgesToNewNodes = new List<MatchEdge>();
    foreach (var edge in tree.Edges)
    {
        if (component.Nodes.Contains(edge.SinkNode))
        {
            edgesIntoComponent.Add(edge);
        }
        else
        {
            edgesToNewNodes.Add(edge);
        }
    }

    Dictionary<string, Tuple<MatchEdge, EdgeDir>> directedEdges;
    Dictionary<string, MatchEdge> incomingCandidates;
    if (pendingIncoming == null)
    {
        directedEdges = new Dictionary<string, Tuple<MatchEdge, EdgeDir>>();
        incomingCandidates = new Dictionary<string, MatchEdge>();
    }
    else
    {
        directedEdges = pendingIncoming.ToDictionary(
            edge => edge.EdgeAlias,
            edge => new Tuple<MatchEdge, EdgeDir>(edge, EdgeDir.In));
        incomingCandidates = pendingIncoming.ToDictionary(edge => edge.EdgeAlias);
    }

    foreach (var edge in edgesIntoComponent)
    {
        var alias = edge.EdgeAlias;
        directedEdges.Add(alias, new Tuple<MatchEdge, EdgeDir>(edge, EdgeDir.Out));
        incomingCandidates.Add(alias, reversedEdges[alias]);
    }

    if (!incomingCandidates.Any())
    {
        throw new GraphViewException("This graph pattern is not yet supported.");
    }

    // Prefer a non-reversed edge as the pre-materialized one; otherwise take the first.
    var chosen = incomingCandidates.FirstOrDefault(e => e.Value.IsReversed == false);
    if (chosen.Value == null)
    {
        chosen = incomingCandidates.First();
    }

    // The traversal edge is recorded first, ahead of the remaining edges.
    edgesByNode[chosen.Value.SourceNode.NodeAlias].Add(
        new Tuple<MatchEdge, MaterializedEdgeType>(chosen.Value, MaterializedEdgeType.TraversalEdge));

    var postIncoming = new List<MatchEdge>();
    var postOutgoing = new List<MatchEdge>();
    foreach (var entry in directedEdges)
    {
        if (entry.Key == chosen.Key)
        {
            continue;
        }

        var edge = entry.Value.Item1;
        var direction = entry.Value.Item2;

        if (direction == EdgeDir.In)
        {
            postIncoming.Add(edge);
        }
        else if (direction == EdgeDir.Out)
        {
            postOutgoing.Add(edge);
        }

        var usage = direction == EdgeDir.In
            ? MaterializedEdgeType.RemainingEdge
            : MaterializedEdgeType.ReverseCheckEdge;
        edgesByNode[edge.SourceNode.NodeAlias].Add(new Tuple<MatchEdge, MaterializedEdgeType>(edge, usage));
    }

    return new CandidateJoinUnit
    {
        TreeRoot = treeRoot,
        PreMatIncomingEdges = new List<MatchEdge> { chosen.Value },
        PreMatOutgoingEdges = new List<MatchEdge>(),
        PostMatIncomingEdges = postIncoming,
        PostMatOutgoingEdges = postOutgoing,
        UnmaterializedEdges = edgesToNewNodes,
    };
}
/// <summary>
/// Builds the next-state component obtained by joining the node unit rooted at
/// <paramref name="candidateTree"/>'s tree root into a copy of this component.
/// </summary>
/// <param name="candidateTree">The node unit (root plus its materialized and unmaterialized edges) to add.</param>
/// <param name="densityDict">Density per table name; only entries whose table took part in a counted join are used.</param>
/// <param name="statisticsCalculator">Supplies leaf-to-leaf statistics for joins between two edge sinks.</param>
/// <returns>The new component, with its table reference replaced by the result of <c>GetPlanAndUpdateCost</c>.</returns>
public MatchComponent GetNextState(
    OneHeightTree candidateTree,
    Dictionary<string, double> densityDict,
    IMatchJoinStatisticsCalculator statisticsCalculator)
{
    const string nodeIdColumn = "GlobalNodeId";
    const string sinkColumn = "Sink";

    // Builds "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn }),
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn }),
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    MatchComponent nextComponent = new MatchComponent(this);
    var treeRoot = candidateTree.TreeRoot;
    WBooleanExpression joinPredicate = null;
    string rootRefName;

    // Register the root: either another split of an already materialized node, or a new node.
    if (nextComponent.MaterializedNodeSplitCount.ContainsKey(treeRoot))
    {
        nextComponent.MaterializedNodeSplitCount[treeRoot]++;
        rootRefName = nextComponent.GetNodeRefName(treeRoot);
        joinPredicate = columnsEqual(treeRoot.RefAlias, nodeIdColumn, rootRefName, nodeIdColumn);
    }
    else
    {
        rootRefName = treeRoot.RefAlias;
        nextComponent.Nodes.Add(treeRoot);
        nextComponent.MaterializedNodeSplitCount[treeRoot] = 0;
        nextComponent.StatisticsDict[treeRoot] = new ColumnStatistics
        {
            Selectivity = 1.0 / treeRoot.TableRowCount
        };
    }

    // Table references for the two join inputs.
    WTableReference nodeTable = new WNamedTableReference
    {
        Alias = new Identifier { Value = rootRefName },
        TableObjectName = treeRoot.TableObjectName
    };
    WTableReference componentTable = nextComponent.TableRef;

    double joinSelectivity = 1.0;
    double degreeProduct = 1.0;

    // Number of counted joins per table name.
    var joinCountByTable = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);
    Action<string, int> countJoins = (tableName, amount) =>
    {
        int current;
        if (joinCountByTable.TryGetValue(tableName, out current))
        {
            joinCountByTable[tableName] = current + amount;
        }
        else
        {
            joinCountByTable[tableName] = amount;
        }
    };

    // Edges of the component that already point at the root.
    List<MatchEdge> rootIncoming;
    if (nextComponent.UnmaterializedNodeMapping.TryGetValue(treeRoot, out rootIncoming))
    {
        MatchEdge headEdge = rootIncoming.First();
        bool headMaterialized = nextComponent.EdgeMaterilizedDict[headEdge];
        nextComponent.UnmaterializedNodeMapping.Remove(treeRoot);
        joinSelectivity *= 1.0 / treeRoot.TableRowCount;

        if (headMaterialized)
        {
            // Component materialized edge to root
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(headEdge.EdgeAlias, sinkColumn, rootRefName, nodeIdColumn));
            countJoins(treeRoot.TableObjectName.ToString(), 1);
        }
        else
        {
            // Component unmaterialized edge to root: materialize every incoming edge.
            ColumnStatistics rootStatistics = null;
            foreach (MatchEdge edge in rootIncoming)
            {
                componentTable = SpanTableRef(componentTable, edge, nextComponent.GetNodeRefName(edge.SourceNode));
                nextComponent.EdgeMaterilizedDict[edge] = true;
                joinPredicate = WBooleanBinaryExpression.Conjunction(
                    joinPredicate,
                    columnsEqual(edge.EdgeAlias, sinkColumn, rootRefName, nodeIdColumn));
                rootStatistics = ColumnStatistics.UpdateHistogram(
                    rootStatistics,
                    nextComponent.Context.GetEdgeStatistics(edge));
                joinSelectivity *= rootStatistics.Selectivity;
            }

            nextComponent.StatisticsDict[treeRoot] = rootStatistics;
            countJoins(treeRoot.TableObjectName.ToString(), rootIncoming.Count);
        }
    }

    // Materialized edges of the node unit that land on nodes already in the component.
    int sinkToSinkJoins = 0;
    foreach (var unitEdge in candidateTree.MaterializedEdges)
    {
        nodeTable = SpanTableRef(nodeTable, unitEdge, rootRefName);
        degreeProduct *= unitEdge.AverageDegree;
        nextComponent.EdgeMaterilizedDict[unitEdge] = true;
        var sink = unitEdge.SinkNode;

        // Leaf to component materialized node
        if (nextComponent.MaterializedNodeSplitCount.ContainsKey(sink))
        {
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sink.RefAlias, nodeIdColumn));
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                nextComponent.StatisticsDict[sink],
                nextComponent.Context.GetEdgeStatistics(unitEdge));
            joinSelectivity *= sinkStatistics.Selectivity;
            nextComponent.StatisticsDict[sink] = sinkStatistics;
            countJoins(sink.TableObjectName.ToString(), 1);
            continue;
        }

        // Leaf to component unmaterialized node
        List<MatchEdge> sinkIncoming = nextComponent.UnmaterializedNodeMapping[sink];
        MatchEdge sinkHeadEdge = sinkIncoming.First();

        // Leaf to materialized leaf
        if (nextComponent.EdgeMaterilizedDict[sinkHeadEdge])
        {
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sinkHeadEdge.EdgeAlias, sinkColumn));
            sinkToSinkJoins++;
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                nextComponent.StatisticsDict[sink],
                nextComponent.Context.GetEdgeStatistics(unitEdge));
            joinSelectivity *= sinkStatistics.Selectivity;
            nextComponent.StatisticsDict[sink] = sinkStatistics;
            continue;
        }

        // Leaf to unmaterialized leaf
        ColumnStatistics mergedSinkStatistics = null;
        foreach (MatchEdge sinkEdge in sinkIncoming)
        {
            componentTable = SpanTableRef(componentTable, sinkEdge, nextComponent.GetNodeRefName(sinkEdge.SourceNode));
            nextComponent.EdgeMaterilizedDict[sinkEdge] = true;
            joinPredicate = WBooleanBinaryExpression.Conjunction(
                joinPredicate,
                columnsEqual(unitEdge.EdgeAlias, sinkColumn, sinkEdge.EdgeAlias, sinkColumn));
            sinkToSinkJoins++;
            var leafToLeaf = statisticsCalculator.GetLeafToLeafStatistics(unitEdge, sinkEdge);
            joinSelectivity *= leafToLeaf.Selectivity;
            mergedSinkStatistics = ColumnStatistics.UpdateHistogram(
                mergedSinkStatistics,
                nextComponent.Context.GetEdgeStatistics(sinkEdge));
        }

        nextComponent.StatisticsDict[sink] = mergedSinkStatistics;
    }

    // Unmaterialized edges of the node unit: record them and their sinks as pending.
    foreach (var pendingEdge in candidateTree.UnmaterializedEdges)
    {
        nextComponent.EdgeMaterilizedDict[pendingEdge] = false;
        nextComponent.Nodes.Add(pendingEdge.SinkNode);
        nextComponent.UnmaterializedNodeMapping.GetOrCreate(pendingEdge.SinkNode).Add(pendingEdge);
        degreeProduct *= pendingEdge.AverageDegree;
    }

    // Calculate Estimated Join Selectivity & Estimated Node Size
    double estimatedSelectivity = 1.0;
    int joinsSoFar = 0;
    bool sinkJoinApplied = false;
    foreach (var densityEntry in densityDict)
    {
        int tableJoinCount;
        if (!joinCountByTable.TryGetValue(densityEntry.Key, out tableJoinCount))
        {
            continue;
        }

        var density = densityEntry.Value;
        var tableSelectivity = Math.Pow(density, 2 - Math.Pow(2, 1 - tableJoinCount));

        // The sink-to-sink term is folded in once, before the first table denser than the default.
        if (!sinkJoinApplied && ColumnStatistics.DefaultDensity < density)
        {
            var sinkSelectivity = Math.Pow(
                ColumnStatistics.DefaultDensity,
                2 - Math.Pow(2, 1 - sinkToSinkJoins));
            estimatedSelectivity *= Math.Pow(sinkSelectivity, Math.Pow(2, -joinsSoFar));
            joinsSoFar += sinkToSinkJoins;
            sinkJoinApplied = true;
        }

        estimatedSelectivity *= Math.Pow(tableSelectivity, Math.Pow(2, -joinsSoFar));
        joinsSoFar += tableJoinCount;
    }

    var unitEdgeCount = candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count;
    var estimatedNodeUnitSize = treeRoot.EstimatedRows * Math.Pow(1000, unitEdgeCount);

    // Update Table Reference
    nextComponent.TableRef = GetPlanAndUpdateCost(
        candidateTree,
        nextComponent,
        nodeTable,
        componentTable,
        joinPredicate,
        degreeProduct,
        joinSelectivity,
        estimatedNodeUnitSize,
        estimatedSelectivity);

    return nextComponent;
}
/// <summary>
/// Joins a node unit onto a component: updates the component's size, cost and
/// memory bookkeeping in place, then decides the operand order and join hint
/// of the newly created join.
/// </summary>
/// <param name="nodeUnitCandidate">Node unit whose root is being joined in.</param>
/// <param name="component">Component being extended; its statistics are mutated.</param>
/// <param name="nodeTable">Table reference for the node unit's root.</param>
/// <param name="componentTable">Table reference for the component built so far.</param>
/// <param name="joinCondition">Condition attached to the new join.</param>
/// <param name="nodeDegrees">Factor multiplied with the root's estimated rows to obtain the node unit size.</param>
/// <param name="joinSelectivity">Selectivity multiplied into the component's size.</param>
/// <param name="estimatedNodeUnitSize">Estimated size of the node unit.</param>
/// <param name="estimatedSelectivity">Selectivity multiplied into the component's estimated size.</param>
/// <returns>The new join, wrapped in a parenthesis table reference.</returns>
private static WTableReference GetPlanAndUpdateCost(
    OneHeightTree nodeUnitCandidate,
    MatchComponent component,
    WTableReference nodeTable,
    WTableReference componentTable,
    WBooleanExpression joinCondition,
    double nodeDegrees,
    double joinSelectivity,
    double estimatedNodeUnitSize,
    double estimatedSelectivity)
{
    // Tuning values used when this is not the component's first join.
    const double SizeFactor = 5;
    const double MaxMemory = 1e8;
    const double LoopJoinInnerThreshold = 10000;
    const double LoopJoinOuterThreshold = 1000000;

    var root = nodeUnitCandidate.TreeRoot;

    // Values captured before the component is mutated below.
    var unitSize = root.EstimatedRows * nodeDegrees;
    var priorSize = component.Size;
    var priorEstimateSize = component.EstimateSize;
    var joinCost = unitSize + priorSize;

    // Default plan: component on the left, node on the right, hash join.
    WQualifiedJoin join = new WQualifiedJoin
    {
        FirstTableRef = componentTable,
        SecondTableRef = nodeTable,
        JoinCondition = joinCondition,
        QualifiedJoinType = QualifiedJoinType.Inner,
        JoinHint = JoinHint.Hash
    };

    double effectiveUnitSize;
    if (component.MaterializedNodeSplitCount[root] > 0)
    {
        // The root is already part of the component: only its degree scales the size.
        effectiveUnitSize = nodeDegrees;

        var pendingEdgeFactor = Math.Pow(1000, component.EdgeMaterilizedDict.Count(e => !e.Value));
        var componentBaseEstimate = component.EstimateSize / pendingEdgeFactor;

        component.EstimateSize = root.EstimatedRows > componentBaseEstimate
            ? estimatedNodeUnitSize * pendingEdgeFactor * estimatedSelectivity
            : component.EstimateSize * Math.Pow(1000, nodeUnitCandidate.UnmaterializedEdges.Count) * estimatedSelectivity;
    }
    else
    {
        effectiveUnitSize = unitSize;
        component.EstimateSize *= estimatedNodeUnitSize * estimatedSelectivity;
    }

    // Fold the join into the running size and cost.
    component.Size *= effectiveUnitSize * joinSelectivity;
    component.Cost += joinCost;

    // First join: the component holds exactly two nodes and neither has been split.
    bool isFirstJoin = component.MaterializedNodeSplitCount.Count == 2 &&
                       component.MaterializedNodeSplitCount.All(e => e.Value == 0);

    if (isFirstJoin)
    {
        // Only the sizes of the two sides decide the operand order here.
        var otherNode = component.MaterializedNodeSplitCount.Keys.First(e => e != root);

        if (unitSize < priorSize)
        {
            // The node unit is the smaller side, so it becomes the first operand.
            join.FirstTableRef = nodeTable;
            join.SecondTableRef = componentTable;

            component.TotalMemory = component.DeltaMemory = unitSize;
            component.EstimateTotalMemory = component.EstimateDeltaMemory = estimatedNodeUnitSize;
            component.RightestTableRefSize = otherNode.EstimatedRows;
            component.FatherOfRightestTableRef =
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(otherNode));

            var rootRef = new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));
            AdjustEstimation(component, nodeTable, join, unitSize, estimatedNodeUnitSize,
                root.EstimatedRows, rootRef);
        }
        else
        {
            component.TotalMemory = component.DeltaMemory = priorSize;
            component.EstimateTotalMemory = component.EstimateDeltaMemory = component.EstimateSize;
            component.RightestTableRefSize = root.EstimatedRows;
            component.FatherOfRightestTableRef =
                new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));

            var otherRef = new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(otherNode));
            AdjustEstimation(component, componentTable, join, priorSize, priorEstimateSize,
                otherNode.EstimatedRows, otherRef);
        }
    }
    else if (priorSize * SizeFactor < unitSize)
    {
        // Left deep: keep the component as the first operand.
        component.TotalMemory = component.DeltaMemory + priorSize;
        component.DeltaMemory = priorSize;

        var estimateDelta = component.EstimateSize;
        component.EstimateTotalMemory = component.EstimateDeltaMemory + estimateDelta;
        component.EstimateDeltaMemory = estimateDelta;

        // Adjust estimation in sql server, using the rightmost reference recorded so far.
        AdjustEstimation(component, componentTable, join, priorSize, priorEstimateSize,
            component.RightestTableRefSize, component.FatherOfRightestTableRef);

        component.FatherOfRightestTableRef =
            new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));
        component.RightestTableRefSize = root.EstimatedRows;
    }
    else
    {
        bool bothSidesSmall = unitSize < LoopJoinInnerThreshold && priorSize < LoopJoinOuterThreshold;

        if ((bothSidesSmall || component.DeltaMemory + priorSize > MaxMemory) &&
            nodeUnitCandidate.MaterializedEdges.Count == 0)
        {
            // Loop join
            component.TotalMemory = component.DeltaMemory;
            component.EstimateTotalMemory = component.EstimateDeltaMemory;
            join.JoinHint = JoinHint.Loop;
            component.EstimateSize = priorEstimateSize * estimatedNodeUnitSize / root.TableRowCount;
        }
        else
        {
            // Right deep: the node unit becomes the first operand.
            join.FirstTableRef = nodeTable;
            join.SecondTableRef = componentTable;

            var rootRef = new Tuple<WQualifiedJoin, string>(join, component.GetNodeRefName(root));
            AdjustEstimation(component, nodeTable, join, unitSize, estimatedNodeUnitSize,
                root.EstimatedRows, rootRef);

            component.TotalMemory += unitSize;
            component.DeltaMemory = component.TotalMemory;
            component.EstimateTotalMemory += estimatedNodeUnitSize;
            component.EstimateDeltaMemory = component.EstimateTotalMemory;
        }
    }

    return new WParenthesisTableReference { Table = join };
}
/// <summary>
/// Builds the candidate join unit used to attach the root of <paramref name="tree"/>
/// to <paramref name="component"/>.
/// </summary>
/// <remarks>
/// Edges of the tree whose sink node alias is already in the component are treated
/// as outgoing edges to join on; the rest are reported as unmaterialized edges.
/// Incoming edges are the ones the component has recorded for the root in its
/// unmaterialized node mapping. Exactly one edge is chosen for pre-materialization:
/// the first candidate whose in-edge form is not reversed, or the first candidate
/// if all are reversed. The remaining edges are split by direction.
/// </remarks>
/// <param name="tree">Node unit to attach; must not be null.</param>
/// <param name="component">Component the node unit is attached to.</param>
/// <returns>The candidate join unit describing how the root is joined in.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
/// <exception cref="GraphViewException">
/// The root has neither a recorded incoming edge nor an edge into the component.
/// </exception>
private CandidateJoinUnit GetCandidateUnits2(OneHeightTree tree, MatchComponent component)
{
    // The traversal loop can hand over a null node unit when it finds no next unit
    // but the component is not complete; fail with a clear error instead of an
    // opaque NullReferenceException on tree.TreeRoot.
    if (tree == null)
    {
        throw new ArgumentNullException("tree");
    }

    Dictionary<string, MatchEdge> revEdgeDict = this.Graph.ReversedEdgeDict;
    MatchNode root = tree.TreeRoot;

    // Incoming edges recorded for the root; stays null when there are none.
    List<MatchEdge> inEdges;
    component.UnmaterializedNodeMapping.TryGetValue(root, out inEdges);

    // Partition the tree's edges by whether their sink is already in the component.
    List<MatchEdge> outEdges = new List<MatchEdge>();
    List<MatchEdge> unpopEdges = new List<MatchEdge>();
    foreach (MatchEdge edge in tree.Edges)
    {
        if (component.Nodes.ContainsKey(edge.SinkNode.NodeAlias))
        {
            outEdges.Add(edge);
        }
        else
        {
            unpopEdges.Add(edge);
        }
    }

    // rawEdges: every candidate edge with its direction, keyed by edge alias.
    // extInEdges: the same candidates expressed as incoming edges (outgoing edges
    // are replaced by their entry in the reversed-edge dictionary).
    // Add() is used on purpose so a duplicate alias still raises ArgumentException.
    Dictionary<string, Tuple<MatchEdge, EdgeDir>> rawEdges =
        new Dictionary<string, Tuple<MatchEdge, EdgeDir>>();
    Dictionary<string, MatchEdge> extInEdges = new Dictionary<string, MatchEdge>();

    if (inEdges != null)
    {
        foreach (MatchEdge inEdge in inEdges)
        {
            rawEdges.Add(inEdge.EdgeAlias, new Tuple<MatchEdge, EdgeDir>(inEdge, EdgeDir.In));
        }

        foreach (MatchEdge inEdge in inEdges)
        {
            extInEdges.Add(inEdge.EdgeAlias, inEdge);
        }
    }

    foreach (MatchEdge outEdge in outEdges)
    {
        string key = outEdge.EdgeAlias;
        rawEdges.Add(key, new Tuple<MatchEdge, EdgeDir>(outEdge, EdgeDir.Out));
        extInEdges.Add(key, revEdgeDict[key]);
    }

    if (!extInEdges.Any())
    {
        throw new GraphViewException("This graph pattern is not yet supported.");
    }

    // Prefer a non-reversed edge for pre-materialization; otherwise take the first one.
    // FirstOrDefault yields a default KeyValuePair (null Value) when nothing matches.
    KeyValuePair<string, MatchEdge> firstEdge =
        extInEdges.FirstOrDefault(e => e.Value.IsReversed == false);
    if (firstEdge.Value == null)
    {
        firstEdge = extInEdges.First();
    }

    string preMatKey = firstEdge.Key;

    // Every other candidate is applied after materialization, grouped by direction.
    List<MatchEdge> postMatIncomingEdges = new List<MatchEdge>();
    List<MatchEdge> postMatOutgoingEdges = new List<MatchEdge>();
    foreach (KeyValuePair<string, Tuple<MatchEdge, EdgeDir>> entry in rawEdges)
    {
        if (entry.Key == preMatKey)
        {
            continue;
        }

        if (entry.Value.Item2 == EdgeDir.In)
        {
            postMatIncomingEdges.Add(entry.Value.Item1);
        }
        else if (entry.Value.Item2 == EdgeDir.Out)
        {
            postMatOutgoingEdges.Add(entry.Value.Item1);
        }
    }

    return new CandidateJoinUnit
    {
        TreeRoot = root,
        PreMatIncomingEdges = new List<MatchEdge> { firstEdge.Value },
        PreMatOutgoingEdges = new List<MatchEdge>(),
        PostMatIncomingEdges = postMatIncomingEdges,
        PostMatOutgoingEdges = postMatOutgoingEdges,
        UnmaterializedEdges = unpopEdges,
    };
}