/// <summary>
/// Transits from the current component to the new component of the next state
/// by joining the given node unit (a one-height tree) to a copy of this component.
/// </summary>
/// <param name="candidateTree">
/// The node unit to join: its <c>TreeRoot</c>, <c>MaterializedEdges</c> and
/// <c>UnmaterializedEdges</c> are read.
/// </param>
/// <param name="densityDict">
/// Densities keyed by table name. Only entries whose key matches (case-insensitively)
/// the <c>TableObjectName</c> of a node joined on its <c>GlobalNodeId</c> here are used.
/// </param>
/// <param name="statisticsCalculator">
/// Supplies the leaf-to-leaf statistics used when two not-yet-materialized edges are joined on their sinks.
/// </param>
/// <returns>
/// The new component, created with <c>new MatchComponent(this)</c> and then extended with the candidate tree;
/// its <c>TableRef</c> is the result of <c>GetPlanAndUpdateCost</c>.
/// </returns>
public MatchComponent GetNextState(
    OneHeightTree candidateTree,
    Dictionary<string, double> densityDict,
    IMatchJoinStatisticsCalculator statisticsCalculator)
{
    // Builds the predicate "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn })
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn })
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    // Number of GlobalNodeId joins per table name; consumed by the selectivity estimate at the end.
    var densityCount = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);
    Action<string, int> countJoins = (tableName, joins) =>
    {
        int current;
        densityCount.TryGetValue(tableName, out current); // current stays 0 when the key is absent
        densityCount[tableName] = current + joins;
    };

    var newComponent = new MatchComponent(this);
    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update nodes
    if (newComponent.MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already materialized in the component: reference it under a new
        // name and tie that name back to the root alias on GlobalNodeId.
        newComponent.MaterializedNodeSplitCount[root]++;
        nodeName = newComponent.GetNodeRefName(root);
        joinCondition = columnsEqual(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        newComponent.Nodes.Add(root);
        newComponent.MaterializedNodeSplitCount[root] = 0;
        newComponent.StatisticsDict[root] = new ColumnStatistics { Selectivity = 1.0 / root.TableRowCount };
    }

    // Construct table references
    WTableReference nodeTable = new WNamedTableReference
    {
        Alias = new Identifier { Value = nodeName },
        TableObjectName = root.TableObjectName
    };
    WTableReference compTable = newComponent.TableRef;

    // Update join conditions
    double selectivity = 1.0;
    double degrees = 1.0;

    List<MatchEdge> inEdges;
    if (newComponent.UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var firstEdge = inEdges.First();
        bool materialized = newComponent.EdgeMaterilizedDict[firstEdge];
        newComponent.UnmaterializedNodeMapping.Remove(root);
        selectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
            countJoins(root.TableObjectName.ToString(), 1);
        }
        else
        {
            // Component unmaterialized edges to root: materialize every incoming edge
            ColumnStatistics rootStatistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                compTable = SpanTableRef(compTable, edge, newComponent.GetNodeRefName(edge.SourceNode));
                newComponent.EdgeMaterilizedDict[edge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
                rootStatistics = ColumnStatistics.UpdateHistogram(
                    rootStatistics,
                    newComponent.Context.GetEdgeStatistics(edge));
                selectivity *= rootStatistics.Selectivity;
            }

            newComponent.StatisticsDict[root] = rootStatistics;
            countJoins(root.TableObjectName.ToString(), inEdges.Count);
        }
    }

    var jointEdges = candidateTree.MaterializedEdges;
    int sinkToSinkCount = 0;
    foreach (var jointEdge in jointEdges)
    {
        // Update node table
        nodeTable = SpanTableRef(nodeTable, jointEdge, nodeName);
        degrees *= jointEdge.AverageDegree;

        newComponent.EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (newComponent.MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                newComponent.StatisticsDict[sinkNode],
                newComponent.Context.GetEdgeStatistics(jointEdge));
            selectivity *= sinkStatistics.Selectivity;
            newComponent.StatisticsDict[sinkNode] = sinkStatistics;
            countJoins(sinkNode.TableObjectName.ToString(), 1);
            continue;
        }

        // Leaf to component unmaterialized node
        inEdges = newComponent.UnmaterializedNodeMapping[sinkNode];
        var firstSinkEdge = inEdges.First();
        bool sinkEdgeMaterialized = newComponent.EdgeMaterilizedDict[firstSinkEdge];

        if (sinkEdgeMaterialized)
        {
            // Leaf to materialized leaf: join the two edges on their sinks
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", firstSinkEdge.EdgeAlias, "Sink"));
            sinkToSinkCount++;
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                newComponent.StatisticsDict[sinkNode],
                newComponent.Context.GetEdgeStatistics(jointEdge));
            selectivity *= sinkStatistics.Selectivity;
            newComponent.StatisticsDict[sinkNode] = sinkStatistics;
        }
        else
        {
            // Leaf to unmaterialized leaf: materialize each component-side edge and
            // join it with the candidate edge on their sinks
            ColumnStatistics compSinkNodeStatistics = null;
            foreach (var inEdge in inEdges)
            {
                compTable = SpanTableRef(compTable, inEdge, newComponent.GetNodeRefName(inEdge.SourceNode));
                newComponent.EdgeMaterilizedDict[inEdge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                sinkToSinkCount++;
                var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge);
                selectivity *= leafToLeafStatistics.Selectivity;
                compSinkNodeStatistics = ColumnStatistics.UpdateHistogram(
                    compSinkNodeStatistics,
                    newComponent.Context.GetEdgeStatistics(inEdge));
            }

            newComponent.StatisticsDict[sinkNode] = compSinkNodeStatistics;
        }
    }

    // Register the candidate's unmaterialized edges with the new component
    var unmatEdges = candidateTree.UnmaterializedEdges;
    foreach (var unmatEdge in unmatEdges)
    {
        newComponent.EdgeMaterilizedDict[unmatEdge] = false;
        newComponent.Nodes.Add(unmatEdge.SinkNode);
        var sinkNodeInEdges = newComponent.UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        sinkNodeInEdges.Add(unmatEdge);
        degrees *= unmatEdge.AverageDegree;
    }

    // Calculate estimated join selectivity & estimated node size.
    // Each additional join predicate contributes with a halved exponent (2^-appliedJoinCount).
    // NOTE(review): the sink-to-sink term is only folded in once a density above
    // ColumnStatistics.DefaultDensity is encountered, and the result depends on the
    // enumeration order of densityDict - confirm both are intended.
    double estimatedSelectivity = 1.0;
    int appliedJoinCount = 0;
    bool sinkJoinApplied = false;
    foreach (var densityEntry in densityDict)
    {
        int curJoinCount;
        if (!densityCount.TryGetValue(densityEntry.Key, out curJoinCount))
        {
            continue;
        }

        var density = densityEntry.Value;
        var curJoinSelectivity = Math.Pow(density, 2 - Math.Pow(2, 1 - curJoinCount));
        if (!sinkJoinApplied && ColumnStatistics.DefaultDensity < density)
        {
            var sinkJoinSelectivity = Math.Pow(
                ColumnStatistics.DefaultDensity,
                2 - Math.Pow(2, 1 - sinkToSinkCount));
            estimatedSelectivity *= Math.Pow(sinkJoinSelectivity, Math.Pow(2, -appliedJoinCount));
            appliedJoinCount += sinkToSinkCount;
            sinkJoinApplied = true;
        }

        estimatedSelectivity *= Math.Pow(curJoinSelectivity, Math.Pow(2, -appliedJoinCount));
        appliedJoinCount += curJoinCount;
    }

    var estimatedNodeUnitSize = root.EstimatedRows *
        Math.Pow(1000, candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count);

    // Update table reference
    newComponent.TableRef = GetPlanAndUpdateCost(
        candidateTree,
        newComponent,
        nodeTable,
        compTable,
        joinCondition,
        degrees,
        selectivity,
        estimatedNodeUnitSize,
        estimatedSelectivity);

    return newComponent;
}
/// <summary>
/// Transits from the current component to the new component of the next state
/// by joining the given node unit (a one-height tree) to a copy of this component.
/// </summary>
/// <param name="candidateTree">
/// The node unit to join: its <c>TreeRoot</c>, <c>MaterializedEdges</c> and
/// <c>UnmaterializedEdges</c> are read.
/// </param>
/// <param name="densityDict">
/// Densities keyed by table name. Only entries whose key matches (case-insensitively)
/// the <c>TableObjectName</c> of a node joined on its <c>GlobalNodeId</c> here are used.
/// </param>
/// <param name="statisticsCalculator">
/// Supplies the leaf-to-leaf statistics used when two not-yet-materialized edges are joined on their sinks.
/// </param>
/// <returns>
/// The new component, created with <c>new MatchComponent(this)</c> and then extended with the candidate tree;
/// its <c>TableRef</c> is the result of <c>GetPlanAndUpdateCost</c>.
/// </returns>
public MatchComponent GetNextState(
    OneHeightTree candidateTree,
    Dictionary<string, double> densityDict,
    IMatchJoinStatisticsCalculator statisticsCalculator)
{
    // Builds the predicate "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn })
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn })
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    // Number of GlobalNodeId joins per table name; consumed by the selectivity estimate at the end.
    var densityCount = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);
    Action<string, int> countJoins = (tableName, joins) =>
    {
        int current;
        densityCount.TryGetValue(tableName, out current); // current stays 0 when the key is absent
        densityCount[tableName] = current + joins;
    };

    var newComponent = new MatchComponent(this);
    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update nodes
    if (newComponent.MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already materialized in the component: reference it under a new
        // name and tie that name back to the root alias on GlobalNodeId.
        newComponent.MaterializedNodeSplitCount[root]++;
        nodeName = newComponent.GetNodeRefName(root);
        joinCondition = columnsEqual(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        newComponent.Nodes.Add(root);
        newComponent.MaterializedNodeSplitCount[root] = 0;
        newComponent.StatisticsDict[root] = new ColumnStatistics { Selectivity = 1.0 / root.TableRowCount };
    }

    // Construct table references
    WTableReference nodeTable = new WNamedTableReference
    {
        Alias = new Identifier { Value = nodeName },
        TableObjectName = root.TableObjectName
    };
    WTableReference compTable = newComponent.TableRef;

    // Update join conditions
    double selectivity = 1.0;
    double degrees = 1.0;

    List<MatchEdge> inEdges;
    if (newComponent.UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var firstEdge = inEdges.First();
        bool materialized = newComponent.EdgeMaterilizedDict[firstEdge];
        newComponent.UnmaterializedNodeMapping.Remove(root);
        selectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
            countJoins(root.TableObjectName.ToString(), 1);
        }
        else
        {
            // Component unmaterialized edges to root: materialize every incoming edge
            ColumnStatistics rootStatistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                compTable = SpanTableRef(compTable, edge, newComponent.GetNodeRefName(edge.SourceNode));
                newComponent.EdgeMaterilizedDict[edge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
                rootStatistics = ColumnStatistics.UpdateHistogram(
                    rootStatistics,
                    newComponent.Context.GetEdgeStatistics(edge));
                selectivity *= rootStatistics.Selectivity;
            }

            newComponent.StatisticsDict[root] = rootStatistics;
            countJoins(root.TableObjectName.ToString(), inEdges.Count);
        }
    }

    var jointEdges = candidateTree.MaterializedEdges;
    int sinkToSinkCount = 0;
    foreach (var jointEdge in jointEdges)
    {
        // Update node table
        nodeTable = SpanTableRef(nodeTable, jointEdge, nodeName);
        degrees *= jointEdge.AverageDegree;

        newComponent.EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (newComponent.MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                newComponent.StatisticsDict[sinkNode],
                newComponent.Context.GetEdgeStatistics(jointEdge));
            selectivity *= sinkStatistics.Selectivity;
            newComponent.StatisticsDict[sinkNode] = sinkStatistics;
            countJoins(sinkNode.TableObjectName.ToString(), 1);
            continue;
        }

        // Leaf to component unmaterialized node
        inEdges = newComponent.UnmaterializedNodeMapping[sinkNode];
        var firstSinkEdge = inEdges.First();
        bool sinkEdgeMaterialized = newComponent.EdgeMaterilizedDict[firstSinkEdge];

        if (sinkEdgeMaterialized)
        {
            // Leaf to materialized leaf: join the two edges on their sinks
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", firstSinkEdge.EdgeAlias, "Sink"));
            sinkToSinkCount++;
            var sinkStatistics = ColumnStatistics.UpdateHistogram(
                newComponent.StatisticsDict[sinkNode],
                newComponent.Context.GetEdgeStatistics(jointEdge));
            selectivity *= sinkStatistics.Selectivity;
            newComponent.StatisticsDict[sinkNode] = sinkStatistics;
        }
        else
        {
            // Leaf to unmaterialized leaf: materialize each component-side edge and
            // join it with the candidate edge on their sinks
            ColumnStatistics compSinkNodeStatistics = null;
            foreach (var inEdge in inEdges)
            {
                compTable = SpanTableRef(compTable, inEdge, newComponent.GetNodeRefName(inEdge.SourceNode));
                newComponent.EdgeMaterilizedDict[inEdge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                sinkToSinkCount++;
                var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge);
                selectivity *= leafToLeafStatistics.Selectivity;
                compSinkNodeStatistics = ColumnStatistics.UpdateHistogram(
                    compSinkNodeStatistics,
                    newComponent.Context.GetEdgeStatistics(inEdge));
            }

            newComponent.StatisticsDict[sinkNode] = compSinkNodeStatistics;
        }
    }

    // Register the candidate's unmaterialized edges with the new component
    var unmatEdges = candidateTree.UnmaterializedEdges;
    foreach (var unmatEdge in unmatEdges)
    {
        newComponent.EdgeMaterilizedDict[unmatEdge] = false;
        newComponent.Nodes.Add(unmatEdge.SinkNode);
        var sinkNodeInEdges = newComponent.UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        sinkNodeInEdges.Add(unmatEdge);
        degrees *= unmatEdge.AverageDegree;
    }

    // Calculate estimated join selectivity & estimated node size.
    // Each additional join predicate contributes with a halved exponent (2^-appliedJoinCount).
    // NOTE(review): the sink-to-sink term is only folded in once a density above
    // ColumnStatistics.DefaultDensity is encountered, and the result depends on the
    // enumeration order of densityDict - confirm both are intended.
    double estimatedSelectivity = 1.0;
    int appliedJoinCount = 0;
    bool sinkJoinApplied = false;
    foreach (var densityEntry in densityDict)
    {
        int curJoinCount;
        if (!densityCount.TryGetValue(densityEntry.Key, out curJoinCount))
        {
            continue;
        }

        var density = densityEntry.Value;
        var curJoinSelectivity = Math.Pow(density, 2 - Math.Pow(2, 1 - curJoinCount));
        if (!sinkJoinApplied && ColumnStatistics.DefaultDensity < density)
        {
            var sinkJoinSelectivity = Math.Pow(
                ColumnStatistics.DefaultDensity,
                2 - Math.Pow(2, 1 - sinkToSinkCount));
            estimatedSelectivity *= Math.Pow(sinkJoinSelectivity, Math.Pow(2, -appliedJoinCount));
            appliedJoinCount += sinkToSinkCount;
            sinkJoinApplied = true;
        }

        estimatedSelectivity *= Math.Pow(curJoinSelectivity, Math.Pow(2, -appliedJoinCount));
        appliedJoinCount += curJoinCount;
    }

    var estimatedNodeUnitSize = root.EstimatedRows *
        Math.Pow(1000, candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count);

    // Update table reference
    newComponent.TableRef = GetPlanAndUpdateCost(
        candidateTree,
        newComponent,
        nodeTable,
        compTable,
        joinCondition,
        degrees,
        selectivity,
        estimatedNodeUnitSize,
        estimatedSelectivity);

    return newComponent;
}
/// <summary>
/// Builds the join condition between this component and the given candidate join unit,
/// updating this component's bookkeeping (<c>Nodes</c>, <c>MaterializedNodeSplitCount</c>,
/// <c>UnmaterializedNodeMapping</c>, <c>EdgeMaterilizedDict</c>, <c>SinkNodeStatisticsDict</c>
/// and <c>TableRef</c>) along the way.
/// </summary>
/// <param name="candidateTree">
/// The join unit to attach: its <c>TreeRoot</c>, <c>MaterializedEdges</c> and
/// <c>UnmaterializedEdges</c> are read.
/// </param>
/// <param name="statisticsCalculator">
/// Supplies the leaf-to-leaf selectivity used when two not-yet-materialized edges are joined on their sinks.
/// </param>
/// <param name="metaData">Passed through to <c>SpanTableRef</c> when component-side edges are materialized.</param>
/// <param name="joinSelectivity">
/// Receives the product of the <c>1 / TableRowCount</c> factors and histogram selectivities accumulated here.
/// </param>
/// <param name="sqlEstimatedJoinSelectivity">
/// Receives the exponential back-off combination of the collected join-column densities:
/// the smallest density with exponent 1, the next with 1/2, then 1/4, and so on.
/// </param>
/// <returns>The conjunction of all generated equality predicates, or <c>null</c> when none was generated.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    out double joinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    // Builds the predicate "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn })
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn })
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    joinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update nodes
    if (MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already materialized in the component: reference it under a new
        // name and tie that name back to the root alias on GlobalNodeId.
        MaterializedNodeSplitCount[root]++;
        nodeName = GetNodeRefName(root);
        joinCondition = columnsEqual(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        if (!Nodes.Contains(root))
        {
            Nodes.Add(root);
        }

        MaterializedNodeSplitCount[root] = 0;
    }

    // One density per generated join predicate; combined into sqlEstimatedJoinSelectivity at the end.
    List<double> densityList = new List<double>();

    List<MatchEdge> inEdges;
    if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var firstEdge = inEdges.First();
        bool materialized = EdgeMaterilizedDict[firstEdge];
        UnmaterializedNodeMapping.Remove(root);
        joinSelectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
            densityList.Add(root.GlobalNodeIdDensity);
        }
        else
        {
            // Component unmaterialized edges to root: materialize every incoming edge
            Statistics rootStatistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);
                EdgeMaterilizedDict[edge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));

                double selectivity;
                rootStatistics = Statistics.UpdateHistogram(rootStatistics, edge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                densityList.Add(root.GlobalNodeIdDensity);
            }

            SinkNodeStatisticsDict[root] = rootStatistics;
        }
    }

    var jointEdges = candidateTree.MaterializedEdges;
    foreach (var jointEdge in jointEdges)
    {
        EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));

            Statistics sinkNodeStatistics;
            if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
            {
                // No statistics recorded for this sink yet: start from none and
                // account for the node table itself.
                sinkNodeStatistics = null;
                joinSelectivity *= 1.0 / sinkNode.TableRowCount;
            }

            double selectivity;
            var updatedStatistics = Statistics.UpdateHistogram(
                sinkNodeStatistics,
                jointEdge.Statistics,
                out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = updatedStatistics;
            densityList.Add(sinkNode.GlobalNodeIdDensity);
            continue;
        }

        // Leaf to component unmaterialized node
        inEdges = UnmaterializedNodeMapping[sinkNode];
        var firstSinkEdge = inEdges.First();
        bool sinkEdgeMaterialized = EdgeMaterilizedDict[firstSinkEdge];

        if (sinkEdgeMaterialized)
        {
            // Leaf to materialized leaf: join the two edges on their sinks
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", firstSinkEdge.EdgeAlias, "Sink"));
            densityList.Add(Statistics.DefaultDensity);

            double selectivity;
            var updatedStatistics = Statistics.UpdateHistogram(
                SinkNodeStatisticsDict[sinkNode],
                jointEdge.Statistics,
                out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = updatedStatistics;
        }
        else
        {
            // Leaf to unmaterialized leaf: materialize each component-side edge and
            // join it with the candidate edge on their sinks
            Statistics compSinkNodeStatistics = null;
            foreach (var inEdge in inEdges)
            {
                TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                EdgeMaterilizedDict[inEdge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                densityList.Add(Statistics.DefaultDensity);

                // Only the selectivity of the leaf-to-leaf join is needed; the returned statistics are not.
                double selectivity;
                statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge, out selectivity);
                joinSelectivity *= selectivity;

                // The selectivity reported by this histogram update is intentionally not applied.
                compSinkNodeStatistics = Statistics.UpdateHistogram(
                    compSinkNodeStatistics,
                    inEdge.Statistics,
                    out selectivity);
            }

            SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;
        }
    }

    // Register the candidate's unmaterialized edges with this component
    var unmatEdges = candidateTree.UnmaterializedEdges;
    foreach (var unmatEdge in unmatEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        if (!Nodes.Contains(unmatEdge.SinkNode))
        {
            Nodes.Add(unmatEdge.SinkNode);
        }

        var sinkNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        sinkNodeInEdges.Add(unmatEdge);
    }

    // Calculate estimated join selectivity with exponential back-off: walking the densities
    // from largest to smallest and taking the square root of the running value each step
    // yields d0 * d1^(1/2) * d2^(1/4) * ... with d0 the smallest density.
    // The previous "*=" also multiplied by the running value itself, which produced
    // exponents 1, 3/2, 9/4, ... instead.
    densityList.Sort();
    for (int i = densityList.Count - 1; i >= 0; i--)
    {
        sqlEstimatedJoinSelectivity = Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
    }

    return joinCondition;
}
/// <summary>
/// Builds the join condition between this component and the given candidate join unit,
/// updating this component's bookkeeping (<c>Nodes</c>, <c>MaterializedNodeSplitCount</c>,
/// <c>UnmaterializedNodeMapping</c>, <c>EdgeMaterilizedDict</c>, <c>SinkNodeStatisticsDict</c>
/// and <c>TableRef</c>) along the way.
/// </summary>
/// <param name="candidateTree">
/// The join unit to attach: its <c>TreeRoot</c>, <c>MaterializedEdges</c> and
/// <c>UnmaterializedEdges</c> are read.
/// </param>
/// <param name="statisticsCalculator">
/// Supplies the leaf-to-leaf selectivity used when two not-yet-materialized edges are joined on their sinks.
/// </param>
/// <param name="metaData">Passed through to <c>SpanTableRef</c> when component-side edges are materialized.</param>
/// <param name="joinSelectivity">
/// Receives the product of the <c>1 / TableRowCount</c> factors and histogram selectivities accumulated here.
/// </param>
/// <param name="sqlEstimatedJoinSelectivity">
/// Receives the exponential back-off combination of the collected join-column densities:
/// the smallest density with exponent 1, the next with 1/2, then 1/4, and so on.
/// </param>
/// <returns>The conjunction of all generated equality predicates, or <c>null</c> when none was generated.</returns>
private WBooleanExpression ConstructJoinCondition(
    CandidateJoinUnit candidateTree,
    IMatchJoinStatisticsCalculator statisticsCalculator,
    GraphMetaData metaData,
    out double joinSelectivity,
    out double sqlEstimatedJoinSelectivity)
{
    // Builds the predicate "<leftTable>.<leftColumn> = <rightTable>.<rightColumn>".
    Func<string, string, string, string, WBooleanComparisonExpression> columnsEqual =
        (leftTable, leftColumn, rightTable, rightColumn) => new WBooleanComparisonExpression
        {
            FirstExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = leftTable },
                    new Identifier { Value = leftColumn })
            },
            SecondExpr = new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = rightTable },
                    new Identifier { Value = rightColumn })
            },
            ComparisonType = BooleanComparisonType.Equals
        };

    joinSelectivity = 1.0;
    sqlEstimatedJoinSelectivity = 1.0;

    var root = candidateTree.TreeRoot;
    WBooleanExpression joinCondition = null;
    string nodeName;

    // Update nodes
    if (MaterializedNodeSplitCount.ContainsKey(root))
    {
        // The root is already materialized in the component: reference it under a new
        // name and tie that name back to the root alias on GlobalNodeId.
        MaterializedNodeSplitCount[root]++;
        nodeName = GetNodeRefName(root);
        joinCondition = columnsEqual(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
    }
    else
    {
        nodeName = root.RefAlias;
        if (!Nodes.Contains(root))
        {
            Nodes.Add(root);
        }

        MaterializedNodeSplitCount[root] = 0;
    }

    // One density per generated join predicate; combined into sqlEstimatedJoinSelectivity at the end.
    List<double> densityList = new List<double>();

    List<MatchEdge> inEdges;
    if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
    {
        var firstEdge = inEdges.First();
        bool materialized = EdgeMaterilizedDict[firstEdge];
        UnmaterializedNodeMapping.Remove(root);
        joinSelectivity *= 1.0 / root.TableRowCount;

        if (materialized)
        {
            // Component materialized edge to root
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));
            densityList.Add(root.GlobalNodeIdDensity);
        }
        else
        {
            // Component unmaterialized edges to root: materialize every incoming edge
            Statistics rootStatistics = null;
            foreach (var edge in inEdges)
            {
                // Update component table
                TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);
                EdgeMaterilizedDict[edge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));

                double selectivity;
                rootStatistics = Statistics.UpdateHistogram(rootStatistics, edge.Statistics, out selectivity);
                joinSelectivity *= selectivity;
                densityList.Add(root.GlobalNodeIdDensity);
            }

            SinkNodeStatisticsDict[root] = rootStatistics;
        }
    }

    var jointEdges = candidateTree.MaterializedEdges;
    foreach (var jointEdge in jointEdges)
    {
        EdgeMaterilizedDict[jointEdge] = true;
        var sinkNode = jointEdge.SinkNode;

        if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
        {
            // Leaf to component materialized node
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));

            Statistics sinkNodeStatistics;
            if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
            {
                // No statistics recorded for this sink yet: start from none and
                // account for the node table itself.
                sinkNodeStatistics = null;
                joinSelectivity *= 1.0 / sinkNode.TableRowCount;
            }

            double selectivity;
            var updatedStatistics = Statistics.UpdateHistogram(
                sinkNodeStatistics,
                jointEdge.Statistics,
                out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = updatedStatistics;
            densityList.Add(sinkNode.GlobalNodeIdDensity);
            continue;
        }

        // Leaf to component unmaterialized node
        inEdges = UnmaterializedNodeMapping[sinkNode];
        var firstSinkEdge = inEdges.First();
        bool sinkEdgeMaterialized = EdgeMaterilizedDict[firstSinkEdge];

        if (sinkEdgeMaterialized)
        {
            // Leaf to materialized leaf: join the two edges on their sinks
            joinCondition = WBooleanBinaryExpression.Conjunction(
                joinCondition,
                columnsEqual(jointEdge.EdgeAlias, "Sink", firstSinkEdge.EdgeAlias, "Sink"));
            densityList.Add(Statistics.DefaultDensity);

            double selectivity;
            var updatedStatistics = Statistics.UpdateHistogram(
                SinkNodeStatisticsDict[sinkNode],
                jointEdge.Statistics,
                out selectivity);
            joinSelectivity *= selectivity;
            SinkNodeStatisticsDict[sinkNode] = updatedStatistics;
        }
        else
        {
            // Leaf to unmaterialized leaf: materialize each component-side edge and
            // join it with the candidate edge on their sinks
            Statistics compSinkNodeStatistics = null;
            foreach (var inEdge in inEdges)
            {
                TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                EdgeMaterilizedDict[inEdge] = true;
                joinCondition = WBooleanBinaryExpression.Conjunction(
                    joinCondition,
                    columnsEqual(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));
                densityList.Add(Statistics.DefaultDensity);

                // Only the selectivity of the leaf-to-leaf join is needed; the returned statistics are not.
                double selectivity;
                statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge, out selectivity);
                joinSelectivity *= selectivity;

                // The selectivity reported by this histogram update is intentionally not applied.
                compSinkNodeStatistics = Statistics.UpdateHistogram(
                    compSinkNodeStatistics,
                    inEdge.Statistics,
                    out selectivity);
            }

            SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;
        }
    }

    // Register the candidate's unmaterialized edges with this component
    var unmatEdges = candidateTree.UnmaterializedEdges;
    foreach (var unmatEdge in unmatEdges)
    {
        EdgeMaterilizedDict[unmatEdge] = false;
        if (!Nodes.Contains(unmatEdge.SinkNode))
        {
            Nodes.Add(unmatEdge.SinkNode);
        }

        var sinkNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
        sinkNodeInEdges.Add(unmatEdge);
    }

    // Calculate estimated join selectivity with exponential back-off: walking the densities
    // from largest to smallest and taking the square root of the running value each step
    // yields d0 * d1^(1/2) * d2^(1/4) * ... with d0 the smallest density.
    // The previous "*=" also multiplied by the running value itself, which produced
    // exponents 1, 3/2, 9/4, ... instead.
    densityList.Sort();
    for (int i = densityList.Count - 1; i >= 0; i--)
    {
        sqlEstimatedJoinSelectivity = Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
    }

    return joinCondition;
}