Пример #1
0
        /// <summary>
        /// Returns the merged statistics of two edges, computing them at most once per
        /// (node edge alias, component edge alias) pair.
        /// </summary>
        /// <param name="nodeEdge">Edge whose alias forms the first half of the cache key.</param>
        /// <param name="componentEdge">Edge whose alias forms the second half of the cache key.</param>
        /// <returns>
        /// The cached entry of <c>LeafToLeafSelectivity</c> when one exists; otherwise the result of
        /// <c>ColumnStatistics.UpdateHistogram</c> over both edges' statistics, which is cached before returning.
        /// </returns>
        public ColumnStatistics GetLeafToLeafStatistics(MatchEdge nodeEdge, MatchEdge componentEdge)
        {
            var cacheKey = new Tuple<string, string>(nodeEdge.EdgeAlias, componentEdge.EdgeAlias);

            ColumnStatistics result;
            if (!LeafToLeafSelectivity.TryGetValue(cacheKey, out result))
            {
                // Cache miss: merge the statistics of both edges and remember the outcome.
                result = ColumnStatistics.UpdateHistogram(
                    Context.GetEdgeStatistics(nodeEdge),
                    Context.GetEdgeStatistics(componentEdge));
                LeafToLeafSelectivity[cacheKey] = result;
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Creates a component that initially contains only <paramref name="node"/>.
        /// </summary>
        /// <param name="node">
        /// The node to seed the component with. Its row counts initialise the statistics and size
        /// estimates, and its alias and table name become the component's table reference.
        /// </param>
        public MatchComponent(MatchNode node) : this()
        {
            // Register the node with a split count of zero.
            Nodes.Add(node);
            MaterializedNodeSplitCount[node] = 0;

            // Initial selectivity is the reciprocal of the table row count.
            var nodeStatistics = new ColumnStatistics();
            nodeStatistics.Selectivity = 1.0 / node.TableRowCount;
            StatisticsDict[node] = nodeStatistics;

            // Both size figures are scaled by the node's estimated row count.
            var estimatedRows = node.EstimatedRows;
            Size *= estimatedRows;
            EstimateSize *= estimatedRows;

            // The component's table reference is the node's table under the node's alias.
            var alias = new Identifier();
            alias.Value = node.RefAlias;

            var tableReference = new WNamedTableReference();
            tableReference.Alias = alias;
            tableReference.TableObjectName = node.TableObjectName;
            TableRef = tableReference;
        }
Пример #3
0
        /// <summary>
        /// Transits from the current component to the component of the next state by joining the
        /// given node unit (a one-height tree) into a copy of this component.
        /// </summary>
        /// <param name="candidateTree">
        /// The node unit to join: supplies the root node, the materialized edges and the
        /// unmaterialized edges that are processed in this step.
        /// </param>
        /// <param name="densityDict">
        /// Density values by key. Only entries whose key matches (case-insensitively) the
        /// <c>TableObjectName.ToString()</c> of a table joined on in this step contribute to the
        /// estimated join selectivity.
        /// </param>
        /// <param name="statisticsCalculator">
        /// Supplies the statistics used when a materialized edge of the node unit is joined
        /// against a not-yet-materialized in-edge of the component (sink-to-sink join).
        /// </param>
        /// <returns>
        /// The new component, created through the copy constructor from this instance, with its
        /// table reference replaced by the result of <c>GetPlanAndUpdateCost</c>.
        /// </returns>
        public MatchComponent GetNextState(
            OneHeightTree candidateTree,
            Dictionary<string, double> densityDict,
            IMatchJoinStatisticsCalculator statisticsCalculator)
        {
            var newComponent = new MatchComponent(this);
            var root = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            string nodeName;

            // Update Nodes
            if (newComponent.MaterializedNodeSplitCount.ContainsKey(root))
            {
                // The root is already part of the component: count one more split and tie the
                // new reference to the root alias on GlobalNodeId.
                newComponent.MaterializedNodeSplitCount[root]++;
                nodeName = newComponent.GetNodeRefName(root);
                joinCondition = CreateColumnEquality(root.RefAlias, "GlobalNodeId", nodeName, "GlobalNodeId");
            }
            else
            {
                nodeName = root.RefAlias;
                newComponent.Nodes.Add(root);
                newComponent.MaterializedNodeSplitCount[root] = 0;
                newComponent.StatisticsDict[root] = new ColumnStatistics
                {
                    Selectivity = 1.0 / root.TableRowCount
                };
            }

            // Constructs table reference
            WTableReference nodeTable = new WNamedTableReference
            {
                Alias = new Identifier { Value = nodeName },
                TableObjectName = root.TableObjectName
            };
            WTableReference compTable = newComponent.TableRef;

            // Updates join conditions
            double selectivity = 1.0;
            double degrees = 1.0;

            // Number of join predicates added per table name in this step.
            var densityCount = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);

            List<MatchEdge> inEdges;
            if (newComponent.UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
            {
                // The materialization state of the first in-edge decides how all in-edges are handled.
                var firstEdge = inEdges.First();
                bool materialized = newComponent.EdgeMaterilizedDict[firstEdge];
                newComponent.UnmaterializedNodeMapping.Remove(root);
                selectivity *= 1.0 / root.TableRowCount;

                if (materialized)
                {
                    // Component materialized edge to root
                    joinCondition = WBooleanBinaryExpression.Conjunction(
                        joinCondition,
                        CreateColumnEquality(firstEdge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));

                    AddDensityCount(densityCount, root.TableObjectName.ToString(), 1);
                }
                else
                {
                    // Component unmaterialized edge to root: materialize every in-edge now.
                    ColumnStatistics rootStatistics = null;
                    foreach (var edge in inEdges)
                    {
                        // Update component table
                        compTable = SpanTableRef(compTable, edge, newComponent.GetNodeRefName(edge.SourceNode));

                        newComponent.EdgeMaterilizedDict[edge] = true;
                        joinCondition = WBooleanBinaryExpression.Conjunction(
                            joinCondition,
                            CreateColumnEquality(edge.EdgeAlias, "Sink", nodeName, "GlobalNodeId"));

                        rootStatistics = ColumnStatistics.UpdateHistogram(
                            rootStatistics,
                            newComponent.Context.GetEdgeStatistics(edge));
                        selectivity *= rootStatistics.Selectivity;
                    }
                    newComponent.StatisticsDict[root] = rootStatistics;

                    AddDensityCount(densityCount, root.TableObjectName.ToString(), inEdges.Count);
                }
            }

            var jointEdges = candidateTree.MaterializedEdges;
            int sinkToSinkCount = 0;

            foreach (var jointEdge in jointEdges)
            {
                // Update node table
                nodeTable = SpanTableRef(nodeTable, jointEdge, nodeName);
                degrees *= jointEdge.AverageDegree;

                newComponent.EdgeMaterilizedDict[jointEdge] = true;
                var sinkNode = jointEdge.SinkNode;

                if (newComponent.MaterializedNodeSplitCount.ContainsKey(sinkNode))
                {
                    // Leaf to component materialized node
                    joinCondition = WBooleanBinaryExpression.Conjunction(
                        joinCondition,
                        CreateColumnEquality(jointEdge.EdgeAlias, "Sink", sinkNode.RefAlias, "GlobalNodeId"));

                    var sinkStatistics = ColumnStatistics.UpdateHistogram(
                        newComponent.StatisticsDict[sinkNode],
                        newComponent.Context.GetEdgeStatistics(jointEdge));
                    selectivity *= sinkStatistics.Selectivity;
                    newComponent.StatisticsDict[sinkNode] = sinkStatistics;

                    AddDensityCount(densityCount, sinkNode.TableObjectName.ToString(), 1);
                }
                else
                {
                    // Leaf to component unmaterialized node
                    inEdges = newComponent.UnmaterializedNodeMapping[sinkNode];
                    var firstEdge = inEdges.First();
                    bool materializedEdge = newComponent.EdgeMaterilizedDict[firstEdge];

                    if (materializedEdge)
                    {
                        // Leaf to materialized leaf: join the two edges on their Sink columns.
                        joinCondition = WBooleanBinaryExpression.Conjunction(
                            joinCondition,
                            CreateColumnEquality(jointEdge.EdgeAlias, "Sink", firstEdge.EdgeAlias, "Sink"));

                        sinkToSinkCount++;
                        var sinkStatistics = ColumnStatistics.UpdateHistogram(
                            newComponent.StatisticsDict[sinkNode],
                            newComponent.Context.GetEdgeStatistics(jointEdge));
                        selectivity *= sinkStatistics.Selectivity;
                        newComponent.StatisticsDict[sinkNode] = sinkStatistics;
                    }
                    else
                    {
                        // Leaf to unmaterialized leaf: materialize every in-edge of the sink node
                        // and join each of them with the joint edge on the Sink columns.
                        ColumnStatistics compSinkNodeStatistics = null;
                        foreach (var inEdge in inEdges)
                        {
                            compTable = SpanTableRef(compTable, inEdge, newComponent.GetNodeRefName(inEdge.SourceNode));
                            newComponent.EdgeMaterilizedDict[inEdge] = true;
                            joinCondition = WBooleanBinaryExpression.Conjunction(
                                joinCondition,
                                CreateColumnEquality(jointEdge.EdgeAlias, "Sink", inEdge.EdgeAlias, "Sink"));

                            sinkToSinkCount++;
                            var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge);
                            selectivity *= leafToLeafStatistics.Selectivity;
                            compSinkNodeStatistics = ColumnStatistics.UpdateHistogram(
                                compSinkNodeStatistics,
                                newComponent.Context.GetEdgeStatistics(inEdge));
                        }
                        newComponent.StatisticsDict[sinkNode] = compSinkNodeStatistics;
                    }
                }
            }

            // Unmaterialized edges only get registered; their sink nodes join the component and
            // remember the edge as a pending in-edge.
            var unmatEdges = candidateTree.UnmaterializedEdges;
            foreach (var unmatEdge in unmatEdges)
            {
                newComponent.EdgeMaterilizedDict[unmatEdge] = false;
                newComponent.Nodes.Add(unmatEdge.SinkNode);
                var sinkNodeInEdges = newComponent.UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
                sinkNodeInEdges.Add(unmatEdge);
                degrees *= unmatEdge.AverageDegree;
            }

            // Calculate Estimated Join Selectivity & Estimated Node Size
            double estimatedSelectity = 1.0;
            int count = 0;
            bool sinkJoin = false;

            foreach (var item in densityDict.Where(e => densityCount.ContainsKey(e.Key)))
            {
                var density = item.Value;
                var curJoinCount = densityCount[item.Key];
                var curJoinSelectitivy = Math.Pow(density, 2 - Math.Pow(2, 1 - curJoinCount));

                // The sink-to-sink contribution is folded in once, right before the first
                // entry whose density exceeds the default density.
                if (!sinkJoin && ColumnStatistics.DefaultDensity < density)
                {
                    var curSinkJoinSelectivity = Math.Pow(
                        ColumnStatistics.DefaultDensity,
                        2 - Math.Pow(2, 1 - sinkToSinkCount));
                    estimatedSelectity *= Math.Pow(curSinkJoinSelectivity, Math.Pow(2, -count));
                    count += sinkToSinkCount;
                    sinkJoin = true;
                }

                estimatedSelectity *= Math.Pow(curJoinSelectitivy, Math.Pow(2, -count));
                count += curJoinCount;
            }

            var estimatedNodeUnitSize = root.EstimatedRows *
                                        Math.Pow(1000, candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count);

            // Update Table Reference
            newComponent.TableRef = GetPlanAndUpdateCost(candidateTree, newComponent, nodeTable, compTable, joinCondition,
                                                         degrees, selectivity, estimatedNodeUnitSize, estimatedSelectity);

            return newComponent;
        }

        /// <summary>
        /// Builds a regular two-part column reference <c>tableAlias.columnName</c>.
        /// </summary>
        private static WColumnReferenceExpression CreateColumnReference(string tableAlias, string columnName)
        {
            return new WColumnReferenceExpression
            {
                ColumnType = ColumnType.Regular,
                MultiPartIdentifier = new WMultiPartIdentifier(
                    new Identifier { Value = tableAlias },
                    new Identifier { Value = columnName })
            };
        }

        /// <summary>
        /// Builds the comparison <c>firstAlias.firstColumn = secondAlias.secondColumn</c>.
        /// </summary>
        private static WBooleanComparisonExpression CreateColumnEquality(
            string firstAlias,
            string firstColumn,
            string secondAlias,
            string secondColumn)
        {
            return new WBooleanComparisonExpression
            {
                FirstExpr = CreateColumnReference(firstAlias, firstColumn),
                SecondExpr = CreateColumnReference(secondAlias, secondColumn),
                ComparisonType = BooleanComparisonType.Equals
            };
        }

        /// <summary>
        /// Adds <paramref name="delta"/> to the counter stored under <paramref name="tableName"/>,
        /// starting from zero when the table has no counter yet.
        /// </summary>
        private static void AddDensityCount(Dictionary<string, int> densityCount, string tableName, int delta)
        {
            int current;
            densityCount.TryGetValue(tableName, out current); // leaves current at 0 when the key is absent
            densityCount[tableName] = current + delta;
        }
Пример #4
0
        /// <summary>
        /// Merges two column statistics (histogram, density, row count and selectivity) into one.
        /// Histogram entries map a value to a tuple of (row count, flag); entries whose flag is
        /// false are accumulated in the "not popular" counters and receive a shared average count.
        /// </summary>
        /// <param name="curStatistics">Statistics accumulated so far; when null, <paramref name="newStatistics"/> is returned unchanged.</param>
        /// <param name="newStatistics">Statistics to merge in; when null, <paramref name="curStatistics"/> is returned unchanged.</param>
        /// <returns>
        /// One of the inputs when the other is null; a new instance carrying the non-empty side's
        /// density and histogram and the product of both selectivities when either histogram is
        /// empty; otherwise a new instance holding the merged histogram.
        /// </returns>
        internal static ColumnStatistics UpdateHistogram(ColumnStatistics curStatistics, ColumnStatistics newStatistics)
        {
            if (curStatistics == null)
            {
                return(newStatistics);
            }
            else if (newStatistics == null)
            {
                return(curStatistics);
            }
            var resHistogram = new Dictionary <long, Tuple <double, bool> >();
            var curHistogram = curStatistics.Histogram;
            var newHistogram = newStatistics.Histogram;

            // An empty histogram on either side: no merge walk, only the selectivities are multiplied.
            if (!curHistogram.Any())
            {
                return(new ColumnStatistics
                {
                    Density = newStatistics.Density,
                    Histogram = newHistogram,
                    Selectivity = curStatistics.Selectivity * newStatistics.Selectivity,
                });
            }
            if (!newHistogram.Any())
            {
                return(new ColumnStatistics
                {
                    Density = curStatistics.Density,
                    Histogram = curHistogram,
                    Selectivity = curStatistics.Selectivity * newStatistics.Selectivity,
                });
            }
            var curNotPopularCount = 0.0;
            var newNotPopularCount = 0.0;
            // Row count assumed for a value whose own count is not used: density * row count.
            var curDefaultRow      = curStatistics.Density * curStatistics.RowCount;
            var newDefaultRow      = newStatistics.Density * newStatistics.RowCount;
            // NOTE(review): this enumerator is never disposed.
            IEnumerator <KeyValuePair <long, Tuple <double, bool> > > newEntry = null;
            bool        fisrstMatch      = false;
            bool        newHistogramEnd  = false;
            double      resRowCount      = 0.0;
            List <long> notPopularValues = new List <long>();

            // NOTE(review): the walk below compares keys with < and >, so it presumably relies on
            // both histograms enumerating in ascending key order — confirm against how they are built.
            foreach (var entry in curHistogram)
            {
                // Entries of the current histogram are ignored until the first key that also
                // exists in the new histogram; the new histogram is then positioned on that key.
                if (!fisrstMatch)
                {
                    if (newHistogram.ContainsKey(entry.Key))
                    {
                        fisrstMatch = true;
                        var entry1 = entry;
                        newEntry = newHistogram.SkipWhile(e => e.Key != entry1.Key).GetEnumerator();
                        newEntry.MoveNext();
                        // Pre-subtract the first matching entry's count when its flag is false.
                        // NOTE(review): the equal-key branch below adds it back only when BOTH
                        // flags are false; otherwise the counter stays negative — confirm intent.
                        if (!entry.Value.Item2)
                        {
                            curNotPopularCount -= entry.Value.Item1;
                        }
                        if (!newEntry.Current.Value.Item2)
                        {
                            newNotPopularCount -= newEntry.Current.Value.Item1;
                        }
                    }
                }
                if (fisrstMatch)
                {
                    // Key exists only in the current histogram (or the new one is exhausted).
                    if (newHistogramEnd || entry.Key < newEntry.Current.Key)
                    {
                        var curTuple = entry.Value;
                        if (curTuple.Item2 == true)
                        {
                            var tmpCount = curTuple.Item1 * newDefaultRow;
                            resRowCount += tmpCount;
                            resHistogram.Add(entry.Key, new Tuple <double, bool>(tmpCount, true));
                        }
                        else
                        {
                            // Placeholder; replaced with the shared average after the walk.
                            notPopularValues.Add(entry.Key);
                            curNotPopularCount += curTuple.Item1;
                            resHistogram.Add(entry.Key, null);
                        }
                    }
                    // New histogram is behind: emit its entries until it catches up.
                    else if (entry.Key > newEntry.Current.Key)
                    {
                        while (entry.Key > newEntry.Current.Key)
                        {
                            var newTuple = newEntry.Current.Value;
                            if (newTuple.Item2 == true)
                            {
                                var tmpCount = newTuple.Item1 * curDefaultRow;
                                resRowCount += tmpCount;
                                resHistogram.Add(newEntry.Current.Key, new Tuple <double, bool>(tmpCount, true));
                            }
                            else
                            {
                                notPopularValues.Add(newEntry.Current.Key);
                                newNotPopularCount += newTuple.Item1;
                                resHistogram.Add(newEntry.Current.Key, null);
                            }
                            if (!newEntry.MoveNext())
                            {
                                newHistogramEnd = true;
                                break;
                            }
                        }
                        // NOTE(review): the current entry itself is not merged after catching up,
                        // and reaching the end here stops the whole walk, whereas the equal-key
                        // branch keeps processing the remaining current entries — confirm intent.
                        if (newHistogramEnd)
                        {
                            break;
                        }
                    }
                    // Same key on both sides.
                    else
                    {
                        var curTuple = entry.Value;
                        var newTuple = newEntry.Current.Value;
                        if (curTuple.Item2 == false && newTuple.Item2 == false)
                        {
                            notPopularValues.Add(entry.Key);
                            curNotPopularCount += curTuple.Item1;
                            newNotPopularCount += newTuple.Item1;
                            resHistogram.Add(entry.Key, null);
                        }
                        else
                        {
                            // A side whose flag is false contributes its default row count.
                            var count1   = curTuple.Item2 ? curTuple.Item1 : curDefaultRow;
                            var count2   = newTuple.Item2 ? newTuple.Item1 : newDefaultRow;
                            var tmpCount = count1 * count2;
                            resRowCount += tmpCount;
                            resHistogram.Add(entry.Key, new Tuple <double, bool>(tmpCount, true));
                        }
                        if (!newEntry.MoveNext())
                        {
                            newHistogramEnd = true;
                        }
                    }
                }
            }
            // Negative marks "no density computed"; the larger input density is used in that case.
            double density = -1;

            if (notPopularValues.Any())
            {
                // NOTE(review): either counter can be zero (or negative) here, making the
                // divisions below produce Infinity/NaN — confirm this cannot happen in practice.
                var resDefaultRow = curNotPopularCount * newNotPopularCount *
                                    Math.Min(curDefaultRow / curNotPopularCount, newDefaultRow / newNotPopularCount);
                resRowCount  += resDefaultRow;
                // Spread the combined count evenly over all values recorded as not popular.
                resDefaultRow = resDefaultRow / notPopularValues.Count;
                density       = resDefaultRow / resRowCount;
                foreach (var value in notPopularValues)
                {
                    resHistogram[value] = new Tuple <double, bool>(resDefaultRow, false);
                }
            }

            return(new ColumnStatistics
            {
                Histogram = resHistogram,
                Density = density < 0 ? Math.Max(curStatistics.Density, newStatistics.Density) : density,
                MaxValue = Math.Max(curStatistics.MaxValue, newStatistics.MaxValue),
                RowCount = resRowCount,
                Selectivity = resRowCount / (curStatistics.RowCount * newStatistics.RowCount),
            });
        }
Пример #5
0
        /// <summary>
        /// Creates a component that initially contains only <paramref name="node"/>.
        /// </summary>
        /// <param name="node">
        /// The node to seed the component with. Its row counts initialise the statistics and size
        /// estimates, and its alias and table name become the component's table reference.
        /// </param>
        public MatchComponent(MatchNode node) : this()
        {
            // Register the node with a split count of zero.
            Nodes.Add(node);
            MaterializedNodeSplitCount[node] = 0;

            // Initial selectivity is the reciprocal of the table row count.
            var nodeStatistics = new ColumnStatistics();
            nodeStatistics.Selectivity = 1.0 / node.TableRowCount;
            StatisticsDict[node] = nodeStatistics;

            // Both size figures are scaled by the node's estimated row count.
            var estimatedRows = node.EstimatedRows;
            Size *= estimatedRows;
            EstimateSize *= estimatedRows;

            // The component's table reference is the node's table under the node's alias.
            var alias = new Identifier();
            alias.Value = node.RefAlias;

            var tableReference = new WNamedTableReference();
            tableReference.Alias = alias;
            tableReference.TableObjectName = node.TableObjectName;
            TableRef = tableReference;
        }
        /// <summary>
        /// Builds the statistics histogram of an edge from the list of its sink ids and registers
        /// it through <c>_context.AddEdgeStatistics</c>. The bucket height is the row count divided
        /// by the pre-defined <c>BucketNum</c>.
        /// </summary>
        /// <param name="edge">The edge the statistics are registered for.</param>
        /// <param name="sinkList">
        /// Sink ids of the edge. The list is sorted in place. An empty list registers statistics
        /// with a row count of zero and an empty histogram.
        /// </param>
        private void UpdateEdgeHistogram(MatchEdge edge, List<long> sinkList)
        {
            sinkList.Sort();
            var rowCount = sinkList.Count;
            var statistics = new ColumnStatistics
            {
                RowCount = rowCount
            };

            // Nothing to bucket for an edge without sinks; reading sinkList[0] below would throw
            // ArgumentOutOfRangeException. Density and MaxValue keep their default values.
            if (rowCount == 0)
            {
                _context.AddEdgeStatistics(edge, statistics);
                return;
            }

            var height = (int)(rowCount / BucketNum);
            var popBucketCount = 0;
            var popValueCount = 0;
            var bucketCount = 0;

            // If number in each bucket is very small, then generate a Frequency Histogram:
            // one entry per distinct value with its exact count, flagged when it occurs more than once.
            if (height < 2)
            {
                bucketCount = rowCount;
                long preValue = sinkList[0];
                int count = 1;
                int distCount = 1;
                for (int i = 1; i < rowCount; i++)
                {
                    var curValue = sinkList[i];
                    if (curValue == preValue)
                    {
                        count++;
                    }
                    else
                    {
                        if (count > 1)
                        {
                            popBucketCount += count;
                            popValueCount++;
                        }
                        statistics.Histogram.Add(preValue, new Tuple<double, bool>(count, count > 1));
                        count = 1;
                        preValue = curValue;
                        distCount++;
                    }
                }

                // Flush the last run of equal values.
                if (count > 1)
                {
                    popBucketCount += count;
                    popValueCount++;
                }
                statistics.Histogram.Add(preValue, new Tuple<double, bool>(count, count > 1));
                statistics.MaxValue = preValue;

                // Density: share of rows not covered by flagged values, spread over the
                // distinct values that are not flagged.
                statistics.Density = bucketCount == popBucketCount
                    ? 0
                    : 1.0 * (bucketCount - popBucketCount) / bucketCount / (distCount - popValueCount);
            }
            // Generate a Height-balanced Histogram: only every height-th position is sampled and
            // counts grow in steps of height; a value is flagged when it spans more than one bucket.
            else
            {
                long preValue = sinkList[0];
                int count = 0;
                int distCount = 1;
                for (int i = 1; i < rowCount; i++)
                {
                    if (i % height == height - 1)
                    {
                        bucketCount++;
                        var curValue = sinkList[i];
                        if (curValue == preValue)
                        {
                            count += height;
                        }
                        else
                        {
                            distCount++;
                            if (count > height)
                            {
                                popBucketCount += count / height;
                                popValueCount++;
                            }
                            statistics.Histogram.Add(preValue, new Tuple<double, bool>(count, count > height));
                            preValue = curValue;
                            count = height;
                        }
                    }
                }

                // Flush the last sampled value.
                if (count > height)
                {
                    popBucketCount += count / height;
                    popValueCount++;
                }
                statistics.Histogram.Add(preValue, new Tuple<double, bool>(count, count > height));
                statistics.MaxValue = preValue;

                // Density: share of buckets not covered by flagged values, spread over the
                // sampled distinct values that are not flagged.
                statistics.Density = bucketCount == popBucketCount
                    ? 0
                    : 1.0 * (bucketCount - popBucketCount) / bucketCount / (distCount - popValueCount);
            }

            _context.AddEdgeStatistics(edge, statistics);
        }
Пример #7
0
 /// <summary>
 /// Records the column statistics computed for an edge by adding the pair
 /// to the <c>_edgeStatisticses</c> collection.
 /// </summary>
 /// <param name="edge">Edge used as the key of the new entry.</param>
 /// <param name="statistics">Statistics stored for <paramref name="edge"/>.</param>
 public void AddEdgeStatistics(MatchEdge edge, ColumnStatistics statistics)
     => _edgeStatisticses.Add(edge, statistics);
Пример #8
0
        /// <summary>
        /// Merges two column histograms into the statistics of their join.
        /// Each histogram maps a value to a (row count, isPopular) tuple.
        /// </summary>
        /// <remarks>
        /// The merge starts at the first key of the current histogram that also exists in the
        /// new histogram; earlier entries on either side are ignored. From there both histograms
        /// are walked in step: popular values are multiplied by the other side's count (or its
        /// default row estimate, Density * RowCount), while non-popular values are collected and
        /// share one averaged estimate at the end.
        /// NOTE(review): the walk assumes both histograms enumerate their keys in ascending
        /// order - confirm against the code that builds them.
        /// NOTE(review): entries of the new histogram that remain after the current histogram is
        /// exhausted are not added to the result (unchanged from the previous behavior).
        /// </remarks>
        /// <param name="curStatistics">Statistics accumulated so far; when null, <paramref name="newStatistics"/> is returned as is.</param>
        /// <param name="newStatistics">Statistics to merge in; when null, <paramref name="curStatistics"/> is returned as is.</param>
        /// <returns>
        /// The merged statistics. When one histogram is empty the other side's histogram and
        /// density are reused and only Selectivity (the product of both) is set.
        /// </returns>
        internal static ColumnStatistics UpdateHistogram(ColumnStatistics curStatistics, ColumnStatistics newStatistics)
        {
            if (curStatistics == null)
                return newStatistics;
            if (newStatistics == null)
                return curStatistics;

            var curHistogram = curStatistics.Histogram;
            var newHistogram = newStatistics.Histogram;
            if (!curHistogram.Any())
            {
                return new ColumnStatistics
                {
                    Density = newStatistics.Density,
                    Histogram = newHistogram,
                    Selectivity = curStatistics.Selectivity * newStatistics.Selectivity,
                };
            }
            if (!newHistogram.Any())
            {
                return new ColumnStatistics
                {
                    Density = curStatistics.Density,
                    Histogram = curHistogram,
                    Selectivity = curStatistics.Selectivity * newStatistics.Selectivity,
                };
            }

            var resHistogram = new Dictionary<long, Tuple<double, bool>>();
            var curNotPopularCount = 0.0;
            var newNotPopularCount = 0.0;
            // Estimated number of rows for a value that is not tracked as popular.
            var curDefaultRow = curStatistics.Density * curStatistics.RowCount;
            var newDefaultRow = newStatistics.Density * newStatistics.RowCount;
            IEnumerator<KeyValuePair<long, Tuple<double, bool>>> newEntry = null;
            bool firstMatch = false;
            bool newHistogramEnd = false;
            double resRowCount = 0.0;
            List<long> notPopularValues = new List<long>();

            try
            {
                foreach (var entry in curHistogram)
                {
                    if (!firstMatch)
                    {
                        // Ignore current-side entries until one shares its key with the new histogram.
                        if (!newHistogram.ContainsKey(entry.Key))
                            continue;

                        firstMatch = true;
                        var matchedKey = entry.Key;
                        newEntry = newHistogram.SkipWhile(e => e.Key != matchedKey).GetEnumerator();
                        newEntry.MoveNext();
                        // Pre-subtract the first shared bucket so that the addition performed by
                        // the equal-key branch below cancels out for it.
                        if (!entry.Value.Item2)
                            curNotPopularCount -= entry.Value.Item1;
                        if (!newEntry.Current.Value.Item2)
                            newNotPopularCount -= newEntry.Current.Value.Item1;
                    }

                    // Emit every new-side bucket whose key is smaller than the current key.
                    // The current entry is then handled by the branches below; previously it was
                    // dropped, and running out of new-side buckets here aborted the whole merge.
                    while (!newHistogramEnd && entry.Key > newEntry.Current.Key)
                    {
                        var newOnlyTuple = newEntry.Current.Value;
                        if (newOnlyTuple.Item2)
                        {
                            var newOnlyCount = newOnlyTuple.Item1 * curDefaultRow;
                            resRowCount += newOnlyCount;
                            resHistogram.Add(newEntry.Current.Key, new Tuple<double, bool>(newOnlyCount, true));
                        }
                        else
                        {
                            notPopularValues.Add(newEntry.Current.Key);
                            newNotPopularCount += newOnlyTuple.Item1;
                            // Placeholder; filled in once the shared non-popular estimate is known.
                            resHistogram.Add(newEntry.Current.Key, null);
                        }
                        if (!newEntry.MoveNext())
                            newHistogramEnd = true;
                    }

                    if (newHistogramEnd || entry.Key < newEntry.Current.Key)
                    {
                        // Key exists only on the current side.
                        var curTuple = entry.Value;
                        if (curTuple.Item2)
                        {
                            var tmpCount = curTuple.Item1 * newDefaultRow;
                            resRowCount += tmpCount;
                            resHistogram.Add(entry.Key, new Tuple<double, bool>(tmpCount, true));
                        }
                        else
                        {
                            notPopularValues.Add(entry.Key);
                            curNotPopularCount += curTuple.Item1;
                            resHistogram.Add(entry.Key, null);
                        }
                    }
                    else
                    {
                        // Same key on both sides.
                        var curTuple = entry.Value;
                        var newTuple = newEntry.Current.Value;
                        if (!curTuple.Item2 && !newTuple.Item2)
                        {
                            notPopularValues.Add(entry.Key);
                            curNotPopularCount += curTuple.Item1;
                            newNotPopularCount += newTuple.Item1;
                            resHistogram.Add(entry.Key, null);
                        }
                        else
                        {
                            // A non-popular side contributes its default row estimate.
                            var count1 = curTuple.Item2 ? curTuple.Item1 : curDefaultRow;
                            var count2 = newTuple.Item2 ? newTuple.Item1 : newDefaultRow;
                            var tmpCount = count1 * count2;
                            resRowCount += tmpCount;
                            resHistogram.Add(entry.Key, new Tuple<double, bool>(tmpCount, true));
                        }
                        if (!newEntry.MoveNext())
                            newHistogramEnd = true;
                    }
                }
            }
            finally
            {
                // IEnumerator<T> is IDisposable; release the SkipWhile iterator.
                if (newEntry != null)
                    newEntry.Dispose();
            }

            double density = -1;
            if (notPopularValues.Any())
            {
                // A zero count would turn the divisions below into 0 * Infinity (NaN); the
                // product is 0 in that limit, so skip the computation instead.
                var resDefaultRow = 0.0;
                if (curNotPopularCount != 0 && newNotPopularCount != 0)
                {
                    resDefaultRow = curNotPopularCount * newNotPopularCount *
                                    Math.Min(curDefaultRow / curNotPopularCount, newDefaultRow / newNotPopularCount);
                }
                resRowCount += resDefaultRow;
                resDefaultRow = resDefaultRow / notPopularValues.Count;
                // Leave density at -1 (fall back to the inputs' densities) rather than divide by zero.
                if (resRowCount != 0)
                    density = resDefaultRow / resRowCount;
                foreach (var value in notPopularValues)
                {
                    resHistogram[value] = new Tuple<double, bool>(resDefaultRow, false);
                }
            }

            return new ColumnStatistics
            {
                Histogram = resHistogram,
                Density = density < 0 ? Math.Max(curStatistics.Density, newStatistics.Density) : density,
                MaxValue = Math.Max(curStatistics.MaxValue, newStatistics.MaxValue),
                RowCount = resRowCount,
                // NOTE(review): not guarded against a zero RowCount on either input.
                Selectivity = resRowCount / (curStatistics.RowCount * newStatistics.RowCount),
            };
        }
Пример #9
0
 /// <summary>
 /// Stores <paramref name="statistics"/> under <paramref name="edge"/> in the
 /// <c>_edgeStatisticses</c> collection.
 /// </summary>
 /// <param name="edge">Edge the statistics belong to.</param>
 /// <param name="statistics">Column statistics to associate with the edge.</param>
 public void AddEdgeStatistics(MatchEdge edge, ColumnStatistics statistics)
     => _edgeStatisticses.Add(edge, statistics);