/// <summary>
        /// Transits from the current component to the new component of the next state by joining
        /// the given candidate join unit onto a copy of this component.
        /// </summary>
        /// <param name="candidateTree">Candidate join unit to join onto the component.</param>
        /// <param name="statisticsCalculator">Statistics calculator forwarded to <c>ConstructJoinCondition</c>.</param>
        /// <param name="metaData">Graph metadata forwarded to <c>ConstructJoinCondition</c> and <c>ConstructPhysicalJoinAndUpdateCost</c>.</param>
        /// <returns>The copied component after the join condition and the physical join have been applied to it.</returns>
        public MatchComponent GetNextState(
            CandidateJoinUnit candidateTree,
            IMatchJoinStatisticsCalculator statisticsCalculator,
            GraphMetaData metaData)
        {
            // Deep copy the component
            var newComponent = new MatchComponent(this);

            // Constructs join conditions and retrieves join selectivity
            double joinSelectivity;
            double sqlEstimatedJoinSelectivity;
            var joinCondition = newComponent.ConstructJoinCondition(
                candidateTree, statisticsCalculator, metaData,
                out joinSelectivity, out sqlEstimatedJoinSelectivity);

            // Constructs physical join method and join table references
            newComponent.ConstructPhysicalJoinAndUpdateCost(
                candidateTree, joinCondition,
                joinSelectivity, sqlEstimatedJoinSelectivity, metaData);

            // The method is declared to return the new state; hand back the updated copy,
            // consistent with the overload that also takes srcNodeStatisticsDict.
            return newComponent;
        }

// Example #2
        /// <summary>
        /// Transit from current component to the new component in the next state given the Node Unit.
        /// Works on a copy of this component (<c>new MatchComponent(this)</c>): builds the join condition
        /// between the component and the candidate tree, accumulates selectivity, degree and density-based
        /// estimates, and stores the result of <c>GetPlanAndUpdateCost</c> in the copy's <c>TableRef</c>.
        /// NOTE(review): this listing appears to have lost its brace-only lines and a few continuation
        /// lines (for example the trailing arguments of the <c>UpdateHistogram</c> calls), so block
        /// boundaries, <c>else</c> branches and the final return are not visible here; confirm against
        /// the original file before relying on the control flow described in the comments below.
        /// </summary>
        /// <param name="candidateTree">One-height tree to join; its <c>TreeRoot</c>, <c>MaterializedEdges</c> and <c>UnmaterializedEdges</c> are read.</param>
        /// <param name="densityDict">Density values keyed by string; only entries whose key is present in the local <c>DensityCount</c> map (filled with <c>TableObjectName.ToString()</c> keys) are used.</param>
        /// <param name="statisticsCalculator">Supplies leaf-to-leaf statistics via <c>GetLeafToLeafStatistics</c>.</param>
        /// <returns>A <c>MatchComponent</c>; presumably <c>newComponent</c>, but the return statement is not visible in this listing.</returns>
        public MatchComponent GetNextState(
            OneHeightTree candidateTree,
            Dictionary <string, double> densityDict,
            IMatchJoinStatisticsCalculator statisticsCalculator)
            // Copy-construct a component; the updates below are applied to newComponent.
            var newComponent = new MatchComponent(this);
            var root         = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            string             nodeName      = "";

            // Update Nodes
            // When root is already a key of MaterializedNodeSplitCount, it is joined to the name
            // returned by GetNodeRefName(root) on GlobalNodeId.
            if (newComponent.MaterializedNodeSplitCount.ContainsKey(root))
                nodeName      = newComponent.GetNodeRefName(root);
                joinCondition = new WBooleanComparisonExpression
                    FirstExpr = new WColumnReferenceExpression
                        ColumnType          = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {
                            Value = root.RefAlias
                            new Identifier {
                            Value = "GlobalNodeId"
                    SecondExpr = new WColumnReferenceExpression
                        ColumnType          = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {
                            Value = nodeName
                            new Identifier {
                            Value = "GlobalNodeId"
                    ComparisonType = BooleanComparisonType.Equals
                // NOTE(review): the statements below presumably form the else branch (root not yet
                // materialized); the branch keyword is missing from this listing.
                nodeName = root.RefAlias;
                newComponent.MaterializedNodeSplitCount[root] = 0;
                newComponent.StatisticsDict[root]             = new ColumnStatistics {
                    Selectivity = 1.0 / root.TableRowCount

            // Constructs table reference
            WTableReference nodeTable = new WNamedTableReference
                Alias = new Identifier {
                    Value = nodeName
                TableObjectName = root.TableObjectName
            WTableReference compTable = newComponent.TableRef;

            // Updates join conditions
            double selectivity  = 1.0;
            double degrees      = 1.0;
            // Join counts keyed by TableObjectName.ToString(); lookups use a case-insensitive,
            // current-culture comparer.
            var    DensityCount = new Dictionary <string, int>(StringComparer.CurrentCultureIgnoreCase);

            List <MatchEdge> inEdges;

            // Edges recorded for root in UnmaterializedNodeMapping, if any.
            if (newComponent.UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
                // Only the first edge's materialized flag is consulted to choose the branch.
                var  firstEdge    = inEdges.First();
                bool materialized = newComponent.EdgeMaterilizedDict[firstEdge];
                selectivity *= 1.0 / root.TableRowCount;

                // Component materialized edge to root
                if (materialized)
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = firstEdge.EdgeAlias
                                new Identifier {
                                Value = "Sink"
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = nodeName
                                new Identifier {
                                Value = "GlobalNodeId"
                        ComparisonType = BooleanComparisonType.Equals

                    //var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[root],
                    //    new ColumnStatistics {Selectivity = 1.0/root.TableRowCount});
                    //selectivity *= statistics.Selectivity;
                    //newComponent.StatisticsDict[root] = statistics;

                    // NOTE(review): compare with the "+= inEdges.Count" / "= inEdges.Count" pair further
                    // down; one arm of this if/else looks to be missing from the listing.
                    if (DensityCount.ContainsKey(root.TableObjectName.ToString()))
                        DensityCount[root.TableObjectName.ToString()] = 1;
                // Component unmaterialized edge to root
                    // Every recorded in-edge is spanned onto the component table, flagged as
                    // materialized, and joined to nodeName on Sink = GlobalNodeId.
                    ColumnStatistics statistics = null;
                    foreach (var edge in inEdges)
                        // Update component table
                        compTable = SpanTableRef(compTable, edge, newComponent.GetNodeRefName(edge.SourceNode));

                        newComponent.EdgeMaterilizedDict[edge] = true;
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                             new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = edge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = nodeName
                                    new Identifier {
                                    Value = "GlobalNodeId"
                            ComparisonType = BooleanComparisonType.Equals
                        // NOTE(review): the remaining argument(s) of this UpdateHistogram call are
                        // missing from the listing.
                        statistics = ColumnStatistics.UpdateHistogram(statistics,
                        selectivity *= statistics.Selectivity;
                    newComponent.StatisticsDict[root] = statistics;

                    // Presumably: add to an existing count, otherwise start at inEdges.Count
                    // (the else keyword is not visible here).
                    if (DensityCount.ContainsKey(root.TableObjectName.ToString()))
                        DensityCount[root.TableObjectName.ToString()] += inEdges.Count;
                        DensityCount[root.TableObjectName.ToString()] = inEdges.Count;

            var jointEdges      = candidateTree.MaterializedEdges;
            // NOTE(review): sinkToSinkCount is never modified in the visible lines; any increment
            // may be among the lines missing from this listing.
            int sinkToSinkCount = 0;

            foreach (var jointEdge in jointEdges)
                // Update node table
                nodeTable = SpanTableRef(nodeTable, jointEdge, nodeName);
                degrees  *= jointEdge.AverageDegree;

                newComponent.EdgeMaterilizedDict[jointEdge] = true;
                var sinkNode = jointEdge.SinkNode;
                // Leaf to component materialized node
                if (newComponent.MaterializedNodeSplitCount.ContainsKey(sinkNode))
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                         new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = jointEdge.EdgeAlias
                                new Identifier {
                                Value = "Sink"
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = sinkNode.RefAlias
                                new Identifier {
                                Value = "GlobalNodeId"
                        ComparisonType = BooleanComparisonType.Equals
                    // NOTE(review): second argument of UpdateHistogram is missing from the listing.
                    var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[sinkNode],
                    selectivity *= statistics.Selectivity;
                    newComponent.StatisticsDict[sinkNode] = statistics;

                    if (DensityCount.ContainsKey(sinkNode.TableObjectName.ToString()))
                        DensityCount[sinkNode.TableObjectName.ToString()] = 1;
                // Leaf to component unmaterialized node
                    // Indexer lookup: sinkNode is expected to have an entry in UnmaterializedNodeMapping
                    // on this path (presumably an else branch; keyword not visible).
                    inEdges = newComponent.UnmaterializedNodeMapping[sinkNode];
                    var  firstEdge      = inEdges.First();
                    bool materlizedEdge = newComponent.EdgeMaterilizedDict[firstEdge];

                    // Leaf to materialized leaf
                    if (materlizedEdge)
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                             new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = jointEdge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = firstEdge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            ComparisonType = BooleanComparisonType.Equals

                        // NOTE(review): second argument of UpdateHistogram is missing from the listing.
                        var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[sinkNode],
                        selectivity *= statistics.Selectivity;
                        newComponent.StatisticsDict[sinkNode] = statistics;
                    // Leaf to unmaterialized leaf
                        // Each pending in-edge of sinkNode is spanned onto the component table, flagged
                        // as materialized, and joined Sink-to-Sink with the joint edge.
                        ColumnStatistics compSinkNodeStatistics = null;
                        foreach (var inEdge in inEdges)
                            compTable = SpanTableRef(compTable, inEdge, newComponent.GetNodeRefName(inEdge.SourceNode));
                            newComponent.EdgeMaterilizedDict[inEdge] = true;
                            joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                                 new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType          = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {
                                        Value = jointEdge.EdgeAlias
                                        new Identifier {
                                        Value = "Sink"
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType          = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {
                                        Value = inEdge.EdgeAlias
                                        new Identifier {
                                        Value = "Sink"
                                ComparisonType = BooleanComparisonType.Equals

                            var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge);
                            selectivity           *= leafToLeafStatistics.Selectivity;
                            // NOTE(review): the right-hand side of this assignment is missing from the listing.
                            compSinkNodeStatistics =
                        newComponent.StatisticsDict[sinkNode] = compSinkNodeStatistics;

            var unmatEdges = candidateTree.UnmaterializedEdges;

            // Flag the tree's unmaterialized edges and fold their average degree into degrees.
            foreach (var unmatEdge in unmatEdges)
                newComponent.EdgeMaterilizedDict[unmatEdge] = false;
                // NOTE(review): sinkNodeInEdges is not used in the visible lines; a statement that
                // records unmatEdge in it may be missing from this listing.
                var sinkNodeInEdges = newComponent.UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
                degrees *= unmatEdge.AverageDegree;

            // Calculate Estimated Join Selectivity & Estimated Node Size
            double estimatedSelectity = 1.0;
            int    count    = 0;
            bool   sinkJoin = false;

            // For every density entry that has a join count: raise the density to
            // 2 - 2^(1 - joinCount), then damp that factor by the exponent 2^(-count), where count
            // is the number of joins already folded in.
            foreach (var item in densityDict.Where(e => DensityCount.ContainsKey(e.Key)))
                var density            = item.Value;
                var curJoinCount       = DensityCount[item.Key];
                var curJoinSelectitivy = Math.Pow(density, 2 - Math.Pow(2, 1 - curJoinCount));
                // The sink-to-sink factor (based on ColumnStatistics.DefaultDensity) is folded in at
                // most once, the first time an entry's density exceeds the default density.
                if (!sinkJoin && ColumnStatistics.DefaultDensity < density)
                    var curSinkJoinSelectivity = Math.Pow(ColumnStatistics.DefaultDensity,
                                                          2 - Math.Pow(2, 1 - sinkToSinkCount));
                    estimatedSelectity *= Math.Pow(curSinkJoinSelectivity, Math.Pow(2, -count));
                    count   += sinkToSinkCount;
                    sinkJoin = true;
                estimatedSelectity *= Math.Pow(curJoinSelectitivy, Math.Pow(2, -count));
                count += curJoinCount;

            // Node-unit size estimate: root.EstimatedRows scaled by 1000 per edge of the tree
            // (materialized and unmaterialized); the meaning of the constant 1000 is not stated here.
            var estimatedNodeUnitSize = root.EstimatedRows *
                                        Math.Pow(1000, candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count);

            // Update Table Reference
            newComponent.TableRef = GetPlanAndUpdateCost(candidateTree, newComponent, nodeTable, compTable, joinCondition,
                                                         degrees, selectivity, estimatedNodeUnitSize, estimatedSelectity);

        /// <summary>
        /// Builds the join condition between this component and the candidate join unit rooted at
        /// <c>candidateTree.TreeRoot</c>. Conditions for edges ordered <c>MaterializedOrder.Pre</c> are
        /// conjoined into the returned join condition; the remaining conditions are conjoined into a local
        /// where-condition that is merged into the <c>WhereCondition</c> member at the end. Along the way it
        /// flags edges in <c>EdgeMaterilizedDict</c>, updates <c>SinkNodeStatisticsDict</c>, and may switch
        /// <c>candidateTree.JoinHint</c> from <c>Loop</c> to <c>Hash</c>.
        /// NOTE(review): this listing appears to have lost its brace-only lines, its <c>else</c> keywords
        /// and parts of some statements (for example the initializers of <c>inEdges</c> and
        /// <c>outEdges</c>); confirm the control flow against the original file.
        /// </summary>
        /// <param name="candidateTree">Candidate join unit; its <c>TreeRoot</c>, <c>JoinHint</c>, <c>PreMatIncomingEdges</c> and <c>UnmaterializedEdges</c> are read, and <c>JoinHint</c> may be overwritten.</param>
        /// <param name="statisticsCalculator">Not referenced in the visible lines of this method.</param>
        /// <param name="metaData">Not referenced in the visible lines of this method.</param>
        /// <param name="srcNodeStatisticsDict">Statistics looked up by (<c>edge.EdgeAlias</c>, <c>edge.IsReversedEdge</c>) for incoming edges when this is the first join.</param>
        /// <param name="preJoinSelectivity">Starts at 1.0 and is multiplied by the selectivity of each edge whose order is <c>MaterializedOrder.Pre</c>.</param>
        /// <param name="postJoinSelectivity">Starts at 1.0 and is multiplied by the selectivity of the other edges (presumably those ordered <c>MaterializedOrder.Post</c>; the branch keyword is not visible).</param>
        /// <param name="sqlEstimatedJoinSelectivity">Starts at 1.0 and is updated from the entries of the local <c>densityList</c>.</param>
        /// <returns>The accumulated join condition; it is initialised to <c>null</c> and only reassigned when an edge condition is conjoined.</returns>
        private WBooleanExpression ConstructJoinCondition(
            CandidateJoinUnit candidateTree,
            IMatchJoinStatisticsCalculator statisticsCalculator,
            GraphMetaData metaData,
            Dictionary<Tuple<string, bool>, Statistics> srcNodeStatisticsDict,
            out double preJoinSelectivity,
            out double postJoinSelectivity,
            out double sqlEstimatedJoinSelectivity)
            // Thresholds used below when deciding whether to replace a Loop join hint with Hash.
            const double sizeThreshold = 1e8;
            const int loopJoinFactorThreshold = 20;

            preJoinSelectivity = 1.0;
            postJoinSelectivity = 1.0;
            sqlEstimatedJoinSelectivity = 1.0;

            // Treated as the first join when exactly one node is recorded in MaterializedNodeSplitCount;
            // in that case the first element of Nodes is remembered for the statistics update below.
            var firstJoin = MaterializedNodeSplitCount.Count == 1;
            MatchNode firstNode = null;
            if (firstJoin)
                firstNode = Nodes.First();
            var root = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            WBooleanExpression whereCondition = null;
            string nodeName = root.RefAlias;

            // NOTE(review): the body of this if is missing from the listing (presumably it adds root
            // to Nodes); as shown, the next statement would become its body.
            if (!Nodes.Contains(root))
            MaterializedNodeSplitCount[root] = 0;

            // NOTE(review): the source sequences of the two initializers below are missing from the
            // listing; the visible lambdas tag each edge with MaterializedOrder.Pre or
            // MaterializedOrder.Post.
            var inEdges =
                    e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
                            e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))

            var outEdges =
                    e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Pre, e))
                            e => new Tuple<MaterializedOrder, MatchEdge>(MaterializedOrder.Post, e)))

            // NOTE(review): densityList, inPostCount and outPostCount are never modified in the
            // visible lines; their updates may be among the missing lines.
            var densityList = new List<double>();
            var inPostCount = 0;
            var outPostCount = 0;

            if (inEdges.Any())
                //joinSelectivity *= 1.0 / root.TableRowCount;

                Statistics statistics = null;
                Statistics srcNodeStat = null;
                foreach (var t in inEdges)
                    var order = t.Item1;
                    var edge = t.Item2;

                    // Column reference <nodeName>.GlobalNodeId for the root of the candidate tree.
                    var globalNodeIdRef = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {Value = nodeName},
                            new Identifier {Value = "GlobalNodeId"}

                    // <edge>.Sink = <nodeName>.GlobalNodeId; for a Post-ordered edge with
                    // inPostCount > 0 the right-hand side is wrapped as GlobalNodeId + 0
                    // (the reason for the "+ 0" is not stated in this code).
                    var newCondition = new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {Value = edge.EdgeAlias},
                                new Identifier {Value = "Sink"}
                        SecondExpr = order == MaterializedOrder.Post && inPostCount > 0
                            ? new WBinaryExpression
                                ExpressionType = BinaryExpressionType.Add,
                                FirstExpr = globalNodeIdRef,
                                SecondExpr = new WValueExpression
                                    SingleQuoted = false,
                                    Value = "0",
                            : (WScalarExpression)globalNodeIdRef,
                        ComparisonType = BooleanComparisonType.Equals
                    EdgeMaterilizedDict[edge] = true;

                    // Fold this edge's statistics into the running statistics for root.
                    double selectivity;
                    statistics = Statistics.UpdateHistogram(statistics,
                        edge.Statistics, out selectivity);

                    // Pre-ordered edges feed the join condition; the second pair of statements is
                    // presumably the else branch feeding the where-condition (keyword not visible).
                    if (order == MaterializedOrder.Pre)
                        preJoinSelectivity *= selectivity;
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, newCondition);
                        postJoinSelectivity *= selectivity;
                        whereCondition = WBooleanBinaryExpression.Conjunction(whereCondition, newCondition);

                    // On the first join, also accumulate source-node statistics for this edge;
                    // srcNodeSelectivity is not used afterwards in the visible lines.
                    if (firstJoin)
                        double srcNodeSelectivity;
                        srcNodeStat = Statistics.UpdateHistogram(srcNodeStat,
                            srcNodeStatisticsDict[new Tuple<string, bool>(edge.EdgeAlias, edge.IsReversedEdge)],
                            out srcNodeSelectivity);


                if (firstJoin)
                    SinkNodeStatisticsDict[firstNode] = srcNodeStat;
                SinkNodeStatisticsDict[root] = statistics;

            // A Loop hint is replaced by Hash when the estimated size (Cardinality times the product
            // of the pre-materialized incoming edges' average degrees times preJoinSelectivity) is at
            // least sizeThreshold and also exceeds root.EstimatedRows * loopJoinFactorThreshold.
            if (candidateTree.JoinHint == JoinHint.Loop)
                var size = Cardinality*candidateTree.PreMatIncomingEdges.Select(e => e.AverageDegree)
                    .Aggregate(1.0, (cur, next) => cur*next)*preJoinSelectivity;
                if (size >= sizeThreshold && size > root.EstimatedRows * loopJoinFactorThreshold)
                    candidateTree.JoinHint = JoinHint.Hash;

            if (outEdges.Any())
                foreach (var t in outEdges)
                    var order = t.Item1;
                    var edge = t.Item2;
                    var sinkNode = edge.SinkNode;

                    // Column reference <sinkNode.RefAlias>.GlobalNodeId for the edge's sink node.
                    var globalNodeIdRef = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {Value = sinkNode.RefAlias},
                            new Identifier {Value = "GlobalNodeId"}

                    // Same shape as the incoming-edge condition, using outPostCount for the "+ 0" form.
                    var newCondition = new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {Value = edge.EdgeAlias},
                                new Identifier {Value = "Sink"}
                        SecondExpr = order == MaterializedOrder.Post && outPostCount > 0
                            ? new WBinaryExpression
                                ExpressionType = BinaryExpressionType.Add,
                                FirstExpr = globalNodeIdRef,
                                SecondExpr = new WValueExpression
                                    SingleQuoted = false,
                                    Value = "0",
                            : (WScalarExpression)globalNodeIdRef,
                        ComparisonType = BooleanComparisonType.Equals
                    EdgeMaterilizedDict[edge] = true;

                    // Start from the sink node's existing statistics when present, otherwise from null.
                    Statistics sinkNodeStatistics;
                    if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
                        sinkNodeStatistics = null;
                        //joinSelectivity *= 1.0 / sinkNode.TableRowCount;
                    double selectivity;
                    var statistics = Statistics.UpdateHistogram(sinkNodeStatistics,
                        edge.Statistics, out selectivity);

                    // Pre-ordered edges feed the join condition; the second pair of statements is
                    // presumably the else branch feeding the where-condition (keyword not visible).
                    if (order == MaterializedOrder.Pre)
                        preJoinSelectivity *= selectivity;
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, newCondition);
                        postJoinSelectivity *= selectivity;
                        whereCondition = WBooleanBinaryExpression.Conjunction(whereCondition, newCondition);

                    SinkNodeStatisticsDict[sinkNode] = statistics;

            // Flag the candidate tree's unmaterialized edges as not materialized.
            var unmatEdges = candidateTree.UnmaterializedEdges;
            foreach (var unmatEdge in unmatEdges)
                EdgeMaterilizedDict[unmatEdge] = false;;
                // NOTE(review): unmatNodeInEdges is not used in the visible lines; a statement that
                // records unmatEdge in it may be missing from this listing.
                var unmatNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);

            // Walk densityList from last to first; each step multiplies the running estimate by its
            // own square root and by the current density entry.
            for (int i = densityList.Count - 1; i >= 0; i--)
                sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];

            // Merge the locally collected where-condition into the component-level WhereCondition.
            WhereCondition = WBooleanBinaryExpression.Conjunction(WhereCondition, whereCondition);

            return joinCondition;
        /// <summary>
        /// Transits from the current component to the new component of the next state by joining
        /// the given candidate join unit onto a copy of this component.
        /// </summary>
        /// <param name="candidateTree">Candidate join unit to join onto the component.</param>
        /// <param name="statisticsCalculator">Statistics calculator forwarded to <c>ConstructJoinCondition</c>.</param>
        /// <param name="metaData">Graph metadata forwarded to <c>ConstructJoinCondition</c> and <c>ConstructPhysicalJoinAndUpdateCost</c>.</param>
        /// <param name="srcNodeStatisticsDict">Source-node statistics keyed by a (string, bool) tuple, forwarded to <c>ConstructJoinCondition</c>.</param>
        /// <returns>The copied component after the join condition and the physical join have been applied to it.</returns>
        public MatchComponent GetNextState(
            CandidateJoinUnit candidateTree,
            IMatchJoinStatisticsCalculator statisticsCalculator,
            GraphMetaData metaData,
            Dictionary<Tuple<string, bool>, Statistics> srcNodeStatisticsDict)
        {
            // Deep copy the component
            var newComponent = new MatchComponent(this);

            // Constructs join conditions and retrieves join selectivity
            double preJoinSelectivity;
            double postJoinSelectivity;
            double sqlEstimatedJoinSelectivity;

            var joinCondition = newComponent.ConstructJoinCondition(
                candidateTree, statisticsCalculator, metaData, srcNodeStatisticsDict,
                out preJoinSelectivity, out postJoinSelectivity, out sqlEstimatedJoinSelectivity);

            // Constructs physical join method and join table references
            newComponent.ConstructPhysicalJoinAndUpdateCost(
                candidateTree, joinCondition,
                preJoinSelectivity, postJoinSelectivity, sqlEstimatedJoinSelectivity, metaData);

            return newComponent;
        }
// Example #5
        private WBooleanExpression ConstructJoinCondition(
            CandidateJoinUnit candidateTree,
            IMatchJoinStatisticsCalculator statisticsCalculator,
            GraphMetaData metaData,
            out double joinSelectivity, 
            out double sqlEstimatedJoinSelectivity)
            joinSelectivity = 1.0;
            sqlEstimatedJoinSelectivity = 1.0;

            var root = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            string nodeName = "";

            // Update Nodes
            if (MaterializedNodeSplitCount.ContainsKey(root))
                nodeName = GetNodeRefName(root);
                joinCondition = new WBooleanComparisonExpression
                    FirstExpr = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier { Value = root.RefAlias },
                            new Identifier { Value = "GlobalNodeId" }
                    SecondExpr = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier { Value = nodeName },
                            new Identifier { Value = "GlobalNodeId" }
                    ComparisonType = BooleanComparisonType.Equals
                nodeName = root.RefAlias;
                if (!Nodes.Contains(root))
                MaterializedNodeSplitCount[root] = 0;

            List<double> densityList = new List<double>();

            List<MatchEdge> inEdges;
            if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
                var firstEdge = inEdges.First();
                bool materialized = EdgeMaterilizedDict[firstEdge];
                joinSelectivity *= 1.0 / root.TableRowCount;

                // Component materialized edge to root
                if (materialized)
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier { Value = firstEdge.EdgeAlias },
                                new Identifier { Value = "Sink" }
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier { Value = nodeName },
                                new Identifier { Value = "GlobalNodeId" }
                        ComparisonType = BooleanComparisonType.Equals

                // Component unmaterialized edge to root                
                    Statistics statistics = null;
                    foreach (var edge in inEdges)
                        // Update component table
                        TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode),metaData);

                        EdgeMaterilizedDict[edge] = true;
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = edge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = nodeName },
                                        new Identifier { Value = "GlobalNodeId" }
                                ComparisonType = BooleanComparisonType.Equals
                        double selectivity;
                        statistics = Statistics.UpdateHistogram(statistics,
                            edge.Statistics, out selectivity);
                        joinSelectivity *= selectivity;
                    SinkNodeStatisticsDict[root] = statistics;



            var jointEdges = candidateTree.MaterializedEdges;
            foreach (var jointEdge in jointEdges)

                EdgeMaterilizedDict[jointEdge] = true;
                var sinkNode = jointEdge.SinkNode;
                // Leaf to component materialized node
                if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                        new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier { Value = jointEdge.EdgeAlias },
                                    new Identifier { Value = "Sink" }
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier { Value = sinkNode.RefAlias },
                                    new Identifier { Value = "GlobalNodeId" }
                            ComparisonType = BooleanComparisonType.Equals
                    Statistics sinkNodeStatistics;
                    if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
                        sinkNodeStatistics = null;
                        joinSelectivity *= 1.0 / sinkNode.TableRowCount;
                    double selectivity;
                    var statistics = Statistics.UpdateHistogram(sinkNodeStatistics,
                        jointEdge.Statistics, out selectivity);
                    joinSelectivity *= selectivity;
                    SinkNodeStatisticsDict[sinkNode] = statistics;
                // Leaf to component unmaterialized node
                    inEdges = UnmaterializedNodeMapping[sinkNode];
                    var firstEdge = inEdges.First();
                    bool materlizedEdge = EdgeMaterilizedDict[firstEdge];

                    // Leaf to materialized leaf
                    if (materlizedEdge)
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = jointEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = firstEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                ComparisonType = BooleanComparisonType.Equals

                        double selectivity;
                        var statistics = Statistics.UpdateHistogram(SinkNodeStatisticsDict[sinkNode],
                            jointEdge.Statistics, out selectivity);
                        joinSelectivity *= selectivity;
                        SinkNodeStatisticsDict[sinkNode] = statistics;
                    // Leaf to unmaterialized leaf
                        Statistics compSinkNodeStatistics = null;
                        foreach (var inEdge in inEdges)
                            TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode),metaData);
                            EdgeMaterilizedDict[inEdge] = true;
                            joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = jointEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = inEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                ComparisonType = BooleanComparisonType.Equals


                            double selectivity;
                            var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge,
                                out selectivity);
                            joinSelectivity *= selectivity;
                            compSinkNodeStatistics =
                                    inEdge.Statistics, out selectivity);
                        SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;

            var unmatEdges = candidateTree.UnmaterializedEdges;
            foreach (var unmatEdge in unmatEdges)
                EdgeMaterilizedDict[unmatEdge] = false;
                if (!Nodes.Contains(unmatEdge.SinkNode))
                var sinkNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);


            // Calculate Estimated Join Selectivity & Estimated Node Size
            for (int i = densityList.Count - 1; i >= 0; i--)
                sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];

            return joinCondition;
Exemple #6
        /// <summary>
        /// Transits from the current component to the component of the next state by joining in
        /// the given candidate tree. All updates are applied to a new <c>MatchComponent</c>
        /// constructed from this one, which is then returned.
        /// </summary>
        /// <param name="candidateTree">
        /// Candidate tree to join; its <c>TreeRoot</c>, <c>MaterializedEdges</c> and
        /// <c>UnmaterializedEdges</c> are read.
        /// </param>
        /// <param name="densityDict">
        /// Density values by string key. Only entries whose key is present in the local
        /// <c>DensityCount</c> table contribute to the estimated selectivity. The keys are
        /// presumably table names (matching <c>TableObjectName.ToString()</c>) - confirm with callers.
        /// </param>
        /// <param name="statisticsCalculator">
        /// Provides leaf-to-leaf statistics through <c>GetLeafToLeafStatistics</c>.
        /// </param>
        /// <returns>
        /// The new component, whose <c>TableRef</c> is the result of <c>GetPlanAndUpdateCost</c>.
        /// </returns>
        public MatchComponent GetNextState(
            OneHeightTree candidateTree, 
            Dictionary<string, double> densityDict, 
            IMatchJoinStatisticsCalculator statisticsCalculator)
            // NOTE(review): this listing appears to have lost its brace-only lines, its `else`
            // keywords and a few continuation lines (e.g. the second argument of several
            // UpdateHistogram calls). The comments below describe only what the remaining lines show.

            // Work on a new component constructed from this one.
            var newComponent = new MatchComponent(this);
            var root = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            string nodeName = "";

            // Update Nodes.
            // If the root is already registered in MaterializedNodeSplitCount, take the reference name
            // from GetNodeRefName and equate root.RefAlias.GlobalNodeId with nodeName.GlobalNodeId.
            if (newComponent.MaterializedNodeSplitCount.ContainsKey(root))
                nodeName = newComponent.GetNodeRefName(root);
                joinCondition = new WBooleanComparisonExpression
                    FirstExpr = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {Value = root.RefAlias},
                            new Identifier {Value = "GlobalNodeId"}
                    SecondExpr = new WColumnReferenceExpression
                        ColumnType = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {Value = nodeName},
                            new Identifier {Value = "GlobalNodeId"}
                    ComparisonType = BooleanComparisonType.Equals
                // NOTE(review): presumably the else branch (root not yet registered) - the root keeps
                // its own alias, gets a split count of 0 and a starting selectivity of 1/TableRowCount.
                nodeName = root.RefAlias;
                newComponent.MaterializedNodeSplitCount[root] = 0;
                newComponent.StatisticsDict[root] = new ColumnStatistics {Selectivity = 1.0/root.TableRowCount};


            // Constructs table reference for the root, aliased as nodeName
            WTableReference nodeTable = new WNamedTableReference
                Alias = new Identifier { Value = nodeName },
                TableObjectName = root.TableObjectName
            WTableReference compTable = newComponent.TableRef;

            // Updates join conditions
            // selectivity and degrees are running products handed to GetPlanAndUpdateCost at the end.
            double selectivity = 1.0;
            double degrees = 1.0;
            // Join counts keyed (case-insensitively) by TableObjectName.ToString(); read again in the
            // estimated-selectivity loop near the end of the method.
            var DensityCount = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);

            // Edges recorded for the root in UnmaterializedNodeMapping, if any.
            List<MatchEdge> inEdges;
            if (newComponent.UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
                // Only the first edge's materialization flag is consulted.
                // NOTE(review): presumably all edges in the list share the same flag - confirm.
                var firstEdge = inEdges.First();
                bool materialized = newComponent.EdgeMaterilizedDict[firstEdge];
                selectivity *= 1.0/root.TableRowCount;

                // Component materialized edge to root
                if (materialized)
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {Value = firstEdge.EdgeAlias},
                                new Identifier {Value = "Sink"}
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {Value = nodeName},
                                new Identifier {Value = "GlobalNodeId"}
                        ComparisonType = BooleanComparisonType.Equals

                    //var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[root],
                    //    new ColumnStatistics {Selectivity = 1.0/root.TableRowCount});
                    //selectivity *= statistics.Selectivity;
                    //newComponent.StatisticsDict[root] = statistics;

                    // NOTE(review): as extracted, the count is assigned only when the key already
                    // exists; an increment/else branch may have been lost - confirm.
                    if (DensityCount.ContainsKey(root.TableObjectName.ToString()))
                        DensityCount[root.TableObjectName.ToString()] = 1;
                // Component unmaterialized edge to root (presumably the else branch of the check above)
                    ColumnStatistics statistics = null;
                    foreach (var edge in inEdges)
                        // Update component table
                        compTable = SpanTableRef(compTable, edge, newComponent.GetNodeRefName(edge.SourceNode));

                        // Mark the edge as materialized and equate its Sink with the root's GlobalNodeId.
                        newComponent.EdgeMaterilizedDict[edge] = true;
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {Value = edge.EdgeAlias},
                                        new Identifier {Value = "Sink"}
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {Value = nodeName},
                                        new Identifier {Value = "GlobalNodeId"}
                                ComparisonType = BooleanComparisonType.Equals
                        // NOTE(review): the UpdateHistogram call below is truncated in this extract
                        // (its second argument is missing).
                        statistics = ColumnStatistics.UpdateHistogram(statistics,
                        selectivity *= statistics.Selectivity;

                    newComponent.StatisticsDict[root] = statistics;

                    // NOTE(review): same pattern as above - assigns only when the key already exists.
                    if (DensityCount.ContainsKey(root.TableObjectName.ToString()))
                        DensityCount[root.TableObjectName.ToString()] = inEdges.Count;

            // Join every materialized edge of the candidate tree.
            var jointEdges = candidateTree.MaterializedEdges;
            // NOTE(review): no visible line modifies sinkToSinkCount after this initialization,
            // although it is read in the selectivity estimate below.
            int sinkToSinkCount = 0;
            foreach (var jointEdge in jointEdges)
                // Update node table
                nodeTable = SpanTableRef(nodeTable, jointEdge, nodeName);
                degrees *= jointEdge.AverageDegree;

                newComponent.EdgeMaterilizedDict[jointEdge] = true;
                var sinkNode = jointEdge.SinkNode;
                // Leaf to component materialized node
                if (newComponent.MaterializedNodeSplitCount.ContainsKey(sinkNode))
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                        new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {Value = jointEdge.EdgeAlias},
                                    new Identifier {Value = "Sink"}
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {Value = sinkNode.RefAlias},
                                    new Identifier {Value = "GlobalNodeId"}
                            ComparisonType = BooleanComparisonType.Equals
                    // NOTE(review): truncated call - second argument missing in this extract.
                    var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[sinkNode],
                    selectivity *= statistics.Selectivity;
                    newComponent.StatisticsDict[sinkNode] = statistics;

                    if (DensityCount.ContainsKey(sinkNode.TableObjectName.ToString()))
                        DensityCount[sinkNode.TableObjectName.ToString()] = 1;
                // Leaf to component unmaterialized node (presumably the else branch of the check above)
                    inEdges = newComponent.UnmaterializedNodeMapping[sinkNode];
                    var firstEdge = inEdges.First();
                    bool materlizedEdge = newComponent.EdgeMaterilizedDict[firstEdge];
                    // Leaf to materialized leaf: equate the Sink columns of the two edges
                    if (materlizedEdge)
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {Value = jointEdge.EdgeAlias},
                                        new Identifier {Value = "Sink"}
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = firstEdge.EdgeAlias},
                                        new Identifier {Value = "Sink"}
                                ComparisonType = BooleanComparisonType.Equals

                        // NOTE(review): truncated call - second argument missing in this extract.
                        var statistics = ColumnStatistics.UpdateHistogram(newComponent.StatisticsDict[sinkNode],
                        selectivity *= statistics.Selectivity;
                        newComponent.StatisticsDict[sinkNode] = statistics;
                    // Leaf to unmaterialized leaf: materialize each recorded in-edge and equate its
                    // Sink with the joint edge's Sink
                        ColumnStatistics compSinkNodeStatistics = null;
                        foreach (var inEdge in inEdges)
                            compTable = SpanTableRef(compTable, inEdge, newComponent.GetNodeRefName(inEdge.SourceNode));
                            newComponent.EdgeMaterilizedDict[inEdge] = true;
                            joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                            new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = jointEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier { Value = inEdge.EdgeAlias },
                                        new Identifier { Value = "Sink" }
                                ComparisonType = BooleanComparisonType.Equals

                            var leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge);
                            selectivity *= leafToLeafStatistics.Selectivity;
                            // NOTE(review): the right-hand side of this assignment is missing in this extract.
                            compSinkNodeStatistics =
                        newComponent.StatisticsDict[sinkNode] = compSinkNodeStatistics;

            // Record the candidate tree's unmaterialized edges as not materialized and fold their
            // average degree into the running product.
            var unmatEdges = candidateTree.UnmaterializedEdges;
            foreach (var unmatEdge in unmatEdges)
                newComponent.EdgeMaterilizedDict[unmatEdge] = false;
                // NOTE(review): sinkNodeInEdges is not used on any visible line; a call that adds
                // unmatEdge to it may have been lost - confirm.
                var sinkNodeInEdges = newComponent.UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);
                degrees *= unmatEdge.AverageDegree;


            // Calculate Estimated Join Selectivity & Estimated Node Size
            // count accumulates the join counts processed so far and damps later factors by 2^-count;
            // sinkJoin makes sure the DefaultDensity-based factor is applied at most once.
            double estimatedSelectity = 1.0;
            int count = 0;
            bool sinkJoin = false;
            foreach (var item in densityDict.Where(e => DensityCount.ContainsKey(e.Key)))
                var density = item.Value;
                var curJoinCount = DensityCount[item.Key];
                // Per-table factor: density ^ (2 - 2^(1 - curJoinCount)).
                var curJoinSelectitivy = Math.Pow(density, 2 - Math.Pow(2, 1 - curJoinCount));
                // The first time a density above ColumnStatistics.DefaultDensity is seen, apply the
                // analogous factor computed from DefaultDensity and sinkToSinkCount.
                if (!sinkJoin && ColumnStatistics.DefaultDensity < density)
                    var curSinkJoinSelectivity = Math.Pow(ColumnStatistics.DefaultDensity,
                        2 - Math.Pow(2, 1 - sinkToSinkCount));
                    estimatedSelectity *= Math.Pow(curSinkJoinSelectivity, Math.Pow(2, -count));
                    count += sinkToSinkCount;
                    sinkJoin = true;
                estimatedSelectity *= Math.Pow(curJoinSelectitivy, Math.Pow(2, -count));
                count += curJoinCount;

            // Estimated node unit size: root.EstimatedRows multiplied by 1000 for every edge
            // (materialized or not) of the candidate tree.
            var estimatedNodeUnitSize = root.EstimatedRows*
                                        Math.Pow(1000, candidateTree.MaterializedEdges.Count + candidateTree.UnmaterializedEdges.Count);

            // Update Table Reference
            newComponent.TableRef = GetPlanAndUpdateCost(candidateTree, newComponent, nodeTable, compTable, joinCondition,
                degrees, selectivity, estimatedNodeUnitSize, estimatedSelectity);

            return newComponent;
        private WBooleanExpression ConstructJoinCondition(
            CandidateJoinUnit candidateTree,
            IMatchJoinStatisticsCalculator statisticsCalculator,
            GraphMetaData metaData,
            out double joinSelectivity,
            out double sqlEstimatedJoinSelectivity)
            joinSelectivity             = 1.0;
            sqlEstimatedJoinSelectivity = 1.0;

            var root = candidateTree.TreeRoot;

            WBooleanExpression joinCondition = null;
            string             nodeName      = "";

            // Update Nodes
            if (MaterializedNodeSplitCount.ContainsKey(root))
                nodeName      = GetNodeRefName(root);
                joinCondition = new WBooleanComparisonExpression
                    FirstExpr = new WColumnReferenceExpression
                        ColumnType          = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {
                            Value = root.RefAlias
                            new Identifier {
                            Value = "GlobalNodeId"
                    SecondExpr = new WColumnReferenceExpression
                        ColumnType          = ColumnType.Regular,
                        MultiPartIdentifier = new WMultiPartIdentifier(
                            new Identifier {
                            Value = nodeName
                            new Identifier {
                            Value = "GlobalNodeId"
                    ComparisonType = BooleanComparisonType.Equals
                nodeName = root.RefAlias;
                if (!Nodes.Contains(root))
                MaterializedNodeSplitCount[root] = 0;

            List <double> densityList = new List <double>();

            List <MatchEdge> inEdges;

            if (UnmaterializedNodeMapping.TryGetValue(root, out inEdges))
                var  firstEdge    = inEdges.First();
                bool materialized = EdgeMaterilizedDict[firstEdge];
                joinSelectivity *= 1.0 / root.TableRowCount;

                // Component materialized edge to root
                if (materialized)
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition, new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = firstEdge.EdgeAlias
                                new Identifier {
                                Value = "Sink"
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = nodeName
                                new Identifier {
                                Value = "GlobalNodeId"
                        ComparisonType = BooleanComparisonType.Equals

                // Component unmaterialized edge to root
                    Statistics statistics = null;
                    foreach (var edge in inEdges)
                        // Update component table
                        TableRef = SpanTableRef(TableRef, edge, GetNodeRefName(edge.SourceNode), metaData);

                        EdgeMaterilizedDict[edge] = true;
                        joinCondition             = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                                         new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = edge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = nodeName
                                    new Identifier {
                                    Value = "GlobalNodeId"
                            ComparisonType = BooleanComparisonType.Equals
                        double selectivity;
                        statistics = Statistics.UpdateHistogram(statistics,
                                                                edge.Statistics, out selectivity);
                        joinSelectivity *= selectivity;
                    SinkNodeStatisticsDict[root] = statistics;

            var jointEdges = candidateTree.MaterializedEdges;

            foreach (var jointEdge in jointEdges)
                EdgeMaterilizedDict[jointEdge] = true;
                var sinkNode = jointEdge.SinkNode;
                // Leaf to component materialized node
                if (MaterializedNodeSplitCount.ContainsKey(sinkNode))
                    joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                         new WBooleanComparisonExpression
                        FirstExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = jointEdge.EdgeAlias
                                new Identifier {
                                Value = "Sink"
                        SecondExpr = new WColumnReferenceExpression
                            ColumnType          = ColumnType.Regular,
                            MultiPartIdentifier = new WMultiPartIdentifier(
                                new Identifier {
                                Value = sinkNode.RefAlias
                                new Identifier {
                                Value = "GlobalNodeId"
                        ComparisonType = BooleanComparisonType.Equals
                    Statistics sinkNodeStatistics;
                    if (!SinkNodeStatisticsDict.TryGetValue(sinkNode, out sinkNodeStatistics))
                        sinkNodeStatistics = null;
                        joinSelectivity   *= 1.0 / sinkNode.TableRowCount;
                    double selectivity;
                    var    statistics = Statistics.UpdateHistogram(sinkNodeStatistics,
                                                                   jointEdge.Statistics, out selectivity);
                    joinSelectivity *= selectivity;
                    SinkNodeStatisticsDict[sinkNode] = statistics;
                // Leaf to component unmaterialized node
                    inEdges = UnmaterializedNodeMapping[sinkNode];
                    var  firstEdge      = inEdges.First();
                    bool materlizedEdge = EdgeMaterilizedDict[firstEdge];

                    // Leaf to materialized leaf
                    if (materlizedEdge)
                        joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                             new WBooleanComparisonExpression
                            FirstExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = jointEdge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            SecondExpr = new WColumnReferenceExpression
                                ColumnType          = ColumnType.Regular,
                                MultiPartIdentifier = new WMultiPartIdentifier(
                                    new Identifier {
                                    Value = firstEdge.EdgeAlias
                                    new Identifier {
                                    Value = "Sink"
                            ComparisonType = BooleanComparisonType.Equals

                        double selectivity;
                        var    statistics = Statistics.UpdateHistogram(SinkNodeStatisticsDict[sinkNode],
                                                                       jointEdge.Statistics, out selectivity);
                        joinSelectivity *= selectivity;
                        SinkNodeStatisticsDict[sinkNode] = statistics;
                    // Leaf to unmaterialized leaf
                        Statistics compSinkNodeStatistics = null;
                        foreach (var inEdge in inEdges)
                            TableRef = SpanTableRef(TableRef, inEdge, GetNodeRefName(inEdge.SourceNode), metaData);
                            EdgeMaterilizedDict[inEdge] = true;
                            joinCondition = WBooleanBinaryExpression.Conjunction(joinCondition,
                                                                                 new WBooleanComparisonExpression
                                FirstExpr = new WColumnReferenceExpression
                                    ColumnType          = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {
                                        Value = jointEdge.EdgeAlias
                                        new Identifier {
                                        Value = "Sink"
                                SecondExpr = new WColumnReferenceExpression
                                    ColumnType          = ColumnType.Regular,
                                    MultiPartIdentifier = new WMultiPartIdentifier(
                                        new Identifier {
                                        Value = inEdge.EdgeAlias
                                        new Identifier {
                                        Value = "Sink"
                                ComparisonType = BooleanComparisonType.Equals


                            double selectivity;
                            var    leafToLeafStatistics = statisticsCalculator.GetLeafToLeafStatistics(jointEdge, inEdge,
                                                                                                       out selectivity);
                            joinSelectivity       *= selectivity;
                            compSinkNodeStatistics =
                                                           inEdge.Statistics, out selectivity);
                        SinkNodeStatisticsDict[sinkNode] = compSinkNodeStatistics;

            var unmatEdges = candidateTree.UnmaterializedEdges;

            foreach (var unmatEdge in unmatEdges)
                EdgeMaterilizedDict[unmatEdge] = false;
                if (!Nodes.Contains(unmatEdge.SinkNode))
                var sinkNodeInEdges = UnmaterializedNodeMapping.GetOrCreate(unmatEdge.SinkNode);

            // Calculate Estimated Join Selectivity & Estimated Node Size
            for (int i = densityList.Count - 1; i >= 0; i--)
                sqlEstimatedJoinSelectivity *= Math.Sqrt(sqlEstimatedJoinSelectivity) * densityList[i];
