Esempio n. 1
0
        // Enumerates the complement candidates for the table set S1.
        //   graph: join graph whose neighbourhood information is consulted
        //   S1:    set of tables; must not be empty (asserted in debug builds)
        // Yields, for every neighbour v_i of S1 outside the exclusion set X
        // (visited by descending index), the singleton {v_i} followed by every
        // set produced by enumerateCsgRecursive starting from {v_i}.
        // This is an iterator: nothing runs until the result is enumerated.
        IEnumerable <BitVector> enumerateCmp(JoinGraph graph, BitVector S1)
        {
            Debug.Assert(S1 != SetOp.EmptySet);

            // min(S1) := min({i|v_i \in S1})
            int minS1 = SetOp.MinTableIndex(S1);

            // B_i(W) := {vj |v_j \in W, j <= i}
            // X = union (B_min(S1), S1)
            BitVector BminS1 = SetOp.OrderBeforeSet(minS1);
            BitVector X      = SetOp.Union(BminS1, S1);

            // N = neighbour(S1) \ X
            BitVector N = SetOp.Substract(graph.NeighboursExcluding(S1), X);

            // for all (v_i in N by descending i)
            foreach (int vi in SetOp.TablesDescending(N))
            {
                // emit {v_i}
                BitVector VI = SetOp.SingletonSet(vi);
                yield return(VI);

                // recursively invoke enumerateCsgRecursive(graph, {v_i}, X union (B_i intersect N))
                BitVector Bi  = SetOp.OrderBeforeSet(vi);
                BitVector BiN = SetOp.Intersect(Bi, N);
                foreach (var csg in enumerateCsgRecursive(graph,
                                                          VI, SetOp.Union(X, BiN)))
                {
                    yield return(csg);
                }
            }
        }
Esempio n. 2
0
        // TDBasic: top-down enumeration driven by the naive partitioning
        // (Phd table 2.7). The cost is essentially that of DP_bushy, but the
        // sub-problems are solved on demand instead of strictly bottom up.
        //
        //   graph:    join graph to optimize
        //   expectC1: expected value of the c1_ counter; zero disables the check
        // Returns the tree recorded for the full table set.
        override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
        {
            int tableCount = graph.vertices_.Count;

            Console.WriteLine("TDBasic(naive) #tables: " + tableCount);

            // seed the memo with one entry per single table
            InitByInsertBasicTables(graph);

            // solve for the set holding every table
            BitVector allTables = (1 << tableCount) - 1;
            c1_ = 0;
            var bestPlan = TDPGSub(graph, allTables, expectC1);

            var dpTreeCount = BigInteger.Pow(3, tableCount) - BigInteger.Pow(2, tableCount + 1) + 1;
            Console.WriteLine("dp: {0}, expected c1: {1} == c1: {2}",
                              dpTreeCount, expectC1, c1_);

            // a zero expectation means "do not check"
            Debug.Assert(expectC1.IsZero || c1_ == expectC1);
            Console.WriteLine(bestPlan);

            // debug builds only: cross-check against the tree DPBushy produces
            Debug.Assert((new DPBushy()).Run(graph, BigInteger.Zero).Equals(bestPlan));
            return bestPlan;
        }
Esempio n. 3
0
        // Extends the set S through its neighbours that are not in X.
        //   graph: join graph supplying the neighbourhood of S
        //   S:     set being extended
        //   X:     tables that must not be used for the extension
        // Yields (S union S') for each S' produced by the partitioner over the
        // frontier, then recurses on each of those unions with the frontier
        // added to the exclusion set. Lazy: work happens during enumeration.
        IEnumerable <BitVector> enumerateCsgRecursive(JoinGraph graph, BitVector S, BitVector X)
        {
            // frontier = neighbour(S) \ X
            BitVector frontier = SetOp.Substract(graph.NeighboursExcluding(S), X);

            // nothing left to grow into
            if (frontier == SetOp.EmptySet)
            {
                yield break;
            }

            // first pass: for all non-empty S' subsetof(frontier), emit (S union S')
            foreach (var subset in new VancePartition(frontier).Next(true))
            {
                yield return SetOp.Union(S, subset);
            }

            // second pass: for all non-empty S' subsetof(frontier), recurse on
            // (S union S') while excluding (X union frontier)
            foreach (var subset in new VancePartition(frontier).Next(true))
            {
                var deeper = enumerateCsgRecursive(graph,
                                                   SetOp.Union(S, subset), SetOp.Union(X, frontier));
                foreach (var grown in deeper)
                {
                    yield return grown;
                }
            }
        }
Esempio n. 4
0
        // Builds a pair from the two table sets S1 and S2, records their union
        // in S_ and runs the debug-time consistency checks against graph.
        internal CsgCmpPair(JoinGraph graph, BitVector S1, BitVector S2)
        {
            // the two sides
            S1_ = S1;
            S2_ = S2;

            // the combined set
            S_ = SetOp.Union(S1, S2);

            Verify(graph);
        }
Esempio n. 5
0
        // Debug-only sanity checks on the pair: the sides must not overlap, and
        // S1_, S2_ and their union S_ must each be connected in graph.
        void Verify(JoinGraph graph)
        {
            // checked in this order, stopping at the first set that is not connected
            bool SidesAndUnionConnected()
            {
                if (!graph.IsConnected(S1_))
                {
                    return false;
                }
                if (!graph.IsConnected(S2_))
                {
                    return false;
                }
                return graph.IsConnected(S_);
            }

            // the two sides share no table
            Debug.Assert(SetOp.Intersect(S1_, S2_) == SetOp.EmptySet);

            // each side on its own, and both together, form a connected subgraph
            Debug.Assert(SidesAndUnionConnected());
        }
Esempio n. 6
0
 // Lazily enumerates all csg-cmp-pairs of the graph: every set yielded by
 // enumerateCsg is combined with every set enumerateCmp yields for it.
 IEnumerable <CsgCmpPair> csg_cmp_pairs(JoinGraph graph)
 {
     foreach (BitVector subgraph in enumerateCsg(graph))
     {
         // all complements that go with this subgraph
         var complements = enumerateCmp(graph, subgraph);
         foreach (BitVector complement in complements)
         {
             yield return new CsgCmpPair(graph, subgraph, complement);
         }
     }
 }
Esempio n. 7
0
        // Returns the tree stored in bestTree_ for the table set S, solving it
        // first when no entry exists yet.
        //   graph:    join graph being optimized
        //   S:        table set to solve
        //   expectC1: not read by this method
        PhysicNode TDPGSub(JoinGraph graph, BitVector S, BigInteger expectC1)
        {
            // already solved: reuse the memoized tree
            if (bestTree_[S] != null)
            {
                return bestTree_[S];
            }

            // for every partitioning (S1, S2) of S, solve both sides (left first)
            // and hand the two sub-trees to CreateMinimalJoinTree
            foreach (var split in NaiveNext(graph, S))
            {
                var leftTree  = TDPGSub(graph, split.S1_, 0);
                var rightTree = TDPGSub(graph, split.S2_, 0);
                CreateMinimalJoinTree(leftTree, rightTree);
            }

            return bestTree_[S];
        }
Esempio n. 8
0
        // Runs DPccp on the graph of book figure 3.12, then the full test suite.
        public static void Test()
        {
            var solver = new DPccp();

            // book figure 3.12: five tables, seven join predicates
            string[] tables    = { "T1", "T2", "T3", "T4", "T5" };
            string[] joinPreds = { "T1*T2", "T1*T3", "T1*T4", "T3*T4", "T5*T2", "T5*T3", "T5*T4" };
            var figure312 = new JoinGraph(tables, joinPreds);

            Debug.Assert(figure312.joinbits_.Count == 5 && figure312.preds_.Count == 7);
            solver.Reset().Run(figure312);

            // full test with a fresh solver
            DoTest(new DPccp());
        }
Esempio n. 9
0
        // DPccp: fills bestTree_ by visiting every pair produced by csg_cmp_pairs
        // and returns the tree stored for the full table set.
        //   graph:    join graph to optimize; its vertices are reordered in place
        //             by ReorderBFS
        //   expectC1: expected number of visited pairs; zero disables the check
        override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
        {
            int ntables = graph.vertices_.Count;

            Console.WriteLine("DPccp #tables: " + ntables);

            // prerequisite: reorder the tables via ReorderBFS (presumably a
            // breadth-first numbering, as the name says) before enumerating
            graph.ReorderBFS();

            // initialization: enqueue all single tables
            InitByInsertBasicTables(graph);

            // c1 counts the pairs visited
            ulong c1 = 0;

            foreach (var pair in csg_cmp_pairs(graph))
            {
                c1++;
                BitVector S1 = pair.S1_;
                BitVector S2 = pair.S2_;
                BitVector S  = pair.S_;

                var currTree = CreateMinimalJoinTree(bestTree_[S1], bestTree_[S2]);
                Debug.Assert(bestTree_[S].Cost() == currTree.Cost());
            }

            var nbushy = Space.Count_General_Bushy_CP(ntables);

            Console.WriteLine("bushy: {0}, expected: {1} == c1: {2}", nbushy, expectC1, c1);
            if (!expectC1.IsZero)
            {
                Debug.Assert(c1 == expectC1);
            }

            var result = bestTree_[(1 << ntables) - 1];
            // Console.WriteLine(result.Explain());

            // flip to true locally to cross-check the result against DPBushy
            bool verify = false;

            if (verify)
            {
                // verify against DPBushy - we can't verify that the trees are the
                // same because we may generate two different join trees with the
                // same cost. So we do cost verification here.
                //
                var bushy = (new DPBushy()).Run(graph, expectC1);
                Debug.Assert(bushy.InclusiveCost().Equals(result.InclusiveCost()));
            }
            return(result);
        }
Esempio n. 10
0
        // We assume plan is with normalized shape:
        //    LogicFilter
        //        LogicJoin
        //               ...
        // There shall be only 1 join filter on top.
        // Subqueries is not considered here.
        //
        //   plan:             plan searched for a LogicFilter sitting directly on a LogicJoin
        //   filterNodeParent: parent recorded for that filter (may be null); null when none found
        //   index:            position of the filter within filterNodeParent.children_; -1 when none found
        //   filterNode:       the join filter itself; null when none found
        // Returns the join graph built from the non-join nodes below the join and
        // the filter's AND-list, or null when the plan has no (or more than one)
        // join filter.
        //
        internal static JoinGraph ExtractJoinGraph(LogicNode plan,
                                                   out LogicNode filterNodeParent, out int index, out LogicFilter filterNode)
        {
            // collect every filter together with its parent and child index
            // NOTE(review): assumes FindNodeTypeMatch fills the three lists in
            // parallel (entry i of each describes the same filter) - confirm
            var parents = new List <LogicNode>();
            var indexes = new List <int>();
            var filters = new List <LogicFilter>();

            plan.FindNodeTypeMatch <LogicFilter>(parents, indexes, filters);

            // remember WHERE in the lists the join filters sit, so that the parent
            // and index of the same entry are looked up (not those of entry 0)
            var joinFilterPositions = new List <int>();
            for (int i = 0; i < filters.Count; i++)
            {
                if (filters[i].child_() is LogicJoin)
                {
                    joinFilterPositions.Add(i);
                }
            }

            Debug.Assert(joinFilterPositions.Count <= 1);
            if (joinFilterPositions.Count == 1)
            {
                int pos        = joinFilterPositions[0];
                var joinfilter = filters[pos];
                var topjoin    = joinfilter.child_() as LogicJoin;

                // vertices are non-join nodes. We don't do any cross boundary optimization
                // (say pull aggregation up thus we have bigger join space etc), which is
                // the job of upper layer.
                //
                var vertices = new List <LogicNode>();
                topjoin.VisitEach(x =>
                {
                    if (!(x is LogicJoin))
                    {
                        vertices.Add(x as LogicNode);
                    }
                });

                var graph        = new JoinGraph(vertices, joinfilter.filter_.FilterToAndList());
                index            = indexes[pos];
                filterNodeParent = parents[pos];
                filterNode       = joinfilter;
                Debug.Assert(filterNodeParent is null || filterNodeParent.children_[index] == filterNode);
                return(graph);
            }

            // there is no join or we can't handle this query
            filterNodeParent = null;
            index            = -1;
            filterNode       = null;
            return(null);
        }
Esempio n. 11
0
 // initialization: enqueue all vertex nodes
 //   Remembers graph in graph_, assigns the vertex at position i the
 //   single-bit set (1 << i) and stores a leaf plan for it in bestTree_.
 protected void InitByInsertBasicTables(JoinGraph graph)
 {
     graph_ = graph;

     // walk by position: the position IS the bit number, so no IndexOf search
     // is needed and two equal vertices can never end up with the same bit
     for (int pos = 0; pos < graph.vertices_.Count; pos++)
     {
         var       logic     = graph.vertices_[pos];
         BitVector contained = 1 << pos;
         logic.tableContained_ = contained;
         if (graph.memo_ is null)
         {
             bestTree_[contained] = new PhysicScanTable(logic);
         }
         else
         {
             // vertices are already inserted into memo
             var cgroup   = graph.memo_.LookupCGroup(logic);
             var logicref = new LogicMemoRef(cgroup);
             bestTree_[contained] = new PhysicMemoRef(logicref);
         }
     }
 }
Esempio n. 12
0
        // Lazily enumerates table sets seeded from each vertex in turn, walking
        // the vertex indexes from the highest down to 0. For each index the
        // singleton {v_i} is yielded first, followed by everything
        // enumerateCsgRecursive yields for it with X = {v_j | j <= i}.
        IEnumerable <BitVector> enumerateCsg(JoinGraph graph)
        {
            for (int idx = graph.vertices_.Count - 1; idx >= 0; --idx)
            {
                // emit the seed S = {v_idx}
                BitVector seed = SetOp.SingletonSet(idx);
                yield return seed;

                // EnumerateCsgRec (G, S, X) with X: {vj|j<=idx}
                BitVector excluded = SetOp.OrderBeforeSet(idx);
                var grownSets = enumerateCsgRecursive(graph, seed, excluded);
                foreach (var grown in grownSets)
                {
                    yield return grown;
                }
            }
        }
Esempio n. 13
0
        // Naive partitioning
        //   Similar to DPBushy algorithm: walk every S1 the partitioner produces
        //   for S, pair it with S2 = S \ S1, and yield the pair when S1 < S2 and
        //   both sides are connected. c1_ is incremented for every S1 visited,
        //   whether or not a pair is yielded.
        IEnumerable <CsgCmpPair> NaiveNext(JoinGraph graph, BitVector S)
        {
            // S is connected: the first call starts from a connected set and
            // later calls only receive sides that passed the check below
            //
            Debug.Assert(graph.IsConnected(S));

            foreach (var left in new VancePartition(S).Next())
            {
                c1_++;
                BitVector right = SetOp.Substract(S, left);

                // keep only one of the two mirrored splits
                if (!(left < right))
                {
                    continue;
                }

                // both sides have to be connected on their own
                if (graph.IsConnected(left) && graph.IsConnected(right))
                {
                    yield return new CsgCmpPair(graph, left, right);
                }
            }
        }
Esempio n. 14
0
 // Wraps a join together with its join graph; the graph's vertices are
 // appended to this node's children.
 public LogicJoinBlock(LogicJoin join, JoinGraph graph)
 {
     // keep both inputs
     graph_ = graph;
     join_  = join;

     // every vertex of the graph becomes a child of this block
     children_.AddRange(graph.vertices_);
 }
Esempio n. 15
0
        // GOO: starts with one tree per vertex and repeatedly replaces the pair
        // of trees whose join is cheapest by that join, until a single tree is
        // left. Afterwards DPccp is run on the same graph and warnings are
        // printed from the cost comparison.
        //   graph:    join graph to optimize
        //   expectC1: forwarded unchanged to the DPccp run
        // Returns the tree built by the greedy loop (not the DPccp one).
        override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
        {
            int ntables = graph.vertices_.Count;
            var Trees   = new List <PhysicNode>();

            Console.WriteLine("GOO #tables: " + ntables);

            // Trees = {R1, R2, ..., Rn}
            graph_ = graph;

            // walk by position: the position IS the bit number, so no IndexOf
            // search is needed and two equal vertices never share a bit
            for (int pos = 0; pos < ntables; pos++)
            {
                var       logic     = graph.vertices_[pos];
                BitVector contained = 1 << pos;
                logic.tableContained_ = contained;
                if (graph.memo_ is null)
                {
                    Trees.Add(new PhysicScanTable(logic));
                }
                else
                {
                    // vertices are already inserted into memo
                    var cgroup   = graph.memo_.LookupCGroup(logic);
                    var logicref = new LogicMemoRef(cgroup);
                    Trees.Add(new PhysicMemoRef(logicref));
                }
            }

            while (Trees.Count != 1)
            {
                PhysicNode Ti = null, Tj = null;
                PhysicNode bestJoin = null;

                // find Ti, Tj in Trees s.t. i < j (avoid duplicates) and TixTj is minimal
                for (int i = 0; i < Trees.Count; i++)
                {
                    for (int j = i + 1; j < Trees.Count; j++)
                    {
                        var join = CreateMinimalJoinTree(Trees[i], Trees[j], true);
                        if (bestJoin == null || join.Cost() < bestJoin.Cost())
                        {
                            bestJoin = join;
                            Ti       = Trees[i];
                            Tj       = Trees[j];
                        }
                    }
                }

                // replace the two chosen inputs by their join
                Debug.Assert(Ti != null && Tj != null);
                Trees.Remove(Ti); Trees.Remove(Tj);
                Trees.Add(bestJoin);
            }

            // compare with DPccp solver
            //   ideally, DPccp shall always generate better plans since GOO is heuristic - but DPccp does not consider
            //   CP, so in some cases where CP is beneficial, GOO can win
            //
            var result = Trees[0];
            var dpccp  = new DPccp().Run(graph, expectC1);

            Console.WriteLine(result);
            if (dpccp.InclusiveCost() < result.InclusiveCost())
            {
                Console.WriteLine("warning: GOO non optimal plan: {0} vs. {1}", dpccp.InclusiveCost(), result.InclusiveCost());
            }
            if (dpccp.Cost() > result.Cost())
            {
                Console.WriteLine("warning: DPCC shall consider CP in the case: {0} vs. {1}", dpccp.InclusiveCost(), result.InclusiveCost());
            }

            return(result);
        }
Esempio n. 16
0
 // Solves the join ordering for graph and returns the chosen plan.
 //   expectC1: optional expected value for a solver's c1 counter; the
 //             default (zero) makes the implementations skip that check.
 internal abstract PhysicNode Run(JoinGraph graph, BigInteger expectC1 = default(BigInteger));
Esempio n. 17
0
        // DP_Bushy: visits the table sets S = 1 .. (1 << #tables) - 1 in
        // increasing order and, for every connected S without an entry in
        // bestTree_, tries each split (S1, S \ S1) with S1 < S2 and both sides
        // connected.
        //   graph:    join graph to optimize
        //   expectC1: expected number of connected splits tried; zero disables the check
        // Returns the tree stored for the full table set.
        override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
        {
            int tableCount = graph.vertices_.Count;

            Console.WriteLine("DP_Bushy #tables: " + tableCount);

            // seed the memo with one entry per single table
            InitByInsertBasicTables(graph);

            // pairsTried: connected splits handed to CreateMinimalJoinTree (c1)
            // splitsSeen: every split enumerated, connected or not (c2)
            ulong pairsTried = 0;
            ulong splitsSeen = 0;

            for (BitVector S = 1; S < (1 << tableCount); S++)
            {
                // already solved (e.g. the single tables)
                if (bestTree_[S] != null)
                {
                    continue;
                }

                // cross products are not considered: a disconnected S is only
                // enumerated so that its splits are counted
                if (!graph.IsConnected(S))
                {
                    splitsSeen += (ulong)new VancePartition(S).Next().Count();
                    continue;
                }

                // for all S_1 subset of S do
                foreach (var S1 in new VancePartition(S).Next())
                {
                    splitsSeen++;
                    BitVector S2 = SetOp.Substract(S, S1);
                    Debug.Assert(S1 != S2);

                    // S1 < S2 drops the commutative duplicate; both sides must
                    // be connected since cross products are not considered
                    if (!(S1 < S2) || !graph.IsConnected(S1) || !graph.IsConnected(S2))
                    {
                        continue;
                    }

                    // a connected pair: get the best join tree between them
                    pairsTried++;
                    var candidate = CreateMinimalJoinTree(bestTree_[S1], bestTree_[S2]);
                    Debug.Assert(bestTree_[S].Cost() == candidate.Cost());
                }
            }

            // verify # loops for enumeration completeness:
            // 1. number of bushy trees
            // 2. expected dp count vs. splitsSeen: trees DP considered (P68) and trees generated
            // 3. expectC1 vs. pairsTried: trees considered
            //
            var bushyCount      = Space.Count_General_Bushy_CP(tableCount);
            var expectedDpCount = BigInteger.Pow(3, tableCount) - BigInteger.Pow(2, tableCount + 1) + 1;

            Console.WriteLine("bushy: {0}, dp: {1} == c2: {2}; expected c1: {3} == c1: {4}",
                              bushyCount, expectedDpCount, splitsSeen,
                              expectC1, pairsTried);
            Debug.Assert(expectedDpCount == splitsSeen);
            Debug.Assert(expectC1.IsZero || pairsTried == expectC1);

            var bestPlan = bestTree_[(1 << tableCount) - 1];

            Console.WriteLine(bestPlan.Explain());
            return bestPlan;
        }