// Enumerates candidate complements for the subgraph S1.
//
// For every neighbour v_i of S1 that is not excluded by X (visited by
// descending index) this yields the singleton {v_i} followed by every set
// produced by enumerateCsgRecursive() when started from {v_i}.
//
// graph: join graph being enumerated
// S1:    non-empty table set whose complements are wanted
IEnumerable<BitVector> enumerateCmp(JoinGraph graph, BitVector S1)
{
    Debug.Assert(S1 != SetOp.EmptySet);

    // min(S1) := min({i | v_i \in S1})
    int minS1 = SetOp.MinTableIndex(S1);

    // B_i(W) := {v_j | v_j \in W, j <= i}
    // X = B_min(S1) union S1
    BitVector BminS1 = SetOp.OrderBeforeSet(minS1);
    BitVector X = SetOp.Union(BminS1, S1);

    // N = neighbour(S1) \ X
    BitVector N = SetOp.Substract(graph.NeighboursExcluding(S1), X);

    // for all (v_i \in N, by descending i)
    foreach (int vi in SetOp.TablesDescending(N))
    {
        // emit {v_i}
        BitVector VI = SetOp.SingletonSet(vi);
        yield return VI;

        // recursively invoke enumerateCsgRecursive(graph, {v_i}, X union (B_i intersect N))
        BitVector Bi = SetOp.OrderBeforeSet(vi);
        BitVector BiN = SetOp.Intersect(Bi, N);
        foreach (var csg in enumerateCsgRecursive(graph, VI, SetOp.Union(X, BiN)))
        {
            yield return csg;
        }
    }
}
// TDBasic: top-down solver built on the naive partitioning scheme (Phd table 2.7).
// Its complexity is essentially that of DP_bushy, but unlike the DP solvers it
// is not forced to enumerate bottom-up: sub-problems are solved on demand,
// which makes it more flexible.
//
// Runs the top-down search over the full table set of `graph`, prints the
// counters, and (debug builds only) cross-checks against DPBushy.
override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
{
    int ntables = graph.vertices_.Count;

    // the bit set that contains every table of the graph
    BitVector allTables = (1 << ntables) - 1;

    Console.WriteLine("TDBasic(naive) #tables: " + ntables);

    // seed the best-tree table with all single tables
    InitByInsertBasicTables(graph);

    c1_ = 0;
    var plan = TDPGSub(graph, allTables, expectC1);

    var expectC2 = BigInteger.Pow(3, ntables) - BigInteger.Pow(2, ntables + 1) + 1;
    Console.WriteLine("dp: {0}, expected c1: {1} == c1: {2}", expectC2, expectC1, c1_);

    // a zero expectation means the caller did not ask for the check
    Debug.Assert(expectC1.IsZero || c1_ == expectC1);
    Console.WriteLine(plan);

    // The DPBushy run lives inside Debug.Assert, so it only happens in
    // builds where asserts are compiled in.
    Debug.Assert((new DPBushy()).Run(graph, BigInteger.Zero).Equals(plan));
    return plan;
}
// Grows the set S through its neighbourhood, never touching tables in X.
//
// With N = neighbour(S) \ X, this first yields (S union S') for every
// non-empty S' enumerated from N, and then recurses on each (S union S')
// with the exclusion set widened to (X union N). Yields nothing when N is
// empty.
IEnumerable<BitVector> enumerateCsgRecursive(JoinGraph graph, BitVector S, BitVector X)
{
    // N = neighbour(S) \ X
    BitVector frontier = SetOp.Substract(graph.NeighboursExcluding(S), X);
    if (frontier == SetOp.EmptySet)
    {
        yield break;
    }

    // pass 1: emit every direct extension of S
    foreach (var subset in new VancePartition(frontier).Next(true))
    {
        yield return SetOp.Union(S, subset);
    }

    // pass 2: a fresh partitioner walks the same subsets again, this time
    // descending into each extension
    foreach (var subset in new VancePartition(frontier).Next(true))
    {
        var deeper = enumerateCsgRecursive(graph, SetOp.Union(S, subset), SetOp.Union(X, frontier));
        foreach (var extended in deeper)
        {
            yield return extended;
        }
    }
}
// Records the two halves of a pair together with their union, then runs
// the debug-time consistency checks in Verify().
internal CsgCmpPair(JoinGraph graph, BitVector S1, BitVector S2)
{
    S_ = SetOp.Union(S1, S2);
    S1_ = S1;
    S2_ = S2;

    Verify(graph);
}
// Debug-time sanity checks for this pair; both checks are Debug.Assert
// calls, so they only run in builds where asserts are compiled in.
//
// graph: join graph used for the connectivity checks
void Verify(JoinGraph graph)
{
    // the two halves must not overlap
    Debug.Assert(SetOp.Intersect(S1_, S2_) == SetOp.EmptySet);

    // each half must be connected on its own, and so must their union S_
    // (i.e. the two halves are connected to each other)
    //
    Debug.Assert(graph.IsConnected(S1_) && graph.IsConnected(S2_) && graph.IsConnected(S_));
}
// Lazily produces every csg-cmp pair of the graph: each set S1 coming out
// of enumerateCsg() is combined with each S2 that enumerateCmp() reports
// for it.
IEnumerable<CsgCmpPair> csg_cmp_pairs(JoinGraph graph)
{
    foreach (BitVector csg in enumerateCsg(graph))
    {
        foreach (BitVector cmp in enumerateCmp(graph, csg))
        {
            var pair = new CsgCmpPair(graph, csg, cmp);
            yield return pair;
        }
    }
}
// Returns the best tree recorded for the table set S, computing it on
// first request: every partition (S1, S2) from NaiveNext() is solved
// recursively and the two sub-trees are handed to CreateMinimalJoinTree().
//
// expectC1 is not read in this method; recursive calls pass 0 for it.
PhysicNode TDPGSub(JoinGraph graph, BitVector S, BigInteger expectC1)
{
    if (bestTree_[S] == null)
    {
        // for all partitionings (S1, S2): build tree (TDPGSub(G|S1), TDPGSub(G|S2))
        foreach (var ccp in NaiveNext(graph, S))
        {
            var leftTree = TDPGSub(graph, ccp.S1_, 0);
            var rightTree = TDPGSub(graph, ccp.S2_, 0);
            CreateMinimalJoinTree(leftTree, rightTree);
        }
    }

    return bestTree_[S];
}
// Smoke test for DPccp: solves the five-table graph of book figure 3.12,
// then runs the shared DoTest() suite on a fresh solver.
static public void Test()
{
    DPccp solver = new DPccp();

    // book figure 3.12
    string[] tables = { "T1", "T2", "T3", "T4", "T5" };
    string[] joins =
    {
        "T1*T2", "T1*T3", "T1*T4", "T3*T4",
        "T5*T2", "T5*T3", "T5*T4"
    };
    JoinGraph figure312 = new JoinGraph(tables, joins);

    // five tables, seven join predicates
    Debug.Assert(figure312.joinbits_.Count == 5 && figure312.preds_.Count == 7);
    solver.Reset().Run(figure312);

    // full test
    DoTest(new DPccp());
}
// DPccp driver: reorders the graph, seeds the single-table plans, then
// feeds every csg-cmp pair to CreateMinimalJoinTree().
//
// graph:    join graph to solve; it is reordered in place by ReorderBFS()
// expectC1: expected number of enumerated pairs; the check is skipped when
//           it is zero
// returns:  the best tree recorded for the set containing all tables
override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
{
    int ntables = graph.vertices_.Count;

    Console.WriteLine("DPccp #tables: " + ntables);

    // prerequisite: sort tables per BFS order
    graph.ReorderBFS();

    // initialization: enqueue all single tables
    InitByInsertBasicTables(graph);

    ulong c1 = 0;
    foreach (var pair in csg_cmp_pairs(graph))
    {
        c1++;
        BitVector S1 = pair.S1_;
        BitVector S2 = pair.S2_;
        BitVector S = pair.S_;

        var currTree = CreateMinimalJoinTree(bestTree_[S1], bestTree_[S2]);

        // after the call, the tree recorded for S must cost the same as the
        // tree that was returned
        Debug.Assert(bestTree_[S].Cost() == currTree.Cost());
    }

    var nbushy = Space.Count_General_Bushy_CP(ntables);
    Console.WriteLine("bushy: {0}, expected: {1} == c1: {2}", nbushy, expectC1, c1);
    if (!expectC1.IsZero)
    {
        Debug.Assert(c1 == expectC1);
    }

    var result = bestTree_[(1 << ntables) - 1];
    // Console.WriteLine(result.Explain());

    // manual switch: flip to true to cross-check against DPBushy
    bool verify = false;
    if (verify)
    {
        // We can't verify that the trees are the same because two different
        // join trees may have the same cost, so only the cost is compared.
        //
        var bushy = (new DPBushy()).Run(graph, expectC1);
        Debug.Assert(bushy.InclusiveCost().Equals(result.InclusiveCost()));
    }

    return result;
}
// Builds the join graph for the join filter found in `plan`.
//
// We assume plan is with normalized shape:
//    LogicFilter
//        LogicJoin
//            ...
// There shall be only 1 join filter on top. Subqueries are not considered here.
//
// plan:             plan to search
// filterNodeParent: receives the entry of the parent list that belongs to
//                   the join filter (the assert below allows it to be null),
//                   or null when no graph is returned
// index:            receives the position of the join filter within
//                   filterNodeParent.children_, or -1 when no graph is returned
// filterNode:       receives the join filter, or null when no graph is returned
// returns:          the join graph, or null when there is no join filter
//                   (or the query can't be handled)
internal static JoinGraph ExtractJoinGraph(LogicNode plan,
    out LogicNode filterNodeParent, out int index, out LogicFilter filterNode)
{
    // find the join filter
    var parents = new List<LogicNode>();
    var indexes = new List<int>();
    var filters = new List<LogicFilter>();
    plan.FindNodeTypeMatch<LogicFilter>(parents, indexes, filters);

    var joinfilters = filters.Where(x => x.child_() is LogicJoin).ToList();
    Debug.Assert(joinfilters.Count <= 1);
    if (joinfilters.Count != 1)
    {
        // there is no join or we can't handle this query
        filterNodeParent = null;
        index = -1;
        filterNode = null;
        return null;
    }

    var joinfilter = joinfilters[0];
    var topjoin = joinfilter.child_() as LogicJoin;

    // vertices are non-join nodes. We don't do any cross boundary optimization
    // (say pull aggregation up thus we have bigger join space etc), which is
    // the job of upper layer.
    //
    var vertices = new List<LogicNode>();
    topjoin.VisitEach(x =>
    {
        if (!(x is LogicJoin))
        {
            vertices.Add(x as LogicNode);
        }
    });

    var graph = new JoinGraph(vertices, joinfilter.filter_.FilterToAndList());

    // parents/indexes line up with `filters`, not with the filtered
    // `joinfilters` list, so look up where the join filter sits among all
    // filters instead of assuming slot 0.
    // NOTE(review): assumes FindNodeTypeMatch fills the three lists in
    // lockstep (one entry each per match) - confirm against its definition.
    Debug.Assert(parents.Count == filters.Count && indexes.Count == filters.Count);
    int pos = filters.FindIndex(f => object.ReferenceEquals(f, joinfilter));

    index = indexes[pos];
    filterNodeParent = parents[pos];
    filterNode = joinfilter;
    Debug.Assert(filterNodeParent is null || filterNodeParent.children_[index] == filterNode);
    return graph;
}
// Seeds the solver with one single-table plan per vertex of the graph.
//
// Each vertex gets the bit (1 << its position in graph.vertices_) stored in
// tableContained_, and bestTree_ gets an entry under that bit: a plain
// table scan when the graph has no memo, otherwise a reference to the
// vertex's memo group.
protected void InitByInsertBasicTables(JoinGraph graph)
{
    graph_ = graph;

    foreach (var vertex in graph.vertices_)
    {
        BitVector bit = 1 << graph.vertices_.IndexOf(vertex);
        vertex.tableContained_ = bit;

        if (!(graph.memo_ is null))
        {
            // vertices are already inserted into memo
            var memoGroup = graph.memo_.LookupCGroup(vertex);
            bestTree_[bit] = new PhysicMemoRef(new LogicMemoRef(memoGroup));
            continue;
        }

        bestTree_[bit] = new PhysicScanTable(vertex);
    }
}
// Walks the vertices from the highest index down to 0. For each index i it
// yields the singleton {v_i} and then everything enumerateCsgRecursive()
// produces from {v_i} with X = OrderBeforeSet(i) as the exclusion set.
IEnumerable<BitVector> enumerateCsg(JoinGraph graph)
{
    int idx = graph.vertices_.Count - 1;
    while (idx >= 0)
    {
        // emit S = {v_i}
        BitVector seed = SetOp.SingletonSet(idx);
        yield return seed;

        // EnumerateCsgRec(G, S, X) with X = {v_j | j <= i}
        BitVector excluded = SetOp.OrderBeforeSet(idx);
        foreach (var grown in enumerateCsgRecursive(graph, seed, excluded))
        {
            yield return grown;
        }

        idx--;
    }
}
// Naive partitioning, similar to the DPBushy algorithm.
//
// Enumerates the partitions (S1, S \ S1) of S, counting each one in c1_,
// and yields those with S1 < S2 (drops commutative duplicates) whose two
// halves are both connected.
IEnumerable<CsgCmpPair> NaiveNext(JoinGraph graph, BitVector S)
{
    // S is connected: the first call starts from a connected set, and every
    // later S is an S1/S2 that passed the connectivity test below.
    //
    Debug.Assert(graph.IsConnected(S));

    foreach (var S1 in new VancePartition(S).Next())
    {
        c1_++;

        BitVector S2 = SetOp.Substract(S, S1);
        if (S1 >= S2)
        {
            continue;
        }

        if (graph.IsConnected(S1) && graph.IsConnected(S2))
        {
            yield return new CsgCmpPair(graph, S1, S2);
        }
    }
}
// Wraps a join together with its join graph; the graph's vertices are
// appended to this node's children.
public LogicJoinBlock(LogicJoin join, JoinGraph graph)
{
    join_ = join;
    graph_ = graph;

    children_.AddRange(graph.vertices_);
}
// GOO driver: starts from one tree per vertex and repeatedly replaces the
// cheapest joinable pair by its join until a single tree is left. The
// result is then compared with a DPccp run and warnings are printed when
// the costs disagree.
//
// graph:    join graph to solve
// expectC1: forwarded unchanged to the DPccp comparison run
// returns:  the tree built by the greedy loop (not the DPccp one)
override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
{
    int ntables = graph.vertices_.Count;
    var Trees = new List<PhysicNode>();

    Console.WriteLine("GOO #tables: " + ntables);

    // Trees = {R1, R2, ..., Rn}
    graph_ = graph;
    foreach (var vertex in graph.vertices_)
    {
        // NOTE(review): `1 << index` is a 32-bit int shift - confirm the
        // number of tables stays small enough for that.
        BitVector bit = 1 << graph.vertices_.IndexOf(vertex);
        vertex.tableContained_ = bit;

        PhysicNode leaf;
        if (graph.memo_ is null)
        {
            leaf = new PhysicScanTable(vertex);
        }
        else
        {
            // vertices are already inserted into memo
            var memoGroup = graph.memo_.LookupCGroup(vertex);
            leaf = new PhysicMemoRef(new LogicMemoRef(memoGroup));
        }
        Trees.Add(leaf);
    }

    while (Trees.Count != 1)
    {
        PhysicNode Ti = null, Tj = null;
        PhysicNode bestJoin = null;

        // find Ti, Tj in Trees s.t. i < j (avoid duplicates) and Ti x Tj is
        // minimal; the strict '<' keeps the first pair found on a cost tie
        for (int i = 0; i < Trees.Count; i++)
        {
            for (int j = i + 1; j < Trees.Count; j++)
            {
                PhysicNode left = Trees[i];
                PhysicNode right = Trees[j];

                var candidate = CreateMinimalJoinTree(left, right, true);
                if (bestJoin == null || candidate.Cost() < bestJoin.Cost())
                {
                    bestJoin = candidate;
                    Ti = left;
                    Tj = right;
                }
            }
        }

        Debug.Assert(Ti != null && Tj != null);

        // swap the chosen pair for its join
        Trees.Remove(Ti);
        Trees.Remove(Tj);
        Trees.Add(bestJoin);
    }

    // compare with DPccp solver
    // ideally, DPccp shall always generate better plans since GOO is a
    // heuristic - but DPccp does not consider CP, so in some cases where CP
    // is beneficial, GOO can win
    //
    var result = Trees[0];
    var dpccp = new DPccp().Run(graph, expectC1);
    Console.WriteLine(result);

    if (dpccp.InclusiveCost() < result.InclusiveCost())
    {
        Console.WriteLine("warning: GOO non optimal plan: {0} vs. {1}",
            dpccp.InclusiveCost(), result.InclusiveCost());
    }

    if (dpccp.Cost() > result.Cost())
    {
        Console.WriteLine("warning: DPCC shall consider CP in the case: {0} vs. {1}",
            dpccp.InclusiveCost(), result.InclusiveCost());
    }

    return result;
}
// Solves the join ordering problem for `graph` and returns the chosen
// physical plan.
//
// graph:    join graph to solve
// expectC1: expected value of the solver's c1 counter. It defaults to
//           new BigInteger(), i.e. zero; the DP/TD overrides visible in this
//           file skip their c1 check when it is zero.
internal abstract PhysicNode Run(JoinGraph graph, BigInteger expectC1 = new BigInteger());
// DP_Bushy driver: visits every non-empty table set S in increasing bit
// order and, for each connected S, tries every split into two connected
// halves. Counters c1/c2 are checked against the expected enumeration sizes.
//
// graph:    join graph to solve
// expectC1: expected number of connected pairs tried; the check is skipped
//           when it is zero
// returns:  the best tree recorded for the set containing all tables
override internal PhysicNode Run(JoinGraph graph, BigInteger expectC1)
{
    int ntables = graph.vertices_.Count;

    Console.WriteLine("DP_Bushy #tables: " + ntables);

    // initialization: enqueue all single tables
    InitByInsertBasicTables(graph);

    // loop through all candidate trees, CP included
    ulong c1 = 0, c2 = 0;
    var setCount = 1 << ntables;
    for (BitVector S = 1; S < setCount; S++)
    {
        // already solved (e.g. the single tables seeded above)
        if (bestTree_[S] != null)
        {
            continue;
        }

        // need connected subgraphs if not consider CP
        if (!graph.IsConnected(S))
        {
            // this partition enumeration is only to record #c2
            c2 += (ulong)(new VancePartition(S)).Next().ToArray().Length;
            continue;
        }

        // for all S_1 subset of S do
        foreach (var S1 in new VancePartition(S).Next())
        {
            c2++;
            BitVector S2 = SetOp.Substract(S, S1);

            // requires S1 < S2 to avoid commutative duplication
            Debug.Assert(S1 != S2);
            if (S1 >= S2)
            {
                continue;
            }

            // need connected subgraphs if not consider CP
            bool bothConnected = graph.IsConnected(S1) && graph.IsConnected(S2);
            if (!bothConnected)
            {
                continue;
            }

            // found a connected pair, get the best join tree between them
            c1++;
            var currTree = CreateMinimalJoinTree(bestTree_[S1], bestTree_[S2]);
            Debug.Assert(bestTree_[S].Cost() == currTree.Cost());
        }
    }

    // verify # loops for enumeration completeness:
    // 1. number of bushy trees
    // 2. expectC2/c2: number of trees DP considered (P68) and number of trees generated
    // 3. expectC1/c1: number of trees considered
    //
    var nbushy = Space.Count_General_Bushy_CP(ntables);
    var expectC2 = BigInteger.Pow(3, ntables) - BigInteger.Pow(2, ntables + 1) + 1;
    Console.WriteLine("bushy: {0}, dp: {1} == c2: {2}; expected c1: {3} == c1: {4}",
        nbushy, expectC2, c2, expectC1, c1);
    Debug.Assert(expectC2 == c2);
    Debug.Assert(expectC1.IsZero || c1 == expectC1);

    var result = bestTree_[(1 << ntables) - 1];
    Console.WriteLine(result.Explain());
    return result;
}