// Enumerates candidate complement sets for the non-empty set S1.
//
// Every yielded set starts from a single neighbour v_i of S1 that lies
// outside X = B_min(S1) union S1, and is then grown by enumerateCsgRecursive.
// B_i denotes SetOp.OrderBeforeSet(i), i.e. {v_j | j <= i}.
//
// graph: join graph supplying the neighbourhood of S1
// S1:    the set to find complements for; must not be empty
IEnumerable<BitVector> enumerateCmp(JoinGraph graph, BitVector S1)
{
    Debug.Assert(S1 != SetOp.EmptySet);

    // min(S1) := min({i | v_i in S1})
    int minS1 = SetOp.MinTableIndex(S1);

    // X = union(B_min(S1), S1)
    BitVector X = SetOp.Union(SetOp.OrderBeforeSet(minS1), S1);

    // N = neighbour(S1) \ X
    BitVector N = SetOp.Substract(graph.NeighboursExcluding(S1), X);

    // for all v_i in N, by descending i
    foreach (int vi in SetOp.TablesDescending(N))
    {
        // emit {v_i}
        BitVector VI = SetOp.SingletonSet(vi);
        yield return VI;

        // grow {v_i}: enumerateCsgRecursive(graph, {v_i}, X union (B_i intersect N))
        BitVector BiN = SetOp.Intersect(SetOp.OrderBeforeSet(vi), N);
        foreach (var csg in enumerateCsgRecursive(graph, VI, SetOp.Union(X, BiN)))
        {
            yield return csg;
        }
    }
}
// Debug-only self check of the SetOp helpers on two fixed bit patterns.
// Bit i of a BitVector stands for table index i (bit 0 is the lowest).
public static void Test()
{
    BitVector S1 = 0b0011_0010;     // tables {1, 4, 5}
    BitVector S2 = 0b0100_0011;     // tables {0, 1, 6}

    // union / intersection: expected pattern, and argument order must not matter
    Debug.Assert(SetOp.Union(S1, S2) == 0b0111_0011);
    Debug.Assert(SetOp.Union(S1, S2) == SetOp.Union(S2, S1));
    Debug.Assert(SetOp.Intersect(S1, S2) == 0b0000_0010);
    Debug.Assert(SetOp.Intersect(S1, S2) == SetOp.Intersect(S2, S1));

    // difference depends on argument order
    Debug.Assert(SetOp.Substract(S1, S2) == 0b0011_0000);
    Debug.Assert(SetOp.Substract(S2, S1) == 0b0100_0001);

    // scalar queries
    Debug.Assert(SetOp.CountSetBits(S1) == 3);
    Debug.Assert(SetOp.MinTableIndex(S1) == 1);
    Debug.Assert(SetOp.MinTableIndex(S2) == 0);
    Debug.Assert(SetOp.OrderBeforeSet(3) == 15);

    // table index enumeration in both directions
    var ascending = SetOp.TablesAscending(S2).ToList();
    Debug.Assert(ascending.SequenceEqual(new[] { 0, 1, 6 }));
    var descending = SetOp.TablesDescending(S2).ToList();
    Debug.Assert(descending.SequenceEqual(new[] { 6, 1, 0 }));
}
// Grows the set S by its neighbours that are not excluded by X.
//
// First yields (S union S') for every non-empty subset S' of
// N = neighbour(S) \ X, then recurses for each such subset with
// (S union S') as the new set and (X union N) as the new exclusion set.
// Yields nothing when N is empty.
IEnumerable<BitVector> enumerateCsgRecursive(JoinGraph graph, BitVector S, BitVector X)
{
    // N = neighbour(S) \ X
    BitVector frontier = SetOp.Substract(graph.NeighboursExcluding(S), X);
    if (frontier == SetOp.EmptySet)
    {
        yield break;
    }

    // pass 1: emit (S union S') for all non-empty S' subset of N (N itself included)
    foreach (var subset in new VancePartition(frontier).Next(true))
    {
        yield return SetOp.Union(S, subset);
    }

    // pass 2: recurse on (graph, S union S', X union N) for the same subsets
    BitVector excluded = SetOp.Union(X, frontier);
    foreach (var subset in new VancePartition(frontier).Next(true))
    {
        BitVector grown = SetOp.Union(S, subset);
        foreach (var csg in enumerateCsgRecursive(graph, grown, excluded))
        {
            yield return csg;
        }
    }
}
// Builds a pair from the two sets S1 and S2, caches their union in S_,
// and runs the debug-time Verify check against the given graph.
internal CsgCmpPair(JoinGraph graph, BitVector S1, BitVector S2)
{
    S_ = SetOp.Union(S1, S2);
    S1_ = S1;
    S2_ = S2;

    Verify(graph);
}
// Like Neighbours(), but with the members of S itself removed from the result.
internal BitVector NeighboursExcluding(BitVector S)
{
    BitVector outside = SetOp.Substract(Neighbours(S), S);

    // nothing of S may survive the subtraction
    Debug.Assert(SetOp.Intersect(outside, S) == SetOp.EmptySet);
    return outside;
}
// Renders the table indexes contained in S in ascending order, each one
// followed by ", " (so a non-empty result ends with a trailing ", ").
// An empty set renders as the empty string.
internal static string ToString(BitVector S)
{
    return string.Concat(SetOp.TablesAscending(S).Select(t => t + ", "));
}
// Debug-time sanity check of this pair: S1_ and S2_ must not share any table.
// The graph argument is currently only referenced by the disabled
// connectivity check below.
void Verify(JoinGraph graph)
{
    // the two sides must not overlap
    Debug.Assert(SetOp.Intersect(S1_, S2_) == SetOp.EmptySet);

    // Disabled check: S1_ and S2_ are each connected, and so is their union S_.
    // Debug.Assert(graph.IsConnected(S1_) && graph.IsConnected(S2_) && graph.IsConnected(S_));
}
// Given a set of tables S, returns the union of NeighboursOf(t) over every
// table t in S. Members of S can appear in the result when they are
// neighbours of each other.
BitVector Neighbours(BitVector S)
{
    BitVector result = 0;
    foreach (var t in SetOp.TablesAscending(S))
    {
        foreach (var n in NeighboursOf(t))
        {
            // Shift a 64-bit one. With an int operand the shift is 32-bit:
            // n == 31 yields a negative value that sign-extends into the
            // upper half, and n >= 32 wraps around to a low bit.
            result |= 1L << n;
        }
    }
    return result;
}
// Reads through the join predicates and records which tables they connect.
//
// Say a predicate is T2.a = T5.a, and ParseJoinPredExpr resolves T2 to
// index 3 and T5 to index 7; then
//    joinbits_[3][7] = true
//    joinbits_[7][3] = true
// and the set {3, 7} is appended to predContained_ (one entry per
// predicate, in predicate order). predContained_ is rebuilt from scratch.
void markJoinBitsFromJoinPred(List<Expr> preds)
{
    predContained_ = new List<BitVector>(preds.Count);

    foreach (var pred in preds)
    {
        var ends = ParseJoinPredExpr(pred);
        int left = ends[0];
        int right = ends[1];

        // several predicates may join the same two relations, so an
        // already-set bit is fine and is simply set again
        joinbits_[left][right] = true;
        joinbits_[right][left] = true;

        // remember which tables this predicate touches
        predContained_.Add(SetOp.SingletonSet(left) | SetOp.SingletonSet(right));
    }
}
// Enumerates sets of tables, seeded from every vertex of the graph.
//
// Vertices are visited from the highest index down to 0. For each index i
// the singleton {v_i} is yielded first, followed by everything
// enumerateCsgRecursive produces for it with X = {v_j | j <= i} excluded.
IEnumerable<BitVector> enumerateCsg(JoinGraph graph)
{
    int index = graph.vertices_.Count;
    while (--index >= 0)
    {
        // emit S = {v_i}
        BitVector seed = SetOp.SingletonSet(index);
        yield return seed;

        // EnumerateCsgRec(G, S, X) with X = {v_j | j <= i}
        BitVector excluded = SetOp.OrderBeforeSet(index);
        foreach (var grown in enumerateCsgRecursive(graph, seed, excluded))
        {
            yield return grown;
        }
    }
}
// Enumerates the non-empty subsets of S_.
//
// There are two ways of enumeration:
//  1. full set is not included (default)
//  2. full set is included (fullsetIncluded == true); it is yielded last
//
// NOTE: S_ is expected to be non-empty; for an empty S_ the closing
// assertion does not hold.
internal IEnumerable<BitVector> Next(bool fullsetIncluded = false)
{
    BitVector full = S_;
    BitVector subset = 0;
    int emitted = 0;

    while (true)
    {
        // step to the next subset of 'full'; the walk ends on 'full' itself
        subset = full & (subset - full);
        bool reachedFull = subset == full;

        if (!reachedFull || fullsetIncluded)
        {
            emitted++;
            // complement of 'subset' within 'full'; the value is not used,
            // the call is kept as it was
            SetOp.CoveredSubstract(full, subset);
            yield return subset;
        }

        if (reachedFull)
        {
            break;
        }
    }

    // result includes all combinations except empty and full set (optional)
    Debug.Assert(emitted - (fullsetIncluded ? 1 : 0) == (1 << SetOp.CountSetBits(full)) - 2);
}
// Extracts the subgraph induced by the node set S:
//  (1) vertices outside S are dropped, and so is every join predicate that
//      references a table outside S
//  (2) this is called in a tight loop by the DP_bushy algorithm, so it is
//      kept cheap: predicate membership is a bit-mask test on S instead of
//      a list search per predicate
//
// The returned graph is built from the selected vertices (in ascending
// table index order) and the retained predicates (in preds_ order).
// Nodes of S that lose all their edges stay in the result as unconnected
// vertices.
internal JoinGraph SubGraph(BitVector S)
{
    var subvert = new List<LogicNode>();
    foreach (var t in SetOp.TablesAscending(S))
    {
        subvert.Add(vertices_[t]);
    }

    var subjoins = new List<Expr>();
    foreach (var j in preds_)
    {
        var i12 = ParseJoinPredExpr(j);

        // keep the predicate only when both of its tables belong to S
        BitVector ends = SetOp.SingletonSet(i12[0]) | SetOp.SingletonSet(i12[1]);
        if (SetOp.Intersect(ends, S) == ends)
        {
            subjoins.Add(j);
        }
    }

    return new JoinGraph(subvert, subjoins);
}
// Naive partitioning, similar to the DPBushy algorithm.
//
// Splits S into every (S1, S2) with S2 = S \ S1 and yields the pairs where
// S1 < S2 and both halves are connected in the graph. c1_ is incremented
// for every subset produced by the partitioner, whether or not it is yielded.
//
// S is not checked for connectivity here:
// Debug.Assert(graph.IsConnected(S));
IEnumerable<CsgCmpPair> NaiveNext(JoinGraph graph, BitVector S)
{
    foreach (var left in new VancePartition(S).Next())
    {
        c1_++;

        BitVector right = SetOp.Substract(S, left);
        if (left >= right)
        {
            // only the ordering left < right is reported
            continue;
        }

        if (graph.IsConnected(left) && graph.IsConnected(right))
        {
            yield return new CsgCmpPair(graph, left, right);
        }
    }
}
// Runs the DP_Bushy enumeration over every non-empty subset of the graph's
// tables and returns the entry of bestTree_ for the full table set.
//
// graph:    join graph to optimize
// expectC1: expected number of connected (S1, S2) pairs considered; the
//           check is skipped when it is zero
//
// Two counters are kept to verify enumeration completeness:
//   c2 - partitions generated, expected to be 3^n - 2^(n+1) + 1
//   c1 - partitions whose two sides are both connected
// Progress, the counters and the resulting plan are written to the console.
internal override PhysicNode Run(JoinGraph graph, BigInteger expectC1)
{
    int ntables = graph.vertices_.Count;
    Console.WriteLine("DP_Bushy #tables: " + ntables);

    // initialization: enqueue all single tables
    InitByInsertBasicTables(graph);

    // One past the full set. Computed with a 64-bit shift: "1 << ntables"
    // is a 32-bit shift and turns negative or wraps once ntables reaches 31.
    BitVector limit = 1L << ntables;

    // loop through all candidate trees
    ulong c1 = 0, c2 = 0;
    for (BitVector S = 1; S < limit; S++)
    {
        if (bestTree_[S] != null)
        {
            continue;
        }

        // need connected subgraphs if not consider CP
        if (!graph.IsConnected(S))
        {
            // this partition enumeration is only to record #c2
            c2 += (ulong)(new VancePartition(S)).Next().ToArray().Length;
            continue;
        }

        // for all S_1 subset of S do
        VancePartition partitioner = new VancePartition(S);
        foreach (var S1 in partitioner.Next())
        {
            c2++;
            BitVector S2 = SetOp.Substract(S, S1);

            // requires S1 < S2 to avoid commutative duplication
            Debug.Assert(S1 != S2);
            if (S1 < S2)
            {
                // need connected subgraphs if not consider CP
                if (!graph.IsConnected(S1) || !graph.IsConnected(S2))
                {
                    continue;
                }

                // found a connected pair, get the best join tree between them
                c1++;
                var currTree = CreateMinimalJoinTree(bestTree_[S1], bestTree_[S2]);
                Debug.Assert(bestTree_[S].Cost() == currTree.Cost());
            }
        }
    }

    // verify # loops for enumeration completeness:
    // 1. number of bushy trees
    // 2. expectC2/c2: number of trees DP considered (P68) and number of trees generated
    // 3. expectC1/c1: number of trees considered
    //
    var nbushy = Space.Count_General_Bushy_CP(ntables);
    var expectC2 = BigInteger.Pow(3, ntables) - BigInteger.Pow(2, ntables + 1) + 1;
    Console.WriteLine("bushy: {0}, dp: {1} == c2: {2}; expected c1: {3} == c1: {4}",
                      nbushy, expectC2, c2, expectC1, c1);
    Debug.Assert(expectC2 == c2);
    if (!expectC1.IsZero)
    {
        Debug.Assert(c1 == expectC1);
    }

    // the full set has all ntables low bits set
    var result = bestTree_[limit - 1];
    Console.WriteLine(result.Explain());
    return result;
}