/// <summary>
/// Takes measurements of a sensitive graph, builds a synthetic graph from a warm start,
/// swaps the synthetic graph in for the sensitive one, runs the MCMC step
/// (<c>ComputeWithSwaps</c>), and finally swaps the sensitive graph back in.
/// <c>Observation.TotalError</c> is printed before and after the MCMC step.
/// </summary>
/// <param name="edges">Edges of the sensitive graph.</param>
/// <param name="parallelism">Value forwarded to <c>PINQCollection.Input</c>.</param>
/// <param name="filename">
/// Only referenced by the commented-out file-output lines below; unused otherwise.
/// </param>
public static void TestMCMC(Pair<int>[] edges, int parallelism, string filename)
{
    double epsilon = 0.1;

    var graph = PINQCollection<Pair<int>>.Input(parallelism);

    // Introduce the sensitive graph data: every edge enters with weight +1.0.
    var sensitiveIn = edges.Select(x => new Weighted<Pair<int>>(x, +1.0));
    graph.OnNext(sensitiveIn);

    // ---- preliminary node and edge multiplicity measurements ----

    // Count the number of nodes. Half of epsilon is handed to this Count.
    var nodeStream = graph.GroupBy(e => e.a, i => i, (i, l) => l.Count)
                          .Concat(graph.Select(e => e.b));
    var zeroBucket = nodeStream.Shave(1.0, (i, x) => i)
                               .Where(x => x == 0)
                               .Count(y => y, epsilon / 2)[0];
    var doubleNodes = 0.5 + zeroBucket * 2;
    var numNodes = (int)doubleNodes;
    Console.WriteLine("num of nodes: " + numNodes + " " + doubleNodes);

    // Count multi-edges and self-loops. The result is never read in this method.
    // NOTE(review): the call is kept because Count may register an observation - confirm.
    var edgeMultiplicity = graph
        .Shave(1.0, (i, x) => new Pair<int> { a = i, b = x.a == x.b ? 1 : 0 })
        .Count(y => y, epsilon);

    // ---- warm start measurements (in and out degree distributions and ccdf) ----
    var synth = graph.WarmStart(numNodes, numNodes, epsilon).ToArray();

    //foreach (var edge in synth) Console.WriteLine("Synth: {0}", edge);
    //synth.PrintDegreeDistribution("synth");

    // Print the graph after warm start.
    //System.IO.File.WriteAllLines(filename + "-synthGraph.txt", synth.Select(x => x.a + "\t" + x.b));

    // ---- define a graph analysis ----
    graph.TrianglesByDegree(0.1);

    //Clustering(graph, 0.1);
    //Console.WriteLine("measuring tris");
    //graph.jddTriangles(Enumerable.Range(10, 10).Select(x => new Pair<int>(10 * x, 10 * x)).ToArray(), 0.1);
    //Console.WriteLine("measured tris");

    // Revealing this directly violates differential privacy; for measurement only.
    Console.WriteLine("Error on real data: {0}", Observation.TotalError);

    // Remove the sensitive graph data (weight -1.0), then introduce the synthetic graph data.
    var sensitiveOut = edges.Select(x => new Weighted<Pair<int>>(x, -1.0));
    graph.OnNext(sensitiveOut);
    var synthIn = synth.Select(x => new Weighted<Pair<int>>(x, +1.0));
    graph.OnNext(synthIn);

    // Perform the MCMC steps.
    var mcmcIterations = 1000000;
    var mcmcd = graph.ComputeWithSwaps(synth, numNodes, mcmcIterations).ToArray();

    // Write the synthetic graph.
    //System.IO.File.WriteAllLines(filename + "-newresultingGraph.txt", mcmcd.Select(x => x.a + "\t" + x.b));

    // Remove the MCMC graph data and re-introduce the sensitive graph data.
    var mcmcOut = mcmcd.Select(x => new Weighted<Pair<int>>(x, -1.0));
    graph.OnNext(mcmcOut);
    var sensitiveBack = edges.Select(x => new Weighted<Pair<int>>(x, +1.0));
    graph.OnNext(sensitiveBack);

    // Revealing this directly violates differential privacy; for measurement only.
    Console.WriteLine("Final error {0}", Observation.TotalError);
}
/// <summary>
/// Loads eleven relation files from <paramref name="path"/>, feeds each relation into its own
/// PINQ input with weight +1.0 per record, and then runs eight join-and-count queries.
/// For every query the average count and the average elapsed time per repetition are printed.
/// </summary>
/// <param name="path">
/// Directory prefix; the files customer.txt, order.txt, supplier.txt, lineitem.txt, ps.txt and
/// R1.txt .. R6.txt are read from <c>path + "/" + name</c>. Each line holds space-separated
/// integers: three per line for lineitem.txt and R6.txt, two per line for the others.
/// </param>
public static void DoExperiments(string path)
{
    // Literals used throughout the experiment, named once instead of repeated.
    const int parallelism = 4;        // argument handed to every PINQCollection.Input call
    const int countEpsilon = 100000;  // second argument handed to every Count call
    const int repeatTime = 10;        // repetitions averaged per query

    Console.WriteLine("Begin to load input data.");
    var customer = LoadPairFile(path + @"/customer.txt");
    var supplier = LoadPairFile(path + @"/supplier.txt");
    var order = LoadPairFile(path + @"/order.txt");
    var lineitem = LoadTripleFile(path + @"/lineitem.txt");
    var ps = LoadPairFile(path + @"/ps.txt");
    var r1 = LoadPairFile(path + @"/R1.txt");
    var r2 = LoadPairFile(path + @"/R2.txt");
    var r3 = LoadPairFile(path + @"/R3.txt");
    var r4 = LoadPairFile(path + @"/R4.txt");
    var r5 = LoadPairFile(path + @"/R5.txt");
    var r6 = LoadTripleFile(path + @"/R6.txt");
    Console.WriteLine("Finish loading input data.");

    Console.WriteLine("Begin to add weights for data.");
    var wCustomer = PINQCollection<Pair<int>>.Input(parallelism);
    wCustomer.OnNext(customer.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wSupplier = PINQCollection<Pair<int>>.Input(parallelism);
    wSupplier.OnNext(supplier.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wOrder = PINQCollection<Pair<int>>.Input(parallelism);
    wOrder.OnNext(order.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wPs = PINQCollection<Pair<int>>.Input(parallelism);
    wPs.OnNext(ps.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wLineitem = PINQCollection<Triple>.Input(parallelism);
    wLineitem.OnNext(lineitem.Select(x => new Weighted<Triple>(x, +1.0)));
    var wR1 = PINQCollection<Pair<int>>.Input(parallelism);
    wR1.OnNext(r1.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wR2 = PINQCollection<Pair<int>>.Input(parallelism);
    wR2.OnNext(r2.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wR3 = PINQCollection<Pair<int>>.Input(parallelism);
    wR3.OnNext(r3.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wR4 = PINQCollection<Pair<int>>.Input(parallelism);
    wR4.OnNext(r4.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wR5 = PINQCollection<Pair<int>>.Input(parallelism);
    wR5.OnNext(r5.Select(x => new Weighted<Pair<int>>(x, +1.0)));
    var wR6 = PINQCollection<Triple>.Input(parallelism);
    wR6.OnNext(r6.Select(x => new Weighted<Triple>(x, +1.0)));
    Console.WriteLine("Finish adding weights to data.");

    // Query 1: (customer JOIN order) joined with (lineitem JOIN supplier) on a single key.
    RunTimedQuery(1, repeatTime, () =>
    {
        var customerOrder = wCustomer.Join(wOrder, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var supplierLineitem = wLineitem.Join(wSupplier, x => x.c, y => y.a, (x, y) => new Triple(x.a, x.b, x.c));
        var joined = customerOrder.Join(supplierLineitem, x => x.c, y => y.a, (x, y) => true);
        return joined.Count(x => true, countEpsilon)[true];
    });

    // Query 2: (supplier JOIN ps) joined with (order JOIN lineitem) on a two-field key.
    RunTimedQuery(2, repeatTime, () =>
    {
        var supplierPs = wSupplier.Join(wPs, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var orderLineitem = wOrder.Join(wLineitem, x => x.a, y => y.a, (x, y) => new Triple(y.c, y.b, y.a));
        var joined = supplierPs.Join(orderLineitem, x => new Pair<int>(x.b, x.c), y => new Pair<int>(y.a, y.b), (x, y) => true);
        return joined.Count(x => true, countEpsilon)[true];
    });

    // Query 3: (customer JOIN order) joined with (supplier JOIN lineitem) on a two-field key.
    RunTimedQuery(3, repeatTime, () =>
    {
        var customerOrder = wCustomer.Join(wOrder, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var supplierLineitem = wSupplier.Join(wLineitem, x => x.a, y => y.c, (x, y) => new Triple(x.b, x.a, y.a));
        var joined = customerOrder.Join(supplierLineitem, x => new Pair<int>(x.a, x.c), y => new Pair<int>(y.a, y.c), (x, y) => true);
        return joined.Count(x => true, countEpsilon)[true];
    });

    // Query 4: chain R1 -> R2 -> R3 -> R4 -> R5, each step matching left.b with right.a.
    RunTimedQuery(4, repeatTime, () =>
    {
        var r12 = wR1.Join(wR2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r123 = r12.Join(wR3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r1234 = r123.Join(wR4, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r12345 = r1234.Join(wR5, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        return r12345.Count(x => true, countEpsilon)[true];
    });

    // Query 5: chain R1 -> R2, then close with R3 by matching (a, b) against R3's (b, a).
    RunTimedQuery(5, repeatTime, () =>
    {
        var r12 = wR1.Join(wR2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r123 = r12.Join(wR3, x => new Pair<int>(x.a, x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
        return r123.Count(x => true, countEpsilon)[true];
    });

    // Query 6: chain R1 -> R2 -> R3, then close with R4 by matching (a, b) against R4's (b, a).
    RunTimedQuery(6, repeatTime, () =>
    {
        var r12 = wR1.Join(wR2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r123 = r12.Join(wR3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r1234 = r123.Join(wR4, x => new Pair<int>(x.a, x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
        return r1234.Count(x => true, countEpsilon)[true];
    });

    // Query 7: chain R1 -> R2 -> R3 -> R4, then close with R5 by matching (a, b) against R5's (b, a).
    RunTimedQuery(7, repeatTime, () =>
    {
        var r12 = wR1.Join(wR2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r123 = r12.Join(wR3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r1234 = r123.Join(wR4, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
        var r12345 = r1234.Join(wR5, x => new Pair<int>(x.a, x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
        return r12345.Count(x => true, countEpsilon)[true];
    });

    // Query 8: R3 -> R4 kept as a triple, closed with R5, then matched against R6 on the whole triple.
    RunTimedQuery(8, repeatTime, () =>
    {
        var r34 = wR3.Join(wR4, x => x.b, y => y.a, (x, y) => new Triple(x.a, x.b, y.b));
        // Keys are Pair<int>, the same key type every other query in this method uses.
        var r345 = r34.Join(wR5, x => new Pair<int>(x.a, x.c), y => new Pair<int>(y.b, y.a), (x, y) => new Triple(x.a, x.b, x.c));
        var r3456 = r345.Join(wR6, x => x, y => y, (x, y) => true);
        return r3456.Count(x => true, countEpsilon)[true];
    });
}

/// <summary>Reads a file of lines "a b" (space-separated integers) into an array of pairs.</summary>
private static Pair<int>[] LoadPairFile(string filePath)
{
    return System.IO.File.ReadAllLines(filePath)
        .Select(line => line.Split(' '))
        .Select(fields => new Pair<int>(Convert.ToInt32(fields[0]), Convert.ToInt32(fields[1])))
        .ToArray();
}

/// <summary>Reads a file of lines "a b c" (space-separated integers) into an array of triples.</summary>
private static Triple[] LoadTripleFile(string filePath)
{
    return System.IO.File.ReadAllLines(filePath)
        .Select(line => line.Split(' '))
        .Select(fields => new Triple(Convert.ToInt32(fields[0]), Convert.ToInt32(fields[1]), Convert.ToInt32(fields[2])))
        .ToArray();
}

/// <summary>
/// Runs <paramref name="runOnce"/> <paramref name="repeatTime"/> times and prints the average
/// result and the average elapsed seconds per run.
/// </summary>
private static void RunTimedQuery(int queryNumber, int repeatTime, Func<double> runOnce)
{
    Console.WriteLine("For query " + queryNumber + ": ");

    // A fresh stopwatch per query: calling Start() on a stopped Stopwatch resumes it, so a
    // shared instance would add the time of all earlier queries to this one.
    var stopwatch = Stopwatch.StartNew();
    double total = 0;
    for (int i = 0; i < repeatTime; i++)
    {
        total += runOnce();
    }
    stopwatch.Stop();

    Console.WriteLine("Result is " + total / repeatTime);
    Console.WriteLine("Time: " + stopwatch.Elapsed.TotalMilliseconds / repeatTime / 1000 + "s");
}