Ejemplo n.º 1
0
        /// <summary>
        /// Drives one MCMC experiment over an edge list: measures the real edges, builds a
        /// synthetic edge set via <c>WarmStart</c>, runs <c>ComputeWithSwaps</c> on it, and prints
        /// <c>Observation.TotalError</c> once with the real data loaded and once at the end.
        /// </summary>
        /// <param name="edges">Edges of the sensitive input graph.</param>
        /// <param name="parallelism">Forwarded unchanged to <c>PINQCollection.Input</c>.</param>
        /// <param name="filename">
        /// Not read by the active code; it was only used by graph-dump statements
        /// (<c>filename + "-synthGraph.txt"</c>, <c>filename + "-newresultingGraph.txt"</c>)
        /// that are currently disabled.
        /// </param>
        public static void TestMCMC(Pair<int>[] edges, int parallelism, string filename)
        {
            double eps = 0.1;
            var input = PINQCollection<Pair<int>>.Input(parallelism);

            // Load the sensitive edges, each with weight +1.
            input.OnNext(edges.Select(edge => new Weighted<Pair<int>>(edge, +1.0)));

            // ---- preliminary node and edge multiplicity measurements ----

            // Node count: combine the per-'a' grouping with every 'b' endpoint, shave, keep the
            // index-0 records and count them using half of eps.
            var groupedByA = input.GroupBy(e => e.a, i => i, (i, l) => l.Count);
            var endpointsB = input.Select(e => e.b);
            var nodeCounts = groupedByA.Concat(endpointsB)
                                       .Shave(1.0, (i, x) => i)
                                       .Where(x => x == 0)
                                       .Count(y => y, eps / 2);
            var nodeEstimate = 0.5 + nodeCounts[0] * 2;
            var nodeTotal = (int)nodeEstimate;
            Console.WriteLine("num of nodes: " + nodeTotal + " " + nodeEstimate);

            // Multi-edge / self-loop count: b is 1 for a self-loop (x.a == x.b), otherwise 0.
            // The result is never read below; the call is kept because it may have side effects
            // (NOTE(review): presumably it registers an observation - confirm before removing).
            var edgeMultiplicity = input
                .Shave(1.0, (i, x) => new Pair<int> { a = i, b = x.a == x.b ? 1 : 0 })
                .Count(y => y, eps);

            // ---- warm start (in and out degree distributions and ccdf) ----

            var synthetic = input.WarmStart(nodeTotal, nodeTotal, eps).ToArray();

            // ---- graph analysis to be fitted ----

            input.TrianglesByDegree(0.1);

            // Revealing this directly violates differential privacy; for measurement only.
            Console.WriteLine("Error on real data: {0}", Observation.TotalError);

            // Swap the sensitive edges out (weight -1) and the synthetic edges in (weight +1).
            input.OnNext(edges.Select(edge => new Weighted<Pair<int>>(edge, -1.0)));
            input.OnNext(synthetic.Select(edge => new Weighted<Pair<int>>(edge, +1.0)));

            // Run the MCMC swap steps starting from the synthetic graph.
            var swapSteps = 1000000;
            var sampled = input.ComputeWithSwaps(synthetic, nodeTotal, swapSteps).ToArray();

            // Swap the MCMC result out again and put the sensitive edges back in.
            input.OnNext(sampled.Select(edge => new Weighted<Pair<int>>(edge, -1.0)));
            input.OnNext(edges.Select(edge => new Weighted<Pair<int>>(edge, +1.0)));

            // Revealing this directly violates differential privacy; for measurement only.
            Console.WriteLine("Final error {0}", Observation.TotalError);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads eleven relation files found under <paramref name="path"/>, feeds every relation
        /// into its own PINQ collection with weight +1, then runs eight join-and-count queries.
        /// Each query is repeated several times; the average result and the average wall-clock
        /// time per repetition are written to the console.
        /// </summary>
        /// <param name="path">
        /// Prefix to which file names such as <c>"/customer.txt"</c> are appended verbatim.
        /// </param>
        /// <remarks>
        /// Every input line must contain space-separated integers (two for pair files, three for
        /// <c>lineitem.txt</c> and <c>R6.txt</c>); malformed lines make <c>Convert.ToInt32</c> or
        /// the array indexing throw.
        /// </remarks>
        public static void DoExperiments(string path)
        {
            const int parallelism  = 4;       // argument of every PINQCollection Input(...) call
            const int countEpsilon = 100000;  // second argument of every Count(...) call
            const int repeatTime   = 10;      // repetitions per query

            // Reads "<path><file>" and parses each line into a Pair<int> (columns 0 and 1).
            Func<string, Pair<int>[]> loadPairs = file =>
                System.IO.File.ReadAllLines(path + file)
                      .Select(line => line.Split(' '))
                      .Select(f => new Pair<int>(Convert.ToInt32(f[0]), Convert.ToInt32(f[1])))
                      .ToArray();

            // Reads "<path><file>" and parses each line into a Triple (columns 0, 1 and 2).
            Func<string, Triple[]> loadTriples = file =>
                System.IO.File.ReadAllLines(path + file)
                      .Select(line => line.Split(' '))
                      .Select(f => new Triple(Convert.ToInt32(f[0]), Convert.ToInt32(f[1]), Convert.ToInt32(f[2])))
                      .ToArray();

            Console.WriteLine("Begin to load input data.");
            var customer = loadPairs(@"/customer.txt");
            var supplier = loadPairs(@"/supplier.txt");
            var order    = loadPairs(@"/order.txt");
            var lineitem = loadTriples(@"/lineitem.txt");
            var ps       = loadPairs(@"/ps.txt");
            var r1       = loadPairs(@"/R1.txt");
            var r2       = loadPairs(@"/R2.txt");
            var r3       = loadPairs(@"/R3.txt");
            var r4       = loadPairs(@"/R4.txt");
            var r5       = loadPairs(@"/R5.txt");
            var r6       = loadTriples(@"/R6.txt");
            Console.WriteLine("Finish loading input data.");

            Console.WriteLine("Begin to add weights for data.");
            var w_c = PINQCollection<Pair<int>>.Input(parallelism);
            w_c.OnNext(customer.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_s = PINQCollection<Pair<int>>.Input(parallelism);
            w_s.OnNext(supplier.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_o = PINQCollection<Pair<int>>.Input(parallelism);
            w_o.OnNext(order.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_ps = PINQCollection<Pair<int>>.Input(parallelism);
            w_ps.OnNext(ps.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_l = PINQCollection<Triple>.Input(parallelism);
            w_l.OnNext(lineitem.Select(x => new Weighted<Triple>(x, +1.0)));
            var w_r1 = PINQCollection<Pair<int>>.Input(parallelism);
            w_r1.OnNext(r1.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_r2 = PINQCollection<Pair<int>>.Input(parallelism);
            w_r2.OnNext(r2.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_r3 = PINQCollection<Pair<int>>.Input(parallelism);
            w_r3.OnNext(r3.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_r4 = PINQCollection<Pair<int>>.Input(parallelism);
            w_r4.OnNext(r4.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_r5 = PINQCollection<Pair<int>>.Input(parallelism);
            w_r5.OnNext(r5.Select(x => new Weighted<Pair<int>>(x, +1.0)));
            var w_r6 = PINQCollection<Triple>.Input(parallelism);
            w_r6.OnNext(r6.Select(x => new Weighted<Triple>(x, +1.0)));
            Console.WriteLine("Finish adding weights to data.");

            var stopwatch = new Stopwatch();

            // Runs one query repeatTime times and prints its mean result and mean duration.
            // Restart() is essential: Start() on a stopped Stopwatch resumes the previous
            // measurement, which made every query's reported time include all earlier queries.
            Action<int, Func<double>> runQuery = (number, query) =>
            {
                stopwatch.Restart();
                Console.WriteLine("For query " + number + ": ");
                double total = 0;
                for (int i = 0; i < repeatTime; i++)
                {
                    // The query delegate rebuilds its joins on every repetition.
                    total += query();
                }
                stopwatch.Stop();
                Console.WriteLine("Result is " + total / repeatTime);
                Console.WriteLine("Time: " + stopwatch.Elapsed.TotalMilliseconds / repeatTime / 1000 + "s");
            };

            runQuery(1, () =>
            {
                var w_c_o     = w_c.Join(w_o, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
                var w_s_l     = w_l.Join(w_s, x => x.c, y => y.a, (x, y) => new Triple(x.a, x.b, x.c));
                var w_c_o_s_l = w_c_o.Join(w_s_l, x => x.c, y => y.a, (x, y) => true);
                return w_c_o_s_l.Count(x => true, countEpsilon)[true];
            });

            runQuery(2, () =>
            {
                var w_s_ps     = w_s.Join(w_ps, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
                var w_o_l      = w_o.Join(w_l, x => x.a, y => y.a, (x, y) => new Triple(y.c, y.b, y.a));
                var w_s_ps_o_l = w_s_ps.Join(w_o_l, x => new Pair<int>(x.b, x.c), y => new Pair<int>(y.a, y.b), (x, y) => true);
                return w_s_ps_o_l.Count(x => true, countEpsilon)[true];
            });

            runQuery(3, () =>
            {
                var w_c_o     = w_c.Join(w_o, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
                var w_s_l     = w_s.Join(w_l, x => x.a, y => y.c, (x, y) => new Triple(x.b, x.a, y.a));
                var w_c_o_s_l = w_c_o.Join(w_s_l, x => new Pair<int>(x.a, x.c), y => new Pair<int>(y.a, y.c), (x, y) => true);
                return w_c_o_s_l.Count(x => true, countEpsilon)[true];
            });

            // Query 4: chain R1..R5, each join matching the left 'b' with the right 'a'.
            runQuery(4, () =>
            {
                var w_r12    = w_r1.Join(w_r2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r123   = w_r12.Join(w_r3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r1234  = w_r123.Join(w_r4, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r12345 = w_r1234.Join(w_r5, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                return w_r12345.Count(x => true, countEpsilon)[true];
            });

            // Queries 5-7: same chain, but the last relation is matched on both columns,
            // with its columns swapped (left (a, b) against right (b, a)).
            runQuery(5, () =>
            {
                var w_r12  = w_r1.Join(w_r2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r123 = w_r12.Join(w_r3, x => new Pair<int>(x.a, x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
                return w_r123.Count(x => true, countEpsilon)[true];
            });

            runQuery(6, () =>
            {
                var w_r12   = w_r1.Join(w_r2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r123  = w_r12.Join(w_r3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r1234 = w_r123.Join(w_r4, x => new Pair<int>(x.a, x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
                return w_r1234.Count(x => true, countEpsilon)[true];
            });

            runQuery(7, () =>
            {
                var w_r12    = w_r1.Join(w_r2, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r123   = w_r12.Join(w_r3, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r1234  = w_r123.Join(w_r4, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));
                var w_r12345 = w_r1234.Join(w_r5, x => new Pair<int>(x.a, x.c == 0 ? x.b : x.b), y => new Pair<int>(y.b, y.a), (x, y) => true);
                return w_r12345.Count(x => true, countEpsilon)[true];
            });

            runQuery(8, () =>
            {
                var w_r34   = w_r3.Join(w_r4, x => x.b, y => y.a, (x, y) => new Triple(x.a, x.b, y.b));
                // NOTE(review): this join uses a non-generic Pair while every other query uses
                // Pair<int>; kept as written - confirm which type is intended.
                var w_r345  = w_r34.Join(w_r5, x => new Pair(x.a, x.c), y => new Pair(y.b, y.a), (x, y) => new Triple(x.a, x.b, x.c));
                // Joined with R6 on the whole Triple as the key.
                var w_r3456 = w_r345.Join(w_r6, x => x, y => y, (x, y) => true);
                return w_r3456.Count(x => true, countEpsilon)[true];
            });
        }