/// <summary>
/// Projects every edge onto its <c>b</c> field, applies <c>Transpose</c> twice to the
/// resulting collection, and releases a noisy histogram of the outcome.
/// </summary>
/// <param name="edges">Collection of edges; only the <c>b</c> field of each edge is used.</param>
/// <param name="epsilon">Value forwarded unchanged to <c>Count</c>.</param>
/// <returns>The <c>Count</c> object keyed by the integers produced by the second transpose.</returns>
/// <remarks>
/// NOTE(review): the name suggests this is the in-degree sequence of the graph; that relies on
/// the semantics of <c>Shave</c> (used by <c>Transpose</c>), which is not visible here.
/// </remarks>
public static Count<int> InDegSeq<T>(this PINQCollection<Pair<T>> edges, double epsilon) where T : IEquatable<T>
{
    var targets = edges.Select(edge => edge.b);
    var transposedOnce = targets.Transpose();
    var transposedTwice = transposedOnce.Transpose();

    return transposedTwice.Count(value => value, epsilon);
}
/// <summary>
/// Takes four noisy degree measurements of <paramref name="graph"/>, fits in- and out-degree
/// sequences to them with <c>DegreeSequenceFitter.FitDegSeq</c>, and generates a synthetic graph
/// from the fitted sequences.
/// </summary>
/// <param name="graph">The graph (as an edge collection) to measure.</param>
/// <param name="maxdeg">Number of CCDF entries read (indices 0..maxdeg-1); clamped to at least 1.</param>
/// <param name="numnodes">Number of degree-sequence entries read (indices 0..numnodes-1) and the node count handed to the generator; clamped to at least 1.</param>
/// <param name="eps">Value forwarded unchanged to each of the four measurements.</param>
/// <returns>The edges returned by <c>DegreeSequenceFitter.GenerateGraph</c>.</returns>
public static IEnumerable<Pair<int>> WarmStart(this PINQCollection<Pair<int>> graph, int maxdeg, int numnodes, double eps)
{
    // Guard against non-positive sizes.
    maxdeg = Math.Max(1, maxdeg);
    numnodes = Math.Max(1, numnodes);

    Console.WriteLine("Doing WarmStart with maxdeg {0} and numnodes {1}", maxdeg, numnodes);

    // The four measurements are issued in this exact order.
    var outCcdf = graph.OutDegCCDF(eps);   // ccdf of outgoing degrees
    var inCcdf = graph.InDegCCDF(eps);     // ccdf of incoming degrees
    var inSeq = graph.InDegSeq(eps);       // incoming degree sequence
    var outSeq = graph.OutDegSeq(eps);     // outgoing degree sequence

    // Read the measured values into arrays; the read order (out ccdf, in ccdf, out seq, in seq)
    // is kept in case the indexers have side effects.
    var outCcdfValues = Enumerable.Range(0, maxdeg).Select(degree => outCcdf[degree]).ToArray();
    var inCcdfValues = Enumerable.Range(0, maxdeg).Select(degree => inCcdf[degree]).ToArray();
    var outSeqValues = Enumerable.Range(0, numnodes).Select(rank => outSeq[rank]).ToArray();
    var inSeqValues = Enumerable.Range(0, numnodes).Select(rank => inSeq[rank]).ToArray();

    // Fit incoming degrees first, then outgoing degrees.
    var fittedIn = DegreeSequenceFitter.FitDegSeq(inCcdfValues, inSeqValues);
    var fittedOut = DegreeSequenceFitter.FitDegSeq(outCcdfValues, outSeqValues);

    return DegreeSequenceFitter.GenerateGraph(fittedOut, fittedIn, numnodes);
}
/// <summary>
/// Maps every edge to a pair of values: the <c>DPCount</c>-derived value of its <c>a</c> endpoint
/// and the <c>DPCount</c>-derived value of its <c>b</c> endpoint.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <returns>
/// One <c>Pair&lt;int&gt;</c> per matched edge: (value attached via <c>a</c>, value attached via <c>b</c>).
/// </returns>
/// <remarks>
/// The reducer passed to <c>DPCount</c> stores <c>i - 1</c>, where <c>i</c> is the second reducer
/// argument. NOTE(review): presumably <c>i</c> is a 1-based count so the stored value is a
/// 0-based degree index; confirm against <c>DPCount</c>.
/// </remarks>
public static PINQCollection<Pair<int>> JointDegrees(this PINQCollection<Pair<int>> graph)
{
    // (node, i - 1) pairs, keyed once by the a endpoint and once by the b endpoint.
    var sourceDegrees = graph.DPCount(e => e.a, (node, count) => new Pair<int>(node, count - 1));
    var targetDegrees = graph.DPCount(e => e.b, (node, count) => new Pair<int>(node, count - 1));

    // Attach the per-node value to every edge, matching on the corresponding endpoint.
    var edgesWithSourceDegree = graph.Join(
        sourceDegrees,
        e => e.a,
        d => d.a,
        (e, d) => new Pair<Pair<int>, int>(e, d.b));

    var edgesWithTargetDegree = graph.Join(
        targetDegrees,
        e => e.b,
        d => d.a,
        (e, d) => new Pair<Pair<int>, int>(e, d.b));

    // Re-unite the two annotations on the edge itself and keep only the two values.
    var joint = edgesWithSourceDegree.Join(
        edgesWithTargetDegree,
        left => left.a,
        right => right.a,
        (left, right) => new Pair<int>(left.b, right.b));

    return joint;
}
/// <summary>
/// Prints a noisy count of triangles, found by intersecting the (ordered) edge set with the
/// endpoint pairs of its length-two paths.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <param name="epsilon">Value forwarded unchanged to <c>Count</c>.</param>
public static void Clustering(this PINQCollection<Pair<int>> graph, double epsilon)
{
    // Orient every edge as (min, max) first; per the original author this improves both the
    // number of triangles and the clustering coefficient.
    PINQCollection<Pair<int>> ordered = graph.Select(
        e => new Pair<int>(Math.Min(e.a, e.b), Math.Max(e.a, e.b)));

    // Paths start -> mid -> end, built by matching one edge's b with another edge's a.
    var twoHopPaths = ordered.Join(
        ordered,
        first => first.b,
        second => second.a,
        first => first.a,
        second => second.b,
        (mid, start, end) => new Triple(start, mid, end));

    // Drop paths that return to their starting node.
    var openPaths = twoHopPaths.Where(p => p.a != p.c);

    // A path whose two endpoints are themselves an edge closes a triangle.
    var closingPairs = openPaths.Select(p => new Pair<int>(p.a, p.c));
    var triangles = ordered.Intersect(closingPairs);

    var noisyTotal = triangles.Count(t => true, epsilon)[true];
    Console.WriteLine("the number of triangles is: {0}", noisyTotal);
}
/// <summary>
/// Prints a noisy count of the length-two paths (in the symmetrised graph) that survive
/// intersection with both of their own rotations.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <param name="buckets">Not used by this method.</param>
/// <param name="epsilon">Value forwarded unchanged to <c>Count</c>.</param>
public static void Triangles(this PINQCollection<Pair<int>> graph, Pair<int>[] buckets, double epsilon)
{
    // Add the reverse of every edge.
    var reversed = graph.Select(e => new Pair<int>(e.b, e.a));
    var bothDirections = graph.Concat(reversed);

    // (a, b, c) for every pair of edges (a, b), (b, c).
    var paths = bothDirections.Join(
        bothDirections,
        first => first.b,
        second => second.a,
        (first, second) => new Triple(first.a, first.b, second.b));

    // The two rotations of each path: (b, c, a) and (c, a, b).
    var rotatedOnce = paths.Select(p => new Triple(p.b, p.c, p.a));
    var rotatedTwice = paths.Select(p => new Triple(p.c, p.a, p.b));

    var closed = paths.Intersect(rotatedOnce).Intersect(rotatedTwice);

    var noisyTotal = closed.Count(p => true, epsilon)[true];
    Console.WriteLine("triangles: {0}", noisyTotal);
}
/// <summary>
/// Builds length-two paths annotated with a <c>DPCount</c>-derived value for the middle node,
/// joins each path with its two rotations, and releases a noisy histogram over the resulting
/// triples of values.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <param name="epsilon">Value forwarded unchanged to <c>Count</c>.</param>
/// <returns>The <c>Count</c> object keyed by the triples produced by the final join.</returns>
public static Count<Triple> Triangles(this PINQCollection<Pair<int>> graph, double epsilon)
{
    // (node, i - 1) keyed by the a endpoint of each edge.
    var degrees = graph.DPCount(e => e.a, (node, count) => new Pair<int>(node, count - 1));

    // (a, b, c) for every pair of edges (a, b), (b, c).
    var paths = graph.Join(
        graph,
        first => first.b,
        second => second.a,
        (first, second) => new Triple(first.a, first.b, second.b));

    // Annotate each path with the value recorded for its middle node b.
    var pathWithMidDegree = paths.Join(
        degrees,
        p => p.b,
        d => d.a,
        (p, d) => new Pair<Triple, int>(p, d.b));

    // Rotations of the annotated paths; the annotation travels with the rotated triple.
    var rotatedBca = pathWithMidDegree.Select(
        entry => new Pair<Triple, int>(new Triple(entry.a.b, entry.a.c, entry.a.a), entry.b));
    var rotatedCab = pathWithMidDegree.Select(
        entry => new Pair<Triple, int>(new Triple(entry.a.c, entry.a.a, entry.a.b), entry.b));

    // First join: pair up the annotation of the path with that of its (b, c, a) rotation.
    var withTwoDegrees = pathWithMidDegree.Join(
        rotatedBca,
        left => left.a,
        right => right.a,
        (left, right) => new Pair<Triple, Pair<int>>(left.a, new Pair<int>(left.b, right.b)));

    // Second join: add the annotation of the (c, a, b) rotation and keep only the three values.
    var degreeTriples = withTwoDegrees.Join(
        rotatedCab,
        left => left.a,
        right => right.a,
        (left, right) => new Triple(right.b, left.b.a, left.b.b));

    return degreeTriples.Count(t => t, epsilon);
}
/// <summary>
/// Measures a noisy histogram of triangles keyed by the sorted degrees of their three corners.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <param name="epsilon">Value forwarded unchanged to <c>Count</c>.</param>
/// <remarks>
/// The histogram is neither returned nor printed. NOTE(review): presumably <c>Count</c>
/// registers the measurement as a side effect; confirm before removing the call.
/// The weights quoted in the comments below are the original author's annotations.
/// </remarks>
public static void TrianglesByDegree(this PINQCollection<Pair<int>> graph, double epsilon)
{
    Console.WriteLine("in TrianglesByDegree...");

    // (b, db) records, each with weight 1/2: one VertexData per source node with its neighbours.
    var vertexData = graph.GroupBy(
        e => e.a,
        e => e.b,
        (node, neighbours) => new VertexData(node, neighbours));

    // Length-two paths (a, b, c), weight 1/2db.
    var paths = graph.Join(
        graph,
        first => first.b,
        second => second.a,
        first => first.a,
        second => second.b,
        (mid, start, end) => new Triple(start, mid, end));

    // ((a, b, c), db) tuples, weight 1/2db(1 + db).
    var pathWithDb = paths.Join(
        vertexData,
        p => p.b,
        v => v.name,
        p => p,
        v => v.edges.Length,
        (key, p, degree) => new Pair<Triple, int>(p, degree));

    // Rotate to ((c, a, b), db), or equivalently ((a, b, c), dc).
    var rotatedCab = pathWithDb.Select(
        entry => new Pair<Triple, int>(new Triple(entry.a.c, entry.a.a, entry.a.b), entry.b));

    // Rotate to ((b, c, a), db), or equivalently ((a, b, c), da).
    var rotatedBca = pathWithDb.Select(
        entry => new Pair<Triple, int>(new Triple(entry.a.b, entry.a.c, entry.a.a), entry.b));

    // ((a, b, c), (da, db)) tuples, weight 1/2(da(1+da) + db(1+db)).
    var withTwoDegrees = pathWithDb.Join(
        rotatedBca,
        left => left.a,
        right => right.a,
        left => left.b,
        right => right.b,
        (key, db, da) => new Pair<Triple, Pair>(key, new Pair(da, db)));

    // ((a, b, c), (da, db, dc)) tuples, weight 1/2(da(1+da) + db(1+db) + dc(1+dc)).
    var withThreeDegrees = withTwoDegrees.Join(
        rotatedCab,
        left => left.a,
        right => right.a,
        left => left.b,
        right => right.b,
        (key, daDb, dc) => new Pair<Triple, Triple>(key, new Triple(daDb.a, daDb.b, dc)));

    // Keep only the degrees, then sort each triple ascending (min, median, max).
    var sortedDegrees = withThreeDegrees
        .Select(entry => entry.b)
        .Select(d => new Triple(
            Math.Min(Math.Min(d.a, d.b), d.c),
            median(d.a, d.b, d.c),
            Math.Max(d.a, Math.Max(d.b, d.c))));

    // Take the noisy histogram measurement.
    sortedDegrees.Count(d => d, epsilon);
}
/// <summary>
/// Annotates every edge with the <c>DPCount</c> values of its two endpoints and joins
/// consecutive annotated edges into triples of endpoint values.
/// </summary>
/// <param name="graph">The graph as a collection of edges.</param>
/// <param name="buckets">
/// Only touched by the symmetrisation branch, which doubles every bucket bound in place.
/// That branch is currently switched off.
/// </param>
/// <param name="epsilon">Not used by this method.</param>
/// <returns>
/// For every pair of edges (x, y) with <c>x.b == y.a</c>: a triple of
/// (value for x's <c>a</c> endpoint, value for x's <c>b</c> endpoint, value for y's <c>b</c> endpoint).
/// </returns>
public static PINQCollection<Triple> jddTriangles(this PINQCollection<Pair<int>> graph, Pair<int>[] buckets, double epsilon)
{
    // Symmetrisation is disabled; flip this flag to analyse the undirected version of the graph.
    bool symmetry = false;
    if (symmetry)
    {
        graph = graph.Concat(graph.Select(e => new Pair<int>(e.b, e.a)));

        // The buckets are doubled to go with the doubled edge set (the original author was
        // unsure whether doubling is correct).
        for (int index = 0; index < buckets.Length; index++)
        {
            buckets[index].a = buckets[index].a * 2;
            buckets[index].b = buckets[index].b * 2;
        }
    }

    // (node, i) pairs, keyed once by the a endpoint and once by the b endpoint.
    var sourceDegrees = graph.DPCount(e => e.a, (node, count) => new Pair<int>(node, count));
    var targetDegrees = graph.DPCount(e => e.b, (node, count) => new Pair<int>(node, count));

    // Attach the per-node value to each edge through the matching endpoint.
    var edgesWithSourceDegree = graph.Join(
        sourceDegrees,
        e => e.a,
        d => d.a,
        (e, d) => new Pair<Pair<int>, int>(e, d.b));

    var edgesWithTargetDegree = graph.Join(
        targetDegrees,
        e => e.b,
        d => d.a,
        (e, d) => new Pair<Pair<int>, int>(e, d.b));

    // (edge, value for a, value for b)
    var annotatedEdges = edgesWithSourceDegree.Join(
        edgesWithTargetDegree,
        left => left.a,
        right => right.a,
        (left, right) => new Triple<Pair<int>, int, int>(left.a, left.b, right.b));

    // Chain two annotated edges that share a node, keeping the values of the three nodes involved.
    var chained = annotatedEdges.Join(
        annotatedEdges,
        first => first.a.b,
        second => second.a.a,
        (first, second) => new Triple(first.b, first.c, second.c));

    return chained;
}
/// <summary>
/// Applies <c>Shave</c> with a slice size of 1.0 and keeps only the integer index handed to the
/// result selector, discarding the record itself.
/// </summary>
/// <param name="input">The collection to shave.</param>
/// <returns>A collection of the indices produced by <c>Shave</c>.</returns>
public static PINQCollection<int> Transpose<T>(this PINQCollection<T> input) where T : IEquatable<T>
{
    var indices = input.Shave(1.0, (index, record) => index);
    return indices;
}
/// <summary>
/// End-to-end experiment: measures the sensitive graph, builds a synthetic warm-start graph,
/// runs MCMC on the synthetic graph, and prints <c>Observation.TotalError</c> before and after.
/// </summary>
/// <param name="edges">Edges of the sensitive graph.</param>
/// <param name="parallelism">Value forwarded to <c>PINQCollection.Input</c>.</param>
/// <param name="filename">Not used by the live code (only by output statements that were disabled).</param>
public static void TestMCMC(Pair<int>[] edges, int parallelism, string filename)
{
    double epsilon = 0.1;

    // Records enter the input with weight +1.0 and are withdrawn with weight -1.0.
    Func<Pair<int>, Weighted<Pair<int>>> insert = e => new Weighted<Pair<int>>(e, +1.0);
    Func<Pair<int>, Weighted<Pair<int>>> retract = e => new Weighted<Pair<int>>(e, -1.0);

    var graph = PINQCollection<Pair<int>>.Input(parallelism);

    // Introduce the sensitive graph data.
    graph.OnNext(edges.Select(insert));

    #region take preliminary node and edge multiplicity measurements

    // Count the number of nodes: measure how many records of the combined collection land on
    // index 0 after shaving, using half of epsilon, then scale.
    var sourceSide = graph.GroupBy(e => e.a, e => e, (key, group) => group.Count);
    var targetSide = graph.Select(e => e.b);
    var indexZero = sourceSide
        .Concat(targetSide)
        .Shave(1.0, (index, value) => index)
        .Where(index => index == 0);
    var doubleNodes = 0.5 + indexZero.Count(index => index, epsilon / 2)[0] * 2;
    var numNodes = (int)doubleNodes;

    Console.WriteLine("num of nodes: " + numNodes + " " + doubleNodes);

    // Count multi-edges and self-loops: (shave index, 1 if self-loop else 0).
    // The measurement object is not read afterwards; the call itself is kept.
    var multiplicity = graph
        .Shave(1.0, (index, e) => new Pair<int> { a = index, b = e.a == e.b ? 1 : 0 })
        .Count(entry => entry, epsilon);

    #endregion

    #region warm start measurements (in and out degree distributions and ccdf)

    var synth = graph.WarmStart(numNodes, numNodes, epsilon).ToArray();

    #endregion

    #region define a graph analysis

    graph.TrianglesByDegree(0.1);

    #endregion

    // Revealing this directly violates differential privacy; for measurement only.
    Console.WriteLine("Error on real data: {0}", Observation.TotalError);

    // Remove the sensitive graph data and introduce the synthetic graph data.
    graph.OnNext(edges.Select(retract));
    graph.OnNext(synth.Select(insert));

    // Perform the MCMC steps.
    var mcmcIterations = 1000000;
    var mcmcd = graph.ComputeWithSwaps(synth, numNodes, mcmcIterations).ToArray();

    // Remove the MCMC graph data and re-introduce the sensitive graph data.
    graph.OnNext(mcmcd.Select(retract));
    graph.OnNext(edges.Select(insert));

    // Revealing this directly violates differential privacy; for measurement only.
    Console.WriteLine("Final error {0}", Observation.TotalError);
}
/// <summary>
/// Loads eleven space-separated integer tables from <paramref name="path"/>, feeds each into its
/// own input collection with weight +1.0, then runs eight join-and-count queries. Each query is
/// executed ten times; its average result and average wall-clock time are printed.
/// </summary>
/// <param name="path">
/// Directory containing customer.txt, supplier.txt, order.txt, lineitem.txt, ps.txt and
/// R1.txt to R6.txt. File names are appended after a "/" separator.
/// </param>
/// <remarks>
/// The stopwatch is restarted for every query. Previously it was resumed with <c>Start()</c>
/// after each <c>Stop()</c> without a reset, so the time printed for queries 2 to 8 also
/// contained the time of every earlier query.
/// </remarks>
public static void DoExperiments(string path)
{
    const int parallelism = 4;        // argument passed to every Input(...)
    const int queryEpsilon = 100000;  // second argument passed to every Count(...)
    const int repeatTime = 10;        // executions per query

    // Reads a two-column table; each line is split on single spaces.
    Func<string, Pair<int>[]> loadPairs = fileName =>
        System.IO.File.ReadAllLines(path + "/" + fileName)
            .Select(line => line.Split(' '))
            .Select(f => new Pair<int>(Convert.ToInt32(f[0]), Convert.ToInt32(f[1])))
            .ToArray();

    // Reads a three-column table; each line is split on single spaces.
    Func<string, Triple[]> loadTriples = fileName =>
        System.IO.File.ReadAllLines(path + "/" + fileName)
            .Select(line => line.Split(' '))
            .Select(f => new Triple(Convert.ToInt32(f[0]), Convert.ToInt32(f[1]), Convert.ToInt32(f[2])))
            .ToArray();

    Console.WriteLine("Begin to load input data.");

    // Files are read in the same order as before, so the first missing file reported is unchanged.
    var customer = loadPairs("customer.txt");
    var supplier = loadPairs("supplier.txt");
    var order = loadPairs("order.txt");
    var lineitem = loadTriples("lineitem.txt");
    var ps = loadPairs("ps.txt");
    var r1 = loadPairs("R1.txt");
    var r2 = loadPairs("R2.txt");
    var r3 = loadPairs("R3.txt");
    var r4 = loadPairs("R4.txt");
    var r5 = loadPairs("R5.txt");
    var r6 = loadTriples("R6.txt");

    Console.WriteLine("Finish loading input data.");
    Console.WriteLine("Begin to add weights for data.");

    // Every record enters its collection with weight +1.0.
    Func<Pair<int>, Weighted<Pair<int>>> weighPair = x => new Weighted<Pair<int>>(x, +1.0);
    Func<Triple, Weighted<Triple>> weighTriple = x => new Weighted<Triple>(x, +1.0);

    var w_c = PINQCollection<Pair<int>>.Input(parallelism);
    w_c.OnNext(customer.Select(weighPair));
    var w_s = PINQCollection<Pair<int>>.Input(parallelism);
    w_s.OnNext(supplier.Select(weighPair));
    var w_o = PINQCollection<Pair<int>>.Input(parallelism);
    w_o.OnNext(order.Select(weighPair));
    var w_ps = PINQCollection<Pair<int>>.Input(parallelism);
    w_ps.OnNext(ps.Select(weighPair));
    var w_l = PINQCollection<Triple>.Input(parallelism);
    w_l.OnNext(lineitem.Select(weighTriple));
    var w_r1 = PINQCollection<Pair<int>>.Input(parallelism);
    w_r1.OnNext(r1.Select(weighPair));
    var w_r2 = PINQCollection<Pair<int>>.Input(parallelism);
    w_r2.OnNext(r2.Select(weighPair));
    var w_r3 = PINQCollection<Pair<int>>.Input(parallelism);
    w_r3.OnNext(r3.Select(weighPair));
    var w_r4 = PINQCollection<Pair<int>>.Input(parallelism);
    w_r4.OnNext(r4.Select(weighPair));
    var w_r5 = PINQCollection<Pair<int>>.Input(parallelism);
    w_r5.OnNext(r5.Select(weighPair));
    var w_r6 = PINQCollection<Triple>.Input(parallelism);
    w_r6.OnNext(r6.Select(weighTriple));

    Console.WriteLine("Finish adding weights to data.");

    // Extends a path relation (start, end) by one hop: (a, b) joined with (b, c) gives (a, c).
    Func<PINQCollection<Pair<int>>, PINQCollection<Pair<int>>, PINQCollection<Pair<int>>> extend =
        (left, right) => left.Join(right, x => x.b, y => y.a, (x, y) => new Pair<int>(x.a, y.b));

    // Closes a path (start, end) with a relation row (end, start), matching on both columns.
    Func<PINQCollection<Pair<int>>, PINQCollection<Pair<int>>, PINQCollection<bool>> close =
        (left, right) => left.Join(
            right,
            x => new Pair<int>(x.a, x.b),
            y => new Pair<int>(y.b, y.a),
            (x, y) => true);

    // Runs one query repeatTime times and prints its average result and average time.
    var sw = new Stopwatch();
    Action<string, Func<double>> runQuery = (label, query) =>
    {
        Console.WriteLine(label);

        double total = 0;
        sw.Restart(); // reset so the reported time covers this query only
        for (int i = 0; i < repeatTime; i++)
        {
            total += query();
        }
        sw.Stop();

        Console.WriteLine("Result is " + total / repeatTime);
        Console.WriteLine("Time: " + sw.Elapsed.TotalMilliseconds / repeatTime / 1000 + "s");
    };

    runQuery("For query 1: ", () =>
    {
        var w_c_o = w_c.Join(w_o, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var w_s_l = w_l.Join(w_s, x => x.c, y => y.a, (x, y) => new Triple(x.a, x.b, x.c));
        var w_c_o_s_l = w_c_o.Join(w_s_l, x => x.c, y => y.a, (x, y) => true);
        return w_c_o_s_l.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 2: ", () =>
    {
        var w_s_ps = w_s.Join(w_ps, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var w_o_l = w_o.Join(w_l, x => x.a, y => y.a, (x, y) => new Triple(y.c, y.b, y.a));
        var w_s_ps_o_l = w_s_ps.Join(
            w_o_l,
            x => new Pair<int>(x.b, x.c),
            y => new Pair<int>(y.a, y.b),
            (x, y) => true);
        return w_s_ps_o_l.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 3: ", () =>
    {
        var w_c_o = w_c.Join(w_o, x => x.a, y => y.b, (x, y) => new Triple(x.b, x.a, y.a));
        var w_s_l = w_s.Join(w_l, x => x.a, y => y.c, (x, y) => new Triple(x.b, x.a, y.a));
        var w_c_o_s_l = w_c_o.Join(
            w_s_l,
            x => new Pair<int>(x.a, x.c),
            y => new Pair<int>(y.a, y.c),
            (x, y) => true);
        return w_c_o_s_l.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 4: ", () =>
    {
        var w_r12 = extend(w_r1, w_r2);
        var w_r123 = extend(w_r12, w_r3);
        var w_r1234 = extend(w_r123, w_r4);
        var w_r12345 = extend(w_r1234, w_r5);
        return w_r12345.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 5: ", () =>
    {
        var w_r12 = extend(w_r1, w_r2);
        var w_r123 = close(w_r12, w_r3);
        return w_r123.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 6: ", () =>
    {
        var w_r12 = extend(w_r1, w_r2);
        var w_r123 = extend(w_r12, w_r3);
        var w_r1234 = close(w_r123, w_r4);
        return w_r1234.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 7: ", () =>
    {
        var w_r12 = extend(w_r1, w_r2);
        var w_r123 = extend(w_r12, w_r3);
        var w_r1234 = extend(w_r123, w_r4);
        var w_r12345 = close(w_r1234, w_r5);
        return w_r12345.Count(x => true, queryEpsilon)[true];
    });

    runQuery("For query 8: ", () =>
    {
        var w_r34 = w_r3.Join(w_r4, x => x.b, y => y.a, (x, y) => new Triple(x.a, x.b, y.b));
        var w_r345 = w_r34.Join(
            w_r5,
            x => new Pair(x.a, x.c),
            y => new Pair(y.b, y.a),
            (x, y) => new Triple(x.a, x.b, x.c));
        var w_r3456 = w_r345.Join(w_r6, x => x, y => y, (x, y) => true);
        return w_r3456.Count(x => true, queryEpsilon)[true];
    });
}