static void Main(string[] args)
{
    // Read from data file, and insert into class
    string[] lines = System.IO.File.ReadAllLines(@"C:\Users\Administrator\Documents\Visual Studio 2010\Projects\DPLab\ConsoleApplication2\DPLabData.csv");

    IList<Record> recList = new List<Record>();
    foreach (string line in lines)
    {
        // Use "," as delimiter to break the line into an array of strings
        string[] words = line.Split(',');

        // Create new Record and set all of its properties
        Record rec = new Record();
        rec.setAll(words);

        // Add Record to List
        recList.Add(rec);
    }

    // Convert recList to IQueryable
    var source = recList.AsQueryable<Record>();

    var agent = new PINQAgentBudget(5.0);
    var db = new PINQueryable<Record>(source, agent);

    distTotalRev(db, source);   // Chart 1 (4 figures) (e = 1)
    Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");

    distLocalRev(db, source);   // Chart 2 (4 figures) (e = 1)
    Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");

    distExpenses(db, source);   // Chart 3 (4 figures) (e = 1)
    Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");

    debt(db, source);           // Figures 1, 2, and 3 (e = 0.5)
    Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");

    costPerStudent(db, source); // Figure 4 (e = 0.2)
    highEnrollment(db, source); // Figures 5, 6 (e = 0.4)
    lowEnrollment(db, source);  // Figures 7, 8 (e = 0.4)
    Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");

    teachSalary(db, source);    // Figures 9, 10, 11 (e = 0.5)

    Console.ReadLine(); // Pause
}
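The Record class and its setAll method are defined elsewhere in the project and are not shown here. As a minimal sketch of what they might look like, assuming the CSV's columns are numeric fields (the property names below are hypothetical):

// Hypothetical sketch of the Record class used above; the real class lives
// elsewhere in the project, and its actual property names are not shown here.
class Record
{
    public int Enrollment { get; set; }      // assumed column
    public double TotalRevenue { get; set; } // assumed column

    // setAll parses one CSV row into the record's properties
    public void setAll(string[] words)
    {
        Enrollment = int.Parse(words[0]);
        TotalRevenue = double.Parse(words[1]);
        // ... remaining columns parsed similarly
    }
}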
static void Main(string[] args)
{
    var participants = 1000;
    var edges = 10000;

    var sourcegraph = GenerateData(participants).Take(edges).ToArray().AsQueryable();

    var agent = new PINQAgentBudget(10000);
    var securegraph = new PINQueryable<int[]>(sourcegraph, agent);

    // we'll start by computing degree distributions
    var nodes = securegraph.GroupBy(x => x[0]);
    var nodeparts = nodes.Partition(Enumerable.Range(0, 20).ToArray(), x => x.Count());
    foreach (var degree in Enumerable.Range(0, 20))
    {
        Console.WriteLine("degree {0}:\t{1:F2}\t+/- {2:F2}", degree, nodeparts[degree].NoisyCount(0.1), 10.0);
    }
    Console.WriteLine();

    // for a bunch of the analyses, we want the degree to be bounded
    var bound = 10;
    var bounded = BoundDegree(securegraph, bound).Materialize();

    // with a degree-bounded graph, we can measure things like assortativity. Each edge is joined using both of its endpoints.
    // this uses the "bounded join", which imposes a limit on the number of records with each key, to bound the transformation's stability.
    var edgedegrees = securegraph.Join(nodes, edge => edge[0], node => node.Key, bound, bound, (edge, node) => new int[] { node.Count(), edge[1] })
                                 .Join(nodes, edge => edge[1], node => node.Key, bound, bound, (edge, node) => new int[] { edge[0], node.Count() });

    Console.WriteLine("Assortativity:");
    var srcparts = edgedegrees.Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[0]);
    foreach (var i in Enumerable.Range(8, 5))
    {
        var dstparts = srcparts[i].Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[1]);
        foreach (var j in Enumerable.Range(8, 5))
        {
            Console.Write("\t{0:F2}", dstparts[j].NoisyCount(0.1));
        }
        Console.WriteLine();
    }
    Console.WriteLine();

    // we can also measure the clustering coefficient: the number of triangles divided by the number of length-two paths.
    var paths2 = ExtendPaths(bounded, bounded, bound, bound);
    var paths3 = ExtendPaths(paths2, bounded, bound * bound, bound);

    var triangles = paths3.Where(x => x[0] == x[3]);

    Console.WriteLine("Triangles:\t{0}", triangles.NoisyCount(0.1));
    Console.WriteLine("Len 2 paths:\t{0}", paths2.NoisyCount(0.1));
    Console.WriteLine();

    // one way to view pagerank is as the sum, over all paths arriving at a vertex, of the probability of
    // traversing that path; usually this looks something like (alpha/degree)^length.
    // although we'll have to use increasingly noisy counts for longer paths, to prevent privacy explosion,
    // the contributions of these terms are scaled down commensurately.
    var depth = 3;
    var paths = new PINQueryable<int[]>[depth];

    paths[0] = bounded;
    foreach (var index in Enumerable.Range(1, depth - 1))
    {
        paths[index] = ExtendPaths(paths[index - 1], bounded, Convert.ToInt32(Math.Pow(bound, index)), bound).Materialize();
    }

    // for any set of endpoints (too small a set gives bad results, as privacy would dictate) we compute
    var pagerank = 0.0;
    foreach (var index in Enumerable.Range(0, depth))
    {
        pagerank += paths[index].Where(path => path.Last() % 10 == 0)
                                .NoisyCount(0.1 * Math.Pow(0.85 / bound, index)) * Math.Pow(0.85 / bound, index);

        Console.WriteLine("pagerank using paths of length at most {0}:\t{1}", index + 1, pagerank);
    }

    Console.ReadKey();
}
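The ExtendPaths helper is defined elsewhere. A minimal sketch of one plausible implementation, assuming paths are represented as vertex arrays and using the same bounded join shown above (the per-key limits are what bound the join's stability):

// Hypothetical sketch of ExtendPaths: extends each path by one edge, joining the
// path's final vertex against each edge's source vertex with PINQ's bounded join.
static PINQueryable<int[]> ExtendPaths(PINQueryable<int[]> paths, PINQueryable<int[]> edges, int pathbound, int edgebound)
{
    return paths.Join(edges,
                      path => path.Last(),  // key: the last vertex on the path
                      edge => edge[0],      // key: the edge's source vertex
                      pathbound, edgebound, // per-key record limits, bounding stability
                      (path, edge) => path.Concat(new int[] { edge[1] }).ToArray());
}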
static void Main(string[] args)
{
    var dimensions = 8;
    var records = 10000;

    var sourcedata = GenerateData(dimensions).Take(records).ToArray().AsQueryable();

    PINQAgentBudget agent = new PINQAgentBudget(5);
    var securedata = new PINQueryable<double[]>(sourcedata, agent);

    // let's start by computing the centroid of the data
    var means = Mean(securedata, dimensions, 0.1);

    Console.WriteLine("mean vector:");
    foreach (var mean in means)
    {
        Console.Write("\t{0:F4}", mean);
    }
    Console.WriteLine();
    Console.WriteLine();

    /*
    // we can also center the data and compute its covariance
    var centered = securedata.Select(x => x.Select((v, i) => v - means[i]).ToArray());
    var covariance = Covariance(centered, dimensions, 8);

    Console.WriteLine("covariance matrix:");
    foreach (var row in covariance)
    {
        foreach (var entry in row)
            Console.Write("\t{0:F4}", entry);
        Console.WriteLine();
    }
    Console.WriteLine();
    */

    // iterative algorithms are also possible. we'll do k-means first
    var k = 3;
    var centers = GenerateData(dimensions).Take(k).ToArray();

    var iterations = 20;
    foreach (var iteration in Enumerable.Range(0, iterations))
    {
        kMeansStep(securedata, centers, 0.1);
    }

    Console.WriteLine("kMeans: {0} centers, {1} iterations", k, iterations);
    foreach (var center in centers)
    {
        foreach (var value in center)
        {
            Console.Write("\t{0:F4}", value);
        }
        Console.WriteLine();
    }
    Console.WriteLine();

    /*
    // Moving to supervised learning, let's label the points by whether they are nearest the first center or not
    var labeled = securedata.Select(x => new Example(x, NearestCenter(x, centers) == centers[0] ? 1.0 : -1.0));

    // the Perceptron algorithm repeatedly adds misclassified examples to a normal vector
    var perceptronnormal = GenerateData(dimensions).First();
    foreach (var index in Enumerable.Range(0, iterations))
        perceptronnormal = PerceptronStep(labeled, perceptronnormal, 0.1);

    var perceptronerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * perceptronnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
    Console.WriteLine("perceptron error rate:\t\t{0:F4}", perceptronerror);

    // the Support Vector Machine attempts to find a maximum margin classifier
    var supportvectornormal = GenerateData(dimensions).First();
    foreach (var index in Enumerable.Range(0, iterations))
        supportvectornormal = SupportVectorStep(labeled, supportvectornormal, 0.1);

    var supportvectorerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * supportvectornormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
    Console.WriteLine("support vector error rate:\t{0:F4}", supportvectorerror);

    // Logistic regression optimizes the likelihood of the labels under the logistic function
    var logisticnormal = GenerateData(dimensions).First();
    foreach (var index in Enumerable.Range(0, iterations))
        logisticnormal = LogisticStep(labeled, logisticnormal, 0.1);

    var logisticerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * logisticnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
    Console.WriteLine("logistic error rate:\t\t{0:F4}", logisticerror);

    Console.ReadKey();
    */
}
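The kMeansStep helper is defined elsewhere. A minimal sketch under the assumption that a NearestCenter helper returns (a reference to) the closest center, so the centers themselves can serve as partition keys:

// Hypothetical sketch of kMeansStep: partition the records by nearest center,
// then move each center coordinate to the NoisyAverage of its partition.
static void kMeansStep(PINQueryable<double[]> data, double[][] centers, double epsilon)
{
    // each record's partition key is its nearest center (NearestCenter is an
    // assumed helper, as used in the commented-out supervised-learning code)
    var parts = data.Partition(centers, x => NearestCenter(x, centers));

    foreach (var center in centers)
    {
        foreach (var d in Enumerable.Range(0, center.Length))
        {
            // NoisyAverage clamps each value before averaging, so this assumes
            // coordinates roughly in the unit range, as the generated data suggests
            center[d] = parts[center].NoisyAverage(epsilon, x => x[d]);
        }
    }
}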