// ===== Example #1 =====
        /// <summary>
        /// Entry point. Loads records from a CSV data file, wraps them in a
        /// differentially-private PINQueryable guarded by a total privacy budget of
        /// 5.0 epsilon, and runs a fixed sequence of analyses whose per-query
        /// epsilons (noted on each call) draw down that budget.
        /// </summary>
        /// <param name="args">Optional: args[0] overrides the default CSV data-file path.</param>
        static void Main(string[] args)
        {
            // Allow the data-file location to be supplied on the command line,
            // falling back to the original hard-coded development path.
            string dataPath = args.Length > 0
                ? args[0]
                : @"C:\Users\Administrator\Documents\Visual Studio 2010\Projects\DPLab\ConsoleApplication2\DPLabData.csv";

            // Read from data file, and insert into class
            string[] lines = System.IO.File.ReadAllLines(dataPath);
            IList<Record> recList = new List<Record>();

            foreach (string line in lines)
            {
                // Use "," as delimiter to break line into an array of fields
                string[] words = line.Split(',');

                // Create new Record and populate all of its properties from the fields
                Record rec = new Record();
                rec.setAll(words);

                // Add Record to List
                recList.Add(rec);
            }

            // Convert recList to IQueryable and guard it with a 5.0-epsilon privacy budget
            var source = recList.AsQueryable<Record>();
            var agent = new PINQAgentBudget(5.0);

            var db = new PINQueryable<Record>(source, agent);

            distTotalRev(db, source);                                                       // Chart 1 (4 figures)      (e = 1)
            Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");
            distLocalRev(db, source);                                                       // Chart 2 (4 figures)      (e = 1)
            Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");
            distExpenses(db, source);                                                       // Chart 3 (4 figures)      (e = 1)
            Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");
            debt(db, source);                                                               // Figure 1, 2, and 3       (e = 0.5)
            Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");
            costPerStudent(db, source);                                                     // Figure 4                 (e = 0.2)       
            highEnrollment(db, source);                                                     // Figure 5,6               (e = 0.4)
            lowEnrollment(db, source);                                                      // Figure 7,8               (e = 0.4)
            Console.WriteLine("=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n");
            teachSalary(db, source);                                                        // Figure 9, 10, 11         (e = 0.5)

            Console.ReadLine(); //Pause
        }
// ===== Example #2 =====
        /// <summary>
        /// Entry point. Generates a synthetic directed graph of 10,000 edges over
        /// 1,000 participants, wraps it in a PINQueryable guarded by a privacy
        /// budget of 10,000, and runs several differentially-private graph
        /// analyses: degree distribution, assortativity, triangle / length-2 path
        /// counts, and an approximate truncated PageRank.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        static void Main(string[] args)
        {
            var participants = 1000;
            var edges        = 10000;

            // Each edge is an int[] pair of endpoints; GenerateData presumably yields
            // random endpoint pairs drawn from [0, participants) -- TODO confirm against
            // its definition elsewhere in this file.
            var sourcegraph = GenerateData(participants).Take(edges).ToArray().AsQueryable();
            var agent       = new PINQAgentBudget(10000);
            var securegraph = new PINQueryable <int[]>(sourcegraph, agent);

            // we'll start by computing degree distributions:
            // group edges by source endpoint, then partition the groups by out-degree 0..19
            var nodes = securegraph.GroupBy(x => x[0]);

            var nodeparts = nodes.Partition(Enumerable.Range(0, 20).ToArray(), x => x.Count());

            foreach (var degree in Enumerable.Range(0, 20))
            {
                // NoisyCount(0.1) spends 0.1 epsilon per partition; the printed +/- 10.0
                // is presumably the noise scale (1 / epsilon) -- confirm against PINQ docs.
                Console.WriteLine("degree {0}:\t{1:F2}\t+/- {2:F2}", degree, nodeparts[degree].NoisyCount(0.1), 10.0);
            }

            Console.WriteLine();


            // for a bunch of the analyses, we want the degree to be bounded
            var bound   = 10;
            var bounded = BoundDegree(securegraph, bound).Materialize();


            // with a degree-bounded graph, we can measure things like assortativity. Each edge is joined using both of its endpoints.
            // this uses the "bounded-join", which imposes a limit on the number of records with each key, to bound the transformation's stability.
            var edgedegrees = securegraph.Join(nodes, edge => edge[0], node => node.Key, bound, bound, (edge, node) => new int[] { node.Count(), edge[1] })
                              .Join(nodes, edge => edge[1], node => node.Key, bound, bound, (edge, node) => new int[] { edge[0], node.Count() });

            // edgedegrees now holds, per edge, the degrees of its two endpoints;
            // the 5x5 table below counts edges bucketed by (source degree, dest degree) in [8, 13).
            Console.WriteLine("Assortativity:");
            var srcparts = edgedegrees.Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[0]);

            foreach (var i in Enumerable.Range(8, 5))
            {
                var dstparts = srcparts[i].Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[1]);
                foreach (var j in Enumerable.Range(8, 5))
                {
                    Console.Write("\t{0:F2}", dstparts[j].NoisyCount(0.1));
                }

                Console.WriteLine();
            }
            Console.WriteLine();


            // we can also measure the correlation coefficient: the number of triangles divided by the number of length two paths.
            // a triangle is a length-3 path whose first and last vertices coincide (x[0] == x[3]).
            var paths2    = ExtendPaths(bounded, bounded, bound, bound);
            var paths3    = ExtendPaths(paths2, bounded, bound * bound, bound);
            var triangles = paths3.Where(x => x[0] == x[3]);

            Console.WriteLine("Triangles:\t{0}", triangles.NoisyCount(0.1));
            Console.WriteLine("Len 2 paths:\t{0}", paths2.NoisyCount(0.1));
            Console.WriteLine();


            // one way to view pagerank is the sum over all paths arriving at a vertex, of the probability of
            // traversing that path. usually this looks something like (alpha/degree)^length
            // although we'll have to have increasingly noisy counts with longer paths, to prevent privacy explosion,
            // the contributions of these terms are scaled down commensurately.

            // paths[i] holds all paths of length i+1; each extension multiplies the
            // stability bound by the degree bound, hence Math.Pow(bound, index) below.
            var depth = 3;
            var paths = new PINQueryable <int[]> [depth];

            paths[0] = bounded;
            foreach (var index in Enumerable.Range(1, depth - 1))
            {
                paths[index] = ExtendPaths(paths[index - 1], bounded, Convert.ToInt32(Math.Pow(bound, index)), bound).Materialize();
            }

            // for any set of endpoints (too small a set gives bad results, as privacy would dictate) we compute
            // here: the set of vertices whose id is a multiple of 10; each length's count
            // is weighted by (0.85 / bound)^length, and its epsilon is scaled down by the
            // same factor so the privacy cost of longer paths does not explode.
            var pagerank = 0.0;

            foreach (var index in Enumerable.Range(0, depth))
            {
                pagerank += paths[index].Where(path => path.Last() % 10 == 0)
                            .NoisyCount(0.1 * Math.Pow(0.85 / bound, index)) * Math.Pow(0.85 / bound, index);

                Console.WriteLine("pagerank using paths of length at most {0}:\t{1}", index + 1, pagerank);
            }

            Console.ReadKey();
        }
// ===== Example #3 =====
        /// <summary>
        /// Entry point. Generates 10,000 synthetic 8-dimensional records, wraps them
        /// in a PINQueryable guarded by a 5-epsilon privacy budget, then computes a
        /// differentially-private mean vector and runs 20 iterations of private
        /// k-means (k = 3). Further demos (covariance, perceptron, SVM, logistic
        /// regression) are present but commented out.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        static void Main(string[] args)
        {
            var             dimensions = 8;
            var             records    = 10000;
            var             sourcedata = GenerateData(dimensions).Take(records).ToArray().AsQueryable();
            PINQAgentBudget agent      = new PINQAgentBudget(5);
            var             securedata = new PINQueryable <double[]>(sourcedata, agent);

            // let's start by computing the centroid of the data
            // (each coordinate's noisy mean costs 0.1 epsilon -- see Mean's definition)
            var means = Mean(securedata, dimensions, 0.1);

            Console.WriteLine("mean vector:");
            foreach (var mean in means)
            {
                Console.Write("\t{0:F4}", mean);
            }
            Console.WriteLine();
            Console.WriteLine();

            /*
             * // we can also center the data and compute its covariance
             * var centered = securedata.Select(x => x.Select((v, i) => v - means[i]).ToArray());
             * var covariance = Covariance(centered, dimensions, 8);
             *
             * Console.WriteLine("covariance matrix:");
             * foreach (var row in covariance)
             * {
             *  foreach (var entry in row)
             *      Console.Write("\t{0:F4}", entry);
             *  Console.WriteLine();
             * }
             * Console.WriteLine();
             */

            // iterative algorithms are also possible. we'll do k-means first
            // NOTE(review): kMeansStep appears to update `centers` in place (its return
            // value is discarded) -- confirm against its definition.
            var k          = 3;
            var centers    = GenerateData(dimensions).Take(k).ToArray();
            var iterations = 20;

            foreach (var iteration in Enumerable.Range(0, iterations))
            {
                kMeansStep(securedata, centers, 0.1);
            }

            Console.WriteLine("kMeans: {0} centers, {1} iterations", k, iterations);
            foreach (var center in centers)
            {
                foreach (var value in center)
                {
                    Console.Write("\t{0:F4}", value);
                }
                Console.WriteLine();
            }
            Console.WriteLine();


            /*
             * // Moving to supervised learning, let's label the points by whether they are nearest the first center or not
             * var labeled = securedata.Select(x => new Example(x, NearestCenter(x, centers) == centers[0] ? 1.0 : -1.0));
             *
             * // the Perceptron algorithm repeatedly adds misclassified examples to a normal vector
             * var perceptronnormal = GenerateData(dimensions).First();
             * foreach (var index in Enumerable.Range(0, iterations))
             *  perceptronnormal = PerceptronStep(labeled, perceptronnormal, 0.1);
             *
             * var perceptronerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * perceptronnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
             * Console.WriteLine("perceptron error rate:\t\t{0:F4}", perceptronerror);
             *
             * // the Support Vector Machine attempts to find a maximum margin classifier
             * var supportvectornormal = GenerateData(dimensions).First();
             * foreach (var index in Enumerable.Range(0, iterations))
             *  supportvectornormal = SupportVectorStep(labeled, supportvectornormal, 0.1);
             *
             * var supportvectorerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * supportvectornormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
             * Console.WriteLine("support vector error rate:\t{0:F4}", supportvectorerror);
             *
             * // Logistic regression optimizes the likelihood of the labels under the logistic function
             * var logisticnormal = GenerateData(dimensions).First();
             * foreach (var index in Enumerable.Range(0, iterations))
             *  logisticnormal = LogisticStep(labeled, logisticnormal, 0.1);
             *
             * var logisticerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * logisticnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
             * Console.WriteLine("logistic error rate:\t\t{0:F4}", logisticerror);
             *
             * Console.ReadKey();
             */
        }