Exemplo n.º 1
0
        // computes and steps along the gradient of
        public static double[] PerceptronStep(PINQueryable <Example> input, double[] normal, double epsilon)
        {
            // select the examples that are currently mis-labeled by the normal vector
            var errors = input.Where(x => x.label * x.vector.Select((v, i) => v * normal[i]).Sum() < 0.0);

            // fold the average error into the normal
            var newnormal = new double[normal.Length];

            foreach (var coordinate in Enumerable.Range(0, normal.Length))
            {
                newnormal[coordinate] = normal[coordinate] + errors.NoisyAverage(epsilon, x => x.label * x.vector[coordinate]);
            }

            return(newnormal);
        }
Exemplo n.º 2
0
        // computes the outer product of the data matrix with itself. if the data are centered, this is the covariance matrix
        public static double[][] Covariance(PINQueryable <double[]> input, int dimensions, double epsilon)
        {
            double[][] outer = new double[dimensions][];

            foreach (var i in Enumerable.Range(0, dimensions))
            {
                outer[i] = new double[dimensions];
                foreach (var j in Enumerable.Range(0, dimensions))
                {
                    outer[i][j] = input.NoisyAverage(epsilon, x => x[i] * x[j]);
                }
            }

            return(outer);
        }
Exemplo n.º 3
0
        // runs one step of the iterative k-means algorithm.
        public static void kMeansStep(PINQueryable <double[]> input, double[][] centers, double epsilon)
        {
            // partition data set by the supplied centers; somewhat icky in pure LINQ... (( and it assumes centers[0] exists ))
            var parts = input.Partition(centers, x => NearestCenter(x, centers));

            // update each of the centers
            foreach (var center in centers)
            {
                var part = parts[center];
                foreach (var index in Enumerable.Range(0, center.Length))
                {
                    center[index] = part.NoisyAverage(epsilon, x => x[index]);
                }
            }
        }
Exemplo n.º 4
0
        // computes and steps along the gradient of the SVM objective function: Sum_i HingeLoss(1.0 - normal^Tx_i y_i) + ||w||_2^2
        public static double[] SupportVectorStep(PINQueryable <Example> input, double[] normal, double epsilon)
        {
            // select the examples that are currently mis-labeled by the normal vector. also add some negative normal for our regularizer
            var errors = input.Where(x => x.Label * x.Vector.Select((v, i) => v * normal[i]).Sum() < 1.0)
                         .Concat(Enumerable.Repeat(new Example(normal, -1.0), 10).AsQueryable());

            // fold the average error into the normal
            var newnormal = new double[normal.Length];

            foreach (var coordinate in Enumerable.Range(0, normal.Length))
            {
                newnormal[coordinate] = normal[coordinate] + errors.NoisyAverage(epsilon, x => x.Label * x.Vector[coordinate]);
            }

            return(newnormal);
        }
Exemplo n.º 5
0
        // Filters patient records on two caller-supplied field values and prints the
        // exact count next to a differentially-private (epsilon = 10.0) count.
        public static void test3(String[] args)
        {
            // wrap the raw file contents so that all further access is privacy-tracked
            var filename = @"..\..\test3_groupbyname.txt";
            var agent    = new PINQAgentLogger(filename);
            var text     = new PINQueryable <string>(File.ReadAllLines(filename).AsQueryable(), agent);

            // split each line into comma-separated fields, then apply both predicates
            var users = text.Select(line => line.Split(','))
                            .Where(fields => fields[1] == args[0])
                            .Where(fields => fields[3] == args[1]);

            Console.WriteLine(" 无噪声——患有癌症且地址为北京的病人有: " + users.count() + "人");
            Console.WriteLine(" 有噪声——患有癌症且地址为北京的病人有: " + users.NoisyCount(10.0) + "人");

            Console.ReadKey();
        }
Exemplo n.º 6
0
        // Counts searches for args[0] with differential privacy, then joins truncated
        // searcher IPs against an IP-to-lat/lon table and emits an HTML visualization.
        static void Main(string[] args)
        {
            /* Note: various DryadLINQ data sources are commented out, and replaced
             * with empty data sets to avoid compile errors.
             */

            // open DryadLINQ data source.
            //var ddc = new DryadDataContext(@"file://\\sherwood-091\dryadlinqusers\mcsherry");
            //IQueryable<string> searchesdata = ddc.GetPartitionedTable<string>("SearchLogs.txt", CompressionScheme.GZipFast);
            IQueryable <string> searchesdata = Enumerable.Empty <string>().AsQueryable();

            // Encase data sources in PINQueryable privacy type.
            PINQueryable <string> searches = new PINQueryable <string>(searchesdata, new PINQAgentLogger("searches"));

            // extract fields, then restrict to searches for args[0]
            // (field 20 is presumably the query text -- TODO confirm against the log schema)
            var searchsubset = searches.Select(x => x.Split(','))
                               .Where(x => x[20].ToLower() == args[0]);

            // differentially-private count of matching searches (epsilon = 0.1)
            Console.WriteLine(args[0] + " count: " + searchsubset.NoisyCount(0.1));

            #region Further analysis, and visualization.

            // open second data set, containing ip to latlon mappings.
            //IQueryable<string[]> iplatlondata = ddc.GetPartitionedTable<LineRecord>("IPtoLatLon.txt", CompressionScheme.GZipFast).Select(x => x.line.Split('\t'));
            IQueryable <string[]>   iplatlondata = Enumerable.Empty <string>().Select(x => x.Split('\t')).AsQueryable();
            PINQueryable <string[]> iplatlon     = new PINQueryable <string[]>(iplatlondata, new PINQAgentLogger("iplatlon"));

            // extract the IP address, and clip off the final octet.
            // truncating the last octet coarsens location, limiting re-identification
            var searchips = searchsubset.Select(x => x[0].Split('.'))
                            .Where(x => x.Count() == 4)
                            .Select(x => x[0] + "." + x[1] + "." + x[2] + ".0");

            // join queries x address; get coords, normalized to [-1, +1]
            // NOTE(review): y.First()[1] indexes a *character* of the first string in y and
            // converts it -- y[1]/y[2] may have been intended; confirm against PINQ's Join
            // result shape before changing.
            var coordinates = from x in searchips
                              join y in iplatlon on x equals y[0]
                              select new double[] { Convert.ToDouble(y.First()[1]) / 90.0,
                                                    Convert.ToDouble(y.First()[2]) / 180.0 };

            // prepare and output a html page visualization via virtual earth
            WriteHeader(args[0]);               // output the header of the .html
            Histogram(coordinates, 100, "");    // analyze data, output contents
            WriteFooter();                      // output the footer of the .html

            #endregion
        }
Exemplo n.º 7
0
        // Reports noisy vs clean counts, averages, and sums of school debt.
        // db and source are the private and raw views of the same records.
        public static void debt(PINQueryable<Record> db, IQueryable<Record> source)
        {
            // restrict both views to records carrying non-zero debt
            var noisyWithDebt = db.Where(r => r.debt != 0);
            var cleanWithDebt = source.Where(r => r.debt != 0);

            // scale debt down by 10000 so values fall inside the [-1,+1] range the
            // noisy aggregations expect, then scale results back up
            Expression<Func<Record, double>> scaledDebt = r => r.debt / 10000;
            double avgDebt      = noisyWithDebt.NoisyAverage(0.2, scaledDebt) * 10000;
            double cleanAvgDebt = cleanWithDebt.Average(scaledDebt) * 10000;

            Console.WriteLine("Noisy Schools with Debt: " + noisyWithDebt.NoisyCount(0.1) + "\t\t\t\tClean: " + cleanWithDebt.Count());
            Console.WriteLine("Noisy Average Debt: $" + (1000 * avgDebt) + "\t\t\t\t\tClean: $" + (1000 * cleanAvgDebt));

            // totals are taken over the full table, not just the non-zero subset
            double totalDebt      = db.NoisySum(0.2, scaledDebt) * 10000;
            double cleanTotalDebt = source.Sum(scaledDebt) * 10000;

            Console.WriteLine("Total Debt: $" + (1000 * totalDebt) + "\t\t\t\t\t\tClean: $" + (1000 * cleanTotalDebt));
        }
Exemplo n.º 8
0
        // computes and steps along the gradient of the logarithm of the Logistic Regression objective function
        public static double[] LogisticStep(PINQueryable <Example> input, double[] normal, double epsilon)
        {
            // compute the logistic probability of (xi, yi) under "normal", subtracted from (label + 1.0)/2.0 = target
            var errors = input.Select(x => new
            {
                vector = x.Vector,
                error  = (x.Label + 1.0) / 2.0 - 1.0 / (1 + Math.Exp(-x.Vector.Select((v, i) => v * normal[i]).Sum()))
            });

            // fold the average error into the normal
            var newnormal = new double[normal.Length];

            foreach (var coordinate in Enumerable.Range(0, normal.Length))
            {
                newnormal[coordinate] = normal[coordinate] + errors.NoisySum(epsilon, x => x.error * x.vector[coordinate]) * 0.00001;
            }
            return(newnormal);
        }
Exemplo n.º 9
0
        // Returns a symmetrized copy of the edge set in which each vertex appears as a
        // source at most *bound* times and as a target at most *bound* times, so the
        // final degree of any vertex is at most 2 * bound.
        public static PINQueryable <int[]> BoundDegree(PINQueryable <int[]> edges, int bound)
        {
            // reduce the degree of the graph
            var clamped = edges.GroupBy(edge => edge[0])                        // collect up edges by source
                          .SelectMany(bound, group => group.Take(bound))        // only keep *bound* of them
                          .GroupBy(edge => edge[1])                             // collect up edges by target
                          .SelectMany(bound, group => group.Take(bound));       // only keep *bound* of them

            // A more "privacy efficient" approach uses the generalized Distinct transformation.
            // The stability constant here is 4 instead of 4 * bound^2 using the GroupBy operations above.
            // clamped = edges.Distinct(bound, edge => edge[0])
            //                .Distinct(bound, edge => edge[1])

            // symmetrize (if interested) and return. degree is now at most 2 * bound.
            return(clamped.Select(x => new int[] { x[1], x[0] })
                   .Concat(clamped)
                   .Distinct());
        }
Exemplo n.º 10
0
        // For each epsilon, measures how far the noisy count strays from the exact
        // record count. Returns one error per epsilon, in the same order.
        static double[] GetErrorWholeCount(IQueryable <BSOM_DataSet_revised> data,
                                           PINQueryable <BSOM_DataSet_revised> search, double[] epsilons)
        {
            // exact (non-private) record count to compare against
            int trueCount = data.Count();

            var errors = new double[epsilons.Length];

            for (int i = 0; i < epsilons.Length; i++)
            {
                errors[i] = trueCount - search.NoisyCount(epsilons[i]);
            }

            return errors;
        }
Exemplo n.º 11
0
        // Compares a plain LINQ GroupBy/count against PINQ's Partition operator over
        // the same O1_PI_01 score buckets, printing noisy counts for each epsilon.
        static void TestPartitionWhere(IQueryable <BSOM_DataSet_revised> data,
                                       PINQueryable <BSOM_DataSet_revised> search, double[] epsilons)
        {
            // Raw LINQ baseline: bucket records by O1_PI_01 score and count each bucket.
            // The PINQ Partition call below emulates exactly this grouping.
            var grouped = data.GroupBy(x => x.O1_PI_01)
                              .Select(g => new { key = g.Key, count = g.Count() });

            Console.WriteLine("Count of items in distinct O1_PI_01 groups");
            foreach (var entry in grouped)
            {
                Console.WriteLine(String.Format("Score {0}: {1}",
                                                entry.key, entry.count));
            }

            // PINQ version, with partition instead of groupby
            // Partition is poorly documented, see example at
            //  https://github.com/LLGemini/PINQ/blob/master/TestHarness/TestHarness.cs

            // Partition requires the candidate keys up front: PINQ assumes the analyst
            //  already knows something about the data in order to use this powerful
            //  operator. Keys and values must share a type, hence string keys that
            //  exactly match the values returned by the raw query above.
            string[] scoreKeys = { "0.5000", "0.5500", "0.6000", "0.6500", "0.7000",
                                   "0.7500", "0.8000", "0.8500", "0.9000", "0.9500", "1.0000" };
            var pinqParts = search.Partition(scoreKeys, x => x.O1_PI_01);

            Console.WriteLine("Noisy Counts:");
            foreach (double ep in epsilons)
            {
                foreach (string key in scoreKeys)
                {
                    Console.WriteLine(String.Format("Epsilon {0}\tScore {1}:" +
                                                    " {2}", ep, key, pinqParts[key].NoisyCount(ep)));
                }
                Console.WriteLine("---");
            }
        }
Exemplo n.º 12
0
        // Loads the lab CSV into Record objects, wraps them behind a shared privacy
        // budget, and runs the full battery of reporting queries.
        static void Main(string[] args)
        {
            // one Record per CSV line; setAll assigns every property from the field array
            string[] lines = System.IO.File.ReadAllLines(@"C:\Users\Administrator\Documents\Visual Studio 2010\Projects\DPLab\ConsoleApplication2\DPLabData.csv");
            IList<Record> records = new List<Record>();

            foreach (string line in lines)
            {
                Record rec = new Record();
                rec.setAll(line.Split(','));
                records.Add(rec);
            }

            // "source" is the raw view; "db" is the privacy-tracked view sharing a
            // single budget of 5.0 units of epsilon across all queries below
            var source = records.AsQueryable<Record>();
            var db     = new PINQueryable<Record>(source, new PINQAgentBudget(5.0));

            string separator = "=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=+~+=\n";

            distTotalRev(db, source);                                                       // Chart 1 (4 figures)      (e = 1)
            Console.WriteLine(separator);
            distLocalRev(db, source);                                                       // Chart 2 (4 figures)      (e = 1)
            Console.WriteLine(separator);
            distExpenses(db, source);                                                       // Chart 3 (4 figures)      (e = 1)
            Console.WriteLine(separator);
            debt(db, source);                                                               // Figure 1, 2, and 3       (e = 0.5)
            Console.WriteLine(separator);
            costPerStudent(db, source);                                                     // Figure 4                 (e = 0.2)
            highEnrollment(db, source);                                                     // Figure 5,6               (e = 0.4)
            lowEnrollment(db, source);                                                      // Figure 7,8               (e = 0.4)
            Console.WriteLine(separator);
            teachSalary(db, source);                                                        // Figure 9, 10, 11         (e = 0.5)

            Console.ReadLine(); // pause so the console window stays open
        }
Exemplo n.º 13
0
        // For each epsilon, error of the noisy count of records with O1_PI_01 < 0.8
        // relative to the exact count under the same predicate.
        static double[] GetErrorRangedCount(IQueryable <BSOM_DataSet_revised> data,
                                            PINQueryable <BSOM_DataSet_revised> search, double[] epsilons)
        {
            // exact (non-private) count of records under the threshold
            double trueSum = data.Where(
                x => Convert.ToDouble(x.O1_PI_01) < 0.8).Count();

            var errors = new double[epsilons.Length];

            for (int i = 0; i < epsilons.Length; i++)
            {
                errors[i] = trueSum - search.Where(x => Convert.ToDouble(
                                                       x.O1_PI_01) < 0.8).NoisyCount(epsilons[i]);
            }

            return errors;
        }
Exemplo n.º 14
0
        // For each epsilon, error of the noisy average of O1_PI_01 relative to the
        // exact average over the raw data.
        static double[] GetErrorAverage(IQueryable <BSOM_DataSet_revised> data,
                                        PINQueryable <BSOM_DataSet_revised> search, double[] epsilons)
        {
            // exact (non-private) average to compare against
            double trueAvg = data.Average(
                x => Convert.ToDouble(x.O1_PI_01));

            var errors = new double[epsilons.Length];

            for (int i = 0; i < epsilons.Length; i++)
            {
                errors[i] = trueAvg - search.NoisyAverage(
                    epsilons[i], x => Convert.ToDouble(x.O1_PI_01));
            }

            return errors;
        }
Exemplo n.º 15
0
        // Partitions the Cleveland heart-disease data by its diagnosis field and prints
        // exact vs noisy (epsilon = 1.0) counts for each diagnosis class.
        public static void test5(String[] args)
        {
            // preparing a private data source: one comma-delimited record per line
            var             filename = @"..\..\processed.cleveland.data";
            var             data     = File.ReadAllLines(filename, Encoding.UTF8).AsQueryable();
            PINQAgentLogger agent    = new PINQAgentLogger(filename);
            var             text     = new PINQueryable <string>(data, agent);

            // partition by field 13 using caller-supplied keys; assumes args holds the
            // candidate diagnosis values "0".."4" used below -- TODO confirm at call site
            var parts = text.Select(line => line.Split(','))
                        .Partition(args, fields => fields[13]);

            // exact vs noisy count per diagnosis class, then the exact total
            Console.WriteLine("不患心脏病的人数{0},加入噪声后:{1}", parts["0"].count(), parts["0"].NoisyCount(1.0));
            Console.WriteLine("患1病的人数{0},加入噪声后:{1}", parts["1"].count(), parts["1"].NoisyCount(1.0));
            Console.WriteLine("患2病的人数{0},加入噪声后:{1}", parts["2"].count(), parts["2"].NoisyCount(1.0));
            Console.WriteLine("患3病的人数{0},加入噪声后:{1}", parts["3"].count(), parts["3"].NoisyCount(1.0));
            Console.WriteLine("患4病的人数{0},加入噪声后:{1}", parts["4"].count(), parts["4"].NoisyCount(1.0));
            Console.WriteLine("总人数:{0}", parts["0"].count() + parts["1"].count() + parts["2"].count() + parts["3"].count() + parts["4"].count());
            Console.WriteLine();

            Console.ReadKey();
        }
Exemplo n.º 16
0
        // Repeatedly issues epsilon = 1 noisy counts against a PINQueryable backed by a
        // budget of 50, and reports how many succeed before the agent rejects a request.
        static void TestExhaustedPrivacyBudget(
            IQueryable <BSOM_DataSet_revised> data)
        {
            // Note we need no LINQ version of this query, as there is no
            //  privacy budget to compare to with it

            // We first need a PINQueryable object that actually
            //  checks against a budget
            PINQueryable <BSOM_DataSet_revised> search =
                new PINQueryable <BSOM_DataSet_revised>(
                    data, new PINQAgentBudget(50));

            // Essentially apply transformations until we can't anymore
            // This will be done by repetitively using a 'where' transform,
            //  while incrementing the actual threshold we intend to cut
            Console.Write("Number of iterations we can do before privacy" +
                          " budget is exhausted: ");
            double threshold = 0.1;
            int    iters     = 0;

            while (true)
            {
                // Do a selection of data
                var result = search.Where(x =>
                                          Convert.ToDouble(x.O1_PI_01) > threshold);

                // Try a noisy count, breaking once the agent throws
                try {
                    result.NoisyCount(1);
                } catch (Exception) {   // was `catch (Exception e)` -- e was unused (CS0168)
                    Console.WriteLine(iters);
                    break;
                }

                // Increment threshold and counter
                threshold += 0.1;
                iters++;
            }
        }
Exemplo n.º 17
0
        // Walkthrough of basic PINQ operations over a text file: counts, filtering,
        // projection, and partitioning by word count, each compared with a noisy variant.
        public static void function1()
        {
            // seal the raw file contents behind a PINQueryable; use afterwards is unrestricted
            var path   = @"..\..\test2.txt";
            var logger = new PINQAgentLogger(path);
            var text   = new PINQueryable <string>(File.ReadAllLines(path).AsQueryable(), logger);

            /**** Data is now sealed up. Use from this point on is unrestricted ****/


            // exact vs noisy count of the number of lines of text
            Console.WriteLine("Lines of text: " + text.count() + "  Lines of text: " + text.NoisyCount(1.0));
            //Console.WriteLine("**privacy change**\tbudget:{0}", logger.getBudget());


            // restrict using a user defined predicate, and count again (with noise)
            Console.WriteLine("Lines with semi-colons: " + text.Where(line => line.Contains(';')).NoisyCount(1.0));
            //Console.WriteLine("**privacy change**\tbudget:{0}", logger.getBudget());

            // split each record on '*' (declarative, so nothing happens yet)
            var words = text.Select(line => line.Split('*'));

            Console.WriteLine("words: {0}, words_noisy: {1}", words.count(), words.NoisyCount(1.0));

            // partition the data by number of "words", and count how many of each type there are
            var wordCounts = new int[] { 0, 1, 2, 3, 4, 5 };
            var parts      = words.Partition(wordCounts, line => line.Count());

            foreach (var count in wordCounts)
            {
                Console.WriteLine("");
                Console.WriteLine("Lines with " + count + " words(no noisy):" + "\t" + parts[count].count());
                Console.WriteLine("Lines with " + count + " words(noisy):" + "\t" + parts[count].NoisyCount(1.0));
            }

            Console.ReadKey();
        }
Exemplo n.º 18
0
        // Reads the precedence table from disk and exposes it through the
        // privacy-tracked EventQuery, guarded by the shared Agent.
        private static void wrapPrecedenceTable(string precedenceFile)
        {
            IEnumerable <Event> precedenceEvents = cc.Read <Event>(precedenceFile, inputFileDescription);

            EventQuery = new PINQueryable <Event>(precedenceEvents.AsQueryable(), Agent);
        }
Exemplo n.º 19
0
        // Demonstrates private machine learning over synthetic data: k-means clustering
        // and logistic regression are run; mean/covariance, perceptron, and SVM variants
        // are left commented out as alternative exercises.
        static void OtherMain(string[] args)
        {
            var dimensions = 8;
            var records    = 10000;
            var sourcedata = GenerateData(dimensions).Take(records).ToArray().AsQueryable();
            // NOTE(review): the null agent means no privacy budget is enforced here
            var securedata = new PINQueryable <double[]>(sourcedata, null);

            // let's start by computing the centroid of the data
            //            var means = Mean(securedata, dimensions, 0.1);
            //
            //            Console.WriteLine("mean vector:");
            //            foreach (var mean in means)
            //                Console.Write("\t{0:F4}", mean);
            //            Console.WriteLine();
            //            Console.WriteLine();
            //
            //
            //            // we can also center the data and compute its covariance
            //            var centered = securedata.Select(x => x.Select((v, i) => v - means[i]).ToArray());
            //            var covariance = Covariance(centered, dimensions, 8);
            //
            //            Console.WriteLine("covariance matrix:");
            //            foreach (var row in covariance)
            //            {
            //                foreach (var entry in row)
            //                    Console.Write("\t{0:F4}", entry);
            //                Console.WriteLine();
            //            }
            //            Console.WriteLine();


            // iterative algorithms are also possible. we'll do k-means first,
            // seeding the centers with the first k generated points
            var k          = 3;
            var centers    = GenerateData(dimensions).Take(k).ToArray();
            var iterations = 5;

            foreach (var iteration in Enumerable.Range(0, iterations))
            {
                kMeansStep(securedata, centers, 0.1);   // updates centers in place
            }

            Console.WriteLine("kMeans: {0} centers, {1} iterations", k, iterations);
            foreach (var center in centers)
            {
                foreach (var value in center)
                {
                    Console.Write("\t{0:F4}", value);
                }
                Console.WriteLine();
            }
            Console.WriteLine();


            // Moving to supervised learning, let's label the points by whether they are nearest the first center or not
            var labeled = securedata.Select(x => new Example(x, NearestCenter(x, centers) == centers[0] ? 1.0 : -1.0));

            // the Perceptron algorithm repeatedly adds misclassified examples to a normal vector
            //            var perceptronnormal = GenerateData(dimensions).First();
            //            foreach (var index in Enumerable.Range(0, iterations))
            //                perceptronnormal = PerceptronStep(labeled, perceptronnormal, 0.1);
            //
            //            var perceptronerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * perceptronnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
            //            Console.WriteLine("perceptron error rate:\t\t{0:F4}", perceptronerror);
            //
            //            // the Support Vector Machine attempts to find a maximum margin classifier
            //            var supportvectornormal = GenerateData(dimensions).First();
            //            foreach (var index in Enumerable.Range(0, iterations))
            //                supportvectornormal = SupportVectorStep(labeled, supportvectornormal, 0.1);
            //
            //            var supportvectorerror = labeled.NoisyAverage(0.1, x => x.label * x.vector.Select((v, i) => v * supportvectornormal[i]).Sum() < 0.0 ? 1.0 : 0.0);
            //            Console.WriteLine("support vector error rate:\t{0:F4}", supportvectorerror);

            // Logistic regression optimizes the likelihood of the labels under the logistic function
            var logisticnormal = GenerateData(dimensions).First();

            foreach (var index in Enumerable.Range(0, iterations))
            {
                logisticnormal = LogisticStep(labeled, logisticnormal, 0.1);
            }

            // fraction of examples the learned normal misclassifies, measured with noise
            var logisticerror = labeled.NoisyAverage(0.1, x => x.Label * x.Vector.Select((v, i) => v * logisticnormal[i]).Sum() < 0.0 ? 1.0 : 0.0);

            Console.WriteLine("logistic error rate:\t\t{0:F4}", logisticerror);

            Console.ReadKey();
        }
Exemplo n.º 20
0
        // Private graph analysis over a synthetic edge set: degree distribution,
        // assortativity, triangle/path counts, and a path-based pagerank estimate,
        // all charged against a single privacy budget.
        static void Main(string[] args)
        {
            var participants = 1000;
            var edges        = 10000;

            var sourcegraph = GenerateData(participants).Take(edges).ToArray().AsQueryable();
            var agent       = new PINQAgentBudget(10000);
            var securegraph = new PINQueryable <int[]>(sourcegraph, agent);

            // we'll start by computing degree distributions
            // (edges grouped by source vertex; each group's size is that vertex's out-degree)
            var nodes = securegraph.GroupBy(x => x[0]);

            var nodeparts = nodes.Partition(Enumerable.Range(0, 20).ToArray(), x => x.Count());

            foreach (var degree in Enumerable.Range(0, 20))
            {
                Console.WriteLine("degree {0}:\t{1:F2}\t+/- {2:F2}", degree, nodeparts[degree].NoisyCount(0.1), 10.0);
            }

            Console.WriteLine();


            // for a buch of the analyses, we want the degree to be bounded
            var bound   = 10;
            var bounded = BoundDegree(securegraph, bound).Materialize();


            // with a degree-bounded graph, we can measure things like assortativity. Each edge is joined using both of its endpoints.
            // this uses the "bounded-join", which imposes a limit on the number of records with each key, to bound the transformation's stability.
            var edgedegrees = securegraph.Join(nodes, edge => edge[0], node => node.Key, bound, bound, (edge, node) => new int[] { node.Count(), edge[1] })
                              .Join(nodes, edge => edge[1], node => node.Key, bound, bound, (edge, node) => new int[] { edge[0], node.Count() });

            Console.WriteLine("Assortativity:");
            // 5x5 table of noisy counts for source/target degree pairs in [8, 13)
            var srcparts = edgedegrees.Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[0]);

            foreach (var i in Enumerable.Range(8, 5))
            {
                var dstparts = srcparts[i].Partition(Enumerable.Range(8, 5).ToArray(), edge => edge[1]);
                foreach (var j in Enumerable.Range(8, 5))
                {
                    Console.Write("\t{0:F2}", dstparts[j].NoisyCount(0.1));
                }

                Console.WriteLine();
            }
            Console.WriteLine();


            // we can also measure the correlation coefficient: the number of triangles divided by the number of length two paths.
            var paths2    = ExtendPaths(bounded, bounded, bound, bound);
            var paths3    = ExtendPaths(paths2, bounded, bound * bound, bound);
            var triangles = paths3.Where(x => x[0] == x[3]);    // length-3 paths that return to their start

            Console.WriteLine("Triangles:\t{0}", triangles.NoisyCount(0.1));
            Console.WriteLine("Len 2 paths:\t{0}", paths2.NoisyCount(0.1));
            Console.WriteLine();


            // one way to view pagerank is the sum over all paths arriving at a vertex, of the probability of
            // traversing that path. usually this looks something like (alpha/degree)^length
            // although we'll have to have increasingly noisy counts with longer paths, to prevent privacy explosion,
            // the contributions of these terms are scaled down commensurately.

            var depth = 3;
            var paths = new PINQueryable <int[]> [depth];

            // paths[i] holds all paths of length i + 1 in the degree-bounded graph
            paths[0] = bounded;
            foreach (var index in Enumerable.Range(1, depth - 1))
            {
                paths[index] = ExtendPaths(paths[index - 1], bounded, Convert.ToInt32(Math.Pow(bound, index)), bound).Materialize();
            }

            // for any set of endpoints (too small a set gives bad results, as privacy would dictate) we compute
            // (here: vertices whose id is divisible by 10; epsilon shrinks with path length)
            var pagerank = 0.0;

            foreach (var index in Enumerable.Range(0, depth))
            {
                pagerank += paths[index].Where(path => path.Last() % 10 == 0)
                            .NoisyCount(0.1 * Math.Pow(0.85 / bound, index)) * Math.Pow(0.85 / bound, index);

                Console.WriteLine("pagerank using paths of length at most {0}:\t{1}", index + 1, pagerank);
            }

            Console.ReadKey();
        }
Exemplo n.º 21
0
        // Reads the trace/sequence table from disk and exposes it through the
        // privacy-tracked TraceQuery, guarded by the shared Agent.
        private static void wrapSequenceTable(string sequenceFile)
        {
            IEnumerable <Trace> sequenceTraces = cc.Read <Trace>(sequenceFile, inputFileDescription);

            TraceQuery = new PINQueryable <Trace>(sequenceTraces.AsQueryable(), Agent);
        }