Example #1
0
        /// <summary>
        /// Builds a random instance with <c>RandomInstance.RandomizeCoordinatesAndSave</c> and passes it to
        /// <c>EnrichmentWrapper.SpatialmHGWrapper</c> as (dimension 0, dimension 1, flag) tuples.
        /// The wrapper's return value is not inspected.
        /// </summary>
        public void TestFrontendCall()
        {
            var settings       = new ConfigParams(tokenId: "10");
            var wrapper        = new EnrichmentWrapper(settings);
            var randomInstance = RandomInstance.RandomizeCoordinatesAndSave(20, settings, false);

            // Pair each element of Item1 with the matching boolean of Item2,
            // keeping only the first two dimensions of the coordinate.
            var points = randomInstance.Item1
                         .Zip(randomInstance.Item2,
                              (coord, flag) => new Tuple<double, double, bool>(coord.GetDimension(0), coord.GetDimension(1), flag))
                         .ToList();

            wrapper.SpatialmHGWrapper(points);
        }
Example #2
0
        /// <summary>
        /// Reads points from <c>zika.csv</c> (first line skipped), runs
        /// <c>EnrichmentWrapper.SpatialmHGWrapper</c> on them and prints the <c>pvalue</c> of every result.
        /// </summary>
        /// <remarks>
        /// Each remaining line is split on ','. Field 1 is compared with "1" to produce the boolean,
        /// fields 2 and 3 are parsed as doubles.
        /// </remarks>
        public void TestFrontendData()
        {
            var config = new ConfigParams(tokenId: "");
            var er     = new EnrichmentWrapper(config);

            // The CSV is machine-written data, so parse its numbers with the invariant culture.
            // The culture-sensitive overload would mis-read "1.5" (or throw) on machines whose
            // current culture uses ',' as the decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var data = File.ReadAllLines(@"zika.csv").Skip(1).Select(l => l.Split(','))
                       .Select(v => new Tuple<double, double, bool>(double.Parse(v[2], invariant), double.Parse(v[3], invariant), v[1] == "1"))
                       .ToList();
            var res = er.SpatialmHGWrapper(data);

            foreach (var r in res.Cast<SpatialmHGResult>())
            {
                Console.WriteLine(r.pvalue);
            }
        }
        /// <summary>
        /// Creates an <c>EnrichmentWrapper</c> from <paramref name="parameters"/> and returns the outcome of
        /// its <c>SpatialmHGWrapper</c> call on <paramref name="points"/>.
        /// </summary>
        /// <param name="points">Tuples handed unchanged to <c>SpatialmHGWrapper</c>.</param>
        /// <param name="parameters">Key/value settings handed to the <c>EnrichmentWrapper</c> constructor.</param>
        /// <returns>Whatever <c>SpatialmHGWrapper</c> returns for the given points.</returns>
        private static List<ISpatialmHGResult> getSpatialmHGResults(List<Tuple<double, double, bool>> points, Dictionary<string, string> parameters)
            => new EnrichmentWrapper(parameters).SpatialmHGWrapper(points);
Example #4
0
        /// <summary>
        /// Repeatedly generates a random instance and compares the p-value reported by
        /// <c>EnrichmentWrapper.SpatialmHGWrapper</c> with the one reported by
        /// <c>EnrichmentWrapper.mHGPivotWrapper</c> on the same points.
        /// </summary>
        /// <remarks>
        /// Every pair of p-values becomes one line of <c>pivot_vs_exhaustive.csv</c>; the per-instance
        /// log10 differences are written to <c>experiment_pvaldiffs.txt</c>. The method replaces the
        /// static <c>Config</c> and waits on <c>Console.ReadKey</c> before returning.
        /// </remarks>
        /// <param name="numcoords">Number of coordinates requested for every random instance.</param>
        /// <param name="numiter">Number of random instances to generate and compare.</param>
        public static void CompareExhaustiveWithPivots(int numcoords = 50, int numiter = 500)
        {
            Config = new ConfigParams("");
            #region init
            StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
            Config.timer.Start();
            #endregion

            var resultPairedDiff = new List<double>();
            int victories = 0, ties = 0;
            using (var fileout = new StreamWriter(@"pivot_vs_exhaustive.csv"))
            {
                for (var instanceIter = 0; instanceIter < numiter; instanceIter++)
                {
                    StaticConfigParams.filenamesuffix = instanceIter.ToString();
                    Console.WriteLine("File {0}", instanceIter);
                    var res = Program.RandomizeCoordinatesAndSave(numcoords, false);
                    List<ICoordinate> coordinates = res.Item1;
                    List<bool>        labels      = res.Item2;

                    // Size of THIS instance. It is tracked per iteration on purpose: subtracting the
                    // filtered points from the numcoords parameter itself would make every later
                    // instance be generated smaller than the previous one.
                    var instanceSize = numcoords;

                    var zeros       = labels.Count(l => !l);
                    var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
                    if (filterCount > 0)
                    {
                        Console.WriteLine("Filtering {0} far away points", filterCount);
                        var positives = new List<ICoordinate>();
                        var negatives = new List<ICoordinate>();
                        var negIds    = new List<int>();
                        for (var i = 0; i < coordinates.Count; i++)
                        {
                            if (labels[i])
                            {
                                positives.Add(coordinates[i]);
                            }
                            else
                            {
                                negatives.Add(coordinates[i]);
                                negIds.Add(i);
                            }
                        }

                        // Indices of the filterCount negatives whose nearest positive is furthest away.
                        var negMinDist = new HashSet<int>(negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                                                          .OrderByDescending(n => n.PosMinDist).Select(t => t.Id).Take(filterCount));
                        coordinates   = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
                        labels        = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
                        instanceSize -= filterCount;
                    }

                    //Actual work starts here
                    var ones      = labels.Count(l => l);
                    var linecount = ones * (instanceSize - ones);
                    Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

                    mHGJumper.Initialize(ones, instanceSize - ones);
                    // The threshold is used as-is; the Bonferroni division by Cellcount is not applied.
                    mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

                    var ew = new EnrichmentWrapper(Config);
                    Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", instanceSize, ones, Config.Cellcount, mHGJumper.optHGT);
                    var instanceData      = coordinates.Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList();
                    var resultsExhaustive = ew.SpatialmHGWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();
                    var resultsPivot      = ew.mHGPivotWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();
                    fileout.WriteLine($"{resultsExhaustive.pvalue}, {resultsPivot.pvalue}");

                    if (resultsExhaustive.pvalue < resultsPivot.pvalue)
                    {
                        victories++;
                    }
                    else if (resultsExhaustive.pvalue == resultsPivot.pvalue)
                    {
                        ties++;
                    }
                    else
                    {
                        // The pivot result has the smaller p-value; flagged for investigation.
                        Console.WriteLine($"Debug me");
                    }
                    resultPairedDiff.Add(Math.Log10(resultsPivot.pvalue) - Math.Log10(resultsExhaustive.pvalue));
                }
            }

            Console.WriteLine($"Out of {numiter} iterations, spatial enrichment won in {victories} and tied in {ties}.");
            Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
            File.WriteAllLines("experiment_pvaldiffs.txt", resultPairedDiff.Select(v => v.ToString()).ToArray());
            Console.ReadKey();
        }
Example #5
0
        /// <summary>
        /// Generates one random population, computes its result with
        /// <c>EnrichmentWrapper.SpatialmHGWrapper</c>, and then repeatedly compares that result with a
        /// gradient-skipping sweep over a random subsample and with two pivot-grid strategies.
        /// </summary>
        /// <remarks>
        /// One line per subsample is written to <c>sample_vs_exhaustive_{suffix}.csv</c>. The method
        /// replaces the static <c>Config</c> and <c>Program.Config</c>.
        /// </remarks>
        /// <param name="numcoords">Number of coordinates requested for the random population.</param>
        /// <param name="subsampleSize">Number of population points drawn for each subsample; must be at least 2.</param>
        /// <param name="numiter">Exclusive upper bound of the iteration counter. The counter starts at 1, so <c>numiter - 1</c> subsamples are evaluated.</param>
        /// <param name="suffix">Assigned to <c>StaticConfigParams.filenamesuffix</c> before the population is generated, and embedded in the output CSV name.</param>
        /// <returns>For each evaluated subsample, log10(subsample p-value) minus log10(exhaustive p-value).</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="subsampleSize"/> is smaller than 2.</exception>
        /// <exception cref="InvalidOperationException">The population does not contain both label classes, so no mixed subsample can be drawn.</exception>
        public static List<double> CompareExahustiveWithSubsamplingInput(int numcoords = 50, int subsampleSize = 20, int numiter = 100, string suffix = "0")
        {
            // A subsample with fewer than two points can never contain both labels; the
            // resampling loop below would spin forever.
            if (subsampleSize < 2)
            {
                throw new ArgumentOutOfRangeException(nameof(subsampleSize), subsampleSize, "A subsample needs at least two points to contain both labels.");
            }

            Config = new ConfigParams("");
            #region init
            StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
            Config.timer.Start();
            #endregion

            //Load coordinates and labels
            var resultPairedDiff = new List<double>();
            int victories = 0, ties = 0;
            Program.Config = Config;
            StaticConfigParams.filenamesuffix = suffix;
            var res = Program.RandomizeCoordinatesAndSave(numcoords, true);
            List<ICoordinate> coordinates = res.Item1;
            List<bool>        labels      = res.Item2;

            var zeros       = labels.Count(l => !l);
            var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
            if (filterCount > 0)
            {
                Console.WriteLine("Filtering {0} far away points", filterCount);
                var positives = new List<ICoordinate>();
                var negatives = new List<ICoordinate>();
                var negIds    = new List<int>();
                for (var i = 0; i < coordinates.Count; i++)
                {
                    if (labels[i])
                    {
                        positives.Add(coordinates[i]);
                    }
                    else
                    {
                        negatives.Add(coordinates[i]);
                        negIds.Add(i);
                    }
                }

                // Indices of the filterCount negatives whose nearest positive is furthest away.
                var negMinDist = new HashSet<int>(negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                                                  .OrderByDescending(n => n.PosMinDist).Select(t => t.Id).Take(filterCount));
                coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
                labels      = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
                numcoords  -= filterCount;
            }

            var instanceDataCoords = coordinates.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
            var instanceData       = coordinates.Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList();
            Config.SKIP_SLACK = -1000;
            var ew = new EnrichmentWrapper(Config);
            var resultsExhaustive = ew.SpatialmHGWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();

            //Actual work starts here
            var ones      = labels.Count(l => l);
            var linecount = ones * (numcoords - ones);
            Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

            mHGJumper.Initialize(ones, numcoords - ones);
            // The threshold is used as-is; the Bonferroni division by Cellcount is not applied.
            mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

            // The resampling loop only accepts subsamples holding both labels. If the population has
            // a single label class such a subsample does not exist and the loop would never exit.
            if (!labels.Contains(true) || !labels.Contains(false))
            {
                throw new InvalidOperationException("Cannot draw a mixed subsample: the population does not contain both label classes.");
            }

            // Deferred query: each enumeration re-runs the projection, drawing a fresh random key
            // per point, so every ToList() below yields a new random subsample.
            var randomSubsample = coordinates
                                  .Zip(labels, (a, b) => new { Coords = a, Labels = b, Rand = StaticConfigParams.rnd.Next() })
                                  .OrderBy(v => v.Rand).Take(subsampleSize);

            using (var fileout = new StreamWriter($"sample_vs_exhaustive_{suffix}.csv"))
            {
                // NOTE(review): the counter starts at 1, so numiter - 1 subsamples are evaluated.
                // Presumably this keeps the per-sample file suffix distinct from the default
                // population suffix "0" - confirm before changing.
                for (var instanceIter = 1; instanceIter < numiter; instanceIter++)
                {
                    StaticConfigParams.filenamesuffix = instanceIter.ToString();
                    Console.WriteLine("File {0}", instanceIter);

                    // Redraw until the subsample contains both labels.
                    var sampleCoords = randomSubsample.ToList();
                    while (sampleCoords.All(v => v.Labels) || !sampleCoords.Any(v => v.Labels))
                    {
                        sampleCoords = randomSubsample.ToList();
                    }
                    if (StaticConfigParams.WriteToCSV)
                    {
                        Generics.SaveToCSV(sampleCoords.Select(t => t.Coords.ToString() + "," + Convert.ToDouble(t.Labels)),
                                           $@"coords_{StaticConfigParams.filenamesuffix}.csv");
                    }

                    // Use the number of points actually drawn: Take() yields fewer than
                    // subsampleSize when the population itself is smaller.
                    var sampleSize   = sampleCoords.Count;
                    ones             = sampleCoords.Count(l => l.Labels);
                    linecount        = ones * (sampleSize - ones);
                    Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

                    Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", sampleSize, ones, Config.Cellcount, mHGJumper.optHGT);

                    mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
                    var tesselation = new Tesselation(sampleCoords.Select(v => (Coordinate)v.Coords).ToList(), sampleCoords.Select(v => v.Labels).ToList(), null, Config)
                    {
                        ProjectedFrom = coordinates,
                        SourceLabels  = labels.ToArray()
                    };
                    var topResults = tesselation.GradientSkippingSweep(numStartCoords: 20, numThreads: Environment.ProcessorCount - 1).First();
                    Line.Reset();

                    if (resultsExhaustive.pvalue < topResults.mHG.Item1)
                    {
                        victories++;
                    }
                    else if (resultsExhaustive.pvalue == topResults.mHG.Item1)
                    {
                        ties++;
                    }
                    else
                    {
                        // The subsample result has the smaller p-value; flagged for investigation.
                        Console.WriteLine($"Debug me");
                    }
                    var pdiff = Math.Log10(topResults.mHG.Item1) - Math.Log10(resultsExhaustive.pvalue);
                    resultPairedDiff.Add(pdiff);

                    mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
                    Console.Write($"Uniform grid strategy @{Config.Cellcount} pivots... ");
                    var uniformGridFactory = new Gridding();
                    uniformGridFactory.GeneratePivotGrid(Convert.ToInt64(Config.Cellcount));
                    var uniformGridPivotlst = uniformGridFactory.GetPivots().ToList();
                    var uniformGridPivot    = uniformGridPivotlst.AsParallel().Max(p => -Math.Log10(EnrichmentAtPivot(instanceDataCoords, p)));
                    Console.WriteLine($"p={uniformGridPivot:e}");

                    Console.Write($"Empirical grid strategy @{Config.Cellcount} pivots... ");
                    mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
                    var empiricalGridFactory = new Gridding();
                    empiricalGridFactory.GenerateEmpricialDensityGrid(Convert.ToInt64(Config.Cellcount), instanceDataCoords);
                    var empiricalGridPivotlst = empiricalGridFactory.GetPivots().ToList();
                    var empiricalGridPivot    = empiricalGridPivotlst.AsParallel().Max(p => -Math.Log10(EnrichmentAtPivot(instanceDataCoords, p)));
                    Console.WriteLine($"p={empiricalGridPivot:e}");

                    fileout.WriteLine($"{-Math.Log10(resultsExhaustive.pvalue)}, {-Math.Log10(topResults.mHG.Item1)}, {uniformGridPivot}, {empiricalGridPivot}");
                }
            }

            // Report the number of comparisons that actually ran rather than numiter.
            Console.WriteLine($"Out of {resultPairedDiff.Count} iterations, spatial enrichment won in {victories} and tied in {ties}.");
            Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
            return resultPairedDiff;
        }
        /// <summary>
        /// Program entry point. Depending on <c>Config.ActionList</c> it runs one of the experiments and
        /// returns; otherwise it loads (or randomly generates) a labelled coordinate set, runs the
        /// enrichment wrapper on it and writes the results to CSV files.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. They are parsed into a <c>CommandlineParameters</c> instance; when
        /// at least one argument is present, <c>args[0]</c> is also used as the input file path.
        /// </param>
        /// <exception cref="ArgumentException"><c>args[0]</c> is given but the file does not exist.</exception>
        /// <exception cref="NotSupportedException">The loaded coordinates are neither <c>Coordinate</c> nor <c>Coordinate3D</c>.</exception>
        static void Main(string[] args)
        {
            var options = new CommandlineParameters();
            // The boolean result of the strict parse is not inspected here.
            Parser.Default.ParseArgumentsStrict(args, options);

            var numcoords = 300;

            Config = new ConfigParams("");

            if ((Config.ActionList & Actions.Experiment_ComparePivots) != 0)
            {
                Console.WriteLine(@"Running pivot comparison experiment");
                Experiments.CompareExhaustiveWithPivots(numcoords, numiter: 30);
                return;
            }
            if ((Config.ActionList & Actions.Experiment_SampleLines) != 0)
            {
                Console.WriteLine(@"Running sampling comparison experiment");
                var subsamples = new[] { 10, 20, 30 };
                var population = new[] { 40, 60, 100 };
                var counter    = 0;
                foreach (var nu in subsamples)
                {
                    foreach (var N in population)
                    {
                        Experiments.CompareExahustiveWithSubsamplingInput(N, nu, 50, counter++.ToString());
                    }
                }
                return;
            }

            if (Config.SKIP_SLACK != 0)
            {
                Console.WriteLine(@"Warning! Current configuration uses CONST_SKIP_SLACK={0}", Config.SKIP_SLACK);
            }
            if (StaticConfigParams.WriteToCSV)
            {
                Console.WriteLine(@"Warning! Current configuration writes cells to CSV - this is SLOW.");
            }

            #region init
            StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
            Config.timer.Start();
            #endregion

            // Make sure the output directories exist and start out empty.
            foreach (var dir in new[] { "Cells", "Planes" })
            {
                var di = new DirectoryInfo(dir);
                if (!di.Exists)
                {
                    di.Create();
                }
                foreach (FileInfo file in di.GetFiles())
                {
                    file.Delete();
                }
            }

            // Remove leftovers of previous runs from the working directory. The second mask used to
            // end in a space; it is written without it so matching does not depend on the runtime
            // trimming the pattern.
            foreach (var filemask in new[] { "lines_*.csv", "coordSample_*.csv" })
            {
                FileInfo[] taskFiles = new DirectoryInfo(Directory.GetCurrentDirectory()).GetFiles(filemask);
                foreach (FileInfo file in taskFiles)
                {
                    file.Delete();
                }
            }

            //Load coordinates and labels
            var infile     = Path.GetFileNameWithoutExtension(args.Length > 0 ? args[0] : "");
            var identities = new List<string>();
            for (var instanceIter = 0; instanceIter < 1; instanceIter++)
            {
                List<ICoordinate> coordinates;
                List<bool>        labels;
                StaticConfigParams.filenamesuffix = instanceIter.ToString();
                Console.WriteLine("File {0}", instanceIter);
                if (args.Length > 0)
                {
                    if (!File.Exists(args[0]))
                    {
                        throw new ArgumentException("Input file not found!");
                    }
                    var res = LoadCoordinatesFromFile(args, ref numcoords, identities);
                    coordinates = res.Item1;
                    labels      = res.Item2;
                }
                else
                {
                    var res = RandomizeCoordinatesAndSave(numcoords);
                    coordinates = res.Item1;
                    labels      = res.Item2;
                }

                var zeros       = labels.Count(l => !l);
                var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
                if (filterCount > 0)
                {
                    Console.WriteLine("Filtering {0} far away points", filterCount);
                    var positives = new List<ICoordinate>();
                    var negatives = new List<ICoordinate>();
                    var negIds    = new List<int>();
                    for (var i = 0; i < coordinates.Count; i++)
                    {
                        if (labels[i])
                        {
                            positives.Add(coordinates[i]);
                        }
                        else
                        {
                            negatives.Add(coordinates[i]);
                            negIds.Add(i);
                        }
                    }

                    // Indices of the filterCount negatives whose nearest positive is furthest away.
                    var negMinDist = new HashSet<int>(negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                                                      .OrderByDescending(n => n.PosMinDist).Select(t => t.Id).Take(filterCount));
                    coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
                    labels      = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
                    numcoords  -= filterCount;
                }

                //Actual work starts here
                var ones      = labels.Count(l => l);
                var linecount = ones * (numcoords - ones);
                Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

                //Look at lazy caretaker numbers. Note, we don't actually cover open cells
                //so its -linecount as each line has two open cells on either side of it,
                //and each open cell is made up of two lines.

                mHGJumper.Initialize(ones, numcoords - ones);
                // The threshold is used as-is; the Bonferroni division by Cellcount is not applied.
                mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

                var coordType = coordinates.First().GetType();
                var ew        = new EnrichmentWrapper(Config);
                List<ISpatialmHGResult> results;
                if (coordType == typeof(Coordinate3D))
                {
                    Config.Cellcount += MathExtensions.Binomial(linecount, 3);
                    Console.WriteLine(@"Projecting 3D problem to collection of 2D {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
                    results = ew.SpatialmHGWrapper3D(coordinates.Zip(labels,
                                                                     (a, b) => new Tuple<double, double, double, bool>(a.GetDimension(0), a.GetDimension(1),
                                                                                                                        a.GetDimension(2), b)).ToList(), options.BatchMode);
                }
                else if (coordType == typeof(Coordinate))
                {
                    Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
                    results = ew.SpatialmHGWrapper(coordinates.Zip(labels, (a, b) =>
                                                                   new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList());
                }
                else
                {
                    // Without this branch the result list stayed null and the loop below failed
                    // with a NullReferenceException that did not name the cause.
                    throw new NotSupportedException($"Unsupported coordinate type '{coordType}'.");
                }

                // Write one CSV per result and remember the paths for the optional upload below.
                var cellFiles = new List<string>(results.Count);
                for (var resid = 0; resid < results.Count; resid++)
                {
                    var cellFile = $@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv";
                    results[resid].SaveToCSV(cellFile);
                    cellFiles.Add(cellFile);
                }

                var mhgListFile = $"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv";
                using (var outfile = new StreamWriter(mhgListFile))
                {
                    foreach (var res in Config.mHGlist.Where(t => t != null))
                    {
                        outfile.WriteLine("{0},{1}", res.Item2, res.Item1);
                    }
                }

                if (options.BatchMode)
                {
                    AzureBatchExecution.UploadFileToContainer(mhgListFile, options.SaasUrl);
                    foreach (var cellFile in cellFiles)
                    {
                        AzureBatchExecution.UploadFileToContainer(cellFile, options.SaasUrl);
                    }
                }
            }

            //Finalize
            if (args.Length == 0 || Debugger.IsAttached)
            {
                Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
                Console.ReadKey();
            }
        }