Exemplo n.º 1
0
        /// <summary>
        /// Runs the search strategies enabled in <c>Config.ActionList</c> on a labelled 2D point set.
        /// </summary>
        /// <param name="coords">2D coordinates of the points.</param>
        /// <param name="labels">Binary labels, paired with <paramref name="coords"/> by position.</param>
        /// <param name="projectedFrom">Optional source points; when given they are stored on the tesselation as <c>ProjectedFrom</c>.</param>
        /// <param name="pca">Optional PCA object, stored on the tesselation.</param>
        /// <returns>
        /// The results of the cell-skipping search when that action is enabled; otherwise the results of the
        /// empirical-sampling search when that action is enabled; otherwise <c>null</c>.
        /// </returns>
        private List<SpatialmHGResult> Solve2DProblem(List<Coordinate> coords, List<bool> labels, List<Coordinate3D> projectedFrom = null, PrincipalComponentAnalysis pca = null)
        {
            var T = new Tesselation(coords, labels, new List<string>(), Config)
            {
                pca = pca
            };

            if (projectedFrom != null)
            {
                T.ProjectedFrom = projectedFrom.Cast<ICoordinate>().ToList();
            }

            if ((Config.ActionList & Actions.Search_CellSkipping) != 0)
            {
                // Leave one core free, but never request fewer than one worker:
                // on a single-core host ProcessorCount - 1 evaluates to 0.
                var workerCount = Math.Max(1, Environment.ProcessorCount - 1);
                IEnumerable<Cell> topResults = T.GradientSkippingSweep(numStartCoords: 20, numThreads: workerCount);
                Tesselation.Reset();
                return topResults.Select(t => new SpatialmHGResult(t)).ToList();
            }
            if ((Config.ActionList & Actions.Search_Exhaustive) != 0)
            {
                T.GenerateFromCoordinates();
            }
            if ((Config.ActionList & Actions.Search_Originals) != 0)
            {
                // Currently a no-op; the original call is kept for reference:
                //mHGOnOriginalPoints(args, coordinates, labels, numcoords);
            }
            if ((Config.ActionList & Actions.Search_FixedSet) != 0)
            {
                // Currently a no-op; the original implementation is kept for reference:
                /*
                 * var avgX = coordinates.Select(c => c.GetDimension(0)).Average();
                 * var avgY = coordinates.Select(c => c.GetDimension(1)).Average();
                 * var cord = new Coordinate(avgX, avgY);
                 * mHGOnOriginalPoints(args, coordinates, labels, numcoords, new List<ICoordinate>() { cord });
                 */
            }
            if ((Config.ActionList & Actions.Search_LineSweep) != 0)
            {
                T.LineSweep();
            }
            if ((Config.ActionList & Actions.Search_EmpricalSampling) != 0)
            {
                var problem = coords.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
                var gr = new Gridding();
                // Grid size: C(lines, 2) + lines + 1, capped at 100000 pivots.
                var problemSize = MathExtensions.Binomial(Line.Count, 2) + Line.Count + 1;
                gr.GenerateEmpricialDensityGrid((long)Math.Min(problemSize, 100000), problem);
                var results = new ConcurrentPriorityQueue<double, SpatialmHGResult>();
                Parallel.ForEach(gr.GetPivots(), pivot =>
                {
                    // Order the labels by distance from the pivot and score the resulting binary vector.
                    var binvec = problem.OrderBy(c => c.Item1.EuclideanDistance(pivot)).Select(c => c.Item2).ToArray();
                    var res = mHGJumper.minimumHypergeometric(binvec);
                    results.Enqueue(res.Item1, new SpatialmHGResult(res.Item1, res.Item2, (Coordinate)pivot));
                    // Keep the queue bounded to the configured number of results.
                    // NOTE(review): if this queue dequeues the smallest key first, this trimming
                    // discards the lowest scores rather than the highest - confirm the intended order.
                    while (results.Count > Config.GetTopKResults)
                    {
                        results.TryDequeue(out var junk);
                    }
                });
                return results.Select(v => v.Value).ToList();
            }

            return null;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Searches a labelled 3D point set, either by empirical sampling of pivots or by reducing the
        /// problem to a series of 2D problems on the bisector planes of differently-labelled point pairs.
        /// </summary>
        /// <param name="input">Points as (x, y, z, label) tuples.</param>
        /// <param name="runViaAzure">
        /// When true, the cell-skipping branch writes every 2D sub-problem to disk and hands the file list to
        /// <c>AzureBatchExecution.MainAsync</c> instead of solving the sub-problems locally.
        /// </param>
        /// <returns>
        /// The sampled results when empirical sampling is enabled; otherwise, when cell skipping is enabled,
        /// up to <c>Config.GetTopKResults</c> denormalized results whose key does not exceed
        /// <c>Config.SIGNIFICANCE_THRESHOLD</c>; otherwise <c>null</c>.
        /// </returns>
        public List<ISpatialmHGResult> SpatialmHGWrapper3D(List<Tuple<double, double, double, bool>> input, bool runViaAzure = false)
        {
            var coordinates = input.Select(c => (ICoordinate)new Coordinate3D(c.Item1, c.Item2, c.Item3)).ToList();
            Normalizer nrm = new Normalizer(coordinates);
            var normcoords = nrm.Normalize(coordinates).Select(c => (Coordinate3D)c).ToList();
            var labels = input.Select(c => c.Item4).ToList();

            InitializeMHG(labels);
            int idx = -1;
            var solutions = new ConcurrentPriorityQueue<double, SpatialmHGResult3D>();
            var planeList = new ConcurrentPriorityQueue<double, Plane>(); //minheap based, smaller is better

            // Collect the bisector plane of every ordered pair of differently-labelled points.
            // NOTE(review): both (i, j) and (j, i) are enqueued, so each unordered pair contributes
            // two planes - confirm whether that duplication is intended.
            for (var i = 0; i < coordinates.Count; i++)
            {
                for (var j = 0; j < coordinates.Count; j++)
                {
                    if (labels[i] != labels[j])
                    {
                        var plane = Plane.Bisector(normcoords[i], normcoords[j]);
                        planeList.Enqueue(1.0, plane);
                    }
                }
            }

            var numPlanes = planeList.Count();

            if ((Config.ActionList & Actions.Search_EmpricalSampling) != 0)
            {
                var problem = normcoords.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
                var gr = new Gridding();
                // Grid size: C(planes, 3) + C(planes, 2) + planes + 1, capped at 100000 pivots.
                var problemSize = MathExtensions.Binomial(numPlanes, 3) + MathExtensions.Binomial(numPlanes, 2) + numPlanes + 1;
                gr.GenerateEmpricialDensityGrid((long)Math.Min(problemSize, 100000), problem);
                var results = new ConcurrentPriorityQueue<double, ISpatialmHGResult>();
                Parallel.ForEach(gr.GetPivots(), pivot =>
                {
                    // Order the labels by distance from the pivot and score the resulting binary vector.
                    var binvec = problem.OrderBy(c => c.Item1.EuclideanDistance(pivot)).Select(c => c.Item2).ToArray();
                    var res = mHGJumper.minimumHypergeometric(binvec);
                    results.Enqueue(res.Item1, new SpatialmHGResult3D(res.Item1, res.Item2, (Coordinate3D)pivot));
                    // Keep the queue bounded to the configured number of results.
                    // NOTE(review): with a min-heap this trimming removes the smallest keys - confirm
                    // that the entries being discarded are the ones meant to be dropped.
                    while (results.Count > Config.GetTopKResults)
                    {
                        results.TryDequeue(out var junk);
                    }
                });
                return results.Select(v => v.Value).ToList();
            }

            if ((Config.ActionList & Actions.Search_CellSkipping) != 0)
            {
                if (runViaAzure)
                {
                    var fileList = new List<string>();

                    // The working directories are enumerated and written to below; create them first so a
                    // fresh working directory does not fail with DirectoryNotFoundException.
                    Directory.CreateDirectory(@"3D\Planes\");
                    Directory.CreateDirectory(@"3D\2dProblems\");
                    Directory.CreateDirectory(@"3D\PCA\");

                    // Remove leftovers from a previous run.
                    foreach (var file in Directory.EnumerateFiles(@"3D\Planes\"))
                    {
                        File.Delete(file);
                    }
                    foreach (var file in Directory.EnumerateFiles(@"3D\2dProblems\"))
                    {
                        File.Delete(file);
                    }

                    var asList = planeList.ToList();
                    Parallel.ForEach(asList, currPlane =>
                    {
                        var currIdx = Interlocked.Increment(ref idx);
                        Console.Write($"\r\r\r\r\r\rGenerating 2D projection {currIdx}/{numPlanes}.");
                        var plane = currPlane.Value;

                        if (StaticConfigParams.WriteToCSV)
                        {
                            Config.Log.WriteLine("Selected plane {0}/{1} at distance {2}", currIdx, numPlanes, currPlane.Key);
                        }
                        var subProblemIn2D = plane.ProjectOntoAndRotate(normcoords, out PrincipalComponentAnalysis pca);
                        pca.NumberOfOutputs = 3; //project back to 3D
                        pca.Save($@"3D\PCA\pca{currIdx}.bin");
                        Generics.SaveToCSV(plane, $@"3D\Planes\plane{currIdx}.csv", true);
                        var problemPath = $@"3D\2dProblems\coords{currIdx}.csv";
                        Generics.SaveToCSV(subProblemIn2D.Zip(labels, (c, l) => c.ToString() + "," + l), problemPath, true);

                        // List<T> is not safe for concurrent writers; serialize the Add calls.
                        lock (fileList)
                        {
                            fileList.Add(problemPath);
                        }
                    });
                    Console.WriteLine(@"Done. Initializing Batch pool.");
                    AzureBatchExecution.MainAsync(fileList).Wait();
                }
                else
                {
                    while (planeList.TryDequeue(out var currPlane))
                    {
                        var plane = currPlane.Value;
                        idx++;
                        if (StaticConfigParams.WriteToCSV)
                        {
                            Generics.SaveToCSV(plane, $@"Planes\plane{idx}.csv", true);
                        }
                        Config.Log.WriteLine("Selected plane {0}/{1} at distance {2}", idx, numPlanes, currPlane.Key);
                        var subProblemIn2D = plane.ProjectOntoAndRotate(normcoords, out PrincipalComponentAnalysis pca);
                        pca.NumberOfOutputs = 3; //project back to 3D

                        //Solve 2D problem
                        StaticConfigParams.filenamesuffix = idx.ToString();
                        var res = Solve2DProblem(subProblemIn2D, labels, normcoords, pca);
                        if (res != null)
                        {
                            foreach (var mHGresult2D in res)
                            {
                                var projectedResult = new SpatialmHGResult3D(mHGresult2D, pca, idx);
                                solutions.Enqueue(projectedResult.pvalue, projectedResult);
                            }
                        }

                        // Without any solution so far there is no best cell to rank the remaining
                        // planes against; keep the current ordering and move on to the next plane.
                        if (!solutions.TryPeek(out var bestCell))
                        {
                            continue;
                        }

                        // Re-prioritize the remaining planes by their distance to the best cell found so far.
                        var bestCellCenter = bestCell.Value.GetCenter();
                        var remainingPlanes = planeList.Select(t => t.Value).ToList();
                        planeList.Clear();
                        foreach (var p in remainingPlanes)
                        {
                            planeList.Enqueue(bestCellCenter.DistanceToPlane(p), p);
                        }
                    }
                }

                //Combine 2D solutions
                var combinedResultsNaive = new List<SpatialmHGResult3D>();
                for (var i = 0; i < Config.GetTopKResults; i++)
                {
                    KeyValuePair<double, SpatialmHGResult3D> bestCell;
                    // Stop once the queue is exhausted: a failed dequeue yields a default pair
                    // (key 0, null value) that would pass the threshold test and then dereference null.
                    if (!solutions.TryDequeue(out bestCell))
                    {
                        break;
                    }
                    if (bestCell.Key <= Config.SIGNIFICANCE_THRESHOLD)
                    {
                        bestCell.Value.Denormalize(nrm);
                        combinedResultsNaive.Add(bestCell.Value);
                    }
                    else
                    {
                        // Keys come out in ascending order, so nothing further can pass the threshold.
                        break;
                    }
                }
                Config.Log.updater?.Wait();
                return combinedResultsNaive.Cast<ISpatialmHGResult>().ToList();
            }

            return null;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Program entry point: parses the command line, optionally runs one of the comparison experiments,
        /// cleans the output directories, loads (or randomly generates) a labelled point set, runs the 2D or
        /// 3D enrichment wrapper on it and writes the results to CSV files.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. When present, <c>args[0]</c> is the input file; with no arguments a
        /// random instance is generated.
        /// </param>
        /// <exception cref="ArgumentException">An input file was specified but does not exist.</exception>
        static void Main(string[] args)
        {
            var options = new CommandlineParameters();
            Parser.Default.ParseArgumentsStrict(args, options);

            var numcoords = 300;

            Config = new ConfigParams("");

            if ((Config.ActionList & Actions.Experiment_ComparePivots) != 0)
            {
                Console.WriteLine(@"Running pivot comparison experiment");
                Experiments.CompareExhaustiveWithPivots(numcoords, numiter: 30);
                return;
            }
            if ((Config.ActionList & Actions.Experiment_SampleLines) != 0)
            {
                Console.WriteLine(@"Running sampling comparison experiment");
                var subsamples = new[] { 10, 20, 30 };
                var population = new[] { 40, 60, 100 };
                var counter = 0;
                foreach (var nu in subsamples)
                {
                    foreach (var N in population)
                    {
                        Experiments.CompareExahustiveWithSubsamplingInput(N, nu, 50, counter++.ToString());
                    }
                }
                return;
            }

            if (Config.SKIP_SLACK != 0)
            {
                Console.WriteLine(@"Warning! Current configuration uses CONST_SKIP_SLACK={0}", Config.SKIP_SLACK);
            }
            if (StaticConfigParams.WriteToCSV)
            {
                Console.WriteLine(@"Warning! Current configuration writes cells to CSV - this is SLOW.");
            }

            #region init
            StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
            Config.timer.Start();
            #endregion

            // Make sure the output directories exist and are empty.
            foreach (var dir in new List<string>() { "Cells", "Planes" })
            {
                var di = new DirectoryInfo(dir);
                if (!di.Exists)
                {
                    di.Create();
                }
                foreach (FileInfo file in di.GetFiles())
                {
                    file.Delete();
                }
            }

            // Remove per-run files left in the working directory. The second mask previously carried a
            // trailing space ("coordSample_*.csv "), which is not part of any file name this mask is meant
            // to match.
            foreach (var filemask in new List<string>() { "lines_*.csv", "coordSample_*.csv" })
            {
                FileInfo[] taskFiles = new DirectoryInfo(Directory.GetCurrentDirectory()).GetFiles(filemask);
                foreach (FileInfo file in taskFiles)
                {
                    file.Delete();
                }
            }

            //Load coordinates and labels
            var infile = Path.GetFileNameWithoutExtension(args.Length > 0 ? args[0] : "");
            var identities = new List<string>();
            for (var instanceIter = 0; instanceIter < 1; instanceIter++)
            {
                var coordinates = new List<ICoordinate>();
                List<bool> labels = null;
                StaticConfigParams.filenamesuffix = instanceIter.ToString();
                Console.WriteLine("File {0}", instanceIter);
                if (args.Length > 0)
                {
                    if (File.Exists(args[0]))
                    {
                        var res = LoadCoordinatesFromFile(args, ref numcoords, identities);
                        coordinates = res.Item1;
                        labels = res.Item2;
                    }
                    else
                    {
                        throw new ArgumentException("Input file not found!");
                    }
                }
                else
                {
                    var res = RandomizeCoordinatesAndSave(numcoords);
                    coordinates = res.Item1;
                    labels = res.Item2;
                }

                // Optionally drop the configured fraction of 0-labelled points that lie furthest
                // from their nearest 1-labelled point.
                var zeros = labels.Count(l => l == false);
                var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
                if (filterCount > 0)
                {
                    Console.WriteLine("Filtering {0} far away points", filterCount);
                    var positives = new List<ICoordinate>();
                    var negatives = new List<ICoordinate>();
                    var negIds = new List<int>();
                    for (var i = 0; i < coordinates.Count; i++)
                    {
                        if (labels[i])
                        {
                            positives.Add(coordinates[i]);
                        }
                        else
                        {
                            negatives.Add(coordinates[i]);
                            negIds.Add(i);
                        }
                    }
                    // Indices (into the unfiltered lists) of the points to remove.
                    var negMinDist = new HashSet<int>(negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                                                      .OrderByDescending(n => n.PosMinDist).Select(t => t.Id).Take(filterCount));
                    coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
                    labels = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
                    numcoords -= filterCount;
                }

                //Actual work starts here
                var ones = labels.Count(l => l);
                var linecount = ones * (numcoords - ones);
                Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

                //Look at lazy caretaker numbers. Note, we don't actually cover open cells
                //so its -linecount as each line has two open cells on either side of it,
                //and each open cell is made up of two lines.

                mHGJumper.Initialize(ones, numcoords - ones);
                // The raw significance threshold is used; the Bonferroni (union-bound) correction,
                // i.e. dividing by the cell count, is currently disabled.
                mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

                var coordType = coordinates.First().GetType();
                var ew = new EnrichmentWrapper(Config);
                List<ISpatialmHGResult> results = null;
                if (coordType == typeof(Coordinate3D))
                {
                    Config.Cellcount += MathExtensions.Binomial(linecount, 3);
                    Console.WriteLine(@"Projecting 3D problem to collection of 2D {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
                    results = ew.SpatialmHGWrapper3D(coordinates.Zip(labels,
                                                                     (a, b) => new Tuple<double, double, double, bool>(a.GetDimension(0), a.GetDimension(1),
                                                                                                                       a.GetDimension(2), b)).ToList(), options.BatchMode);
                }
                else if (coordType == typeof(Coordinate))
                {
                    Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
                    results = ew.SpatialmHGWrapper(coordinates.Zip(labels, (a, b) =>
                                                                   new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList());
                }

                // No wrapper ran (unsupported coordinate type) or the wrapper handed back null:
                // treat this as "no results" instead of failing on results.Count below.
                if (results == null)
                {
                    results = new List<ISpatialmHGResult>();
                }

                for (var resid = 0; resid < results.Count; resid++)
                {
                    results[resid].SaveToCSV($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv");
                }
                using (var outfile = new StreamWriter($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv"))
                {
                    foreach (var res in Config.mHGlist.Where(t => t != null))
                    {
                        outfile.WriteLine("{0},{1}", res.Item2, res.Item1);
                    }
                }
                if (options.BatchMode)
                {
                    AzureBatchExecution.UploadFileToContainer($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
                    for (var resid = 0; resid < results.Count; resid++)
                    {
                        AzureBatchExecution.UploadFileToContainer($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
                    }
                }
            }

            //Finalize
            if (args.Length == 0 || Debugger.IsAttached)
            {
                Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
                Console.ReadKey();
            }
        }