/// <summary>
/// Solves a single planar enrichment problem with the search strategy selected in <c>Config.ActionList</c>.
/// </summary>
/// <param name="coords">The 2D coordinates of the problem.</param>
/// <param name="labels">Binary labels, one per coordinate (zipped position-wise with <paramref name="coords"/>).</param>
/// <param name="projectedFrom">Optional 3D coordinates the 2D problem was projected from; stored on the tesselation.</param>
/// <param name="pca">Optional PCA transform; stored on the tesselation.</param>
/// <returns>
/// The results of the cell-skipping sweep or of the empirical sampling, whichever is selected
/// (cell skipping takes precedence). Returns <c>null</c> when neither of those two strategies is
/// selected: the exhaustive and line-sweep strategies are executed but yield no return value here.
/// </returns>
private List<SpatialmHGResult> Solve2DProblem(List<Coordinate> coords, List<bool> labels, List<Coordinate3D> projectedFrom = null, PrincipalComponentAnalysis pca = null)
{
    var T = new Tesselation(coords, labels, new List<string>(), Config) { pca = pca };
    if (projectedFrom != null)
    {
        T.ProjectedFrom = projectedFrom.Cast<ICoordinate>().ToList();
    }

    if ((Config.ActionList & Actions.Search_CellSkipping) != 0)
    {
        // Leave one core free for the rest of the process, but never request fewer than one
        // worker: ProcessorCount - 1 evaluates to 0 on a single-core host or container.
        var workerCount = Math.Max(1, Environment.ProcessorCount - 1);
        IEnumerable<Cell> topResults = T.GradientSkippingSweep(numStartCoords: 20, numThreads: workerCount);
        Tesselation.Reset();
        return topResults.Select(t => new SpatialmHGResult(t)).ToList();
    }

    if ((Config.ActionList & Actions.Search_Exhaustive) != 0)
    {
        T.GenerateFromCoordinates();
    }

    if ((Config.ActionList & Actions.Search_Originals) != 0)
    {
        // Disabled: mHGOnOriginalPoints(args, coordinates, labels, numcoords);
    }

    if ((Config.ActionList & Actions.Search_FixedSet) != 0)
    {
        /*
         * Disabled:
         * var avgX = coordinates.Select(c => c.GetDimension(0)).Average();
         * var avgY = coordinates.Select(c => c.GetDimension(1)).Average();
         * var cord = new Coordinate(avgX, avgY);
         * mHGOnOriginalPoints(args, coordinates, labels, numcoords, new List<ICoordinate>() { cord });
         */
    }

    if ((Config.ActionList & Actions.Search_LineSweep) != 0)
    {
        T.LineSweep();
    }

    if ((Config.ActionList & Actions.Search_EmpricalSampling) != 0)
    {
        var problem = coords.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
        var gr = new Gridding();

        // The number of sampled pivots is capped at 100000.
        var problemSize = MathExtensions.Binomial(Line.Count, 2) + Line.Count + 1;
        gr.GenerateEmpricialDensityGrid((long)Math.Min(problemSize, 100000), problem);

        var results = new ConcurrentPriorityQueue<double, SpatialmHGResult>();
        Parallel.ForEach(gr.GetPivots(), pivot =>
        {
            // Rank the labels by distance from the pivot and score that binary vector.
            var binvec = problem.OrderBy(c => c.Item1.EuclideanDistance(pivot)).Select(c => c.Item2).ToArray();
            var res = mHGJumper.minimumHypergeometric(binvec);
            results.Enqueue(res.Item1, new SpatialmHGResult(res.Item1, res.Item2, (Coordinate)pivot));

            // Trim the queue back down to the configured number of results.
            while (results.Count > Config.GetTopKResults)
            {
                results.TryDequeue(out _);
            }
        });
        return results.Select(v => v.Value).ToList();
    }

    return null;
}
/// <summary>
/// Runs the spatial enrichment search on labelled 3D points, either by empirical sampling of
/// pivots or by reducing the problem to 2D sub-problems on perpendicular bisector planes.
/// </summary>
/// <param name="input">One tuple per point: x, y, z and its binary label.</param>
/// <param name="runViaAzure">
/// When <c>true</c> (cell-skipping mode only), the 2D sub-problems are written to disk under
/// <c>3D\</c> and handed to <c>AzureBatchExecution</c> instead of being solved in-process.
/// </param>
/// <returns>
/// The sampled results (empirical sampling), or up to <c>Config.GetTopKResults</c> denormalized
/// results whose key does not exceed <c>Config.SIGNIFICANCE_THRESHOLD</c> (cell skipping).
/// Returns <c>null</c> when neither of those actions is selected in <c>Config.ActionList</c>.
/// </returns>
public List<ISpatialmHGResult> SpatialmHGWrapper3D(List<Tuple<double, double, double, bool>> input, bool runViaAzure = false)
{
    var coordinates = input.Select(c => (ICoordinate)new Coordinate3D(c.Item1, c.Item2, c.Item3)).ToList();
    Normalizer nrm = new Normalizer(coordinates);
    var normcoords = nrm.Normalize(coordinates).Select(c => (Coordinate3D)c).ToList();
    var labels = input.Select(c => c.Item4).ToList();
    InitializeMHG(labels);
    int idx = -1;
    var solutions = new ConcurrentPriorityQueue<double, SpatialmHGResult3D>();
    var planeList = new ConcurrentPriorityQueue<double, Plane>(); // min-heap based, smaller is better

    // One perpendicular bisector plane per ordered pair of differently-labelled points.
    // NOTE(review): both (i, j) and (j, i) are enqueued; if Plane.Bisector is symmetric in its
    // arguments this doubles the work. Confirm before changing j to start at i + 1.
    for (var i = 0; i < coordinates.Count; i++)
    {
        for (var j = 0; j < coordinates.Count; j++)
        {
            if (labels[i] != labels[j])
            {
                var plane = Plane.Bisector(normcoords[i], normcoords[j]);
                planeList.Enqueue(1.0, plane);
            }
        }
    }

    var numPlanes = planeList.Count;

    if ((Config.ActionList & Actions.Search_EmpricalSampling) != 0)
    {
        var problem = normcoords.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
        var gr = new Gridding();

        // The number of sampled pivots is capped at 100000.
        var problemSize = MathExtensions.Binomial(numPlanes, 3) + MathExtensions.Binomial(numPlanes, 2) + numPlanes + 1;
        gr.GenerateEmpricialDensityGrid((long)Math.Min(problemSize, 100000), problem);

        var results = new ConcurrentPriorityQueue<double, ISpatialmHGResult>();
        Parallel.ForEach(gr.GetPivots(), pivot =>
        {
            // Rank the labels by distance from the pivot and score that binary vector.
            var binvec = problem.OrderBy(c => c.Item1.EuclideanDistance(pivot)).Select(c => c.Item2).ToArray();
            var res = mHGJumper.minimumHypergeometric(binvec);
            results.Enqueue(res.Item1, new SpatialmHGResult3D(res.Item1, res.Item2, (Coordinate3D)pivot));

            // Trim the queue back down to the configured number of results.
            while (results.Count > Config.GetTopKResults)
            {
                results.TryDequeue(out _);
            }
        });
        return results.Select(v => v.Value).ToList();
    }

    if ((Config.ActionList & Actions.Search_CellSkipping) != 0)
    {
        if (runViaAzure)
        {
            // Make sure every directory touched below exists; enumerating or writing into a
            // missing directory would otherwise throw.
            Directory.CreateDirectory(@"3D\Planes\");
            Directory.CreateDirectory(@"3D\2dProblems\");
            Directory.CreateDirectory(@"3D\PCA\");

            // Remove leftovers from previous runs.
            foreach (var file in Directory.EnumerateFiles(@"3D\Planes\"))
            {
                File.Delete(file);
            }
            foreach (var file in Directory.EnumerateFiles(@"3D\2dProblems\"))
            {
                File.Delete(file);
            }

            var fileList = new List<string>();
            var fileListGate = new object(); // List<T>.Add is not safe for concurrent writers
            var asList = planeList.ToList();
            Parallel.ForEach(asList, currPlane =>
            {
                var currIdx = Interlocked.Increment(ref idx);
                Console.Write($"\r\r\r\r\r\rGenerating 2D projection {currIdx}/{numPlanes}.");
                var plane = currPlane.Value;
                if (StaticConfigParams.WriteToCSV)
                {
                    Config.Log.WriteLine("Selected plane {0}/{1} at distance {2}", currIdx, numPlanes, currPlane.Key);
                }

                var subProblemIn2D = plane.ProjectOntoAndRotate(normcoords, out PrincipalComponentAnalysis pca);
                pca.NumberOfOutputs = 3; // project back to 3D
                pca.Save($@"3D\PCA\pca{currIdx}.bin");
                Generics.SaveToCSV(plane, $@"3D\Planes\plane{currIdx}.csv", true);

                var problemFile = $@"3D\2dProblems\coords{currIdx}.csv";
                Generics.SaveToCSV(subProblemIn2D.Zip(labels, (c, l) => c.ToString() + "," + l), problemFile, true);
                lock (fileListGate)
                {
                    fileList.Add(problemFile);
                }
            });
            Console.WriteLine(@"Done. Initializing Batch pool.");
            AzureBatchExecution.MainAsync(fileList).Wait();
        }
        else
        {
            while (planeList.TryDequeue(out var currPlane))
            {
                var plane = currPlane.Value;
                idx++;
                if (StaticConfigParams.WriteToCSV)
                {
                    Generics.SaveToCSV(plane, $@"Planes\plane{idx}.csv", true);
                }
                Config.Log.WriteLine("Selected plane {0}/{1} at distance {2}", idx, numPlanes, currPlane.Key);

                // Reduce to a 2D problem on this plane and solve it.
                var subProblemIn2D = plane.ProjectOntoAndRotate(normcoords, out PrincipalComponentAnalysis pca);
                pca.NumberOfOutputs = 3; // project back to 3D
                StaticConfigParams.filenamesuffix = idx.ToString();
                var res = Solve2DProblem(subProblemIn2D, labels, normcoords, pca);
                foreach (var mHGresult2D in res)
                {
                    var projectedResult = new SpatialmHGResult3D(mHGresult2D, pca, idx);
                    solutions.Enqueue(projectedResult.pvalue, projectedResult);
                }

                // Without any solution so far there is nothing to prioritise by; keep the
                // current plane order instead of dereferencing an empty peek result.
                if (!solutions.TryPeek(out var bestSoFar))
                {
                    continue;
                }

                // Re-key the remaining planes by their distance to the best cell found so far.
                var bestCellCenter = bestSoFar.Value.GetCenter();
                var remainingPlanes = planeList.Select(t => t.Value).ToList();
                planeList.Clear();
                foreach (var p in remainingPlanes)
                {
                    planeList.Enqueue(bestCellCenter.DistanceToPlane(p), p);
                }
            }
        }

        // Combine 2D solutions: take the best results while they remain significant.
        var combinedResultsNaive = new List<SpatialmHGResult3D>();
        for (var i = 0; i < Config.GetTopKResults; i++)
        {
            // Stop when the queue runs dry; a failed dequeue yields a default pair whose
            // null Value must not be dereferenced.
            if (!solutions.TryDequeue(out var candidate))
            {
                break;
            }

            if (candidate.Key <= Config.SIGNIFICANCE_THRESHOLD)
            {
                candidate.Value.Denormalize(nrm);
                combinedResultsNaive.Add(candidate.Value);
            }
            else
            {
                break;
            }
        }

        Config.Log.updater?.Wait();
        return combinedResultsNaive.Cast<ISpatialmHGResult>().ToList();
    }

    return null;
}
/// <summary>
/// Program entry point: parses the command line, optionally runs one of the comparison
/// experiments, then loads (or randomizes) a labelled coordinate set, runs the 2D or 3D
/// enrichment wrapper on it and writes the resulting cells and mHG list to CSV files.
/// </summary>
/// <param name="args">
/// Command-line arguments. When present, <c>args[0]</c> is the input file; with no arguments
/// a random instance is generated.
/// </param>
/// <exception cref="ArgumentException">The input file named in <c>args[0]</c> does not exist.</exception>
static void Main(string[] args)
{
    var options = new CommandlineParameters();
    Parser.Default.ParseArgumentsStrict(args, options);
    var numcoords = 300;
    Config = new ConfigParams("");

    if ((Config.ActionList & Actions.Experiment_ComparePivots) != 0)
    {
        Console.WriteLine(@"Running pivot comparison experiment");
        Experiments.CompareExhaustiveWithPivots(numcoords, numiter: 30);
        return;
    }

    if ((Config.ActionList & Actions.Experiment_SampleLines) != 0)
    {
        Console.WriteLine(@"Running sampling comparison experiment");
        var subsamples = new[] { 10, 20, 30 };
        var population = new[] { 40, 60, 100 };
        var counter = 0;
        foreach (var nu in subsamples)
        {
            foreach (var N in population)
            {
                Experiments.CompareExahustiveWithSubsamplingInput(N, nu, 50, counter++.ToString());
            }
        }
        return;
    }

    if (Config.SKIP_SLACK != 0)
    {
        Console.WriteLine(@"Warning! Current configuration uses CONST_SKIP_SLACK={0}", Config.SKIP_SLACK);
    }
    if (StaticConfigParams.WriteToCSV)
    {
        Console.WriteLine(@"Warning! Current configuration writes cells to CSV - this is SLOW.");
    }

    #region init
    StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0
        ? new SafeRandom(1)
        : new SafeRandom();
    Config.timer.Start();
    #endregion

    // Start from empty output directories.
    foreach (var dir in new List<string>() { "Cells", "Planes" })
    {
        var di = new DirectoryInfo(dir);
        if (!di.Exists)
        {
            di.Create();
        }
        foreach (FileInfo file in di.GetFiles())
        {
            file.Delete();
        }
    }

    // Remove per-task files left in the working directory. The masks must not carry stray
    // whitespace, otherwise they may match nothing.
    foreach (var filemask in new List<string>() { "lines_*.csv", "coordSample_*.csv" })
    {
        FileInfo[] taskFiles = new DirectoryInfo(Directory.GetCurrentDirectory()).GetFiles(filemask);
        foreach (FileInfo file in taskFiles)
        {
            file.Delete();
        }
    }

    // Load coordinates and labels
    var infile = Path.GetFileNameWithoutExtension(args.Length > 0 ? args[0] : "");
    var identities = new List<string>();
    for (var instanceIter = 0; instanceIter < 1; instanceIter++)
    {
        var coordinates = new List<ICoordinate>();
        List<bool> labels = null;
        StaticConfigParams.filenamesuffix = instanceIter.ToString();
        Console.WriteLine("File {0}", instanceIter);
        if (args.Length > 0)
        {
            if (File.Exists(args[0]))
            {
                var res = LoadCoordinatesFromFile(args, ref numcoords, identities);
                coordinates = res.Item1;
                labels = res.Item2;
            }
            else
            {
                throw new ArgumentException("Input file not found!");
            }
        }
        else
        {
            var res = RandomizeCoordinatesAndSave(numcoords);
            coordinates = res.Item1;
            labels = res.Item2;
        }

        // Optionally drop the negatives that are furthest from any positive point.
        var zeros = labels.Count(l => l == false);
        var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
        if (filterCount > 0)
        {
            Console.WriteLine("Filtering {0} far away points", filterCount);
            var positives = new List<ICoordinate>();
            var negatives = new List<ICoordinate>();
            var negIds = new List<int>();
            for (var i = 0; i < coordinates.Count; i++)
            {
                if (labels[i])
                {
                    positives.Add(coordinates[i]);
                }
                else
                {
                    negatives.Add(coordinates[i]);
                    negIds.Add(i);
                }
            }

            // Indices of the negatives whose nearest positive is furthest away.
            var negMinDist = new HashSet<int>(
                negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                    .OrderByDescending(n => n.PosMinDist)
                    .Select(t => t.Id)
                    .Take(filterCount));
            coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
            labels = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();

            // Subtract what was actually removed rather than what was requested.
            numcoords -= negMinDist.Count;
        }

        // Actual work starts here
        var ones = labels.Count(l => l);
        var linecount = ones * (numcoords - ones);

        // Look at lazy caterer numbers. Note, we don't actually cover open cells,
        // so it's -linecount as each line has two open cells on either side of it,
        // and each open cell is made up of two lines.
        Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;
        mHGJumper.Initialize(ones, numcoords - ones);

        // alpha is the significance level; dividing by Cellcount would give the
        // Bonferroni (union-bound) corrected level, which is currently not applied.
        mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

        var coordType = coordinates.First().GetType();
        var ew = new EnrichmentWrapper(Config);
        List<ISpatialmHGResult> results = null;
        if (coordType == typeof(Coordinate3D))
        {
            Config.Cellcount += MathExtensions.Binomial(linecount, 3);
            Console.WriteLine(@"Projecting 3D problem to collection of 2D {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
            results = ew.SpatialmHGWrapper3D(
                coordinates.Zip(labels, (a, b) => new Tuple<double, double, double, bool>(a.GetDimension(0), a.GetDimension(1), a.GetDimension(2), b)).ToList(),
                options.BatchMode);
        }
        else if (coordType == typeof(Coordinate))
        {
            Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
            results = ew.SpatialmHGWrapper(
                coordinates.Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList());
        }

        // results is still null when the coordinate type matched neither branch or the
        // wrapper returned null; treat that as "no cells" instead of dereferencing null.
        if (results == null)
        {
            Console.WriteLine(@"Warning! No results were produced.");
            results = new List<ISpatialmHGResult>();
        }

        for (var resid = 0; resid < results.Count; resid++)
        {
            results[resid].SaveToCSV($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv");
        }

        using (var outfile = new StreamWriter($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv"))
        {
            foreach (var res in Config.mHGlist.Where(t => t != null))
            {
                outfile.WriteLine("{0},{1}", res.Item2, res.Item1);
            }
        }

        if (options.BatchMode)
        {
            AzureBatchExecution.UploadFileToContainer($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
            for (var resid = 0; resid < results.Count; resid++)
            {
                AzureBatchExecution.UploadFileToContainer($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
            }
        }
    }

    // Finalize: keep the console open for interactive / debugger runs.
    if (args.Length == 0 || Debugger.IsAttached)
    {
        Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
        Console.ReadKey();
    }
}