/// <summary>
/// Requests a 20-point instance from <c>RandomInstance</c> and feeds it through
/// <c>EnrichmentWrapper.SpatialmHGWrapper</c>. The result is not inspected.
/// </summary>
public void TestFrontendCall()
{
    var settings = new ConfigParams(tokenId: "10");
    var wrapper = new EnrichmentWrapper(settings);
    var generated = RandomInstance.RandomizeCoordinatesAndSave(20, settings, false);

    // Pair each coordinate with its label and flatten to the (x, y, label) shape the wrapper takes.
    var points = generated.Item1
        .Zip(generated.Item2, (coord, label) =>
            new Tuple<double, double, bool>(coord.GetDimension(0), coord.GetDimension(1), label))
        .ToList();

    wrapper.SpatialmHGWrapper(points);
}
/// <summary>
/// Loads labelled points from <c>zika.csv</c>, runs them through
/// <c>EnrichmentWrapper.SpatialmHGWrapper</c> and prints the p-value of every result.
/// </summary>
/// <remarks>
/// The first line of the file is skipped. For each remaining row, column 1 is the label
/// ("1" means true) and columns 2 and 3 are the two coordinates.
/// </remarks>
public void TestFrontendData()
{
    var config = new ConfigParams(tokenId: "");
    var er = new EnrichmentWrapper(config);

    // The CSV is machine-readable data, so parse numbers with the invariant culture;
    // the current culture may use ',' as the decimal separator and mis-parse or throw.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var data = File.ReadAllLines(@"zika.csv")
        .Skip(1)
        // Blank lines would otherwise fail with an index-out-of-range on v[2].
        .Where(l => !string.IsNullOrWhiteSpace(l))
        .Select(l => l.Split(','))
        .Select(v => new Tuple<double, double, bool>(
            double.Parse(v[2], invariant),
            double.Parse(v[3], invariant),
            v[1] == "1"))
        .ToList();

    var res = er.SpatialmHGWrapper(data);
    foreach (var r in res.Cast<SpatialmHGResult>())
    {
        Console.WriteLine(r.pvalue);
    }
}
/// <summary>
/// Constructs an <c>EnrichmentWrapper</c> from <paramref name="parameters"/> and returns what its
/// <c>SpatialmHGWrapper</c> produces for <paramref name="points"/>.
/// </summary>
/// <param name="points">Input points as (first dimension, second dimension, label) tuples.</param>
/// <param name="parameters">Key/value settings handed to the <c>EnrichmentWrapper</c> constructor.</param>
/// <returns>The list returned by <c>SpatialmHGWrapper</c>.</returns>
private static List<ISpatialmHGResult> getSpatialmHGResults(List<Tuple<double, double, bool>> points, Dictionary<string, string> parameters)
    => new EnrichmentWrapper(parameters).SpatialmHGWrapper(points);
/// <summary>
/// Generates <paramref name="numiter"/> instances and, for each one, records the p-value returned by
/// <c>SpatialmHGWrapper</c> next to the p-value returned by <c>mHGPivotWrapper</c>.
/// </summary>
/// <remarks>
/// Writes one "exhaustive, pivot" p-value pair per instance to <c>pivot_vs_exhaustive.csv</c>, writes the
/// paired log10 differences to <c>experiment_pvaldiffs.txt</c>, prints a summary and then waits for a key press.
/// Replaces the static <c>Config</c> and <c>StaticConfigParams.rnd</c>.
/// </remarks>
/// <param name="numcoords">Number of coordinates requested for every generated instance.</param>
/// <param name="numiter">Number of instances to generate and compare.</param>
public static void CompareExhaustiveWithPivots(int numcoords = 50, int numiter = 500)
{
    Config = new ConfigParams("");
    #region init
    StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
    Config.timer.Start();
    #endregion

    var resultPairedDiff = new List<double>();
    int victories = 0, ties = 0;
    using (var fileout = new StreamWriter(@"pivot_vs_exhaustive.csv"))
    {
        for (var instanceIter = 0; instanceIter < numiter; instanceIter++)
        {
            StaticConfigParams.filenamesuffix = instanceIter.ToString();
            Console.WriteLine("File {0}", instanceIter);
            var res = Program.RandomizeCoordinatesAndSave(numcoords, false);
            List<ICoordinate> coordinates = res.Item1;
            List<bool> labels = res.Item2;

            // Size of THIS instance. The original subtracted the filtered count from the
            // numcoords parameter itself, so every later iteration generated a smaller instance.
            var instanceCoordCount = numcoords;

            var zeros = labels.Count(l => l == false);
            var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
            if (filterCount > 0)
            {
                Console.WriteLine("Filtering {0} far away points", filterCount);
                var positives = new List<ICoordinate>();
                var negatives = new List<ICoordinate>();
                var negIds = new List<int>();
                for (var i = 0; i < coordinates.Count; i++)
                {
                    if (labels[i])
                    {
                        positives.Add(coordinates[i]);
                    }
                    else
                    {
                        negatives.Add(coordinates[i]);
                        negIds.Add(i);
                    }
                }

                // Indices of the filterCount negatives whose nearest positive is furthest away.
                var negMinDist = new HashSet<int>(
                    negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                        .OrderByDescending(n => n.PosMinDist)
                        .Select(t => t.Id)
                        .Take(filterCount));
                coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
                labels = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
                instanceCoordCount -= filterCount;
            }

            //Actual work starts here
            var ones = labels.Count(l => l);
            var linecount = ones * (instanceCoordCount - ones);
            Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

            mHGJumper.Initialize(ones, instanceCoordCount - ones);
            // Not divided by Cellcount, i.e. no Bonferroni (union-bound) correction is applied here.
            mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
            var ew = new EnrichmentWrapper(Config);
            Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", instanceCoordCount, ones, Config.Cellcount, mHGJumper.optHGT);

            var instanceData = coordinates
                .Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b))
                .ToList();
            var resultsExhaustive = ew.SpatialmHGWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();
            var resultsPivot = ew.mHGPivotWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();
            fileout.WriteLine($"{resultsExhaustive.pvalue}, {resultsPivot.pvalue}");

            if (resultsExhaustive.pvalue < resultsPivot.pvalue)
            {
                victories++;
            }
            else if (resultsExhaustive.pvalue == resultsPivot.pvalue)
            {
                ties++;
            }
            else
            {
                // The pivot wrapper reported a smaller p-value than the exhaustive wrapper.
                Console.WriteLine($"Debug me");
            }
            resultPairedDiff.Add(Math.Log10(resultsPivot.pvalue) - Math.Log10(resultsExhaustive.pvalue));
        }
    }

    Console.WriteLine($"Out of {numiter} iterations, spatial enrichment won in {victories} and tied in {ties}.");
    Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
    File.WriteAllLines("experiment_pvaldiffs.txt", resultPairedDiff.Select(v => v.ToString()).ToArray());
    Console.ReadKey();
}
/// <summary>
/// Generates one instance, runs <c>SpatialmHGWrapper</c> on all of it once, then repeatedly draws a random
/// subsample and compares the subsample's gradient-skipping sweep, a uniform pivot grid and an empirical
/// density pivot grid against that full-instance result.
/// </summary>
/// <remarks>
/// One line per subsample is written to <c>sample_vs_exhaustive_{suffix}.csv</c> with the -log10 p-values of the
/// exhaustive run, the subsample sweep, the uniform grid and the empirical grid. Replaces the static
/// <c>Config</c>, <c>Program.Config</c> and <c>StaticConfigParams.rnd</c>.
/// </remarks>
/// <param name="numcoords">Number of coordinates requested for the generated instance.</param>
/// <param name="subsampleSize">Number of points drawn for every subsample.</param>
/// <param name="numiter">Exclusive upper bound of the subsample counter, which starts at 1 (so numiter - 1 subsamples are run).</param>
/// <param name="suffix">Suffix used for the instance files and for the output CSV name.</param>
/// <returns>For every subsample, log10(subsample p-value) - log10(exhaustive p-value).</returns>
/// <exception cref="InvalidOperationException">
/// A subsample is requested but no subsample containing both labels can exist.
/// </exception>
public static List<double> CompareExahustiveWithSubsamplingInput(int numcoords = 50, int subsampleSize = 20, int numiter = 100, string suffix = "0")
{
    Config = new ConfigParams("");
    #region init
    StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
    Config.timer.Start();
    #endregion

    var resultPairedDiff = new List<double>();
    int victories = 0, ties = 0;
    Program.Config = Config;
    StaticConfigParams.filenamesuffix = suffix;
    var res = Program.RandomizeCoordinatesAndSave(numcoords, true);
    List<ICoordinate> coordinates = res.Item1;
    List<bool> labels = res.Item2;

    var zeros = labels.Count(l => l == false);
    var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
    if (filterCount > 0)
    {
        Console.WriteLine("Filtering {0} far away points", filterCount);
        var positives = new List<ICoordinate>();
        var negatives = new List<ICoordinate>();
        var negIds = new List<int>();
        for (var i = 0; i < coordinates.Count; i++)
        {
            if (labels[i])
            {
                positives.Add(coordinates[i]);
            }
            else
            {
                negatives.Add(coordinates[i]);
                negIds.Add(i);
            }
        }

        // Indices of the filterCount negatives whose nearest positive is furthest away.
        var negMinDist = new HashSet<int>(
            negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                .OrderByDescending(n => n.PosMinDist)
                .Select(t => t.Id)
                .Take(filterCount));
        coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
        labels = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
        numcoords -= filterCount;
    }

    var instanceDataCoords = coordinates.Zip(labels, (a, b) => new Tuple<ICoordinate, bool>(a, b)).ToList();
    var instanceData = coordinates
        .Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b))
        .ToList();

    Config.SKIP_SLACK = -1000;
    var ew = new EnrichmentWrapper(Config);
    var resultsExhaustive = ew.SpatialmHGWrapper(instanceData).Select(v => (SpatialmHGResult)v).First();

    //Actual work starts here
    var populationOnes = labels.Count(l => l);
    var populationLinecount = populationOnes * (numcoords - populationOnes);
    Config.Cellcount = ((long)populationLinecount * (populationLinecount - 1)) / 2.0 + populationLinecount + 1;
    mHGJumper.Initialize(populationOnes, numcoords - populationOnes);
    // Not divided by Cellcount, i.e. no Bonferroni (union-bound) correction is applied here.
    mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

    // ProcessorCount - 1 is 0 on a single-core host; always ask for at least one thread.
    var sweepThreads = Math.Max(1, Environment.ProcessorCount - 1);

    using (var fileout = new StreamWriter($"sample_vs_exhaustive_{suffix}.csv"))
    {
        // NOTE(review): the counter starts at 1, so numiter - 1 subsamples are run. Left as is because the
        // counter is also used as the per-sample file suffix - confirm whether 0 was skipped on purpose.
        for (var instanceIter = 1; instanceIter < numiter; instanceIter++)
        {
            StaticConfigParams.filenamesuffix = instanceIter.ToString();
            Console.WriteLine("File {0}", instanceIter);

            var sample = DrawMixedSubsample(instanceDataCoords, subsampleSize);
            if (StaticConfigParams.WriteToCSV)
            {
                Generics.SaveToCSV(sample.Select(t => t.Item1.ToString() + "," + Convert.ToDouble(t.Item2)), $@"coords_{StaticConfigParams.filenamesuffix}.csv");
            }

            var sampleOnes = sample.Count(t => t.Item2);
            var sampleLinecount = sampleOnes * (subsampleSize - sampleOnes);
            Config.Cellcount = ((long)sampleLinecount * (sampleLinecount - 1)) / 2.0 + sampleLinecount + 1;
            // Report the subsample size: the 1's count and cell count on this line describe the subsample.
            Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", subsampleSize, sampleOnes, Config.Cellcount, mHGJumper.optHGT);

            mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
            Tesselation T = new Tesselation(sample.Select(t => (Coordinate)t.Item1).ToList(), sample.Select(t => t.Item2).ToList(), null, Config)
            {
                ProjectedFrom = coordinates,
                SourceLabels = labels.ToArray()
            };
            var topResults = T.GradientSkippingSweep(numStartCoords: 20, numThreads: sweepThreads).First();
            Line.Reset();

            if (resultsExhaustive.pvalue < topResults.mHG.Item1)
            {
                victories++;
            }
            else if (resultsExhaustive.pvalue == topResults.mHG.Item1)
            {
                ties++;
            }
            else
            {
                // The subsample sweep reported a smaller p-value than the exhaustive run.
                Console.WriteLine($"Debug me");
            }
            var pdiff = Math.Log10(topResults.mHG.Item1) - Math.Log10(resultsExhaustive.pvalue);
            resultPairedDiff.Add(pdiff);

            // Both grids below use Config.Cellcount pivots and are scored on the full instance.
            mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
            Console.Write($"Uniform grid strategy @{Config.Cellcount} pivots... ");
            var uniformGridFactory = new Gridding();
            uniformGridFactory.GeneratePivotGrid(Convert.ToInt64(Config.Cellcount));
            var uniformGridPivotlst = uniformGridFactory.GetPivots().ToList();
            var uniformGridPivot = uniformGridPivotlst.AsParallel().Max(p => -Math.Log10(EnrichmentAtPivot(instanceDataCoords, p)));
            Console.WriteLine($"p={uniformGridPivot:e}");

            Console.Write($"Empirical grid strategy @{Config.Cellcount} pivots... ");
            mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;
            var empiricalGridFactory = new Gridding();
            empiricalGridFactory.GenerateEmpricialDensityGrid(Convert.ToInt64(Config.Cellcount), instanceDataCoords);
            var empiricalGridPivotlst = empiricalGridFactory.GetPivots().ToList();
            var empiricalGridPivot = empiricalGridPivotlst.AsParallel().Max(p => -Math.Log10(EnrichmentAtPivot(instanceDataCoords, p)));
            Console.WriteLine($"p={empiricalGridPivot:e}");

            fileout.WriteLine($"{-Math.Log10(resultsExhaustive.pvalue)}, {-Math.Log10(topResults.mHG.Item1)}, {uniformGridPivot}, {empiricalGridPivot}");
        }
    }

    // Report the number of subsamples actually run rather than the numiter bound.
    Console.WriteLine($"Out of {resultPairedDiff.Count} iterations, spatial enrichment won in {victories} and tied in {ties}.");
    Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
    return resultPairedDiff;
}

/// <summary>
/// Draws random subsamples of <paramref name="population"/> until one contains both label values.
/// </summary>
/// <param name="population">Labelled points to sample from.</param>
/// <param name="subsampleSize">Maximum number of points in the subsample.</param>
/// <returns>The first drawn subsample holding at least one true and one false label.</returns>
/// <exception cref="InvalidOperationException">No subsample with both labels can exist.</exception>
private static List<Tuple<ICoordinate, bool>> DrawMixedSubsample(List<Tuple<ICoordinate, bool>> population, int subsampleSize)
{
    // Without this check the redraw loop below would never terminate.
    var hasBothLabels = population.Any(t => t.Item2) && population.Any(t => !t.Item2);
    if (!hasBothLabels || Math.Min(subsampleSize, population.Count) < 2)
    {
        throw new InvalidOperationException("Cannot draw a subsample containing both labels: the population needs both labels and the subsample at least two points.");
    }

    List<Tuple<ICoordinate, bool>> sample;
    do
    {
        // One random key per point, in population order; keep the points with the smallest keys.
        sample = population
            .Select(t => new { Point = t, Rand = StaticConfigParams.rnd.Next() })
            .OrderBy(v => v.Rand)
            .Take(subsampleSize)
            .Select(v => v.Point)
            .ToList();
    }
    while (sample.All(t => t.Item2) || !sample.Any(t => t.Item2));

    return sample;
}
/// <summary>
/// Program entry point. Depending on <c>Config.ActionList</c> it runs one of the comparison experiments, or
/// runs the spatial mHG wrapper on one instance (loaded from <c>args[0]</c> when given, generated otherwise)
/// and writes the results to CSV files.
/// </summary>
/// <param name="args">Command line arguments; when present, <c>args[0]</c> is used as the input file path.</param>
/// <exception cref="ArgumentException">An input file was given but does not exist.</exception>
/// <exception cref="NotSupportedException">The first coordinate is neither a <c>Coordinate</c> nor a <c>Coordinate3D</c>.</exception>
static void Main(string[] args)
{
    var options = new CommandlineParameters();
    // Return value is not inspected (it was assigned to an unused local before).
    Parser.Default.ParseArgumentsStrict(args, options);

    var numcoords = 300;
    Config = new ConfigParams("");

    if ((Config.ActionList & Actions.Experiment_ComparePivots) != 0)
    {
        Console.WriteLine(@"Running pivot comparison experiment");
        Experiments.CompareExhaustiveWithPivots(numcoords, numiter: 30);
        return;
    }
    if ((Config.ActionList & Actions.Experiment_SampleLines) != 0)
    {
        Console.WriteLine(@"Running sampling comparison experiment");
        var subsamples = new[] { 10, 20, 30 };
        var population = new[] { 40, 60, 100 };
        var counter = 0;
        foreach (var nu in subsamples)
        {
            foreach (var N in population)
            {
                Experiments.CompareExahustiveWithSubsamplingInput(N, nu, 50, counter++.ToString());
            }
        }
        return;
    }

    if (Config.SKIP_SLACK != 0)
    {
        Console.WriteLine(@"Warning! Current configuration uses CONST_SKIP_SLACK={0}", Config.SKIP_SLACK);
    }
    if (StaticConfigParams.WriteToCSV)
    {
        Console.WriteLine(@"Warning! Current configuration writes cells to CSV - this is SLOW.");
    }

    #region init
    StaticConfigParams.rnd = (Config.ActionList & Actions.Program_RandomConstSeed) != 0 ? new SafeRandom(1) : new SafeRandom();
    Config.timer.Start();
    #endregion

    // Make sure the output directories exist and are empty.
    foreach (var dir in new List<string>() { "Cells", "Planes" })
    {
        var di = new DirectoryInfo(dir);
        if (!di.Exists)
        {
            di.Create();
        }
        foreach (FileInfo file in di.GetFiles())
        {
            file.Delete();
        }
    }

    // Remove leftovers in the working directory. The second mask used to end in a space, which is not
    // part of any ".csv" name and is only ignored on runtimes that trim the search pattern.
    foreach (var filemask in new List<string>() { "lines_*.csv", "coordSample_*.csv" })
    {
        FileInfo[] taskFiles = new DirectoryInfo(Directory.GetCurrentDirectory()).GetFiles(filemask);
        foreach (FileInfo file in taskFiles)
        {
            file.Delete();
        }
    }

    //Load coordinates and labels
    var infile = Path.GetFileNameWithoutExtension(args.Length > 0 ? args[0] : "");
    var identities = new List<string>();
    for (var instanceIter = 0; instanceIter < 1; instanceIter++)
    {
        List<ICoordinate> coordinates;
        List<bool> labels;
        StaticConfigParams.filenamesuffix = instanceIter.ToString();
        Console.WriteLine("File {0}", instanceIter);
        if (args.Length > 0)
        {
            if (!File.Exists(args[0]))
            {
                throw new ArgumentException("Input file not found!");
            }
            var loaded = LoadCoordinatesFromFile(args, ref numcoords, identities);
            coordinates = loaded.Item1;
            labels = loaded.Item2;
        }
        else
        {
            var generated = RandomizeCoordinatesAndSave(numcoords);
            coordinates = generated.Item1;
            labels = generated.Item2;
        }

        var zeros = labels.Count(l => l == false);
        var filterCount = (int)(Config.FilterKFurthestZeros * zeros);
        if (filterCount > 0)
        {
            Console.WriteLine("Filtering {0} far away points", filterCount);
            var positives = new List<ICoordinate>();
            var negatives = new List<ICoordinate>();
            var negIds = new List<int>();
            for (var i = 0; i < coordinates.Count; i++)
            {
                if (labels[i])
                {
                    positives.Add(coordinates[i]);
                }
                else
                {
                    negatives.Add(coordinates[i]);
                    negIds.Add(i);
                }
            }

            // Indices of the filterCount negatives whose nearest positive is furthest away.
            var negMinDist = new HashSet<int>(
                negatives.Zip(negIds, (a, b) => new { PosMinDist = positives.Select(p => p.EuclideanDistance(a)).Min(), Id = b })
                    .OrderByDescending(n => n.PosMinDist)
                    .Select(t => t.Id)
                    .Take(filterCount));
            coordinates = coordinates.Where((a, b) => !negMinDist.Contains(b)).ToList();
            labels = labels.Where((a, b) => !negMinDist.Contains(b)).ToList();
            numcoords -= filterCount;
        }

        //Actual work starts here
        var ones = labels.Count(l => l);
        var linecount = ones * (numcoords - ones);
        //Look at lazy caterer numbers. Note, we don't actually cover open cells
        //so it's -linecount as each line has two open cells on either side of it,
        //and each open cell is made up of two lines.
        Config.Cellcount = ((long)linecount * (linecount - 1)) / 2.0 + linecount + 1;

        mHGJumper.Initialize(ones, numcoords - ones);
        // Not divided by Cellcount, i.e. no Bonferroni (union-bound) correction is applied here.
        mHGJumper.optHGT = Config.SIGNIFICANCE_THRESHOLD;

        var coordType = coordinates.First().GetType();
        var ew = new EnrichmentWrapper(Config);
        List<ISpatialmHGResult> results;
        if (coordType == typeof(Coordinate3D))
        {
            Config.Cellcount += MathExtensions.Binomial(linecount, 3);
            Console.WriteLine(@"Projecting 3D problem to collection of 2D {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
            results = ew.SpatialmHGWrapper3D(
                coordinates.Zip(labels, (a, b) => new Tuple<double, double, double, bool>(a.GetDimension(0), a.GetDimension(1), a.GetDimension(2), b)).ToList(),
                options.BatchMode);
        }
        else if (coordType == typeof(Coordinate))
        {
            Console.WriteLine(@"Starting work on {0} coordinates with {1} 1's (|cells|={2:n0}, alpha={3}).", numcoords, ones, Config.Cellcount, mHGJumper.optHGT);
            results = ew.SpatialmHGWrapper(
                coordinates.Zip(labels, (a, b) => new Tuple<double, double, bool>(a.GetDimension(0), a.GetDimension(1), b)).ToList());
        }
        else
        {
            // Previously results stayed null here and the loop below failed with a NullReferenceException.
            throw new NotSupportedException($"Unsupported coordinate type: {coordType}");
        }

        for (var resid = 0; resid < results.Count; resid++)
        {
            results[resid].SaveToCSV($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv");
        }

        using (var outfile = new StreamWriter($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv"))
        {
            foreach (var entry in Config.mHGlist.Where(t => t != null))
            {
                outfile.WriteLine("{0},{1}", entry.Item2, entry.Item1);
            }
        }

        if (options.BatchMode)
        {
            AzureBatchExecution.UploadFileToContainer($"{infile}_mhglist_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
            for (var resid = 0; resid < results.Count; resid++)
            {
                AzureBatchExecution.UploadFileToContainer($@"Cells\{infile}_Cell_{resid}_{StaticConfigParams.filenamesuffix}.csv", options.SaasUrl);
            }
        }
    }

    //Finalize
    if (args.Length == 0 || Debugger.IsAttached)
    {
        Console.WriteLine("Total elapsed time: {0:g}.\nPress any key to continue.", Config.timer.Elapsed);
        Console.ReadKey();
    }
}