/// <summary>
/// Parallel worker that fills in the statistics of a single [cluster].
///
/// [cluster] is guaranteed to be unique per worker, so it is not locked here.
/// </summary>
/// <param name="core">Core, forwarded to AddAveragedStatistics</param>
/// <param name="statistics">Flags selecting which optional statistics are calculated</param>
/// <param name="cluster">Cluster whose statistics are updated</param>
/// <param name="prog">Parallel progress handler, incremented once per call</param>
private static void Thread_CalculateClusterStatistics([Const] Core core, [Const] EClustererStatistics statistics, [MutableSafe] Cluster cluster, [MutableSafe] ProgressParallelHandler prog)
{
    prog.SafeIncrement();

    // Let the cluster refresh its own derived values first
    cluster.CalculateAveragedStatistics();
    cluster.CalculateCommentFlags();

    Dictionary<string, double> target = cluster.ClusterStatistics;
    List<Assignment> members = cluster.Assignments.List;

    int highestNumCompounds;
    int numCompounds;
    int highestNumPeaks;
    int numPathways;

    ClustererStatisticsHelper.CalculateHighestCompounds(cluster, out highestNumCompounds, out numCompounds);
    ClustererStatisticsHelper.CalculateHighestPeaks(cluster, out highestNumPeaks, out numPathways);

    target[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_COMPOUNDS] = highestNumCompounds;
    target[STAT_CLUSTER_AVERAGE_NUM_COMPOUNDS] = numCompounds;
    target[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_PEAKS] = highestNumPeaks;
    target[STAT_CLUSTER_AVERAGE_NUM_PATHWAYS] = numPathways;

    //////////////////////////
    // GROUP STATS (cluster)
    // Only calculated when explicitly requested
    if (!statistics.HasFlag(EClustererStatistics.ClusterAverages))
    {
        return;
    }

    AddAveragedStatistics(core, target, members);
}
/// <summary>
/// Thread operation to calculate statistics for [stat].
///
/// [stat] is guaranteed to be unique, however stat.Assignment is not, hence stat.Assignment must be locked.
///
/// Currently stat.Assignment.AssignmentStatistics is the only member to be R/W locked (it is written
/// exclusively through ThreadSafeIndex), since that is all that is modified.
/// </summary>
/// <param name="statistics">Flags selecting which statistics are calculated</param>
/// <param name="stat">The assignment / filter combination to calculate for</param>
/// <param name="realClusters">Clusters passed to the silhouette calculation</param>
/// <param name="metric">Custom distance metric (may be null, in which case the custom distance is skipped)</param>
/// <param name="prog">Parallel progress handler, incremented once per call</param>
private static void Thread_CalculateAssignmentStatistics([Const] EClustererStatistics statistics, [MutableUnsafe] ForStat stat, [Const] Cluster[] realClusters, [Const] ConfigurationMetric metric, [MutableSafe] ProgressParallelHandler prog)
{
    prog.SafeIncrement();

    // STATS: Distance from avg
    // (skipped entirely when the cluster has no centre vector)
    if (stat.ClusterVector != null)
    {
        // Euclidean
        if (statistics.HasFlag(EClustererStatistics.EuclideanFromAverage))
        {
            double ed = Maths.Euclidean(stat.AssignmentVector.Values, stat.ClusterVector);

            stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG), ed);
            stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG_SQUARED), ed * ed);
        }

        // Custom (if applicable)
        // Not repeated when the custom metric IS Euclidean and Euclidean has already been recorded above
        if (metric != null
            && statistics.HasFlag(EClustererStatistics.DistanceFromAverage)
            && !(metric.Args.Id == Algo.ID_METRIC_EUCLIDEAN && statistics.HasFlag(EClustererStatistics.EuclideanFromAverage)))
        {
            string key1 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG;
            string key2 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG_SQUARED;
            double dd = metric.Calculate(stat.AssignmentVector.Values, stat.ClusterVector);

            stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key1), dd);
            stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key2), dd * dd);
        }
    }

    // STATS: Silhouette
    Cluster nextNearestCluster = null;

    if (statistics.HasFlag(EClustererStatistics.SilhouetteWidth))
    {
        double silhouetteWidth;
        double nextNearestClusterId;

        ClustererStatisticsHelper.CalculateSilhouette(stat, realClusters, out silhouetteWidth, out nextNearestCluster);

        // The helper is not guaranteed (from what is visible here) to find a next-nearest cluster,
        // e.g. when there is nothing to compare against. Record NaN rather than dereferencing null.
        // NaN is also used when the short name is not numeric.
        if (nextNearestCluster == null || !double.TryParse(nextNearestCluster.ShortName, out nextNearestClusterId))
        {
            nextNearestClusterId = double.NaN;
        }

        // Silhouette
        stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_SILHOUETTE_WIDTH), silhouetteWidth);
        stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_NEXT_NEAREST_CLUSTER), nextNearestClusterId);
    }

    // STATS: Score
    // Only recorded for the unfiltered (whole vector) case
    if (stat.ObsFilter == null)
    {
        // Score
        stat.Assignment.AssignmentStatistics.ThreadSafeIndex(STAT_ASSIGNMENT_SCORE, stat.Assignment.Score);

        // Next nearest cluster (remains null when the silhouette was not calculated)
        // Only one ForStat per Assignment has ObsFilter == null so thread safe not required
        stat.Assignment.NextNearestCluster = nextNearestCluster;
    }
}
/// <summary>
/// Creates the argument set for a clusterer.
/// </summary>
/// <param name="id">Forwarded to the base constructor</param>
/// <param name="source">Forwarded to the base constructor</param>
/// <param name="sigFilter">Stored as PeakFilter</param>
/// <param name="distance">Stored as Distance</param>
/// <param name="atypes">Stored as ObsFilter</param>
/// <param name="splitGroups">Stored as SplitGroups</param>
/// <param name="suppressMetric">Stored as Statistics (note: despite the parameter name, this is the statistics flag set itself)</param>
/// <param name="parameters">Forwarded to the base constructor</param>
/// <param name="clusterNamePrefix">Stored as OverrideShortName</param>
public ArgsClusterer(
    string id,
    IMatrixProvider source,
    PeakFilter sigFilter,
    ConfigurationMetric distance,
    ObsFilter atypes,
    bool splitGroups,
    EClustererStatistics suppressMetric,
    object[] parameters,
    string clusterNamePrefix)
    : base(id, source, parameters)
{
    // Input selection
    PeakFilter = sigFilter;
    Distance = distance;
    ObsFilter = atypes;
    SplitGroups = splitGroups;

    // Output options
    Statistics = suppressMetric;
    OverrideShortName = clusterNamePrefix;
}
/// <summary>
/// Recalculates the statistical set of every repetition.
///
/// For each repetition the owner's configured algorithm is executed to obtain the value and
/// distance matrices, which are then handed to the repetition's own RecalculateStatistics.
/// </summary>
/// <param name="core">Core</param>
/// <param name="stats">What to calculate</param>
/// <param name="prog">Report progress to</param>
public void RecalculateStatistics(Core core, EClustererStatistics stats, ProgressReporter prog)
{
    foreach (ResultClusterer repetition in Repetitions)
    {
        var configuration = Owner.ClustererConfiguration;

        IntensityMatrix valueMatrix;
        DistanceMatrix distanceMatrix;

        // NOTE(review): the "-1, true" arguments are passed through unchanged; their meaning is
        // defined by ExecuteAlgorithm, which is not visible from here.
        configuration.GetAlgorithmOrThrow().ExecuteAlgorithm(core, -1, true, configuration, null, prog, out valueMatrix, out distanceMatrix);

        repetition.RecalculateStatistics(core, configuration.Distance, valueMatrix, distanceMatrix, stats, prog);
    }
}
/// <summary>
/// Calculates the statistics that describe the algorithm's result as a whole
/// (as opposed to per-cluster or per-assignment statistics).
/// </summary>
/// <param name="core">Core, forwarded to AddAveragedStatistics</param>
/// <param name="statistics">Flags selecting which summary statistics are calculated</param>
/// <param name="realClusters">Clusters passed to the BIC calculation</param>
private void CalculateSummaryStatistics(Core core, EClustererStatistics statistics, Cluster[] realClusters)
{
    bool wantsAverages = statistics.HasFlag(EClustererStatistics.AlgorithmAverages);
    bool wantsBic = statistics.HasFlag(EClustererStatistics.BayesianInformationCriterion);

    // Averages over all assignments
    if (wantsAverages)
    {
        AddAveragedStatistics(core, ClustererStatistics, Assignments);
    }

    // Bayesian information criterion
    if (wantsBic)
    {
        ClustererStatistics[STAT_CLUSTERER_BIC] = ClustererStatisticsHelper.CalculateBic(realClusters, Assignments);
    }
}
/// <summary>
/// Runs the selected batch operations (CSV export, statistics recalculation, information export,
/// resave) over each of [tests] and returns a textual log of what was done.
/// </summary>
/// <param name="options">Which operations to perform</param>
/// <param name="tests">The result files to process</param>
/// <param name="stats">Statistics to calculate when EUpdateResults.Statistics is selected</param>
/// <param name="proggy">Report progress to</param>
/// <returns>Log text describing the outcome and timings for each file</returns>
private string BatchProcess(EUpdateResults options, IEnumerable<ClusterEvaluationPointer> tests, EClustererStatistics stats, ProgressReporter proggy)
{
    StringBuilder sb = new StringBuilder();

    foreach (ClusterEvaluationPointer res in tests)
    {
        sb.AppendLine("CONFIG: " + res.Configuration.ParameterConfigAsString);
        sb.AppendLine("PARAMS: " + res.Configuration.ParameterValuesAsString);
        sb.AppendLine("NAME: " + res.OverrideDisplayName);
        sb.AppendLine("FILE: " + res.FileName);

        if (!res.HasResults)
        {
            sb.AppendLine(" - No results.");
            continue;
        }

        Stopwatch timer = Stopwatch.StartNew();

        // The results only need loading for the operations that actually use them
        proggy.Enter("Loading results");
        bool load = options.Has(EUpdateResults.Csv | EUpdateResults.Statistics | EUpdateResults.Resave);
        ClusterEvaluationResults set = load ? this.LoadResults(res.FileName, proggy) : null;
        proggy.Leave();

        if (load && set == null)
        {
            sb.AppendLine(" - Load failed.");
            continue;
        }

        sb.AppendLine(" - LOAD-TIME: " + timer.Elapsed);
        sb.AppendLine();

        if (options.Has(EUpdateResults.Csv))
        {
            // The CSV is written from the list view, so the results must be displayed first.
            // Both steps are marshalled through Invoke since they touch the UI.
            timer.Restart();
            proggy.Enter("Selecting results");
            proggy.SetProgressMarquee();
            this.Invoke((MethodInvoker)(() => this.SelectResults(res.FileName, set)));
            proggy.Leave();
            sb.AppendLine(" - DISPLAY-TIME: " + timer.Elapsed);

            string csvFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".csv");
            csvFileName = UiControls.GetNewFile(csvFileName, checkOriginal: true);

            try
            {
                timer.Restart();
                proggy.Enter("Exporting CSV");

                try
                {
                    proggy.SetProgressMarquee();

                    this.Invoke((MethodInvoker)(() =>
                    {
                        using (StreamWriter sw = new StreamWriter(csvFileName))
                        {
                            this._lvhStatistics.WriteItems(sw, true);
                        }
                    }));
                }
                finally
                {
                    // A failed export is swallowed below and the batch continues, so the progress
                    // level must be left even on failure or every later Enter/Leave is misnested.
                    proggy.Leave();
                }

                sb.AppendLine(" - EXPORT-TIME: " + timer.Elapsed);
                sb.AppendLine(" - EXPORT: " + csvFileName);
            }
            catch (Exception ex)
            {
                // Deliberate best-effort: log the failure and carry on with the remaining operations
                sb.AppendLine(" - Export failed: " + ex.Message);
            }

            sb.AppendLine();
        }

        if (options.Has(EUpdateResults.Statistics))
        {
            foreach (ClusterEvaluationParameterResult rep in set.Results)
            {
                rep.RecalculateStatistics(this._core, stats, proggy);
            }
        }

        if (options.Has(EUpdateResults.Information))
        {
            proggy.Enter("Exporting information");

            string infoFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".txt");
            infoFileName = UiControls.GetNewFile(infoFileName, checkOriginal: true);

            // NOTE(review): the display name is appended without a line break, so it runs straight
            // into the parameter configuration text - confirm whether that is intended.
            StringBuilder info = new StringBuilder();
            info.Append(res.DisplayName);
            info.AppendLine(res.Configuration.ParameterConfigAsString);
            info.AppendLine(res.Configuration.ParameterValuesAsString);
            File.WriteAllText(infoFileName, info.ToString());

            // Matches the Enter above (previously missing, leaving the progress nesting unbalanced)
            proggy.Leave();

            sb.AppendLine(" - INFORMATION: " + infoFileName);
            sb.AppendLine();
        }

        if (options.Has(EUpdateResults.Resave))
        {
            // Keep a copy of the original before overwriting it
            string bakFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".old");
            bakFileName = UiControls.GetNewFile(bakFileName, checkOriginal: true);

            timer.Restart();
            proggy.Enter("Backing up original");
            proggy.SetProgressMarquee();
            File.Copy(res.FileName, bakFileName, false);
            proggy.Leave();
            sb.AppendLine(" - BACKUP-TIME: " + timer.Elapsed);
            sb.AppendLine(" - BACKUP: " + bakFileName);

            timer.Restart();
            proggy.Enter("Saving in latest format");
            SaveResults(this._core, res.FileName, null, set, proggy);
            proggy.Leave();
            sb.AppendLine(" - SAVE-TIME: " + timer.Elapsed);
            sb.AppendLine(" - SAVE: " + res.FileName);
            sb.AppendLine();
        }
    }

    return sb.ToString();
}
/// <summary>
/// Called once the clustering action has completed: gathers the assignments of every real
/// cluster into Assignments and then calculates the statistics.
///
/// Asserts that Assignments is still empty, i.e. that this has not already been called.
/// </summary>
/// <param name="core">Core</param>
/// <param name="metric">Metric for statistics</param>
/// <param name="vmatrix">Value matrix</param>
/// <param name="dmatrix">Distance matrix</param>
/// <param name="statistics">What to calculate</param>
/// <param name="prog">Report progress to</param>
internal void FinalizeResults(Core core, ConfigurationMetric metric, IntensityMatrix vmatrix, DistanceMatrix dmatrix, EClustererStatistics statistics, ProgressReporter prog)
{
    UiControls.Assert(Assignments.IsEmpty(), "FinalizeResults on ClusterResults already called.");

    // Flatten the per-cluster assignment lists into the single overall list
    foreach (Cluster realCluster in RealClusters)
    {
        List<Assignment> members = realCluster.Assignments.List;
        Assignments.AddRange(members);
    }

    RecalculateStatistics(core, metric, vmatrix, dmatrix, statistics, prog);
}
/// <summary>
/// Determines what needs calculating for one observation filter: creates a ForStat for every
/// assignment under [obsFilter] and appends them to [needsCalculating].
///
/// Run in parallel (one call per filter), hence [needsCalculating] is locked when it is modified.
/// </summary>
/// <param name="core">Core</param>
/// <param name="metric">Metric used if a distance matrix has to be created</param>
/// <param name="vmatrix">Unfiltered value matrix</param>
/// <param name="dmatrix">Distance matrix for the unfiltered value matrix (may be null)</param>
/// <param name="statistics">What will be calculated (decides whether a distance matrix is needed)</param>
/// <param name="realClusters">Clusters to obtain the centre vectors of</param>
/// <param name="obsFilter">Observation filter, or null for the whole vector</param>
/// <param name="needsCalculating">Shared output list</param>
/// <param name="progP">Parallel progress handler, incremented once per call</param>
private void Thread_AddFilterToCalculationList([Const] Core core, [Const] ConfigurationMetric metric, [Const] IntensityMatrix vmatrix, [Const] DistanceMatrix dmatrix, [Const] EClustererStatistics statistics, [Const] Cluster[] realClusters, [Const] ObsFilter obsFilter, [MutableUnsafe] List<ForStat> needsCalculating, [MutableSafe] ProgressParallelHandler progP)
{
    progP.SafeIncrement();

    IntensityMatrix vmatFiltered;
    DistanceMatrix dmatFiltered;
    int[] filteredIndices;

    if (obsFilter == null)
    {
        // No filter: use the matrices as provided
        vmatFiltered = vmatrix;
        dmatFiltered = dmatrix;
        filteredIndices = null;
    }
    else
    {
        filteredIndices = vmatrix.Columns.Which(z => obsFilter.Test(z.Observation)).ToArray(); // TODO: Multiple iteration
        vmatFiltered = vmatrix.Subset(null, obsFilter, ESubsetFlags.None);
        dmatFiltered = null; // the provided matrix doesn't apply to the filtered vectors
    }

    Dictionary<Cluster, IReadOnlyList<double>> centreVectors = new Dictionary<Cluster, IReadOnlyList<double>>();

    foreach (Cluster cluster in realClusters)
    {
        /////////////////////
        // ASSIGNMENT STATS
        var centre = cluster.GetCentre(ECentreMode.Average, ECandidateMode.Assignments);
        IReadOnlyList<double> centreVector = centre.Count != 0 ? centre[0] : null;

        // A cluster without a centre keeps a null vector (consumers check for null),
        // so only extract the filtered elements when there is a vector to extract from.
        if (filteredIndices != null && centreVector != null)
        {
            centreVector = centreVector.Extract(filteredIndices);
        }

        centreVectors.Add(cluster, centreVector);
    }

    bool needsDistanceMatrix = statistics.HasFlag(EClustererStatistics.SilhouetteWidth);
    List<ForStat> created = new List<ForStat>();

    foreach (Assignment ass in Assignments)
    {
        ForStat f = new ForStat();
        f.Assignment = ass;
        f.ObsFilter = obsFilter;

        if (filteredIndices != null)
        {
            f.AssignmentVector = vmatFiltered.Vectors[ass.Vector.Index];
        }
        else
        {
            f.AssignmentVector = ass.Vector;
        }

        f.ClusterVector = centreVectors[ass.Cluster];

        // Created lazily (first assignment only) so nothing is calculated when there are no assignments.
        // Must be based on the FILTERED values: for the unfiltered case vmatFiltered is vmatrix itself,
        // for the filtered case the distances must reflect only the observations passing the filter.
        if (needsDistanceMatrix && dmatFiltered == null)
        {
            dmatFiltered = DistanceMatrix.Create(core, vmatFiltered, metric, ProgressReporter.GetEmpty());
        }

        f.DistanceMatrix = dmatFiltered;

        created.Add(f);
    }

    // Single lock per filter rather than one per assignment
    lock (needsCalculating)
    {
        needsCalculating.AddRange(created);
    }
}
/// <summary>
/// Recalculates the statistics.
///
/// The vector count and vector length are always recorded. Everything else (assignment, cluster
/// and summary statistics) is skipped when [statistics] is EClustererStatistics.None.
/// </summary>
/// <param name="core">Core</param>
/// <param name="metric">Metric for statistics</param>
/// <param name="statistics">What to calculate</param>
/// <param name="prog">Report progress to</param>
/// <param name="vmatrix">Value matrix</param>
/// <param name="dmatrix">Distance matrix (optional - if not present will be calculated if necessary)</param>
internal void RecalculateStatistics(Core core, ConfigurationMetric metric, IntensityMatrix vmatrix, DistanceMatrix dmatrix, EClustererStatistics statistics, ProgressReporter prog)
{
    // Basics - always present
    ClustererStatistics[STAT_NUM_VECTORS] = vmatrix.NumRows;
    ClustererStatistics[STAT_LENGTH_OF_VECTORS] = vmatrix.NumCols;

    // Nothing else requested?
    if (statistics == EClustererStatistics.None)
    {
        return;
    }

    // Get the non-insig clusters
    Cluster[] realClusters = RealClusters.ToArray();

    prog.Enter("Calculating statistics");

    // The filters to calculate for: null (no filter) is always included
    List<ObsFilter> groupFilters = new List<ObsFilter> { null };

    bool partialVectorsApplicable = !vmatrix.HasSplitGroups;

    // Defined filters
    if (partialVectorsApplicable && statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForFilters))
    {
        groupFilters.AddRange(core.ObsFilters);
    }

    // Group filters (if not already)
    if (partialVectorsApplicable && statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForGroups))
    {
        AllGroupsFilters(core, groupFilters);
    }

    // INPUT VECTORS: one ForStat per (filter, assignment)
    List<ForStat> needsCalculating = new List<ForStat>();

    prog.Enter("Input vectors");
    ProgressParallelHandler filterProgress = prog.CreateParallelHandler(groupFilters.Count);
    Parallel.ForEach(
        groupFilters,
        obsFilter => Thread_AddFilterToCalculationList(core, metric, vmatrix, dmatrix, statistics, realClusters, obsFilter, needsCalculating, filterProgress));
    prog.Leave();

    // ASSIGNMENT STATS
    prog.Enter("Assignments");
    ProgressParallelHandler assignmentProgress = prog.CreateParallelHandler(needsCalculating.Count);
    Parallel.ForEach(
        needsCalculating,
        forStat => Thread_CalculateAssignmentStatistics(statistics, forStat, realClusters, metric, assignmentProgress));
    prog.Leave();

    // CLUSTER STATS
    prog.Enter("Clusters");
    ProgressParallelHandler clusterProgress = prog.CreateParallelHandler(this.Clusters.Length);
    Parallel.ForEach(
        this.Clusters,
        cluster => Thread_CalculateClusterStatistics(core, statistics, cluster, clusterProgress));
    prog.Leave();

    // SUMMARY STATS
    prog.Enter("Summary");
    CalculateSummaryStatistics(core, statistics, realClusters);
    prog.Leave();

    // "Calculating statistics"
    prog.Leave();
}