Example #1
0
        /// <summary>
        /// Worker routine that computes the statistics for a single [cluster].
        ///
        /// [cluster] is guaranteed to be unique to the calling worker, so it is modified without locking.
        /// </summary>
        /// <param name="core">Core, forwarded to AddAveragedStatistics</param>
        /// <param name="statistics">Flags selecting which optional statistics are calculated</param>
        /// <param name="cluster">Cluster whose statistics are (re)calculated</param>
        /// <param name="prog">Progress handler, incremented once per call</param>
        private static void Thread_CalculateClusterStatistics([Const] Core core, [Const] EClustererStatistics statistics, [MutableSafe] Cluster cluster, [MutableSafe] ProgressParallelHandler prog)
        {
            prog.SafeIncrement();

            // Let the cluster refresh its own derived data first
            cluster.CalculateAveragedStatistics();
            cluster.CalculateCommentFlags();

            Dictionary<string, double> stats   = cluster.ClusterStatistics;
            List<Assignment>           members = cluster.Assignments.List;

            int highestCompounds;
            int compoundCount;
            int highestPeaks;
            int pathwayCount;

            ClustererStatisticsHelper.CalculateHighestCompounds(cluster, out highestCompounds, out compoundCount);
            ClustererStatisticsHelper.CalculateHighestPeaks(cluster, out highestPeaks, out pathwayCount);

            // These four values are always stored, regardless of [statistics]
            stats[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_COMPOUNDS] = highestCompounds;
            stats[STAT_CLUSTER_AVERAGE_NUM_COMPOUNDS]         = compoundCount;
            stats[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_PEAKS]     = highestPeaks;
            stats[STAT_CLUSTER_AVERAGE_NUM_PATHWAYS]          = pathwayCount;

            // GROUP STATS (cluster): only when requested
            if (!statistics.HasFlag(EClustererStatistics.ClusterAverages))
            {
                return;
            }

            AddAveragedStatistics(core, stats, members);
        }
Example #2
0
        /// <summary>
        /// Thread operation to calculate statistics for [stat].
        ///
        /// [stat] is guaranteed to be unique, however stat.Assignment is not, hence stat.Assignment must be locked.
        ///
        /// Currently stat.Assignment.AssignmentStatistics is the only member to be R/W locked (it is only written
        /// through ThreadSafeIndex), since that is all that is modified concurrently.
        /// </summary>
        /// <param name="statistics">Flags selecting which statistics are calculated</param>
        /// <param name="stat">The assignment / filter combination to calculate statistics for</param>
        /// <param name="realClusters">Clusters passed to the silhouette calculation</param>
        /// <param name="metric">Distance metric for the "distance from average" statistic (may be null, in which case that statistic is skipped)</param>
        /// <param name="prog">Progress handler, incremented once per call</param>
        private static void Thread_CalculateAssignmentStatistics([Const] EClustererStatistics statistics, [MutableUnsafe] ForStat stat, [Const] Cluster[] realClusters, [Const] ConfigurationMetric metric, [MutableSafe] ProgressParallelHandler prog)
        {
            prog.SafeIncrement();

            // STATS: Distance from avg (needs the cluster's centre vector)
            if (stat.ClusterVector != null)
            {
                // Euclidean
                if (statistics.HasFlag(EClustererStatistics.EuclideanFromAverage))
                {
                    double ed = Maths.Euclidean(stat.AssignmentVector.Values, stat.ClusterVector);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG), ed);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG_SQUARED), ed * ed);
                }

                // Custom (if applicable).
                // Skipped when the custom metric is Euclidean and the Euclidean statistic above was
                // already requested, since that would store the same distance twice.
                if (metric != null &&
                    statistics.HasFlag(EClustererStatistics.DistanceFromAverage) &&
                    !(metric.Args.Id == Algo.ID_METRIC_EUCLIDEAN && statistics.HasFlag(EClustererStatistics.EuclideanFromAverage)))
                {
                    string key1 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG;
                    string key2 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG_SQUARED;
                    double dd   = metric.Calculate(stat.AssignmentVector.Values, stat.ClusterVector);

                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key1), dd);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key2), dd * dd);
                }
            }

            // STATS: Silhouette
            // Remains null when the silhouette width is not requested
            Cluster nextNearestCluster = null;

            if (statistics.HasFlag(EClustererStatistics.SilhouetteWidth))
            {
                double silhouetteWidth;
                double nextNearestClusterId;

                ClustererStatisticsHelper.CalculateSilhouette(stat, realClusters, out silhouetteWidth, out nextNearestCluster);

                // The next-nearest cluster is stored as a number; a missing cluster or a
                // non-numeric short name is recorded as NaN rather than failing the whole run.
                if (nextNearestCluster == null ||
                    !double.TryParse(nextNearestCluster.ShortName, out nextNearestClusterId))
                {
                    nextNearestClusterId = double.NaN;
                }

                // Silhouette
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_SILHOUETTE_WIDTH), silhouetteWidth);
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_NEXT_NEAREST_CLUSTER), nextNearestClusterId);
            }

            // STATS: Score (unfiltered pass only)
            if (stat.ObsFilter == null)
            {
                // Score
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(STAT_ASSIGNMENT_SCORE, stat.Assignment.Score);

                // Next nearest cluster
                stat.Assignment.NextNearestCluster = nextNearestCluster; // Only one ForStat per Assignment has ObsFilter == null so thread safe not required
            }
        }
Example #3
0
 /// <summary>
 /// Creates the clusterer arguments. [id], [source] and [parameters] are passed to the base
 /// class; the remaining values are stored on this instance.
 /// </summary>
 /// <param name="id">Passed to the base constructor</param>
 /// <param name="source">Passed to the base constructor</param>
 /// <param name="sigFilter">Stored as PeakFilter</param>
 /// <param name="distance">Stored as Distance</param>
 /// <param name="atypes">Stored as ObsFilter</param>
 /// <param name="splitGroups">Stored as SplitGroups</param>
 /// <param name="suppressMetric">Stored as Statistics</param>
 /// <param name="parameters">Passed to the base constructor</param>
 /// <param name="clusterNamePrefix">Stored as OverrideShortName</param>
 public ArgsClusterer(string id, IMatrixProvider source, PeakFilter sigFilter, ConfigurationMetric distance, ObsFilter atypes, bool splitGroups, EClustererStatistics suppressMetric, object[] parameters, string clusterNamePrefix)
     : base(id, source, parameters)
 {
     // Filters
     PeakFilter = sigFilter;

     // Metric
     Distance = distance;

     ObsFilter = atypes;

     // Options
     SplitGroups = splitGroups;
     Statistics  = suppressMetric;

     // Naming
     OverrideShortName = clusterNamePrefix;
 }
 /// <summary>
 /// Recalculates the statistical set for every repetition.
 ///
 /// For each repetition the owner's algorithm is executed to obtain the value and distance
 /// matrices, which are then handed to the repetition's own RecalculateStatistics.
 /// </summary>
 /// <param name="core">Core</param>
 /// <param name="stats">What to calculate</param>
 /// <param name="prog">Report progress to</param>
 public void RecalculateStatistics(Core core, EClustererStatistics stats, ProgressReporter prog)
 {
     foreach (ResultClusterer repetition in Repetitions)
     {
         IntensityMatrix valueMatrix;
         DistanceMatrix  distanceMatrix;

         var algorithm = Owner.ClustererConfiguration.GetAlgorithmOrThrow();

         algorithm.ExecuteAlgorithm(
             core,
             -1,
             true,
             Owner.ClustererConfiguration,
             null,
             prog,
             out valueMatrix,
             out distanceMatrix);

         repetition.RecalculateStatistics(
             core,
             Owner.ClustererConfiguration.Distance,
             valueMatrix,
             distanceMatrix,
             stats,
             prog);
     }
 }
Example #5
0
        /// <summary>
        /// Calculates the statistics that describe the algorithm's result as a whole
        /// (as opposed to a single cluster or assignment).
        /// </summary>
        /// <param name="core">Core, forwarded to AddAveragedStatistics</param>
        /// <param name="statistics">Flags selecting which summary statistics are calculated</param>
        /// <param name="realClusters">Clusters passed to the BIC calculation</param>
        private void CalculateSummaryStatistics(Core core, EClustererStatistics statistics, Cluster[] realClusters)
        {
            bool wantAverages = statistics.HasFlag(EClustererStatistics.AlgorithmAverages);
            bool wantBic      = statistics.HasFlag(EClustererStatistics.BayesianInformationCriterion);

            // Averages over all assignments
            if (wantAverages)
            {
                AddAveragedStatistics(core, ClustererStatistics, Assignments);
            }

            // Bayesian information criterion
            if (wantBic)
            {
                ClustererStatistics[STAT_CLUSTERER_BIC] = ClustererStatisticsHelper.CalculateBic(realClusters, Assignments);
            }
        }
Example #6
0
        /// <summary>
        /// Runs the operations selected by [options] over each entry of [tests] and
        /// returns a textual log of what was done.
        /// </summary>
        /// <param name="options">Flags selecting the operations: CSV export, statistics recalculation, information export and resave</param>
        /// <param name="tests">The result pointers to process</param>
        /// <param name="stats">Statistics to calculate when statistics are recalculated</param>
        /// <param name="proggy">Report progress to</param>
        /// <returns>The accumulated log text</returns>
        private string BatchProcess(EUpdateResults options, IEnumerable <ClusterEvaluationPointer> tests, EClustererStatistics stats, ProgressReporter proggy)
        {
            StringBuilder sb = new StringBuilder();

            foreach (ClusterEvaluationPointer res in tests)
            {
                sb.AppendLine("CONFIG: " + res.Configuration.ParameterConfigAsString);
                sb.AppendLine("PARAMS: " + res.Configuration.ParameterValuesAsString);
                sb.AppendLine("NAME: " + res.OverrideDisplayName);
                sb.AppendLine("FILE: " + res.FileName);

                if (!res.HasResults)
                {
                    sb.AppendLine(" - No results.");
                    continue;
                }

                // The results are only loaded for the operations that use them (CSV, statistics, resave).
                // NOTE(review): the sections below dereference [set] under their individual flags, which
                // assumes options.Has() returns true when ANY of the combined flags is set - confirm.
                Stopwatch timer = Stopwatch.StartNew();
                proggy.Enter("Loading results");
                bool load = options.Has(EUpdateResults.Csv | EUpdateResults.Statistics | EUpdateResults.Resave);
                ClusterEvaluationResults set = load ? this.LoadResults(res.FileName, proggy) : null;
                proggy.Leave();

                if (load && set == null)
                {
                    sb.AppendLine(" - Load failed.");
                    continue;
                }

                sb.AppendLine(" - LOAD-TIME: " + timer.Elapsed);
                sb.AppendLine();

                if (options.Has(EUpdateResults.Csv))
                {
                    // The list view is populated and written via Invoke (MethodInvoker)
                    timer.Restart();
                    proggy.Enter("Selecting results");
                    proggy.SetProgressMarquee();
                    this.Invoke((MethodInvoker)(() => this.SelectResults(res.FileName, set)));
                    proggy.Leave();
                    sb.AppendLine(" - DISPLAY-TIME: " + timer.Elapsed);

                    string csvFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".csv");
                    csvFileName = UiControls.GetNewFile(csvFileName, checkOriginal: true);

                    try
                    {
                        timer.Restart();
                        proggy.Enter("Exporting CSV");

                        try
                        {
                            proggy.SetProgressMarquee();
                            this.Invoke((MethodInvoker)(() =>
                            {
                                using (StreamWriter sw = new StreamWriter(csvFileName))
                                {
                                    this._lvhStatistics.WriteItems(sw, true);
                                }
                            }));
                        }
                        finally
                        {
                            // A failed export is logged and the batch continues, so the progress
                            // nesting must be unwound even when the export throws.
                            proggy.Leave();
                        }

                        sb.AppendLine(" - EXPORT-TIME: " + timer.Elapsed);
                        sb.AppendLine(" - EXPORT: " + csvFileName);
                    }
                    catch (Exception ex)
                    {
                        // Deliberately best-effort: report the failure and carry on with the remaining operations
                        sb.AppendLine(" - Export failed: " + ex.Message);
                    }

                    sb.AppendLine();
                }

                if (options.Has(EUpdateResults.Statistics))
                {
                    foreach (ClusterEvaluationParameterResult rep in set.Results)
                    {
                        rep.RecalculateStatistics(this._core, stats, proggy);
                    }
                }

                if (options.Has(EUpdateResults.Information))
                {
                    proggy.Enter("Exporting information");

                    string infoFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".txt");
                    infoFileName = UiControls.GetNewFile(infoFileName, checkOriginal: true);

                    StringBuilder info = new StringBuilder();
                    // NOTE(review): Append (not AppendLine) puts the display name and the parameter
                    // configuration on the same line - confirm this is intended.
                    info.Append(res.DisplayName);
                    info.AppendLine(res.Configuration.ParameterConfigAsString);
                    info.AppendLine(res.Configuration.ParameterValuesAsString);

                    File.WriteAllText(infoFileName, info.ToString());

                    // Balance the Enter above, as every other section does
                    proggy.Leave();

                    sb.AppendLine(" - INFORMATION: " + infoFileName);
                    sb.AppendLine();
                }

                if (options.Has(EUpdateResults.Resave))
                {
                    // Keep a copy of the original file before overwriting it
                    string bakFileName = Path.Combine(Path.GetDirectoryName(res.FileName), Path.GetFileNameWithoutExtension(res.FileName) + ".old");
                    bakFileName = UiControls.GetNewFile(bakFileName, checkOriginal: true);
                    timer.Restart();
                    proggy.Enter("Backing up original");
                    proggy.SetProgressMarquee();
                    File.Copy(res.FileName, bakFileName, false);
                    proggy.Leave();
                    sb.AppendLine(" - BACKUP-TIME: " + timer.Elapsed);
                    sb.AppendLine(" - BACKUP: " + bakFileName);

                    timer.Restart();
                    proggy.Enter("Saving in latest format");
                    SaveResults(this._core, res.FileName, null, set, proggy);
                    proggy.Leave();
                    sb.AppendLine(" - SAVE-TIME: " + timer.Elapsed);
                    sb.AppendLine(" - SAVE: " + res.FileName);
                    sb.AppendLine();
                }
            }

            return(sb.ToString());
        }
Example #7
0
        /// <summary>
        /// Action completed - gathers the assignments of all real clusters and calculates the statistics.
        ///
        /// Asserts that Assignments is still empty, i.e. that this has not already been called.
        /// </summary>
        /// <param name="core">Core</param>
        /// <param name="metric">Metric for statistics</param>
        /// <param name="vmatrix">Value matrix</param>
        /// <param name="dmatrix">Distance matrix</param>
        /// <param name="statistics">What to calculate</param>
        /// <param name="prog">Report progress to</param>
        internal void FinalizeResults(Core core, ConfigurationMetric metric, IntensityMatrix vmatrix, DistanceMatrix dmatrix, EClustererStatistics statistics, ProgressReporter prog)
        {
            bool notYetFinalized = Assignments.IsEmpty();
            UiControls.Assert(notYetFinalized, "FinalizeResults on ClusterResults already called.");

            // Flatten the per-cluster assignments into the single result-wide list
            foreach (Cluster realCluster in RealClusters)
            {
                List<Assignment> clusterAssignments = realCluster.Assignments.List;
                Assignments.AddRange(clusterAssignments);
            }

            RecalculateStatistics(core, metric, vmatrix, dmatrix, statistics, prog);
        }
Example #8
0
        /// <summary>
        /// Determines what needs calculating: creates one ForStat per assignment for the
        /// given [obsFilter] and appends them to [needsCalculating].
        ///
        /// [needsCalculating] is shared between workers and is only modified under a lock on the list itself.
        /// </summary>
        /// <param name="core">Core, used if a distance matrix has to be created</param>
        /// <param name="metric">Metric, used if a distance matrix has to be created</param>
        /// <param name="vmatrix">Value matrix (unfiltered)</param>
        /// <param name="dmatrix">Distance matrix for the unfiltered case (may be null)</param>
        /// <param name="statistics">What will be calculated; decides whether a distance matrix is needed</param>
        /// <param name="realClusters">Clusters whose centre vectors are calculated</param>
        /// <param name="obsFilter">Observation filter to apply, or null for no filter</param>
        /// <param name="needsCalculating">Shared output list</param>
        /// <param name="progP">Progress handler, incremented once per call</param>
        private void Thread_AddFilterToCalculationList([Const] Core core, [Const] ConfigurationMetric metric, [Const] IntensityMatrix vmatrix, [Const] DistanceMatrix dmatrix, [Const] EClustererStatistics statistics, [Const] Cluster[] realClusters, [Const] ObsFilter obsFilter, [MutableUnsafe] List <ForStat> needsCalculating, [MutableSafe] ProgressParallelHandler progP)
        {
            progP.SafeIncrement();

            IntensityMatrix vmatFiltered;
            DistanceMatrix  dmatFiltered;

            int[] filteredIndices;

            if (obsFilter == null)
            {
                // No filter: use the matrices as supplied
                vmatFiltered    = vmatrix;
                dmatFiltered    = dmatrix;
                filteredIndices = null;
            }
            else
            {
                filteredIndices = vmatrix.Columns.Which(z => obsFilter.Test(z.Observation)).ToArray();  // TODO: Multiple iteration
                vmatFiltered    = vmatrix.Subset(null, obsFilter, ESubsetFlags.None);
                dmatFiltered    = null;
            }

            Dictionary <Cluster, IReadOnlyList <double> > centreVectors = new Dictionary <Cluster, IReadOnlyList <double> >();

            foreach (Cluster cluster in realClusters)
            {
                /////////////////////
                // ASSIGNMENT STATS
                var centre = cluster.GetCentre(ECentreMode.Average, ECandidateMode.Assignments);
                IReadOnlyList <double> centreVector = centre.Count != 0 ? centre[0] : null;

                // A cluster without a centre keeps a null vector (consumers check for null);
                // only an existing centre can be reduced to the filtered columns.
                if (filteredIndices != null && centreVector != null)
                {
                    centreVector = centreVector.Extract(filteredIndices);
                }

                centreVectors.Add(cluster, centreVector);
            }

            // Collected locally so the shared list only needs to be locked once
            List <ForStat> created = new List <ForStat>();

            foreach (Assignment ass in Assignments)
            {
                ForStat f = new ForStat();
                f.Assignment = ass;
                f.ObsFilter  = obsFilter;

                if (filteredIndices != null)
                {
                    f.AssignmentVector = vmatFiltered.Vectors[ass.Vector.Index];
                }
                else
                {
                    f.AssignmentVector = ass.Vector;
                }

                f.ClusterVector = centreVectors[ass.Cluster];

                if (statistics.HasFlag(EClustererStatistics.SilhouetteWidth))
                {
                    // Created lazily, at most once per call, and only if there is at least one assignment.
                    // NOTE(review): this is built from the unfiltered [vmatrix] even when a filter is
                    // active - confirm whether [vmatFiltered] was intended.
                    if (dmatFiltered == null)
                    {
                        dmatFiltered = DistanceMatrix.Create(core, vmatrix, metric, ProgressReporter.GetEmpty());
                    }
                }

                f.DistanceMatrix = dmatFiltered;

                created.Add(f);
            }

            lock (needsCalculating)
            {
                needsCalculating.AddRange(created);
            }
        }
Example #9
0
        /// <summary>
        /// Recalculates the statistics in four phases: building the list of work items
        /// (one per assignment and filter), assignment statistics, cluster statistics and
        /// finally the summary statistics. The first three phases run in parallel.
        /// </summary>
        /// <param name="core">Core</param>
        /// <param name="metric">Metric for statistics</param>
        /// <param name="vmatrix">Value matrix</param>
        /// <param name="dmatrix">Distance matrix (optional - if not present will be calculated if necessary)</param>
        /// <param name="statistics">What to calculate</param>
        /// <param name="prog">Report progress to</param>
        internal void RecalculateStatistics(Core core, ConfigurationMetric metric, IntensityMatrix vmatrix, DistanceMatrix dmatrix, EClustererStatistics statistics, ProgressReporter prog)
        {
            // The basics are always recorded
            ClustererStatistics[STAT_NUM_VECTORS]       = vmatrix.NumRows;
            ClustererStatistics[STAT_LENGTH_OF_VECTORS] = vmatrix.NumCols;

            // Nothing else requested?
            if (statistics == EClustererStatistics.None)
            {
                return;
            }

            // The non-insignificant clusters
            Cluster[] realClusters = RealClusters.ToArray();

            // If we don't have a DMatrix the silhouette width is calculated manually.
            // The DMatrix might be too big to pass to R so it is better just to avoid it.
            prog.Enter("Calculating statistics");

            // The first entry (null) stands for "no filter"
            List<ObsFilter> groupFilters = new List<ObsFilter> { null };

            bool partialVectorsPossible = !vmatrix.HasSplitGroups;

            // Defined filters
            if (partialVectorsPossible && statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForFilters))
            {
                groupFilters.AddRange(core.ObsFilters);
            }

            // Group filters (if not already)
            if (partialVectorsPossible && statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForGroups))
            {
                AllGroupsFilters(core, groupFilters);
            }

            List<ForStat> needsCalculating = new List<ForStat>();

            // PHASE 1: work items
            prog.Enter("Input vectors");
            ProgressParallelHandler filterProgress = prog.CreateParallelHandler(groupFilters.Count);

            Parallel.ForEach(
                groupFilters,
                filter => Thread_AddFilterToCalculationList(core, metric, vmatrix, dmatrix, statistics, realClusters, filter, needsCalculating, filterProgress));
            prog.Leave();

            // PHASE 2: assignment stats
            prog.Enter("Assignments");
            ProgressParallelHandler assignmentProgress = prog.CreateParallelHandler(needsCalculating.Count);

            Parallel.ForEach(
                needsCalculating,
                item => Thread_CalculateAssignmentStatistics(statistics, item, realClusters, metric, assignmentProgress));
            prog.Leave();

            // PHASE 3: cluster stats (over this.Clusters, not just the real clusters)
            prog.Enter("Clusters");
            ProgressParallelHandler clusterProgress = prog.CreateParallelHandler(this.Clusters.Length);

            Parallel.ForEach(
                this.Clusters,
                cluster => Thread_CalculateClusterStatistics(core, statistics, cluster, clusterProgress));
            prog.Leave();

            // PHASE 4: summary stats
            prog.Enter("Summary");
            CalculateSummaryStatistics(core, statistics, realClusters);
            prog.Leave();

            // Matches "Calculating statistics"
            prog.Leave();
        }