Ejemplo n.º 1
0
        /// <summary>
        /// Thread operation fo calculate statistics for [stat].
        ///
        /// [stat] is guarenteed to be unique, however stat.Assignment is not, hence stat.Assignment must be locked.
        ///
        /// Currently only stat.Assignment.AssignmentStatistics is the only member to be R/W locked, since that is all
        /// that is modified.
        /// </summary>
        private static void Thread_CalculateAssignmentStatistics([Const] EClustererStatistics statistics, [MutableUnsafe] ForStat stat, [Const] Cluster[] realClusters, [Const] ConfigurationMetric metric, [MutableSafe] ProgressParallelHandler prog)
        {
            prog.SafeIncrement();

            // STATS: Distance from avg
            if (stat.ClusterVector != null)
            {
                // Euclidean
                if (statistics.HasFlag(EClustererStatistics.EuclideanFromAverage))
                {
                    double ed = Maths.Euclidean(stat.AssignmentVector.Values, stat.ClusterVector);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG), ed);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_EUCLIDEAN_FROM_AVG_SQUARED), ed * ed);
                }

                // Custom (if applicable)
                if (metric != null &&
                    statistics.HasFlag(EClustererStatistics.DistanceFromAverage) &&
                    !(metric.Args.Id == Algo.ID_METRIC_EUCLIDEAN && statistics.HasFlag(EClustererStatistics.EuclideanFromAverage)))
                {
                    string key1 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG;
                    string key2 = metric.ToString() + STAT_ASSIGNMENT_DISTANCE_FROM_AVG_SQUARED;
                    double dd   = metric.Calculate(stat.AssignmentVector.Values, stat.ClusterVector);

                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key1), dd);
                    stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, key2), dd * dd);
                }
            }

            // STATS: Silhouette
            Cluster nextNearestCluster = null;

            if (statistics.HasFlag(EClustererStatistics.SilhouetteWidth))
            {
                double silhouetteWidth;
                double nextNearestClusterId;

                ClustererStatisticsHelper.CalculateSilhouette(stat, realClusters, out silhouetteWidth, out nextNearestCluster);

                if (!double.TryParse(nextNearestCluster.ShortName, out nextNearestClusterId))
                {
                    nextNearestClusterId = double.NaN;
                }

                // Silhouette
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_SILHOUETTE_WIDTH), silhouetteWidth);
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(CreatePartialKey(stat.ObsFilter, STAT_ASSIGNMENT_NEXT_NEAREST_CLUSTER), nextNearestClusterId);
            }

            // STATS: Score
            if (stat.ObsFilter == null)
            {
                // Score
                stat.Assignment.AssignmentStatistics.ThreadSafeIndex(STAT_ASSIGNMENT_SCORE, stat.Assignment.Score);

                // Next nearest cluster
                stat.Assignment.NextNearestCluster = nextNearestCluster; // Only one ForStat per Assignment has ObsFilter == null so thread safe not required
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Determines what needs calculating.
        /// </summary>
        private void Thread_AddFilterToCalculationList([Const] Core core, [Const] ConfigurationMetric metric, [Const] IntensityMatrix vmatrix, [Const] DistanceMatrix dmatrix, [Const] EClustererStatistics statistics, [Const] Cluster[] realClusters, [Const] ObsFilter obsFilter, [MutableUnsafe] List <ForStat> needsCalculating, [MutableSafe] ProgressParallelHandler progP)
        {
            progP.SafeIncrement();

            IntensityMatrix vmatFiltered;
            DistanceMatrix  dmatFiltered;

            int[] filteredIndices;

            if (obsFilter == null)
            {
                vmatFiltered    = vmatrix;
                dmatFiltered    = dmatrix;
                filteredIndices = null;
            }
            else
            {
                filteredIndices = vmatrix.Columns.Which(z => obsFilter.Test(z.Observation)).ToArray();  // TODO: Multuple iteration
                vmatFiltered    = vmatrix.Subset(null, obsFilter, ESubsetFlags.None);
                dmatFiltered    = null;
            }

            Dictionary <Cluster, IReadOnlyList <double> > centreVectors = new Dictionary <Cluster, IReadOnlyList <double> >();

            foreach (Cluster cluster in realClusters)
            {
                /////////////////////
                // ASSIGNMENT STATS
                var centre = cluster.GetCentre(ECentreMode.Average, ECandidateMode.Assignments);
                IReadOnlyList <double> centreVector = centre.Count != 0 ? centre[0] : null;

                if (filteredIndices != null)
                {
                    centreVector = centreVector.Extract(filteredIndices);
                }

                centreVectors.Add(cluster, centreVector);
            }

            foreach (Assignment ass in Assignments)
            {
                ForStat f = new ForStat();
                f.Assignment = ass;
                f.ObsFilter  = obsFilter;

                if (filteredIndices != null)
                {
                    f.AssignmentVector = vmatFiltered.Vectors[ass.Vector.Index];
                }
                else
                {
                    f.AssignmentVector = ass.Vector;
                }

                f.ClusterVector = centreVectors[ass.Cluster];

                if (statistics.HasFlag(EClustererStatistics.SilhouetteWidth))
                {
                    if (dmatFiltered == null)
                    {
                        dmatFiltered = DistanceMatrix.Create(core, vmatrix, metric, ProgressReporter.GetEmpty());
                    }
                }

                f.DistanceMatrix = dmatFiltered;

                lock (needsCalculating)
                {
                    needsCalculating.Add(f);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Thread operation to calculate statistics for [cluster].
        ///
        /// [cluster] is guarenteed to be unique, so needn't be locked.
        /// </summary>
        private static void Thread_CalculateClusterStatistics([Const] Core core, [Const] EClustererStatistics statistics, [MutableSafe] Cluster cluster, [MutableSafe] ProgressParallelHandler prog)
        {
            prog.SafeIncrement();
            cluster.CalculateAveragedStatistics();
            cluster.CalculateCommentFlags();

            Dictionary <string, double> clusterStatistics = cluster.ClusterStatistics;
            List <Assignment>           assignments       = cluster.Assignments.List;

            int hcomp, numcomp, hpeak, numpath;

            ClustererStatisticsHelper.CalculateHighestCompounds(cluster, out hcomp, out numcomp);
            ClustererStatisticsHelper.CalculateHighestPeaks(cluster, out hpeak, out numpath);
            clusterStatistics[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_COMPOUNDS] = hcomp;
            clusterStatistics[STAT_CLUSTER_AVERAGE_NUM_COMPOUNDS]         = numcomp;
            clusterStatistics[STAT_CLUSTER_AVERAGE_HIGHEST_NUM_PEAKS]     = hpeak;
            clusterStatistics[STAT_CLUSTER_AVERAGE_NUM_PATHWAYS]          = numpath;

            //////////////////////////
            // GROUP STATS (cluster)
            if (statistics.HasFlag(EClustererStatistics.ClusterAverages))
            {
                AddAveragedStatistics(core, clusterStatistics, assignments);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Recalculates the statistics.
        /// </summary>
        /// <param name="core">Core</param>
        /// <param name="metric">Metric for statistics</param>
        /// <param name="statistics">What to calculate</param>
        /// <param name="prog">Report progress to</param>
        /// <param name="vmatrix">Value matrix</param>
        /// <param name="dmatrix">Distance matrix (optional - if not present will be calculated if necessary)</param>
        internal void RecalculateStatistics(Core core, ConfigurationMetric metric, IntensityMatrix vmatrix, DistanceMatrix dmatrix, EClustererStatistics statistics, ProgressReporter prog)
        {
            // Add basics
            ClustererStatistics[STAT_NUM_VECTORS]       = vmatrix.NumRows;
            ClustererStatistics[STAT_LENGTH_OF_VECTORS] = vmatrix.NumCols;

            // Don't calculate metrics?
            if (statistics == EClustererStatistics.None)
            {
                return;
            }

            // Get the non-insig clusters
            Cluster[] realClusters = RealClusters.ToArray();

            // If we don't have a DMatrix we should calculate the sil. width manually
            // The DMatrix might be too big to pass to R so its better just to avoid it.
            prog.Enter("Calculating statistics");
            List <ObsFilter> groupFilters = new List <ObsFilter>();

            // No filter
            groupFilters.Add(null);

            if (!vmatrix.HasSplitGroups)
            {
                // Defined filters
                if (statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForFilters))
                {
                    groupFilters.AddRange(core.ObsFilters);
                }

                // Group filters (if not already)
                if (statistics.HasFlag(EClustererStatistics.IncludePartialVectorsForGroups))
                {
                    AllGroupsFilters(core, groupFilters);
                }
            }

            List <ForStat> needsCalculating = new List <ForStat>();

            prog.Enter("Input vectors");
            ProgressParallelHandler progP    = prog.CreateParallelHandler(groupFilters.Count);
            ProgressParallelHandler closure1 = progP;

            Parallel.ForEach(groupFilters, obsFilter => Thread_AddFilterToCalculationList(core, metric, vmatrix, dmatrix, statistics, realClusters, obsFilter, needsCalculating, closure1));
            prog.Leave();

            // ASSIGNMENT STATS
            prog.Enter("Assignments");
            progP = prog.CreateParallelHandler(needsCalculating.Count);
            ProgressParallelHandler closure2 = progP;

            Parallel.ForEach(needsCalculating, z => Thread_CalculateAssignmentStatistics(statistics, z, realClusters, metric, closure2));
            prog.Leave();

            // CLUSTER STATS
            prog.Enter("Clusters");
            progP = prog.CreateParallelHandler(this.Clusters.Length);
            Parallel.ForEach(this.Clusters, z => Thread_CalculateClusterStatistics(core, statistics, z, progP));
            prog.Leave();

            // SUMMARY STATS
            prog.Enter("Summary");
            CalculateSummaryStatistics(core, statistics, realClusters);
            prog.Leave();

            prog.Leave();
        }