示例#1
0
 /// <summary>
 /// Writes the buffered contents of the current cluster through the cluster writer
 /// when the buffer has been flagged as dirty, then clears the dirty flag.
 /// Does nothing when the buffer is not dirty.
 /// </summary>
 private void FlushCurrentCluster()
 {
     // Nothing pending: leave the writer untouched.
     if (!_currentClusterDirty)
     {
         return;
     }

     var pendingBytes = _currentClusterBuffer;

     // Write the whole buffer (offset 0, full length) for the current cluster.
     _clusterWriter.WriteCluster(_currentCluster, pendingBytes, 0, pendingBytes.Length);
     _currentClusterDirty = false;
 }
示例#2
0
        /// <summary>
        /// Clusters the data but does not store the results; each finished cluster is written
        /// immediately to the cluster writer provided.
        /// </summary>
        /// <param name="data">Features to cluster. The list is sorted in place with the mass comparer.</param>
        /// <param name="writer">Receives every finished cluster through <c>WriteCluster</c>.</param>
        /// <exception cref="NullReferenceException">
        /// Thrown when <paramref name="data"/> is null or contains a null element.
        /// (The exception type is kept for backward compatibility with existing callers.)
        /// </exception>
        public void ClusterAndProcess(List<T> data, IClusterWriter<U> writer)
        {
            /*
             * This clustering algorithm first sorts the list of input UMC's by mass.  It then iterates
             * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
             * When it finds a gap larger than the mass (ppm) tolerance specified by the user,
             * it processes the block of data before the gap, i.e. the features from the start of the
             * current partition up to the current index.
             */

            // Make sure we have data to cluster first.
            if (data == null)
            {
                throw new NullReferenceException("The input feature data list was null.  Cannot process this data.");
            }

            // Make sure there is no null UMC data in the input list.
            // FindIndex returns -1 when nothing matches, so index 0 is a valid hit and must be rejected too.
            var nullIndex = data.FindIndex(x => x == null);

            if (nullIndex >= 0)
            {
                throw new NullReferenceException("The feature at index " + nullIndex + " was null.  Cannot process this data.");
            }

            OnNotify("Sorting cluster mass list");

            // The first thing we do is to sort the features based on mass since we know that has the least variability in the data across runs.
            data.Sort(m_massComparer);

            // Now partition the data based on mass ranges and the parameter values.
            var massTolerance = Parameters.Tolerances.Mass;

            // This is the index of first feature of a given mass partition.
            var startUMCIndex = 0;
            var totalFeatures = data.Count;

            OnNotify("Detecting mass partitions");

            // Progress bookkeeping: a notification is raised each time more than tenPercent
            // features have been visited since the previous notification.
            var tenPercent = Convert.ToInt32(totalFeatures * .1);
            var counter    = 0;
            var percent    = 0;

            // Running id handed out to clusters in the order they are written.
            var clusterId = 0;

            for (var i = 0; i < totalFeatures - 1; i++)
            {
                if (counter > tenPercent)
                {
                    counter  = 0;
                    percent += 10;
                    OnNotify(string.Format("Clustering Completed...{0}%", percent));
                }
                counter++;

                // Here we compute the ppm mass difference between consecutive features (based on mass).
                // This will determine if we cluster a block of data or not.
                var umcX = data[i];
                var umcY = data[i + 1];
                var ppm  = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

                // If the difference is greater than the tolerance then we cluster
                //  - we don't check the sign of the ppm because the data should be sorted based on mass.
                if (ppm > massTolerance)
                {
                    // If the partition start index equals the current index, the partition holds a single
                    // feature: it could not find any other features near it within the mass tolerance specified.
                    if (startUMCIndex == i)
                    {
                        var cluster = new U();
                        cluster.AmbiguityScore = m_maxDistance;

                        umcX.SetParentFeature(cluster);
                        cluster.AddChildFeature(umcX);

                        cluster.CalculateStatistics(Parameters.CentroidRepresentation);
                        cluster.Id = clusterId++;
                        writer.WriteCluster(cluster);
                    }
                    else
                    {
                        // Otherwise we have more than one feature to consider.
                        var distances     = CalculatePairWiseDistances(startUMCIndex, i, data);
                        var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                        var blockClusters = LinkFeatures(distances, localClusters);

                        CalculateAmbiguityScore(blockClusters);

                        // NOTE(review): this branch writes localClusters.Values while the final partition
                        // below writes blockClusters when there is more than one cluster. Presumably
                        // LinkFeatures updates localClusters in place so both are equivalent - confirm.
                        clusterId = AssignIdsAndWrite(localClusters.Values, writer, clusterId);
                    }

                    // The next partition begins right after the gap.
                    startUMCIndex = i + 1;
                }
            }

            // Make sure that we cluster what is left over.
            if (startUMCIndex < totalFeatures)
            {
                OnNotify(string.Format("Clustering last partition...{0}%", percent));
                var distances     = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
                var blockClusters = LinkFeatures(distances, localClusters);

                CalculateAmbiguityScore(blockClusters);

                if (localClusters.Count < 2)
                {
                    clusterId = AssignIdsAndWrite(localClusters.Values, writer, clusterId);
                }
                else
                {
                    clusterId = AssignIdsAndWrite(blockClusters, writer, clusterId);
                }
            }
        }

        /// <summary>
        /// Assigns sequential ids to the given clusters, calculates their statistics and writes each one to the writer.
        /// </summary>
        /// <param name="clusters">Clusters to finalize, processed in enumeration order.</param>
        /// <param name="writer">Writer that receives each cluster.</param>
        /// <param name="nextClusterId">Id to assign to the first cluster.</param>
        /// <returns>The next unused cluster id.</returns>
        private int AssignIdsAndWrite(IEnumerable<U> clusters, IClusterWriter<U> writer, int nextClusterId)
        {
            foreach (var cluster in clusters)
            {
                cluster.Id = nextClusterId++;
                CalculateStatistics(cluster);
                writer.WriteCluster(cluster);
            }

            return nextClusterId;
        }