/// <summary>
/// Flushes the in-memory cluster buffer through the cluster writer, but only when it
/// holds unsaved (dirty) changes; the dirty flag is cleared once the write completes.
/// </summary>
private void FlushCurrentCluster()
{
    // Nothing pending — avoid a redundant write.
    if (!_currentClusterDirty)
    {
        return;
    }

    _clusterWriter.WriteCluster(_currentCluster, _currentClusterBuffer, 0, _currentClusterBuffer.Length);
    _currentClusterDirty = false;
}
/// <summary>
/// Clusters the data but does not store the results; instead each completed cluster is
/// immediately written to the stream writer provided.
/// </summary>
/// <param name="data">Features to cluster. Must be non-null and contain no null entries.</param>
/// <param name="writer">Sink that receives every completed cluster.</param>
/// <exception cref="NullReferenceException">
/// Thrown when <paramref name="data"/> is null or contains a null feature.
/// NOTE(review): kept for backward compatibility with existing callers;
/// <see cref="ArgumentNullException"/> would be the idiomatic choice.
/// </exception>
public void ClusterAndProcess(List<T> data, IClusterWriter<U> writer)
{
    /*
     * This clustering algorithm first sorts the list of input UMC's by mass. It then iterates
     * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
     * When it finds gaps larger or equal to the mass (ppm) tolerance specified by the user,
     * it will process the data before the gap (a block) until the current index of the features in question.
     */

    // Make sure we have data to cluster first.
    if (data == null)
    {
        throw new NullReferenceException("The input feature data list was null. Cannot process this data.");
    }

    // Make sure there is no null UMC data in the input list.
    // BUG FIX: List<T>.FindIndex returns -1 when nothing matches, so the original
    // test (nullIndex > 0) silently skipped a null feature sitting at index 0.
    var nullIndex = data.FindIndex(x => x == null);
    if (nullIndex >= 0)
    {
        throw new NullReferenceException("The feature at index " + nullIndex + " was null. Cannot process this data.");
    }

    OnNotify("Sorting cluster mass list");

    // Sort the features by mass first, since mass has the least variability across runs;
    // gaps in the sorted sequence then serve as reliable partition boundaries.
    data.Sort(m_massComparer);

    // Now partition the data based on mass ranges and the parameter values.
    var massTolerance = Parameters.Tolerances.Mass;

    // Index of the first feature of the current mass partition.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;

    OnNotify("Detecting mass partitions");
    var tenPercent = Convert.ToInt32(totalFeatures * .1);
    var counter = 0;
    var percent = 0;
    var clusterId = 0;

    for (var i = 0; i < totalFeatures - 1; i++)
    {
        // Progress reporting in roughly 10% increments.
        if (counter > tenPercent)
        {
            counter = 0;
            percent += 10;
            OnNotify(string.Format("Clustering Completed...{0}%", percent));
        }
        counter++;

        // Here we compute the ppm mass difference between consecutive features (based on mass).
        // This will determine if we cluster a block of data or not.
        var umcX = data[i];
        var umcY = data[i + 1];
        var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

        // If the difference is greater than the tolerance then we cluster
        // - we dont check the sign of the ppm because the data should be sorted based on mass.
        if (ppm > massTolerance)
        {
            // If startUMCIndex equals the current index, the feature at startUMCIndex
            // found no neighbours within the mass tolerance: emit it as a singleton cluster.
            if (startUMCIndex == i)
            {
                var cluster = new U();
                cluster.AmbiguityScore = m_maxDistance;
                umcX.SetParentFeature(cluster);
                cluster.AddChildFeature(umcX);
                cluster.CalculateStatistics(Parameters.CentroidRepresentation);
                cluster.Id = clusterId++;
                writer.WriteCluster(cluster);
            }
            else
            {
                // Otherwise we have more than one feature to consider.
                var distances = CalculatePairWiseDistances(startUMCIndex, i, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);

                // NOTE(review): this branch always writes localClusters.Values, while the
                // leftover-partition handling below writes blockClusters when more than one
                // local cluster exists. This is only correct if LinkFeatures mutates
                // localClusters in place — confirm the asymmetry is intentional.
                foreach (var cluster in localClusters.Values)
                {
                    cluster.Id = clusterId++;
                    CalculateStatistics(cluster);
                    writer.WriteCluster(cluster);
                }
            }
            startUMCIndex = i + 1;
        }
    }

    // Make sure that we cluster what is left over.
    if (startUMCIndex < totalFeatures)
    {
        OnNotify(string.Format("Clustering last partition...{0}%", percent));
        var distances = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
        var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
        var blockClusters = LinkFeatures(distances, localClusters);
        CalculateAmbiguityScore(blockClusters);

        if (localClusters.Count < 2)
        {
            foreach (var cluster in localClusters.Values)
            {
                cluster.Id = clusterId++;
                CalculateStatistics(cluster);
                writer.WriteCluster(cluster);
            }
        }
        else
        {
            foreach (var cluster in blockClusters)
            {
                cluster.Id = clusterId++;
                CalculateStatistics(cluster);
                writer.WriteCluster(cluster);
            }
        }
    }
}