/// <summary>
/// First-pass clustering: groups <see cref="MSFeatureLight" /> entries (typically corresponding to
/// lines in an _isos file) by M/Z into <see cref="UMCLight" /> features.
/// </summary>
/// <param name="msFeatures">The MS features handed to the first-pass clusterer.</param>
/// <param name="progress">The progress reporter.</param>
/// <returns>
/// The clustered features that contain at least one MS feature, each with its monoisotopic mass
/// recomputed from its M/Z and charge state.
/// </returns>
private List<UMCLight> FirstPassClustering(List<MSFeatureLight> msFeatures, IProgress<ProgressData> progress)
{
    var populatedFeatures = new List<UMCLight>();

    foreach (var candidate in firstPassClusterer.Cluster(msFeatures, progress))
    {
        // Clusters without any MS features are dropped from the result.
        if (candidate.MsFeatures.Count != 0)
        {
            ////candidate.CalculateStatistics(ClusterCentroidRepresentation.Median);

            // Neutral mass: remove one proton mass per charge from the charge-scaled M/Z.
            var charge = candidate.ChargeState;
            candidate.MassMonoisotopic = (candidate.Mz * charge) - (SubAtomicParticleLibrary.MASS_PROTON * charge);

            populatedFeatures.Add(candidate);
        }
    }

    return populatedFeatures;
}
/// <summary>
/// Aligns a set of alignee features to a set of baseline features, reports cluster scores computed
/// before and after the alignment step, and saves the filtered matches.
/// </summary>
/// <remarks>
/// Every feature's Net is overwritten with its scan number normalized to [0, 1] over the scan range
/// of its own collection, and MassMonoisotopicAligned is reset to MassMonoisotopic.
/// NOTE(review): the Min/Max calls presumably throw <see cref="InvalidOperationException" /> when a
/// collection is empty (true if Scan is a non-nullable value type) - confirm whether callers rely on it.
/// </remarks>
/// <param name="baselineFeatures">Features of the baseline dataset; modified in place.</param>
/// <param name="aligneeFeatures">Features of the dataset being aligned; modified in place.</param>
/// <param name="providerX">Spectra provider assigned to the aligner as the baseline provider.</param>
/// <param name="providerY">Spectra provider assigned to the aligner as the alignee provider.</param>
/// <param name="aligner">The aligner that produces the matches.</param>
/// <param name="clusterer">Clusterer run on the combined features before and after the aligner.</param>
/// <param name="matchPath">Path handed to SaveMatches for the filtered matches.</param>
/// <param name="errorPath">Not used by this method.</param>
public void AlignDatasets(
    IEnumerable<UMCLight> baselineFeatures,
    IEnumerable<UMCLight> aligneeFeatures,
    ISpectraProvider providerX,
    ISpectraProvider providerY,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, classAlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    // Materialize each input once. Both sequences are walked several times below (AddRange, Min, Max,
    // the Net loops and the aligner), so a lazily evaluated sequence would otherwise be re-run on
    // every pass and could hand out different objects each time.
    var baseline = baselineFeatures as IList<UMCLight> ?? baselineFeatures.ToList();
    var alignee = aligneeFeatures as IList<UMCLight> ?? aligneeFeatures.ToList();

    // cluster before we do anything else....
    var allFeatures = new List<UMCLight>(baseline.Count + alignee.Count);
    allFeatures.AddRange(baseline);
    allFeatures.AddRange(alignee);

    var maxBaseline = baseline.Max(x => x.Scan);
    var minBaseline = baseline.Min(x => x.Scan);
    var maxAlignee = alignee.Max(x => x.Scan);
    var minAlignee = alignee.Min(x => x.Scan);

    // When every feature of a collection shares one scan the range is zero; dividing by it would
    // store NaN in Net, so those features are pinned to 0 instead.
    var aligneeScanRange = Convert.ToDouble(maxAlignee - minAlignee);
    foreach (var feature in alignee)
    {
        feature.Net = aligneeScanRange > 0
            ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeScanRange
            : 0.0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    var baselineScanRange = Convert.ToDouble(maxBaseline - minBaseline);
    foreach (var feature in baseline)
    {
        feature.Net = baselineScanRange > 0
            ? Convert.ToDouble(feature.Scan - minBaseline) / baselineScanRange
            : 0.0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    // This tells us the differences before we align.
    var clusters = clusterer.Cluster(allFeatures);
    var clusterId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = clusterId++;
    }

    var scorer = new GlobalPeptideClusterScorer();
    var preAlignment = scorer.Score(clusters);

    aligner.AligneeSpectraProvider = providerY;
    aligner.BaselineSpectraProvider = providerX;

    UpdateStatus("Aligning data");

    // Aligner data
    var data = aligner.Align(baseline, alignee);
    var matches = data.Matches;

    // create anchor points for LCMSWarp alignment
    // NOTE(review): these two lists are filled but never read in this method - confirm whether they
    // are still needed.
    var massPoints = new List<RegressionPoint>();
    var netPoints = new List<RegressionPoint>();
    foreach (var match in matches)
    {
        var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
        var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

        var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
        massPoints.Add(massPoint);

        var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
        netPoints.Add(netPoint);
    }

    // Detach every feature from its pre-alignment cluster so the second clustering starts clean.
    foreach (var feature in allFeatures)
    {
        feature.UmcCluster = null;
        feature.ClusterId = -1;
    }

    // Then cluster after alignment!
    UpdateStatus("clustering data");
    clusters = clusterer.Cluster(allFeatures);
    var postAlignment = scorer.Score(clusters);

    UpdateStatus("Note\tSame\tDifferent");
    UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
    UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

    matches = FilterMatches(matches, 40);
    SaveMatches(matchPath, matches);

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Second-pass clustering: clusters the UMCLights with XICs by running them through the
/// second-pass clusterer.
/// </summary>
/// <param name="umcLights">The features handed to the second-pass clusterer.</param>
/// <param name="progress">The progress reporter.</param>
/// <returns>The clusterer's output, copied into a list.</returns>
private List<UMCLight> SecondPassClustering(List<UMCLight> umcLights, IProgress<ProgressData> progress)
{
    var reclustered = secondPassClusterer.Cluster(umcLights, progress);
    return reclustered.ToList();
}
/// <summary>
/// Clusters a group of features, numbers the resulting clusters, optionally refines them with the
/// MS/MS post-processor, stores them on the analysis and persists both clusters and features
/// through the analysis data providers.
/// </summary>
/// <param name="clusterer">The clusterer to run on <paramref name="features" />.</param>
/// <param name="features">The features to cluster; their ClusterId is set to the owning cluster's Id.</param>
/// <param name="clusterCount">
/// Running cluster ID counter. Each produced cluster takes the current value as its Id and the
/// counter is then incremented.
/// </param>
/// <param name="internalProgress">Optional progress reporter.</param>
internal void ClusterGroupOfFeatures(IClusterer<UMCLight, UMCClusterLight> clusterer, List<UMCLight> features, ref int clusterCount, IProgress<ProgressData> internalProgress = null)
{
    var progData = new ProgressData(internalProgress);
    var clusterProgress = new Progress<ProgressData>(pd => progData.Report(pd.Percent));

    // Clustering gets a smaller share of the progress range when an MS/MS refinement step follows.
    if (this.ShouldRefineWithMsMs)
    {
        progData.StepRange(35);
    }
    else
    {
        progData.StepRange(70);
    }

    var clusters = clusterer.Cluster(features, clusterProgress);
    foreach (var cluster in clusters)
    {
        cluster.Id = clusterCount++;

        // Tag each member with its cluster and roll its spectra counts up into the cluster.
        foreach (var feature in cluster.UmcList)
        {
            feature.ClusterId = cluster.Id;
            cluster.MsMsCount += feature.MsMsCount;
            cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
        }
    }

    if (this.ShouldRefineWithMsMs)
    {
        try
        {
            progData.StepRange(70);
            var clusterRefiner =
                ClusterPostProcessorBuilder.GetClusterPostProcessor<UMCClusterLight, UMCLight>(
                    this.analysis.Options.ClusterPostProcessingoptions,
                    this.analysis.DataProviders);
            clusters = clusterRefiner.Cluster(clusters, clusterProgress);
        }
        catch (DatasetInformation.MissingRawDataException e)
        {
            // Refinement is skipped; the unrefined clusters from above are kept.
            MessageBox.Show(string.Format("{0}\nDataset: {1}", e.Message, e.GroupId));
        }
    }

    this.analysis.Clusters = clusters;
    foreach (var cluster in clusters)
    {
        cluster.Abundance = cluster.UmcList.Sum(umc => umc.AbundanceSum);
    }

    foreach (var dataset in this.Datasets)
    {
        if (dataset.DatasetState == DatasetInformationViewModel.DatasetStates.Clustering)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.PersistingClusters;
        }
    }

    ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
    ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

    // Hold one reference to the lock so the instance that is entered is the one that is exited,
    // and release it in a finally block: without that, an exception thrown while persisting
    // would leave the write lock held.
    var databaseLock = this.analysis.DataProviders.DatabaseLock;
    databaseLock.EnterWriteLock();
    try
    {
        progData.StepRange(85);
        this.analysis.DataProviders.ClusterCache.AddAllStateless(clusters, clusterProgress);

        progData.StepRange(100);
        this.analysis.DataProviders.FeatureCache.UpdateAll(features, clusterProgress);
    }
    finally
    {
        databaseLock.ExitWriteLock();
    }
}
/// <summary>
/// Runs the MultiAlign analysis: builds the baseline LC-MS features once, then for every alignee
/// dataset finds its features, aligns them to the baseline and reports the cluster analysis
/// results obtained before and after the alignment step.
/// </summary>
/// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
/// <param name="aligneeDatasets">Datasets that are aligned to the baseline, one after another.</param>
/// <param name="featureFindingOptions">Options passed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Not used by this method.</param>
/// <param name="peptideOptions">Supplies the Fdr and IdScore values used when linking peptides.</param>
/// <param name="featureFinder">Finds LC-MS features from the filtered MS features.</param>
/// <param name="aligner">Aligns the alignee features to the baseline features.</param>
/// <param name="clusterer">Clusters the combined features before and after the aligner runs.</param>
/// <param name="matchPath">Path handed to SaveMatches; the same path is used for every alignee dataset.</param>
/// <param name="errorPath">Path handed to WriteErrors; the same path is used for every alignee dataset.</param>
public void PerformMultiAlignAnalysis(
    DatasetInformation baselineDataset,
    IEnumerable<DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    UpdateStatus("Loading baseline features.");
    var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    baselineMsFeatures = LcmsFeatureFilters.FilterMsFeatures(baselineMsFeatures, msFilterOptions);

    // The baseline reader stays in scope for every alignee iteration; the cached spectra provider
    // below is constructed from it.
    using (var baselineReader = new InformedProteomicsReader())
    {
        baselineReader.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var referenceFeatures = featureFinder.FindFeatures(baselineMsFeatures, featureFindingOptions, baselineReader);
        LinkPeptidesToFeatures(
            baselineDataset.Sequence.Path,
            referenceFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, referenceFeatures);

        // One full align-and-compare pass per alignee dataset.
        foreach (var aligneeDataset in aligneeDatasets)
        {
            var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(aligneeDataset.Features.Path);
            aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

            using (var aligneeReader = new InformedProteomicsReader())
            {
                aligneeReader.AddDataFile(aligneeDataset.RawFile.Path, 0);

                UpdateStatus("Finding alignee features");
                var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, aligneeReader);
                LinkPeptidesToFeatures(
                    aligneeDataset.Sequence.Path,
                    aligneeFeatures,
                    peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var aligneeSpectra = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                // Pool baseline and alignee features so they can be clustered together.
                var pooledFeatures = new List<UMCLight>();
                pooledFeatures.AddRange(referenceFeatures);
                pooledFeatures.AddRange(aligneeFeatures);

                foreach (var umc in pooledFeatures)
                {
                    // NOTE(review): self-assignment; it only has an effect if the Net setter has
                    // side effects - confirm whether another source property was intended.
                    umc.Net = umc.Net;
                    umc.MassMonoisotopicAligned = umc.MassMonoisotopic;
                }

                // Cluster analysis before the aligner runs.
                var clusters = clusterer.Cluster(pooledFeatures);
                var statsBefore = AnalyzeClusters(clusters);

                aligner.AligneeSpectraProvider = aligneeSpectra;
                aligner.BaselineSpectraProvider = baselineSpectra;

                UpdateStatus("Aligning data");
                var alignment = aligner.Align(referenceFeatures, aligneeFeatures);
                var anchorMatches = alignment.Matches;

                WriteErrors(errorPath, anchorMatches);

                // Anchor points for LCMSWarp alignment.
                // NOTE(review): both lists are filled but never read in this method.
                var massAnchors = new List<RegressionPoint>();
                var netAnchors = new List<RegressionPoint>();
                foreach (var anchor in anchorMatches)
                {
                    var ppmError = FeatureLight.ComputeMassPPMDifference(anchor.AnchorPointX.Mz, anchor.AnchorPointY.Mz);
                    var netDelta = anchor.AnchorPointX.Net - anchor.AnchorPointY.Net;

                    massAnchors.Add(new RegressionPoint(anchor.AnchorPointX.Mz, 0, ppmError, netDelta));
                    netAnchors.Add(new RegressionPoint(anchor.AnchorPointX.Net, 0, ppmError, netDelta));
                }

                // Detach every feature from its previous cluster before clustering again.
                foreach (var umc in pooledFeatures)
                {
                    umc.UmcCluster = null;
                    umc.ClusterId = -1;
                }

                // Cluster analysis after the aligner has run.
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(pooledFeatures);
                var statsAfter = AnalyzeClusters(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}", statsBefore.SameCluster, statsBefore.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}", statsAfter.SameCluster, statsAfter.DifferentCluster));

                SaveMatches(matchPath, anchorMatches);
            }
        }
    }

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(featureFinder);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Aligns a set of alignee features to a set of baseline features, reports cluster scores computed
/// before and after the alignment step, and saves the filtered matches.
/// </summary>
/// <remarks>
/// Every feature's Net is overwritten with its scan number normalized to [0, 1] over the scan range
/// of its own collection, and MassMonoisotopicAligned is reset to MassMonoisotopic.
/// NOTE(review): the Min/Max calls presumably throw <see cref="InvalidOperationException" /> when a
/// collection is empty (true if Scan is a non-nullable value type) - confirm whether callers rely on it.
/// </remarks>
/// <param name="baselineFeatures">Features of the baseline dataset; modified in place.</param>
/// <param name="aligneeFeatures">Features of the dataset being aligned; modified in place.</param>
/// <param name="providerX">Spectra provider assigned to the aligner as the baseline provider.</param>
/// <param name="providerY">Spectra provider assigned to the aligner as the alignee provider.</param>
/// <param name="aligner">The aligner that produces the matches.</param>
/// <param name="clusterer">Clusterer run on the combined features before and after the aligner.</param>
/// <param name="matchPath">Path handed to SaveMatches for the filtered matches.</param>
/// <param name="errorPath">Not used by this method.</param>
public void AlignDatasets(
    IEnumerable<UMCLight> baselineFeatures,
    IEnumerable<UMCLight> aligneeFeatures,
    ISpectraProvider providerX,
    ISpectraProvider providerY,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    // Snapshot each input once. The sequences are traversed repeatedly below (AddRange, Min, Max,
    // the Net loops and the aligner); a lazily evaluated sequence would be re-run on each pass and
    // could yield different objects every time.
    var baselineList = baselineFeatures as IList<UMCLight> ?? baselineFeatures.ToList();
    var aligneeList = aligneeFeatures as IList<UMCLight> ?? aligneeFeatures.ToList();

    // cluster before we do anything else....
    var allFeatures = new List<UMCLight>(baselineList.Count + aligneeList.Count);
    allFeatures.AddRange(baselineList);
    allFeatures.AddRange(aligneeList);

    var maxBaseline = baselineList.Max(x => x.Scan);
    var minBaseline = baselineList.Min(x => x.Scan);
    var maxAlignee = aligneeList.Max(x => x.Scan);
    var minAlignee = aligneeList.Min(x => x.Scan);

    // A collection whose features all share one scan has a zero scan range; dividing by it would
    // store NaN in Net, so Net is set to 0 in that case.
    var aligneeScanRange = Convert.ToDouble(maxAlignee - minAlignee);
    foreach (var feature in aligneeList)
    {
        feature.Net = aligneeScanRange > 0
            ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeScanRange
            : 0.0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    var baselineScanRange = Convert.ToDouble(maxBaseline - minBaseline);
    foreach (var feature in baselineList)
    {
        feature.Net = baselineScanRange > 0
            ? Convert.ToDouble(feature.Scan - minBaseline) / baselineScanRange
            : 0.0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    // This tells us the differences before we align.
    var clusters = clusterer.Cluster(allFeatures);
    var clusterId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = clusterId++;
    }

    var scorer = new GlobalPeptideClusterScorer();
    var preAlignment = scorer.Score(clusters);

    aligner.AligneeSpectraProvider = providerY;
    aligner.BaselineSpectraProvider = providerX;

    UpdateStatus("Aligning data");

    // Aligner data
    var data = aligner.Align(baselineList, aligneeList);
    var matches = data.Matches;

    // create anchor points for LCMSWarp alignment
    // NOTE(review): these two lists are filled but never read in this method - confirm whether they
    // are still needed.
    var massPoints = new List<RegressionPoint>();
    var netPoints = new List<RegressionPoint>();
    foreach (var match in matches)
    {
        var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
        var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

        var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
        massPoints.Add(massPoint);

        var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
        netPoints.Add(netPoint);
    }

    // Detach every feature from its pre-alignment cluster so the second clustering starts clean.
    foreach (var feature in allFeatures)
    {
        feature.UmcCluster = null;
        feature.ClusterId = -1;
    }

    // Then cluster after alignment!
    UpdateStatus("clustering data");
    clusters = clusterer.Cluster(allFeatures);
    var postAlignment = scorer.Score(clusters);

    UpdateStatus("Note\tSame\tDifferent");
    UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
    UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

    matches = FilterMatches(matches, 40);
    SaveMatches(matchPath, matches);

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Runs the MultiAlign analysis: builds the baseline LC-MS features once, then for every alignee
/// dataset finds its features, aligns them to the baseline and reports the cluster analysis
/// results obtained before and after the alignment step.
/// </summary>
/// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
/// <param name="aligneeDatasets">Datasets that are aligned to the baseline, one after another.</param>
/// <param name="featureFindingOptions">Options passed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Not used by this method.</param>
/// <param name="peptideOptions">Supplies the Fdr and IdScore values used when linking peptides.</param>
/// <param name="featureFinder">Finds LC-MS features from the filtered MS features.</param>
/// <param name="aligner">Aligns the alignee features to the baseline features.</param>
/// <param name="clusterer">Clusters the combined features before and after the aligner runs.</param>
/// <param name="matchPath">Path handed to SaveMatches; the same path is used for every alignee dataset.</param>
/// <param name="errorPath">Path handed to WriteErrors; the same path is used for every alignee dataset.</param>
public void PerformMultiAlignAnalysis(
    DatasetInformation baselineDataset,
    IEnumerable<DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    IFeatureFinder featureFinder,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, classAlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    UpdateStatus("Loading baseline features.");
    var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    baselineMsFeatures = LcmsFeatureFilters.FilterMsFeatures(baselineMsFeatures, msFilterOptions);

    // The baseline reader stays in scope for every alignee iteration; the cached spectra provider
    // below is constructed from it.
    using (var baselineReader = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
    {
        baselineReader.AddDataFile(baselineDataset.RawPath, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var referenceFeatures = featureFinder.FindFeatures(baselineMsFeatures, featureFindingOptions, baselineReader);
        LinkPeptidesToFeatures(
            baselineDataset.SequencePath,
            referenceFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, referenceFeatures);

        // One full align-and-compare pass per alignee dataset.
        foreach (var aligneeDataset in aligneeDatasets)
        {
            var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(aligneeDataset.Features.Path);
            aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

            using (var aligneeReader = RawLoaderFactory.CreateFileReader(aligneeDataset.RawPath))
            {
                aligneeReader.AddDataFile(aligneeDataset.RawPath, 0);

                UpdateStatus("Finding alignee features");
                var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, aligneeReader);
                LinkPeptidesToFeatures(
                    aligneeDataset.SequencePath,
                    aligneeFeatures,
                    peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var aligneeSpectra = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                // Pool baseline and alignee features so they can be clustered together.
                var pooledFeatures = new List<UMCLight>();
                pooledFeatures.AddRange(referenceFeatures);
                pooledFeatures.AddRange(aligneeFeatures);

                foreach (var umc in pooledFeatures)
                {
                    // NOTE(review): self-assignment; it only has an effect if the Net setter has
                    // side effects - confirm whether another source property was intended.
                    umc.Net = umc.Net;
                    umc.MassMonoisotopicAligned = umc.MassMonoisotopic;
                }

                // Cluster analysis before the aligner runs.
                var clusters = clusterer.Cluster(pooledFeatures);
                var statsBefore = AnalyzeClusters(clusters);

                aligner.AligneeSpectraProvider = aligneeSpectra;
                aligner.BaselineSpectraProvider = baselineSpectra;

                UpdateStatus("Aligning data");
                var alignment = aligner.Align(referenceFeatures, aligneeFeatures);
                var anchorMatches = alignment.Matches;

                WriteErrors(errorPath, anchorMatches);

                // Anchor points for LCMSWarp alignment.
                // NOTE(review): both lists are filled but never read in this method.
                var massAnchors = new List<RegressionPoint>();
                var netAnchors = new List<RegressionPoint>();
                foreach (var anchor in anchorMatches)
                {
                    var ppmError = FeatureLight.ComputeMassPPMDifference(anchor.AnchorPointX.Mz, anchor.AnchorPointY.Mz);
                    var netDelta = anchor.AnchorPointX.Net - anchor.AnchorPointY.Net;

                    massAnchors.Add(new RegressionPoint(anchor.AnchorPointX.Mz, 0, ppmError, netDelta));
                    netAnchors.Add(new RegressionPoint(anchor.AnchorPointX.Net, 0, ppmError, netDelta));
                }

                // Detach every feature from its previous cluster before clustering again.
                foreach (var umc in pooledFeatures)
                {
                    umc.UmcCluster = null;
                    umc.ClusterId = -1;
                }

                // Cluster analysis after the aligner has run.
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(pooledFeatures);
                var statsAfter = AnalyzeClusters(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}", statsBefore.SameCluster, statsBefore.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}", statsAfter.SameCluster, statsAfter.DifferentCluster));

                SaveMatches(matchPath, anchorMatches);
            }
        }
    }

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(featureFinder);
    DeRegisterProgressNotifier(clusterer);
}