コード例 #1
0
        /// <summary>
        /// First clustering pass: groups <see cref="MSFeatureLight" /> entries (typically one per
        /// line of an _isos file) into <see cref="UMCLight" /> features by M/Z, using the
        /// first-pass clusterer.
        /// </summary>
        /// <param name="msFeatures">The MS features to cluster.</param>
        /// <param name="progress">The progress reporter handed to the clusterer.</param>
        /// <returns>
        /// The clustered <see cref="UMCLight" /> features that contain at least one MS feature,
        /// each with its monoisotopic mass recomputed from its m/z and charge state.
        /// </returns>
        private List<UMCLight> FirstPassClustering(List<MSFeatureLight> msFeatures, IProgress<ProgressData> progress)
        {
            var populated = new List<UMCLight>();

            foreach (var umc in firstPassClusterer.Cluster(msFeatures, progress))
            {
                // Clusters that ended up without any member MS feature are dropped.
                if (umc.MsFeatures.Count != 0)
                {
                    // Monoisotopic mass from m/z: scale by the charge and remove one proton mass per charge.
                    umc.MassMonoisotopic = (umc.Mz * umc.ChargeState) - (SubAtomicParticleLibrary.MASS_PROTON * umc.ChargeState);
                    populated.Add(umc);
                }
            }

            return populated;
        }
コード例 #2
0
ファイル: Figure5.cs プロジェクト: msdna/MultiAlign
        /// <summary>
        ///     Clusters a baseline and an alignee feature set, aligns them, clusters them again,
        ///     reports the cluster scores obtained before and after alignment, and saves the
        ///     filtered alignment matches.
        /// </summary>
        /// <param name="baselineFeatures">
        ///     Features of the baseline dataset. Expected to be non-empty: the scan range is taken with Max/Min.
        /// </param>
        /// <param name="aligneeFeatures">
        ///     Features of the dataset being aligned. Expected to be non-empty for the same reason.
        /// </param>
        /// <param name="providerX">Spectra provider assigned to the aligner as the baseline provider.</param>
        /// <param name="providerY">Spectra provider assigned to the aligner as the alignee provider.</param>
        /// <param name="aligner">Aligner used to match the two feature sets.</param>
        /// <param name="clusterer">Clusterer run once before and once after the alignment.</param>
        /// <param name="matchPath">Path handed to SaveMatches for the filtered matches.</param>
        /// <param name="errorPath">Not used by this method.</param>
        /// <remarks>
        ///     Each feature's Net is set to its scan position normalised to the scan range of its own
        ///     dataset. When every feature of a dataset shares one scan the range is zero and Net is set to 0.
        /// </remarks>
        public void AlignDatasets(  IEnumerable<UMCLight>   baselineFeatures,
                                    IEnumerable<UMCLight>   aligneeFeatures,
                                    ISpectraProvider        providerX,
                                    ISpectraProvider        providerY,
                                    IFeatureAligner<IEnumerable<UMCLight>,
                                        IEnumerable<UMCLight>,
                                        classAlignmentData> aligner,
                                    IClusterer<UMCLight, UMCClusterLight> clusterer,
                                    string matchPath,
                                    string errorPath)
        {
            // Materialise both inputs once. The sequences are walked several times below; with a
            // deferred source each walk could yield different instances, so the features that get
            // normalised would not be the ones that are clustered and aligned.
            var baseline = baselineFeatures as IList<UMCLight> ?? baselineFeatures.ToList();
            var alignee  = aligneeFeatures as IList<UMCLight> ?? aligneeFeatures.ToList();

            // cluster before we do anything else....
            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baseline);
            allFeatures.AddRange(alignee);

            var maxBaseline = baseline.Max(x => x.Scan);
            var minBaseline = baseline.Min(x => x.Scan);

            var maxAlignee  = alignee.Max(x => x.Scan);
            var minAlignee  = alignee.Min(x => x.Scan);

            // A zero scan range would turn the normalisation into 0/0 (NaN); fall back to 0 instead.
            var aligneeScanRange = Convert.ToDouble(maxAlignee - minAlignee);
            foreach (var feature in alignee)
            {
                feature.Net = aligneeScanRange > 0
                    ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeScanRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            var baselineScanRange = Convert.ToDouble(maxBaseline - minBaseline);
            foreach (var feature in baseline)
            {
                feature.Net = baselineScanRange > 0
                    ? Convert.ToDouble(feature.Scan - minBaseline) / baselineScanRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            // This tells us the differences before we align.
            var clusters     = clusterer.Cluster(allFeatures);
            var clusterId    = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = clusterId++;
            }
            var scorer       = new GlobalPeptideClusterScorer();
            var preAlignment = scorer.Score(clusters);

            aligner.AligneeSpectraProvider  = providerY;
            aligner.BaselineSpectraProvider = providerX;

            UpdateStatus("Aligning data");
            // Aligner data
            var data    = aligner.Align(baseline, alignee);
            var matches = data.Matches;

            // Detach every feature from its pre-alignment cluster before clustering again.
            foreach (var feature in allFeatures)
            {
                feature.UmcCluster = null;
                feature.ClusterId = -1;
            }
            // Then cluster after alignment!
            UpdateStatus("clustering data");
            clusters = clusterer.Cluster(allFeatures);
            var postAlignment = scorer.Score(clusters);

            UpdateStatus("Note\tSame\tDifferent");
            UpdateStatus(string.Format("Pre\t{0}\t{1}",
                            preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
            UpdateStatus(string.Format("Post\t{0}\t{1}",
                            postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

            // NOTE(review): the meaning of the threshold 40 is defined by FilterMatches — confirm there.
            matches = FilterMatches(matches, 40);

            SaveMatches(matchPath, matches);
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(clusterer);
        }
コード例 #3
0
 /// <summary>
 /// Second clustering pass: runs the second-pass clusterer over the UMCLights with XICs.
 /// </summary>
 /// <param name="umcLights">The features produced by the previous pass.</param>
 /// <param name="progress">The progress reporter handed to the clusterer.</param>
 /// <returns>The clusterer's output, copied into a new list.</returns>
 private List<UMCLight> SecondPassClustering(List<UMCLight> umcLights, IProgress<ProgressData> progress)
 {
     var reclustered = secondPassClusterer.Cluster(umcLights, progress);

     return reclustered.ToList();
 }
コード例 #4
0
        /// <summary>
        /// Clusters the given features, numbers the resulting clusters, optionally refines them with
        /// the configured cluster post-processor, and persists the clusters and the updated features.
        /// </summary>
        /// <param name="clusterer">Clusterer used to group the features.</param>
        /// <param name="features">Features to cluster; they are also written back to the feature cache.</param>
        /// <param name="clusterCount">
        /// Next cluster id to assign. Incremented once for every cluster produced by <paramref name="clusterer"/>.
        /// </param>
        /// <param name="internalProgress">Optional progress sink wrapped by the local progress data.</param>
        /// <remarks>
        /// The cache writes run while holding the database write lock, which is released even when
        /// persisting fails.
        /// </remarks>
        internal void ClusterGroupOfFeatures(IClusterer <UMCLight, UMCClusterLight> clusterer, List <UMCLight> features, ref int clusterCount, IProgress <ProgressData> internalProgress = null)
        {
            var progData        = new ProgressData(internalProgress);
            var clusterProgress = new Progress <ProgressData>(pd => progData.Report(pd.Percent));

            // Clustering takes a smaller share of the progress range when an MS/MS refinement step follows.
            progData.StepRange(this.ShouldRefineWithMsMs ? 35 : 70);

            var clusters = clusterer.Cluster(features, clusterProgress);

            foreach (var cluster in clusters)
            {
                cluster.Id = clusterCount++;

                // Tag each member with its cluster id and roll its spectra counts up into the cluster.
                foreach (var feature in cluster.UmcList)
                {
                    feature.ClusterId = cluster.Id;
                    cluster.MsMsCount += feature.MsMsCount;
                    cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
                }
            }

            if (this.ShouldRefineWithMsMs)
            {
                try
                {
                    progData.StepRange(70);
                    var clusterRefiner =
                        ClusterPostProcessorBuilder.GetClusterPostProcessor <UMCClusterLight, UMCLight>(
                            this.analysis.Options.ClusterPostProcessingoptions,
                            this.analysis.DataProviders);
                    clusters = clusterRefiner.Cluster(clusters, clusterProgress);
                }
                catch (DatasetInformation.MissingRawDataException e)
                {
                    // Refinement is best effort: report the problem and continue with the unrefined clusters.
                    MessageBox.Show(string.Format("{0}\nDataset: {1}", e.Message, e.GroupId));
                }
            }

            this.analysis.Clusters            = clusters;
            clusters.ForEach(c => c.Abundance = c.UmcList.Sum(umc => umc.AbundanceSum));

            foreach (var dataset in this.Datasets)
            {
                if (dataset.DatasetState == DatasetInformationViewModel.DatasetStates.Clustering)
                {
                    dataset.DatasetState = DatasetInformationViewModel.DatasetStates.PersistingClusters;
                }
            }

            ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
            ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

            // Capture the lock once so the instance that is exited is the one that was entered, and
            // release it in a finally block so a failing cache write cannot leave the lock held.
            var databaseLock = this.analysis.DataProviders.DatabaseLock;
            databaseLock.EnterWriteLock();
            try
            {
                progData.StepRange(85);
                this.analysis.DataProviders.ClusterCache.AddAllStateless(clusters, clusterProgress);

                progData.StepRange(100);
                this.analysis.DataProviders.FeatureCache.UpdateAll(features, clusterProgress);
            }
            finally
            {
                databaseLock.ExitWriteLock();
            }
        }
コード例 #5
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds LCMS features for the baseline dataset and for each
        ///     alignee dataset, aligns each alignee against the baseline, and reports the cluster
        ///     analysis obtained before and after alignment.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
        /// <param name="aligneeDatasets">Datasets aligned against the baseline, one after another.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
        /// <param name="featureFinder">Feature finder that builds LCMS features from MS features.</param>
        /// <param name="aligner">Aligner used to match alignee features to baseline features.</param>
        /// <param name="clusterer">Clusterer run before and after each alignment.</param>
        /// <param name="matchPath">Path handed to SaveMatches for every alignee dataset.</param>
        /// <param name="errorPath">Path handed to WriteErrors for every alignee dataset.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): self-assignment; a no-op unless the Net setter has side
                            // effects — confirm and remove.
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // Detach every feature from its pre-alignment cluster before clustering again.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
コード例 #6
0
        /// <summary>
        ///     Clusters a baseline and an alignee feature set, aligns them, clusters them again,
        ///     reports the cluster scores obtained before and after alignment, and saves the
        ///     filtered alignment matches.
        /// </summary>
        /// <param name="baselineFeatures">
        ///     Features of the baseline dataset. Expected to be non-empty: the scan range is taken with Max/Min.
        /// </param>
        /// <param name="aligneeFeatures">
        ///     Features of the dataset being aligned. Expected to be non-empty for the same reason.
        /// </param>
        /// <param name="providerX">Spectra provider assigned to the aligner as the baseline provider.</param>
        /// <param name="providerY">Spectra provider assigned to the aligner as the alignee provider.</param>
        /// <param name="aligner">Aligner used to match the two feature sets.</param>
        /// <param name="clusterer">Clusterer run once before and once after the alignment.</param>
        /// <param name="matchPath">Path handed to SaveMatches for the filtered matches.</param>
        /// <param name="errorPath">Not used by this method.</param>
        /// <remarks>
        ///     Each feature's Net is set to its scan position normalised to the scan range of its own
        ///     dataset. When every feature of a dataset shares one scan the range is zero and Net is set to 0.
        /// </remarks>
        public void AlignDatasets(IEnumerable <UMCLight> baselineFeatures,
                                  IEnumerable <UMCLight> aligneeFeatures,
                                  ISpectraProvider providerX,
                                  ISpectraProvider providerY,
                                  IFeatureAligner <IEnumerable <UMCLight>,
                                                   IEnumerable <UMCLight>,
                                                   AlignmentData> aligner,
                                  IClusterer <UMCLight, UMCClusterLight> clusterer,
                                  string matchPath,
                                  string errorPath)
        {
            // Materialise both inputs once. The sequences are walked several times below; with a
            // deferred source each walk could yield different instances, so the features that get
            // normalised would not be the ones that are clustered and aligned.
            var baseline = baselineFeatures as IList <UMCLight> ?? baselineFeatures.ToList();
            var alignee  = aligneeFeatures as IList <UMCLight> ?? aligneeFeatures.ToList();

            // cluster before we do anything else....
            var allFeatures = new List <UMCLight>();

            allFeatures.AddRange(baseline);
            allFeatures.AddRange(alignee);

            var maxBaseline = baseline.Max(x => x.Scan);
            var minBaseline = baseline.Min(x => x.Scan);

            var maxAlignee = alignee.Max(x => x.Scan);
            var minAlignee = alignee.Min(x => x.Scan);

            // A zero scan range would turn the normalisation into 0/0 (NaN); fall back to 0 instead.
            var aligneeScanRange = Convert.ToDouble(maxAlignee - minAlignee);

            foreach (var feature in alignee)
            {
                feature.Net = aligneeScanRange > 0
                    ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeScanRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            var baselineScanRange = Convert.ToDouble(maxBaseline - minBaseline);

            foreach (var feature in baseline)
            {
                feature.Net = baselineScanRange > 0
                    ? Convert.ToDouble(feature.Scan - minBaseline) / baselineScanRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            // This tells us the differences before we align.
            var clusters  = clusterer.Cluster(allFeatures);
            var clusterId = 0;

            foreach (var cluster in clusters)
            {
                cluster.Id = clusterId++;
            }
            var scorer       = new GlobalPeptideClusterScorer();
            var preAlignment = scorer.Score(clusters);

            aligner.AligneeSpectraProvider  = providerY;
            aligner.BaselineSpectraProvider = providerX;

            UpdateStatus("Aligning data");
            // Aligner data
            var data    = aligner.Align(baseline, alignee);
            var matches = data.Matches;

            // Detach every feature from its pre-alignment cluster before clustering again.
            foreach (var feature in allFeatures)
            {
                feature.UmcCluster = null;
                feature.ClusterId  = -1;
            }
            // Then cluster after alignment!
            UpdateStatus("clustering data");
            clusters = clusterer.Cluster(allFeatures);
            var postAlignment = scorer.Score(clusters);

            UpdateStatus("Note\tSame\tDifferent");
            UpdateStatus(string.Format("Pre\t{0}\t{1}",
                                       preAlignment.SameCluster,
                                       preAlignment.DifferentCluster));
            UpdateStatus(string.Format("Post\t{0}\t{1}",
                                       postAlignment.SameCluster,
                                       postAlignment.DifferentCluster));

            // NOTE(review): the meaning of the threshold 40 is defined by FilterMatches — confirm there.
            matches = FilterMatches(matches, 40);

            SaveMatches(matchPath, matches);
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(clusterer);
        }
コード例 #7
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds LCMS features for the baseline dataset and for each
        ///     alignee dataset, aligns each alignee against the baseline, and reports the cluster
        ///     analysis obtained before and after alignment.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
        /// <param name="aligneeDatasets">Datasets aligned against the baseline, one after another.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
        /// <param name="featureFinder">Feature finder that builds LCMS features from MS features.</param>
        /// <param name="aligner">Aligner used to match alignee features to baseline features.</param>
        /// <param name="clusterer">Clusterer run before and after each alignment.</param>
        /// <param name="matchPath">Path handed to SaveMatches for every alignee dataset.</param>
        /// <param name="errorPath">Path handed to WriteErrors for every alignee dataset.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                    featureFindingOptions,
                    rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                    {
                        rawProviderY.AddDataFile(dataset.RawPath, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                            featureFindingOptions,
                            rawProviderY);
                        LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                            peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List<UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): self-assignment; a no-op unless the Net setter has side
                            // effects — confirm and remove.
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // Detach every feature from its pre-alignment cluster before clustering again.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }