Пример #1
0
        /// <summary>
        ///     Clusters the combined baseline and alignee features, scores the clusters, runs
        ///     <paramref name="aligner"/>, then re-clusters and re-scores. Both scores are
        ///     reported through <c>UpdateStatus</c> and the filtered matches are saved.
        /// </summary>
        /// <param name="baselineFeatures">
        ///     Baseline features. Before clustering, each feature's <c>Net</c> is overwritten with
        ///     its scan rescaled by the baseline scan range and <c>MassMonoisotopicAligned</c> is
        ///     reset to <c>MassMonoisotopic</c>.
        /// </param>
        /// <param name="aligneeFeatures">
        ///     Features to align to the baseline; rescaled and reset the same way using their own
        ///     scan range.
        /// </param>
        /// <param name="providerX">Assigned to <c>aligner.BaselineSpectraProvider</c>.</param>
        /// <param name="providerY">Assigned to <c>aligner.AligneeSpectraProvider</c>.</param>
        /// <param name="aligner">Aligner whose <c>Align</c> result supplies the matches.</param>
        /// <param name="clusterer">Clusterer run once before and once after alignment.</param>
        /// <param name="matchPath">Passed to <c>SaveMatches</c> together with the filtered matches.</param>
        /// <param name="errorPath">Not used by this method.</param>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="baselineFeatures"/> or <paramref name="aligneeFeatures"/> is null.
        /// </exception>
        /// <remarks>
        ///     The aligner and the clusterer are de-registered as progress notifiers even when
        ///     the analysis throws. NOTE(review): an empty feature collection presumably still
        ///     throws from <c>Min</c>/<c>Max</c> (true if <c>Scan</c> is a non-nullable value type).
        /// </remarks>
        public void AlignDatasets(  IEnumerable<UMCLight>   baselineFeatures,
                                    IEnumerable<UMCLight>   aligneeFeatures,
                                    ISpectraProvider        providerX,
                                    ISpectraProvider        providerY,
                                    IFeatureAligner<IEnumerable<UMCLight>,
                                        IEnumerable<UMCLight>,
                                        classAlignmentData> aligner,
                                    IClusterer<UMCLight, UMCClusterLight> clusterer,
                                    string matchPath,
                                    string errorPath)
        {
            if (baselineFeatures == null)
            {
                throw new ArgumentNullException("baselineFeatures");
            }
            if (aligneeFeatures == null)
            {
                throw new ArgumentNullException("aligneeFeatures");
            }

            try
            {
                // Materialize each input once. A deferred sequence could otherwise yield different
                // instances on every pass, so the features updated below would not be the ones
                // that get clustered and aligned.
                var baseline = baselineFeatures as ICollection<UMCLight> ?? baselineFeatures.ToList();
                var alignee  = aligneeFeatures as ICollection<UMCLight> ?? aligneeFeatures.ToList();

                // cluster before we do anything else....
                var allFeatures = new List<UMCLight>(baseline.Count + alignee.Count);
                allFeatures.AddRange(baseline);
                allFeatures.AddRange(alignee);

                var minBaseline   = baseline.Min(x => x.Scan);
                var baselineRange = Convert.ToDouble(baseline.Max(x => x.Scan) - minBaseline);

                var minAlignee    = alignee.Min(x => x.Scan);
                var aligneeRange  = Convert.ToDouble(alignee.Max(x => x.Scan) - minAlignee);

                // A zero scan range (every feature in the same scan) would turn the division
                // into 0/0 = NaN, so fall back to 0 in that case.
                foreach (var feature in alignee)
                {
                    feature.Net = aligneeRange > 0
                        ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeRange
                        : 0.0;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                foreach (var feature in baseline)
                {
                    feature.Net = baselineRange > 0
                        ? Convert.ToDouble(feature.Scan - minBaseline) / baselineRange
                        : 0.0;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                // This tells us the differences before we align.
                var clusters  = clusterer.Cluster(allFeatures);
                var clusterId = 0;
                foreach (var cluster in clusters)
                {
                    cluster.Id = clusterId++;
                }
                var scorer       = new GlobalPeptideClusterScorer();
                var preAlignment = scorer.Score(clusters);

                aligner.AligneeSpectraProvider  = providerY;
                aligner.BaselineSpectraProvider = providerX;

                UpdateStatus("Aligning data");
                var data    = aligner.Align(baseline, alignee);
                var matches = data.Matches;

                // Detach every feature from its pre-alignment cluster so the second pass starts clean.
                foreach (var feature in allFeatures)
                {
                    feature.UmcCluster = null;
                    feature.ClusterId  = -1;
                }

                // Then cluster after alignment!
                // NOTE(review): only the pre-alignment clusters are renumbered above; the
                // post-alignment clusters keep whatever ids the clusterer gave them.
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(allFeatures);
                var postAlignment = scorer.Score(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}",
                                preAlignment.SameCluster,
                                preAlignment.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}",
                                postAlignment.SameCluster,
                                postAlignment.DifferentCluster));

                // NOTE(review): the meaning of 40 is defined by FilterMatches, which is not visible here.
                matches = FilterMatches(matches, 40);

                SaveMatches(matchPath, matches);
            }
            finally
            {
                // Always detach the progress handlers, including on failure.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(clusterer);
            }
        }
Пример #2
0
        /// <summary>
        ///     Clusters the combined baseline and alignee features, scores the clusters, runs
        ///     <paramref name="aligner"/>, then re-clusters and re-scores. Both scores are
        ///     reported through <c>UpdateStatus</c> and the filtered matches are saved.
        /// </summary>
        /// <param name="baselineFeatures">
        ///     Baseline features. Before clustering, each feature's <c>Net</c> is overwritten with
        ///     its scan rescaled by the baseline scan range and <c>MassMonoisotopicAligned</c> is
        ///     reset to <c>MassMonoisotopic</c>.
        /// </param>
        /// <param name="aligneeFeatures">
        ///     Features to align to the baseline; rescaled and reset the same way using their own
        ///     scan range.
        /// </param>
        /// <param name="providerX">Assigned to <c>aligner.BaselineSpectraProvider</c>.</param>
        /// <param name="providerY">Assigned to <c>aligner.AligneeSpectraProvider</c>.</param>
        /// <param name="aligner">Aligner whose <c>Align</c> result supplies the matches.</param>
        /// <param name="clusterer">Clusterer run once before and once after alignment.</param>
        /// <param name="matchPath">Passed to <c>SaveMatches</c> together with the filtered matches.</param>
        /// <param name="errorPath">Not used by this method.</param>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="baselineFeatures"/> or <paramref name="aligneeFeatures"/> is null.
        /// </exception>
        /// <remarks>
        ///     The aligner and the clusterer are de-registered as progress notifiers even when
        ///     the analysis throws. NOTE(review): an empty feature collection presumably still
        ///     throws from <c>Min</c>/<c>Max</c> (true if <c>Scan</c> is a non-nullable value type).
        /// </remarks>
        public void AlignDatasets(IEnumerable <UMCLight> baselineFeatures,
                                  IEnumerable <UMCLight> aligneeFeatures,
                                  ISpectraProvider providerX,
                                  ISpectraProvider providerY,
                                  IFeatureAligner <IEnumerable <UMCLight>,
                                                   IEnumerable <UMCLight>,
                                                   AlignmentData> aligner,
                                  IClusterer <UMCLight, UMCClusterLight> clusterer,
                                  string matchPath,
                                  string errorPath)
        {
            if (baselineFeatures == null)
            {
                throw new ArgumentNullException("baselineFeatures");
            }
            if (aligneeFeatures == null)
            {
                throw new ArgumentNullException("aligneeFeatures");
            }

            try
            {
                // Snapshot both inputs so that every later pass (statistics, rescaling,
                // clustering, alignment) works on the same feature instances even when the
                // caller hands in a lazily evaluated sequence.
                var baselineSet = baselineFeatures as ICollection<UMCLight> ?? baselineFeatures.ToList();
                var aligneeSet  = aligneeFeatures as ICollection<UMCLight> ?? aligneeFeatures.ToList();

                // cluster before we do anything else....
                var combined = new List<UMCLight>(baselineSet.Count + aligneeSet.Count);
                combined.AddRange(baselineSet);
                combined.AddRange(aligneeSet);

                var baselineFirstScan = baselineSet.Min(x => x.Scan);
                var baselineSpan      = Convert.ToDouble(baselineSet.Max(x => x.Scan) - baselineFirstScan);

                var aligneeFirstScan  = aligneeSet.Min(x => x.Scan);
                var aligneeSpan       = Convert.ToDouble(aligneeSet.Max(x => x.Scan) - aligneeFirstScan);

                // When all features of a set sit in one scan the span is zero and the original
                // division produced NaN (0/0); use 0 for that degenerate case.
                foreach (var feature in aligneeSet)
                {
                    feature.Net = aligneeSpan > 0
                        ? Convert.ToDouble(feature.Scan - aligneeFirstScan) / aligneeSpan
                        : 0.0;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                foreach (var feature in baselineSet)
                {
                    feature.Net = baselineSpan > 0
                        ? Convert.ToDouble(feature.Scan - baselineFirstScan) / baselineSpan
                        : 0.0;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                // This tells us the differences before we align.
                var clusters  = clusterer.Cluster(combined);
                var nextId    = 0;
                foreach (var cluster in clusters)
                {
                    cluster.Id = nextId++;
                }
                var scorer       = new GlobalPeptideClusterScorer();
                var preAlignment = scorer.Score(clusters);

                aligner.AligneeSpectraProvider  = providerY;
                aligner.BaselineSpectraProvider = providerX;

                UpdateStatus("Aligning data");
                var data    = aligner.Align(baselineSet, aligneeSet);
                var matches = data.Matches;

                // Clear the pre-alignment cluster membership before clustering again.
                foreach (var feature in combined)
                {
                    feature.UmcCluster = null;
                    feature.ClusterId  = -1;
                }

                // Then cluster after alignment!
                // NOTE(review): unlike the first pass, these clusters are not renumbered
                // before scoring.
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(combined);
                var postAlignment = scorer.Score(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}",
                                           preAlignment.SameCluster,
                                           preAlignment.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}",
                                           postAlignment.SameCluster,
                                           postAlignment.DifferentCluster));

                // NOTE(review): what the literal 40 limits is decided inside FilterMatches (not shown here).
                matches = FilterMatches(matches, 40);

                SaveMatches(matchPath, matches);
            }
            finally
            {
                // Run on success and on failure so the progress handlers never stay attached.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(clusterer);
            }
        }
Пример #3
0
        /// <summary>
        ///     Loads and filters the MS features of the baseline dataset, finds its LCMS features,
        ///     and then, for every alignee dataset, does the same and aligns it to the baseline.
        ///     For each alignee the combined features are clustered and analyzed before and after
        ///     alignment, the match errors are written, and the matches are saved.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, one after another.</param>
        /// <param name="featureFindingOptions">Passed to <c>featureFinder.FindFeatures</c>.</param>
        /// <param name="msFilterOptions">Passed to <c>LcmsFeatureFilters.FilterMsFeatures</c>.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the <c>Fdr</c> and <c>IdScore</c> given to <c>LinkPeptidesToFeatures</c>.</param>
        /// <param name="featureFinder">Builds LCMS features from the filtered MS features.</param>
        /// <param name="aligner">Aligner whose <c>Align</c> result supplies the matches.</param>
        /// <param name="clusterer">Clusterer run before and after each alignment.</param>
        /// <param name="matchPath">Passed to <c>SaveMatches</c> for every alignee dataset.</param>
        /// <param name="errorPath">Passed to <c>WriteErrors</c> for every alignee dataset.</param>
        /// <remarks>
        ///     The aligner, feature finder and clusterer are de-registered as progress notifiers
        ///     even when the analysis throws.
        /// </remarks>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            try
            {
                UpdateStatus("Loading baseline features.");
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

                // Load the baseline reference set
                using (var rawProviderX = new InformedProteomicsReader())
                {
                    rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                    UpdateStatus("Creating Baseline LCMS Features.");
                    var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                      featureFindingOptions,
                                                                      rawProviderX);
                    LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                           peptideOptions.IdScore);

                    var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                    // Then load each alignee dataset and align it against the baseline.
                    foreach (var dataset in aligneeDatasets)
                    {
                        var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                        aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

                        using (var rawProviderY = new InformedProteomicsReader())
                        {
                            rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                            UpdateStatus("Finding alignee features");
                            var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                             featureFindingOptions,
                                                                             rawProviderY);
                            LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                                   peptideOptions.IdScore);

                            var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                            // cluster before we do anything else....
                            var allFeatures = new List<UMCLight>();
                            allFeatures.AddRange(baselineFeatures);
                            allFeatures.AddRange(aligneeFeatures);

                            // Reset the aligned mass. The baseline features are reset on every
                            // pass as well, since they are part of allFeatures.
                            // NOTE(review): the original also executed "feature.Net = feature.Net"
                            // here, a self-assignment (no effect, assuming a plain property). If
                            // Net was meant to be initialized from another value, restore that.
                            foreach (var feature in allFeatures)
                            {
                                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                            }

                            // This tells us the differences before we align.
                            var clusters     = clusterer.Cluster(allFeatures);
                            var preAlignment = AnalyzeClusters(clusters);

                            aligner.AligneeSpectraProvider  = providerY;
                            aligner.BaselineSpectraProvider = providerX;

                            UpdateStatus("Aligning data");
                            var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                            var matches = data.Matches;

                            // NOTE(review): errorPath and matchPath are the same for every alignee
                            // dataset; whether later datasets overwrite or append depends on
                            // WriteErrors/SaveMatches, which are not visible here.
                            WriteErrors(errorPath, matches);

                            // Detach every feature from its pre-alignment cluster.
                            foreach (var feature in allFeatures)
                            {
                                feature.UmcCluster = null;
                                feature.ClusterId  = -1;
                            }

                            // Then cluster after alignment!
                            UpdateStatus("clustering data");
                            clusters = clusterer.Cluster(allFeatures);
                            var postAlignment = AnalyzeClusters(clusters);

                            UpdateStatus("Note\tSame\tDifferent");
                            UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                       preAlignment.DifferentCluster));
                            UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                       postAlignment.DifferentCluster));

                            SaveMatches(matchPath, matches);
                        }
                    }
                }
            }
            finally
            {
                // Always detach the progress handlers, including on failure.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(featureFinder);
                DeRegisterProgressNotifier(clusterer);
            }
        }
Пример #4
0
        /// <summary>
        ///     Builds LCMS features for the baseline dataset and for each alignee dataset, aligns
        ///     every alignee to the baseline, and reports how the combined features cluster before
        ///     and after each alignment. Match errors are written and matches saved per alignee.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, one after another.</param>
        /// <param name="featureFindingOptions">Passed to <c>featureFinder.FindFeatures</c>.</param>
        /// <param name="msFilterOptions">Passed to <c>LcmsFeatureFilters.FilterMsFeatures</c>.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the <c>Fdr</c> and <c>IdScore</c> given to <c>LinkPeptidesToFeatures</c>.</param>
        /// <param name="featureFinder">Builds LCMS features from the filtered MS features.</param>
        /// <param name="aligner">Aligner whose <c>Align</c> result supplies the matches.</param>
        /// <param name="clusterer">Clusterer run before and after each alignment.</param>
        /// <param name="matchPath">Passed to <c>SaveMatches</c> for every alignee dataset.</param>
        /// <param name="errorPath">Passed to <c>WriteErrors</c> for every alignee dataset.</param>
        /// <remarks>
        ///     The aligner, feature finder and clusterer are de-registered as progress notifiers
        ///     even when the analysis throws.
        /// </remarks>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            try
            {
                UpdateStatus("Loading baseline features.");
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

                // Load the baseline reference set
                using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
                {
                    rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                    UpdateStatus("Creating Baseline LCMS Features.");
                    var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                        featureFindingOptions,
                        rawProviderX);
                    LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                        peptideOptions.IdScore);

                    var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                    // Process the alignee datasets one at a time against the shared baseline.
                    foreach (var dataset in aligneeDatasets)
                    {
                        var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                        aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

                        using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                        {
                            rawProviderY.AddDataFile(dataset.RawPath, 0);

                            UpdateStatus("Finding alignee features");
                            var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                featureFindingOptions,
                                rawProviderY);
                            LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                                peptideOptions.IdScore);

                            var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                            // cluster before we do anything else....
                            var allFeatures = new List<UMCLight>();
                            allFeatures.AddRange(baselineFeatures);
                            allFeatures.AddRange(aligneeFeatures);

                            // Start every pass from the unaligned mass; this includes the
                            // baseline features, which are shared across alignee datasets.
                            // NOTE(review): the original loop also contained the self-assignment
                            // "feature.Net = feature.Net", dropped here because it has no effect
                            // (assuming a plain property). Confirm no other source was intended.
                            foreach (var feature in allFeatures)
                            {
                                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                            }

                            // This tells us the differences before we align.
                            var clusters = clusterer.Cluster(allFeatures);
                            var preAlignment = AnalyzeClusters(clusters);

                            aligner.AligneeSpectraProvider = providerY;
                            aligner.BaselineSpectraProvider = providerX;

                            UpdateStatus("Aligning data");
                            var data = aligner.Align(baselineFeatures, aligneeFeatures);
                            var matches = data.Matches;

                            // NOTE(review): every alignee dataset writes to the same errorPath and
                            // matchPath; overwrite-vs-append is decided inside WriteErrors and
                            // SaveMatches, which are not visible here.
                            WriteErrors(errorPath, matches);

                            // Clear the pre-alignment cluster membership before clustering again.
                            foreach (var feature in allFeatures)
                            {
                                feature.UmcCluster = null;
                                feature.ClusterId = -1;
                            }

                            // Then cluster after alignment!
                            UpdateStatus("clustering data");
                            clusters = clusterer.Cluster(allFeatures);
                            var postAlignment = AnalyzeClusters(clusters);

                            UpdateStatus("Note\tSame\tDifferent");
                            UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                preAlignment.DifferentCluster));
                            UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                postAlignment.DifferentCluster));

                            SaveMatches(matchPath, matches);
                        }
                    }
                }
            }
            finally
            {
                // Run on success and on failure so the progress handlers never stay attached.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(featureFinder);
                DeRegisterProgressNotifier(clusterer);
            }
        }