Exemplo n.º 1
0
 /// <summary>
 /// Reconstruct the MS/MS for a cluster.
 /// Requires that the cluster's LCMS and MS features have been reconstructed.
 /// </summary>
 /// <param name="cluster">The cluster to reconstruct.</param>
 public static void ReconstructClusterMsMs(this UMCClusterLight cluster, SpectraProviderCache providers, bool getPeaks = true)
 {
     foreach (var feature in cluster.Features)
     {
         var provider = providers.GetSpectraProvider(feature.GroupId);
         feature.ReconstructUMCMsMs(provider, getPeaks);
     }
 }
Exemplo n.º 2
0
        public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
        {
            // Read mass tags.
            var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
            var massTags      = massTagReader.LoadDatabase();

            // Get identifications - this rereads the liquid results file, but I'm leaving it that way
            // for now because this is just a test.
            var scansToIds = this.GetIds(liquidResultsPath);

            // Read raw data file.
            var spectraProviderCache = new SpectraProviderCache();
            var spectraProvider      = spectraProviderCache.GetSpectraProvider(rawFile);

            // Read isos features
            var isosReader = new MsFeatureLightFileReader();

            isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions {
                MaximumIsotopicFit = 0.15
            };
            var msFeatures = isosReader.ReadFile(isosFile).ToList();

            // Get LCMS features
            var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
            var lcmsFeatures       = msFeatureClusterer.Convert(msFeatures);

            lcmsFeatures.ForEach(feature => { feature.NetAligned = feature.Net; feature.MassMonoisotopicAligned = feature.MassMonoisotopic; });

            // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
            // between LCMS features and clusters.
            var clusters = new List <UMCClusterLight> {
                Capacity = lcmsFeatures.Count
            };

            foreach (var lcmsFeature in lcmsFeatures)
            {
                var cluster = new UMCClusterLight(lcmsFeature);
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(cluster);
            }

            // Do STAC AMT matching
            var stacAdapter = new STACAdapter <UMCClusterLight>
            {
                Options = new FeatureMatcherParameters
                {
                    ShouldCalculateShiftFDR = false,
                    UsePriors           = true,
                    UseEllipsoid        = true,
                    UseDriftTime        = false,
                    ShouldCalculateSTAC = true,
                }
            };
            var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

            // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
            // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
            var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List <Protein>());

            foreach (var amtMatch in amtMatches)
            {
                var cluster = amtMatch.Observed;
                var massTag = amtMatch.Target;
                ms1Matches[cluster].Add(new Protein
                {
                    Name            = massTag.ProteinName,
                    Sequence        = massTag.PeptideSequence,
                    ChemicalFormula = massTag.PeptideSequence
                });
            }

            // Now we need to backtrack MS/MS identifications -> clusters
            var ms2Matches = new Dictionary <UMCClusterLight, List <Protein> >();

            foreach (var cluster in clusters)
            {
                ms2Matches.Add(cluster, new List <Protein>());
                foreach (var lcmsFeature in cluster.UmcList)
                {
                    foreach (var msFeature in lcmsFeature.MsFeatures)
                    {
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            if (scansToIds.ContainsKey(msmsFeature.Scan))
                            {
                                ms2Matches[cluster].AddRange(scansToIds[msmsFeature.Scan]);
                            }
                        }
                    }
                }
            }

            // How many clusters have IDs from MS/MS?
            var clusterMs1IdCount = ms1Matches.Values.Count(value => value.Any());
            var clusterMs2IdCount = ms2Matches.Values.Count(value => value.Any());

            int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

            // Finally compare the MS1 IDs to the MS2 IDs.
            foreach (var cluster in clusters)
            {
                // For now only comparing by name
                var ms1Ids    = ms1Matches[cluster];
                var ms1Lipids = ms1Ids.Select(id => id.Name);

                var ms2Ids    = ms2Matches[cluster];
                var ms2Lipids = ms2Ids.Select(id => id.Name);

                // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
                var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid));
                var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid));

                overlapCount += ms1OnlyIds.Intersect(ms2OnlyIds).Count();

                // Write Results
                if (ms1OnlyIds.Any() || ms2OnlyIds.Any())
                {
                    Console.WriteLine("Cluster {0}:", cluster.Id);
                    if (ms1OnlyIds.Any())
                    {
                        Console.WriteLine("\tMs1 Only IDs:");
                        foreach (var id in ms1OnlyIds)
                        {
                            Console.WriteLine("\t\t{0}", id);
                        }
                    }

                    if (ms2OnlyIds.Any())
                    {
                        Console.WriteLine("\tMs2 Only IDs:");
                        foreach (var id in ms2OnlyIds)
                        {
                            Console.WriteLine("\t\t{0}", id);
                        }
                    }
                }
            }

            Console.WriteLine("Overlap: {0}", overlapCount);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Aligns features based on MSMS spectral similarity.
        /// </summary>
        /// <param name="featureMap"></param>
        /// <param name="msms"></param>
        public List <MsmsCluster> Cluster(List <UMCLight> features, SpectraProviderCache provider)
        {
            UpdateStatus("Mapping UMC's to MS/MS spectra using intensity profile.");
            // Step 1: Cluster the spectra
            // Create the collection of samples.
            var msFeatures = new List <MSFeatureLight>();

            // Sort through the features
            foreach (var feature in features)
            {
                // Sort out charge states...?
                var chargeMap = new Dictionary <int, MSFeatureLight>();

                double         abundance  = int.MinValue;
                MSFeatureLight maxFeature = null;

                // Find the max abundance spectrum.  This the number of features we have to search.
                foreach (var msFeature in feature.MsFeatures)
                {
                    if (msFeature.Abundance > abundance && msFeature.MSnSpectra.Count > 0)
                    {
                        abundance  = msFeature.Abundance;
                        maxFeature = msFeature;
                    }
                }

                if (maxFeature != null)
                {
                    msFeatures.Add(maxFeature);
                }
            }

            UpdateStatus(string.Format("Found {0} total spectra for clustering.", msFeatures.Count));

            UpdateStatus("Sorting spectra.");
            // Sort based on mass using the max abundance of the feature.
            msFeatures.Sort(delegate(MSFeatureLight x, MSFeatureLight y)
                            { return(x.MassMonoisotopicMostAbundant.CompareTo(y.MassMonoisotopicMostAbundant)); });

            // Then cluster the spectra.
            var j = 1;
            var h = 0;
            var N = msFeatures.Count;

            var clusters  = new List <MsmsCluster>();
            var tol       = MassTolerance;
            var lastTotal = 0;

            UpdateStatus("Clustering spectra.");
            while (j < N)
            {
                var i        = j - 1;
                var featureJ = msFeatures[j];
                var featureI = msFeatures[i];
                var diff     = FeatureLight.ComputeMassPPMDifference(featureJ.MassMonoisotopicMostAbundant, featureI.MassMonoisotopicMostAbundant);

                if (Math.Abs(diff) > tol)
                {
                    // We only care to create clusters of size greater than one.
                    if ((j - h) > 1)
                    {
                        var data = Cluster(h,
                                           j,
                                           msFeatures,
                                           provider,
                                           SimilarityTolerance);
                        clusters.AddRange(data);
                    }

                    // Reset the count, we're done looking at those clusters.
                    h = j;
                }
                if (j - lastTotal > 500)
                {
                    lastTotal = j;
                    UpdateStatus(string.Format("Processed {0} / {1} total spectra.", lastTotal, N));
                }
                j++;
            }
            UpdateStatus("Finishing last cluster data.");

            // Cluster the rest
            if ((j - h) > 1)
            {
                var data = Cluster(h,
                                   j,
                                   msFeatures,
                                   provider,
                                   SimilarityTolerance);
                clusters.AddRange(data);
            }
            UpdateStatus("Finished clustering.");
            var passingClusters = clusters.Where(cluster => cluster.Features.Count >= MinimumClusterSize);

            return(passingClusters.ToList());
        }
Exemplo n.º 4
0
        public void ClusterMsMs(string name,
                                string resultPath,
                                string sequencePath,
                                SequenceFileType type,
                                string baseline,
                                string features,
                                double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");


            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
            };

            baselineInfo.InputFiles.Add(new InputFile {
                Path = baseline, FileType = InputFileType.Features
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = baselineRaw, FileType = InputFileType.Raw
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
            };

            aligneeInfo.InputFiles.Add(new InputFile {
                Path = features, FileType = InputFileType.Features
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = featuresRaw, FileType = InputFileType.Raw
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();

            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();

            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);


            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId  = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();

            clusterer.MzTolerance      = .5;
            clusterer.MassTolerance    = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange           = 905;
            clusterer.Progress           += clusterer_Progress;

            var allFeatures = new List <UMCLight>();

            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List <MsmsCluster> clusters = null;
            var spectraProviderCache    = new SpectraProviderCache();

            spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
            spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);


            clusters = clusterer.Cluster(allFeatures, spectraProviderCache);
            Console.WriteLine("Found {0} Total Clusters", clusters.Count);

            if (clusters != null)
            {
                var now            = DateTime.Now;
                var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
                                                   name,
                                                   now.Year,
                                                   now.Month,
                                                   now.Day,
                                                   now.Hour,
                                                   now.Minute,
                                                   now.Second,
                                                   resultPath
                                                   );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    writer.WriteLine();
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        if (cluster.Features.Count == 2)
                        {
                            foreach (var feature in cluster.Features)
                            {
                                scanData += string.Format("{0},", feature.Scan);
                            }
                            scanData += string.Format("{0}", cluster.MeanScore);

                            writer.WriteLine(scanData);
                        }
                    }
                }
                testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
                                               name,
                                               now.Year,
                                               now.Month,
                                               now.Day,
                                               now.Hour,
                                               now.Minute,
                                               now.Second,
                                               resultPath
                                               );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        var data     = "";
                        foreach (var feature in cluster.Features)
                        {
                            scanData += string.Format("{0},", feature.Scan);
                            data     += string.Format("{0},{1},{2},{3},{4},{5}",
                                                      feature.GroupId,
                                                      feature.Id,
                                                      feature.MassMonoisotopic,
                                                      feature.Mz,
                                                      feature.ChargeState,
                                                      feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                        }
                        writer.WriteLine(scanData + "," + data);
                    }
                    writer.WriteLine("");
                    writer.WriteLine("");
                    writer.WriteLine("[Clusters]");

                    foreach (var cluster in clusters)
                    {
                        writer.WriteLine("cluster id, cluster score");
                        writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                        writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                        foreach (var feature in cluster.Features)
                        {
                            var data = string.Format("{0},{1},{2},{3},{4},{5}",
                                                     feature.GroupId,
                                                     feature.Id,
                                                     feature.MassMonoisotopic,
                                                     feature.Mz,
                                                     feature.ChargeState,
                                                     feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                            writer.WriteLine(data);
                        }
                    }
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Clusters spectra together based on similarity.
        /// </summary>
        /// <param name="start"></param>
        /// <param name="stop"></param>
        /// <param name="features"></param>
        private List <MsmsCluster> Cluster(int start,
                                           int stop,
                                           List <MSFeatureLight> features,
                                           SpectraProviderCache provider,
                                           double similarityTolerance)
        {
            var massTolerance = MassTolerance;

            // Maps the feature to a cluster ID.
            var featureMap = new Dictionary <MSFeatureLight, int>();

            // Maps the cluster ID to a cluster.
            var clusterMap = new Dictionary <int, MsmsCluster>();
            var clusters   = new List <MsmsCluster>();

            // Create singleton clusters.
            var id = 0;

            for (var i = start; i < stop; i++)
            {
                var feature = features[i];
                var cluster = new MsmsCluster();
                cluster.Id        = id++;
                cluster.MeanScore = 0;
                cluster.Features.Add(feature);

                featureMap.Add(feature, cluster.Id);
                clusterMap.Add(cluster.Id, cluster);
            }
            var protonMass = AdductMass;

            // Then iterate and cluster.
            for (var i = start; i < stop; i++)
            {
                var featureI = features[i];
                var clusterI = clusterMap[featureMap[featureI]];

                for (var j = i + 1; j < stop; j++)
                {
                    var featureJ = features[j];
                    var clusterJ = clusterMap[featureMap[featureJ]];

                    // Don't cluster the same thing
                    if (clusterI.Id == clusterJ.Id)
                    {
                        continue;
                    }

                    // Don't cluster from the same dataset.  Let the linkage algorithm decide if they
                    // belong in the same cluster, and later, go back and determine if the cluster is valid or not.
                    if (featureI.GroupId == featureJ.GroupId)
                    {
                        continue;
                    }

                    // Check the scan difference.  If it fits then we are within range.
                    var scanDiff = Math.Abs(featureI.Scan - featureJ.Scan);
                    if (scanDiff <= ScanRange)
                    {
                        // Use the most abundant mass because it had a higher chance of being fragmented.
                        var mzI = (featureI.MassMonoisotopicMostAbundant / featureI.ChargeState) + protonMass;
                        var mzJ = (featureJ.MassMonoisotopicMostAbundant / featureJ.ChargeState) + protonMass;

                        var mzDiff = Math.Abs(mzI - mzJ);
                        if (mzDiff <= MzTolerance)
                        {
                            var scanSummary = new ScanSummary();
                            if (featureI.MSnSpectra[0].Peaks.Count <= 0)
                            {
                                var spectraProvider = provider.GetSpectraProvider(featureI.GroupId);
                                featureI.MSnSpectra[0].Peaks = spectraProvider.GetRawSpectra(featureI.MSnSpectra[0].Scan, featureI.GroupId, out scanSummary);
                                featureI.MSnSpectra[0].Peaks = XYData.Bin(featureI.MSnSpectra[0].Peaks,
                                                                          0,
                                                                          2000,
                                                                          MzTolerance);
                            }
                            if (featureJ.MSnSpectra[0].Peaks.Count <= 0)
                            {
                                var spectraProvider = provider.GetSpectraProvider(featureJ.GroupId);
                                featureJ.MSnSpectra[0].Peaks = spectraProvider.GetRawSpectra(featureJ.MSnSpectra[0].Scan, out scanSummary);
                                featureJ.MSnSpectra[0].Peaks = XYData.Bin(featureJ.MSnSpectra[0].Peaks,
                                                                          0,
                                                                          2000,
                                                                          MzTolerance);
                            }


                            // Compute similarity
                            var score = SpectralComparer.CompareSpectra(featureI.MSnSpectra[0], featureJ.MSnSpectra[0]);

                            if (score >= similarityTolerance)
                            {
                                clusterJ.MeanScore += score;
                                foreach (var xFeature in clusterI.Features)
                                {
                                    clusterJ.Features.Add(xFeature);
                                    featureMap[xFeature] = clusterJ.Id;
                                    clusterMap.Remove(clusterI.Id);
                                }
                            }
                        }
                    }
                }
            }

            clusters.AddRange(clusterMap.Values);

            for (var i = start; i < stop; i++)
            {
                features[i].MSnSpectra[0].Peaks.Clear();
            }
            foreach (var cluster in clusters)
            {
                cluster.MeanScore /= (cluster.Features.Count - 1);
            }
            return(clusters);
        }