Exemplo n.º 1
0
        /// <summary>
        ///     Converts MS features into LC-MS features, then assigns each non-empty feature a
        ///     normalized elution time (NET), statistics, a sequential ID and scan boundaries
        ///     trimmed to the elution peak.
        /// </summary>
        /// <param name="msFeatures">MS features to convert; their scans also define the scan range used to normalize NET.</param>
        /// <param name="options">Feature finding options handed to the <c>MsToLcmsFeatures</c> converter.</param>
        /// <param name="provider">Source of scan summaries, used to look up scan times.</param>
        /// <param name="progress">Optional progress reporter forwarded to the converter.</param>
        /// <returns>The features that contain at least one MS feature, with IDs numbered from 0.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="provider"/> is null.</exception>
        public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
                                           LcmsFeatureFindingOptions options, IScanSummaryProvider provider,
                                           IProgress<ProgressData> progress = null)
        {
            if (provider == null)
            {
                throw new ArgumentNullException(nameof(provider));
            }

            var clusterer = new MsToLcmsFeatures(provider, options);
            var features = clusterer.Convert(msFeatures, progress);

            var newFeatures = new List<UMCLight>();

            // Without any MS features there is no scan range to normalize against; looking up the
            // int.MaxValue / int.MinValue sentinels in the provider would be meaningless.
            if (msFeatures.Count == 0)
            {
                return newFeatures;
            }

            var minScan = int.MaxValue;
            var maxScan = int.MinValue;

            foreach (var msFeature in msFeatures)
            {
                minScan = Math.Min(msFeature.Scan, minScan);
                maxScan = Math.Max(msFeature.Scan, maxScan);
            }

            var minScanTime = provider.GetScanSummary(minScan).Time;
            var maxScanTime = provider.GetScanSummary(maxScan).Time;
            var scanTimeRange = maxScanTime - minScanTime;
            var id = 0;

            foreach (var feature in features)
            {
                var msFeatureCount = feature.MsFeatures.Count;
                if (msFeatureCount < 1)
                {
                    continue;
                }

                // When every MS feature shares one scan time the range is zero; use 0 rather than
                // dividing by zero and storing NaN/Infinity.
                feature.Net = scanTimeRange > 0
                    ? (provider.GetScanSummary(feature.Scan).Time - minScanTime) / scanTimeRange
                    : 0;
                feature.CalculateStatistics();
                feature.Id = id++;
                newFeatures.Add(feature);

                // Sets the width of the feature to be the width of the peak, not the width of the tails.
                // Locate the apex first; every MS feature, including the last one, is a candidate.
                var maxAbundance = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < msFeatureCount; msFeatureIndex++)
                {
                    var abundance = feature.MsFeatures[msFeatureIndex].Abundance;
                    if (abundance > maxAbundance)
                    {
                        maxAbundance = abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }

                // Walk from the apex towards the front until the abundance falls to 5% of the apex.
                // If no such point exists, ScanStart keeps the value it already had.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex >= 0; msFeatureIndex--)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }

                // Same walk towards the back for ScanEnd.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < msFeatureCount; msFeatureIndex++)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
            }

            // Return only the features that were actually processed above.
            return newFeatures;
        }
Exemplo n.º 2
0
        /// <summary>
        ///     Compares MS1 (STAC AMT peak matching) identifications against MS/MS identifications
        ///     for a single dataset and writes the per-cluster differences and summary counts to the console.
        /// </summary>
        /// <param name="liquidResultsPath">Path to the LIQUID results file; read both as the mass tag database and for the MS/MS IDs.</param>
        /// <param name="isosFile">Path to the isos file holding the MS features.</param>
        /// <param name="rawFile">Path to the raw data file used to obtain a spectra provider.</param>
        public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
        {
            // Read mass tags.
            var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
            var massTags = massTagReader.LoadDatabase();

            // Get identifications - this rereads the liquid results file, but I'm leaving it that way
            // for now because this is just a test.
            var scansToIds = this.GetIds(liquidResultsPath);

            // Read raw data file.
            var spectraProviderCache = new SpectraProviderCache();
            var spectraProvider = spectraProviderCache.GetSpectraProvider(rawFile);

            // Read isos features
            var isosReader = new MsFeatureLightFileReader
            {
                IsosFilteroptions = new DeconToolsIsosFilterOptions { MaximumIsotopicFit = 0.15 }
            };
            var msFeatures = isosReader.ReadFile(isosFile).ToList();

            // Get LCMS features
            var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
            var lcmsFeatures = msFeatureClusterer.Convert(msFeatures);

            // No alignment is performed here, so the "aligned" values are the observed ones.
            lcmsFeatures.ForEach(feature =>
            {
                feature.NetAligned = feature.Net;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            });

            // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
            // between LCMS features and clusters.
            var clusters = new List<UMCClusterLight>(lcmsFeatures.Count);

            foreach (var lcmsFeature in lcmsFeatures)
            {
                var cluster = new UMCClusterLight(lcmsFeature);
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(cluster);
            }

            // Do STAC AMT matching
            var stacAdapter = new STACAdapter<UMCClusterLight>
            {
                Options = new FeatureMatcherParameters
                {
                    ShouldCalculateShiftFDR = false,
                    UsePriors = true,
                    UseEllipsoid = true,
                    UseDriftTime = false,
                    ShouldCalculateSTAC = true,
                }
            };
            var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

            // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
            // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
            var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List<Protein>());

            foreach (var amtMatch in amtMatches)
            {
                var cluster = amtMatch.Observed;
                var massTag = amtMatch.Target;
                ms1Matches[cluster].Add(new Protein
                {
                    Name = massTag.ProteinName,
                    Sequence = massTag.PeptideSequence,
                    ChemicalFormula = massTag.PeptideSequence
                });
            }

            // Now we need to backtrack MS/MS identifications -> clusters
            var ms2Matches = new Dictionary<UMCClusterLight, List<Protein>>();

            foreach (var cluster in clusters)
            {
                var clusterMs2Ids = new List<Protein>();
                ms2Matches.Add(cluster, clusterMs2Ids);

                foreach (var lcmsFeature in cluster.UmcList)
                {
                    foreach (var msFeature in lcmsFeature.MsFeatures)
                    {
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            if (scansToIds.ContainsKey(msmsFeature.Scan))
                            {
                                clusterMs2Ids.AddRange(scansToIds[msmsFeature.Scan]);
                            }
                        }
                    }
                }
            }

            // How many clusters have IDs from MS1 matching and from MS/MS?
            var clusterMs1IdCount = ms1Matches.Values.Count(value => value.Any());
            var clusterMs2IdCount = ms2Matches.Values.Count(value => value.Any());

            var overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

            // Finally compare the MS1 IDs to the MS2 IDs.
            foreach (var cluster in clusters)
            {
                // For now only comparing by name. The name lists are materialized once so the
                // comparisons below do not re-run the projections on every lookup.
                var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
                var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

                // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
                var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
                var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

                // The overlap is the set of names found by BOTH methods. The "only" lists are
                // disjoint by construction, so intersecting them would always yield zero.
                overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

                // Write Results
                if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
                {
                    continue;
                }

                Console.WriteLine("Cluster {0}:", cluster.Id);
                if (ms1OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs1 Only IDs:");
                    foreach (var id in ms1OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }

                if (ms2OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs2 Only IDs:");
                    foreach (var id in ms2OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }
            }

            Console.WriteLine("Clusters with MS1 IDs: {0}", clusterMs1IdCount);
            Console.WriteLine("Clusters with MS2 IDs: {0}", clusterMs2IdCount);
            Console.WriteLine("Overlap: {0}", overlapCount);
        }