Ejemplo n.º 1
0
        /// <summary>
        /// Reads the raw spectrum of a scan and keeps only the points whose X value
        /// lies strictly between <paramref name="minMz"/> and <paramref name="maxMz"/>.
        /// </summary>
        /// <param name="path">Path handed to GetProvider to obtain a spectra provider.</param>
        /// <param name="scan">Scan number to read.</param>
        /// <param name="minMz">Exclusive lower bound applied to each point's X value.</param>
        /// <param name="maxMz">Exclusive upper bound applied to each point's X value.</param>
        /// <returns>
        /// The filtered points, or null when no provider is available, when reading
        /// throws, or when the provider returns null.
        /// </returns>
        public static List <XYData> GetParentSpectrum(string path, int scan, double minMz, double maxMz)
        {
            ISpectraProvider reader = GetProvider(path);
            if (reader == null)
            {
                return null;
            }

            List <XYData> rawPoints;
            try
            {
                // The second argument is presumably the MS level (1 here) - confirm against the provider API.
                var scanSummary = new ScanSummary();
                rawPoints = reader.GetRawSpectra(scan, 1, out scanSummary);
            }
            catch
            {
                // Any read failure is reported and turned into a null result.
                Logger.PrintMessage("Could not load the raw spectra");
                return null;
            }

            if (rawPoints == null)
            {
                return null;
            }

            return rawPoints.Where(point => point.X > minMz && point.X < maxMz).ToList();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the raw peaks of a scan from the reader and wraps them in a new MSSpectra.
        /// </summary>
        /// <param name="reader">Provider the raw peaks are read from.</param>
        /// <param name="scan">Scan number to read.</param>
        /// <param name="group">Not used by this method.</param>
        /// <param name="mzTolerance">Not used by this method.</param>
        /// <returns>A spectrum whose Peaks are the points returned by the reader.</returns>
        private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
        {
            var scanSummary = new ScanSummary();
            var rawPeaks    = reader.GetRawSpectra(scan, 2, out scanSummary);

            return new MSSpectra
            {
                Peaks = rawPeaks
            };
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates an XIC from the given set of target features.
        /// </summary>
        /// <remarks>
        /// The XIC is extracted at the m/z of the most abundant seed feature. The scan
        /// window runs from 100 scans before the first list entry (clamped at zero) to
        /// 100 scans after the last list entry, so the list is expected to be ordered
        /// by scan. Seed features whose scan appears in the XIC have their Abundance
        /// overwritten with the XIC value; when several seeds share a scan only the
        /// first one is updated.
        /// </remarks>
        /// <param name="msFeatures">Seed features that provide the targets</param>
        /// <param name="massError">Mass error to use when pulling peaks</param>
        /// <param name="provider">Source of the raw spectra the XIC is built from</param>
        /// <returns>The XIC features; empty when no seed features are supplied.</returns>
        public IEnumerable <MSFeatureLight> CreateXic(IList <MSFeatureLight> msFeatures,
                                                      double massError,
                                                      ISpectraProvider provider)
        {
            var newFeatures = new List <MSFeatureLight>();

            if (msFeatures == null || msFeatures.Count <= 0)
            {
                return(newFeatures);
            }

            // Pad the scan window by 100 scans on each side, never going below scan zero.
            var minScan = msFeatures[0].Scan;
            var maxScan = msFeatures[msFeatures.Count - 1].Scan;

            minScan -= 100;
            maxScan += 100;
            minScan  = Math.Max(0, minScan);

            double maxIntensity = 0;
            double mz           = 0;
            var    featureMap   = new Dictionary <int, MSFeatureLight>();

            foreach (var chargeFeature in msFeatures)
            {
                // Track the m/z of the most abundant seed; it becomes the XIC target.
                if (chargeFeature.Abundance > maxIntensity)
                {
                    maxIntensity = chargeFeature.Abundance;
                    mz           = chargeFeature.Mz;
                }

                // Map the feature by scan; the first seed seen for a scan wins.
                if (!featureMap.ContainsKey(chargeFeature.Scan))
                {
                    featureMap.Add(chargeFeature.Scan, chargeFeature);
                }
            }

            foreach (var msFeature in CreateXic(mz, massError, minScan, maxScan, provider))
            {
                // Push the extracted abundance back onto the matching seed, if any.
                MSFeatureLight seed;
                if (featureMap.TryGetValue(msFeature.Scan, out seed))
                {
                    seed.Abundance = msFeature.Abundance;
                }
                newFeatures.Add(msFeature);
            }
            return(newFeatures);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Reconstruct the MS/MS for a feature.
 /// For every entry in the feature's Features collection the provider is queried
 /// by scan and m/z, and the returned spectra are appended to that entry's MSnSpectra.
 /// Requires that the feature's LCMS and MS features have been reconstructed.
 /// </summary>
 /// <param name="umc">The feature to reconstruct.</param>
 /// <param name="provider">Source of the MS/MS spectra.</param>
 /// <param name="getPeaks">
 /// Not read by this method; the provider is always called with <c>false</c>
 /// as its third argument.
 /// </param>
 public static void ReconstructUMCMsMs(this UMCLight umc, ISpectraProvider provider, bool getPeaks = true)
 {
     foreach (var parent in umc.Features)
     {
         var msmsSpectra = provider.GetMSMSSpectra(parent.Scan, parent.Mz, false);
         parent.MSnSpectra.AddRange(msmsSpectra);
     }
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Attaches MS/MS spectra to every MS feature of every supplied feature.
 /// Each MS feature is looked up in the provider by scan and m/z, and the
 /// returned spectra are appended to its MSnSpectra.
 /// </summary>
 /// <param name="features">Features whose MS features receive the spectra.</param>
 /// <param name="spectraProvider">Source of the MS/MS spectra.</param>
 public static void LoadMsMs(List <UMCLight> features, ISpectraProvider spectraProvider)
 {
     foreach (var umc in features)
     {
         foreach (var parent in umc.Features)
         {
             // The provider is always called with false as its third argument.
             var msmsSpectra = spectraProvider.GetMSMSSpectra(parent.Scan, parent.Mz, false);
             parent.MSnSpectra.AddRange(msmsSpectra);
         }
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads a spectrum from the reader and, when ShouldLogScale is set, replaces
        /// the Y value of every peak with its base-2 logarithm.
        /// </summary>
        /// <param name="reader">Provider the spectrum is read from.</param>
        /// <param name="scan">Scan number to read.</param>
        /// <param name="group">Group value forwarded to the reader.</param>
        /// <param name="mzTolerance">Not used by this method.</param>
        /// <returns>The spectrum returned by the reader, log-scaled in place when enabled.</returns>
        public static MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
        {
            var scanSummary = new ScanSummary();
            var result      = reader.GetSpectrum(scan, group, 2, out scanSummary, true);

            if (!ShouldLogScale)
            {
                return result;
            }

            foreach (var point in result.Peaks)
            {
                point.Y = Math.Log(point.Y, 2);
            }

            return result;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Creates an XIC from the m/z values provided.
        /// </summary>
        /// <remarks>
        /// For every scan from <paramref name="minScan"/> up to, but not including,
        /// <paramref name="maxScan"/>, the Y values of all points whose X lies strictly
        /// inside the window are summed into one feature. Scans that cannot be read,
        /// or for which the provider returns null, produce no feature.
        /// </remarks>
        /// <param name="mz">Target m/z the window is built around.</param>
        /// <param name="massError">Mass error passed to FeatureLight.ComputeDaDifferenceFromPPM.</param>
        /// <param name="minScan">First scan to read (inclusive).</param>
        /// <param name="maxScan">Scan at which reading stops (exclusive).</param>
        /// <param name="provider">Source of the raw spectra.</param>
        /// <returns>One feature per readable scan, with Scan and Net both set to the scan number.</returns>
        public IEnumerable <MSFeatureLight> CreateXic(double mz,
                                                      double massError,
                                                      int minScan,
                                                      int maxScan,
                                                      ISpectraProvider provider)
        {
            var xic = new List <MSFeatureLight>();

            // NOTE(review): "lower" uses +massError and "higher" uses -massError;
            // confirm that ComputeDaDifferenceFromPPM yields lower < higher for these signs.
            var lower  = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError);
            var higher = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError);

            for (var scan = minScan; scan < maxScan; scan++)
            {
                List <XYData> points = null;

                try
                {
                    var scanSummary = new ScanSummary();
                    points = provider.GetRawSpectra(scan, 0, 1, out scanSummary);
                }
                catch
                {
                    // Best effort: a scan that fails to load is skipped below.
                }

                if (points != null)
                {
                    var total = points.Where(p => p.X > lower && p.X < higher)
                                      .Sum(p => p.Y);

                    xic.Add(new MSFeatureLight
                    {
                        Scan      = scan,
                        Net       = scan,
                        Abundance = Convert.ToInt64(total)
                    });
                }
            }

            return xic;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Finds LCMS Features using the PNNL Omics linkage clustering algorithms.
        /// </summary>
        /// <remarks>
        /// Stores the options, the observed scan range and the maximum distance in the
        /// instance fields m_options, m_minScan, m_maxScan and m_maxDistance before
        /// clustering. Clusters spanning fewer than options.MinUMCLength scans are
        /// dropped, and the remaining ones receive sequential IDs and a NET normalised
        /// to the observed scan range.
        /// </remarks>
        /// <param name="rawMsFeatures">MS features to cluster.</param>
        /// <param name="options">Supplies the mass tolerance, maximum distance and minimum length.</param>
        /// <param name="provider">Not used by this method.</param>
        /// <returns>The clustered features that passed the length filter.</returns>
        public List <UMCLight> FindFeatures(List <MSFeatureLight> rawMsFeatures,
                                            LCMSFeatureFindingOptions options,
                                            ISpectraProvider provider)
        {
            const ClusterCentroidRepresentation centroidType = ClusterCentroidRepresentation.Mean;

            m_options = options;

            m_minScan = int.MaxValue;
            m_maxScan = int.MinValue;
            foreach (var feature in rawMsFeatures)
            {
                m_minScan = Math.Min(feature.Scan, m_minScan);
                m_maxScan = Math.Max(feature.Scan, m_maxScan);
            }

            var finder = new MSFeatureSingleLinkageClustering <MSFeatureLight, UMCLight>
            {
                Parameters =
                {
                    DistanceFunction = WeightedNETDistanceFunction,
                    RangeFunction    = WithinRange,
                    Tolerances       = { Mass = options.ConstraintMonoMass, RetentionTime = 100, DriftTime = 100 }
                }
            };

            finder.Parameters.CentroidRepresentation = centroidType;

            // Assigned before clustering, as in the original statement order.
            m_maxDistance = options.MaxDistance;
            List <UMCLight> features = finder.Cluster(rawMsFeatures);

            // Remove the short UMC's.
            features.RemoveAll(x => (x.ScanEnd - x.ScanStart + 1) < options.MinUMCLength);

            // Width of the observed scan range, computed in doubles so the subtraction
            // cannot overflow. It is zero when every MS feature shares one scan.
            var scanRange = Convert.ToDouble(m_maxScan) - Convert.ToDouble(m_minScan);

            var id = 0;

            foreach (var feature in features)
            {
                // A zero-width range would divide by zero and yield NaN; use 0 instead.
                feature.NET = scanRange > 0
                    ? Convert.ToDouble(feature.Scan - m_minScan) / scanRange
                    : 0;
                feature.RetentionTime = feature.NET;
                feature.ID            = id++;
            }

            return(features);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Finds LCMS Features using the PNNL Omics linkage clustering algorithms.
        /// </summary>
        /// <remarks>
        /// Stores the options, the observed scan range and the maximum distance in the
        /// instance fields m_options, m_minScan, m_maxScan and m_maxDistance before
        /// clustering. Clusters spanning fewer than options.MinUMCLength scans are
        /// dropped, and the remaining ones receive sequential IDs and a NET normalised
        /// to the observed scan range.
        /// </remarks>
        /// <param name="rawMsFeatures">MS features to cluster.</param>
        /// <param name="options">Supplies the mass tolerance, maximum distance and minimum length.</param>
        /// <param name="provider">Not used by this method.</param>
        /// <returns>The clustered features that passed the length filter.</returns>
        public List<UMCLight> FindFeatures( List<MSFeatureLight>        rawMsFeatures, 
            LCMSFeatureFindingOptions   options,
            ISpectraProvider            provider)
        {
            const ClusterCentroidRepresentation centroidType = ClusterCentroidRepresentation.Mean;
            m_options = options;

            m_minScan = int.MaxValue;
            m_maxScan = int.MinValue;
            foreach (var feature in rawMsFeatures)
            {
                m_minScan = Math.Min(feature.Scan, m_minScan);
                m_maxScan = Math.Max(feature.Scan, m_maxScan);
            }

            var finder = new MSFeatureSingleLinkageClustering<MSFeatureLight, UMCLight>
            {
                Parameters =
                {
                    DistanceFunction = WeightedNETDistanceFunction,
                    RangeFunction    = WithinRange,
                    Tolerances       = {Mass = options.ConstraintMonoMass, RetentionTime = 100, DriftTime = 100}
                }
            };
            finder.Parameters.CentroidRepresentation = centroidType;

            // Assigned before clustering, as in the original statement order.
            m_maxDistance = options.MaxDistance;
            List<UMCLight> features = finder.Cluster(rawMsFeatures);

            // Remove the short UMC's.
            features.RemoveAll(x => (x.ScanEnd - x.ScanStart + 1) < options.MinUMCLength);

            // Width of the observed scan range, computed in doubles so the subtraction
            // cannot overflow. It is zero when every MS feature shares one scan.
            var scanRange = Convert.ToDouble(m_maxScan) - Convert.ToDouble(m_minScan);

            var id = 0;
            foreach (var feature in features)
            {
                // A zero-width range would divide by zero and yield NaN; use 0 instead.
                feature.NET = scanRange > 0
                    ? Convert.ToDouble(feature.Scan - m_minScan) / scanRange
                    : 0;
                feature.RetentionTime = feature.NET;
                feature.ID            = id++;
            }

            return features;
        }
        /// <summary>
        /// Stores the reader and indexes, by scan number, every MS/MS spectrum that is
        /// already attached to the MS features of the supplied features.
        /// </summary>
        /// <param name="reader">Reader kept in m_reader.</param>
        /// <param name="features">Features whose MS/MS spectra are indexed.</param>
        public CachedFeatureSpectraProvider(ISpectraProvider reader, IEnumerable<UMCLight> features)
        {
            m_spectraMap = new Dictionary<int, MSSpectra>();
            m_reader = reader;

            // Build the scan lookup up front so spectra and summary information can be
            // found later without touching the disk again. This also restricts the
            // available spectra to those that came from deisotoped data.
            foreach (var umc in features)
            {
                foreach (var parent in umc.MsFeatures)
                {
                    foreach (var msms in parent.MSnSpectra)
                    {
                        // The first spectrum seen for a scan is kept; later ones are ignored.
                        if (m_spectraMap.ContainsKey(msms.Scan))
                        {
                            continue;
                        }

                        m_spectraMap.Add(msms.Scan, msms);
                    }
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Stores the reader and indexes, by scan number, every MS/MS spectrum that is
        /// already attached to the MS features of the supplied features.
        /// </summary>
        /// <param name="reader">Reader kept in m_reader.</param>
        /// <param name="features">Features whose MS/MS spectra are indexed.</param>
        public CachedFeatureSpectraProvider(ISpectraProvider reader, IEnumerable <UMCLight> features)
        {
            m_spectraMap = new Dictionary <int, MSSpectra>();
            m_reader     = reader;

            // Build the scan lookup up front so spectra and summary information can be
            // found later without touching the disk again. This also restricts the
            // available spectra to those that came from deisotoped data.
            foreach (var umc in features)
            {
                foreach (var parent in umc.MsFeatures)
                {
                    foreach (var msms in parent.MSnSpectra)
                    {
                        // The first spectrum seen for a scan is kept; later ones are ignored.
                        var alreadyIndexed = m_spectraMap.ContainsKey(msms.Scan);
                        if (alreadyIndexed)
                        {
                            continue;
                        }

                        m_spectraMap.Add(msms.Scan, msms);
                    }
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Finds spectral anchor points between two datasets, validates them against
        /// the peptides read from each dataset's peptide file, and packages the result.
        /// </summary>
        /// <param name="comparerType">Kind of spectral comparer to create.</param>
        /// <param name="readerX">Spectra provider for the first dataset.</param>
        /// <param name="readerY">Spectra provider for the second dataset.</param>
        /// <param name="options">Options forwarded to the anchor point finder and validator.</param>
        /// <param name="datasetX">First dataset; its PeptideFile is read.</param>
        /// <param name="datasetY">Second dataset; its PeptideFile is read.</param>
        /// <param name="names">Dataset names stored on the returned analysis.</param>
        /// <returns>An analysis holding the names, the validated matches and the options.</returns>
        protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType,
                                                        ISpectraProvider readerX,
                                                        ISpectraProvider readerY,
                                                        SpectralOptions options,
                                                        AlignmentDataset datasetX,
                                                        AlignmentDataset datasetY,
                                                        List <string> names)
        {
            // Collaborators are created in the same order as before.
            var msgfReader      = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var anchorFinder    = new SpectralAnchorPointFinder();
            var anchorValidator = new SpectralAnchorPointValidator();
            var spectraComparer = SpectralComparerFactory.CreateSpectraComparer(comparerType);
            var topPercent      = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

            var anchorPoints = anchorFinder.FindAnchorPoints(readerX, readerY, spectraComparer, topPercent, options);

            // Arguments evaluate left to right, so the X peptide file is read before Y.
            anchorValidator.ValidateMatches(anchorPoints,
                                            msgfReader.Read(datasetX.PeptideFile),
                                            msgfReader.Read(datasetY.PeptideFile),
                                            options);

            return new SpectralAnalysis
            {
                DatasetNames = names,
                Matches      = anchorPoints,
                Options      = options
            };
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Loads a spectrum via GetSpectrum, then thresholds and bins its peaks.
        /// </summary>
        /// <param name="mzTolerance">Forwarded to GetSpectrum and used as the last argument to XYData.Bin.</param>
        /// <param name="percent">Value passed to the filter's Threshold call.</param>
        /// <param name="filter">Filter applied to the peaks before binning.</param>
        /// <param name="readerY">Provider the spectrum is read from.</param>
        /// <param name="scany">Scan number to read.</param>
        /// <param name="numberRequiredPeaks">Minimum peak count; spectra with fewer peaks are rejected.</param>
        /// <returns>The processed spectrum, or null when it has fewer peaks than required.</returns>
        public static MSSpectra GetSpectra(double mzTolerance,
                                           double percent,
                                           ISpectraFilter filter,
                                           ISpectraProvider readerY,
                                           int scany,
                                           int numberRequiredPeaks)
        {
            // Fixed range handed to XYData.Bin.
            const int binRangeStart = 0;
            const int binRangeEnd   = 2000;

            var result = GetSpectrum(readerY, scany, 0, mzTolerance);

            if (result.Peaks.Count >= numberRequiredPeaks)
            {
                result.Peaks = filter.Threshold(result.Peaks, percent);
                result.Peaks = XYData.Bin(result.Peaks, binRangeStart, binRangeEnd, mzTolerance);
                return result;
            }

            return null;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads the raw spectrum of a scan from the provider obtained for the given path.
        /// </summary>
        /// <param name="path">Path handed to GetProvider to obtain a spectra provider.</param>
        /// <param name="scan">Scan number to read.</param>
        /// <returns>
        /// The points returned by the provider, or null when no provider is available
        /// or when reading throws.
        /// </returns>
        public static List <XYData> GetDaughterSpectrum(string path, int scan)
        {
            ISpectraProvider reader = GetProvider(path);
            if (reader == null)
            {
                return null;
            }

            try
            {
                // The second argument is presumably the MS level (2 here) - confirm against the provider API.
                var scanSummary = new ScanSummary();
                return reader.GetRawSpectra(scan, 2, out scanSummary);
            }
            catch
            {
                // Any read failure is reported and turned into a null result.
                Logger.PrintMessage("Could not load the raw spectra");
                return null;
            }
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Stores the spectra provider and starts with an empty scan-summary map.
 /// </summary>
 /// <param name="provider">Provider kept in m_provider.</param>
 public RawLoaderCache(ISpectraProvider provider)
 {
     m_provider = provider;
     m_summaryMap = new Dictionary<int, Dictionary<int, ScanSummary>>();
 }
Ejemplo n.º 16
0
        /// <summary>
        ///     Finds features by clustering MS features with the tree clusterer, then
        ///     assigns each non-empty cluster an id, a Net normalised to the observed
        ///     scan range, and scan bounds trimmed to the abundance peak.
        /// </summary>
        /// <param name="msFeatures">MS features to cluster.</param>
        /// <param name="options">Supplies the mass tolerance, Net range and scan range.</param>
        /// <param name="provider">Spectra provider handed to the clusterer.</param>
        /// <returns>The clustered features that contain at least one MS feature.</returns>
        public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            ISpectraProvider provider)
        {
            var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
            {
                Tolerances =
                    new FeatureTolerances
                    {
                        Mass = options.InstrumentTolerances.Mass,
                        Net = options.MaximumNetRange
                    },
                ScanTolerance = options.MaximumScanRange,
                SpectraProvider = provider
                //TODO: Make sure we have a mass range for XIC's too....
            };

            OnStatus("Starting cluster definition");
            clusterer.Progress += (sender, args) => OnStatus(args.Message);

            var features = clusterer.Cluster(msFeatures);

            var minScan = int.MaxValue;
            var maxScan = int.MinValue;
            foreach (var feature in msFeatures)
            {
                minScan = Math.Min(feature.Scan, minScan);
                maxScan = Math.Max(feature.Scan, maxScan);
            }

            // Width of the observed scan range, computed in doubles so the subtraction
            // cannot overflow. It is zero when every MS feature shares one scan.
            var scanRange = Convert.ToDouble(maxScan) - Convert.ToDouble(minScan);

            var id = 0;
            var newFeatures = new List<UMCLight>();
            foreach (var feature in features)
            {
                // Clusters without any MS feature are left out of the result.
                if (feature.MsFeatures.Count < 1)
                    continue;

                // A zero-width range would divide by zero and yield NaN; use 0 instead.
                feature.Net = scanRange > 0
                    ? Convert.ToDouble(feature.Scan - minScan) / scanRange
                    : 0;
                feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
                feature.Id = id++;
                newFeatures.Add(feature);

                //Sets the width of the feature to be the width of the peak, not the width of the tails
                // Locate the most abundant MS feature; every entry, including the last, is considered.
                var maxAbundance = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count; msFeatureIndex++)
                {
                    var msFeature = feature.MsFeatures[msFeatureIndex];
                    if (msFeature.Abundance > maxAbundance)
                    {
                        maxAbundance = msFeature.Abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }

                // Walk left from the apex to the first point at or below 5% of the apex.
                // Index 0 is not tested; if nothing qualifies, ScanStart keeps its value.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }

                // Walk right from the apex in the same way. The last index is not tested;
                // if nothing qualifies, ScanEnd keeps its value.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
            }

            // Return the filtered list; the unfiltered cluster list may contain empty clusters.
            return newFeatures;
        }
Ejemplo n.º 17
0
        /// <summary>
        ///     Runs the MultiAlign analysis: normalises the Net of both feature sets,
        ///     clusters and scores them, aligns alignee to baseline, clusters and scores
        ///     again, reports both scores and saves the filtered matches.
        /// </summary>
        /// <remarks>
        ///     Both feature sequences are enumerated several times (AddRange, Max, Min,
        ///     the foreach loops and the aligner). Net is computed as
        ///     (Scan - min) / (max - min) per dataset, so the denominator is zero when
        ///     all features of a dataset share one scan.
        /// </remarks>
        /// <param name="baselineFeatures">Features of the baseline dataset.</param>
        /// <param name="aligneeFeatures">Features of the dataset being aligned.</param>
        /// <param name="providerX">Assigned to the aligner as the baseline spectra provider.</param>
        /// <param name="providerY">Assigned to the aligner as the alignee spectra provider.</param>
        /// <param name="aligner">Aligner used to align the two feature sets.</param>
        /// <param name="clusterer">Clusterer run once before and once after alignment.</param>
        /// <param name="matchPath">Path handed to SaveMatches.</param>
        /// <param name="errorPath">Not referenced in this method.</param>
        public void AlignDatasets(  IEnumerable<UMCLight>   baselineFeatures,
                                    IEnumerable<UMCLight>   aligneeFeatures,
                                    ISpectraProvider        providerX,
                                    ISpectraProvider        providerY,
                                    IFeatureAligner<IEnumerable<UMCLight>,
                                        IEnumerable<UMCLight>,
                                        classAlignmentData> aligner,
                                    IClusterer<UMCLight, UMCClusterLight> clusterer,
                                    string matchPath,
                                    string errorPath)
        {
            // cluster before we do anything else....
            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            // Scan extremes of each dataset, used to normalise Net below.
            var maxBaseline = baselineFeatures.Max(x => x.Scan);
            var minBaseline = baselineFeatures.Min(x => x.Scan);

            var maxAlignee  = aligneeFeatures.Max(x => x.Scan);
            var minAlignee  = aligneeFeatures.Min(x => x.Scan);

            // Normalise Net within the alignee dataset and seed the aligned mass
            // with the unaligned monoisotopic mass.
            foreach (var feature in aligneeFeatures)
            {
                feature.Net = Convert.ToDouble(feature.Scan - minAlignee) / Convert.ToDouble(maxAlignee - minAlignee);
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            // Same normalisation for the baseline dataset.
            foreach (var feature in baselineFeatures)
            {
                feature.Net = Convert.ToDouble(feature.Scan - minBaseline) / Convert.ToDouble(maxBaseline - minBaseline);
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            // This tells us the differences before we align.
            var clusters     = clusterer.Cluster(allFeatures);
            var clusterId    = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = clusterId++;
            }
            var scorer       = new GlobalPeptideClusterScorer();
            var preAlignment = scorer.Score(clusters);

            // providerY serves the alignee, providerX the baseline.
            aligner.AligneeSpectraProvider  = providerY;
            aligner.BaselineSpectraProvider = providerX;

            UpdateStatus("Aligning data");
            // Run the alignment and take the anchor point matches it produced.
            var data    = aligner.Align(baselineFeatures, aligneeFeatures);
            var matches = data.Matches;

            // create anchor points for LCMSWarp alignment
            // NOTE(review): massPoints and netPoints are filled here but not read again in this method.
            var massPoints = new List<RegressionPoint>();
            var netPoints = new List<RegressionPoint>();
            foreach (var match in matches)
            {
                var massError   = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                    match.AnchorPointY.Mz);
                var netError    = match.AnchorPointX.Net - match.AnchorPointY.Net;
                var massPoint   = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                massPoints.Add(massPoint);

                var netPoint    = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                netPoints.Add(netPoint);
            }

            // Clear the cluster assignment from the first clustering pass so the
            // features can be clustered again.
            foreach (var feature in allFeatures)
            {
                feature.UmcCluster = null;
                feature.ClusterId = -1;
            }
            // Then cluster after alignment!
            UpdateStatus("clustering data");
            clusters = clusterer.Cluster(allFeatures);
            var postAlignment = scorer.Score(clusters);

            // Report the pre- and post-alignment scores as a tab-separated table.
            UpdateStatus("Note\tSame\tDifferent");
            UpdateStatus(string.Format("Pre\t{0}\t{1}",
                            preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
            UpdateStatus(string.Format("Post\t{0}\t{1}",
                            postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

            // NOTE(review): the meaning of the value 40 passed to FilterMatches is not visible here - confirm.
            matches = FilterMatches(matches, 40);

            SaveMatches(matchPath, matches);
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(clusterer);
        }
Ejemplo n.º 18
0
        /// <summary>
        ///     Creates SIC's mapped by charge state for the MS Features in the feature.
        /// </summary>
        /// <remarks>
        ///     Without a provider each SIC is simply the feature's own MS features for
        ///     that charge, ordered by scan, as (scan, abundance, m/z) points. With a
        ///     provider each SIC is rebuilt from raw data: the median m/z of the charge
        ///     state is taken as the target, the scan window is padded by 20 scans on
        ///     each side (the upper end is exclusive), and for every scan the intensity
        ///     of the raw point closest to the target m/z is recorded. Scans with no
        ///     raw points get an intensity of zero.
        /// </remarks>
        /// <param name="feature">Feature whose MS features are grouped by charge.</param>
        /// <param name="provider">Object that can read data from a raw file or data source; may be null.</param>
        /// <returns>A map from charge state to its SIC points.</returns>
        public static Dictionary <int, List <XYZData> > CreateChargeSIC(this UMCLight feature, ISpectraProvider provider)
        {
            var chargeMap = feature.CreateChargeMap();
            var sicMap    = new Dictionary <int, List <XYZData> >();

            foreach (var charge in chargeMap.Keys)
            {
                chargeMap[charge].Sort((x, y) => x.Scan.CompareTo(y.Scan));
                var data = chargeMap[charge].ConvertAll(x => new XYZData(x.Scan, x.Abundance, x.Mz));
                sicMap.Add(charge, data);
            }

            if (provider == null)
            {
                return(sicMap);
            }

            // Iterate over a snapshot of the keys: entries are replaced inside the loop,
            // and writing to a Dictionary while enumerating it can invalidate the enumerator.
            foreach (var charge in new List <int>(sicMap.Keys))
            {
                var data = sicMap[charge];

                // Nothing to build a target m/z from; keep the empty series as is.
                if (data.Count == 0)
                {
                    continue;
                }

                // Collect the scan range and the m/z values of this charge state.
                var minScan  = int.MaxValue;
                var maxScan  = int.MinValue;
                var mzValues = new List <double>();
                foreach (var x in data)
                {
                    mzValues.Add(x.Z);
                    minScan = Math.Min(minScan, Convert.ToInt32(x.X));
                    maxScan = Math.Max(maxScan, Convert.ToInt32(x.X));
                }

                // The middle element of the sorted m/z values is the target.
                mzValues.Sort();
                var mz = mzValues[mzValues.Count / 2];

                minScan -= 20;
                maxScan += 20;

                // Build the SIC
                var intensities = new List <XYZData>();
                for (var scan = minScan; scan < maxScan; scan++)
                {
                    var    summary   = new ScanSummary();
                    var    spectrum  = provider.GetRawSpectra(scan, 1, out summary);
                    double intensity = 0;

                    if (spectrum != null)
                    {
                        // Pick the raw point nearest to the target m/z. The distance must be
                        // absolute, otherwise the point with the lowest m/z always wins.
                        var minDistance = double.MaxValue;
                        foreach (var point in spectrum)
                        {
                            var distance = Math.Abs(point.X - mz);
                            if (distance < minDistance)
                            {
                                minDistance = distance;
                                intensity   = point.Y;
                            }
                        }
                    }

                    intensities.Add(new XYZData(scan, intensity, mz));
                }

                sicMap[charge] = intensities;
            }

            return(sicMap);
        }
Ejemplo n.º 19
0
        public IEnumerable <UMCLight> CreateXic(IList <UMCLight> features,
                                                double massError,
                                                ISpectraProvider provider)
        {
            // this algorithm works as follows
            //
            //  PART A - Build the XIC target list
            //  For each UMC Light , find the XIC representation
            //      for each charge in a feature
            //          from start scan to end scan
            //              1. Compute a lower / upper m/z bound
            //              2. build an XIC chomatogram object
            //              3. reference the original UMC Feature -- this allows us to easily add
            //                  chromatograms to the corresponding feature
            //              4. store the chomatogram (with unique ID across all features)
            //
            //  PART B - Read Data From File
            //  Sort the list of XIC's by scan
            //  for each scan s = start scan to end scan
            //      1. find all xic's that start before and end after s -
            //          a. cache these xics in a dictionary based on unique id
            //          b. NOTE: this is why we sort so we can do an O(N) search for
            //             all XIC's that need data from this scan s
            //      2.  Then for each XIC that needs data
            //          a. Pull intensity data from lower / upper m/z bound
            //          b. create an MS Feature
            //          c. store in original UMC Feature
            //          d. Test to see if the XIC is done building (Intensity < 1 or s > scan end)
            //      3. Remove features that are done building from cache
            //
            //  CONCLUSIONS
            //  Building UMC's then takes linear time  (well O(N Lg N) time if you consider sort)
            //      and theoretically is only bounded by the time it takes to read an entire raw file
            //
            if (features.Count <= 0)
            {
                throw new Exception("No features were available to create XIC's from");
            }

            var minScan = Math.Max(1, features.Min(x => x.Scan - ScanWindowSize));
            var maxScan = features.Max(x => x.Scan + ScanWindowSize);

            OnProgress("Sorting features for optimized scan partitioning");
            // PART A
            // Map the feature ID to the xic based features
            var xicFeatures = new SortedSet <XicFeature>();
            var allFeatures = CreateXicTargets(features, massError);

            // PART B
            // sort the features...
            var featureCount = allFeatures.Count;

            allFeatures = allFeatures.OrderBy(x => x.StartScan).ToList();

            // This map tracks all possible features to keep

            var msFeatureId = 0;

            // This list stores a temporary amount of parent MS features
            // so that we can link MS/MS spectra to MS Features
            var parentMsList = new List <MSFeatureLight>();

            // Creates a comparison function for building a BST from a spectrum.
            var msmsFeatureId = 0;

            var totalScans = provider.GetTotalScans(0);

            OnProgress(string.Format("Analyzing {0} scans", totalScans));


            // Iterate over all the scans...
            for (var currentScan = minScan; currentScan < maxScan && currentScan <= totalScans; currentScan++)
            {
                // Find any features that need data from this scan
                var featureIndex = 0;
                while (featureIndex < featureCount)
                {
                    var xicFeature = allFeatures[featureIndex];
                    // This means that no new features were eluting with this scan....
                    if (xicFeature.StartScan > currentScan)
                    {
                        break;
                    }

                    // This means that there is a new feature...
                    if (currentScan <= xicFeature.EndScan)
                    {
                        if (!xicFeatures.Contains(xicFeature))
                        {
                            xicFeatures.Add(xicFeature);
                        }
                    }
                    featureIndex++;
                }

                // Skip pulling the data from the file if there is nothing to pull from.
                if (xicFeatures.Count < 1)
                {
                    continue;
                }

                // Here We link the MSMS Spectra to the UMC Features
                ScanSummary summary;
                var         spectrum = provider.GetRawSpectra(currentScan, 0, 1, out summary);


                if (summary.MsLevel > 1)
                {
                    // If it is an MS 2 spectra... then let's link it to the parent MS
                    // Feature
                    var matching = parentMsList.Where(
                        x => Math.Abs(x.Mz - summary.PrecursorMz) <= FragmentationSizeWindow
                        );

                    foreach (var match in matching)
                    {
                        // We create multiple spectra because this guy is matched to multiple ms
                        // features
                        var spectraData = new MSSpectra
                        {
                            Id              = msmsFeatureId,
                            ScanMetaData    = summary,
                            CollisionType   = summary.CollisionType,
                            Scan            = currentScan,
                            MsLevel         = summary.MsLevel,
                            PrecursorMz     = summary.PrecursorMz,
                            TotalIonCurrent = summary.TotalIonCurrent
                        };

                        match.MSnSpectra.Add(spectraData);
                        spectraData.ParentFeature = match;
                    }

                    if (spectrum != null)
                    {
                        spectrum.Clear();
                    }
                    msmsFeatureId++;

                    continue;
                }


                var mzList        = new double[spectrum.Count];
                var intensityList = new double[spectrum.Count];
                XYData.XYDataListToArrays(spectrum, mzList, intensityList);
                Array.Sort(mzList, intensityList);

                // Tracks which spectra need to be removed from the cache
                var toRemove = new List <XicFeature>();

                // Tracks which features we need to link to MSMS spectra with
                parentMsList.Clear();

                // now we iterate through all features that need data from this scan

                foreach (var xic in xicFeatures)
                {
                    var lower  = xic.LowMz;
                    var higher = xic.HighMz;

                    var startIndex = Array.BinarySearch(mzList, lower);
                    // A bitwise complement of the index, so use the bitwise complement
                    if (startIndex < 0)
                    {
                        startIndex = ~startIndex;
                    }

                    double summedIntensity = 0;

                    if (startIndex < mzList.Count() && mzList[startIndex] < lower)
                    {
                        // All data in the list is lighter than lower; nothing to sum
                    }
                    else
                    {
                        while (startIndex < mzList.Count() && mzList[startIndex] <= higher)
                        {
                            summedIntensity += intensityList[startIndex];
                            startIndex++;
                        }
                    }

                    // See if we need to remove this feature
                    // We only do so if the intensity has dropped off and we are past the end of the feature.
                    if (summedIntensity < 1 && currentScan > xic.EndScan)
                    {
                        toRemove.Add(xic);
                        continue;
                    }

                    var umc = xic.Feature;

                    // otherwise create a new feature here...
                    var msFeature = new MSFeatureLight
                    {
                        ChargeState      = xic.ChargeState,
                        Mz               = xic.Mz,
                        MassMonoisotopic = umc.MassMonoisotopic,
                        Scan             = currentScan,
                        Abundance        = Convert.ToInt64(summedIntensity),
                        Id               = msFeatureId++,
                        DriftTime        = umc.DriftTime,
                        Net              = currentScan,
                        GroupId          = umc.GroupId
                    };
                    parentMsList.Add(msFeature);
                    xic.Feature.AddChildFeature(msFeature);
                }

                // Remove features that end their elution prior to the current scan
                toRemove.ForEach(x => xicFeatures.Remove(x));
            }

            OnProgress("Filtering bad features with no data.");
            features = features.Where(x => x.MsFeatures.Count > 0).ToList();

            OnProgress("Refining XIC features.");
            return(RefineFeatureXics(features));
        }
Ejemplo n.º 20
0
        ///// <summary>
        ///// Links anchor points use the raw spectra provided.
        ///// </summary>
        //public IEnumerable<SpectralAnchorPointMatch> FindAnchorPoints2( ISpectraProvider            readerX,
        //                                                                ISpectraProvider           readerY,
        //                                                                ISpectralComparer          comparer,
        //                                                                ISpectraFilter             filter,
        //                                                                SpectralOptions            options,
        //                                                                bool skipComparison        = true)
        //{
        //    var matches = new List<SpectralAnchorPointMatch>();
        //    var scanDataX  = readerX.GetScanData(0);
        //    var scanDataY  = readerY.GetScanData(0);

        //    // Determine the scan extrema
        //    var maxX = scanDataX.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
        //    var minX = scanDataX.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;
        //    var maxY = scanDataY.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
        //    var minY = scanDataY.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;

        //    // Create a spectral comparer
        //    var ySpectraCache = new Dictionary<int, MSSpectra>();

        //    // Here we sort the summary spectra....so that we can improve run time efficiency
        //    // and minimize as much memory as possible.
        //    var ySpectraSummary = scanDataY.Values.Where(summary => summary.MsLevel == 2).ToList();
        //    var xSpectraSummary = scanDataX.Values.Where(summary => summary.MsLevel == 2).ToList();

        //    ySpectraSummary.Sort((x, y) => x.PrecursorMZ.CompareTo(y.PrecursorMZ));
        //    xSpectraSummary.Sort((x, y) => x.PrecursorMZ.CompareTo(y.PrecursorMZ));

        //    double mzTolerance = options.MzTolerance;

        //    foreach (var xsum in xSpectraSummary)
        //    {
        //        int scanx = xsum.Scan;

        //        // Grab the first spectra
        //        var spectrumX     = SpectralUtilities.GetSpectra(options.MzBinSize,
        //                                                            options.TopIonPercent,
        //                                                            filter,
        //                                                            readerX,
        //                                                            scanx,
        //                                                            options.RequiredPeakCount);

        //        spectrumX.PrecursorMZ   = xsum.PrecursorMZ;


        //        // Here we make sure that we are efficiently using the cache...we want to clear any
        //        // cached spectra that we arent using.  We know that the summaries are sorted by m/z
        //        // so if the xsum m/z is greater than anything in the cache, dump the spectra...
        //        double currentMz = xsum.PrecursorMZ;
        //        // Use linq?
        //        var toRemove = new List<int>();
        //        foreach (int scan in ySpectraCache.Keys)
        //        {
        //            MSSpectra yscan     = ySpectraCache[scan];
        //            double difference   = currentMz - yscan.PrecursorMZ;
        //            // We only need to care about smaller m/z's
        //            if (difference >= mzTolerance)
        //            {
        //                toRemove.Add(scan);
        //            }
        //            else
        //            {
        //                // Because if we are here, we are within range...AND!
        //                // ...the m/z of i + 1 > i...because they are sorted...
        //                // so if the m/z comes within range (positive) then
        //                // that means we need to evaluate the tolerance.
        //                break;
        //            }
        //        }

        //        // Then we clean up...since spectra can be large...we'll take the performance hit here...
        //        // and minimize memory impacts!
        //        if (toRemove.Count > 0)
        //        {
        //            toRemove.ForEach(x => ySpectraCache.Remove(x));
        //            GC.Collect();
        //            GC.WaitForPendingFinalizers();
        //        }

        //        // Iterate through the other analysis.
        //        foreach (var ysum in ySpectraSummary)
        //        {
        //            int scany = ysum.Scan;

        //            // We know that we are out of range here....
        //            if (Math.Abs(xsum.PrecursorMZ - ysum.PrecursorMZ) >= mzTolerance)
        //                continue;

        //            double netX = Convert.ToDouble(scanx - minX) / Convert.ToDouble(maxX - minX);
        //            double netY = Convert.ToDouble(scany - minY) / Convert.ToDouble(maxY - minY);
        //            double net  = Convert.ToDouble(netX - netY);

        //            // Has to pass the NET tolerance
        //            if (options.NetTolerance < Math.Abs(net)) continue;


        //            // Grab the first spectra...if we have it, great dont re-read
        //            MSSpectra spectrumY = null;
        //            if (ySpectraCache.ContainsKey(scany))
        //            {
        //                if (!skipComparison)
        //                    spectrumY = ySpectraCache[scany];
        //            }
        //            else
        //            {
        //                if (!skipComparison)
        //                {
        //                    spectrumY = SpectralUtilities.GetSpectra(options.MzBinSize,
        //                                                            options.TopIonPercent,
        //                                                            filter,
        //                                                            readerY,
        //                                                            scany,
        //                                                            options.RequiredPeakCount);
        //                    spectrumY.PrecursorMZ = ysum.PrecursorMZ;
        //                    ySpectraCache.Add(scany, spectrumY);
        //                }
        //            }

        //            // compare the spectra
        //            double spectralSimilarity = 0;


        //            if (!skipComparison)
        //                spectralSimilarity = comparer.CompareSpectra(spectrumX, spectrumY);

        //            if (double.IsNaN(spectralSimilarity) || double.IsNegativeInfinity(spectralSimilarity) || double.IsPositiveInfinity(spectralSimilarity))
        //                continue;

        //            if (spectralSimilarity < options.SimilarityCutoff)
        //                continue;

        //            var pointX      = new SpectralAnchorPoint
        //            {
        //                Net = netX,
        //                Mass = 0,
        //                Mz = xsum.PrecursorMZ,
        //                Scan = scanx,
        //                Spectrum = spectrumX
        //            };

        //            var pointY = new SpectralAnchorPoint
        //            {
        //                Net = netX,
        //                Mass = 0,
        //                Mz = ysum.PrecursorMZ,
        //                Scan = scany,
        //                Spectrum = spectrumY
        //            };

        //            var match = new SpectralAnchorPointMatch
        //            {
        //                AnchorPointX    = pointX,
        //                AnchorPointY    = pointY,
        //                SimilarityScore = spectralSimilarity,
        //                IsValidMatch    = AnchorPointMatchType.FalseMatch
        //            };

        //            matches.Add(match);
        //        }
        //    }

        //    return matches;
        //}

        /// <summary>
        /// Computes all anchor point matches between two sets of spectra.
        /// </summary>
        /// <remarks>
        /// The MS/MS (MsLevel == 2) scan summaries of both readers are sorted by precursor m/z
        /// and walked in step.  For every X summary, each Y summary whose precursor m/z lies
        /// within <c>options.MzTolerance</c> and whose normalized scan position lies within
        /// <c>options.NetTolerance</c> is compared; pairs scoring at or above
        /// <c>options.SimilarityCutoff</c> are returned.  Spectra are only read from the
        /// providers once a candidate pair has passed both tolerance checks.
        /// </remarks>
        /// <param name="readerX">Provider of scan summaries and spectra for the first dataset.</param>
        /// <param name="readerY">Provider of scan summaries and spectra for the second dataset.</param>
        /// <param name="comparer">Scores the similarity of two spectra.</param>
        /// <param name="filter">Filter forwarded to <c>SpectralUtilities.GetSpectra</c>.</param>
        /// <param name="options">Tolerances, similarity cutoff, and spectrum loading settings.</param>
        /// <param name="skipComparison">
        /// When true no spectra are read and no comparison is made.  Because a match requires
        /// both spectra to be loaded, no matches are produced in that mode.
        /// </param>
        /// <returns>
        /// The accepted matches.  Each is created with <c>AnchorPointMatchType.FalseMatch</c>.
        /// The peak list of the X spectrum of a match is released before this method returns.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="readerX"/> or <paramref name="readerY"/> is null.
        /// </exception>
        public IEnumerable<SpectralAnchorPointMatch> FindAnchorPoints(ISpectraProvider readerX,
                                                                      ISpectraProvider readerY,
                                                                      ISpectralComparer comparer,
                                                                      ISpectraFilter filter,
                                                                      SpectralOptions options,
                                                                      bool skipComparison = false)
        {
            if (readerX == null)
            {
                throw new ArgumentNullException("readerX");
            }
            if (readerY == null)
            {
                throw new ArgumentNullException("readerY");
            }

            var matches   = new List<SpectralAnchorPointMatch>();
            var scanDataX = readerX.GetScanSummaries();
            var scanDataY = readerY.GetScanSummaries(0);

            // Determine the scan extrema; they normalize scan numbers below.
            var maxX = scanDataX.Max(summary => summary.Scan);
            var minX = scanDataX.Min(summary => summary.Scan);
            var maxY = scanDataY.Max(summary => summary.Scan);
            var minY = scanDataY.Min(summary => summary.Scan);

            // Sort the MS/MS summaries by precursor m/z so both lists can be walked in step,
            // which bounds how many Y spectra have to be cached at any one time.
            var ySpectraSummary = scanDataY.Where(summary => summary.MsLevel == 2).ToList();
            var xSpectraSummary = scanDataX.Where(summary => summary.MsLevel == 2).ToList();

            ySpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));
            xSpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));

            var netTolerance = options.NetTolerance;
            var mzTolerance  = options.MzTolerance;
            var j            = 0;
            var i            = 0;
            var yTotal       = ySpectraSummary.Count;
            var xTotal       = xSpectraSummary.Count;

            // Y spectra already read, keyed by scan number.
            var cache = new Dictionary<int, MSSpectra>();

            // First anchor point created for each Y scan, kept so that its peaks can be
            // released once the scan drops out of the m/z window.
            var pointsY = new Dictionary<int, SpectralAnchorPoint>();

            while (i < xTotal && j < yTotal)
            {
                var       xsum       = xSpectraSummary[i];
                var       scanx      = xsum.Scan;
                var       precursorX = xsum.PrecursorMz;
                var       netX       = Convert.ToDouble(scanx - minX) / Convert.ToDouble(maxX - minX);
                MSSpectra spectrumX  = null;

                // Skip Y spectra that are too light for this X spectrum.  X is sorted
                // ascending, so they cannot match a later one either; evict them.
                while (j < yTotal && ySpectraSummary[j].PrecursorMz < (precursorX - mzTolerance))
                {
                    var scany = ySpectraSummary[j].Scan;
                    if (cache.Remove(scany))
                    {
                        SpectralAnchorPoint staleY;
                        if (pointsY.TryGetValue(scany, out staleY) && staleY.Spectrum.Peaks != null)
                        {
                            staleY.Spectrum.Peaks.Clear();
                            staleY.Spectrum.Peaks = null;
                        }
                    }
                    j++;
                }

                var k      = 0;
                var points = new List<SpectralAnchorPoint>();

                // Look ahead over the Y spectra that sit inside the m/z window without moving j,
                // because the next X spectrum may need them as well.
                while ((j + k) < yTotal && Math.Abs(ySpectraSummary[j + k].PrecursorMz - precursorX) < mzTolerance)
                {
                    var ysum = ySpectraSummary[j + k];
                    k++;
                    var scany = ysum.Scan;
                    var netY  = Convert.ToDouble(scany - minY) / Convert.ToDouble(maxY - minY);

                    // Written as a negated "<" so that a NaN difference is rejected.
                    if (!(Math.Abs(netX - netY) < netTolerance))
                    {
                        continue;
                    }

                    // Without spectra there is nothing to compare or to attach to a match.
                    if (skipComparison)
                    {
                        continue;
                    }

                    // The X spectrum is read lazily, and at most once, the first time a
                    // candidate Y spectrum is within tolerance.
                    if (spectrumX == null)
                    {
                        spectrumX = SpectralUtilities.GetSpectra(options.MzBinSize,
                                                                 options.TopIonPercent,
                                                                 filter,
                                                                 readerX,
                                                                 scanx,
                                                                 options.RequiredPeakCount);
                        if (spectrumX == null)
                        {
                            // Not enough peaks or rejected by the filter: give up on this X scan.
                            break;
                        }
                        spectrumX.PrecursorMz = xsum.PrecursorMz;
                    }

                    MSSpectra spectrumY;
                    if (!cache.TryGetValue(scany, out spectrumY))
                    {
                        spectrumY = SpectralUtilities.GetSpectra(options.MzBinSize,
                                                                 options.TopIonPercent,
                                                                 filter,
                                                                 readerY,
                                                                 scany,
                                                                 options.RequiredPeakCount);
                        if (spectrumY == null)
                        {
                            // Not enough peaks or rejected by the filter: skip this Y scan.
                            continue;
                        }
                        spectrumY.PrecursorMz = ysum.PrecursorMz;
                        cache.Add(scany, spectrumY);
                    }

                    double spectralSimilarity = comparer.CompareSpectra(spectrumX, spectrumY);

                    if (double.IsNaN(spectralSimilarity) || double.IsInfinity(spectralSimilarity))
                    {
                        continue;
                    }

                    if (spectralSimilarity < options.SimilarityCutoff)
                    {
                        continue;
                    }

                    var pointX = new SpectralAnchorPoint
                    {
                        Net      = netX,
                        Mass     = 0,
                        Mz       = xsum.PrecursorMz,
                        Scan     = scanx,
                        Spectrum = spectrumX
                    };

                    var pointY = new SpectralAnchorPoint
                    {
                        Net      = netY,
                        Mass     = 0,
                        Mz       = ysum.PrecursorMz,
                        Scan     = scany,
                        Spectrum = spectrumY
                    };

                    matches.Add(new SpectralAnchorPointMatch
                    {
                        AnchorPointX    = pointX,
                        AnchorPointY    = pointY,
                        SimilarityScore = spectralSimilarity,
                        IsValidMatch    = AnchorPointMatchType.FalseMatch
                    });

                    points.Add(pointX);
                    if (!pointsY.ContainsKey(scany))
                    {
                        pointsY.Add(scany, pointY);
                    }
                }

                // Move to the next spectrum in the x-list and release the peaks of the one
                // just processed; the matches keep only the summary information.
                i++;
                foreach (var p in points)
                {
                    if (p.Spectrum.Peaks != null)
                    {
                        p.Spectrum.Peaks.Clear();
                        p.Spectrum.Peaks = null;
                    }
                }
                points.Clear();
            }
            return matches;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Builds XIC features for a single UMC, one charge state at a time.
        /// </summary>
        /// <param name="feature">UMC whose charge map supplies the seed features.</param>
        /// <param name="massError">Mass error forwarded to the list-based CreateXic overload.</param>
        /// <param name="provider">Spectra provider forwarded to the list-based CreateXic overload.</param>
        /// <returns>A map from each key of the UMC's charge map to the features produced for it.</returns>
        public IDictionary<int, IList<MSFeatureLight>> CreateXic(UMCLight feature, double massError, ISpectraProvider provider)
        {
            var chargeMap   = feature.CreateChargeMap();
            var xicByCharge = new Dictionary<int, IList<MSFeatureLight>>();

            foreach (var chargeState in chargeMap.Keys)
            {
                // Delegate the extraction for this charge state's seed features.
                var extracted = CreateXic(chargeMap[chargeState], massError, provider);

                IList<MSFeatureLight> collected = new List<MSFeatureLight>();
                xicByCharge.Add(chargeState, collected);

                foreach (var candidate in extracted)
                {
                    // NOTE(review): chargeMap is enumerated by charge state above, yet it is
                    // probed here with a scan number.  This looks unintended; confirm what
                    // "new versus existing MS feature" was meant to test.
                    var scanIsMapKey = chargeMap.ContainsKey(candidate.Scan);
                    if (!scanIsMapKey)
                    {
                        // Stamp the parent UMC's properties onto the feature.
                        candidate.MassMonoisotopic = feature.MassMonoisotopic;
                        candidate.DriftTime        = feature.DriftTime;
                        candidate.GroupId          = feature.GroupId;
                    }
                    collected.Add(candidate);
                }
            }

            return xicByCharge;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Reads the raw peak list of one scan and wraps it in a new <see cref="MSSpectra"/>.
        /// </summary>
        /// <param name="reader">Provider the raw spectrum is read from.</param>
        /// <param name="scan">Scan number to read.</param>
        /// <param name="group">Group identifier forwarded to the provider.</param>
        /// <param name="mzTolerance">Not used by this method.</param>
        /// <returns>
        /// A spectrum whose <c>Peaks</c> is whatever the provider returned; no other member
        /// is populated here.
        /// </returns>
        private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
        {
            // The provider always assigns the out summary, so nothing is allocated for it
            // up front.  Only the peak list is kept.
            ScanSummary summary;

            // NOTE(review): the literal 2 is presumably the MS level being requested;
            // confirm against ISpectraProvider.GetRawSpectra.
            var peaks = reader.GetRawSpectra(scan, group, 2, out summary);

            return new MSSpectra { Peaks = peaks };
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Creates a cache around the given spectra provider, starting with an empty summary map.
 /// </summary>
 /// <param name="provider">Provider stored by this cache; it is not validated here.</param>
 public RawLoaderCache(ISpectraProvider provider)
 {
     m_provider = provider;

     // Two-level lookup of scan summaries.
     // NOTE(review): presumably group id, then scan number; confirm against the members that fill it.
     m_summaryMap = new Dictionary<int, Dictionary<int, ScanSummary>>();
 }
Ejemplo n.º 24
0
        /// <summary>
        /// Clusters the MS/MS spectra of the features in the index window [start, stop)
        /// by repeatedly merging clusters whose spectra are sufficiently similar.
        /// </summary>
        /// <remarks>
        /// Every feature starts in its own cluster.  Two features are compared only when they
        /// are in different clusters, have different <c>GroupId</c> values, and lie within
        /// <c>ScanRange</c> scans and <c>MzTolerance</c> m/z of each other.  The first entry
        /// of each feature's <c>MSnSpectra</c> is the spectrum used.
        /// </remarks>
        /// <param name="start">Index of the first feature to cluster (inclusive).</param>
        /// <param name="stop">Index one past the last feature to cluster (exclusive).</param>
        /// <param name="features">Features to cluster; each must have at least one MSn spectrum.</param>
        /// <param name="provider">Source of raw spectra for features whose peaks are not loaded.</param>
        /// <param name="similarityTolerance">Minimum similarity score required to merge two clusters.</param>
        /// <returns>
        /// The clusters that remain after merging.  <c>MeanScore</c> is the mean of the scores
        /// of the merges that built the cluster, and stays 0 for a cluster that was never merged.
        /// </returns>
        private List<MsmsCluster> Cluster(int start,
                                          int stop,
                                          List<MSFeatureLight> features,
                                          ISpectraProvider provider,
                                          double similarityTolerance)
        {
            // Maps the feature to a cluster ID.
            var featureMap = new Dictionary<MSFeatureLight, int>();

            // Maps the cluster ID to a cluster.
            var clusterMap = new Dictionary<int, MsmsCluster>();

            // Create singleton clusters.
            var id = 0;
            for (var i = start; i < stop; i++)
            {
                var feature = features[i];
                var cluster = new MsmsCluster();
                cluster.Id        = id++;
                cluster.MeanScore = 0;
                cluster.Features.Add(feature);

                featureMap.Add(feature, cluster.Id);
                clusterMap.Add(cluster.Id, cluster);
            }
            var protonMass = AdductMass;

            // Then iterate and cluster.
            for (var i = start; i < stop; i++)
            {
                var featureI = features[i];
                var clusterI = clusterMap[featureMap[featureI]];

                for (var j = i + 1; j < stop; j++)
                {
                    var featureJ = features[j];
                    var clusterJ = clusterMap[featureMap[featureJ]];

                    // Don't cluster the same thing
                    if (clusterI.Id == clusterJ.Id)
                    {
                        continue;
                    }

                    // Don't cluster from the same dataset.  Let the linkage algorithm decide if they
                    // belong in the same cluster, and later, go back and determine if the cluster is valid or not.
                    if (featureI.GroupId == featureJ.GroupId)
                    {
                        continue;
                    }

                    // Check the scan difference.  If it fits then we are within range.
                    var scanDiff = Math.Abs(featureI.Scan - featureJ.Scan);
                    if (scanDiff <= ScanRange)
                    {
                        // Use the most abundant mass because it had a higher chance of being fragmented.
                        var mzI = (featureI.MassMonoisotopicMostAbundant / featureI.ChargeState) + protonMass;
                        var mzJ = (featureJ.MassMonoisotopicMostAbundant / featureJ.ChargeState) + protonMass;

                        var mzDiff = Math.Abs(mzI - mzJ);
                        if (mzDiff <= MzTolerance)
                        {
                            LoadBinnedPeaksIfMissing(featureI, provider);
                            LoadBinnedPeaksIfMissing(featureJ, provider);

                            // Compute similarity
                            var score = SpectralComparer.CompareSpectra(featureI.MSnSpectra[0], featureJ.MSnSpectra[0]);

                            if (score >= similarityTolerance)
                            {
                                // Fold cluster I into cluster J.  The scores cluster I has already
                                // accumulated move with it, so the sum always covers exactly
                                // (feature count - 1) merges.
                                clusterJ.MeanScore += score + clusterI.MeanScore;
                                foreach (var xFeature in clusterI.Features)
                                {
                                    clusterJ.Features.Add(xFeature);
                                    featureMap[xFeature] = clusterJ.Id;
                                }
                                clusterMap.Remove(clusterI.Id);

                                // Feature I now lives in cluster J.  Without this rebinding, later
                                // matches in this loop would copy the absorbed cluster's stale
                                // feature list into a second cluster.
                                clusterI = clusterJ;
                            }
                        }
                    }
                }
            }

            var clusters = new List<MsmsCluster>(clusterMap.Values);

            // Release the peak lists; they are only needed while comparing.
            for (var i = start; i < stop; i++)
            {
                var peaks = features[i].MSnSpectra[0].Peaks;
                if (peaks != null)
                {
                    peaks.Clear();
                }
            }

            foreach (var cluster in clusters)
            {
                // A cluster that was never merged has no scores; dividing by zero would
                // turn its score of 0 into NaN.
                if (cluster.Features.Count > 1)
                {
                    cluster.MeanScore /= (cluster.Features.Count - 1);
                }
            }
            return clusters;
        }

        /// <summary>
        /// Reads and bins the peaks of a feature's first MSn spectrum when none are loaded.
        /// </summary>
        /// <param name="feature">Feature whose first MSn spectrum should have peaks.</param>
        /// <param name="provider">Source of the raw spectrum.</param>
        private void LoadBinnedPeaksIfMissing(MSFeatureLight feature, ISpectraProvider provider)
        {
            var spectrum = feature.MSnSpectra[0];
            if (spectrum.Peaks != null && spectrum.Peaks.Count > 0)
            {
                return;
            }

            ScanSummary scanSummary;
            var rawPeaks = provider.GetRawSpectra(spectrum.Scan, feature.GroupId, out scanSummary);

            // NOTE(review): 0 and 2000 are presumably the m/z range to bin over and
            // MzTolerance the bin width; confirm against XYData.Bin.
            spectrum.Peaks = XYData.Bin(rawPeaks, 0, 2000, MzTolerance);
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Clusters features based on MS/MS spectral similarity.
        /// </summary>
        /// <remarks>
        /// For each feature, the MS feature with the highest abundance that owns at least one
        /// MSn spectrum is chosen as its representative.  The representatives are sorted by
        /// <c>MassMonoisotopicMostAbundant</c> and split into runs wherever the value returned by
        /// <c>FeatureLight.ComputeMassPPMDifference</c> for two neighbors exceeds
        /// <c>MassTolerance</c> in magnitude.  Every run with more than one representative is
        /// handed to the range-based <c>Cluster</c> overload together with
        /// <c>SimilarityTolerance</c>.
        /// </remarks>
        /// <param name="features">Features whose MS features supply the candidate spectra.</param>
        /// <param name="provider">Spectra provider forwarded to the range-based overload.</param>
        /// <returns>The clusters holding at least <c>MinimumClusterSize</c> features.</returns>
        public List <MsmsCluster> Cluster(List <UMCLight> features, ISpectraProvider provider)
        {
            UpdateStatus("Mapping UMC's to MS/MS spectra using intensity profile.");

            // Step 1: pick one representative MS feature per feature.
            var msFeatures = new List <MSFeatureLight>();

            foreach (var feature in features)
            {
                // Start below any finite abundance so that an MS feature carrying a
                // fragmentation spectrum is never rejected just for having a low abundance.
                double         abundance  = double.NegativeInfinity;
                MSFeatureLight maxFeature = null;

                // Find the most abundant MS feature that has an MSn spectrum.
                foreach (var msFeature in feature.MsFeatures)
                {
                    if (msFeature.Abundance > abundance && msFeature.MSnSpectra.Count > 0)
                    {
                        abundance  = msFeature.Abundance;
                        maxFeature = msFeature;
                    }
                }

                // Features without any MSn spectrum contribute nothing.
                if (maxFeature != null)
                {
                    msFeatures.Add(maxFeature);
                }
            }

            UpdateStatus(string.Format("Found {0} total spectra for clustering.", msFeatures.Count));

            UpdateStatus("Sorting spectra.");
            // Sort by mass so that candidates within tolerance of each other are adjacent.
            msFeatures.Sort((x, y) => x.MassMonoisotopicMostAbundant.CompareTo(y.MassMonoisotopicMostAbundant));

            // Step 2: walk the sorted list; h marks the start of the current run and j the
            // element being compared with its predecessor.
            var j = 1;
            var h = 0;
            var N = msFeatures.Count;

            var clusters  = new List <MsmsCluster>();
            var tol       = MassTolerance;
            var lastTotal = 0;

            UpdateStatus("Clustering spectra.");
            while (j < N)
            {
                var featureJ = msFeatures[j];
                var featureI = msFeatures[j - 1];
                var diff     = FeatureLight.ComputeMassPPMDifference(featureJ.MassMonoisotopicMostAbundant,
                                                                     featureI.MassMonoisotopicMostAbundant);

                if (Math.Abs(diff) > tol)
                {
                    // The run ends just before j.  Only runs with more than one member
                    // are worth comparing.
                    if ((j - h) > 1)
                    {
                        clusters.AddRange(Cluster(h, j, msFeatures, provider, SimilarityTolerance));
                    }

                    // Start the next run at j.
                    h = j;
                }

                // Report progress roughly every 500 spectra.
                if (j - lastTotal > 500)
                {
                    lastTotal = j;
                    UpdateStatus(string.Format("Processed {0} / {1} total spectra.", lastTotal, N));
                }
                j++;
            }
            UpdateStatus("Finishing last cluster data.");

            // The final run is never closed inside the loop, so handle it here.
            if ((j - h) > 1)
            {
                clusters.AddRange(Cluster(h, j, msFeatures, provider, SimilarityTolerance));
            }
            UpdateStatus("Finished clustering.");

            return(clusters.Where(cluster => cluster.Features.Count >= MinimumClusterSize).ToList());
        }
Ejemplo n.º 26
0
        /// <summary>
        ///     Aligns the alignee features to the baseline features and reports the cluster
        ///     scores obtained before and after the alignment.
        /// </summary>
        /// <remarks>
        ///     Each feature's <c>Net</c> is set to its scan offset divided by the scan range of
        ///     its own dataset, and <c>MassMonoisotopicAligned</c> is reset to
        ///     <c>MassMonoisotopic</c>.  The combined features are clustered and scored, the
        ///     aligner is run, cluster assignments are cleared, and the features are clustered
        ///     and scored a second time.  The matches returned by the aligner are filtered and
        ///     written with <c>SaveMatches</c>.
        ///     Both inputs are materialized once so that the instances being normalized are the
        ///     same instances that are clustered and aligned, even for deferred sequences.
        ///     NOTE(review): an empty feature collection is not supported because the scan range
        ///     cannot be computed; Min/Max are expected to throw in that case.
        /// </remarks>
        /// <param name="baselineFeatures">Features of the baseline dataset; modified in place.</param>
        /// <param name="aligneeFeatures">Features of the dataset being aligned; modified in place.</param>
        /// <param name="providerX">Assigned to the aligner as its baseline spectra provider.</param>
        /// <param name="providerY">Assigned to the aligner as its alignee spectra provider.</param>
        /// <param name="aligner">Aligner that produces the matches.</param>
        /// <param name="clusterer">Clusterer applied to the combined features before and after alignment.</param>
        /// <param name="matchPath">Path handed to <c>SaveMatches</c>.</param>
        /// <param name="errorPath">Not used by this method.</param>
        public void AlignDatasets(IEnumerable <UMCLight> baselineFeatures,
                                  IEnumerable <UMCLight> aligneeFeatures,
                                  ISpectraProvider providerX,
                                  ISpectraProvider providerY,
                                  IFeatureAligner <IEnumerable <UMCLight>,
                                                   IEnumerable <UMCLight>,
                                                   AlignmentData> aligner,
                                  IClusterer <UMCLight, UMCClusterLight> clusterer,
                                  string matchPath,
                                  string errorPath)
        {
            // Enumerate each input exactly once; reuse the caller's collection when it is
            // already materialized.
            var baseline = baselineFeatures as IList <UMCLight> ?? baselineFeatures.ToList();
            var alignee  = aligneeFeatures as IList <UMCLight> ?? aligneeFeatures.ToList();

            // cluster before we do anything else....
            var allFeatures = new List <UMCLight>(baseline.Count + alignee.Count);
            allFeatures.AddRange(baseline);
            allFeatures.AddRange(alignee);

            var maxBaseline = baseline.Max(x => x.Scan);
            var minBaseline = baseline.Min(x => x.Scan);

            var maxAlignee = alignee.Max(x => x.Scan);
            var minAlignee = alignee.Min(x => x.Scan);

            var baselineRange = Convert.ToDouble(maxBaseline - minBaseline);
            var aligneeRange  = Convert.ToDouble(maxAlignee - minAlignee);

            foreach (var feature in alignee)
            {
                // A dataset whose scans are all equal has no range; use 0 instead of 0/0 (NaN).
                feature.Net = aligneeRange > 0
                    ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            foreach (var feature in baseline)
            {
                feature.Net = baselineRange > 0
                    ? Convert.ToDouble(feature.Scan - minBaseline) / baselineRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }

            // This tells us the differences before we align.
            var clusters  = clusterer.Cluster(allFeatures);
            var clusterId = 0;

            foreach (var cluster in clusters)
            {
                cluster.Id = clusterId++;
            }
            var scorer       = new GlobalPeptideClusterScorer();
            var preAlignment = scorer.Score(clusters);

            aligner.AligneeSpectraProvider  = providerY;
            aligner.BaselineSpectraProvider = providerX;

            UpdateStatus("Aligning data");
            var data    = aligner.Align(baseline, alignee);
            var matches = data.Matches;

            // Drop the pre-alignment cluster assignments before clustering again.
            foreach (var feature in allFeatures)
            {
                feature.UmcCluster = null;
                feature.ClusterId  = -1;
            }

            // Then cluster after alignment!
            UpdateStatus("clustering data");
            clusters = clusterer.Cluster(allFeatures);
            var postAlignment = scorer.Score(clusters);

            UpdateStatus("Note\tSame\tDifferent");
            UpdateStatus(string.Format("Pre\t{0}\t{1}",
                                       preAlignment.SameCluster,
                                       preAlignment.DifferentCluster));
            UpdateStatus(string.Format("Post\t{0}\t{1}",
                                       postAlignment.SameCluster,
                                       postAlignment.DifferentCluster));

            matches = FilterMatches(matches, 40);

            SaveMatches(matchPath, matches);
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(clusterer);
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Links MS features to MSn spectra by forwarding to the two-argument overload.
 /// </summary>
 /// <param name="features">MS features passed through to the two-argument overload.</param>
 /// <param name="fragmentSpectra">Fragmentation spectra passed through to the two-argument overload.</param>
 /// <param name="provider">Not used; the call is forwarded without it.</param>
 /// <returns>The dictionary returned by the two-argument overload.</returns>
 public Dictionary <int, int> LinkMSFeaturesToMSn(List <MSFeatureLight> features,
                                                  List <MSSpectra> fragmentSpectra,
                                                  ISpectraProvider provider)
 {
     // The provider plays no part in the linking; only the features and spectra are forwarded.
     Dictionary <int, int> links = LinkMSFeaturesToMSn(features, fragmentSpectra);

     return links;
 }
Ejemplo n.º 28
0
        /// <summary>
        /// Finds spectral anchor points between two datasets, validates them against the
        /// peptides read from each dataset's peptide file, and packages the result.
        /// </summary>
        /// <param name="comparerType">Selects the spectral comparer created by the factory.</param>
        /// <param name="readerX">Spectra provider passed first to the anchor point finder.</param>
        /// <param name="readerY">Spectra provider passed second to the anchor point finder.</param>
        /// <param name="options">Options given to the finder and the validator, and stored in the result.</param>
        /// <param name="datasetX">Dataset whose <c>PeptideFile</c> is read first.</param>
        /// <param name="datasetY">Dataset whose <c>PeptideFile</c> is read second.</param>
        /// <param name="names">Stored in the result as <c>DatasetNames</c>.</param>
        /// <returns>An analysis holding the dataset names, the matches, and the options.</returns>
        protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType,
            ISpectraProvider readerX,
            ISpectraProvider readerY,
            SpectralOptions options,
            AlignmentDataset datasetX,
            AlignmentDataset datasetY,
            List<string> names)
        {
            // Collaborators: an MSGF peptide reader, the anchor finder/validator pair,
            // the requested comparer and a top-percent spectrum filter.
            var msgfReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var anchorFinder = new SpectralAnchorPointFinder();
            var anchorValidator = new SpectralAnchorPointValidator();
            var spectraComparer = SpectralComparerFactory.CreateSpectraComparer(comparerType);
            var topPercentFilter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

            // Locate candidate anchor points between the two spectra sources.
            var anchorMatches = anchorFinder.FindAnchorPoints(
                readerX, readerY, spectraComparer, topPercentFilter, options);

            // Peptides are read for X first, then for Y, right before validation.
            anchorValidator.ValidateMatches(
                anchorMatches,
                msgfReader.Read(datasetX.PeptideFile),
                msgfReader.Read(datasetY.PeptideFile),
                options);

            var result = new SpectralAnalysis();
            result.DatasetNames = names;
            result.Matches = anchorMatches;
            result.Options = options;

            return result;
        }