Пример #1
0
        public static void TestToString()
        {
            // many of these are just to check that the ToString methods don't cause crashes
            var indexedPeak = new IndexedMassSpectralPeak(1.0, 2.0, 3, 4);

            Assert.That(indexedPeak.ToString().Equals("1.000; 4; 3"));

            var    spectraFile   = new SpectraFileInfo("myFullPath", "", 0, 0, 0);
            string spectraString = spectraFile.ToString();

            var    proteinGroup = new ProteinGroup("Accession", "Gene", "Organism");
            string pgString     = proteinGroup.ToString(new List <SpectraFileInfo> {
                spectraFile
            });

            var identification = new Identification(
                spectraFile, "PEPTIDE", "PEPTIDE", 1.0, 2.0, 3,
                new List <ProteinGroup> {
                proteinGroup
            });
            string idString = identification.ToString();

            var    chromPeak       = new ChromatographicPeak(identification, false, spectraFile);
            string chromPeakString = chromPeak.ToString();

            chromPeak.CalculateIntensityForThisFeature(true);
            string peakAfterCalculatingIntensity = chromPeak.ToString();
        }
Пример #2
0
        public static void TestPeakSplittingRight()
        {
            string fileToWrite = "myMzml.mzML";
            string peptide     = "PEPTIDE";
            double intensity   = 1e6;

            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // generate mzml file

            // 1 MS1 scan per peptide
            MsDataScan[] scans = new MsDataScan[10];
            double[]     intensityMultipliers = { 1, 3, 5, 10, 5, 3, 1, 1, 3, 1 };

            for (int s = 0; s < scans.Length; s++)
            {
                ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula();
                IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                double[]             intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray();

                // add the scan
                scans[s] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: s + 1, msnOrder: 1, isCentroid: true,
                                          polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                          mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (s + 1));
            }

            // write the .mzML
            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans),
                                                                          Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), false);

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), "", 0, 0, 0);

            // create some PSMs
            var pg = new ProteinGroup("MyProtein", "gene", "org");

            Identification id1 = new Identification(file1, peptide, peptide,
                                                    new Proteomics.AminoAcidPolymer.Peptide(peptide).MonoisotopicMass, 1.3 + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            // create the FlashLFQ engine
            FlashLFQEngine engine = new FlashLFQEngine(new List <Identification> {
                id1
            });

            // run the engine
            var results = engine.Run();
            ChromatographicPeak peak = results.Peaks.First().Value.First();

            Assert.That(peak.Apex.RetentionTime == 1.3);
            Assert.That(peak.SplitRT == 1.6);
            Assert.That(!peak.IsotopicEnvelopes.Any(p => p.RetentionTime > 1.6));
            Assert.That(peak.IsotopicEnvelopes.Count == 6);
        }
Пример #3
0
 public void MergeFeatureWith(ChromatographicPeak otherFeature, bool integrate)
 {
     if (otherFeature != this)
     {
         var thisFeaturesPeaks = new HashSet <IndexedMassSpectralPeak>(IsotopicEnvelopes.Select(p => p.IndexedPeak));
         this.Identifications = this.Identifications.Union(otherFeature.Identifications).Distinct().OrderBy(p => p.PosteriorErrorProbability).ToList();
         ResolveIdentifications();
         this.IsotopicEnvelopes.AddRange(otherFeature.IsotopicEnvelopes.Where(p => !thisFeaturesPeaks.Contains(p.IndexedPeak)));
         this.CalculateIntensityForThisFeature(integrate);
     }
 }
Пример #4
0
        public RetentionTimeCalibDataPoint(ChromatographicPeak donorFilePeak, ChromatographicPeak acceptorFilePeak)
        {
            DonorFilePeak    = donorFilePeak;
            AcceptorFilePeak = acceptorFilePeak;

            if (donorFilePeak != null && acceptorFilePeak != null)
            {
                RtDiff = acceptorFilePeak.Apex.IndexedPeak.RetentionTime - donorFilePeak.Apex.IndexedPeak.RetentionTime;
            }
            else
            {
                RtDiff = double.NaN;
            }
        }
Пример #5
0
        public void CalculatePeptideResults()
        {
            foreach (var sequence in PeptideModifiedSequences)
            {
                foreach (SpectraFileInfo file in SpectraFiles)
                {
                    sequence.Value.SetDetectionType(file, DetectionType.NotDetected);
                    sequence.Value.SetIntensity(file, 0);
                }
            }

            foreach (var filePeaks in Peaks)
            {
                var groupedPeaks = filePeaks.Value.Where(p => p.NumIdentificationsByFullSeq == 1)
                    .GroupBy(p => p.Identifications.First().ModifiedSequence).ToList();

                foreach (var sequenceWithPeaks in groupedPeaks)
                {
                    string sequence = sequenceWithPeaks.Key;
                    double intensity = sequenceWithPeaks.Max(p => p.Intensity);
                    ChromatographicPeak bestPeak = sequenceWithPeaks.First(p => p.Intensity == intensity);
                    DetectionType detectionType;

                    if (bestPeak.IsMbrPeak && intensity > 0)
                    {
                        detectionType = DetectionType.MBR;
                    }
                    else if (!bestPeak.IsMbrPeak && intensity > 0)
                    {
                        detectionType = DetectionType.MSMS;
                    }
                    else if (!bestPeak.IsMbrPeak && intensity == 0)
                    {
                        detectionType = DetectionType.MSMSIdentifiedButNotQuantified;
                    }
                    else
                    {
                        detectionType = DetectionType.NotDetected;
                    }

                    PeptideModifiedSequences[sequence].SetIntensity(filePeaks.Key, intensity);
                    PeptideModifiedSequences[sequence].SetDetectionType(filePeaks.Key, detectionType);
                }

                // report ambiguous quantification
                var ambiguousPeaks = filePeaks.Value.Where(p => p.NumIdentificationsByFullSeq > 1).ToList();
                foreach (ChromatographicPeak ambiguousPeak in ambiguousPeaks)
                {
                    foreach (Identification id in ambiguousPeak.Identifications)
                    {
                        string sequence = id.ModifiedSequence;

                        double alreadyRecordedIntensity = PeptideModifiedSequences[sequence].GetIntensity(filePeaks.Key);
                        double fractionAmbiguous = ambiguousPeak.Intensity / (alreadyRecordedIntensity + ambiguousPeak.Intensity);

                        if (fractionAmbiguous > 0.3)
                        {
                            PeptideModifiedSequences[sequence].SetIntensity(filePeaks.Key, 0);
                            PeptideModifiedSequences[sequence].SetDetectionType(filePeaks.Key, DetectionType.MSMSAmbiguousPeakfinding);
                        }
                    }
                }
            }

            HandleAmbiguityInFractions();
        }
        //If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
        //Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
        //Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
        //This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
        //i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
        public static void SilacConversionsPostQuantification(List <SilacLabel> silacLabels, List <SpectraFileInfo> spectraFileInfo, List <ProteinGroup> ProteinGroups,
                                                              HashSet <DigestionParams> ListOfDigestionParams, Dictionary <string, List <string> > silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
                                                              List <PeptideSpectralMatch> allPsms, Dictionary <string, int> ModsToWriteSelection, bool Integrate)
        {
            bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);


            //MAKE NEW RAW FILES
            //update number of spectra files to include a new file for each label*condition
            Dictionary <SpectraFileInfo, string>          fileToLabelDictionary  = new Dictionary <SpectraFileInfo, string>();          //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
            Dictionary <SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary <SpectraFileInfo, SpectraFileInfo>(); //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
            List <SpectraFileInfo> silacSpectraFileInfo = new List <SpectraFileInfo>();                                                 //new files

            //foreach existing file
            foreach (SpectraFileInfo originalFile in spectraFileInfo)
            {
                //add the existing file as the light
                silacSpectraFileInfo.Add(originalFile);
                //foreach label, add a new file with the label
                foreach (SilacLabel label in silacLabels)
                {
                    SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
                    silacSpectraFileInfo.Add(silacFile);
                    fileToLabelDictionary[silacFile]  = label.MassDifference;
                    labeledToUnlabeledFile[silacFile] = originalFile;
                }
            }


            //UPDATE PROTEIN GROUPS
            //remove the heavy protein groups so that there are only light ones
            //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
            HashSet <SpectraFileInfo> lightFilesToRemove = new HashSet <SpectraFileInfo>(); //this is only used when there user specified no unlabeled proteins

            if (ProteinGroups != null)                                                      //if we did parsimony
            {
                List <EngineLayer.ProteinGroup> silacProteinGroups = new List <EngineLayer.ProteinGroup>();
                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                //foreach protein group (which has its own quant for each file)
                foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
                {
                    proteinGroup.FilesForQuantification = silacSpectraFileInfo;                                                    //update fileinfo for the group
                                                                                                                                   //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
                    if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List <string> silacSubGroupNames)) //try to find the light protein groups. If it's not light, ignore it
                    {
                        //the out variable contains all the other heavy protein groups that were generated for this light protein group
                        //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                        //If the user didn't specify to search light intensities, then don't output them
                        Dictionary <SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                        List <SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList();

                        //go through all files (including "silac" files)
                        List <ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList(); //find the protein groups where the accession contains "light" accession of the current protein group
                        foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo)                                                         //for every file (light and heavy)
                        {
                            //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                            if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                            {
                                string       labelSignature = fileToLabelDictionary[fileInfo];                                                                 //a string associated with a silac label
                                ProteinGroup foundGroup     = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein groups containing this label
                                updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]];  //update the intensity for that label in the light group
                            }
                            //else do nothing. The light version is already in the dictionary
                        }

                        //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                        //we need to remove these unlabeled peptides/proteins before output
                        if (!outputLightIntensities)
                        {
                            foreach (SpectraFileInfo info in lightKeys)
                            {
                                updatedIntensitiesByFile.Remove(info);
                                proteinGroup.FilesForQuantification.Remove(info);
                                lightFilesToRemove.Add(info);
                            }
                        }

                        silacProteinGroups.Add(proteinGroup);
                    }
                }

                //update
                ProteinGroups.Clear();
                ProteinGroups.AddRange(silacProteinGroups);
                //remove light files (if necessary)
                foreach (SpectraFileInfo info in lightFilesToRemove)
                {
                    FlashLfqResults.SpectraFiles.Remove(info);
                }

                //UPDATE FLASHLFQ PROTEINS
                if (FlashLfqResults != null)                                                                     //can be null if nothing was quantified (all peptides are ambiguous)
                {
                    Dictionary <string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups
                                                                                                                 //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
                    var keys = flashLfqProteins.Keys.ToList();
                    foreach (string key in keys)
                    {
                        if (silacLabels.Any(x => key.Contains(x.MassDifference)))
                        {
                            flashLfqProteins.Remove(key);
                        }
                    }
                }
            }

            ////UPDATE FLASHLFQ SPECTRA FILES
            if (FlashLfqResults != null)                                             //can be null if nothing was quantified (all peptides are ambiguous)
            {
                List <SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
                foreach (SpectraFileInfo info in silacSpectraFileInfo)
                {
                    if (!originalFiles.Contains(info))
                    {
                        originalFiles.Add(info);
                    }
                }
            }

            //UPDATE PEPTIDE INFO
            //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
            //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
            //We are unable to do this for Parameters.AllPsms, because they store proteins and start/end residues instead
            //for Psms, we need to convert during the writing.
            for (int i = 0; i < allPsms.Count; i++)
            {
                allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);
            }

            //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
            if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
            {
                var lfqPeaks = FlashLfqResults.Peaks;
                List <SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

                foreach (SpectraFileInfo key in peakKeys)
                {
                    List <FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
                    for (int i = 0; i < peaks.Count; i++)
                    {
                        var peak = peaks[i];
                        List <Identification> identifications = new List <Identification>();
                        //check if we're removing light peaks and if it's a light peak
                        if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                        {
                            peaks.RemoveAt(i);
                            i--;
                        }
                        else
                        {
                            foreach (var id in peak.Identifications)
                            {
                                SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                                HashSet <FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                                List <FlashLFQ.ProteinGroup>    updatedGroups  = new List <FlashLFQ.ProteinGroup>();
                                foreach (FlashLFQ.ProteinGroup group in originalGroups)
                                {
                                    string groupName = group.ProteinGroupName;
                                    if (label == null) //if light
                                    {
                                        updatedGroups.Add(group);
                                    }
                                    else
                                    {
                                        string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                        int    labelIndex  = groupName.IndexOf(labelString);
                                        if (labelIndex != -1) //labelIndex == 1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                        {
                                            groupName = groupName.Substring(0, labelIndex);
                                            updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                                        }
                                    }
                                }

                                Identification updatedId = new Identification(
                                    id.fileInfo,
                                    GetSilacLightBaseSequence(id.BaseSequence, label),
                                    GetSilacLightFullSequence(id.ModifiedSequence, label),
                                    id.monoisotopicMass,
                                    id.ms2RetentionTimeInMinutes,
                                    id.precursorChargeState,
                                    updatedGroups,
                                    id.OptionalChemicalFormula,
                                    id.UseForProteinQuant
                                    );
                                identifications.Add(updatedId);
                            }
                            FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                            for (int j = 1; j < identifications.Count; j++) //add all the original identification
                            {
                                updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                            }
                            updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes;  //need to set isotopicEnevelopes, since the new identifications didn't have them.
                            updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                            peaks[i] = updatedPeak;
                        }
                    }
                }

                //convert all lfq peptides from heavy to light for output
                Dictionary <string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
                List <string> pwsmKeys = lfqPwsms.Keys.ToList();
                foreach (string key in pwsmKeys)
                {
                    FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
                    SilacLabel       label          = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
                    if (label != null)                                                                                                                           //if it's a heavy peptide
                    {
                        lfqPwsms.Remove(key);                                                                                                                    //get rid of it
                                                                                                                                                                 //update the light version
                        string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false);                                                 //get the light sequence
                        List <SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                        //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                        if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                        {
                            foreach (SpectraFileInfo heavyFile in heavyFiles)
                            {
                                SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                                lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                                lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                            }
                        }
                        else //if there's no light, create a new entry for the heavy
                        {
                            //new peptide
                            FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);
                            //update the heavy info, set the light values to zero
                            foreach (SpectraFileInfo info in heavyFiles)
                            {
                                updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(info));
                                updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(info));
                            }

                            //set the other values to zero
                            List <SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                            foreach (SpectraFileInfo info in otherInfo)
                            {
                                updatedPeptide.SetIntensity(info, 0);
                                updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                            }
                            HashSet <FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                            HashSet <FlashLFQ.ProteinGroup> updatedGroups  = new HashSet <FlashLFQ.ProteinGroup>();
                            foreach (FlashLFQ.ProteinGroup group in originalGroups)
                            {
                                string groupName = group.ProteinGroupName;
                                groupName = groupName.Replace(label.MassDifference, "");
                                updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                            }
                            updatedPeptide.proteinGroups      = updatedGroups;
                            lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
                        }
                    }
                }
            }
        }