// Smoke test for the ToString implementations of the FlashLFQ data objects.
// Only the indexed peak's text is asserted; every other ToString call is made purely
// to confirm that it completes without throwing, so its result is intentionally unused.
public static void TestToString()
{
    var massSpecPeak = new IndexedMassSpectralPeak(1.0, 2.0, 3, 4);
    string massSpecPeakText = massSpecPeak.ToString();
    Assert.That(massSpecPeakText.Equals("1.000; 4; 3"));

    var rawFile = new SpectraFileInfo("myFullPath", "", 0, 0, 0);
    string rawFileText = rawFile.ToString();

    var group = new ProteinGroup("Accession", "Gene", "Organism");
    var filesForGroup = new List<SpectraFileInfo> { rawFile };
    string groupText = group.ToString(filesForGroup);

    var groupsForId = new List<ProteinGroup> { group };
    var id = new Identification(rawFile, "PEPTIDE", "PEPTIDE", 1.0, 2.0, 3, groupsForId);
    string idText = id.ToString();

    // ToString is exercised both before and after the intensity calculation
    var feature = new ChromatographicPeak(id, false, rawFile);
    string featureTextBeforeIntensity = feature.ToString();
    feature.CalculateIntensityForThisFeature(true);
    string featureTextAfterIntensity = feature.ToString();
}
// Writes a synthetic .mzML containing ten MS1 scans of one peptide, runs the FlashLFQ engine on a
// single identification at RT ~1.3, and checks where the resulting chromatographic peak is split
// on the right-hand side of its apex.
public static void TestPeakSplittingRight()
{
    string fileToWrite = "myMzml.mzML";
    string peptide = "PEPTIDE";
    double intensity = 1e6;

    // Retention times are produced by floating-point arithmetic (1.0 + s / 10.0) and then
    // round-tripped through an .mzML file, so they are compared with a small tolerance
    // rather than with exact equality.
    const double rtTolerance = 1e-9;

    Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));
    string mzmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite);

    // the peptide's formula and isotopic distribution are identical for every scan, so build them once
    var peptideObject = new Proteomics.AminoAcidPolymer.Peptide(peptide);
    ChemicalFormula cf = peptideObject.GetChemicalFormula();
    IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);

    // generate mzml file
    // one MS1 scan per multiplier: intensities rise to a maximum at index 3 (RT 1.3),
    // fall back to 1x at indices 6-7, then rise again at index 8
    MsDataScan[] scans = new MsDataScan[10];
    double[] intensityMultipliers = { 1, 3, 5, 10, 5, 3, 1, 1, 3, 1 };

    for (int s = 0; s < scans.Length; s++)
    {
        // fresh arrays per scan: the spectrum is constructed with shouldCopy == false
        double[] mz = dist.Masses.Select(v => v.ToMz(1)).ToArray();
        double[] intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray();

        // add the scan
        scans[s] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: s + 1, msnOrder: 1, isCentroid: true,
            polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
            mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (s + 1));
    }

    // write the .mzML
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), mzmlPath, false);

    // set up spectra file info
    SpectraFileInfo file1 = new SpectraFileInfo(mzmlPath, "", 0, 0, 0);

    // create some PSMs
    var pg = new ProteinGroup("MyProtein", "gene", "org");
    Identification id1 = new Identification(file1, peptide, peptide,
        peptideObject.MonoisotopicMass, 1.3 + 0.001, 1, new List<ProteinGroup> { pg });

    // create the FlashLFQ engine
    FlashLFQEngine engine = new FlashLFQEngine(new List<Identification> { id1 });

    // run the engine
    var results = engine.Run();
    ChromatographicPeak peak = results.Peaks.First().Value.First();

    // constraint-based asserts report the actual value when they fail
    Assert.That(peak.Apex.RetentionTime, Is.EqualTo(1.3).Within(rtTolerance));
    Assert.That(peak.SplitRT, Is.EqualTo(1.6).Within(rtTolerance));
    Assert.That(peak.IsotopicEnvelopes.Any(p => p.RetentionTime > 1.6 + rtTolerance), Is.False);
    Assert.That(peak.IsotopicEnvelopes.Count, Is.EqualTo(6));
}
// Merges another chromatographic peak into this one.
// The identifications of both peaks are combined (each kept once, ordered by ascending
// PosteriorErrorProbability) and resolved, the other peak's isotopic envelopes whose indexed
// peak is not already present here are appended, and the intensity is then recalculated.
//   otherFeature: the peak to merge in; nothing happens when it is this peak itself
//   integrate:    passed through to CalculateIntensityForThisFeature
public void MergeFeatureWith(ChromatographicPeak otherFeature, bool integrate)
{
    if (otherFeature == this)
    {
        return;
    }

    // snapshot of the indexed peaks this feature already holds, taken before anything is appended
    var thisFeaturesPeaks = new HashSet<IndexedMassSpectralPeak>(IsotopicEnvelopes.Select(p => p.IndexedPeak));

    // Union already yields each identification only once (default equality comparer),
    // so no additional Distinct() pass is needed
    this.Identifications = this.Identifications
        .Union(otherFeature.Identifications)
        .OrderBy(p => p.PosteriorErrorProbability)
        .ToList();
    ResolveIdentifications();

    this.IsotopicEnvelopes.AddRange(otherFeature.IsotopicEnvelopes.Where(p => !thisFeaturesPeaks.Contains(p.IndexedPeak)));
    this.CalculateIntensityForThisFeature(integrate);
}
// Pairs a donor-file peak with an acceptor-file peak and records the difference between their
// apex retention times (acceptor minus donor). RtDiff is NaN when either peak is null.
public RetentionTimeCalibDataPoint(ChromatographicPeak donorFilePeak, ChromatographicPeak acceptorFilePeak)
{
    DonorFilePeak = donorFilePeak;
    AcceptorFilePeak = acceptorFilePeak;

    // without both peaks there is no retention-time difference to compute
    if (donorFilePeak == null || acceptorFilePeak == null)
    {
        RtDiff = double.NaN;
        return;
    }

    var acceptorApexRt = acceptorFilePeak.Apex.IndexedPeak.RetentionTime;
    var donorApexRt = donorFilePeak.Apex.IndexedPeak.RetentionTime;
    RtDiff = acceptorApexRt - donorApexRt;
}
// Fills in the per-file intensity and detection type of every peptide from the quantified peaks.
//  1. Every peptide/file combination is reset to NotDetected with intensity 0.
//  2. For each file, peaks with exactly one identification (by full sequence) are grouped by
//     modified sequence; the most intense peak of each group supplies the peptide's values.
//  3. Peaks with more than one identification are treated as ambiguous: when such a peak makes up
//     more than 30% of (recorded + ambiguous) intensity for a peptide, that peptide is zeroed in
//     the file and flagged MSMSAmbiguousPeakfinding.
//  4. HandleAmbiguityInFractions() is called last.
public void CalculatePeptideResults()
{
    // step 1: start from a clean slate
    foreach (var peptideEntry in PeptideModifiedSequences)
    {
        var peptideToReset = peptideEntry.Value;

        foreach (SpectraFileInfo spectraFile in SpectraFiles)
        {
            peptideToReset.SetDetectionType(spectraFile, DetectionType.NotDetected);
            peptideToReset.SetIntensity(spectraFile, 0);
        }
    }

    foreach (var fileAndPeaks in Peaks)
    {
        var currentFile = fileAndPeaks.Key;
        var peaksInFile = fileAndPeaks.Value;

        // step 2: unambiguous peaks, grouped by the modified sequence of their single identification
        var peaksBySequence = peaksInFile
            .Where(p => p.NumIdentificationsByFullSeq == 1)
            .GroupBy(p => p.Identifications.First().ModifiedSequence)
            .ToList();

        foreach (var peakGroup in peaksBySequence)
        {
            double topIntensity = peakGroup.Max(p => p.Intensity);
            ChromatographicPeak bestPeak = peakGroup.First(p => p.Intensity == topIntensity);

            DetectionType detectionType;
            if (topIntensity > 0)
            {
                detectionType = bestPeak.IsMbrPeak ? DetectionType.MBR : DetectionType.MSMS;
            }
            else if (!bestPeak.IsMbrPeak && topIntensity == 0)
            {
                detectionType = DetectionType.MSMSIdentifiedButNotQuantified;
            }
            else
            {
                detectionType = DetectionType.NotDetected;
            }

            var quantifiedPeptide = PeptideModifiedSequences[peakGroup.Key];
            quantifiedPeptide.SetIntensity(currentFile, topIntensity);
            quantifiedPeptide.SetDetectionType(currentFile, detectionType);
        }

        // step 3: report ambiguous quantification
        var ambiguousPeaks = peaksInFile.Where(p => p.NumIdentificationsByFullSeq > 1).ToList();

        foreach (ChromatographicPeak ambiguousPeak in ambiguousPeaks)
        {
            foreach (Identification id in ambiguousPeak.Identifications)
            {
                var affectedPeptide = PeptideModifiedSequences[id.ModifiedSequence];

                double recordedIntensity = affectedPeptide.GetIntensity(currentFile);
                double ambiguousShare = ambiguousPeak.Intensity / (recordedIntensity + ambiguousPeak.Intensity);

                // written as a positive test so that a NaN share (0 / 0) is left untouched
                if (ambiguousShare > 0.3)
                {
                    affectedPeptide.SetIntensity(currentFile, 0);
                    affectedPeptide.SetDetectionType(currentFile, DetectionType.MSMSAmbiguousPeakfinding);
                }
            }
        }
    }

    // step 4
    HandleAmbiguityInFractions();
}
//If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
//Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
//Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
//This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
//i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
//
//Parameters (as used by this method):
//  silacLabels               - the heavy labels; one extra "file" is created per original file and label
//  spectraFileInfo           - the original (light) spectra files
//  ProteinGroups             - parsimony protein groups, rewritten in place; may be null if parsimony was not done
//  ListOfDigestionParams     - light intensities are kept only if any entry has GeneratehUnlabeledProteinsForSilac set
//  silacProteinGroupMatcher  - light protein group name -> names of the heavy groups generated for it
//  FlashLfqResults           - quantification results, rewritten in place; may be null if nothing was quantified
//  allPsms                   - PSMs whose heavy labels are resolved via ResolveHeavySilacLabel
//  ModsToWriteSelection      - passed through to ResolveHeavySilacLabel
//  Integrate                 - passed through to MergeFeatureWith / CalculateIntensityForThisFeature
public static void SilacConversionsPostQuantification(List<SilacLabel> silacLabels, List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> ProteinGroups,
    HashSet<DigestionParams> ListOfDigestionParams, Dictionary<string, List<string>> silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
    List<PeptideSpectralMatch> allPsms, Dictionary<string, int> ModsToWriteSelection, bool Integrate)
{
    bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);

    //MAKE NEW RAW FILES
    //update number of spectra files to include a new file for each label*condition
    //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
    Dictionary<SpectraFileInfo, string> fileToLabelDictionary = new Dictionary<SpectraFileInfo, string>();
    //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
    Dictionary<SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary<SpectraFileInfo, SpectraFileInfo>();
    List<SpectraFileInfo> silacSpectraFileInfo = new List<SpectraFileInfo>(); //new files

    //foreach existing file
    foreach (SpectraFileInfo originalFile in spectraFileInfo)
    {
        //add the existing file as the light
        silacSpectraFileInfo.Add(originalFile);

        //foreach label, add a new file with the label
        foreach (SilacLabel label in silacLabels)
        {
            SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
            silacSpectraFileInfo.Add(silacFile);
            fileToLabelDictionary[silacFile] = label.MassDifference;
            labeledToUnlabeledFile[silacFile] = originalFile;
        }
    }

    //UPDATE PROTEIN GROUPS
    //remove the heavy protein groups so that there are only light ones
    //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
    HashSet<SpectraFileInfo> lightFilesToRemove = new HashSet<SpectraFileInfo>(); //this is only used when the user specified no unlabeled proteins
    if (ProteinGroups != null) //if we did parsimony
    {
        List<EngineLayer.ProteinGroup> silacProteinGroups = new List<EngineLayer.ProteinGroup>();

        //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
        //we need to remove these unlabeled peptides/proteins before output
        //foreach protein group (which has its own quant for each file)
        foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
        {
            //update fileinfo for the group
            //NOTE(review): this presumably stores the reference, so every group shares silacSpectraFileInfo and the
            //FilesForQuantification.Remove(...) below also shrinks the list used by the later steps - confirm before changing
            proteinGroup.FilesForQuantification = silacSpectraFileInfo;

            //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
            //try to find the light protein groups. If it's not light, ignore it
            if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List<string> silacSubGroupNames))
            {
                //the out variable contains all the other heavy protein groups that were generated for this light protein group
                //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                //If the user didn't specify to search light intensities, then don't output them
                Dictionary<SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                List<SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList();

                //find the protein groups whose names were registered as heavy counterparts of the current light protein group
                List<ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList();

                //go through all files (including "silac" files)
                foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo) //for every file (light and heavy)
                {
                    //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                    if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                    {
                        string labelSignature = fileToLabelDictionary[fileInfo]; //a string associated with a silac label
                        ProteinGroup foundGroup = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein groups containing this label
                        updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]]; //update the intensity for that label in the light group
                    }
                    //else do nothing. The light version is already in the dictionary
                }

                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                if (!outputLightIntensities)
                {
                    foreach (SpectraFileInfo info in lightKeys)
                    {
                        updatedIntensitiesByFile.Remove(info);
                        proteinGroup.FilesForQuantification.Remove(info);
                        lightFilesToRemove.Add(info);
                    }
                }

                silacProteinGroups.Add(proteinGroup);
            }
        }

        //update
        ProteinGroups.Clear();
        ProteinGroups.AddRange(silacProteinGroups);

        //UPDATE FLASHLFQ PROTEINS
        if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
        {
            //remove light files (if necessary)
            //this must stay inside the null check: dereferencing FlashLfqResults outside of it throws when nothing was quantified
            foreach (SpectraFileInfo info in lightFilesToRemove)
            {
                FlashLfqResults.SpectraFiles.Remove(info);
            }

            Dictionary<string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups

            //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
            var keys = flashLfqProteins.Keys.ToList();
            foreach (string key in keys)
            {
                if (silacLabels.Any(x => key.Contains(x.MassDifference)))
                {
                    flashLfqProteins.Remove(key);
                }
            }
        }
    }

    //UPDATE FLASHLFQ SPECTRA FILES
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        List<SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
        foreach (SpectraFileInfo info in silacSpectraFileInfo)
        {
            if (!originalFiles.Contains(info))
            {
                originalFiles.Add(info);
            }
        }
    }

    //UPDATE PEPTIDE INFO
    //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
    //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
    //We are unable to do this for Parameters.AllPsms, because they store proteins and start/end residues instead
    //for Psms, we need to convert during the writing.
    for (int i = 0; i < allPsms.Count; i++)
    {
        allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);
    }

    //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        var lfqPeaks = FlashLfqResults.Peaks;
        List<SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

        foreach (SpectraFileInfo key in peakKeys)
        {
            List<FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
            for (int i = 0; i < peaks.Count; i++)
            {
                var peak = peaks[i];

                //check if we're removing light peaks and if it's a light peak
                if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                {
                    peaks.RemoveAt(i);
                    i--; //stay on the same index, which now holds the next peak
                }
                else
                {
                    List<Identification> identifications = new List<Identification>();

                    foreach (var id in peak.Identifications)
                    {
                        SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                        HashSet<FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                        List<FlashLFQ.ProteinGroup> updatedGroups = new List<FlashLFQ.ProteinGroup>();

                        foreach (FlashLFQ.ProteinGroup group in originalGroups)
                        {
                            string groupName = group.ProteinGroupName;
                            if (label == null) //if light
                            {
                                updatedGroups.Add(group);
                            }
                            else
                            {
                                string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                int labelIndex = groupName.IndexOf(labelString);
                                if (labelIndex != -1) //labelIndex == -1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                {
                                    //keep only the part of the name that precedes the label
                                    groupName = groupName.Substring(0, labelIndex);
                                    updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                                }
                            }
                        }

                        Identification updatedId = new Identification(
                            id.fileInfo,
                            GetSilacLightBaseSequence(id.BaseSequence, label),
                            GetSilacLightFullSequence(id.ModifiedSequence, label),
                            id.monoisotopicMass,
                            id.ms2RetentionTimeInMinutes,
                            id.precursorChargeState,
                            updatedGroups,
                            id.OptionalChemicalFormula,
                            id.UseForProteinQuant
                            );
                        identifications.Add(updatedId);
                    }

                    FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                    for (int j = 1; j < identifications.Count; j++) //add all the original identification
                    {
                        updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                    }
                    updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes; //need to set isotopicEnvelopes, since the new identifications didn't have them.
                    updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                    peaks[i] = updatedPeak;
                }
            }
        }

        //convert all lfq peptides from heavy to light for output
        Dictionary<string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
        List<string> pwsmKeys = lfqPwsms.Keys.ToList();

        foreach (string key in pwsmKeys)
        {
            FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
            SilacLabel label = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
            if (label != null) //if it's a heavy peptide
            {
                lfqPwsms.Remove(key); //get rid of it

                //update the light version
                string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false); //get the light sequence
                List<SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                {
                    foreach (SpectraFileInfo heavyFile in heavyFiles)
                    {
                        SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                        lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                        lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                    }
                }
                else //if there's no light, create a new entry for the heavy
                {
                    //new peptide
                    FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);

                    //update the heavy info, set the light values to zero
                    foreach (SpectraFileInfo info in heavyFiles)
                    {
                        //The heavy "files" were only created at the top of this method, so the heavy peptide's quantification
                        //is stored under the original file it was measured in; read it from there, exactly as the branch above does.
                        //Fall back to the file itself if it has no registered unlabeled counterpart.
                        SpectraFileInfo sourceFile = labeledToUnlabeledFile.TryGetValue(info, out SpectraFileInfo unlabeledFile) ? unlabeledFile : info;
                        updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(sourceFile));
                        updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(sourceFile));
                    }

                    //set the other values to zero
                    List<SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                    foreach (SpectraFileInfo info in otherInfo)
                    {
                        updatedPeptide.SetIntensity(info, 0);
                        updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                    }

                    //strip the label from the protein group names
                    HashSet<FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                    HashSet<FlashLFQ.ProteinGroup> updatedGroups = new HashSet<FlashLFQ.ProteinGroup>();
                    foreach (FlashLFQ.ProteinGroup group in originalGroups)
                    {
                        string groupName = group.ProteinGroupName;
                        groupName = groupName.Replace(label.MassDifference, "");
                        updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                    }
                    updatedPeptide.proteinGroups = updatedGroups;
                    lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
                }
            }
        }
    }
}