/// <summary>
/// Runs a <c>CalibrationTask</c> on a copy of the small yeast test file and checks the task output:
/// a readable experimental design for the calibrated file, a file-specific .toml whose first two
/// lines carry the precursor/product mass tolerances, and the calibrated .mzML itself.
/// </summary>
/// <param name="nonCalibratedFile">File name (with extension) given to the copied, non-calibrated spectra file.</param>
public static void ExperimentalDesignCalibrationTest(string nonCalibratedFile)
{
    // set up directories
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string unitTestFolder = Path.Combine(testDirectory, @"ExperimentalDesignCalibrationTest");
    string outputFolder = Path.Combine(unitTestFolder, @"TaskOutput");
    Directory.CreateDirectory(unitTestFolder);
    Directory.CreateDirectory(outputFolder);

    try
    {
        // set up original spectra file (input to calibration)
        // path segments are combined individually so the test does not depend on '\' as separator
        string nonCalibratedFilePath = Path.Combine(unitTestFolder, nonCalibratedFile);
        File.Copy(Path.Combine(testDirectory, "TestData", "SmallCalibratible_Yeast.mzML"), nonCalibratedFilePath, true);

        // protein db
        string myDatabase = Path.Combine(testDirectory, "TestData", "smalldb.fasta");

        // set up original experimental design (input to calibration)
        SpectraFileInfo fileInfo = new SpectraFileInfo(nonCalibratedFilePath, "condition", 0, 0, 0);
        ExperimentalDesign.WriteExperimentalDesignToFile(new List<SpectraFileInfo> { fileInfo });

        // run calibration
        CalibrationTask calibrationTask = new CalibrationTask();
        calibrationTask.RunTask(
            outputFolder,
            new List<DbForTask> { new DbForTask(myDatabase, false) },
            new List<string> { nonCalibratedFilePath },
            "test");

        // test new experimental design written by calibration
        string newExpDesignPath = Path.Combine(outputFolder, @"ExperimentalDesign.tsv");
        string calibratedBaseName = Path.GetFileNameWithoutExtension(nonCalibratedFilePath) + "-calib";
        string expectedCalibratedFilePath = Path.Combine(outputFolder, calibratedBaseName + ".mzML");
        var newExperDesign = ExperimentalDesign.ReadExperimentalDesign(
            newExpDesignPath,
            new List<string> { expectedCalibratedFilePath },
            out var errors);

        Assert.That(!errors.Any());
        Assert.That(newExperDesign.Count == 1);

        // test file-specific toml written by calibration w/ suggested ppm tolerances
        string expectedTomlPath = Path.Combine(outputFolder, calibratedBaseName + ".toml");
        Assert.That(File.Exists(expectedTomlPath));

        string[] lines = File.ReadAllLines(expectedTomlPath);
        string tolerance = Regex.Match(lines[0], @"\d+\.\d*").Value;
        string tolerance1 = Regex.Match(lines[1], @"\d+\.\d*").Value;

        // the regex only matches a '.' decimal separator, so parse with the invariant culture
        // rather than whatever culture the test machine happens to use
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var floatStyle = System.Globalization.NumberStyles.Float;
        Assert.That(double.TryParse(tolerance, floatStyle, invariant, out _));
        Assert.That(double.TryParse(tolerance1, floatStyle, invariant, out _));

        Assert.That(lines[0].Contains("PrecursorMassTolerance"));
        Assert.That(lines[1].Contains("ProductMassTolerance"));

        // check that calibrated .mzML exists (the path already includes the output folder)
        Assert.That(File.Exists(expectedCalibratedFilePath));
    }
    finally
    {
        // clean up even when an assertion above fails, so later runs start from a clean folder
        Directory.Delete(unitTestFolder, true);
    }
}
/// <summary>
/// Writes one synthetic two-scan .mzML (MS1 + MS2 of a single peptide) per biorep/fraction/techrep
/// combination, runs a <c>SearchTask</c> on them, and checks the intensity column headers and values
/// in AllQuantifiedProteinGroups.tsv.
/// </summary>
/// <param name="hasDefinedExperimentalDesign">When true, an experimental design is written and the condition is "TestCondition"; otherwise the condition is empty and no design is written.</param>
/// <param name="bioreps">Number of biological replicates to generate.</param>
/// <param name="fractions">Number of fractions to generate per biological replicate.</param>
/// <param name="techreps">Number of technical replicates to generate per fraction.</param>
public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign, int bioreps, int fractions, int techreps)
{
    // create the unit test directory
    string unitTestFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestProteinQuantFileHeaders");
    Directory.CreateDirectory(unitTestFolder);

    try
    {
        List<SpectraFileInfo> fileInfos = new List<SpectraFileInfo>();
        string peptide = "PEPTIDE";
        double ionIntensity = 1e6;
        string condition = hasDefinedExperimentalDesign ? "TestCondition" : "";

        // create the protein database
        Protein prot = new Protein(peptide, @"");
        string dbName = Path.Combine(unitTestFolder, "testDB.fasta");
        UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List<Protein> { prot }, dbName, ">");

        // create the .mzML files to search/quantify
        for (int b = 0; b < bioreps; b++)
        {
            for (int f = 0; f < fractions; f++)
            {
                for (int r = 0; r < techreps; r++)
                {
                    string fileToWrite = "file_" + "b" + b + "f" + f + "r" + r + ".mzML";

                    // generate mzml file
                    MsDataScan[] scans = new MsDataScan[2];

                    // create the MS1 scan
                    ChemicalFormula cf = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula();
                    IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                    double[] mz = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                    double[] intensities = dist.Intensities.Select(v => v * ionIntensity).ToArray();

                    scans[0] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: 1, msnOrder: 1, isCentroid: true,
                        polarity: Polarity.Positive, retentionTime: 1.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                        mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=1");

                    // create the MS2 scan
                    var pep = new PeptideWithSetModifications(peptide, new Dictionary<string, Proteomics.Modification>());
                    List<Product> frags = new List<Product>();
                    pep.Fragment(DissociationType.HCD, FragmentationTerminus.Both, frags);
                    double[] mz2 = frags.Select(v => v.NeutralMass.ToMz(1)).ToArray();
                    double[] intensities2 = frags.Select(v => 1e6).ToArray();
                    double precursorMz = pep.MonoisotopicMass.ToMz(1);

                    // the total ion current of the MS2 scan is the sum of its own fragment intensities,
                    // not the MS1 isotope intensities
                    scans[1] = new MsDataScan(massSpectrum: new MzSpectrum(mz2, intensities2, false), oneBasedScanNumber: 2, msnOrder: 2, isCentroid: true,
                        polarity: Polarity.Positive, retentionTime: 1.01, scanWindowRange: new MzRange(100, 1600), scanFilter: "f",
                        mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities2.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=2",
                        selectedIonMz: precursorMz, selectedIonChargeStateGuess: 1, selectedIonIntensity: 1e6,
                        isolationMZ: precursorMz, isolationWidth: 1.5, dissociationType: DissociationType.HCD,
                        oneBasedPrecursorScanNumber: 1, selectedIonMonoisotopicGuessMz: precursorMz, hcdEnergy: "35");

                    // write the .mzML
                    string fullPath = Path.Combine(unitTestFolder, fileToWrite);
                    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(
                        new MsDataFile(scans, new SourceFile(@"scan number only nativeID format", "mzML format", null, "SHA-1", @"C:\fake.mzML", null)),
                        fullPath, false);

                    // note the argument order: biorep, techrep, fraction
                    var spectraFileInfo = new SpectraFileInfo(fullPath, condition, b, r, f);
                    fileInfos.Add(spectraFileInfo);
                }
            }
        }

        // write the experimental design for this quantification test
        if (hasDefinedExperimentalDesign)
        {
            ExperimentalDesign.WriteExperimentalDesignToFile(fileInfos);
        }

        // run the search/quantification
        SearchTask task = new SearchTask();
        task.RunTask(unitTestFolder, new List<DbForTask> { new DbForTask(dbName, false) }, fileInfos.Select(p => p.FullFilePathWithExtension).ToList(), "");

        // read in the protein quant results
        string quantResultsPath = Path.Combine(unitTestFolder, "AllQuantifiedProteinGroups.tsv");
        Assert.That(File.Exists(quantResultsPath));
        var lines = File.ReadAllLines(quantResultsPath);

        // check the intensity column headers
        var splitHeader = lines[0].Split(new char[] { '\t' }).ToList();
        var intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList();

        Assert.That(intensityColumnHeaders.Count == 2);

        if (!hasDefinedExperimentalDesign)
        {
            Assert.That(intensityColumnHeaders[0] == "Intensity_file_b0f0r0");
            Assert.That(intensityColumnHeaders[1] == "Intensity_file_b1f0r0");
        }
        else
        {
            Assert.That(intensityColumnHeaders[0] == "Intensity_TestCondition_1");
            Assert.That(intensityColumnHeaders[1] == "Intensity_TestCondition_2");
        }

        // check the protein intensity values (first data row, split once)
        int ind1 = splitHeader.IndexOf(intensityColumnHeaders[0]);
        int ind2 = splitHeader.IndexOf(intensityColumnHeaders[1]);
        string[] firstRowCells = lines[1].Split(new char[] { '\t' });
        double intensity1 = double.Parse(firstRowCells[ind1]);
        double intensity2 = double.Parse(firstRowCells[ind2]);

        Assert.That(intensity1 > 0);
        Assert.That(intensity2 > 0);
        // every generated file holds identical scans, so exact equality is what this test requires
        Assert.That(intensity1 == intensity2);
    }
    finally
    {
        // always remove the folder: a leftover experimental design file from a failed run
        // would otherwise still be present for a later case that does not write one
        Directory.Delete(unitTestFolder, true);
    }
}
/// <summary>
/// SILAC post-quantification conversion.
/// Compresses the light/heavy quantification results onto the unlabeled ("light") peptide while spreading
/// them over new per-label spectra files, so that "file 1" becomes e.g. "file 1 (light)" and "file 1 (heavy)".
/// Heavy residues in the FlashLFQ peak identifications are rewritten as the light residue plus a label string
/// ("PEPTIDEa" -> "PEPTIDEK(+8.014)"). PSMs cannot be rewritten that way because they are defined by a protein
/// and start/end residues; they are converted through <c>ResolveHeavySilacLabel</c> instead.
/// </summary>
/// <param name="allSilacLabels">All SILAC labels used in the search.</param>
/// <param name="startLabel">Turnover start label; turnover handling is used when this or <paramref name="endLabel"/> is non-null.</param>
/// <param name="endLabel">Turnover end label; see <paramref name="startLabel"/>.</param>
/// <param name="spectraFileInfo">The original spectra files.</param>
/// <param name="proteinGroups">Protein groups to update with per-file intensities; may be null.</param>
/// <param name="listOfDigestionParams">Not read by this method.</param>
/// <param name="flashLfqResults">Quantification results; may be null, in which case only the PSMs are converted.</param>
/// <param name="allPsms">PSMs whose heavy labels are resolved for output.</param>
/// <param name="modsToWriteSelection">Passed through to <c>ResolveHeavySilacLabel</c>.</param>
/// <param name="quantifyUnlabeledPeptides">Passed through to <c>CreateSilacRawFiles</c>.</param>
public static void SilacConversionsPostQuantification(List<SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel,
    List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> proteinGroups, HashSet<DigestionParams> listOfDigestionParams, FlashLfqResults flashLfqResults,
    List<PeptideSpectralMatch> allPsms, Dictionary<string, int> modsToWriteSelection, bool quantifyUnlabeledPeptides)
{
    //do protein quant if we had any results
    //if no results, we still may need to edit the psms
    if (flashLfqResults != null) //can be null if no unambiguous psms were found
    {
        //after this point, we have quantification values for the peptides, but they all belong to the same "unlabeled" protein and are in the same file
        //We can remove "labeled" peptides from each file and put them in a new file as "unlabeled".

        //MAKE NEW RAW FILES
        //update number of spectra files to include a new file for each label/condition
        Dictionary<SpectraFileInfo, List<SpectraFileInfo>> originalToLabeledFileInfoDictionary =
            CreateSilacRawFiles(flashLfqResults, allSilacLabels, startLabel, endLabel, quantifyUnlabeledPeptides, spectraFileInfo);

        //we have the files, now let's reassign the psms.
        //there are a few ways to do this, but we're going to generate the "base" peptide and assign to that
        //Dictionary of unlabeled base sequence to the labeled peptides
        Dictionary<string, List<FlashLFQ.Peptide>> unlabeledToPeptidesDictionary =
            GetDictionaryOfProteinAccessionsToPeptides(flashLfqResults.PeptideModifiedSequences.Values, allSilacLabels, startLabel, endLabel);

        //Better SILAC results can be obtained by using the summed intensities from ms1 scans where all peaks were found, rather than the apex
        //foreach unlabeled peptide, get the isotopic envelope intensities for each labeled peptide in each file
        //save the intensities from ms1s that are shared. If no ms1s contains all the peaks, then just use the apex intensity (default)
        CalculateSilacIntensities(flashLfqResults.Peaks, unlabeledToPeptidesDictionary);

        //SPLIT THE FILES
        List<FlashLFQ.Peptide> updatedPeptides = new List<FlashLFQ.Peptide>();

        //split the heavy/light peptides into separate raw files, remove the heavy peptide
        if (startLabel != null || endLabel != null) //if turnover
        {
            //foreach group, the labeled peptides should be split into their labeled files
            //we're deleting the heavy results after we pull those results into a different file
            foreach (SpectraFileInfo info in spectraFileInfo)
            {
                string fullPathWithExtension = info.FullFilePathWithExtension;

                //GetExtension returns the extension including its leading '.', or "" when the file name has none.
                //This replaces a manual Split('.') that threw on paths without any '.' and
                //mis-split paths where only a directory name contained one.
                string extension = System.IO.Path.GetExtension(fullPathWithExtension);
                string filePathWithoutExtension = fullPathWithExtension.Substring(0, fullPathWithExtension.Length - extension.Length);

                SpectraFileInfo lightInfo = new SpectraFileInfo(filePathWithoutExtension + "_Original" + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                SpectraFileInfo heavyInfo = new SpectraFileInfo(filePathWithoutExtension + "_NewlySynthesized" + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                originalToLabeledFileInfoDictionary[info] = new List<SpectraFileInfo> { lightInfo, heavyInfo };
                flashLfqResults.SpectraFiles.Add(lightInfo);
                flashLfqResults.SpectraFiles.Add(heavyInfo);
            }

            //This step converts the quantification intensities from light/heavy to original/newlySynthesized by splitting up the missed cleavage mixtures
            foreach (KeyValuePair<string, List<FlashLFQ.Peptide>> kvp in unlabeledToPeptidesDictionary)
            {
                string unlabeledSequence = kvp.Key; //this will be the key for the new quant entry
                List<FlashLFQ.Peptide> peptides = kvp.Value;

                //sometimes it's one if there is no label site on the peptide (e.g. label K, peptide is PEPTIDER)
                if (peptides.Count == 1)
                {
                    updatedPeptides.Add(peptides[0]);
                    continue;
                }

                //Missed cleavages can yield multiple peptides (e.g. 1 missed = LL, LH, HH; 2 missed = LLL, LLH, LHH, HHH; etc)
                //Compress into 2 values: Light and Heavy
                FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, peptides[0].UseForProteinQuant, CleanPastProteinQuant(peptides[0].ProteinGroups)); //needed to keep protein info.
                int maxNumberHeavyAminoAcids = peptides.Count - 1;

                foreach (SpectraFileInfo info in spectraFileInfo)
                {
                    double lightIntensity = 0;
                    double heavyIntensity = 0;
                    int numUniquePeptidesQuantified = 0;

                    //the index into "peptides" is used as the number of heavy amino acids of that peptide
                    for (int numHeavyAminoAcids = 0; numHeavyAminoAcids < peptides.Count; numHeavyAminoAcids++)
                    {
                        double totalIntensity = peptides[numHeavyAminoAcids].GetIntensity(info);
                        if (totalIntensity > 0)
                        {
                            //prevent confidence of a ratio if only the HL (and not the LL or HH) is observed.
                            //If LL or HH is observed (but not any other), the user knows the ratio is only from one peak.
                            bool isFullyLightOrFullyHeavy = numHeavyAminoAcids == 0 || numHeavyAminoAcids == maxNumberHeavyAminoAcids;
                            numUniquePeptidesQuantified += isFullyLightOrFullyHeavy ? 2 : 1;

                            double partHeavyIntensity = totalIntensity * numHeavyAminoAcids / maxNumberHeavyAminoAcids;
                            lightIntensity += totalIntensity - partHeavyIntensity;
                            heavyIntensity += partHeavyIntensity;
                        }
                    }

                    //If only a mixed peptide with a missed cleavage was identified, reset the intensity values to zero
                    //so the user doesn't get a discrete, inaccurate measurement
                    if (numUniquePeptidesQuantified < 2)
                    {
                        lightIntensity = 0;
                        heavyIntensity = 0;
                    }

                    List<SpectraFileInfo> updatedInfo = originalToLabeledFileInfoDictionary[info];
                    SpectraFileInfo startInfo = updatedInfo[0];
                    SpectraFileInfo endInfo = updatedInfo[1];

                    updatedPeptide.SetIntensity(startInfo, lightIntensity); //assign the corrected light intensity
                    updatedPeptide.SetDetectionType(startInfo, peptides.First().GetDetectionType(info));
                    updatedPeptide.SetIntensity(endInfo, heavyIntensity); //assign the corrected heavy intensity to the heavy file
                    updatedPeptide.SetDetectionType(endInfo, peptides.Last().GetDetectionType(info)); //could include the mixed here if it really matters
                }

                //add the updated peptide to the list
                updatedPeptides.Add(updatedPeptide);
            }
        }
        else //multiplex
        {
            foreach (var kvp in unlabeledToPeptidesDictionary)
            {
                string unlabeledSequence = kvp.Key;
                List<FlashLFQ.Peptide> peptides = kvp.Value;
                FlashLFQ.Peptide representativePeptide = peptides[0];
                FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, representativePeptide.UseForProteinQuant, CleanPastProteinQuant(representativePeptide.ProteinGroups)); //needed to keep protein info.

                //foreach original file
                foreach (SpectraFileInfo info in spectraFileInfo)
                {
                    List<SpectraFileInfo> filesForThisFile = originalToLabeledFileInfoDictionary[info];

                    //the files and the peptides can use the same index, because there should be a distinct file for each label/peptide
                    for (int i = 0; i < peptides.Count; i++)
                    {
                        SpectraFileInfo currentInfo = filesForThisFile[i];
                        FlashLFQ.Peptide currentPeptide = peptides[i];
                        updatedPeptide.SetIntensity(currentInfo, currentPeptide.GetIntensity(info));
                        updatedPeptide.SetDetectionType(currentInfo, currentPeptide.GetDetectionType(info));
                    }
                }

                updatedPeptides.Add(updatedPeptide);
            }
        }

        //Update peptides
        var peptideResults = flashLfqResults.PeptideModifiedSequences;
        peptideResults.Clear();
        foreach (FlashLFQ.Peptide peptide in updatedPeptides)
        {
            peptideResults.Add(peptide.Sequence, peptide);
        }

        //Do protein quant
        flashLfqResults.CalculateProteinResultsTop3(true);

        //update proteingroups to have all files for quantification
        if (proteinGroups != null)
        {
            List<SpectraFileInfo> allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList();
            foreach (ProteinGroup proteinGroup in proteinGroups)
            {
                proteinGroup.FilesForQuantification = allInfo;
                proteinGroup.IntensitiesByFile = new Dictionary<SpectraFileInfo, double>();

                //look the group up once; the answer is the same for every file
                bool wasQuantified = flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup);

                foreach (var spectraFile in allInfo)
                {
                    //zero is needed for decoys/contaminants/proteins that aren't quantified
                    double intensity = wasQuantified ? flashLfqProteinGroup.GetIntensity(spectraFile) : 0;
                    proteinGroup.IntensitiesByFile.Add(spectraFile, intensity);
                }
            }
        }

        //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
        var lfqPeaks = flashLfqResults.Peaks;
        List<SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

        foreach (SpectraFileInfo key in peakKeys)
        {
            List<ChromatographicPeak> peaks = lfqPeaks[key];
            for (int i = 0; i < peaks.Count; i++)
            {
                var peak = peaks[i];

                //only peaks with at least one labeled identification need rewriting; others are left untouched
                if (!peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, allSilacLabels) != null))
                {
                    continue;
                }

                List<Identification> updatedIds = new List<Identification>();
                foreach (var id in peak.Identifications)
                {
                    string baseSequence = id.BaseSequence;
                    string fullSequence = id.ModifiedSequence;
                    List<SilacLabel> labels = GetRelevantLabelsFromBaseSequenceForOutput(id.BaseSequence, allSilacLabels);
                    if (labels != null)
                    {
                        foreach (SilacLabel label in labels)
                        {
                            baseSequence = GetSilacLightBaseSequence(baseSequence, label);
                            fullSequence = GetSilacLightFullSequence(fullSequence, label);
                        }
                    }

                    Identification updatedId = new Identification(
                        id.FileInfo,
                        baseSequence,
                        fullSequence,
                        id.MonoisotopicMass,
                        id.Ms2RetentionTimeInMinutes,
                        id.PrecursorChargeState,
                        id.ProteinGroups.ToList(),
                        id.OptionalChemicalFormula,
                        id.UseForProteinQuant
                        );
                    updatedIds.Add(updatedId);
                }

                //swap the identifications in place so the peak object itself is kept
                peak.Identifications.Clear();
                peak.Identifications.AddRange(updatedIds);
            }
        }
    }

    //convert all psms into human readable format
    foreach (PeptideSpectralMatch psm in allPsms)
    {
        psm.ResolveHeavySilacLabel(allSilacLabels, modsToWriteSelection);
    }
}
/// <summary>
/// SILAC post-quantification conversion.
/// Compresses the light/heavy protein group pairs into the light protein group, with the heavy values stored
/// under new per-label spectra files, so that "file 1" becomes e.g. "file 1 (light)" and "file 1 (heavy)".
/// Heavy residues in FlashLFQ peaks and peptides are rewritten as the light residue plus a label string
/// ("PEPTIDEa" -> "PEPTIDEK(+8.014)"). PSMs cannot be rewritten that way because they are defined by a protein
/// and start/end residues; they are converted through <c>ResolveHeavySilacLabel</c> instead.
/// </summary>
/// <param name="silacLabels">The SILAC labels used in the search.</param>
/// <param name="spectraFileInfo">The original spectra files.</param>
/// <param name="ProteinGroups">Protein groups from parsimony; may be null. The list is rebuilt in place to hold only the light groups.</param>
/// <param name="ListOfDigestionParams">Used to decide whether light (unlabeled) intensities are written.</param>
/// <param name="silacProteinGroupMatcher">Maps a light protein group name to the names of its heavy protein groups.</param>
/// <param name="FlashLfqResults">Quantification results; may be null.</param>
/// <param name="allPsms">PSMs whose heavy labels are resolved for output.</param>
/// <param name="ModsToWriteSelection">Passed through to <c>ResolveHeavySilacLabel</c>.</param>
/// <param name="Integrate">Passed through to the FlashLFQ peak merge/intensity calculations.</param>
public static void SilacConversionsPostQuantification(List<SilacLabel> silacLabels, List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> ProteinGroups,
    HashSet<DigestionParams> ListOfDigestionParams, Dictionary<string, List<string>> silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
    List<PeptideSpectralMatch> allPsms, Dictionary<string, int> ModsToWriteSelection, bool Integrate)
{
    bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);

    //MAKE NEW RAW FILES
    //update number of spectra files to include a new file for each label*condition
    //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
    Dictionary<SpectraFileInfo, string> fileToLabelDictionary = new Dictionary<SpectraFileInfo, string>();
    //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
    Dictionary<SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary<SpectraFileInfo, SpectraFileInfo>();
    List<SpectraFileInfo> silacSpectraFileInfo = new List<SpectraFileInfo>(); //new files

    //foreach existing file
    foreach (SpectraFileInfo originalFile in spectraFileInfo)
    {
        //add the existing file as the light
        silacSpectraFileInfo.Add(originalFile);

        //foreach label, add a new file with the label
        foreach (SilacLabel label in silacLabels)
        {
            SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
            silacSpectraFileInfo.Add(silacFile);
            fileToLabelDictionary[silacFile] = label.MassDifference;
            labeledToUnlabeledFile[silacFile] = originalFile;
        }
    }

    //UPDATE PROTEIN GROUPS
    //remove the heavy protein groups so that there are only light ones
    //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
    HashSet<SpectraFileInfo> lightFilesToRemove = new HashSet<SpectraFileInfo>(); //this is only used when the user specified no unlabeled proteins

    if (ProteinGroups != null) //if we did parsimony
    {
        List<EngineLayer.ProteinGroup> silacProteinGroups = new List<EngineLayer.ProteinGroup>();

        //foreach protein group (which has its own quant for each file)
        foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
        {
            //update fileinfo for the group.
            //every group receives the SAME list instance, so the Remove further down also edits silacSpectraFileInfo
            proteinGroup.FilesForQuantification = silacSpectraFileInfo;

            //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
            //If it's not a light group, ignore it
            if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List<string> silacSubGroupNames))
            {
                //the out variable contains all the other heavy protein groups that were generated for this light protein group
                //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                Dictionary<SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                List<SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList(); //snapshot taken before heavy files are added

                //find the protein groups that were generated from the current light protein group
                List<ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList();

                foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo) //for every file (light and heavy)
                {
                    //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                    if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                    {
                        string labelSignature = fileToLabelDictionary[fileInfo]; //a string associated with a silac label
                        ProteinGroup foundGroup = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein group containing this label
                        updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]]; //update the intensity for that label in the light group
                    }
                    //else do nothing. The light version is already in the dictionary
                }

                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                if (!outputLightIntensities)
                {
                    foreach (SpectraFileInfo info in lightKeys)
                    {
                        updatedIntensitiesByFile.Remove(info);
                        proteinGroup.FilesForQuantification.Remove(info);
                        lightFilesToRemove.Add(info);
                    }
                }

                silacProteinGroups.Add(proteinGroup);
            }
        }

        //update
        ProteinGroups.Clear();
        ProteinGroups.AddRange(silacProteinGroups);

        if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
        {
            //remove light files (if necessary)
            //this loop used to run before the null check and dereferenced a null FlashLfqResults
            foreach (SpectraFileInfo info in lightFilesToRemove)
            {
                FlashLfqResults.SpectraFiles.Remove(info);
            }

            //UPDATE FLASHLFQ PROTEINS
            Dictionary<string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups

            //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
            var keys = flashLfqProteins.Keys.ToList();
            foreach (string key in keys)
            {
                if (silacLabels.Any(x => key.Contains(x.MassDifference)))
                {
                    flashLfqProteins.Remove(key);
                }
            }
        }
    }

    //UPDATE FLASHLFQ SPECTRA FILES
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        List<SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
        foreach (SpectraFileInfo info in silacSpectraFileInfo)
        {
            if (!originalFiles.Contains(info))
            {
                originalFiles.Add(info);
            }
        }
    }

    //UPDATE PEPTIDE INFO
    //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
    //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
    //We are unable to do this for the psms, because they store proteins and start/end residues instead
    //for Psms, we need to convert during the writing.
    for (int i = 0; i < allPsms.Count; i++)
    {
        allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);
    }

    //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        var lfqPeaks = FlashLfqResults.Peaks;
        List<SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

        foreach (SpectraFileInfo key in peakKeys)
        {
            List<FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
            for (int i = 0; i < peaks.Count; i++)
            {
                var peak = peaks[i];
                List<Identification> identifications = new List<Identification>();

                //check if we're removing light peaks and if it's a light peak
                if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                {
                    peaks.RemoveAt(i);
                    i--; //stay on the same index, which now holds the next peak
                }
                else
                {
                    foreach (var id in peak.Identifications)
                    {
                        SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                        HashSet<FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                        List<FlashLFQ.ProteinGroup> updatedGroups = new List<FlashLFQ.ProteinGroup>();

                        foreach (FlashLFQ.ProteinGroup group in originalGroups)
                        {
                            string groupName = group.ProteinGroupName;
                            if (label == null) //if light
                            {
                                updatedGroups.Add(group);
                            }
                            else
                            {
                                string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                int labelIndex = groupName.IndexOf(labelString);
                                if (labelIndex != -1) //labelIndex == -1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                {
                                    //keep only the part of the name in front of the label
                                    groupName = groupName.Substring(0, labelIndex);
                                    updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                                }
                            }
                        }

                        Identification updatedId = new Identification(
                            id.fileInfo,
                            GetSilacLightBaseSequence(id.BaseSequence, label),
                            GetSilacLightFullSequence(id.ModifiedSequence, label),
                            id.monoisotopicMass,
                            id.ms2RetentionTimeInMinutes,
                            id.precursorChargeState,
                            updatedGroups,
                            id.OptionalChemicalFormula,
                            id.UseForProteinQuant
                            );
                        identifications.Add(updatedId);
                    }

                    //rebuild the peak from the first converted identification, then merge in the remaining ones
                    FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                    for (int j = 1; j < identifications.Count; j++) //add all the original identification
                    {
                        updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                    }

                    updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes; //need to set isotopicEnvelopes, since the new identifications didn't have them.
                    updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                    peaks[i] = updatedPeak;
                }
            }
        }

        //convert all lfq peptides from heavy to light for output
        Dictionary<string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
        List<string> pwsmKeys = lfqPwsms.Keys.ToList();

        foreach (string key in pwsmKeys)
        {
            FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
            SilacLabel label = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
            if (label != null) //if it's a heavy peptide
            {
                lfqPwsms.Remove(key); //get rid of it

                //update the light version
                string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false); //get the light sequence
                List<SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                {
                    foreach (SpectraFileInfo heavyFile in heavyFiles)
                    {
                        SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                        lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                        lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                    }
                }
                else //if there's no light, create a new entry for the heavy
                {
                    //new peptide
                    FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);

                    //update the heavy info, set the light values to zero
                    //NOTE(review): the branch above reads currentPeptide via labeledToUnlabeledFile[heavyFile], while this
                    //loop reads it with the heavy file itself - confirm which key currentPeptide's intensities are stored under
                    foreach (SpectraFileInfo info in heavyFiles)
                    {
                        updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(info));
                        updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(info));
                    }

                    //set the other values to zero
                    List<SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                    foreach (SpectraFileInfo info in otherInfo)
                    {
                        updatedPeptide.SetIntensity(info, 0);
                        updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                    }

                    //strip the label signature from the protein group names
                    HashSet<FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                    HashSet<FlashLFQ.ProteinGroup> updatedGroups = new HashSet<FlashLFQ.ProteinGroup>();
                    foreach (FlashLFQ.ProteinGroup group in originalGroups)
                    {
                        string groupName = group.ProteinGroupName;
                        groupName = groupName.Replace(label.MassDifference, "");
                        updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                    }

                    updatedPeptide.proteinGroups = updatedGroups;
                    lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
                }
            }
        }
    }
}
/// <summary>
/// Reads a tab-separated experimental design file and builds a <c>SpectraFileInfo</c> for every row whose
/// file name matches one of the given spectra files. The first line of the file is skipped as a header.
/// </summary>
/// <param name="experimentalDesignPath">Path of the experimental design file.</param>
/// <param name="fullFilePathsWithExtension">Full paths of the spectra files that must be defined in the design.</param>
/// <param name="errors">Receives at most one error message; empty when the design was read and validated successfully.</param>
/// <returns>
/// The file infos read so far. On error this list may be partial, so callers should check <paramref name="errors"/>.
/// Biorep, fraction and techrep are written 1-based in the file and stored 0-based.
/// </returns>
public static List<SpectraFileInfo> ReadExperimentalDesign(string experimentalDesignPath, List<string> fullFilePathsWithExtension, out List<string> errors)
{
    var expDesign = new List<SpectraFileInfo>();
    errors = new List<string>();

    if (!File.Exists(experimentalDesignPath))
    {
        errors.Add("Experimental design file not found!");
        return expDesign;
    }

    var lines = File.ReadAllLines(experimentalDesignPath);

    // index the spectra files by file name once instead of scanning the whole list for every row;
    // the first path with a given file name wins, matching a FirstOrDefault over the list
    var pathByFileName = new Dictionary<string, string>();
    foreach (string path in fullFilePathsWithExtension)
    {
        string fileName = Path.GetFileName(path);
        if (fileName != null && !pathByFileName.ContainsKey(fileName))
        {
            pathByFileName.Add(fileName, path);
        }
    }

    // start at 1: the first line is the header
    for (int i = 1; i < lines.Length; i++)
    {
        var split = lines[i].Split(new char[] { '\t' });
        int lineNumber = i + 1;

        if (split.Length < 5)
        {
            errors.Add("Error: The experimental design was not formatted correctly. Expected 5 cells, but found " + split.Length + " on line " + lineNumber);
            return expDesign;
        }

        string fileNameWithExtension = split[0];
        string condition = split[1];
        string strBiorep = split[2];
        string strFraction = split[3];
        string strTechrep = split[4];

        if (!int.TryParse(strBiorep, out int biorep))
        {
            errors.Add("Error: The experimental design was not formatted correctly. The biorep on line " + lineNumber + " is not an integer");
            return expDesign;
        }
        if (!int.TryParse(strFraction, out int fraction))
        {
            errors.Add("Error: The experimental design was not formatted correctly. The fraction on line " + lineNumber + " is not an integer");
            return expDesign;
        }
        if (!int.TryParse(strTechrep, out int techrep))
        {
            errors.Add("Error: The experimental design was not formatted correctly. The techrep on line " + lineNumber + " is not an integer");
            return expDesign;
        }

        if (!pathByFileName.TryGetValue(fileNameWithExtension, out string foundFilePath))
        {
            // the experimental design could include files that aren't in the spectra file list but that's ok.
            // it's fine to have extra files defined in the experimental design as long as the remainder is valid
            continue;
        }

        // note the constructor order: biorep, techrep, fraction (the file's column order is biorep, fraction, techrep)
        var fileInfo = new SpectraFileInfo(foundFilePath, condition, biorep - 1, techrep - 1, fraction - 1);
        expDesign.Add(fileInfo);
    }

    // check to see if there are any files missing from the experimental design
    var filesDefinedInExpDesign = new HashSet<string>(expDesign.Select(p => p.FullFilePathWithExtension));
    // materialized once so the query is not re-run for the emptiness check and again for the message
    var notDefined = fullFilePathsWithExtension.Where(p => !filesDefinedInExpDesign.Contains(p)).ToList();

    if (notDefined.Count > 0)
    {
        errors.Add("Error: The experimental design did not contain the file(s): " + string.Join(", ", notDefined));
        return expDesign;
    }

    // check to see if the design is valid
    var designError = GetErrorsInExperimentalDesign(expDesign);

    if (designError != null)
    {
        errors.Add(designError);
        return expDesign;
    }

    // all files passed in are defined in the experimental design and the exp design is valid
    return expDesign;
}
/// <summary>
/// Console entry point that writes and re-reads a generated toml file, then reads and re-writes an mzML file,
/// printing the elapsed milliseconds of each of the four operations.
/// </summary>
/// <param name="args">
/// args[0]: path of the toml file to write and read back;
/// args[1]: number of key-value pairs to generate (must be an integer);
/// args[2]: path of the mzML file to read;
/// args[3]: path of the mzML file to write.
/// </param>
private static void Main(string[] args)
{
    Console.WriteLine("Welcome to MetaMorpheus");

    // Validate the command line up front so a bad invocation produces a usage message
    // rather than an IndexOutOfRangeException or FormatException part-way through the run.
    if (args == null || args.Length < 4)
    {
        Console.WriteLine("Usage: <toml file to write> <number of toml key-value pairs> <mzML file to read> <mzML file to write>");
        Environment.ExitCode = 1;
        return;
    }

    if (!int.TryParse(args[1], out int pairCount))
    {
        Console.WriteLine("The number of toml key-value pairs must be an integer, but was: {0}", args[1]);
        Environment.ExitCode = 1;
        return;
    }

    // EDGAR: Creating the FlashLfqEngine is unfortunately required,
    // otherwise the code just crashes when executed.
    SpectraFileInfo mzml = new SpectraFileInfo("sliced-mzml.mzml", "a", 0, 1, 0);
    var pg = new FlashLFQ.ProteinGroup("MyProtein", "gene", "org");
    Identification id3 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.12193, 2, new List<FlashLFQ.ProteinGroup> { pg });
    Identification id4 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.05811, 2, new List<FlashLFQ.ProteinGroup> { pg });
    FlashLfqEngine engine = new FlashLfqEngine(new List<Identification> { id3, id4 }, normalize: true);
    // EDGAR: End of part required to avoid crash

    //generate toml
    Console.WriteLine("generating toml with {0} key-value pairs", args[1]);
    var tomlData = Toml.Create();

    // the pair count is parsed once above instead of on every loop iteration
    for (int i = 0; i < pairCount; i++)
    {
        tomlData.Add(i.ToString(), i);
    }

    //write toml
    Console.WriteLine("writing toml file {0}", args[0]);
    Stopwatch stopwatch = Stopwatch.StartNew();
    Toml.WriteFile(tomlData, args[0]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for toml write: {0}\n", stopwatch.ElapsedMilliseconds);

    //read file
    Console.WriteLine("reading toml file {0}", args[0]);
    stopwatch = Stopwatch.StartNew();
    var tomlRead = Toml.ReadFile(args[0]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for toml read: {0}\n", stopwatch.ElapsedMilliseconds);

    //read mzml file
    Console.WriteLine("reading mzml file {0}", args[2]);
    stopwatch = Stopwatch.StartNew();
    var msData = Mzml.LoadAllStaticData(args[2]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for mzML read: {0}\n", stopwatch.ElapsedMilliseconds);

    //write mzml file
    Console.WriteLine("writing mzml file {0}", args[3]);
    stopwatch = Stopwatch.StartNew();
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msData, args[3], false);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for mzML write: {0}", stopwatch.ElapsedMilliseconds);
}
/// <summary>
/// Writes a five-scan mzML file for one peptide in which scans 3 and 4 hold only a single unrelated
/// 401.0 m/z peak, quantifies two identifications of that peptide, and checks that the file ends up
/// with exactly one peak whose apex retention time is 1.1.
/// </summary>
public static void TestMergePeaks()
{
    const string peptideSequence = "PEPTIDE";
    double baseIntensity = 1e6;
    string mzmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "myMzml.mzML");

    Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

    // build the scans; the second scan is three times as intense as the others
    double[] scanMultipliers = { 1, 3, 1, 1, 1 };
    var scans = new MsDataScan[5];

    for (int scanIndex = 0; scanIndex < scans.Length; scanIndex++)
    {
        double multiplier = scanMultipliers[scanIndex];
        int scanNumber = scanIndex + 1;

        ChemicalFormula formula = new Proteomics.AminoAcidPolymer.Peptide(peptideSequence).GetChemicalFormula();
        IsotopicDistribution isotopes = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);

        // the third and fourth scans carry a lone 401.0 peak instead of the isotopic envelope
        bool isFillerScan = scanIndex == 2 || scanIndex == 3;

        double[] mzValues = isFillerScan
            ? new[] { 401.0 }
            : isotopes.Masses.Select(m => m.ToMz(1)).ToArray();

        double[] peakIntensities = isFillerScan
            ? new[] { 1000.0 }
            : isotopes.Intensities.Select(i => i * baseIntensity * multiplier).ToArray();

        scans[scanIndex] = new MsDataScan(
            massSpectrum: new MzSpectrum(mzValues, peakIntensities, false),
            oneBasedScanNumber: scanNumber,
            msnOrder: 1,
            isCentroid: true,
            polarity: Polarity.Positive,
            retentionTime: 1.0 + scanIndex / 10.0,
            scanWindowRange: new MzRange(400, 1600),
            scanFilter: "f",
            mzAnalyzer: MZAnalyzerType.Orbitrap,
            totalIonCurrent: peakIntensities.Sum(),
            injectionTime: 1.0,
            noiseData: null,
            nativeId: "scan=" + scanNumber);
    }

    // persist the scans as an .mzML file
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), mzmlPath, false);

    // describe the spectra file and two PSMs for the same peptide at different retention times
    var spectraFile = new SpectraFileInfo(mzmlPath, "", 0, 0, 0);
    var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");
    var peptideMass = new Proteomics.AminoAcidPolymer.Peptide(peptideSequence).MonoisotopicMass;

    var identifications = new List<Identification>
    {
        new Identification(spectraFile, peptideSequence, peptideSequence, peptideMass, 1.1 + 0.001, 1, new List<ProteinGroup> { proteinGroup }),
        new Identification(spectraFile, peptideSequence, peptideSequence, peptideMass, 1.4 + 0.001, 1, new List<ProteinGroup> { proteinGroup })
    };

    // quantify
    var results = new FlashLFQEngine(identifications).Run();

    var peaksInFirstFile = results.Peaks.First().Value;
    ChromatographicPeak onlyPeak = peaksInFirstFile.First();

    Assert.That(peaksInFirstFile.Count == 1);
    Assert.That(onlyPeak.Apex.RetentionTime == 1.1);
}
/// <summary>
/// Runs the FlashLFQ engine with normalization enabled on the sliced raw/mzML pair under four
/// experimental designs (differing biorep, condition, techrep, and fraction) and checks that the
/// quantified intensities of the two files agree in each case.
/// </summary>
public static void TestFlashLfqNormalization()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string rawPath = Path.Combine(testDirectory, @"sliced-raw.raw");
    string mzmlPath = Path.Combine(testDirectory, @"sliced-mzml.mzml");
    const string sequence = "EGFQVADGPLYR";

    var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");

    // every PSM in this test is the same peptide; only the spectra file differs
    Identification IdentifyIn(SpectraFileInfo file)
    {
        return new Identification(file, sequence, sequence, 1350.65681, 94.12193, 2, new List<ProteinGroup> { proteinGroup });
    }

    // quantifies one raw/mzML pair with normalization on and reports the rounded intensity of each file's first peak
    void QuantifyPair(SpectraFileInfo rawFile, SpectraFileInfo mzmlFile, out int mzmlIntensity, out int rawIntensity)
    {
        var pairIds = new List<Identification> { IdentifyIn(rawFile), IdentifyIn(mzmlFile) };
        var quantified = new FlashLFQEngine(pairIds, normalize: true).Run();

        mzmlIntensity = (int)System.Math.Round(quantified.Peaks[mzmlFile].First().Intensity, 0);
        rawIntensity = (int)System.Math.Round(quantified.Peaks[rawFile].First().Intensity, 0);
    }

    // ********************************* check biorep normalization *********************************
    QuantifyPair(
        new SpectraFileInfo(rawPath, "a", 0, 0, 0),
        new SpectraFileInfo(mzmlPath, "a", 1, 0, 0),
        out int biorepMzml,
        out int biorepRaw);

    Assert.That(biorepMzml > 0);
    Assert.That(biorepMzml == biorepRaw);

    // ********************************* check condition normalization *********************************
    QuantifyPair(
        new SpectraFileInfo(rawPath, "a", 0, 0, 0),
        new SpectraFileInfo(mzmlPath, "b", 0, 0, 0),
        out int conditionMzml,
        out int conditionRaw);

    Assert.That(conditionMzml > 0);
    Assert.That(conditionMzml == conditionRaw);

    // ********************************* check techrep normalization *********************************
    QuantifyPair(
        new SpectraFileInfo(rawPath, "a", 0, 0, 0),
        new SpectraFileInfo(mzmlPath, "a", 0, 1, 0),
        out int techrepMzml,
        out int techrepRaw);

    Assert.That(techrepMzml > 0);
    Assert.That(techrepMzml == techrepRaw);

    // the three designs above must also agree with one another
    Assert.That(biorepMzml == conditionMzml);
    Assert.That(biorepMzml == techrepMzml);

    // ********************************* check fraction normalization *********************************
    var rawFraction1 = new SpectraFileInfo(rawPath, "a", 0, 0, 0);
    var rawFraction2 = new SpectraFileInfo(rawPath, "a", 0, 0, 1);
    var mzmlFraction1 = new SpectraFileInfo(mzmlPath, "a", 1, 0, 0);
    var mzmlFraction2 = new SpectraFileInfo(mzmlPath, "a", 1, 0, 1);

    var fractionIds = new List<Identification>
    {
        IdentifyIn(rawFraction1),
        IdentifyIn(rawFraction2),
        IdentifyIn(mzmlFraction1),
        IdentifyIn(mzmlFraction2)
    };

    var fractionResults = new FlashLFQEngine(fractionIds, normalize: true).Run();
    var quantifiedPeptide = fractionResults.PeptideBaseSequences["EGFQVADGPLYR"];

    // fractions are summed per sample before comparing
    int rawTotal = (int)System.Math.Round(quantifiedPeptide.GetIntensity(rawFraction1) + quantifiedPeptide.GetIntensity(rawFraction2));
    int mzmlTotal = (int)System.Math.Round(quantifiedPeptide.GetIntensity(mzmlFraction1) + quantifiedPeptide.GetIntensity(mzmlFraction2));

    Assert.That(rawTotal > 0);
    Assert.That(rawTotal == mzmlTotal);
}
/// <summary>
/// Writes two five-peptide mzML files, identifies every peptide in the first file but omits
/// "PEPTIDEVV" (the only peptide assigned to "MyMbrProtein") from the second, and checks with
/// match-between-runs enabled that "MyMbrProtein" has intensity in the first file and zero in the
/// second, both without and with <c>advancedProteinQuant</c>.
/// </summary>
public static void TestFlashLfqMatchBetweenRunsProteinQuant()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string[] fileStems = { "mzml_1", "mzml_2" };
    string[] sequences = { "PEPTIDE", "PEPTIDEV", "PEPTIDEVV", "PEPTIDEVVV", "PEPTIDEVVVV" };
    double baseIntensity = 1e6;

    // retention time of each peptide's scan, indexed [file][peptide]
    double[][] scanRetentionTimes =
    {
        new[] { 1.01, 1.02, 1.03, 1.04, 1.05 },
        new[] { 1.015, 1.030, 1.036, 1.050, 1.065 }
    };

    string[] mzmlPaths = fileStems.Select(stem => Path.Combine(testDirectory, stem + ".mzML")).ToArray();

    Loaders.LoadElements(Path.Combine(testDirectory, @"elements.dat"));

    // generate mzml files (5 peptides each, 1 MS1 scan per peptide)
    for (int fileIndex = 0; fileIndex < fileStems.Length; fileIndex++)
    {
        var scans = new MsDataScan[5];

        for (int pepIndex = 0; pepIndex < sequences.Length; pepIndex++)
        {
            int scanNumber = pepIndex + 1;

            ChemicalFormula formula = new Proteomics.AminoAcidPolymer.Peptide(sequences[pepIndex]).GetChemicalFormula();
            IsotopicDistribution isotopes = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);
            double[] mzValues = isotopes.Masses.Select(m => m.ToMz(1)).ToArray();
            double[] peakIntensities = isotopes.Intensities.Select(i => i * baseIntensity).ToArray();

            scans[pepIndex] = new MsDataScan(
                massSpectrum: new MzSpectrum(mzValues, peakIntensities, false),
                oneBasedScanNumber: scanNumber,
                msnOrder: 1,
                isCentroid: true,
                polarity: Polarity.Positive,
                retentionTime: scanRetentionTimes[fileIndex][pepIndex],
                scanWindowRange: new MzRange(400, 1600),
                scanFilter: "f",
                mzAnalyzer: MZAnalyzerType.Orbitrap,
                totalIonCurrent: peakIntensities.Sum(),
                injectionTime: 1.0,
                noiseData: null,
                nativeId: "scan=" + scanNumber);
        }

        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), mzmlPaths[fileIndex], false);
    }

    // spectra file info: same condition, different bioreps
    SpectraFileInfo file1 = new SpectraFileInfo(mzmlPaths[0], "a", 0, 0, 0);
    SpectraFileInfo file2 = new SpectraFileInfo(mzmlPaths[1], "a", 1, 0, 0);
    SpectraFileInfo[] spectraFiles = { file1, file2 };

    var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");
    var mbrProteinGroup = new ProteinGroup("MyMbrProtein", "MbrGene", "org");

    // create the PSMs: file 1 first, then file 2, peptides in sequence order
    var identifications = new List<Identification>();

    for (int fileIndex = 0; fileIndex < spectraFiles.Length; fileIndex++)
    {
        for (int pepIndex = 0; pepIndex < sequences.Length; pepIndex++)
        {
            bool isMbrPeptide = pepIndex == 2;

            if (isMbrPeptide && fileIndex == 1)
            {
                // deliberately not identified in the second file - this is the MBR feature
                continue;
            }

            string sequence = sequences[pepIndex];
            ProteinGroup owner = isMbrPeptide ? mbrProteinGroup : proteinGroup;

            identifications.Add(new Identification(
                spectraFiles[fileIndex],
                sequence,
                sequence,
                new Proteomics.AminoAcidPolymer.Peptide(sequence).MonoisotopicMass,
                scanRetentionTimes[fileIndex][pepIndex] + 0.001,
                1,
                new List<ProteinGroup> { owner }));
        }
    }

    // test with top3 protein quant engine
    var results = new FlashLFQEngine(new List<Identification>(identifications), matchBetweenRuns: true).Run();

    Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
    Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);

    // test with advanced protein quant engine
    results = new FlashLFQEngine(new List<Identification>(identifications), matchBetweenRuns: true, advancedProteinQuant: true).Run();

    Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
    Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);
}
/// <summary>
/// Writes two three-peptide mzML files with per-file, per-peptide amounts, quantifies all six
/// identifications with <c>advancedProteinQuant</c> enabled and normalization disabled, and checks
/// that the "MyProtein" intensity lies between 1e6 and 2e6 in the first file and between 3e6 and
/// 4e6 in the second.
/// </summary>
public static void TestFlashLfqAdvancedProteinQuant()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string[] fileStems = { "mzml_1", "mzml_2" };
    string[] sequences = { "PEPTIDE", "MYPEPTIDE", "VVVVVPEPTIDE" };

    // amount of each peptide, indexed [file, peptide]
    double[,] amounts =
    {
        { 1000000, 1000000, 1000000 },
        { 2000000, 2000000, 900000 }
    };

    // mass and retention time used for each peptide's identification
    double[] identifiedMasses = { 799.35996, 1093.46377, 1294.70203 };
    double[] identifiedRetentionTimes = { 1.01, 1.11, 1.21 };

    string[] mzmlPaths = fileStems.Select(stem => Path.Combine(testDirectory, stem + ".mzML")).ToArray();

    Loaders.LoadElements(Path.Combine(testDirectory, @"elements.dat"));

    // generate mzml files (3 peptides each, 1 MS1 scan per peptide)
    for (int fileIndex = 0; fileIndex < fileStems.Length; fileIndex++)
    {
        var scans = new MsDataScan[3];

        for (int pepIndex = 0; pepIndex < sequences.Length; pepIndex++)
        {
            int scanNumber = pepIndex + 1;
            double amount = amounts[fileIndex, pepIndex];

            ChemicalFormula formula = new Proteomics.AminoAcidPolymer.Peptide(sequences[pepIndex]).GetChemicalFormula();
            IsotopicDistribution isotopes = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);
            double[] mzValues = isotopes.Masses.Select(m => m.ToMz(1)).ToArray();
            double[] peakIntensities = isotopes.Intensities.Select(i => i * amount).ToArray();

            scans[pepIndex] = new MsDataScan(
                massSpectrum: new MzSpectrum(mzValues, peakIntensities, false),
                oneBasedScanNumber: scanNumber,
                msnOrder: 1,
                isCentroid: true,
                polarity: Polarity.Positive,
                retentionTime: 1.0 + (pepIndex / 10.0),
                scanWindowRange: new MzRange(400, 1600),
                scanFilter: "f",
                mzAnalyzer: MZAnalyzerType.Orbitrap,
                totalIonCurrent: peakIntensities.Sum(),
                injectionTime: 1.0,
                noiseData: null,
                nativeId: "scan=" + scanNumber);
        }

        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), mzmlPaths[fileIndex], false);
    }

    // spectra file info: same condition, different bioreps
    SpectraFileInfo file1 = new SpectraFileInfo(mzmlPaths[0], "a", 0, 0, 0);
    SpectraFileInfo file2 = new SpectraFileInfo(mzmlPaths[1], "a", 1, 0, 0);

    // create the PSMs: all three peptides in file 1, then all three in file 2
    var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");
    var identifications = new List<Identification>();

    foreach (SpectraFileInfo spectraFile in new[] { file1, file2 })
    {
        for (int pepIndex = 0; pepIndex < sequences.Length; pepIndex++)
        {
            identifications.Add(new Identification(
                spectraFile,
                sequences[pepIndex],
                sequences[pepIndex],
                identifiedMasses[pepIndex],
                identifiedRetentionTimes[pepIndex],
                1,
                new List<ProteinGroup> { proteinGroup }));
        }
    }

    // create and run the FlashLFQ engine
    var results = new FlashLFQEngine(identifications, normalize: false, advancedProteinQuant: true).Run();

    // third peptide should be low-weighted
    // protein should be ~sum of first two peptide intensities (a little lower, because some smaller isotope peaks get skipped)
    double proteinIntensityInFile1 = results.ProteinGroups["MyProtein"].GetIntensity(file1);
    Assert.That(proteinIntensityInFile1 < 2e6);
    Assert.That(proteinIntensityInFile1 > 1e6);

    double proteinIntensityInFile2 = results.ProteinGroups["MyProtein"].GetIntensity(file2);
    Assert.That(proteinIntensityInFile2 < 4e6);
    Assert.That(proteinIntensityInFile2 > 3e6);
}
/// <summary>
/// Quantifies the sliced raw/mzML pair once under condition "a" and once under condition "b",
/// merges the second result set into the first, and checks the merged counts of peaks, peptides,
/// protein groups and spectra files.
/// </summary>
public static void TestFlashLfqMergeResults()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string rawPath = Path.Combine(testDirectory, @"sliced-raw.raw");
    string mzmlPath = Path.Combine(testDirectory, @"sliced-mzml.mzml");

    // builds an engine for one condition: two PSMs per file, each with its own protein group list
    FlashLFQEngine BuildEngineFor(string condition)
    {
        var rawFile = new SpectraFileInfo(rawPath, condition, 0, 0, 0);
        var mzmlFile = new SpectraFileInfo(mzmlPath, condition, 0, 1, 0);
        var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");

        var identifications = new List<Identification>
        {
            new Identification(rawFile, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.12193, 2, new List<ProteinGroup> { proteinGroup }),
            new Identification(rawFile, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.05811, 2, new List<ProteinGroup> { proteinGroup }),
            new Identification(mzmlFile, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.12193, 2, new List<ProteinGroup> { proteinGroup }),
            new Identification(mzmlFile, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.05811, 2, new List<ProteinGroup> { proteinGroup })
        };

        return new FlashLFQEngine(identifications);
    }

    // run condition "a" to completion before setting up condition "b"
    var resultsA = BuildEngineFor("a").Run();
    var resultsB = BuildEngineFor("b").Run();

    resultsA.MergeResultsWith(resultsB);

    Assert.AreEqual(4, resultsA.Peaks.Count);
    Assert.AreEqual(1, resultsA.PeptideBaseSequences.Count);
    Assert.AreEqual(1, resultsA.PeptideModifiedSequences.Count);
    Assert.AreEqual(1, resultsA.ProteinGroups.Count);
    Assert.AreEqual(4, resultsA.SpectraFiles.Count);
}