/// <summary>
/// Copies a small spectra file into a scratch folder, runs a <c>CalibrationTask</c> on it, and checks
/// the files the task is expected to leave in its output folder: a readable "ExperimentalDesign.tsv",
/// a file-specific "-calib.toml" whose first two lines carry the precursor/product mass tolerances,
/// and the "-calib.mzML" spectra file.
/// </summary>
/// <param name="nonCalibratedFile">
/// File name under which the input spectra file is copied into the scratch folder; the expected
/// output names are derived from it by replacing the extension with "-calib.mzML" / "-calib.toml".
/// </param>
public static void ExperimentalDesignCalibrationTest(string nonCalibratedFile)
        {
            // set up directories
            string testDirectory  = TestContext.CurrentContext.TestDirectory;
            string unitTestFolder = Path.Combine(testDirectory, "ExperimentalDesignCalibrationTest");
            string outputFolder   = Path.Combine(unitTestFolder, "TaskOutput");

            Directory.CreateDirectory(unitTestFolder);
            Directory.CreateDirectory(outputFolder);

            try
            {
                // set up original spectra file (input to calibration)
                // path segments are passed separately so the separator is correct on every platform
                string nonCalibratedFilePath = Path.Combine(unitTestFolder, nonCalibratedFile);
                File.Copy(Path.Combine(testDirectory, "TestData", "SmallCalibratible_Yeast.mzML"), nonCalibratedFilePath, true);

                // protein db
                string myDatabase = Path.Combine(testDirectory, "TestData", "smalldb.fasta");

                // set up original experimental design (input to calibration);
                // the call is made for its side effect of writing the file, the returned value is not needed
                SpectraFileInfo fileInfo = new SpectraFileInfo(nonCalibratedFilePath, "condition", 0, 0, 0);
                ExperimentalDesign.WriteExperimentalDesignToFile(new List<SpectraFileInfo> { fileInfo });

                // run calibration
                CalibrationTask calibrationTask = new CalibrationTask();
                calibrationTask.RunTask(
                    outputFolder,
                    new List<DbForTask> { new DbForTask(myDatabase, false) },
                    new List<string> { nonCalibratedFilePath },
                    "test");

                // test new experimental design written by calibration
                string calibratedBaseName         = Path.GetFileNameWithoutExtension(nonCalibratedFilePath) + "-calib";
                string newExpDesignPath           = Path.Combine(outputFolder, "ExperimentalDesign.tsv");
                string expectedCalibratedFilePath = Path.Combine(outputFolder, calibratedBaseName + ".mzML");
                var    newExperDesign             = ExperimentalDesign.ReadExperimentalDesign(
                    newExpDesignPath,
                    new List<string> { expectedCalibratedFilePath },
                    out var errors);

                Assert.That(!errors.Any());
                Assert.That(newExperDesign.Count == 1);

                // test file-specific toml written by calibration w/ suggested ppm tolerances
                string expectedTomlPath = Path.Combine(outputFolder, calibratedBaseName + ".toml");

                Assert.That(File.Exists(expectedTomlPath));

                string[] lines = File.ReadAllLines(expectedTomlPath);

                // fail with an assertion (not an IndexOutOfRangeException) if the toml is shorter than expected
                Assert.That(lines.Length >= 2);
                Assert.That(lines[0].Contains("PrecursorMassTolerance"));
                Assert.That(lines[1].Contains("ProductMassTolerance"));

                string precursorTolerance = Regex.Match(lines[0], @"\d+\.\d*").Value;
                string productTolerance   = Regex.Match(lines[1], @"\d+\.\d*").Value;

                // the pattern only matches '.'-separated numbers, so parse them culture-independently
                Assert.That(double.TryParse(precursorTolerance, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out _));
                Assert.That(double.TryParse(productTolerance, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out _));

                // check that calibrated .mzML exists (the path is already rooted in the output folder)
                Assert.That(File.Exists(expectedCalibratedFilePath));
            }
            finally
            {
                // clean up, also when an assertion above failed, so stale files cannot leak into later runs
                Directory.Delete(unitTestFolder, true);
            }
        }
        /// <summary>
        /// Writes one synthetic two-scan (MS1 + MS2) .mzML file of the peptide "PEPTIDE" for every
        /// biorep/fraction/techrep combination, runs a <c>SearchTask</c> over them, and checks the
        /// intensity column headers and the first row's intensity values in "AllQuantifiedProteinGroups.tsv".
        /// </summary>
        /// <param name="hasDefinedExperimentalDesign">
        /// When true, the files get the condition "TestCondition", an experimental design file is written,
        /// and the headers "Intensity_TestCondition_1"/"Intensity_TestCondition_2" are expected; otherwise the
        /// condition is empty and "Intensity_file_b0f0r0"/"Intensity_file_b1f0r0" are expected.
        /// </param>
        /// <param name="bioreps">Number of biological replicates to generate files for.</param>
        /// <param name="fractions">Number of fractions to generate files for.</param>
        /// <param name="techreps">Number of technical replicates to generate files for.</param>
        /// <remarks>The assertions expect exactly two intensity columns in the output.</remarks>
        public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign, int bioreps, int fractions, int techreps)
        {
            // create the unit test directory
            string unitTestFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestProteinQuantFileHeaders");

            Directory.CreateDirectory(unitTestFolder);

            try
            {
                List<SpectraFileInfo> fileInfos = new List<SpectraFileInfo>();
                const string peptide      = "PEPTIDE";
                const double ionIntensity = 1e6;
                string       condition    = hasDefinedExperimentalDesign ? "TestCondition" : "";

                // create the protein database
                Protein prot   = new Protein(peptide, @"");
                string  dbName = Path.Combine(unitTestFolder, "testDB.fasta");

                UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List<Protein> { prot }, dbName, ">");

                // everything below is identical for every file, so compute it once instead of per file
                ChemicalFormula      cf   = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula();
                IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                var           pep   = new PeptideWithSetModifications(peptide, new Dictionary<string, Proteomics.Modification>());
                List<Product> frags = new List<Product>();
                pep.Fragment(DissociationType.HCD, FragmentationTerminus.Both, frags);
                double precursorMz = pep.MonoisotopicMass.ToMz(1);

                // create the .mzML files to search/quantify
                for (int b = 0; b < bioreps; b++)
                {
                    for (int f = 0; f < fractions; f++)
                    {
                        for (int r = 0; r < techreps; r++)
                        {
                            string fileToWrite = $"file_b{b}f{f}r{r}.mzML";

                            // generate mzml file; the spectrum arrays are created fresh for each file
                            // because MzSpectrum is constructed without copying them
                            MsDataScan[] scans = new MsDataScan[2];

                            // create the MS1 scan
                            double[] mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                            double[] intensities = dist.Intensities.Select(v => v * ionIntensity).ToArray();

                            scans[0] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: 1, msnOrder: 1, isCentroid: true,
                                                      polarity: Polarity.Positive, retentionTime: 1.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                                      mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=1");

                            // create the MS2 scan
                            double[] mz2          = frags.Select(v => v.NeutralMass.ToMz(1)).ToArray();
                            double[] intensities2 = frags.Select(v => 1e6).ToArray();

                            // the total ion current of the MS2 scan is the sum of its own (fragment) intensities
                            scans[1] = new MsDataScan(massSpectrum: new MzSpectrum(mz2, intensities2, false), oneBasedScanNumber: 2, msnOrder: 2, isCentroid: true,
                                                      polarity: Polarity.Positive, retentionTime: 1.01, scanWindowRange: new MzRange(100, 1600), scanFilter: "f",
                                                      mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities2.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=2", selectedIonMz: precursorMz,
                                                      selectedIonChargeStateGuess: 1, selectedIonIntensity: 1e6, isolationMZ: precursorMz, isolationWidth: 1.5, dissociationType: DissociationType.HCD,
                                                      oneBasedPrecursorScanNumber: 1, selectedIonMonoisotopicGuessMz: precursorMz, hcdEnergy: "35");

                            // write the .mzML
                            string fullPath = Path.Combine(unitTestFolder, fileToWrite);
                            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(
                                new MsDataFile(scans, new SourceFile(@"scan number only nativeID format", "mzML format", null, "SHA-1", @"C:\fake.mzML", null)),
                                fullPath, false);

                            fileInfos.Add(new SpectraFileInfo(fullPath, condition, b, r, f));
                        }
                    }
                }

                // write the experimental design for this quantification test
                if (hasDefinedExperimentalDesign)
                {
                    ExperimentalDesign.WriteExperimentalDesignToFile(fileInfos);
                }

                // run the search/quantification
                SearchTask task = new SearchTask();

                task.RunTask(
                    unitTestFolder,
                    new List<DbForTask> { new DbForTask(dbName, false) },
                    fileInfos.Select(p => p.FullFilePathWithExtension).ToList(),
                    "");

                // read in the protein quant results
                string proteinQuantPath = Path.Combine(unitTestFolder, "AllQuantifiedProteinGroups.tsv");

                Assert.That(File.Exists(proteinQuantPath));

                string[] lines = File.ReadAllLines(proteinQuantPath);

                // a header plus at least one protein row is needed for the checks below
                Assert.That(lines.Length >= 2);

                // check the intensity column headers
                var splitHeader            = lines[0].Split('\t').ToList();
                var intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList();

                Assert.That(intensityColumnHeaders.Count == 2);

                if (!hasDefinedExperimentalDesign)
                {
                    Assert.That(intensityColumnHeaders[0] == "Intensity_file_b0f0r0");
                    Assert.That(intensityColumnHeaders[1] == "Intensity_file_b1f0r0");
                }
                else
                {
                    Assert.That(intensityColumnHeaders[0] == "Intensity_TestCondition_1");
                    Assert.That(intensityColumnHeaders[1] == "Intensity_TestCondition_2");
                }

                // check the protein intensity values (first data row, split once)
                int      ind1     = splitHeader.IndexOf(intensityColumnHeaders[0]);
                int      ind2     = splitHeader.IndexOf(intensityColumnHeaders[1]);
                string[] firstRow = lines[1].Split('\t');

                // NOTE(review): parsed with the current culture, as before; confirm which culture the writer uses
                double intensity1 = double.Parse(firstRow[ind1]);
                double intensity2 = double.Parse(firstRow[ind2]);

                Assert.That(intensity1 > 0);
                Assert.That(intensity2 > 0);

                // every generated file has identical content, so the two intensities are expected to match exactly
                Assert.That(intensity1 == intensity2);
            }
            finally
            {
                // clean up, also when an assertion above failed
                Directory.Delete(unitTestFolder, true);
            }
        }
        // Example #3
        // 0
        /// <summary>
        /// SILAC post-quantification conversion.
        /// Compresses the light/heavy quantification results into the same unlabeled peptide/protein group but
        /// different files: new files are created for each SILAC label and file, so that "file 1" becomes
        /// e.g. "file 1 (light)" and "file 1 (heavy)". Heavy residues are changed into the light residue plus a
        /// string label ("PEPTIDEa" becomes "PEPTIDEK(+8.014)").
        /// This conversion has to happen here for the FlashLFQ peptides, but cannot for the PSM peptides, which are
        /// constrained to the protein (they have start/end residues and a protein sequence rather than their own
        /// sequence); their output sequences are resolved through <c>ResolveHeavySilacLabel</c> instead.
        /// </summary>
        /// <param name="allSilacLabels">Labels forwarded to the helper methods that recognize and convert labeled sequences.</param>
        /// <param name="startLabel">Turnover start label; if this or <paramref name="endLabel"/> is non-null the turnover branch is used, otherwise the multiplex branch.</param>
        /// <param name="endLabel">Turnover end label; see <paramref name="startLabel"/>.</param>
        /// <param name="spectraFileInfo">The original spectra files that were quantified.</param>
        /// <param name="proteinGroups">Protein groups whose quantification files and per-file intensities are replaced; may be null, in which case this step is skipped.</param>
        /// <param name="listOfDigestionParams">Not used by this method.</param>
        /// <param name="flashLfqResults">Quantification results, modified in place; may be null, in which case only the PSMs are processed.</param>
        /// <param name="allPsms">PSMs on which <c>ResolveHeavySilacLabel</c> is called.</param>
        /// <param name="modsToWriteSelection">Forwarded to <c>ResolveHeavySilacLabel</c>.</param>
        /// <param name="quantifyUnlabeledPeptides">Forwarded to <c>CreateSilacRawFiles</c>.</param>
        public static void SilacConversionsPostQuantification(List<SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel,
                                                              List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> proteinGroups, HashSet<DigestionParams> listOfDigestionParams, FlashLfqResults flashLfqResults,
                                                              List<PeptideSpectralMatch> allPsms, Dictionary<string, int> modsToWriteSelection, bool quantifyUnlabeledPeptides)
        {
            //do protein quant if we had any results
            //if no results, we still may need to edit the psms
            if (flashLfqResults != null) //can be null if no unambiguous psms were found
            {
                //after this point, we have quantification values for the peptides, but they all belong to the same "unlabeled" protein and are in the same file
                //We can remove "labeled" peptides from each file and put them in a new file as "unlabeled".

                //MAKE NEW RAW FILES
                //update number of spectra files to include a new file for each label/condition
                Dictionary<SpectraFileInfo, List<SpectraFileInfo>> originalToLabeledFileInfoDictionary =
                    CreateSilacRawFiles(flashLfqResults, allSilacLabels, startLabel, endLabel, quantifyUnlabeledPeptides, spectraFileInfo);

                //we have the files, now let's reassign the psms.
                //there are a few ways to do this, but we're going to generate the "base" peptide and assign to that
                //Get Dictionary of protein accessions to peptides
                Dictionary<string, List<FlashLFQ.Peptide>> unlabeledToPeptidesDictionary =
                    GetDictionaryOfProteinAccessionsToPeptides(flashLfqResults.PeptideModifiedSequences.Values, allSilacLabels, startLabel, endLabel);

                //we now have a dictionary of unlabeledBaseSequence to the labeled peptides
                //Better SILAC results can be obtained by using the summed intensities from ms1 scans where all peaks were found, rather than the apex
                //foreach unlabeled peptide, get the isotopic envelope intensities for each labeled peptide in each file
                //save the intensities from ms1s that are shared. If no ms1 contains all the peaks, then just use the apex intensity (default)
                CalculateSilacIntensities(flashLfqResults.Peaks, unlabeledToPeptidesDictionary);

                //SPLIT THE FILES
                List<FlashLFQ.Peptide> updatedPeptides = new List<FlashLFQ.Peptide>();

                //split the heavy/light peptides into separate raw files, remove the heavy peptide
                if (startLabel != null || endLabel != null) //if turnover
                {
                    //foreach group, the labeled peptides should be split into their labeled files
                    //we're deleting the heavy results after we pull those results into a different file
                    foreach (SpectraFileInfo info in spectraFileInfo)
                    {
                        string          fullPathWithExtension    = info.FullFilePathWithExtension;
                        string[]        pathArray                = fullPathWithExtension.Split('.');
                        string          extension                = pathArray.Last();
                        string          filePathWithoutExtension = fullPathWithExtension.Substring(0, fullPathWithExtension.Length - extension.Length - 1); //-1 removes the '.'
                        SpectraFileInfo lightInfo                = new SpectraFileInfo(filePathWithoutExtension + "_Original." + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                        SpectraFileInfo heavyInfo                = new SpectraFileInfo(filePathWithoutExtension + "_NewlySynthesized." + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);

                        //index 0 is the "original" file, index 1 the "newly synthesized" file; the loop below relies on this order
                        originalToLabeledFileInfoDictionary[info] = new List<SpectraFileInfo> { lightInfo, heavyInfo };
                        flashLfqResults.SpectraFiles.Add(lightInfo);
                        flashLfqResults.SpectraFiles.Add(heavyInfo);
                    }

                    //This step converts the quantification intensities from light/heavy to original/newlySynthesized by splitting up the missed cleavage mixtures
                    foreach (KeyValuePair<string, List<FlashLFQ.Peptide>> kvp in unlabeledToPeptidesDictionary)
                    {
                        string                 unlabeledSequence = kvp.Key; //this will be the key for the new quant entry
                        List<FlashLFQ.Peptide> peptides          = kvp.Value;

                        //sometimes it's one if there is no label site on the peptide (e.g. label K, peptide is PEPTIDER); keep it unchanged
                        if (peptides.Count == 1)
                        {
                            updatedPeptides.Add(peptides[0]);
                            continue;
                        }

                        //Missed cleavages can yield multiple peptides (e.g. 1 missed = LL, LH, HH; 2 missed = LLL, LLH, LHH, HHH; etc)
                        //Compress into 2 values: Light and Heavy
                        FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, peptides[0].UseForProteinQuant, CleanPastProteinQuant(peptides[0].ProteinGroups)); //needed to keep protein info.

                        //the index into the list is used as the number of heavy amino acids; these do not depend on the file
                        int              maxNumberHeavyAminoAcids = peptides.Count - 1;
                        FlashLFQ.Peptide firstPeptide             = peptides[0];
                        FlashLFQ.Peptide lastPeptide              = peptides[maxNumberHeavyAminoAcids];

                        foreach (SpectraFileInfo info in spectraFileInfo)
                        {
                            double lightIntensity              = 0;
                            double heavyIntensity              = 0;
                            int    numUniquePeptidesQuantified = 0;

                            for (int numHeavyAminoAcids = 0; numHeavyAminoAcids <= maxNumberHeavyAminoAcids; numHeavyAminoAcids++)
                            {
                                double totalIntensity = peptides[numHeavyAminoAcids].GetIntensity(info);
                                if (totalIntensity <= 0)
                                {
                                    continue;
                                }

                                //prevent confidence of a ratio if only the HL (and not the LL or HH) is observed.
                                //If LL or HH is observed (but not any other), the user knows the ratio is only from one peak.
                                bool isFullyLightOrFullyHeavy = numHeavyAminoAcids == 0 || numHeavyAminoAcids == maxNumberHeavyAminoAcids;
                                numUniquePeptidesQuantified += isFullyLightOrFullyHeavy ? 2 : 1;

                                //attribute the intensity proportionally to the fraction of heavy amino acids
                                double partHeavyIntensity = totalIntensity * numHeavyAminoAcids / maxNumberHeavyAminoAcids;
                                lightIntensity += totalIntensity - partHeavyIntensity;
                                heavyIntensity += partHeavyIntensity;
                            }

                            //If only a mixed peptide with a missed cleavage was identified, reset the intensity values to zero so the user doesn't get a discrete, inaccurate measurement
                            if (numUniquePeptidesQuantified < 2)
                            {
                                lightIntensity = 0;
                                heavyIntensity = 0;
                            }

                            List<SpectraFileInfo> updatedInfo = originalToLabeledFileInfoDictionary[info];
                            SpectraFileInfo       startInfo   = updatedInfo[0];
                            SpectraFileInfo       endInfo     = updatedInfo[1];

                            updatedPeptide.SetIntensity(startInfo, lightIntensity);                       //assign the corrected light intensity
                            updatedPeptide.SetDetectionType(startInfo, firstPeptide.GetDetectionType(info));
                            updatedPeptide.SetIntensity(endInfo, heavyIntensity);                         //assign the corrected heavy intensity to the heavy file
                            updatedPeptide.SetDetectionType(endInfo, lastPeptide.GetDetectionType(info)); //could include the mixed here if it really matters
                        }

                        //add the updated peptide to the list
                        updatedPeptides.Add(updatedPeptide);
                    }
                }
                else //multiplex
                {
                    foreach (KeyValuePair<string, List<FlashLFQ.Peptide>> kvp in unlabeledToPeptidesDictionary)
                    {
                        string                 unlabeledSequence     = kvp.Key;
                        List<FlashLFQ.Peptide> peptides              = kvp.Value;
                        FlashLFQ.Peptide       representativePeptide = peptides[0];
                        FlashLFQ.Peptide       updatedPeptide        = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, representativePeptide.UseForProteinQuant, CleanPastProteinQuant(representativePeptide.ProteinGroups)); //needed to keep protein info.

                        //foreach original file
                        foreach (SpectraFileInfo info in spectraFileInfo)
                        {
                            List<SpectraFileInfo> filesForThisFile = originalToLabeledFileInfoDictionary[info];

                            //the files and the peptides can use the same index, because there should be a distinct file for each label/peptide
                            for (int i = 0; i < peptides.Count; i++)
                            {
                                SpectraFileInfo  currentInfo    = filesForThisFile[i];
                                FlashLFQ.Peptide currentPeptide = peptides[i];
                                updatedPeptide.SetIntensity(currentInfo, currentPeptide.GetIntensity(info));
                                updatedPeptide.SetDetectionType(currentInfo, currentPeptide.GetDetectionType(info));
                            }
                        }

                        updatedPeptides.Add(updatedPeptide);
                    }
                }

                //Update peptides: replace the previous peptide results with the converted ones
                var peptideResults = flashLfqResults.PeptideModifiedSequences;
                peptideResults.Clear();
                foreach (FlashLFQ.Peptide peptide in updatedPeptides)
                {
                    peptideResults.Add(peptide.Sequence, peptide);
                }

                //Do protein quant
                flashLfqResults.CalculateProteinResultsTop3(true);

                //update proteingroups to have all files for quantification
                if (proteinGroups != null)
                {
                    List<SpectraFileInfo> allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList();
                    foreach (ProteinGroup proteinGroup in proteinGroups)
                    {
                        proteinGroup.FilesForQuantification = allInfo;
                        proteinGroup.IntensitiesByFile      = new Dictionary<SpectraFileInfo, double>();

                        //the lookup does not depend on the file, so do it once per protein group
                        bool wasQuantified = flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup);

                        foreach (SpectraFileInfo spectraFile in allInfo)
                        {
                            //zero is needed for decoys/contaminants/proteins that aren't quantified
                            double intensity = wasQuantified ? flashLfqProteinGroup.GetIntensity(spectraFile) : 0;
                            proteinGroup.IntensitiesByFile.Add(spectraFile, intensity);
                        }
                    }
                }

                //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
                var lfqPeaks = flashLfqResults.Peaks;
                foreach (SpectraFileInfo key in lfqPeaks.Keys.ToList())
                {
                    foreach (ChromatographicPeak peak in lfqPeaks[key])
                    {
                        //only peaks with at least one identification carrying a label need their identifications rewritten
                        if (!peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, allSilacLabels) != null))
                        {
                            continue;
                        }

                        List<Identification> updatedIds = new List<Identification>();
                        foreach (var id in peak.Identifications)
                        {
                            string           baseSequence = id.BaseSequence;
                            string           fullSequence = id.ModifiedSequence;
                            List<SilacLabel> labels       = GetRelevantLabelsFromBaseSequenceForOutput(id.BaseSequence, allSilacLabels);
                            if (labels != null)
                            {
                                //apply every relevant label in turn to both sequence representations
                                foreach (SilacLabel label in labels)
                                {
                                    baseSequence = GetSilacLightBaseSequence(baseSequence, label);
                                    fullSequence = GetSilacLightFullSequence(fullSequence, label);
                                }
                            }

                            //copy the identification, swapping in the converted sequences
                            updatedIds.Add(new Identification(
                                id.FileInfo,
                                baseSequence,
                                fullSequence,
                                id.MonoisotopicMass,
                                id.Ms2RetentionTimeInMinutes,
                                id.PrecursorChargeState,
                                id.ProteinGroups.ToList(),
                                id.OptionalChemicalFormula,
                                id.UseForProteinQuant
                                ));
                        }

                        peak.Identifications.Clear();
                        peak.Identifications.AddRange(updatedIds);
                    }
                }
            }

            //convert all psms into human readable format
            foreach (PeptideSpectralMatch psm in allPsms)
            {
                psm.ResolveHeavySilacLabel(allSilacLabels, modsToWriteSelection);
            }
        }
        //If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
        //Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
        //Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
        //This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
        //i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
        public static void SilacConversionsPostQuantification(List <SilacLabel> silacLabels, List <SpectraFileInfo> spectraFileInfo, List <ProteinGroup> ProteinGroups,
                                                              HashSet <DigestionParams> ListOfDigestionParams, Dictionary <string, List <string> > silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
                                                              List <PeptideSpectralMatch> allPsms, Dictionary <string, int> ModsToWriteSelection, bool Integrate)
        {
            bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);


            //MAKE NEW RAW FILES
            //update number of spectra files to include a new file for each label*condition
            Dictionary <SpectraFileInfo, string>          fileToLabelDictionary  = new Dictionary <SpectraFileInfo, string>();          //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
            Dictionary <SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary <SpectraFileInfo, SpectraFileInfo>(); //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
            List <SpectraFileInfo> silacSpectraFileInfo = new List <SpectraFileInfo>();                                                 //new files

            //foreach existing file
            foreach (SpectraFileInfo originalFile in spectraFileInfo)
            {
                //add the existing file as the light
                silacSpectraFileInfo.Add(originalFile);
                //foreach label, add a new file with the label
                foreach (SilacLabel label in silacLabels)
                {
                    SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
                    silacSpectraFileInfo.Add(silacFile);
                    fileToLabelDictionary[silacFile]  = label.MassDifference;
                    labeledToUnlabeledFile[silacFile] = originalFile;
                }
            }


            //UPDATE PROTEIN GROUPS
            //remove the heavy protein groups so that there are only light ones
            //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
            HashSet <SpectraFileInfo> lightFilesToRemove = new HashSet <SpectraFileInfo>(); //this is only used when there user specified no unlabeled proteins

            if (ProteinGroups != null)                                                      //if we did parsimony
            {
                List <EngineLayer.ProteinGroup> silacProteinGroups = new List <EngineLayer.ProteinGroup>();
                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                //foreach protein group (which has its own quant for each file)
                foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
                {
                    proteinGroup.FilesForQuantification = silacSpectraFileInfo;                                                    //update fileinfo for the group
                                                                                                                                   //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
                    if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List <string> silacSubGroupNames)) //try to find the light protein groups. If it's not light, ignore it
                    {
                        //the out variable contains all the other heavy protein groups that were generated for this light protein group
                        //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                        //If the user didn't specify to search light intensities, then don't output them
                        Dictionary <SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                        List <SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList();

                        //go through all files (including "silac" files)
                        List <ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList(); //find the protein groups where the accession contains "light" accession of the current protein group
                        foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo)                                                         //for every file (light and heavy)
                        {
                            //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                            if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                            {
                                string       labelSignature = fileToLabelDictionary[fileInfo];                                                                 //a string associated with a silac label
                                ProteinGroup foundGroup     = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein groups containing this label
                                updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]];  //update the intensity for that label in the light group
                            }
                            //else do nothing. The light version is already in the dictionary
                        }

                        //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                        //we need to remove these unlabeled peptides/proteins before output
                        if (!outputLightIntensities)
                        {
                            foreach (SpectraFileInfo info in lightKeys)
                            {
                                updatedIntensitiesByFile.Remove(info);
                                proteinGroup.FilesForQuantification.Remove(info);
                                lightFilesToRemove.Add(info);
                            }
                        }

                        silacProteinGroups.Add(proteinGroup);
                    }
                }

                //update
                ProteinGroups.Clear();
                ProteinGroups.AddRange(silacProteinGroups);
                //remove light files (if necessary)
                foreach (SpectraFileInfo info in lightFilesToRemove)
                {
                    FlashLfqResults.SpectraFiles.Remove(info);
                }

                //UPDATE FLASHLFQ PROTEINS
                if (FlashLfqResults != null)                                                                     //can be null if nothing was quantified (all peptides are ambiguous)
                {
                    Dictionary <string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups
                                                                                                                 //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
                    var keys = flashLfqProteins.Keys.ToList();
                    foreach (string key in keys)
                    {
                        if (silacLabels.Any(x => key.Contains(x.MassDifference)))
                        {
                            flashLfqProteins.Remove(key);
                        }
                    }
                }
            }

            ////UPDATE FLASHLFQ SPECTRA FILES
            if (FlashLfqResults != null)                                             //can be null if nothing was quantified (all peptides are ambiguous)
            {
                List <SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
                foreach (SpectraFileInfo info in silacSpectraFileInfo)
                {
                    if (!originalFiles.Contains(info))
                    {
                        originalFiles.Add(info);
                    }
                }
            }

            //UPDATE PEPTIDE INFO
            //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
            //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
            //We are unable to do this for Parameters.AllPsms, because they store proteins and start/end residues instead
            //for Psms, we need to convert during the writing.
            for (int i = 0; i < allPsms.Count; i++)
            {
                allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);
            }

            //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
            if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
            {
                var lfqPeaks = FlashLfqResults.Peaks;
                List <SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

                foreach (SpectraFileInfo key in peakKeys)
                {
                    List <FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
                    for (int i = 0; i < peaks.Count; i++)
                    {
                        var peak = peaks[i];
                        List <Identification> identifications = new List <Identification>();
                        //check if we're removing light peaks and if it's a light peak
                        if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                        {
                            peaks.RemoveAt(i);
                            i--;
                        }
                        else
                        {
                            foreach (var id in peak.Identifications)
                            {
                                SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                                HashSet <FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                                List <FlashLFQ.ProteinGroup>    updatedGroups  = new List <FlashLFQ.ProteinGroup>();
                                foreach (FlashLFQ.ProteinGroup group in originalGroups)
                                {
                                    string groupName = group.ProteinGroupName;
                                    if (label == null) //if light
                                    {
                                        updatedGroups.Add(group);
                                    }
                                    else
                                    {
                                        string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                        int    labelIndex  = groupName.IndexOf(labelString);
                                        if (labelIndex != -1) //labelIndex == 1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                        {
                                            groupName = groupName.Substring(0, labelIndex);
                                            updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                                        }
                                    }
                                }

                                Identification updatedId = new Identification(
                                    id.fileInfo,
                                    GetSilacLightBaseSequence(id.BaseSequence, label),
                                    GetSilacLightFullSequence(id.ModifiedSequence, label),
                                    id.monoisotopicMass,
                                    id.ms2RetentionTimeInMinutes,
                                    id.precursorChargeState,
                                    updatedGroups,
                                    id.OptionalChemicalFormula,
                                    id.UseForProteinQuant
                                    );
                                identifications.Add(updatedId);
                            }
                            FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                            for (int j = 1; j < identifications.Count; j++) //add all the original identification
                            {
                                updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                            }
                            updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes;  //need to set isotopicEnevelopes, since the new identifications didn't have them.
                            updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                            peaks[i] = updatedPeak;
                        }
                    }
                }

                //convert all lfq peptides from heavy to light for output
                Dictionary <string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
                List <string> pwsmKeys = lfqPwsms.Keys.ToList();
                foreach (string key in pwsmKeys)
                {
                    FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
                    SilacLabel       label          = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
                    if (label != null)                                                                                                                           //if it's a heavy peptide
                    {
                        lfqPwsms.Remove(key);                                                                                                                    //get rid of it
                                                                                                                                                                 //update the light version
                        string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false);                                                 //get the light sequence
                        List <SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                        //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                        if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                        {
                            foreach (SpectraFileInfo heavyFile in heavyFiles)
                            {
                                SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                                lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                                lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                            }
                        }
                        else //if there's no light, create a new entry for the heavy
                        {
                            //new peptide
                            FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);
                            //update the heavy info, set the light values to zero
                            foreach (SpectraFileInfo info in heavyFiles)
                            {
                                updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(info));
                                updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(info));
                            }

                            //set the other values to zero
                            List <SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                            foreach (SpectraFileInfo info in otherInfo)
                            {
                                updatedPeptide.SetIntensity(info, 0);
                                updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                            }
                            HashSet <FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                            HashSet <FlashLFQ.ProteinGroup> updatedGroups  = new HashSet <FlashLFQ.ProteinGroup>();
                            foreach (FlashLFQ.ProteinGroup group in originalGroups)
                            {
                                string groupName = group.ProteinGroupName;
                                groupName = groupName.Replace(label.MassDifference, "");
                                updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                            }
                            updatedPeptide.proteinGroups      = updatedGroups;
                            lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
                        }
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Reads a tab-separated experimental design file and builds one <see cref="SpectraFileInfo"/> for every
        /// row whose file name matches one of the supplied spectra file paths.
        /// </summary>
        /// <param name="experimentalDesignPath">
        /// Path of the experimental design file. The first line is skipped (it is treated as a header row).
        /// Each remaining row must have at least five tab-separated cells, in this order:
        /// file name with extension, condition, biorep, fraction, techrep.
        /// </param>
        /// <param name="fullFilePathsWithExtension">Full paths of the spectra files that the design is required to define.</param>
        /// <param name="errors">
        /// Receives at most one message describing the first problem that was encountered; empty when reading succeeded.
        /// </param>
        /// <returns>
        /// The entries parsed before the method returned. When <paramref name="errors"/> is not empty the list
        /// may be incomplete.
        /// </returns>
        public static List <SpectraFileInfo> ReadExperimentalDesign(string experimentalDesignPath, List <string> fullFilePathsWithExtension, out List <string> errors)
        {
            var expDesign = new List<SpectraFileInfo>();

            errors = new List<string>();

            if (!File.Exists(experimentalDesignPath))
            {
                errors.Add("Experimental design file not found!");
                return expDesign;
            }

            var lines = File.ReadAllLines(experimentalDesignPath);

            // index 0 is the header row, so data starts at index 1
            for (int i = 1; i < lines.Length; i++)
            {
                // A blank row (extra trailing newline, empty spreadsheet row exported as tabs only) carries no
                // information. Skip it instead of rejecting the whole design with a "found 1 cells" error.
                if (string.IsNullOrWhiteSpace(lines[i]))
                {
                    continue;
                }

                var split = lines[i].Split(new char[] { '\t' });

                if (split.Length < 5)
                {
                    errors.Add("Error: The experimental design was not formatted correctly. Expected 5 cells, but found " + split.Length + " on line " + (i + 1));
                    return expDesign;
                }

                string fileNameWithExtension = split[0];
                string condition             = split[1];
                string strBiorep             = split[2];
                string strFraction           = split[3];
                string strTechrep            = split[4];

                if (!int.TryParse(strBiorep, out int biorep))
                {
                    errors.Add("Error: The experimental design was not formatted correctly. The biorep on line " + (i + 1) + " is not an integer");
                    return expDesign;
                }
                if (!int.TryParse(strFraction, out int fraction))
                {
                    errors.Add("Error: The experimental design was not formatted correctly. The fraction on line " + (i + 1) + " is not an integer");
                    return expDesign;
                }
                if (!int.TryParse(strTechrep, out int techrep))
                {
                    errors.Add("Error: The experimental design was not formatted correctly. The techrep on line " + (i + 1) + " is not an integer");
                    return expDesign;
                }

                var foundFilePath = fullFilePathsWithExtension.FirstOrDefault(p => Path.GetFileName(p) == fileNameWithExtension);
                if (foundFilePath == null)
                {
                    // the experimental design could include files that aren't in the spectra file list but that's ok.
                    // it's fine to have extra files defined in the experimental design as long as the remainder is valid
                    continue;
                }

                // the values in the file are decremented by one before being handed to SpectraFileInfo;
                // note the constructor takes techrep before fraction, unlike the column order in the file
                expDesign.Add(new SpectraFileInfo(foundFilePath, condition, biorep - 1, techrep - 1, fraction - 1));
            }

            // check to see if there are any files missing from the experimental design.
            // The lookup is a set and the result is materialized once, so the query is not re-run
            // for both the emptiness check and the error message.
            var filesDefinedInExpDesign = new HashSet<string>(expDesign.Select(p => p.FullFilePathWithExtension));
            List<string> notDefined = fullFilePathsWithExtension.Where(p => !filesDefinedInExpDesign.Contains(p)).ToList();

            if (notDefined.Count > 0)
            {
                errors.Add("Error: The experimental design did not contain the file(s): " + string.Join(", ", notDefined));
                return expDesign;
            }

            // check to see if the design is valid; a null result means no problem was reported
            var designError = GetErrorsInExperimentalDesign(expDesign);

            if (designError != null)
            {
                errors.Add(designError);
            }

            return expDesign;
        }
Beispiel #6
0
        /// <summary>
        /// Benchmark entry point: times writing and reading back a generated TOML file, then reading an mzML
        /// file and writing it out again, printing the elapsed milliseconds of each step.
        /// </summary>
        /// <param name="args">
        /// Four positional arguments: [0] TOML file path to write and then read, [1] number of key-value pairs
        /// to generate, [2] mzML file path to read, [3] mzML file path to write.
        /// </param>
        private static void Main(string[] args)
        {
            Console.WriteLine("Welcome to MetaMorpheus");

            // Validate the command line up front: without this, a missing argument surfaces as an
            // IndexOutOfRangeException only after earlier benchmark steps have already run.
            if (args == null || args.Length < 4)
            {
                Console.Error.WriteLine("Usage: <tomlPath> <numberOfTomlPairs> <mzmlInputPath> <mzmlOutputPath>");
                Environment.ExitCode = 1;
                return;
            }

            // parse the pair count once instead of re-parsing it on every loop iteration
            if (!int.TryParse(args[1], out int pairCount))
            {
                Console.Error.WriteLine("The number of toml key-value pairs must be an integer, but was: " + args[1]);
                Environment.ExitCode = 1;
                return;
            }


            // EDGAR: Creating the FlashLfqEngine is unfortunately required,
            // otherwise the code just crashes when executed.
            // (The engine is never run; only its construction is needed.)

            SpectraFileInfo mzml = new SpectraFileInfo("sliced-mzml.mzml", "a", 0, 1, 0);
            var             pg   = new FlashLFQ.ProteinGroup("MyProtein", "gene", "org");
            Identification  id3  = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR",
                                                      1350.65681, 94.12193, 2, new List <FlashLFQ.ProteinGroup> {
                pg
            });
            Identification id4 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR",
                                                    1350.65681, 94.05811, 2, new List <FlashLFQ.ProteinGroup> {
                pg
            });
            FlashLfqEngine engine = new FlashLfqEngine(new List <Identification> {
                id3, id4
            }, normalize: true);

            // EDGAR: End of part required to avoid crash


            //generate toml: keys are the decimal strings "0".."n-1", values are the matching integers
            Console.WriteLine("generating toml with {0} key-value pairs", args[1]);
            var tomlData = Toml.Create();

            for (int i = 0; i < pairCount; i++)
            {
                tomlData.Add(i.ToString(), i);
            }

            //write toml
            Console.WriteLine("writing toml file {0}", args[0]);
            Stopwatch stopwatch = Stopwatch.StartNew();

            Toml.WriteFile(tomlData, args[0]);
            stopwatch.Stop();
            Console.WriteLine("Time elapsed for toml write: {0}\n", stopwatch.ElapsedMilliseconds);


            //read file
            Console.WriteLine("reading toml file {0}", args[0]);
            stopwatch = Stopwatch.StartNew();
            var tomlRead = Toml.ReadFile(args[0]);

            stopwatch.Stop();
            Console.WriteLine("Time elapsed for toml read: {0}\n", stopwatch.ElapsedMilliseconds);


            //read mzml file
            Console.WriteLine("reading mzml file {0}", args[2]);
            stopwatch = Stopwatch.StartNew();
            var msData = Mzml.LoadAllStaticData(args[2]);

            stopwatch.Stop();
            Console.WriteLine("Time elapsed for mzML read: {0}\n", stopwatch.ElapsedMilliseconds);


            //write mzml file
            Console.WriteLine("writing mzml file {0}", args[3]);
            stopwatch = Stopwatch.StartNew();
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msData, args[3], false);
            stopwatch.Stop();
            Console.WriteLine("Time elapsed for mzML write: {0}", stopwatch.ElapsedMilliseconds);
        }
Beispiel #7
0
        /// <summary>
        /// Writes a five-scan MS1 mzML file for the peptide "PEPTIDE" in which the scans at indices 2 and 3
        /// contain only a single 401.0 m/z peak, submits two identifications of that peptide (retention times
        /// 1.101 and 1.401) to FlashLFQ, and asserts that the first file's peak list holds exactly one peak
        /// whose apex retention time is 1.1.
        /// </summary>
        public static void TestMergePeaks()
        {
            string mzmlFileName  = "myMzml.mzML";
            string sequence      = "PEPTIDE";
            double baseIntensity = 1e6;

            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // full path of the spectra file; used both for writing it and for the SpectraFileInfo below
            string mzmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, mzmlFileName);

            // build the MS1 scans; the scan at index 1 is three times as intense as the others
            double[] scanScaling = { 1, 3, 1, 1, 1 };
            var      ms1Scans    = new MsDataScan[5];

            for (int scanIndex = 0; scanIndex < ms1Scans.Length; scanIndex++)
            {
                ChemicalFormula      formula  = new Proteomics.AminoAcidPolymer.Peptide(sequence).GetChemicalFormula();
                IsotopicDistribution envelope = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);

                double[] mzValues;
                double[] peakIntensities;

                if (scanIndex == 2 || scanIndex == 3)
                {
                    // these two scans do not contain the peptide, only one unrelated peak
                    mzValues        = new[] { 401.0 };
                    peakIntensities = new[] { 1000.0 };
                }
                else
                {
                    mzValues        = envelope.Masses.Select(mass => mass.ToMz(1)).ToArray();
                    peakIntensities = envelope.Intensities.Select(abundance => abundance * baseIntensity * scanScaling[scanIndex]).ToArray();
                }

                int oneBasedNumber = scanIndex + 1;

                ms1Scans[scanIndex] = new MsDataScan(
                    massSpectrum: new MzSpectrum(mzValues, peakIntensities, false),
                    oneBasedScanNumber: oneBasedNumber,
                    msnOrder: 1,
                    isCentroid: true,
                    polarity: Polarity.Positive,
                    retentionTime: 1.0 + scanIndex / 10.0,
                    scanWindowRange: new MzRange(400, 1600),
                    scanFilter: "f",
                    mzAnalyzer: MZAnalyzerType.Orbitrap,
                    totalIonCurrent: peakIntensities.Sum(),
                    injectionTime: 1.0,
                    noiseData: null,
                    nativeId: "scan=" + oneBasedNumber);
            }

            // persist the scans as an .mzML file
            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(ms1Scans), mzmlPath, false);

            // describe the spectra file for FlashLFQ
            SpectraFileInfo spectraFile = new SpectraFileInfo(mzmlPath, "", 0, 0, 0);

            // two identifications of the same peptide at different MS2 retention times
            var proteinGroup = new ProteinGroup("MyProtein", "gene", "org");

            Identification IdentifyAt(double ms2RetentionTime)
            {
                double monoisotopicMass = new Proteomics.AminoAcidPolymer.Peptide(sequence).MonoisotopicMass;
                return new Identification(spectraFile, sequence, sequence, monoisotopicMass, ms2RetentionTime, 1,
                                          new List<ProteinGroup> { proteinGroup });
            }

            Identification earlyId = IdentifyAt(1.1 + 0.001);
            Identification lateId  = IdentifyAt(1.4 + 0.001);

            // build and run the FlashLFQ engine
            FlashLFQEngine engine = new FlashLFQEngine(new List<Identification> { earlyId, lateId });
            var results = engine.Run();

            var firstFilePeaks = results.Peaks.First().Value;
            ChromatographicPeak onlyPeak = firstFilePeaks.First();

            Assert.That(firstFilePeaks.Count == 1);
            Assert.That(onlyPeak.Apex.RetentionTime == 1.1);
        }
Beispiel #8
0
        /// <summary>
        /// Runs FlashLFQ with <c>normalize: true</c> on the same peptide identified in a .raw and an .mzML file,
        /// with the two files differing by biorep, by condition, by techrep, and finally split over two
        /// fractions each. Each scenario asserts that the rounded intensities are positive and equal between
        /// the two files; the first three scenarios are also asserted to agree with one another.
        /// </summary>
        public static void TestFlashLfqNormalization()
        {
            string rawPath      = Path.Combine(TestContext.CurrentContext.TestDirectory, @"sliced-raw.raw");
            string mzmlPath     = Path.Combine(TestContext.CurrentContext.TestDirectory, @"sliced-mzml.mzml");
            string sequence     = "EGFQVADGPLYR";
            var    proteinGroup = new ProteinGroup("MyProtein", "gene", "org");

            // every identification in this test is the same peptide/mass/RT/charge; only the file differs
            Identification IdentifyIn(SpectraFileInfo file)
            {
                return new Identification(file, sequence, sequence, 1350.65681, 94.12193, 2, new List<ProteinGroup> { proteinGroup });
            }

            // quantifies one raw/mzml pair with normalization on and reports the rounded intensity of each file's first peak
            void QuantifyPair(SpectraFileInfo rawFile, SpectraFileInfo mzmlFile, out int mzmlIntensity, out int rawIntensity)
            {
                var pairResults = new FlashLFQEngine(new List<Identification> { IdentifyIn(rawFile), IdentifyIn(mzmlFile) }, normalize: true).Run();

                mzmlIntensity = (int)System.Math.Round(pairResults.Peaks[mzmlFile].First().Intensity, 0);
                rawIntensity  = (int)System.Math.Round(pairResults.Peaks[rawFile].First().Intensity, 0);
            }

            // ********************************* check biorep normalization *********************************
            QuantifyPair(
                new SpectraFileInfo(rawPath, "a", 0, 0, 0),
                new SpectraFileInfo(mzmlPath, "a", 1, 0, 0),
                out int biorepMzml,
                out int biorepRaw);

            Assert.That(biorepMzml > 0);
            Assert.That(biorepMzml == biorepRaw);

            // ********************************* check condition normalization *********************************
            QuantifyPair(
                new SpectraFileInfo(rawPath, "a", 0, 0, 0),
                new SpectraFileInfo(mzmlPath, "b", 0, 0, 0),
                out int conditionMzml,
                out int conditionRaw);

            Assert.That(conditionMzml > 0);
            Assert.That(conditionMzml == conditionRaw);

            // ********************************* check techrep normalization *********************************
            QuantifyPair(
                new SpectraFileInfo(rawPath, "a", 0, 0, 0),
                new SpectraFileInfo(mzmlPath, "a", 0, 1, 0),
                out int techrepMzml,
                out int techrepRaw);

            Assert.That(techrepMzml > 0);
            Assert.That(techrepMzml == techrepRaw);

            // the three scenarios above must also agree with each other
            Assert.That(biorepMzml == conditionMzml);
            Assert.That(biorepMzml == techrepMzml);


            // ********************************* check fraction normalization *********************************
            var rawFraction1  = new SpectraFileInfo(rawPath, "a", 0, 0, 0);
            var rawFraction2  = new SpectraFileInfo(rawPath, "a", 0, 0, 1);
            var mzmlFraction1 = new SpectraFileInfo(mzmlPath, "a", 1, 0, 0);
            var mzmlFraction2 = new SpectraFileInfo(mzmlPath, "a", 1, 0, 1);

            var fractionIds = new List<Identification>
            {
                IdentifyIn(rawFraction1),
                IdentifyIn(rawFraction2),
                IdentifyIn(mzmlFraction1),
                IdentifyIn(mzmlFraction2)
            };

            var fractionResults = new FlashLFQEngine(fractionIds, normalize: true).Run();

            // here the peptide-level intensities are summed over both fractions of each sample
            var peptide       = fractionResults.PeptideBaseSequences["EGFQVADGPLYR"];
            int rawSummed     = (int)System.Math.Round(peptide.GetIntensity(rawFraction1) + fractionResults.PeptideBaseSequences["EGFQVADGPLYR"].GetIntensity(rawFraction2));
            int mzmlSummed    = (int)System.Math.Round(fractionResults.PeptideBaseSequences["EGFQVADGPLYR"].GetIntensity(mzmlFraction1) + fractionResults.PeptideBaseSequences["EGFQVADGPLYR"].GetIntensity(mzmlFraction2));

            Assert.That(rawSummed > 0);
            Assert.That(rawSummed == mzmlSummed);
        }
Beispiel #9
0
        public static void TestFlashLfqMatchBetweenRunsProteinQuant()
        {
            List <string> filesToWrite = new List <string> {
                "mzml_1", "mzml_2"
            };
            List <string> pepSequences = new List <string> {
                "PEPTIDE", "PEPTIDEV", "PEPTIDEVV", "PEPTIDEVVV", "PEPTIDEVVVV"
            };
            double intensity = 1e6;

            double[] file1Rt = new double[] { 1.01, 1.02, 1.03, 1.04, 1.05 };
            double[] file2Rt = new double[] { 1.015, 1.030, 1.036, 1.050, 1.065 };

            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // generate mzml files (5 peptides each)
            for (int f = 0; f < filesToWrite.Count; f++)
            {
                // 1 MS1 scan per peptide
                MsDataScan[] scans = new MsDataScan[5];

                for (int p = 0; p < pepSequences.Count; p++)
                {
                    ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(pepSequences[p]).GetChemicalFormula();
                    IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                    double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                    double[]             intensities = dist.Intensities.Select(v => v * intensity).ToArray();
                    double rt;
                    if (f == 0)
                    {
                        rt = file1Rt[p];
                    }
                    else
                    {
                        rt = file2Rt[p];
                    }

                    // add the scan
                    scans[p] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: p + 1, msnOrder: 1, isCentroid: true,
                                              polarity: Polarity.Positive, retentionTime: rt, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                              mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (p + 1));
                }

                // write the .mzML
                IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans),
                                                                              Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[f] + ".mzML"), false);
            }

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[0] + ".mzML"), "a", 0, 0, 0);
            SpectraFileInfo file2 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[1] + ".mzML"), "a", 1, 0, 0);

            // create some PSMs
            var pg = new ProteinGroup("MyProtein", "gene", "org");
            var myMbrProteinGroup = new ProteinGroup("MyMbrProtein", "MbrGene", "org");

            Identification id1 = new Identification(file1, "PEPTIDE", "PEPTIDE",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDE").MonoisotopicMass, file1Rt[0] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id2 = new Identification(file1, "PEPTIDEV", "PEPTIDEV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEV").MonoisotopicMass, file1Rt[1] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id3 = new Identification(file1, "PEPTIDEVV", "PEPTIDEVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVV").MonoisotopicMass, file1Rt[2] + 0.001, 1, new List <ProteinGroup> {
                myMbrProteinGroup
            });
            Identification id4 = new Identification(file1, "PEPTIDEVVV", "PEPTIDEVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVV").MonoisotopicMass, file1Rt[3] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id5 = new Identification(file1, "PEPTIDEVVVV", "PEPTIDEVVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVVV").MonoisotopicMass, file1Rt[4] + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            Identification id6 = new Identification(file2, "PEPTIDE", "PEPTIDE",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDE").MonoisotopicMass, file2Rt[0] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id7 = new Identification(file2, "PEPTIDEV", "PEPTIDEV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEV").MonoisotopicMass, file2Rt[1] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            // missing ID 8 - MBR feature
            Identification id9 = new Identification(file2, "PEPTIDEVVV", "PEPTIDEVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVV").MonoisotopicMass, file2Rt[3] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id10 = new Identification(file2, "PEPTIDEVVVV", "PEPTIDEVVVV",
                                                     new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVVV").MonoisotopicMass, file2Rt[4] + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            // test with top3 protein quant engine
            FlashLFQEngine engine = new FlashLFQEngine(new List <Identification> {
                id1, id2, id3, id4, id5, id6, id7, id9, id10
            }, matchBetweenRuns: true);
            var results = engine.Run();

            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);

            // test with advanced protein quant engine
            engine = new FlashLFQEngine(new List <Identification> {
                id1, id2, id3, id4, id5, id6, id7, id9, id10
            }, matchBetweenRuns: true, advancedProteinQuant: true);
            results = engine.Run();

            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);
        }
        // Example #10
        // 0
        /// <summary>
        /// Writes two synthetic mzML files (one MS1 scan per peptide, three peptides per file), runs
        /// FlashLFQ with normalization off and advanced protein quantification on, and checks that the
        /// intensity reported for "MyProtein" in each file falls inside the expected window.
        /// </summary>
        public static void TestFlashLfqAdvancedProteinQuant()
        {
            List<string> filesToWrite = new List<string> {
                "mzml_1", "mzml_2"
            };
            List<string> pepSequences = new List<string> {
                "PEPTIDE", "MYPEPTIDE", "VVVVVPEPTIDE"
            };

            // amounts[f, p] scales the isotopic envelope written for peptide p in file f
            double[,] amounts = new double[2, 3] {
                { 1000000, 1000000, 1000000 },
                { 2000000, 2000000, 900000 }
            };
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // resolve each output path once so the writer and the SpectraFileInfo objects below
            // are guaranteed to point at the same files
            List<string> mzmlPaths = filesToWrite
                .Select(name => Path.Combine(TestContext.CurrentContext.TestDirectory, name + ".mzML"))
                .ToList();

            // generate mzml files (one MS1 scan per peptide)
            for (int f = 0; f < filesToWrite.Count; f++)
            {
                // size the buffer from the peptide list instead of a literal, so editing
                // pepSequences cannot overflow the array or leave null scans behind
                MsDataScan[] scans = new MsDataScan[pepSequences.Count];

                for (int p = 0; p < pepSequences.Count; p++)
                {
                    ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(pepSequences[p]).GetChemicalFormula();
                    IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                    double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                    double[]             intensities = dist.Intensities.Select(v => v * amounts[f, p]).ToArray();

                    // add the scan; retention times are 1.0, 1.1, 1.2 for p = 0, 1, 2
                    scans[p] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: p + 1, msnOrder: 1, isCentroid: true,
                                              polarity: Polarity.Positive, retentionTime: 1.0 + (p / 10.0), scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                              mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (p + 1));
                }

                // write the .mzML
                IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), mzmlPaths[f], false);
            }

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(mzmlPaths[0], "a", 0, 0, 0);
            SpectraFileInfo file2 = new SpectraFileInfo(mzmlPaths[1], "a", 1, 0, 0);

            // create some PSMs; each identification's retention time sits 0.01 after its scan's
            var            pg  = new ProteinGroup("MyProtein", "gene", "org");
            Identification id1 = new Identification(file1, "PEPTIDE", "PEPTIDE", 799.35996, 1.01, 1, new List<ProteinGroup> {
                pg
            });
            Identification id2 = new Identification(file1, "MYPEPTIDE", "MYPEPTIDE", 1093.46377, 1.11, 1, new List<ProteinGroup> {
                pg
            });
            Identification id3 = new Identification(file1, "VVVVVPEPTIDE", "VVVVVPEPTIDE", 1294.70203, 1.21, 1, new List<ProteinGroup> {
                pg
            });

            Identification id4 = new Identification(file2, "PEPTIDE", "PEPTIDE", 799.35996, 1.01, 1, new List<ProteinGroup> {
                pg
            });
            Identification id5 = new Identification(file2, "MYPEPTIDE", "MYPEPTIDE", 1093.46377, 1.11, 1, new List<ProteinGroup> {
                pg
            });
            Identification id6 = new Identification(file2, "VVVVVPEPTIDE", "VVVVVPEPTIDE", 1294.70203, 1.21, 1, new List<ProteinGroup> {
                pg
            });

            // create the FlashLFQ engine
            FlashLFQEngine engine = new FlashLFQEngine(new List<Identification> {
                id1, id2, id3, id4, id5, id6
            }, normalize: false, advancedProteinQuant: true);

            // run the engine
            var results = engine.Run();

            // third peptide should be low-weighted
            // protein should be ~sum of first two peptide intensities (a little lower, because some smaller isotope peaks get skipped)
            double file1ProteinIntensity = results.ProteinGroups["MyProtein"].GetIntensity(file1);

            Assert.That(file1ProteinIntensity < 2e6, "file1 protein intensity should be below 2e6 but was " + file1ProteinIntensity);
            Assert.That(file1ProteinIntensity > 1e6, "file1 protein intensity should be above 1e6 but was " + file1ProteinIntensity);

            double file2ProteinIntensity = results.ProteinGroups["MyProtein"].GetIntensity(file2);

            Assert.That(file2ProteinIntensity < 4e6, "file2 protein intensity should be below 4e6 but was " + file2ProteinIntensity);
            Assert.That(file2ProteinIntensity > 3e6, "file2 protein intensity should be above 3e6 but was " + file2ProteinIntensity);
        }
        // Example #11
        // 0
        /// <summary>
        /// Runs FlashLFQ twice over the sliced raw/mzML test files, once with the files tagged as
        /// condition "a" and once as condition "b", merges the second result set into the first,
        /// and checks the merged peak, sequence, protein-group and spectra-file counts.
        /// </summary>
        public static void TestFlashLfqMergeResults()
        {
            // values shared by every identification in this test
            const string peptide  = "EGFQVADGPLYR";
            const double monoMass = 1350.65681;
            const double laterRt  = 94.12193;
            const double earlierRt = 94.05811;
            const int    charge   = 2;

            // both runs read the same two spectra files
            string testDirectory = TestContext.CurrentContext.TestDirectory;
            string rawPath       = Path.Combine(testDirectory, @"sliced-raw.raw");
            string mzmlPath      = Path.Combine(testDirectory, @"sliced-mzml.mzml");

            // ---- first run: condition "a" ----
            var firstRaw     = new SpectraFileInfo(rawPath, "a", 0, 0, 0);
            var firstMzml    = new SpectraFileInfo(mzmlPath, "a", 0, 1, 0);
            var firstProtein = new ProteinGroup("MyProtein", "gene", "org");

            // two PSMs per file; every identification gets its own protein-group list
            var firstIds = new List<Identification>
            {
                new Identification(firstRaw, peptide, peptide, monoMass, laterRt, charge, new List<ProteinGroup> { firstProtein }),
                new Identification(firstRaw, peptide, peptide, monoMass, earlierRt, charge, new List<ProteinGroup> { firstProtein }),
                new Identification(firstMzml, peptide, peptide, monoMass, laterRt, charge, new List<ProteinGroup> { firstProtein }),
                new Identification(firstMzml, peptide, peptide, monoMass, earlierRt, charge, new List<ProteinGroup> { firstProtein })
            };

            var firstEngine = new FlashLFQEngine(firstIds);
            var resultsA    = firstEngine.Run();

            // ---- second run: condition "b" ----
            var secondRaw     = new SpectraFileInfo(rawPath, "b", 0, 0, 0);
            var secondMzml    = new SpectraFileInfo(mzmlPath, "b", 0, 1, 0);
            var secondProtein = new ProteinGroup("MyProtein", "gene", "org");

            var secondIds = new List<Identification>
            {
                new Identification(secondRaw, peptide, peptide, monoMass, laterRt, charge, new List<ProteinGroup> { secondProtein }),
                new Identification(secondRaw, peptide, peptide, monoMass, earlierRt, charge, new List<ProteinGroup> { secondProtein }),
                new Identification(secondMzml, peptide, peptide, monoMass, laterRt, charge, new List<ProteinGroup> { secondProtein }),
                new Identification(secondMzml, peptide, peptide, monoMass, earlierRt, charge, new List<ProteinGroup> { secondProtein })
            };

            var secondEngine = new FlashLFQEngine(secondIds);
            var resultsB     = secondEngine.Run();

            // fold the second result set into the first, then inspect the combined result
            resultsA.MergeResultsWith(resultsB);

            Assert.AreEqual(4, resultsA.Peaks.Count);
            Assert.AreEqual(1, resultsA.PeptideBaseSequences.Count);
            Assert.AreEqual(1, resultsA.PeptideModifiedSequences.Count);
            Assert.AreEqual(1, resultsA.ProteinGroups.Count);
            Assert.AreEqual(4, resultsA.SpectraFiles.Count);
        }