Beispiel #1
0
        /// <summary>
        /// Collects descriptions of amino acid additions/removals whose monoisotopic mass matches
        /// <c>MassShift</c> within the tolerance <paramref name="v"/>, and stores them in <c>AA</c>
        /// as a "|"-separated string. Both single residues and ordered pairs of residues
        /// (letters 'A' through 'Z') are considered.
        /// </summary>
        /// <param name="v">Largest absolute mass difference that still counts as a match.</param>
        public void IdentifyAA(double v)
        {
            // Resolve the residues known for 'A'..'Z' once, keeping alphabetical order.
            var knownResidues = new List<Residue>();
            for (char letter = 'A'; letter <= 'Z'; letter++)
            {
                if (Residue.TryGetResidue(letter, out Residue found))
                {
                    knownResidues.Add(found);
                }
            }

            var matches = new HashSet<string>();
            foreach (Residue first in knownResidues)
            {
                // single residue gained / lost
                double singleMass = first.MonoisotopicMass;
                if (Math.Abs(singleMass - MassShift) <= v)
                {
                    matches.Add("Add " + first.Name);
                }
                if (Math.Abs(singleMass + MassShift) <= v)
                {
                    matches.Add("Remove " + first.Name);
                }

                // pair of residues gained / lost; both orderings of a pair are reported
                foreach (Residue second in knownResidues)
                {
                    double pairMass = second.MonoisotopicMass + first.MonoisotopicMass;
                    string pairName = "(" + first.Name + "+" + second.Name + ")";

                    if (Math.Abs(pairMass - MassShift) <= v)
                    {
                        matches.Add("Add " + pairName);
                    }
                    if (Math.Abs(pairMass + MassShift) <= v)
                    {
                        matches.Add("Remove " + pairName);
                    }
                }
            }

            AA = string.Join("|", matches);
        }
Beispiel #2
0
        /// <summary>
        /// Returns a copy of <paramref name="originalSequence"/> in which every match of
        /// <c>UnicodeRegex</c> and every character that <c>Residue.TryGetResidue</c> does not
        /// recognise is replaced by <paramref name="replacementCharacter"/>.
        /// </summary>
        /// <param name="originalSequence">Sequence to clean.</param>
        /// <param name="replacementCharacter">Character substituted for anything invalid.</param>
        /// <returns>The cleaned sequence.</returns>
        public static string SanitizeAminoAcidSequence(string originalSequence, char replacementCharacter)
        {
            string cleaned = UnicodeRegex.Replace(originalSequence, replacementCharacter.ToString());

            // Patch invalid characters in a single pass over a mutable buffer instead of calling
            // string.Replace (full rescan + new string allocation) once per invalid character.
            char[] characters = cleaned.ToCharArray();
            for (int r = 0; r < characters.Length; r++)
            {
                if (!Residue.TryGetResidue(characters[r], out Residue _))
                {
                    characters[r] = replacementCharacter;
                }
            }

            return new string(characters);
        }
Beispiel #3
0
        /// <summary>
        /// Verifies that a custom amino acid appended to CustomAminoAcids.txt is loaded by
        /// <c>GlobalVariables.RefreshAminoAcidDictionary</c>, and that an entry with an invalid
        /// character or an empty chemical formula makes the refresh throw a
        /// <c>MetaMorpheusException</c>.
        /// </summary>
        public static void TestCustomAminoAcidReading()
        {
            string aminoAcidPath = Path.Combine(GlobalVariables.DataDir, @"CustomAminoAcids", @"CustomAminoAcids.txt");

            try
            {
                //Manually add an entry to it
                List<string> lines = new List<string>(File.ReadAllLines(aminoAcidPath));

                lines.Add("fake\tf\t60\tC5");
                File.WriteAllLines(aminoAcidPath, lines);

                GlobalVariables.RefreshAminoAcidDictionary(); //read the file

                //test that we read the new amino acid
                Assert.IsTrue(Residue.TryGetResidue('f', out Residue r));
                Assert.IsTrue(r.MonoisotopicMass.Equals(60));

                //now crash it intentionally with an invalid character
                lines.Add("evenFaker\tX\t72\tC6");
                File.WriteAllLines(aminoAcidPath, lines);
                try
                {
                    GlobalVariables.RefreshAminoAcidDictionary(); //read the file
                    //we're trying to crash it, so if we didn't, we failed :/
                    Assert.Fail("Expected a MetaMorpheusException for an entry with an invalid amino acid character.");
                }
                catch (MetaMorpheusException)
                {
                    //Yay we passed!
                }

                //now crash it intentionally with a bad chemical formula
                lines.RemoveAt(lines.Count - 1); //get rid of that last bad one
                lines.Add("theFakest\ta\t50\t");
                File.WriteAllLines(aminoAcidPath, lines);
                try
                {
                    GlobalVariables.RefreshAminoAcidDictionary(); //read the file
                    //we're trying to crash it, so if we didn't, we failed :/
                    Assert.Fail("Expected a MetaMorpheusException for an entry with an empty chemical formula.");
                }
                catch (MetaMorpheusException)
                {
                    //Yay we passed!
                }
            }
            finally
            {
                //Always delete the modified file, even when an assertion above fails, so that the
                //deliberately broken entries cannot crash the next reader of this file.
                if (File.Exists(aminoAcidPath))
                {
                    File.Delete(aminoAcidPath);
                }
            }
        }
        /// <summary>
        /// TextChanged handler for the amino acid lookup box. An empty box disables the isotope
        /// controls; text longer than one character is truncated to its first character; a
        /// recognised residue letter loads that residue's mass and formula and enables the isotope
        /// controls; anything else shows an error message and clears the lookup box.
        /// </summary>
        /// <param name="sender">The <see cref="System.Windows.Controls.TextBox"/> whose text changed.</param>
        /// <param name="e">Event data (unused).</param>
        private void AminoAcidLookup(object sender, System.Windows.Controls.TextChangedEventArgs e)
        {
            System.Windows.Controls.TextBox textBox = (System.Windows.Controls.TextBox)sender;

            // Upper-case with the invariant culture: the culture-sensitive ToUpper() maps 'i' to a
            // dotted capital I under Turkish cultures, which would never match the residue 'I'.
            char letter = textBox.Text.ToUpperInvariant().FirstOrDefault();

            if (letter == '\0') // empty text box
            {
                SetIsotopeControlsEnabled(false);
            }
            else if (textBox.Text.Length != 1) //if too long, don't let them add stuff and don't change anything
            {
                textBox.Text = textBox.Text[0].ToString();
            }
            else if (Residue.TryGetResidue(letter, out Residue residue))
            {
                AminoAcid = letter;
                AminoAcidMonoisotopicMass = residue.MonoisotopicMass;
                ChemicalFormula.Text = residue.ThisChemicalFormula.Formula;
                ParseChemicalFormula();
                MassDifference.Text = "0.000";
                SetIsotopeControlsEnabled(true);
            }
            else
            {
                MessageBox.Show('"' + letter.ToString() + '"' + " is not a valid amino acid. Please enter a single amino acid letter.");
                txtBoxAminoAcidLookup.Text = "";
            }
        }

        /// <summary>
        /// Enables or disables all ten isotope input controls together.
        /// </summary>
        private void SetIsotopeControlsEnabled(bool isEnabled)
        {
            C12.IsEnabled = isEnabled;
            C13.IsEnabled = isEnabled;
            N14.IsEnabled = isEnabled;
            N15.IsEnabled = isEnabled;
            O16.IsEnabled = isEnabled;
            O18.IsEnabled = isEnabled;
            H1.IsEnabled = isEnabled;
            H2.IsEnabled = isEnabled;
            S32.IsEnabled = isEnabled;
            S34.IsEnabled = isEnabled;
        }
        /// <summary>
        /// Loads custom amino acids from "CustomAminoAcids/CustomAminoAcids.txt" under
        /// <c>DataDir</c> and registers them through <c>Residue.AddNewResiduesToDictionary</c>.
        /// If the file does not exist it is created via <see cref="WriteAminoAcidsFile"/> instead.
        /// </summary>
        /// <exception cref="ProteaseGuruException">
        /// A data line uses a character listed in <c>InvalidAminoAcids</c>, has an empty one-letter
        /// column, or cannot be parsed into a residue.
        /// </exception>
        public static void RefreshAminoAcidDictionary()
        {
            //read in all the amino acids (they already exist in mzlib, but there might be synthetic amino acids that need to be included)
            string aminoAcidPath = Path.Combine(DataDir, @"CustomAminoAcids", @"CustomAminoAcids.txt");

            if (!File.Exists(aminoAcidPath))
            {
                //create it so that it can be manipulated
                WriteAminoAcidsFile();
                return;
            }

            string[] aminoAcidLines = File.ReadAllLines(aminoAcidPath);
            List<Residue> residuesToAdd = new List<Residue>();

            //start at 1: the first line is the column header
            for (int i = 1; i < aminoAcidLines.Length; i++)
            {
                string[] line = aminoAcidLines[i].Split('\t'); //tsv Name, one letter, monoisotopic, chemical formula
                if (line.Length < 4) //nothing usable there (e.g. a blank line)
                {
                    continue;
                }

                //an empty one-letter column would otherwise surface as a raw IndexOutOfRangeException
                if (line[1].Length == 0)
                {
                    throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " was not in the correct format.");
                }

                char letter = line[1][0];
                if (InvalidAminoAcids.Contains(letter))
                {
                    throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " contains an invalid amino acid. (Ex: " + string.Join(", ", InvalidAminoAcids.Select(x => x.ToString())) + ")");
                }

                try
                {
                    ChemicalFormula formula = ChemicalFormula.ParseFormula(line[3]);

                    //if it doesn't already exist or it does exist but has a different formula, add the entry
                    if (!(Residue.TryGetResidue(letter, out Residue residue)) ||
                        !(formula.Formula.Equals(residue.ThisChemicalFormula.Formula)))
                    {
                        residuesToAdd.Add(new Residue(line[0], letter, line[1], formula, ModificationSites.Any));
                    }
                }
                catch
                {
                    throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " was not in the correct format.");
                }
            }

            Residue.AddNewResiduesToDictionary(residuesToAdd);
        }
        /// <summary>
        /// Writes "CustomAminoAcids/CustomAminoAcids.txt" under <c>DataDir</c>, creating the
        /// directory when needed. The file holds a header line followed by one tab-separated line
        /// (name, letter, monoisotopic mass, chemical formula) for every letter 'A' through 'Z'
        /// that <c>Residue.TryGetResidue</c> resolves.
        /// </summary>
        public static void WriteAminoAcidsFile()
        {
            string aminoAcidDirectory = Path.Combine(DataDir, @"CustomAminoAcids");
            if (!Directory.Exists(aminoAcidDirectory))
            {
                Directory.CreateDirectory(aminoAcidDirectory);
            }

            var fileLines = new List<string>();
            fileLines.Add("Name\tOneLetterAbbr.\tMonoisotopicMass\tChemicalFormula");

            //just the basic residues
            char candidate = 'A';
            while (candidate <= 'Z')
            {
                if (Residue.TryGetResidue(candidate, out Residue known))
                {
                    string[] columns =
                    {
                        known.Name,
                        known.Letter.ToString(),
                        known.MonoisotopicMass.ToString(),
                        known.ThisChemicalFormula.Formula
                    };
                    fileLines.Add(string.Join("\t", columns));
                }
                candidate++;
            }

            string aminoAcidFile = Path.Combine(aminoAcidDirectory, @"CustomAminoAcids.txt");
            File.WriteAllLines(aminoAcidFile, fileLines);
        }
Beispiel #7
0
        /// <summary>
        /// Assigns heavy-label characters to <paramref name="currentLabel"/> and to each of its
        /// additional labels, starting the search at <paramref name="heavyLabel"/>.
        /// Every assigned character (not only the first one) is checked so that it neither
        /// overwrites an existing residue with a different formula nor uses a character listed in
        /// <c>GlobalVariables.InvalidAminoAcids</c>.
        /// </summary>
        /// <param name="currentLabel">Label (optionally carrying additional labels) to update.</param>
        /// <param name="heavyLabel">First candidate character to try.</param>
        /// <returns>
        /// The updated label and the character following the last one assigned, to be used as the
        /// starting candidate for the next label.
        /// </returns>
        public static (SilacLabel updatedLabel, char nextHeavyLabel) UpdateAminoAcidLabel(SilacLabel currentLabel, char heavyLabel)
        {
            heavyLabel = NextUsableHeavyCharacter(currentLabel, heavyLabel);
            SilacLabel updatedLabel = AssignValidHeavyCharacter(currentLabel, heavyLabel);
            heavyLabel++;

            if (currentLabel.AdditionalLabels != null)
            {
                foreach (SilacLabel additionalLabel in currentLabel.AdditionalLabels)
                {
                    //additional labels previously took the next character unchecked, which could
                    //overwrite an existing residue or land on an invalid character
                    heavyLabel = NextUsableHeavyCharacter(additionalLabel, heavyLabel);
                    updatedLabel.AddAdditionalSilacLabel(AssignValidHeavyCharacter(additionalLabel, heavyLabel));
                    heavyLabel++;
                }
            }
            return (updatedLabel, heavyLabel);
        }

        /// <summary>
        /// Returns the first character at or after <paramref name="candidate"/> that is usable for
        /// <paramref name="label"/>: it is not in <c>GlobalVariables.InvalidAminoAcids</c>, and it
        /// is either unused or already mapped to a residue with the label's chemical formula.
        /// </summary>
        private static char NextUsableHeavyCharacter(SilacLabel label, char candidate)
        {
            while ((Residue.TryGetResidue(candidate, out Residue residue) && //the character is taken...
                    !residue.ThisChemicalFormula.Formula.Equals(label.LabelChemicalFormula)) || //...by something other than this label
                   GlobalVariables.InvalidAminoAcids.Contains(candidate)) //or it is reserved (motif/delimiter)
            {
                candidate++;
            }
            return candidate;
        }
        /// <summary>
        /// Click handler that validates the entered custom amino acid (non-empty text, parsable
        /// chemical formula, character not reserved, character not already a known residue),
        /// appends it to CustomAminoAcids.txt, registers it in the residue dictionary and closes
        /// the dialog with a positive result. On any validation failure a message is shown and
        /// nothing is saved.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SaveCustomAminoAcid_Click(object sender, RoutedEventArgs e)
        {
            //VALIDATE INPUT
            if (AminoAcidTextBox.Text.Length == 0)
            {
                MessageBox.Show("Please specify the character that represents a synthetic amino acid in the database");
                return;
            }
            //the first character of the text is the one-letter code; the full text is used as the name
            char aminoAcidLetter = AminoAcidTextBox.Text[0];

            ChemicalFormula formula;

            try
            {
                formula = ChemicalFormula.ParseFormula(ChemicalFormulaTextBox.Text);
            }
            catch
            {
                MessageBox.Show("The chemical formula '" + ChemicalFormulaTextBox.Text + "' could not be parsed. Please try again.");
                return;
            }

            if (GlobalVariables.InvalidAminoAcids.Contains(aminoAcidLetter))
            {
                MessageBox.Show("The amino acid '" + aminoAcidLetter + "' cannot be assigned. " +
                                "\nThis character is used for modification motifs or as result delimiters. " +
                                "\nThe following amino acids are not allowed: " +
                                string.Join(", ", GlobalVariables.InvalidAminoAcids.Select(x => x.ToString())));
                return;
            }

            //check if the specified amino acid already exists
            if (Residue.TryGetResidue(aminoAcidLetter, out Residue residue))
            {
                MessageBox.Show("The amino acid '" + aminoAcidLetter + "' already exists." +
                                "\nMonoisotopic Mass: " + residue.MonoisotopicMass +
                                "\nChemical Formula: " + residue.ThisChemicalFormula.Formula +

                                "\n\nYou may overwrite this amino acid by manually deleting/modifying the current entry. " +
                                "\nThis can be done in MetaMorpheus by navigating to 'Data' in the top-left corner, " +
                                "selecting 'Open folder with mods/data files' from the drop down menu, " +
                                "opening the folder 'CustomAminoAcids', and opening the file 'CustomAminoAcids.txt'." +
                                "\nMetaMorpheus will need to be restarted for these changes to take effect." +

                                "\n\nAmino acids can be reset to their default values by deleting the file 'CustomAminoAcids.txt' and restarting MetaMorpheus.");
                return;
            }

            //Alright, the entry is valid
            //Append the entry to the CustomAminoAcids.txt file
            string aminoAcidDirectory = Path.Combine(GlobalVariables.DataDir, @"CustomAminoAcids");
            string customAminoAcidPath = Path.Combine(aminoAcidDirectory, @"CustomAminoAcids.txt");

            //check that the file exists and create it if it doesn't
            if (!File.Exists(customAminoAcidPath))
            {
                GlobalVariables.WriteAminoAcidsFile();
            }

            //save it in the amino acid file
            List<string> customAminoAcidsText = File.ReadAllLines(customAminoAcidPath).ToList();

            customAminoAcidsText.Add(AminoAcidTextBox.Text + '\t' + aminoAcidLetter + '\t' + formula.MonoisotopicMass.ToString() + '\t' + formula.Formula); //tsv Name, one letter, monoisotopic, chemical formula
            File.WriteAllLines(customAminoAcidPath, customAminoAcidsText);

            //add the mod to the residue dictionary
            Residue.AddNewResiduesToDictionary(new List<Residue> {
                new Residue(AminoAcidTextBox.Text, aminoAcidLetter, AminoAcidTextBox.Text, formula, ModificationSites.Any)
            });
            MessageBox.Show("Success! Amino Acid '" + aminoAcidLetter + "' has been added to the dictionary." +
                            "\nMonoisotopic Mass: " + formula.MonoisotopicMass.ToString() +
                            "\nChemical Formula: " + formula.Formula);
            DialogResult = true;
        }
        /// <summary>
        /// Generates theoretical fragments for given dissociation type for this peptide.
        /// The "products" parameter is filled with these fragments.
        /// </summary>
        /// <param name="dissociationType">
        /// Dissociation type used to look up the product types, the terminal mass shifts, and each
        /// modification's neutral losses and diagnostic ions.
        /// </param>
        /// <param name="fragmentationTerminus">
        /// Selects which backbone fragment series the main loop generates: N, C, or Both.
        /// </param>
        /// <param name="products">
        /// Output list. It is cleared first and then populated with the generated products.
        /// </param>
        public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus, List <Product> products)
        {
            // This code is specifically written to be memory- and CPU -efficient because it is
            // called millions of times for a typical search (i.e., at least once per peptide).
            // If you modify this code, BE VERY CAREFUL about allocating new memory, especially
            // for new collections. This code also deliberately avoids using "yield return", again
            // for performance reasons. Be sure to benchmark any changes with a parallelized
            // fragmentation of every peptide in a database (i.e., test for speed decreases and
            // memory issues).

            products.Clear();

            // massCaps.Item1[i] / massCaps.Item2[i] are added below to the running N-/C-terminal mass
            // for the i-th N-/C-terminal product type
            var massCaps = DissociationTypeCollection.GetNAndCTerminalMassShiftsForDissociationType(dissociationType);

            // running sums of residue and modification masses, accumulated from each terminus inwards
            double cTermMass = 0;
            double nTermMass = 0;

            List <ProductType> nTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N);
            List <ProductType> cTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C);

            bool calculateNTermFragments = fragmentationTerminus == FragmentationTerminus.N ||
                                           fragmentationTerminus == FragmentationTerminus.Both;

            bool calculateCTermFragments = fragmentationTerminus == FragmentationTerminus.C ||
                                           fragmentationTerminus == FragmentationTerminus.Both;

            //From http://www.matrixscience.com/help/fragmentation_help.html
            //Low Energy CID -- In low energy CID(i.e.collision induced dissociation in a triple quadrupole or an ion trap) a peptide carrying a positive charge fragments mainly along its backbone,
            //generating predominantly b and y ions. In addition, for fragments containing RKNQ, peaks are seen for ions that have lost ammonia (-17 Da) denoted a*, b* and y*. For fragments containing
            //STED, loss of water(-18 Da) is denoted a°, b° and y°. Satellite ions from side chain cleavage are not observed.
            bool haveSeenNTermDegreeIon = false;
            bool haveSeenNTermStarIon   = false;
            bool haveSeenCTermDegreeIon = false;
            bool haveSeenCTermStarIon   = false;

            // these two collections keep track of the neutral losses observed so far on the n-term or c-term.
            // they are apparently necessary, but allocating memory for collections in this function results in
            // inefficient memory usage and thus frequent garbage collection.
            // TODO: If you can think of a way to remove these collections and still maintain correct
            // fragmentation, please do so.
            // They are allocated lazily (left null until the first non-zero neutral loss is seen).
            HashSet <double> nTermNeutralLosses = null;
            HashSet <double> cTermNeutralLosses = null;

            // n-terminus mod
            if (calculateNTermFragments)
            {
                if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
                {
                    nTermMass += mod.MonoisotopicMass.Value;
                }
            }

            // c-terminus mod
            if (calculateCTermFragments)
            {
                if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length + 2, out Modification mod))
                {
                    cTermMass += mod.MonoisotopicMass.Value;
                }
            }

            // r covers every residue index except the last one. Each pass extends the N-terminal sum
            // with residue r and the C-terminal sum with residue (Length - r - 1).
            for (int r = 0; r < BaseSequence.Length - 1; r++)
            {
                // n-term fragments
                if (calculateNTermFragments)
                {
                    char nTermResidue = BaseSequence[r];

                    // get n-term residue mass
                    if (Residue.TryGetResidue(nTermResidue, out Residue residue))
                    {
                        nTermMass += residue.MonoisotopicMass;
                    }
                    else
                    {
                        // unknown residue: the running mass becomes NaN and, because NaN propagates
                        // through addition, so does the mass of every later N-terminal product
                        nTermMass = double.NaN;
                    }

                    // add side-chain mod (key is the residue index + 2); 'mod' stays in scope because
                    // the product loop below null-checks it to collect this residue's neutral losses
                    if (AllModsOneIsNterminus.TryGetValue(r + 2, out Modification mod))
                    {
                        nTermMass += mod.MonoisotopicMass.Value;
                    }

                    // handle star and degree ions for low-res CID
                    if (dissociationType == DissociationType.LowCID)
                    {
                        if (nTermResidue == 'R' || nTermResidue == 'K' || nTermResidue == 'N' || nTermResidue == 'Q')
                        {
                            haveSeenNTermStarIon = true;
                        }

                        if (nTermResidue == 'S' || nTermResidue == 'T' || nTermResidue == 'E' || nTermResidue == 'D')
                        {
                            haveSeenNTermDegreeIon = true;
                        }
                    }

                    // skip first N-terminal fragment (b1, aDegree1, ...) for CID
                    // (the mass and flags above have already been updated; only product generation,
                    // including the neutral-loss collection inside it, is skipped)
                    if (r == 0 && (dissociationType == DissociationType.CID || dissociationType == DissociationType.LowCID))
                    {
                        goto CTerminusFragments;
                    }

                    // generate products
                    for (int i = 0; i < nTermProductTypes.Count; i++)
                    {
                        if (dissociationType == DissociationType.LowCID)
                        {
                            // star/degree product types are only emitted once a matching residue
                            // (RKNQ / STED) has been included in the fragment
                            if (!haveSeenNTermStarIon && (nTermProductTypes[i] == ProductType.aStar || nTermProductTypes[i] == ProductType.bStar))
                            {
                                continue;
                            }

                            if (!haveSeenNTermDegreeIon && (nTermProductTypes[i] == ProductType.aDegree || nTermProductTypes[i] == ProductType.bDegree))
                            {
                                continue;
                            }
                        }

                        products.Add(new Product(
                                         nTermProductTypes[i],
                                         FragmentationTerminus.N,
                                         nTermMass + massCaps.Item1[i],
                                         r + 1,
                                         r + 1,
                                         0));

                        // remember this residue's non-zero neutral losses; the set persists across
                        // iterations of r, so every later (longer) fragment also gets the loss variants
                        if (mod != null && mod.NeutralLosses != null &&
                            mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                        {
                            foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
                            {
                                if (nTermNeutralLosses == null)
                                {
                                    nTermNeutralLosses = new HashSet <double>();
                                }

                                nTermNeutralLosses.Add(neutralLoss);
                            }
                        }

                        if (nTermNeutralLosses != null)
                        {
                            foreach (double neutralLoss in nTermNeutralLosses)
                            {
                                products.Add(new Product(
                                                 nTermProductTypes[i],
                                                 FragmentationTerminus.N,
                                                 nTermMass + massCaps.Item1[i] - neutralLoss,
                                                 r + 1,
                                                 r + 1,
                                                 neutralLoss));
                            }
                        }
                    }
                }

                // c-term fragments
CTerminusFragments:
                if (calculateCTermFragments)
                {
                    char cTermResidue = BaseSequence[BaseSequence.Length - r - 1];

                    // get c-term residue mass
                    if (Residue.TryGetResidue(cTermResidue, out Residue residue))
                    {
                        cTermMass += residue.MonoisotopicMass;
                    }
                    else
                    {
                        // unknown residue: NaN propagates to every later C-terminal product mass
                        cTermMass = double.NaN;
                    }

                    // add side-chain mod (key is the residue index + 2)
                    if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length - r + 1, out Modification mod))
                    {
                        cTermMass += mod.MonoisotopicMass.Value;
                    }

                    // handle star and degree ions for low-res CID
                    if (dissociationType == DissociationType.LowCID)
                    {
                        if (cTermResidue == 'R' || cTermResidue == 'K' || cTermResidue == 'N' || cTermResidue == 'Q')
                        {
                            haveSeenCTermStarIon = true;
                        }

                        if (cTermResidue == 'S' || cTermResidue == 'T' || cTermResidue == 'E' || cTermResidue == 'D')
                        {
                            haveSeenCTermDegreeIon = true;
                        }
                    }

                    // generate products
                    for (int i = 0; i < cTermProductTypes.Count; i++)
                    {
                        // skip zDot ions for proline residues for ETD/ECD/EThcD
                        if (cTermResidue == 'P' &&
                            (dissociationType == DissociationType.ECD || dissociationType == DissociationType.ETD || dissociationType == DissociationType.EThcD) &&
                            cTermProductTypes[i] == ProductType.zDot)
                        {
                            continue;
                        }

                        if (dissociationType == DissociationType.LowCID)
                        {
                            if (!haveSeenCTermStarIon && cTermProductTypes[i] == ProductType.yStar)
                            {
                                continue;
                            }

                            if (!haveSeenCTermDegreeIon && cTermProductTypes[i] == ProductType.yDegree)
                            {
                                continue;
                            }
                        }

                        products.Add(new Product(
                                         cTermProductTypes[i],
                                         FragmentationTerminus.C,
                                         cTermMass + massCaps.Item2[i],
                                         r + 1,
                                         BaseSequence.Length - r,
                                         0));

                        // same accumulation scheme as on the N-terminal side
                        if (mod != null && mod.NeutralLosses != null &&
                            mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                        {
                            foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
                            {
                                if (cTermNeutralLosses == null)
                                {
                                    cTermNeutralLosses = new HashSet <double>();
                                }

                                cTermNeutralLosses.Add(neutralLoss);
                            }
                        }

                        if (cTermNeutralLosses != null)
                        {
                            foreach (double neutralLoss in cTermNeutralLosses)
                            {
                                products.Add(new Product(
                                                 cTermProductTypes[i],
                                                 FragmentationTerminus.C,
                                                 cTermMass + massCaps.Item2[i] - neutralLoss,
                                                 r + 1,
                                                 BaseSequence.Length - r,
                                                 neutralLoss));
                            }
                        }
                    }
                }
            }

            // zDot generates one more ion...
            // NOTE(review): this block is not guarded by calculateCTermFragments; when that flag is
            // false the loop above never accumulated cTermMass - confirm this is intended.
            // NOTE(review): BaseSequence[0] assumes a non-empty sequence.
            if (cTermProductTypes.Contains(ProductType.zDot) && BaseSequence[0] != 'P')
            {
                // get c-term residue mass
                if (Residue.TryGetResidue(BaseSequence[0], out Residue residue))
                {
                    cTermMass += residue.MonoisotopicMass;
                }
                else
                {
                    cTermMass = double.NaN;
                }

                // add side-chain mod
                // NOTE(review): key 1 is the key read as the "n-terminus mod" above, whereas the loop
                // reads a residue's side-chain mod at (index + 2), i.e. key 2 for BaseSequence[0] -
                // confirm which key is intended here.
                if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
                {
                    cTermMass += mod.MonoisotopicMass.Value;
                }

                // generate zDot product
                products.Add(new Product(
                                 ProductType.zDot,
                                 FragmentationTerminus.C,
                                 cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot),
                                 BaseSequence.Length,
                                 1,
                                 0));

                // only the losses of the mod fetched just above are applied here; the accumulated
                // cTermNeutralLosses set is not consulted
                if (mod != null && mod.NeutralLosses != null &&
                    mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                {
                    foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
                    {
                        products.Add(new Product(
                                         ProductType.zDot,
                                         FragmentationTerminus.C,
                                         cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot) - neutralLoss,
                                         BaseSequence.Length,
                                         1,
                                         neutralLoss));
                    }
                }
            }

            foreach (var mod in AllModsOneIsNterminus.Where(p => p.Value.NeutralLosses != null))
            {
                // molecular ion minus neutral losses
                if (mod.Value.NeutralLosses.TryGetValue(dissociationType, out List <double> losses))
                {
                    foreach (double neutralLoss in losses.Where(p => p != 0))
                    {
                        // redundant with the Where filter above: zero losses are already excluded
                        if (neutralLoss != 0)
                        {
                            products.Add(new Product(ProductType.M, FragmentationTerminus.Both, MonoisotopicMass - neutralLoss, 0, 0, neutralLoss));
                        }
                    }
                }
            }

            // generate diagnostic ions
            // TODO: this code is memory-efficient but sort of CPU inefficient; it can be further optimized.
            // however, diagnostic ions are fairly rare so it's probably OK for now
            // (distinct diagnostic ion values across all mods that define them for this dissociation type)
            foreach (double diagnosticIon in AllModsOneIsNterminus.Where(p => p.Value.DiagnosticIons != null &&
                                                                         p.Value.DiagnosticIons.ContainsKey(dissociationType)).SelectMany(p => p.Value.DiagnosticIons[dissociationType]).Distinct())
            {
                // label = the ion's m/z at charge 1, rounded to the nearest integer
                int diagnosticIonLabel = (int)Math.Round(diagnosticIon.ToMz(1), 0);

                // the diagnostic ion is assumed to be annotated in the mod info as the *neutral mass* of the diagnostic ion, not the ionized species
                products.Add(new Product(ProductType.D, FragmentationTerminus.Both, diagnosticIon, diagnosticIonLabel, 0, 0));
            }
        }
Beispiel #10
0
 /// <summary>
 /// Checks that "?" is not resolved by either the string or the char overload of
 /// <c>Residue.TryGetResidue</c>.
 /// </summary>
 public void GetResidueNotInDictionary()
 {
     // string overload
     bool foundBySymbol = Residue.TryGetResidue("?", out Residue bySymbol);
     Assert.IsFalse(foundBySymbol);

     // char overload
     bool foundByLetter = Residue.TryGetResidue('?', out Residue byLetter);
     Assert.IsFalse(foundByLetter);
 }
Beispiel #11
0
        /// <summary>
        /// Runs the search task: prepares SILAC labels when quantifying, loads modifications and proteins,
        /// searches every spectra file with the configured search type (modern, nonspecific, or classic),
        /// and hands the collected PSMs to a <see cref="PostSearchAnalysisTask"/>.
        /// </summary>
        /// <param name="OutputFolder">Folder passed on to post-search analysis; "Individual File Results" is placed beneath it.</param>
        /// <param name="dbFilenameList">Protein databases to load and index.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to search. If any is an .mgf file, quantification is disabled.</param>
        /// <param name="taskId">Identifier used for status/progress reporting.</param>
        /// <param name="fileSettingsList">Per-file parameter overrides, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>The results returned by the post-search analysis task.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            if (SearchParameters.DoQuantification)
            {
                // disable quantification if a .mgf is being used
                if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
                {
                    SearchParameters.DoQuantification = false;
                }
                //if we're doing SILAC, add the silac labels to the residue dictionary
                else if (SearchParameters.SilacLabels != null)
                {
                    //change the silac residues to lower case amino acids (currently null)
                    List <SilacLabel> updatedLabels = new List <SilacLabel>();
                    char heavyLabel = 'a';
                    for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                    {
                        SilacLabel currentLabel = SearchParameters.SilacLabels[i];
                        //make sure we're not overwriting an existing residue, and that the character is a valid residue (not a motif/delimiter)
                        while ((Residue.TryGetResidue(heavyLabel, out Residue residue) && //Check if the amino acid exists. If it already exists, we don't want to overwrite it
                                !residue.ThisChemicalFormula.Formula.Equals(currentLabel.LabelChemicalFormula)) || //if it exists but it's already the label (so we're not overwriting anything), then we're fine
                               GlobalVariables.InvalidAminoAcids.Contains(heavyLabel)) //If it didn't already exist, but it's invalid, we need to keep going
                        {
                            heavyLabel++;
                        }
                        SilacLabel updatedLabel = SilacConversions.AssignValidHeavyCharacter(currentLabel, heavyLabel);
                        heavyLabel++;
                        if (currentLabel.AdditionalLabels != null)
                        {
                            foreach (SilacLabel additionalLabel in currentLabel.AdditionalLabels)
                            {
                                updatedLabel.AddAdditionalSilacLabel(SilacConversions.AssignValidHeavyCharacter(additionalLabel, heavyLabel));
                                heavyLabel++;
                            }
                        }
                        updatedLabels.Add(updatedLabel);
                    }
                    SearchParameters.SilacLabels = updatedLabels;
                }
            }
            //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
            if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
            {
                SearchParameters.SilacLabels = null;
            }

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
            int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast <FdrCategory>().Last() + 1); //+1 because it starts at zero

            List <PeptideSpectralMatch>[] allCategorySpecificPsms = new List <PeptideSpectralMatch> [numFdrCategories];
            for (int i = 0; i < numFdrCategories; i++)
            {
                allCategorySpecificPsms[i] = new List <PeptideSpectralMatch>();
            }

            FlashLfqResults flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            // deferred query; it is enumerated once, when the post-search parameters are built below
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            // per file (keyed by file name without extension): { number of MS2 scans in the file, number of MS2 scans used for searching }
            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <PeptideWithSetModifications> peptideIndex = null;
                        // take this partition's contiguous slice of the protein list
                        List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels, currentPartition, SearchParameters.DecoyType, combinedParams, SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex  = null;
                        List <int>[] precursorIndex = null;

                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        if (GlobalVariables.StopLoops)
                        {
                            break;
                        }
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
                    for (int i = 0; i < numFdrCategories; i++)                                                                      //only add if we're using for FDR, else ignore it as null.
                    {
                        fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                    }

                    List <CommonParameters> paramsToUse = new List <CommonParameters> {
                        combinedParams
                    };
                    if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
                    {
                        paramsToUse.Clear();
                        List <FragmentationTerminus> terminiToUse = new List <FragmentationTerminus> {
                            FragmentationTerminus.N, FragmentationTerminus.C
                        };
                        foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                        {
                            paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                        }
                    }
                    foreach (CommonParameters paramToUse in paramsToUse)
                    {
                        for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                        {
                            List <PeptideWithSetModifications> peptideIndex = null;

                            // take this partition's contiguous slice of the protein list
                            List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                                                                                    ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                            List <int>[] fragmentIndex  = null;
                            List <int>[] precursorIndex = null;

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels, currentPartition,
                                                                 SearchParameters.DecoyType, paramToUse, SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                                taskId
                            });
                            lock (indexLock)
                            {
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, precursorIndex, currentPartition, paramToUse, variableModifications, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                            if (GlobalVariables.StopLoops)
                            {
                                break;
                            }
                        }

                        // the break above only leaves the partition loop; without this check a stop request
                        // would still start indexing and searching for the next parameter set
                        if (GlobalVariables.StopLoops)
                        {
                            break;
                        }
                    }
                    lock (psmLock)
                    {
                        for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                        {
                            if (allCategorySpecificPsms[i] != null)
                            {
                                allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                            }
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels, proteinList, massDiffAcceptor, combinedParams, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }

                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms);
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                // report percent of files completed; multiply before dividing so integer division
                // does not truncate every intermediate value to 0
                ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            //resolve category specific fdrs (for speedy semi and nonspecific)
            if (SearchParameters.SearchType == SearchType.NonSpecific)
            {
                // for nonspecific searches the final PSM list comes from the per-category lists, replacing allPsms
                allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters);
            }

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
            {
                SearchTaskResults             = MyTaskResults,
                SearchTaskId                  = taskId,
                SearchParameters              = SearchParameters,
                ProteinList                   = proteinList,
                AllPsms                       = allPsms,
                VariableModifications         = variableModifications,
                FixedModifications            = fixedModifications,
                ListOfDigestionParams         = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
                CurrentRawFileList            = currentRawFileList,
                MyFileManager                 = myFileManager,
                NumNotches                    = numNotches,
                OutputFolder                  = OutputFolder,
                IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
                FlashLfqResults               = flashLfqResults,
                FileSettingsList              = fileSettingsList,
                NumMs2SpectraPerFile          = numMs2SpectraPerFile,
                DatabaseFilenameList          = dbFilenameList
            };
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
            {
                Parameters       = parameters,
                CommonParameters = CommonParameters
            };

            return(postProcessing.Run());
        }