/// <summary>
/// Create a sequence using the supplied character sequence, modifications, and amino acid set
/// </summary>
/// <remarks>
/// Positions in <paramref name="modStr"/> are shifted down by one before use: position 0 is the
/// N-terminal modification (applied to the first residue), and position k (1 or greater) is
/// applied to the k-th residue. When both an N-terminal modification and a position-1
/// modification are present, the first residue is wrapped with both.
/// Positions that do not correspond to a residue of <paramref name="sequence"/> are ignored.
/// </remarks>
/// <param name="sequence">Residue characters; each one is looked up in <paramref name="aminoAcidSet"/></param>
/// <param name="modStr">E.g. Acetyl 0,Oxidation 1,Oxidation 20,Oxidation 27</param>
/// <param name="aminoAcidSet">Amino acid set used to resolve each residue character</param>
/// <returns>
/// The sequence, or null when <paramref name="modStr"/> is invalid: an entry is not exactly
/// "name position" separated by a single space, the name is not known to Modification.Get,
/// or the position is not a valid integer
/// </returns>
/// <exception cref="ArgumentException">Two entries in <paramref name="modStr"/> use the same position</exception>
public static Sequence CreateSequence(string sequence, string modStr, AminoAcidSet aminoAcidSet)
{
    if (string.IsNullOrEmpty(modStr))
    {
        return new Sequence(sequence, aminoAcidSet);
    }

    // Key: zero-based residue index; key -1 holds the N-terminal modification
    var indexModMap = new Dictionary<int, Modification>();
    foreach (var modIns in modStr.Split(','))
    {
        var token = modIns.Split(' ');
        if (token.Length != 2)
        {
            return null; // invalid modStr
        }

        var mod = Modification.Get(token[0]);
        if (mod == null)
        {
            return null;
        }

        // A malformed position is reported the same way as any other malformed entry (null),
        // instead of escaping as a FormatException / OverflowException
        if (!int.TryParse(token[1], out var position))
        {
            return null;
        }

        indexModMap.Add(position - 1, mod);
    }

    var aaList = new List<AminoAcid>(sequence.Length);
    for (var i = 0; i < sequence.Length; i++)
    {
        var aa = aminoAcidSet.GetAminoAcid(sequence[i]);

        // N-term modification: wraps the first residue before any residue-level modification
        if (i == 0 && indexModMap.TryGetValue(-1, out var nTermMod))
        {
            aa = new ModifiedAminoAcid(aa, nTermMod);
        }

        if (indexModMap.TryGetValue(i, out var residueMod))
        {
            aa = new ModifiedAminoAcid(aa, residueMod);
        }

        aaList.Add(aa);
    }

    return new Sequence(aaList);
}
/// <summary>
/// Parse the provided modification line
/// </summary>
/// <remarks>
/// The line must hold five comma-separated fields, each trimmed before use:
/// composition, residues ("*" or one or more standard amino acid residues),
/// modification type ("fix" or "opt"), location (any, N-Term, C-Term, Prot-N-Term, Prot-C-Term,
/// or the same names without hyphens) and name. Only the first whitespace-delimited word of the
/// location and name fields is used. If Modification.Get does not return a modification for the
/// name, one is registered with the parsed composition.
/// </remarks>
/// <param name="line">Modification definition line</param>
/// <returns>
/// One SearchModification per character of the residues field,
/// or null when the line does not have exactly five comma-separated fields
/// </returns>
/// <exception cref="FormatException">
/// The composition, residues, modification type or location is not recognized,
/// or residues "*" is combined with location "any"
/// </exception>
public static List<SearchModification> ParseModification(string line)
{
    var token = line.Split(',');
    if (token.Length != 5)
    {
        return null;
    }

    // Composition
    var compStr = token[0].Trim();
    var composition = Composition.Composition.ParseFromPlainString(compStr) ??
                      Composition.Composition.Parse(compStr);
    if (composition == null)
    {
        throw new FormatException($"Illegal Composition: \"{compStr}\" in \"{line}\"");
    }

    // Residues
    var residueStr = token[1].Trim();
    var isResidueStrLegitimate = residueStr.Equals("*") ||
                                 (residueStr.Any() && residueStr.All(AminoAcid.IsStandardAminoAcidResidue));
    if (!isResidueStrLegitimate)
    {
        throw new FormatException($"Illegal residues: \"{residueStr}\" in \"{line}\"");
    }

    // isFixedModification
    var modTypeStr = token[2].Trim();
    bool isFixedModification;
    if (modTypeStr.Equals("fix", StringComparison.InvariantCultureIgnoreCase))
    {
        isFixedModification = true;
    }
    else if (modTypeStr.Equals("opt", StringComparison.InvariantCultureIgnoreCase))
    {
        isFixedModification = false;
    }
    else
    {
        throw new FormatException($"Illegal modification type (fix or opt): \"{modTypeStr}\" in \"{line}\"");
    }

    // Location
    var locStr = token[3].Trim().Split()[0];
    bool LocationIs(string candidate) => locStr.Equals(candidate, StringComparison.InvariantCultureIgnoreCase);

    SequenceLocation location;
    if (LocationIs("any"))
    {
        location = SequenceLocation.Everywhere;
    }
    else if (LocationIs("N-Term") || LocationIs("NTerm"))
    {
        location = SequenceLocation.PeptideNTerm;
    }
    else if (LocationIs("C-Term") || LocationIs("CTerm"))
    {
        location = SequenceLocation.PeptideCTerm;
    }
    else if (LocationIs("Prot-N-Term") || LocationIs("ProtNTerm"))
    {
        location = SequenceLocation.ProteinNTerm;
    }
    else if (LocationIs("Prot-C-Term") || LocationIs("ProtCTerm"))
    {
        location = SequenceLocation.ProteinCTerm;
    }
    else
    {
        throw new FormatException(
            $"Illegal modification location (any|(Prot-?)?(N|C)-?Term): \"{token[3].Trim()}\" in \"{line}\"");
    }

    // Check if it's valid
    if (residueStr.Equals("*") && location == SequenceLocation.Everywhere)
    {
        throw new FormatException($"Invalid modification: * should not be applied to \"any\": \"{line}\"");
    }

    // Trim BEFORE splitting: Split() breaks on whitespace, so a field with leading whitespace
    // (e.g. "..., any, Methyl") would otherwise yield an empty first element as the name
    var name = token[4].Trim().Split()[0];
    var mod = Modification.Get(name) ?? Modification.RegisterAndGetModification(name, composition);

    return residueStr
        .Select(residue => new SearchModification(mod, residue, location, isFixedModification))
        .ToList();
}
/// <summary>
/// Read the search modifications and the NumMods setting from a modification file
/// </summary>
/// <remarks>
/// Text from the first '#' to the end of each line is ignored, as are blank lines.
/// A line starting with "NumMods=" sets the value reported through
/// <paramref name="maxNumDynModsPerPeptide"/>. Any other line is parsed as a modification when it
/// has exactly five comma-separated fields (composition, residues, fix|opt, location, name);
/// lines with a different number of fields are skipped silently.
/// The first invalid entry is reported on the console and stops parsing.
/// </remarks>
/// <param name="modFilePath">Path of the modification file</param>
/// <param name="maxNumDynModsPerPeptide">
/// Receives the last NumMods value found in the file (0 if there is none),
/// or -1 when the file contains an invalid entry
/// </param>
/// <returns>The parsed modifications, or null when the file contains an invalid entry</returns>
private static IEnumerable<SearchModification> Parse(string modFilePath, out int maxNumDynModsPerPeptide)
{
    // Every failure path reports -1; the real value is assigned only after the whole file parsed cleanly
    maxNumDynModsPerPeptide = -1;

    var searchModList = new List<SearchModification>();
    var numMods = 0;
    var lineNum = 0;

    foreach (var line in File.ReadLines(modFilePath))
    {
        lineNum++;

        // Keep only the text before the first '#'; Split always returns at least one element
        var s = line.Split('#')[0].Trim();
        if (s.Length == 0)
        {
            continue;
        }

        if (s.StartsWith("NumMods=", StringComparison.Ordinal))
        {
            // TryParse also rejects out-of-range values, which previously escaped as OverflowException
            if (!int.TryParse(s.Split('=')[1].Trim(), out numMods))
            {
                Console.WriteLine("{0}: Illegal NumMods parameter at line {1} - {2}", modFilePath, lineNum, s);
                return null;
            }

            continue;
        }

        var token = s.Split(',');
        if (token.Length != 5)
        {
            continue;
        }

        // Composition
        var compStr = token[0].Trim();
        var composition = Composition.Composition.ParseFromPlainString(compStr) ??
                          Composition.Composition.Parse(compStr);
        if (composition == null)
        {
            Console.WriteLine("{0}: Illegal Composition at line {1} - {2}", modFilePath, lineNum, s);
            return null;
        }

        // Residues
        var residueStr = token[1].Trim();
        var isResidueStrLegitimate = residueStr.Equals("*") ||
                                     (residueStr.Any() && residueStr.All(AminoAcid.IsStandardAminoAcidResidue));
        if (!isResidueStrLegitimate)
        {
            Console.WriteLine("{0}: Illegal residues at line {1} - {2}", modFilePath, lineNum, s);
            return null;
        }

        // isFixedModification
        var modTypeStr = token[2].Trim();
        bool isFixedModification;
        if (modTypeStr.Equals("fix", StringComparison.InvariantCultureIgnoreCase))
        {
            isFixedModification = true;
        }
        else if (modTypeStr.Equals("opt", StringComparison.InvariantCultureIgnoreCase))
        {
            isFixedModification = false;
        }
        else
        {
            Console.WriteLine("{0}: Illegal modification type (fix or opt) at line {1} - {2}", modFilePath, lineNum, s);
            return null;
        }

        // Location
        var locStr = token[3].Trim().Split()[0];
        SequenceLocation location;
        if (locStr.Equals("any", StringComparison.InvariantCultureIgnoreCase))
        {
            location = SequenceLocation.Everywhere;
        }
        else if (locStr.Equals("N-Term", StringComparison.InvariantCultureIgnoreCase) ||
                 locStr.Equals("NTerm", StringComparison.InvariantCultureIgnoreCase))
        {
            location = SequenceLocation.PeptideNTerm;
        }
        else if (locStr.Equals("C-Term", StringComparison.InvariantCultureIgnoreCase) ||
                 locStr.Equals("CTerm", StringComparison.InvariantCultureIgnoreCase))
        {
            location = SequenceLocation.PeptideCTerm;
        }
        else if (locStr.Equals("Prot-N-Term", StringComparison.InvariantCultureIgnoreCase) ||
                 locStr.Equals("ProtNTerm", StringComparison.InvariantCultureIgnoreCase))
        {
            location = SequenceLocation.ProteinNTerm;
        }
        else if (locStr.Equals("Prot-C-Term", StringComparison.InvariantCultureIgnoreCase) ||
                 locStr.Equals("ProtCTerm", StringComparison.InvariantCultureIgnoreCase))
        {
            location = SequenceLocation.ProteinCTerm;
        }
        else
        {
            // The message names the accepted locations (it previously repeated "fix or opt" by mistake)
            Console.WriteLine("{0}: Illegal modification location (any|(Prot-?)?(N|C)-?Term) at line {1} - {2}", modFilePath, lineNum, s);
            return null;
        }

        // Check if it's valid
        if (residueStr.Equals("*") && location == SequenceLocation.Everywhere)
        {
            Console.WriteLine("{0}: Invalid modification: * should not be applied to \"any\"", modFilePath);
            return null;
        }

        // Trim BEFORE splitting: Split() breaks on whitespace, so a field with leading whitespace
        // (e.g. "..., any, Methyl") would otherwise yield an empty first element as the name
        var name = token[4].Trim().Split()[0];
        var mod = Modification.Get(name) ?? Modification.RegisterAndGetModification(name, composition);

        searchModList.AddRange(
            residueStr.Select(residue => new SearchModification(mod, residue, location, isFixedModification)));
    }

    maxNumDynModsPerPeptide = numMods;
    return searchModList;
}