/// <summary>
/// For every protein in <c>ListOfProteinsOrderedByAccession</c>, computes the fraction of residues covered by
/// PSMs with an unambiguous base sequence, a coverage display string (covered residues upper case, all other
/// residues lower case), the same display annotated with localized modifications, and a per-modification
/// occupancy summary. Results are appended to <c>SequenceCoveragePercent</c>,
/// <c>SequenceCoverageDisplayList</c>, <c>SequenceCoverageDisplayListWithMods</c> and <c>ModsInfo</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>SequenceCoverageDisplayListWithMods</c> receives no entry for a protein whose coverage
/// check failed, and <c>ModsInfo</c> receives an entry only when at least one occupancy annotation was
/// produced, so those two lists are not index-aligned with the protein list. Confirm that callers expect this.
/// </remarks>
public void CalculateSequenceCoverage()
{
    var proteinsWithUnambigSeqPsms = new Dictionary<Protein, List<PeptideWithSetModifications>>();
    var proteinsWithPsmsWithLocalizedMods = new Dictionary<Protein, List<PeptideWithSetModifications>>();

    foreach (var protein in Proteins)
    {
        proteinsWithUnambigSeqPsms.Add(protein, new List<PeptideWithSetModifications>());
        proteinsWithPsmsWithLocalizedMods.Add(protein, new List<PeptideWithSetModifications>());
    }

    foreach (var psm in AllPsmsBelowOnePercentFDR)
    {
        // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage
        if (psm.BaseSequence == null)
        {
            continue;
        }

        foreach (var pepWithSetMods in psm.CompactPeptides.SelectMany(b => b.Value.Item2))
        {
            // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
            if (!Proteins.Contains(pepWithSetMods.Protein))
            {
                continue;
            }

            proteinsWithUnambigSeqPsms[pepWithSetMods.Protein].Add(pepWithSetMods);

            // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
            if (psm.FullSequence != null)
            {
                proteinsWithPsmsWithLocalizedMods[pepWithSetMods.Protein].Add(pepWithSetMods);
            }
        }
    }

    foreach (var protein in ListOfProteinsOrderedByAccession)
    {
        bool errorResult = false;
        var sequenceCoverageDisplay = protein.BaseSequence.ToLower(CultureInfo.InvariantCulture);
        var coveredOneBasedResidues = new HashSet<int>();

        // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous
        foreach (var peptide in proteinsWithUnambigSeqPsms[protein])
        {
            int firstResidue = peptide.OneBasedStartResidueInProtein;
            int lastResidue = peptide.OneBasedEndResidueInProtein;
            int residueCount = lastResidue >= firstResidue ? lastResidue - firstResidue + 1 : 0;
            string peptideSequence = peptide.BaseSequence;

            // check for bugs in sequence coverage; make sure we have the right amino acids!
            // The comparison is done residue by residue with invariant-culture casing: a culture-sensitive
            // ToUpper() can map 'i' to a dotted capital I and make every isoleucine-containing peptide look wrong.
            bool matchesProtein = peptideSequence != null && peptideSequence.Length == residueCount;

            for (int i = firstResidue; i <= lastResidue; i++)
            {
                char proteinResidue = char.ToUpperInvariant(sequenceCoverageDisplay[i - 1]);
                if (matchesProtein && proteinResidue != peptideSequence[i - firstResidue])
                {
                    matchesProtein = false;
                }

                coveredOneBasedResidues.Add(i);
            }

            if (!matchesProtein)
            {
                errorResult = true;
            }
        }

        // calculate sequence coverage percent
        double seqCoveragePercent = (double)coveredOneBasedResidues.Count / protein.Length;
        if (seqCoveragePercent > 1)
        {
            errorResult = true;
        }

        // add the percent coverage or NaN if there was an error
        SequenceCoveragePercent.Add(errorResult ? double.NaN : seqCoveragePercent);

        if (errorResult)
        {
            // nothing else is reported for a protein whose coverage could not be validated
            SequenceCoverageDisplayList.Add("Error calculating sequence coverage");
            continue;
        }

        // convert the observed amino acids to upper case if they are unambiguously observed
        var coverageArray = sequenceCoverageDisplay.ToCharArray();
        foreach (var obsResidueLocation in coveredOneBasedResidues)
        {
            coverageArray[obsResidueLocation - 1] = char.ToUpperInvariant(coverageArray[obsResidueLocation - 1]);
        }

        sequenceCoverageDisplay = new string(coverageArray);
        SequenceCoverageDisplayList.Add(sequenceCoverageDisplay);

        // get mods to display in sequence (only unambiguously identified mods)
        var modsOnThisProtein = new HashSet<KeyValuePair<int, ModificationWithMass>>();
        foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
        {
            foreach (var mod in pep.AllModsOneIsNterminus)
            {
                if (!mod.Value.modificationType.Contains("PeptideTermMod")
                    && !mod.Value.modificationType.Contains("Common Variable")
                    && !mod.Value.modificationType.Contains("Common Fixed"))
                {
                    modsOnThisProtein.Add(new KeyValuePair<int, ModificationWithMass>(
                        pep.OneBasedStartResidueInProtein + mod.Key - 2, mod.Value));
                }
            }
        }

        // put mods in the sequence coverage display; mods are inserted in ascending position order and the
        // insertion index is measured from the end of the string, so earlier insertions do not shift it
        foreach (var mod in modsOnThisProtein.OrderBy(p => p.Key).ToList())
        {
            if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
            {
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, "[" + mod.Value.id + "]-");
            }
            else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
            {
                int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key);
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, "[" + mod.Value.id + "]");
            }
            else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
            {
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, "-[" + mod.Value.id + "]");
            }
        }

        SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay);

        if (!modsOnThisProtein.Any())
        {
            continue;
        }

        // calculate spectral count percentage of modified observation
        // The four lists are parallel: entry k describes one (position, modification id) pair.
        var tempPepModTotals = new List<int>();    // number of peptides observed with this mod at this position
        var tempPepTotals = new List<int>();       // number of peptides spanning this position
        var tempPepModValues = new List<string>(); // modification id
        var tempModIndex = new List<int>();        // one-based position in the protein

        foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
        {
            foreach (var mod in pep.AllModsOneIsNterminus)
            {
                if (mod.Value.modificationType.Contains("Common Variable")
                    || mod.Value.modificationType.Contains("Common Fixed")
                    || mod.Value.terminusLocalization.Equals(TerminusLocalization.PepC)
                    || mod.Value.terminusLocalization.Equals(TerminusLocalization.NPep))
                {
                    continue;
                }

                int tempIndexInProtein;
                if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
                {
                    tempIndexInProtein = 1;
                }
                else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
                {
                    tempIndexInProtein = pep.OneBasedStartResidueInProtein + mod.Key - 2;
                }
                else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
                {
                    tempIndexInProtein = protein.Length;
                }
                else
                {
                    // In case it's a peptide mod, skip!
                    continue;
                }

                // Find the entry for this exact (position, id) pair. Looking up the position alone only ever
                // finds the first mod recorded at that residue, so a second, different mod on the same residue
                // would be re-added as a new entry every time it is seen.
                int existingEntry = -1;
                for (int k = 0; k < tempModIndex.Count; k++)
                {
                    if (tempModIndex[k] == tempIndexInProtein && tempPepModValues[k] == mod.Value.id)
                    {
                        existingEntry = k;
                        break;
                    }
                }

                if (existingEntry >= 0)
                {
                    tempPepModTotals[existingEntry] += 1;
                    continue;
                }

                // first observation of this mod at this position: count every peptide that spans the position
                // (position 1 also accepts peptides that start at residue 2)
                int tempPepNumTotal = 0;
                foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    if (tempIndexInProtein >= pept.OneBasedStartResidueInProtein - (tempIndexInProtein == 1 ? 1 : 0)
                        && tempIndexInProtein <= pept.OneBasedEndResidueInProtein)
                    {
                        tempPepNumTotal += 1;
                    }
                }

                tempModIndex.Add(tempIndexInProtein);
                tempPepModValues.Add(mod.Value.id);
                tempPepTotals.Add(tempPepNumTotal);
                tempPepModTotals.Add(1);
            }
        }

        var modAnnotations = new List<string>();
        for (int i = 0; i < tempPepModTotals.Count; i++)
        {
            // invariant culture keeps the decimal separator a '.' regardless of the machine's locale
            string occupancy = ((double)tempPepModTotals[i] / (double)tempPepTotals[i]).ToString("F2", CultureInfo.InvariantCulture);
            modAnnotations.Add("#aa" + tempModIndex[i].ToString() + "[" + tempPepModValues[i]
                + ",info:occupancy=" + occupancy
                + "(" + tempPepModTotals[i].ToString() + "/" + tempPepTotals[i].ToString() + ")" + "];");
        }

        string tempModStrings = string.Concat(modAnnotations);
        if (!string.IsNullOrEmpty(tempModStrings))
        {
            ModsInfo.Add(tempModStrings);
        }
    }
}
/// <summary>
/// For every protein in <c>ListOfProteinsOrderedByAccession</c>, computes the fraction of residues covered by
/// PSMs with an unambiguous base sequence, a coverage display string (covered residues upper case, all other
/// residues lower case), the same display annotated with localized modifications, and a per-modification
/// occupancy summary. Results are appended to <c>SequenceCoverageFraction</c>,
/// <c>SequenceCoverageDisplayList</c>, <c>SequenceCoverageDisplayListWithMods</c> and <c>ModsInfo</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>ModsInfo</c> only receives an entry for proteins that produced at least one occupancy
/// annotation, so it is not index-aligned with the other lists. Confirm that callers expect this.
/// </remarks>
public void CalculateSequenceCoverage()
{
    var proteinsWithUnambigSeqPsms = new Dictionary<Protein, List<PeptideWithSetModifications>>();
    var proteinsWithPsmsWithLocalizedMods = new Dictionary<Protein, List<PeptideWithSetModifications>>();

    foreach (var protein in Proteins)
    {
        proteinsWithUnambigSeqPsms.Add(protein, new List<PeptideWithSetModifications>());
        proteinsWithPsmsWithLocalizedMods.Add(protein, new List<PeptideWithSetModifications>());
    }

    foreach (var psm in AllPsmsBelowOnePercentFDR)
    {
        // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage
        if (psm.BaseSequence == null)
        {
            continue;
        }

        foreach (var peptide in psm.BestMatchingPeptides.Select(p => p.Peptide))
        {
            // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
            if (!Proteins.Contains(peptide.Protein))
            {
                continue;
            }

            proteinsWithUnambigSeqPsms[peptide.Protein].Add(peptide);

            // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
            if (psm.FullSequence != null)
            {
                proteinsWithPsmsWithLocalizedMods[peptide.Protein].Add(peptide);
            }
        }
    }

    foreach (var protein in ListOfProteinsOrderedByAccession)
    {
        // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous
        var coveredOneBasedResidues = new HashSet<int>();
        foreach (var peptide in proteinsWithUnambigSeqPsms[protein])
        {
            for (int i = peptide.OneBasedStartResidueInProtein; i <= peptide.OneBasedEndResidueInProtein; i++)
            {
                coveredOneBasedResidues.Add(i);
            }
        }

        // calculate and record the sequence coverage fraction
        double seqCoverageFract = (double)coveredOneBasedResidues.Count / protein.Length;
        SequenceCoverageFraction.Add(seqCoverageFract);

        // convert the observed amino acids to upper case if they are unambiguously observed.
        // Invariant casing is used so the display does not depend on the machine's locale
        // (culture-sensitive casing can turn 'I' into a dotless lower-case i).
        var coverageArray = protein.BaseSequence.ToLowerInvariant().ToCharArray();
        foreach (var obsResidueLocation in coveredOneBasedResidues)
        {
            coverageArray[obsResidueLocation - 1] = char.ToUpperInvariant(coverageArray[obsResidueLocation - 1]);
        }

        string sequenceCoverageDisplay = new string(coverageArray);
        SequenceCoverageDisplayList.Add(sequenceCoverageDisplay);

        // get mods to display in sequence (only unambiguously identified mods)
        var localizedPeptides = proteinsWithPsmsWithLocalizedMods[protein];
        var modsOnThisProtein = new HashSet<KeyValuePair<int, Modification>>();
        foreach (var pep in localizedPeptides)
        {
            foreach (var mod in pep.AllModsOneIsNterminus)
            {
                if (!mod.Value.ModificationType.Contains("PeptideTermMod")
                    && !mod.Value.ModificationType.Contains("Common Variable")
                    && !mod.Value.ModificationType.Contains("Common Fixed"))
                {
                    modsOnThisProtein.Add(new KeyValuePair<int, Modification>(
                        pep.OneBasedStartResidueInProtein + mod.Key - 2, mod.Value));
                }
            }
        }

        // put mods in the sequence coverage display; mods are inserted in ascending position order and the
        // insertion index is measured from the end of the string, so earlier insertions do not shift it
        foreach (var mod in modsOnThisProtein.OrderBy(p => p.Key).ToList())
        {
            if (mod.Value.LocationRestriction.Equals("N-terminal."))
            {
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, $"[{mod.Value.IdWithMotif}]-");
            }
            else if (mod.Value.LocationRestriction.Equals("Anywhere."))
            {
                int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key);
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, $"[{mod.Value.IdWithMotif}]");
            }
            else if (mod.Value.LocationRestriction.Equals("C-terminal."))
            {
                sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, $"-[{mod.Value.IdWithMotif}]");
            }
        }

        SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay);

        if (!modsOnThisProtein.Any())
        {
            continue;
        }

        // calculate spectral count % of modified observations.
        // modKeys, pepModTotals and pepTotals are parallel lists in first-seen order; slotByModKey maps a
        // (position, mod name) pair to its slot so each observation is a single hash lookup.
        var modKeys = new List<(int index, string modName)>();
        var slotByModKey = new Dictionary<(int index, string modName), int>();
        var pepModTotals = new List<int>(); // count of modified peptides for each mod/index
        var pepTotals = new List<int>();    // count of all peptides for each mod/index

        foreach (var pep in localizedPeptides)
        {
            foreach (var mod in pep.AllModsOneIsNterminus)
            {
                if (mod.Value.ModificationType.Contains("Common Variable")
                    || mod.Value.ModificationType.Contains("Common Fixed"))
                {
                    continue;
                }

                int indexInProtein;
                if (mod.Value.LocationRestriction.Equals("N-terminal."))
                {
                    indexInProtein = 1;
                }
                else if (mod.Value.LocationRestriction.Equals("Anywhere."))
                {
                    indexInProtein = pep.OneBasedStartResidueInProtein + mod.Key - 2;
                }
                else if (mod.Value.LocationRestriction.Equals("C-terminal."))
                {
                    indexInProtein = protein.Length;
                }
                else
                {
                    // Any other location restriction (e.g. a peptide terminal mod) is skipped;
                    // we don't want this annotated in the protein's modifications
                    continue;
                }

                (int index, string modName) modKey = (indexInProtein, mod.Value.IdWithMotif);
                if (slotByModKey.TryGetValue(modKey, out int slot))
                {
                    pepModTotals[slot] += 1;
                    continue;
                }

                // first observation of this mod at this position: count every peptide that spans the position
                // (position 1 also accepts peptides that start at residue 2)
                int pepNumTotal = 0;
                foreach (var pept in localizedPeptides)
                {
                    if (indexInProtein >= pept.OneBasedStartResidueInProtein - (indexInProtein == 1 ? 1 : 0)
                        && indexInProtein <= pept.OneBasedEndResidueInProtein)
                    {
                        pepNumTotal += 1;
                    }
                }

                slotByModKey.Add(modKey, modKeys.Count);
                modKeys.Add(modKey);
                pepTotals.Add(pepNumTotal);
                pepModTotals.Add(1);
            }
        }

        var modStrings = new List<(int aaNum, string part)>();
        for (int i = 0; i < pepModTotals.Count; i++)
        {
            // invariant culture keeps the decimal separator a '.' regardless of the machine's locale
            string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i])
                .ToString("F2", System.Globalization.CultureInfo.InvariantCulture);
            string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}";
            string annotation = $"#aa{modKeys[i].index.ToString()}[{modKeys[i].modName},info:occupancy={occupancy}({fractOccupancy})]";
            modStrings.Add((modKeys[i].index, annotation));
        }

        // OrderBy is a stable sort, so annotations at the same position keep their first-seen order
        var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part));
        if (!string.IsNullOrEmpty(modInfoString))
        {
            ModsInfo.Add(modInfoString);
        }
    }
}