Exemplo n.º 1
0
        /// <summary>
        /// For every protein in <c>ListOfProteinsOrderedByAccession</c>, computes the number of residues covered by
        /// unambiguous-sequence PSMs divided by the protein length, a coverage display string (covered residues in
        /// upper case, the rest in lower case), the same display annotated with localized modifications, and a
        /// per-modification occupancy summary.
        /// </summary>
        /// <remarks>
        /// Results are appended to <c>SequenceCoveragePercent</c>, <c>SequenceCoverageDisplayList</c>,
        /// <c>SequenceCoverageDisplayListWithMods</c> and <c>ModsInfo</c>. When the extracted residues do not match a
        /// peptide's base sequence (or coverage exceeds 1), <c>double.NaN</c> and an error message are recorded and
        /// nothing is added to the last two lists for that protein. <c>ModsInfo</c> only receives an entry when at
        /// least one occupancy string was produced.
        /// </remarks>
        public void CalculateSequenceCoverage()
        {
            var proteinsWithUnambigSeqPsms        = new Dictionary<Protein, List<PeptideWithSetModifications>>();
            var proteinsWithPsmsWithLocalizedMods = new Dictionary<Protein, List<PeptideWithSetModifications>>();

            foreach (var protein in Proteins)
            {
                proteinsWithUnambigSeqPsms.Add(protein, new List<PeptideWithSetModifications>());
                proteinsWithPsmsWithLocalizedMods.Add(protein, new List<PeptideWithSetModifications>());
            }

            foreach (var psm in AllPsmsBelowOnePercentFDR)
            {
                // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage
                if (psm.BaseSequence == null)
                {
                    continue;
                }

                foreach (var pepWithSetMods in psm.CompactPeptides.SelectMany(b => b.Value.Item2))
                {
                    // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
                    if (!Proteins.Contains(pepWithSetMods.Protein))
                    {
                        continue;
                    }

                    proteinsWithUnambigSeqPsms[pepWithSetMods.Protein].Add(pepWithSetMods);

                    // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
                    if (psm.FullSequence != null)
                    {
                        proteinsWithPsmsWithLocalizedMods[pepWithSetMods.Protein].Add(pepWithSetMods);
                    }
                }
            }

            foreach (var protein in ListOfProteinsOrderedByAccession)
            {
                bool errorResult             = false;
                var  sequenceCoverageDisplay = protein.BaseSequence.ToLower(CultureInfo.InvariantCulture);
                var  coveredOneBasedResidues = new HashSet<int>();

                // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous
                foreach (var peptide in proteinsWithUnambigSeqPsms[protein])
                {
                    var sequenceExtractedFromProtein = new System.Text.StringBuilder();
                    for (int i = peptide.OneBasedStartResidueInProtein; i <= peptide.OneBasedEndResidueInProtein; i++)
                    {
                        // check for bugs in sequence coverage; make sure we have the right amino acids!
                        sequenceExtractedFromProtein.Append(sequenceCoverageDisplay[i - 1]);
                        coveredOneBasedResidues.Add(i);
                    }

                    // Upper-case with the invariant culture to mirror the invariant lower-casing above;
                    // a culture-sensitive ToUpper() maps 'i' to a dotted capital under Turkish-style cultures
                    // and would flag every isoleucine-containing peptide as an error.
                    if (!sequenceExtractedFromProtein.ToString().ToUpperInvariant().Equals(peptide.BaseSequence))
                    {
                        errorResult = true;
                    }
                }

                // calculate sequence coverage (covered residue count over protein length)
                double seqCoveragePercent = (double)coveredOneBasedResidues.Count / protein.Length;
                if (seqCoveragePercent > 1)
                {
                    errorResult = true;
                }

                // add the coverage or NaN if there was an error
                SequenceCoveragePercent.Add(errorResult ? double.NaN : seqCoveragePercent);

                // convert the observed amino acids to upper case if they are unambiguously observed
                var coverageArray = sequenceCoverageDisplay.ToCharArray();
                foreach (var obsResidueLocation in coveredOneBasedResidues)
                {
                    coverageArray[obsResidueLocation - 1] = char.ToUpperInvariant(coverageArray[obsResidueLocation - 1]);
                }
                sequenceCoverageDisplay = new string(coverageArray);

                // an errored protein only gets the error message; no mod display or mod info is recorded for it
                if (errorResult)
                {
                    SequenceCoverageDisplayList.Add("Error calculating sequence coverage");
                    continue;
                }

                SequenceCoverageDisplayList.Add(sequenceCoverageDisplay);

                // get mods to display in sequence (only unambiguously identified mods)
                var modsOnThisProtein = new HashSet<KeyValuePair<int, ModificationWithMass>>();
                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        if (!mod.Value.modificationType.Contains("PeptideTermMod") && !mod.Value.modificationType.Contains("Common Variable") && !mod.Value.modificationType.Contains("Common Fixed"))
                        {
                            // presumably key 1 is the peptide N-terminus and key 2 the first residue, so this yields a protein residue number - TODO confirm
                            modsOnThisProtein.Add(new KeyValuePair<int, ModificationWithMass>(pep.OneBasedStartResidueInProtein + mod.Key - 2, mod.Value));
                        }
                    }
                }

                // put mods in the sequence coverage display, in ascending position order
                foreach (var mod in modsOnThisProtein.OrderBy(p => p.Key).ToList())
                {
                    if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, "[" + mod.Value.id + "]-");
                    }
                    else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
                    {
                        // index is measured from the end of the string so that text already inserted to the left does not shift it
                        int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key);
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, "[" + mod.Value.id + "]");
                    }
                    else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, "-[" + mod.Value.id + "]");
                    }
                }

                SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay);

                if (!modsOnThisProtein.Any())
                {
                    continue;
                }

                // calculate spectral count percentage of modified observation; the four lists are parallel (one slot per position+mod pair)
                var tempPepModTotals = new List<int>();    // number of peptides carrying the mod at that position
                var tempPepTotals    = new List<int>();    // number of peptides spanning that position
                var tempPepModValues = new List<string>(); // mod id
                var tempModIndex     = new List<int>();    // modified position in the protein

                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        if (mod.Value.modificationType.Contains("Common Variable") || mod.Value.modificationType.Contains("Common Fixed") || mod.Value.terminusLocalization.Equals(TerminusLocalization.PepC) || mod.Value.terminusLocalization.Equals(TerminusLocalization.NPep))
                        {
                            continue;
                        }

                        int tempIndexInProtein;
                        if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
                        {
                            tempIndexInProtein = 1;
                        }
                        else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
                        {
                            tempIndexInProtein = pep.OneBasedStartResidueInProtein + mod.Key - 2;
                        }
                        else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
                        {
                            tempIndexInProtein = protein.Length;
                        }
                        else
                        {
                            // In case it's a peptide mod, skip!
                            continue;
                        }

                        // Find the slot for this exact (position, mod id) pair. Looking the slot up by position
                        // alone finds only the first mod recorded at that residue, so a second, different mod at
                        // the same residue would get a fresh duplicate entry on every observation.
                        int existingEntry = -1;
                        for (int k = 0; k < tempModIndex.Count; k++)
                        {
                            if (tempModIndex[k] == tempIndexInProtein && tempPepModValues[k] == mod.Value.id)
                            {
                                existingEntry = k;
                                break;
                            }
                        }

                        if (existingEntry >= 0)
                        {
                            tempPepModTotals[existingEntry] += 1;
                            continue;
                        }

                        // first observation of this pair: count every localized peptide that spans the position
                        int tempPepNumTotal = 0;
                        foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein])
                        {
                            if (tempIndexInProtein >= pept.OneBasedStartResidueInProtein - (tempIndexInProtein == 1 ? 1 : 0) && tempIndexInProtein <= pept.OneBasedEndResidueInProtein)
                            {
                                tempPepNumTotal += 1;
                            }
                        }

                        tempModIndex.Add(tempIndexInProtein);
                        tempPepTotals.Add(tempPepNumTotal);
                        tempPepModValues.Add(mod.Value.id);
                        tempPepModTotals.Add(1);
                    }
                }

                var tempModStrings = new System.Text.StringBuilder();
                for (int i = 0; i < tempPepModTotals.Count; i++)
                {
                    // invariant culture keeps the decimal separator a '.', since ',' already delimits fields in this annotation
                    string occupancy = ((double)tempPepModTotals[i] / (double)tempPepTotals[i]).ToString("F2", CultureInfo.InvariantCulture);
                    tempModStrings.Append("#aa" + tempModIndex[i].ToString() + "[" + tempPepModValues[i].ToString() + ",info:occupancy=" + occupancy + "(" + tempPepModTotals[i].ToString() + "/" + tempPepTotals[i].ToString() + ")" + "];");
                }

                if (tempModStrings.Length > 0)
                {
                    ModsInfo.Add(tempModStrings.ToString());
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// For every protein in <c>ListOfProteinsOrderedByAccession</c>, computes the fraction of residues covered by
        /// unambiguous-sequence PSMs, a coverage display string (covered residues in upper case, the rest in lower
        /// case), the same display annotated with localized modifications, and a per-modification occupancy summary.
        /// </summary>
        /// <remarks>
        /// Results are appended to <c>SequenceCoverageFraction</c>, <c>SequenceCoverageDisplayList</c>,
        /// <c>SequenceCoverageDisplayListWithMods</c> and <c>ModsInfo</c>. <c>ModsInfo</c> only receives an entry for
        /// proteins that yield a non-empty occupancy string.
        /// </remarks>
        public void CalculateSequenceCoverage()
        {
            var proteinsWithUnambigSeqPsms        = new Dictionary<Protein, List<PeptideWithSetModifications>>();
            var proteinsWithPsmsWithLocalizedMods = new Dictionary<Protein, List<PeptideWithSetModifications>>();

            foreach (var protein in Proteins)
            {
                proteinsWithUnambigSeqPsms.Add(protein, new List<PeptideWithSetModifications>());
                proteinsWithPsmsWithLocalizedMods.Add(protein, new List<PeptideWithSetModifications>());
            }

            foreach (var psm in AllPsmsBelowOnePercentFDR)
            {
                // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage
                if (psm.BaseSequence == null)
                {
                    continue;
                }

                foreach (var peptide in psm.BestMatchingPeptides.Select(p => p.Peptide))
                {
                    // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
                    if (!Proteins.Contains(peptide.Protein))
                    {
                        continue;
                    }

                    proteinsWithUnambigSeqPsms[peptide.Protein].Add(peptide);

                    // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
                    if (psm.FullSequence != null)
                    {
                        proteinsWithPsmsWithLocalizedMods[peptide.Protein].Add(peptide);
                    }
                }
            }

            foreach (var protein in ListOfProteinsOrderedByAccession)
            {
                var coveredOneBasedResidues = new HashSet<int>();

                // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous
                foreach (var peptide in proteinsWithUnambigSeqPsms[protein])
                {
                    for (int i = peptide.OneBasedStartResidueInProtein; i <= peptide.OneBasedEndResidueInProtein; i++)
                    {
                        coveredOneBasedResidues.Add(i);
                    }
                }

                // calculate and record the covered fraction of the protein
                double seqCoverageFract = (double)coveredOneBasedResidues.Count / protein.Length;
                SequenceCoverageFraction.Add(seqCoverageFract);

                // Lower-case everything, then upper-case the observed residues. Invariant casing is used so the
                // display does not depend on the machine's culture (a culture-sensitive ToLower() turns 'I' into a
                // dotless 'i' under Turkish-style cultures).
                var coverageArray = protein.BaseSequence.ToLowerInvariant().ToCharArray();
                foreach (var obsResidueLocation in coveredOneBasedResidues)
                {
                    coverageArray[obsResidueLocation - 1] = char.ToUpperInvariant(coverageArray[obsResidueLocation - 1]);
                }
                string sequenceCoverageDisplay = new string(coverageArray);

                // add the coverage display
                SequenceCoverageDisplayList.Add(sequenceCoverageDisplay);

                // get mods to display in sequence (only unambiguously identified mods)
                var modsOnThisProtein = new HashSet<KeyValuePair<int, Modification>>();
                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        if (!mod.Value.ModificationType.Contains("PeptideTermMod") &&
                            !mod.Value.ModificationType.Contains("Common Variable") &&
                            !mod.Value.ModificationType.Contains("Common Fixed"))
                        {
                            // presumably key 1 is the peptide N-terminus and key 2 the first residue, so this yields a protein residue number - TODO confirm
                            modsOnThisProtein.Add(new KeyValuePair<int, Modification>(pep.OneBasedStartResidueInProtein + mod.Key - 2, mod.Value));
                        }
                    }
                }

                // put mods in the sequence coverage display, in ascending position order
                foreach (var mod in modsOnThisProtein.OrderBy(p => p.Key).ToList())
                {
                    if (mod.Value.LocationRestriction.Equals("N-terminal."))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(
                            0,
                            $"[{mod.Value.IdWithMotif}]-");
                    }
                    else if (mod.Value.LocationRestriction.Equals("Anywhere."))
                    {
                        // index is measured from the end of the string so that text already inserted to the left does not shift it
                        int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key);
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(
                            modStringIndex,
                            $"[{mod.Value.IdWithMotif}]");
                    }
                    else if (mod.Value.LocationRestriction.Equals("C-terminal."))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(
                            sequenceCoverageDisplay.Length,
                            $"-[{mod.Value.IdWithMotif}]");
                    }
                }
                SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay);

                if (!modsOnThisProtein.Any())
                {
                    continue;
                }

                // calculate spectral count % of modified observations; the three lists are parallel
                var pepModTotals = new List<int>();                         // count of modified peptides for each mod/index
                var pepTotals    = new List<int>();                         // count of all peptides for each mod/index
                var modIndex     = new List<(int index, string modName)>(); // index and name of the modified position

                // maps each (index, name) pair to its slot in the lists above, replacing repeated linear scans
                var slotByModKey = new Dictionary<(int index, string modName), int>();

                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        // NOTE(review): LocationRestriction is compared with string literals below, so these two
                        // enum comparisons look like they can never be true - confirm and consider removing.
                        if (mod.Value.ModificationType.Contains("Common Variable") ||
                            mod.Value.ModificationType.Contains("Common Fixed") ||
                            mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.PepC) ||
                            mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.NPep))
                        {
                            continue;
                        }

                        int indexInProtein;
                        if (mod.Value.LocationRestriction.Equals("N-terminal."))
                        {
                            indexInProtein = 1;
                        }
                        else if (mod.Value.LocationRestriction.Equals("Anywhere."))
                        {
                            indexInProtein = pep.OneBasedStartResidueInProtein + mod.Key - 2;
                        }
                        else if (mod.Value.LocationRestriction.Equals("C-terminal."))
                        {
                            indexInProtein = protein.Length;
                        }
                        else
                        {
                            // In case it's a peptide terminal mod, skip!
                            // we don't want this annotated in the protein's modifications
                            continue;
                        }

                        (int index, string modName) modKey = (indexInProtein, mod.Value.IdWithMotif);
                        if (slotByModKey.TryGetValue(modKey, out int slot))
                        {
                            pepModTotals[slot] += 1;
                            continue;
                        }

                        // first observation of this pair: count every localized peptide that spans the position
                        int pepNumTotal = 0;
                        foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein])
                        {
                            if (indexInProtein >= pept.OneBasedStartResidueInProtein - (indexInProtein == 1 ? 1 : 0) &&
                                indexInProtein <= pept.OneBasedEndResidueInProtein)
                            {
                                pepNumTotal += 1;
                            }
                        }

                        slotByModKey.Add(modKey, modIndex.Count);
                        modIndex.Add(modKey);
                        pepTotals.Add(pepNumTotal);
                        pepModTotals.Add(1);
                    }
                }

                var modStrings = new List<(int aaNum, string part)>();
                for (int i = 0; i < pepModTotals.Count; i++)
                {
                    string aa      = modIndex[i].index.ToString();
                    string modName = modIndex[i].modName;

                    // invariant culture keeps the decimal separator a '.', since ',' already delimits fields in this annotation
                    string occupancy      = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2", System.Globalization.CultureInfo.InvariantCulture);
                    string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}";
                    string tempString     = $"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]";
                    modStrings.Add((modIndex[i].index, tempString));
                }

                // entries are reported in ascending residue order, separated by ';'
                var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part));

                if (!string.IsNullOrEmpty(modInfoString))
                {
                    ModsInfo.Add(modInfoString);
                }
            }
        }