示例#1
0
        /// <summary>
        /// Computes sequence coverage for every protein in <c>ListOfProteinsOrderedByAccession</c> using the
        /// peptides of <c>AllPsmsBelowOnePercentFDR</c>.
        /// </summary>
        /// <remarks>
        /// For each protein, in order, this method appends:
        /// <list type="bullet">
        /// <item><description>to <c>SequenceCoveragePercent</c>: the covered fraction of residues, or NaN on error;</description></item>
        /// <item><description>to <c>SequenceCoverageDisplayList</c>: the lower-case sequence with covered residues upper-cased, or an error message;</description></item>
        /// <item><description>to <c>SequenceCoverageDisplayListWithMods</c> (only when no error occurred): the same display with mod ids inserted;</description></item>
        /// <item><description>to <c>ModsInfo</c> (only when no error occurred and at least one mod qualifies): per-site occupancy strings.</description></item>
        /// </list>
        /// NOTE(review): because the last two lists are skipped for errored proteins, they can hold fewer
        /// entries than the protein list — confirm that consumers expect this.
        /// </remarks>
        public void CalculateSequenceCoverage()
        {
            var proteinsWithUnambigSeqPsms = new Dictionary<Protein, List<PeptideWithSetModifications>>();
            var proteinsWithPsmsWithLocalizedMods = new Dictionary<Protein, List<PeptideWithSetModifications>>();

            foreach (var protein in Proteins)
            {
                proteinsWithUnambigSeqPsms.Add(protein, new List<PeptideWithSetModifications>());
                proteinsWithPsmsWithLocalizedMods.Add(protein, new List<PeptideWithSetModifications>());
            }

            foreach (var psm in AllPsmsBelowOnePercentFDR)
            {
                // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage
                if (psm.BaseSequence == null)
                {
                    continue;
                }

                foreach (var pepWithSetMods in psm.CompactPeptides.SelectMany(b => b.Value.Item2))
                {
                    // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
                    if (!Proteins.Contains(pepWithSetMods.Protein))
                    {
                        continue;
                    }

                    proteinsWithUnambigSeqPsms[pepWithSetMods.Protein].Add(pepWithSetMods);

                    // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
                    if (psm.FullSequence != null)
                    {
                        proteinsWithPsmsWithLocalizedMods[pepWithSetMods.Protein].Add(pepWithSetMods);
                    }
                }
            }

            foreach (var protein in ListOfProteinsOrderedByAccession)
            {
                bool errorResult = false;
                var sequenceCoverageDisplay = protein.BaseSequence.ToLower(CultureInfo.InvariantCulture);
                HashSet<int> coveredOneBasedResidues = new HashSet<int>();

                // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous
                foreach (var peptide in proteinsWithUnambigSeqPsms[protein])
                {
                    int startResidue = peptide.OneBasedStartResidueInProtein;
                    int endResidue = peptide.OneBasedEndResidueInProtein;

                    // a peptide that does not fit inside the protein cannot be checked against it;
                    // report it through the normal error path instead of indexing out of range
                    if (startResidue < 1 || endResidue > sequenceCoverageDisplay.Length)
                    {
                        errorResult = true;
                        continue;
                    }

                    int peptideLength = endResidue - startResidue + 1;
                    string sequenceExtractedFromProtein = peptideLength > 0
                        ? sequenceCoverageDisplay.Substring(startResidue - 1, peptideLength)
                        : "";

                    for (int i = startResidue; i <= endResidue; i++)
                    {
                        coveredOneBasedResidues.Add(i);
                    }

                    // check for bugs in sequence coverage; make sure we have the right amino acids!
                    // The sequence was lower-cased with the invariant culture, so it must be upper-cased the same
                    // way: a culture-sensitive ToUpper maps 'i' to a dotted capital I under Turkish/Azeri cultures.
                    if (!sequenceExtractedFromProtein.ToUpperInvariant().Equals(peptide.BaseSequence))
                    {
                        errorResult = true;
                    }
                }

                // calculate sequence coverage percent
                double seqCoveragePercent = (double)coveredOneBasedResidues.Count / protein.Length;
                if (seqCoveragePercent > 1)
                {
                    errorResult = true;
                }

                // add the percent coverage or NaN if there was an error
                SequenceCoveragePercent.Add(errorResult ? double.NaN : seqCoveragePercent);

                // convert the observed amino acids to upper case if they are unambiguously observed
                var coverageArray = sequenceCoverageDisplay.ToCharArray();
                foreach (var obsResidueLocation in coveredOneBasedResidues)
                {
                    coverageArray[obsResidueLocation - 1] = char.ToUpperInvariant(coverageArray[obsResidueLocation - 1]);
                }
                sequenceCoverageDisplay = new string(coverageArray);

                // an errored protein only gets the error message; no mod display or occupancy is produced for it
                if (errorResult)
                {
                    SequenceCoverageDisplayList.Add("Error calculating sequence coverage");
                    continue;
                }

                SequenceCoverageDisplayList.Add(sequenceCoverageDisplay);

                // get mods to display in sequence (only unambiguously identified mods)
                var modsOnThisProtein = new HashSet<KeyValuePair<int, ModificationWithMass>>();
                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        if (!mod.Value.modificationType.Contains("PeptideTermMod") && !mod.Value.modificationType.Contains("Common Variable") && !mod.Value.modificationType.Contains("Common Fixed"))
                        {
                            // mod.Key is offset by the N-terminus slot, hence "- 2" to reach the one-based protein residue
                            modsOnThisProtein.Add(new KeyValuePair<int, ModificationWithMass>(pep.OneBasedStartResidueInProtein + mod.Key - 2, mod.Value));
                        }
                    }
                }

                // put mods in the sequence coverage display, walking the sites in ascending order
                foreach (var mod in modsOnThisProtein.OrderBy(p => p.Key).ToList())
                {
                    if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, "[" + mod.Value.id + "]-");
                    }
                    else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
                    {
                        // the index is measured from the end of the string so that earlier insertions do not shift it
                        int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key);
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, "[" + mod.Value.id + "]");
                    }
                    else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
                    {
                        sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, "-[" + mod.Value.id + "]");
                    }
                }

                SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay);

                if (!modsOnThisProtein.Any())
                {
                    continue;
                }

                // calculate spectral count percentage of modified observation;
                // the four lists below are parallel: entry i describes one (site, mod id) pair
                List<int> tempPepModTotals = new List<int>();       // number of peptides carrying the mod at the site
                List<int> tempPepTotals = new List<int>();          // number of peptides spanning the site
                List<string> tempPepModValues = new List<string>(); // mod id
                List<int> tempModIndex = new List<int>();           // one-based site in the protein

                foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein])
                {
                    foreach (var mod in pep.AllModsOneIsNterminus)
                    {
                        if (mod.Value.modificationType.Contains("Common Variable") || mod.Value.modificationType.Contains("Common Fixed") || mod.Value.terminusLocalization.Equals(TerminusLocalization.PepC) || mod.Value.terminusLocalization.Equals(TerminusLocalization.NPep))
                        {
                            continue;
                        }

                        int tempIndexInProtein;
                        if (mod.Value.terminusLocalization.Equals(TerminusLocalization.NProt))
                        {
                            tempIndexInProtein = 1;
                        }
                        else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.Any))
                        {
                            tempIndexInProtein = pep.OneBasedStartResidueInProtein + mod.Key - 2;
                        }
                        else if (mod.Value.terminusLocalization.Equals(TerminusLocalization.ProtC))
                        {
                            tempIndexInProtein = protein.Length;
                        }
                        else
                        {
                            // In case it's a peptide mod, skip!
                            continue;
                        }

                        // Look for an entry matching BOTH the site and the mod id. Checking only the first entry
                        // recorded at the site would miss a second mod type on the same residue and append a
                        // duplicate entry for each of its observations.
                        int existingEntry = -1;
                        for (int i = 0; i < tempModIndex.Count; i++)
                        {
                            if (tempModIndex[i] == tempIndexInProtein && tempPepModValues[i] == mod.Value.id)
                            {
                                existingEntry = i;
                                break;
                            }
                        }

                        if (existingEntry >= 0)
                        {
                            tempPepModTotals[existingEntry] += 1;
                            continue;
                        }

                        // first observation of this (site, mod) pair: count every peptide that spans the site
                        int tempPepNumTotal = 0;
                        foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein])
                        {
                            if (tempIndexInProtein >= pept.OneBasedStartResidueInProtein - (tempIndexInProtein == 1 ? 1 : 0) && tempIndexInProtein <= pept.OneBasedEndResidueInProtein)
                            {
                                tempPepNumTotal += 1;
                            }
                        }

                        tempModIndex.Add(tempIndexInProtein);
                        tempPepTotals.Add(tempPepNumTotal);
                        tempPepModValues.Add(mod.Value.id);
                        tempPepModTotals.Add(1);
                    }
                }

                string tempModStrings = "";
                for (int i = 0; i < tempPepModTotals.Count; i++)
                {
                    tempModStrings += "#aa" + tempModIndex[i].ToString() + "[" + tempPepModValues[i].ToString() + ",info:occupancy=" + ((double)tempPepModTotals[i] / (double)tempPepTotals[i]).ToString("F2") + "(" + tempPepModTotals[i].ToString() + "/" + tempPepTotals[i].ToString() + ")" + "];";
                }

                if (!string.IsNullOrEmpty(tempModStrings))
                {
                    ModsInfo.Add(tempModStrings);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Renders this protein group as one tab-delimited row. Every column, including the last one,
        /// is followed by a tab; multi-valued columns are joined with "|".
        /// </summary>
        /// <returns>The row text, without a trailing line break.</returns>
        public override string ToString()
        {
            var row = new StringBuilder();

            // list of protein accession numbers
            row.Append(ProteinGroupName).Append("\t");

            // genes (first gene name of each protein)
            var geneNames = ListOfProteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault());
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", geneNames))).Append("\t");

            // organisms
            var organisms = ListOfProteinsOrderedByAccession.Select(p => p.Organism).Distinct();
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", organisms))).Append("\t");

            // list of protein names
            var proteinNames = ListOfProteinsOrderedByAccession.Select(p => p.FullName).Distinct();
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", proteinNames))).Append("\t");

            // list of masses, one per distinct base sequence; NaN when the mass cannot be computed
            List<double> monoisotopicMasses = new List<double>();
            foreach (var baseSequence in ListOfProteinsOrderedByAccession.Select(p => p.BaseSequence).Distinct())
            {
                double mass;
                try
                {
                    mass = new Proteomics.AminoAcidPolymer.Peptide(baseSequence).MonoisotopicMass;
                }
                catch (System.Exception)
                {
                    mass = double.NaN;
                }
                monoisotopicMasses.Add(mass);
            }
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", monoisotopicMasses))).Append("\t");

            // number of proteins in group
            row.Append(Proteins.Count).Append("\t");

            // list of unique peptides (with or without mods, depending on DisplayModsOnPeptides)
            var uniqueSequences = DisplayModsOnPeptides
                ? UniquePeptides.Select(p => p.Sequence)
                : UniquePeptides.Select(p => p.BaseSequence);
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", uniqueSequences.Distinct()))).Append("\t");

            // list of shared peptides: everything in the group that is not unique to it
            var sharedPeptides = AllPeptides.Except(UniquePeptides);
            var sharedSequences = DisplayModsOnPeptides
                ? sharedPeptides.Select(p => p.Sequence)
                : sharedPeptides.Select(p => p.BaseSequence);
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", sharedSequences.Distinct()))).Append("\t");

            // number of peptides
            var allSequences = DisplayModsOnPeptides
                ? AllPeptides.Select(p => p.Sequence)
                : AllPeptides.Select(p => p.BaseSequence);
            row.Append(allSequences.Distinct().Count()).Append("\t");

            // number of unique peptides
            row.Append(uniqueSequences.Distinct().Count()).Append("\t");

            // sequence coverage percent
            var coveragePercents = SequenceCoveragePercent.Select(p => string.Format("{0:0}%", p * 100));
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", coveragePercents))).Append("\t");

            // sequence coverage
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayList))).Append("\t");

            // sequence coverage with mods
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayListWithMods))).Append("\t");

            // detailed mods information list
            row.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo))).Append("\t");

            // MS1 intensity (retrieved from FlashLFQ in the SearchTask): one column per file,
            // left empty when the intensity is not positive
            if (IntensitiesByFile != null && FilesForQuantification != null)
            {
                foreach (var file in FilesForQuantification)
                {
                    var intensity = IntensitiesByFile[file];
                    if (intensity > 0)
                    {
                        row.Append(intensity);
                    }
                    row.Append("\t");
                }
            }

            // number of PSMs for listed peptides
            row.Append(AllPsmsBelowOnePercentFDR.Count).Append("\t");

            // decoy / contaminant / target flag; decoy takes precedence over contaminant
            row.Append(IsDecoy ? "D" : (IsContaminant ? "C" : "T")).Append("\t");

            // cumulative target
            row.Append(CumulativeTarget).Append("\t");

            // cumulative decoy
            row.Append(CumulativeDecoy).Append("\t");

            // q value
            row.Append(QValue).Append("\t");

            // best peptide score
            row.Append(BestPeptideScore).Append("\t");

            // best peptide q value
            row.Append(BestPeptideQValue).Append("\t");

            return row.ToString();
        }