Esempio n. 1
0
        public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath)
        {
            var allPsmsForThisFile           = new HashSet <PeptideSpectralMatch>(this.AllPsmsBelowOnePercentFDR.Where(p => p.FullFilePath.Equals(fullFilePath)));
            var allPeptidesForThisFile       = new HashSet <PeptideWithSetModifications>(allPsmsForThisFile.SelectMany(p => p.CompactPeptides.SelectMany(b => b.Value.Item2)));
            var allUniquePeptidesForThisFile = new HashSet <PeptideWithSetModifications>(this.UniquePeptides.Intersect(allPeptidesForThisFile));

            ProteinGroup subsetPg = new ProteinGroup(this.Proteins, allPeptidesForThisFile, allUniquePeptidesForThisFile)
            {
                AllPsmsBelowOnePercentFDR = allPsmsForThisFile,
                DisplayModsOnPeptides     = this.DisplayModsOnPeptides
            };

            SpectraFileInfo spectraFileInfo = null;

            if (FilesForQuantification != null)
            {
                spectraFileInfo = FilesForQuantification.Where(p => p.FullFilePathWithExtension == fullFilePath).First();
                subsetPg.FilesForQuantification = new List <SpectraFileInfo> {
                    spectraFileInfo
                };
            }

            if (IntensitiesByFile == null)
            {
                subsetPg.IntensitiesByFile = null;
            }
            else
            {
                subsetPg.IntensitiesByFile = new Dictionary <SpectraFileInfo, double> {
                    { spectraFileInfo, IntensitiesByFile[spectraFileInfo] }
                };
            }

            return(subsetPg);
        }
Esempio n. 2
0
        public string GetTabSeparatedHeader()
        {
            var sb = new StringBuilder();

            sb.Append("Protein Accession" + '\t');
            sb.Append("Gene" + '\t');
            sb.Append("Organism" + '\t');
            sb.Append("Protein Full Name" + '\t');
            sb.Append("Protein Unmodified Mass" + '\t');
            sb.Append("Number of Proteins in Group" + '\t');
            sb.Append("Unique Peptides" + '\t');
            sb.Append("Shared Peptides" + '\t');
            sb.Append("Number of Peptides" + '\t');
            sb.Append("Number of Unique Peptides" + '\t');
            sb.Append("Sequence Coverage Fraction" + '\t');
            sb.Append("Sequence Coverage" + '\t');
            sb.Append("Sequence Coverage with Mods" + '\t');
            sb.Append("Modification Info List" + "\t");
            if (FilesForQuantification != null)
            {
                bool unfractionated      = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1;
                bool conditionsUndefined = FilesForQuantification.All(p => string.IsNullOrEmpty(p.Condition));

                // this is a hacky way to test for SILAC-labeled data...
                // Currently SILAC will report 1 column of intensities per label per spectra file, and is NOT summarized
                // into biorep-level intensity values. the SILAC code uses the "condition" field to organize this info,
                // even if the experimental design is not defined by the user. So the following bool is a way to distinguish
                // between experimental design being used in SILAC automatically vs. being defined by the user
                bool silacExperimentalDesign = FilesForQuantification.Any(p => !File.Exists(p.FullFilePathWithExtension));

                foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition))
                {
                    foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key))
                    {
                        if ((conditionsUndefined && unfractionated) || silacExperimentalDesign)
                        {
                            // if the data is unfractionated and the conditions haven't been defined, just use the file name as the intensity header
                            sb.Append("Intensity_" + sample.First().FilenameWithoutExtension + "\t");
                        }
                        else
                        {
                            // if the data is fractionated and/or the conditions have been defined, label the header w/ the condition and biorep number
                            sb.Append("Intensity_" + sample.First().Condition + "_" + (sample.First().BiologicalReplicate + 1) + "\t");
                        }
                    }
                }
            }
            sb.Append("Number of PSMs" + '\t');
            sb.Append("Protein Decoy/Contaminant/Target" + '\t');
            sb.Append("Protein Cumulative Target" + '\t');
            sb.Append("Protein Cumulative Decoy" + '\t');
            sb.Append("Protein QValue" + '\t');
            sb.Append("Best Peptide Score" + '\t');
            sb.Append("Best Peptide Notch QValue");
            return(sb.ToString());
        }
Esempio n. 3
0
        public string GetTabSeparatedHeader()
        {
            var sb = new StringBuilder();

            sb.Append("Protein Accession" + '\t');
            sb.Append("Gene" + '\t');
            sb.Append("Organism" + '\t');
            sb.Append("Protein Full Name" + '\t');
            sb.Append("Protein Unmodified Mass" + '\t');
            sb.Append("Number of Proteins in Group" + '\t');
            sb.Append("Unique Peptides" + '\t');
            sb.Append("Shared Peptides" + '\t');
            sb.Append("Number of Peptides" + '\t');
            sb.Append("Number of Unique Peptides" + '\t');
            sb.Append("Sequence Coverage Fraction" + '\t');
            sb.Append("Sequence Coverage" + '\t');
            sb.Append("Sequence Coverage with Mods" + '\t');
            sb.Append("Modification Info List" + "\t");
            if (FilesForQuantification != null)
            {
                bool unfractionated    = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1;
                bool conditionsDefined = FilesForQuantification.All(p => p.Condition == "Default") || FilesForQuantification.All(p => string.IsNullOrWhiteSpace(p.Condition));

                foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition))
                {
                    foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key))
                    {
                        if (!conditionsDefined && unfractionated)
                        {
                            // if the data is unfractionated and the conditions haven't been defined, just use the file name as the intensity header
                            sb.Append("Intensity_" + sample.First().FilenameWithoutExtension + "\t");
                        }
                        else
                        {
                            // if the data is fractionated and/or the conditions have been defined, label the header w/ the condition and biorep number
                            sb.Append("Intensity_" + sample.First().Condition + "_" + (sample.First().BiologicalReplicate + 1) + "\t");
                        }
                    }
                }
            }
            sb.Append("Number of PSMs" + '\t');
            sb.Append("Protein Decoy/Contaminant/Target" + '\t');
            sb.Append("Protein Cumulative Target" + '\t');
            sb.Append("Protein Cumulative Decoy" + '\t');
            sb.Append("Protein QValue" + '\t');
            sb.Append("Best Peptide Score" + '\t');
            sb.Append("Best Peptide Notch QValue");
            return(sb.ToString());
        }
Esempio n. 4
0
        public override string ToString()
        {
            var sb = new StringBuilder();

            // list of protein accession numbers
            sb.Append(ProteinGroupName);
            sb.Append("\t");

            // genes
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault()))));
            sb.Append("\t");

            // organisms
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.Organism).Distinct())));
            sb.Append("\t");

            // list of protein names
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.FullName).Distinct())));
            sb.Append("\t");

            // list of masses
            var           sequences = ListOfProteinsOrderedByAccession.Select(p => p.BaseSequence).Distinct();
            List <double> masses    = new List <double>();

            foreach (var sequence in sequences)
            {
                try
                {
                    masses.Add(new Proteomics.AminoAcidPolymer.Peptide(sequence).MonoisotopicMass);
                }
                catch (System.Exception)
                {
                    masses.Add(double.NaN);
                }
            }
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", masses)));
            sb.Append("\t");

            // number of proteins in group
            sb.Append("" + Proteins.Count);
            sb.Append("\t");

            // list of unique peptides
            if (UniquePeptidesOutput != null)
            {
                sb.Append(GlobalVariables.CheckLengthOfOutput(UniquePeptidesOutput));
            }
            sb.Append("\t");

            // list of shared peptides
            if (SharedPeptidesOutput != null)
            {
                sb.Append(GlobalVariables.CheckLengthOfOutput(SharedPeptidesOutput));
            }
            sb.Append("\t");

            // number of peptides
            if (!DisplayModsOnPeptides)
            {
                sb.Append("" + AllPeptides.Select(p => p.BaseSequence).Distinct().Count());
            }
            else
            {
                sb.Append("" + AllPeptides.Select(p => p.FullSequence).Distinct().Count());
            }
            sb.Append("\t");

            // number of unique peptides
            if (!DisplayModsOnPeptides)
            {
                sb.Append("" + UniquePeptides.Select(p => p.BaseSequence).Distinct().Count());
            }
            else
            {
                sb.Append("" + UniquePeptides.Select(p => p.FullSequence).Distinct().Count());
            }
            sb.Append("\t");

            // sequence coverage percent
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageFraction.Select(p => string.Format("{0:0.#####}", p)))));
            sb.Append("\t");

            // sequence coverage
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayList)));
            sb.Append("\t");

            // sequence coverage with mods
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayListWithMods)));
            sb.Append("\t");

            //Detailed mods information list
            sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo)));
            sb.Append("\t");

            // MS1 intensity (retrieved from FlashLFQ in the SearchTask)
            if (IntensitiesByFile != null && FilesForQuantification != null)
            {
                foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition))
                {
                    foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key))
                    {
                        // if the samples are fractionated, the protein will only have 1 intensity in the first fraction
                        // and the other fractions will be zero. we could find the first/only fraction with an intensity,
                        // but simply summing the fractions is easier than finding the single non-zero value
                        double summedIntensity = sample.Sum(file => IntensitiesByFile[file]);

                        if (summedIntensity > 0)
                        {
                            sb.Append(summedIntensity);
                        }

                        sb.Append("\t");
                    }
                }
            }

            // number of PSMs for listed peptides
            sb.Append("" + AllPsmsBelowOnePercentFDR.Count);
            sb.Append("\t");

            // isDecoy
            if (IsDecoy)
            {
                sb.Append("D");
            }
            else if (IsContaminant)
            {
                sb.Append("C");
            }
            else
            {
                sb.Append("T");
            }
            sb.Append("\t");

            // cumulative target
            sb.Append(CumulativeTarget);
            sb.Append("\t");

            // cumulative decoy
            sb.Append(CumulativeDecoy);
            sb.Append("\t");

            // q value
            sb.Append(QValue);
            sb.Append("\t");

            // best peptide score
            sb.Append(BestPeptideScore);
            sb.Append("\t");

            // best peptide q value
            sb.Append(BestPeptideQValue);
            sb.Append("\t");

            return(sb.ToString());
        }