예제 #1
0
        /// <summary>
        /// Builds a tab-separated summary of every SEPro result file (*.sepr) found under the
        /// directories listed by the directory selector and shows it in a modal summary window.
        /// </summary>
        /// <param name="sender">Source of the click event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void buttonGenerateSummary_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is hidden rather than disposed when it closes,
            // so it is disposed explicitly here to release its window resources.
            using (SEProSummary ss = new SEProSummary())
            {
                ss.MyRichTextBox.AppendText("\tFileName\t\tMass Spectra\t(unique/decoy/total)Peptides\tProteins\tMax Parsimony Proteins\n\n");

                foreach (DirectoryClassDescription cdd in multipleDirectorySelector1.MyDirectoryDescriptionDictionary)
                {
                    // Resolve the directory once instead of rebuilding a DirectoryInfo for every use.
                    DirectoryInfo directory = new DirectoryInfo(cdd.MyDirectoryFullName);

                    ss.MyRichTextBox.AppendText(directory.Name + "\t" + cdd.Description + "\n");

                    Console.WriteLine("SEPro files found in directory: " + directory.Name);

                    // Sub-directories are searched as well; the array is enumerated directly.
                    FileInfo[] files = directory.GetFiles("*.sepr", SearchOption.AllDirectories);

                    foreach (FileInfo fi in files)
                    {
                        Console.WriteLine("\tLoading " + fi.Name);
                        ResultPackage sepro = ResultPackage.Load(fi.FullName);

                        // Peptides counted as "unique" are those that map to exactly one protein.
                        int uniquePeptides = sepro.MyProteins.MyPeptideList.Count(a => a.MyMapableProteins.Count == 1);

                        ss.MyRichTextBox.AppendText(
                            "\t" + fi.Name +
                            "\t\t" + sepro.MyFDRResult.SpectraFDRLabel +
                            "\t" + uniquePeptides + "/" + sepro.MyFDRResult.PeptideFDRLabel +
                            "\t" + sepro.MyFDRResult.ProteinFDRLabel +
                            "\t" + sepro.MaxParsimonyList().Count + "\n");
                    }
                }

                ss.ShowDialog();
            }
        }
예제 #2
0
        /// <summary>
        /// Loads every SEPro result file (*.sepr) found under the supplied directories, checks the
        /// directory layout, records the proteins to analyze, and then runs the processing steps.
        /// </summary>
        /// <param name="dcds">Directories (with class label and description) that are scanned recursively for *.sepr files.</param>
        /// <param name="myClusteringParameters">Clustering parameters; stored in MyClusterParams and read here for the MaxParsimony and RetainOptimal switches.</param>
        /// <exception cref="Exception">
        /// Thrown when a directory holding a SEPro file does not contain exactly one *.sepr file, or when a
        /// spectrum file referenced by the SEPro file has no matching .ms1, .RAW or .mzML file in that directory.
        /// </exception>
        public Core35(List <DirectoryClassDescription> dcds, XQuantClusteringParameters myClusteringParameters)
        {
            MyClusterParams = myClusteringParameters;
            IdentifiedSequencesInFullDirDict = new Dictionary<string, List<string>>();
            MyFastaItems = new List<FastaItem>();
            SEProFiles = new List<SEProFileInfo>();

            // Step 1: load each SEPro file and gather the proteins that will later be quantified.
            foreach (DirectoryClassDescription classDirectory in dcds)
            {
                FileInfo[] seproFiles = new DirectoryInfo(classDirectory.MyDirectoryFullName).GetFiles("*.sepr", SearchOption.AllDirectories);

                SEProFiles.Add(new SEProFileInfo(
                    classDirectory.MyDirectoryFullName,
                    classDirectory.ClassLabel,
                    classDirectory.Description,
                    seproFiles.Select(f => f.FullName).ToList()));

                foreach (FileInfo seproFile in seproFiles)
                {
                    DirectoryInfo seproDirectory = seproFile.Directory;

                    // Exactly one SEPro file is allowed in the directory that holds it.
                    if (seproDirectory.GetFiles("*.sepr").Length != 1)
                    {
                        throw new Exception("There can be only one SEPro file per directory; error in directory:\n" + seproFile.DirectoryName);
                    }

                    Console.WriteLine("Loading " + seproFile.FullName);
                    ResultPackage package = ResultPackage.Load(seproFile.FullName);

                    // Spectrum file names referenced by the SEPro result, passed through RemoveExtensions.
                    List<string> referencedSpectrumFiles = RemoveExtensions(
                        package.MyProteins.AllSQTScans.Select(scan => scan.FileName).Distinct().ToList());

                    // Spectrum files actually present next to the SEPro file, gathered in the order ms1, RAW, mzML.
                    List<string> availableSpectrumFiles = new List<string>();
                    foreach (string pattern in new[] { "*.ms1", "*.RAW", "*.mzML" })
                    {
                        availableSpectrumFiles.AddRange(seproDirectory.GetFiles(pattern).Select(f => f.Name));
                    }
                    availableSpectrumFiles = RemoveExtensions(availableSpectrumFiles);

                    // Pick the protein list according to the MaxParsimony switch.
                    List<MyProtein> selectedProteins;
                    if (myClusteringParameters.MaxParsimony)
                    {
                        selectedProteins = package.MaxParsimonyList();
                    }
                    else
                    {
                        selectedProteins = package.MyProteins.MyProteinList;
                    }

                    IdentifiedSequencesInFullDirDict.Add(seproDirectory.FullName, selectedProteins.Select(protein => protein.Locus).ToList());

                    // Store one fasta item per locus that has not been stored yet.
                    foreach (MyProtein protein in selectedProteins)
                    {
                        if (MyFastaItems.Any(item => item.SequenceIdentifier.Equals(protein.Locus)))
                        {
                            continue;
                        }

                        MyFastaItems.Add(new FastaItem(protein.Locus, protein.Sequence, protein.Description));
                    }

                    // Every spectrum file referenced by the SEPro result must be present in its directory.
                    int missingAt = referencedSpectrumFiles.FindIndex(name => !availableSpectrumFiles.Contains(name));
                    if (missingAt >= 0)
                    {
                        throw new Exception("All .ms1, .mzML, or Thermo .RAW files must be placed in each corresponding SEPro directory.  Error in directory:\n" + seproFile.DirectoryName + "\nfor file:" + referencedSpectrumFiles[missingAt]);
                    }
                }
            }

            Process();

            PreparePeptideProteinDictionary();

            if (myClusteringParameters.RetainOptimal)
            {
                RetainOptimalSignal();
            }

            Console.WriteLine("Done");
        }
예제 #3
0
        /// <summary>
        /// Fuses the loaded result packages into a single SEPro package, optionally removes decoy
        /// proteins, and builds <c>theIndex</c> with one entry per protein locus.
        /// </summary>
        /// <remarks>
        /// Each entry's <c>Index</c> is its zero-based position in the filtered protein list.
        /// </remarks>
        /// <exception cref="Exception">Thrown when the same locus occurs more than once in the fused protein list.</exception>
        public void ProcessParsedData()
        {
            // Generate a SEPro fusion from all loaded result packages.
            ResultPackage seprofusion = FusionSEPro(MyResultPackages.Select(a => a.MyResultPackage).ToList());

            // If we wish to eliminate decoy proteins
            if (MyParameters.EliminateDecoys)
            {
                seprofusion.MyProteins.RemoveDecoyProteins(MyParameters.DecoyTag);
            }

            // Only the index is built here, from either the full or the max-parsimony protein list.
            List<MyProtein> myProteins = seprofusion.MyProteins.MyProteinList;

            if (UseMaxParsimony)
            {
                myProteins = seprofusion.MaxParsimonyList();
            }

            List<TMPProt> result = (from prot in myProteins
                                    select new TMPProt(prot.Locus, prot.Description, prot.ContainsUniquePeptide)).ToList();

            // If we only want proteins of unique peptides
            if (MyParameters.MyProteinType == ProteinOutputType.SpecCountsOfUniquePeptides)
            {
                result = (from r in result
                          where r.UniquePeptides > 0
                          select r).ToList();
            }

            theIndex = new List<ProteinIndexStruct>(result.Count);

            // Track the loci already indexed in a set so the duplicate check is O(1) per protein
            // instead of rescanning the whole index on every iteration.
            HashSet<string> indexedLoci = new HashSet<string>();

            for (int i = 0; i < result.Count; i++)
            {
                TMPProt current = result[i];

                // HashSet.Add returns false when the locus is already present.
                if (!indexedLoci.Add(current.Locus))
                {
                    throw new Exception("Problems generating SEPro fusion file");
                }

                ProteinIndexStruct p = new ProteinIndexStruct();
                p.Locus       = current.Locus;
                p.Description = current.Description;
                p.Index       = i;

                theIndex.Add(p);
            }
        }