/// <summary>
/// Click handler that fills a <c>SEProSummary</c> window with a tab-separated report and
/// displays it through <c>ShowDialog()</c>.
/// For every selected directory one line with the directory name and its description is
/// written, followed by one line per <c>*.sepr</c> file found in that directory or any of
/// its sub-directories.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void buttonGenerateSummary_Click(object sender, EventArgs e)
{
    SEProSummary summaryWindow = new SEProSummary();

    // Column header of the report.
    summaryWindow.MyRichTextBox.AppendText("\tFileName\t\tMass Spectra\t(unique/decoy/total)Peptides\tProteins\tMax Parsimony Proteins\n\n");

    foreach (DirectoryClassDescription entry in multipleDirectorySelector1.MyDirectoryDescriptionDictionary)
    {
        DirectoryInfo directory = new DirectoryInfo(entry.MyDirectoryFullName);
        string directoryName = directory.Name;

        summaryWindow.MyRichTextBox.AppendText(directoryName + "\t" + entry.Description + "\n");
        Console.WriteLine("SEPro files found in directory: " + directoryName);

        FileInfo[] seproFiles = directory.GetFiles("*.sepr", SearchOption.AllDirectories);
        foreach (FileInfo seproFile in seproFiles)
        {
            Console.WriteLine("\tLoading " + seproFile.Name);
            ResultPackage package = ResultPackage.Load(seproFile.FullName);

            // Assemble the row column by column, in the same order as the header.
            string row = "\t" + seproFile.Name + "\t\t" + package.MyFDRResult.SpectraFDRLabel;

            // Peptides that map to exactly one protein, followed by the peptide FDR label.
            row += "\t" + package.MyProteins.MyPeptideList.Count(peptide => peptide.MyMapableProteins.Count == 1);
            row += "/" + package.MyFDRResult.PeptideFDRLabel;

            row += "\t" + package.MyFDRResult.ProteinFDRLabel;
            row += "\t" + package.MaxParsimonyList().Count + "\n";

            summaryWindow.MyRichTextBox.AppendText(row);
        }
    }

    summaryWindow.ShowDialog();
}
/// <summary>
/// Loads every SEPro result file (<c>*.sepr</c>) found recursively under the given
/// directories, validates each SEPro directory, records the proteins to analyze, and then
/// runs <c>Process()</c>, <c>PreparePeptideProteinDictionary()</c> and, when requested,
/// <c>RetainOptimalSignal()</c>.
/// </summary>
/// <param name="dcds">Directories to scan; each contributes one entry to <c>SEProFiles</c>.</param>
/// <param name="myClusteringParameters">
/// Clustering parameters. <c>MaxParsimony</c> selects between the max-parsimony protein list
/// and the full protein list; <c>RetainOptimal</c> enables the call to <c>RetainOptimalSignal()</c>.
/// </param>
/// <exception cref="Exception">
/// Thrown when a directory holding a SEPro file does not contain exactly one <c>*.sepr</c>
/// file, or when a spectra file referenced by a SEPro file has no matching
/// <c>.ms1</c>, <c>.RAW</c> or <c>.mzML</c> file in the same directory.
/// </exception>
public Core35(List<DirectoryClassDescription> dcds, XQuantClusteringParameters myClusteringParameters)
{
    MyClusterParams = myClusteringParameters;
    IdentifiedSequencesInFullDirDict = new Dictionary<string, List<string>>();
    MyFastaItems = new List<FastaItem>();
    SEProFiles = new List<SEProFileInfo>();

    // Identifiers of the items already stored in MyFastaItems. Gives a constant-time
    // "already stored?" test instead of rescanning the whole list for every protein.
    HashSet<string> storedFastaIdentifiers = new HashSet<string>();

    // First we need to load all SEPro files and gather the proteins they identify.
    foreach (DirectoryClassDescription cdc in dcds)
    {
        FileInfo[] files = new DirectoryInfo(cdc.MyDirectoryFullName).GetFiles("*.sepr", SearchOption.AllDirectories);
        SEProFiles.Add(new SEProFileInfo(cdc.MyDirectoryFullName, cdc.ClassLabel, cdc.Description, files.Select(a => a.FullName).ToList()));

        foreach (FileInfo fi in files)
        {
            // Make sure we only have one SEPro file per directory.
            if (fi.Directory.GetFiles("*.sepr").Length != 1)
            {
                throw new Exception("There can be only one SEPro file per directory; error in directory:\n" + fi.DirectoryName);
            }

            Console.WriteLine("Loading " + fi.FullName);
            ResultPackage rp = ResultPackage.Load(fi.FullName);

            // Verify that every spectra file referenced by the SEPro result has a matching
            // .ms1, .RAW or .mzML file in the same directory. This is checked before any
            // further work so that an incomplete directory is reported as early as possible.
            List<string> filesInSEPro = RemoveExtensions((from sqt in rp.MyProteins.AllSQTScans select sqt.FileName).Distinct().ToList());

            List<string> ms1OrRawOrmzMLFiles = fi.Directory.GetFiles("*.ms1").Select(a => a.Name)
                .Concat(fi.Directory.GetFiles("*.RAW").Select(a => a.Name))
                .Concat(fi.Directory.GetFiles("*.mzML").Select(a => a.Name))
                .ToList();
            HashSet<string> availableSpectraFiles = new HashSet<string>(RemoveExtensions(ms1OrRawOrmzMLFiles));

            foreach (string f in filesInSEPro)
            {
                if (!availableSpectraFiles.Contains(f))
                {
                    throw new Exception("All .ms1, .mzML, or Thermo .RAW files must be placed in each corresponding SEPro directory. Error in directory:\n" + fi.DirectoryName + "\nfor file:" + f);
                }
            }

            // Store the fasta items.
            List<MyProtein> proteinsToAnalyze;
            if (myClusteringParameters.MaxParsimony)
            {
                proteinsToAnalyze = rp.MaxParsimonyList();
            }
            else
            {
                proteinsToAnalyze = rp.MyProteins.MyProteinList;
            }

            IdentifiedSequencesInFullDirDict.Add(fi.Directory.FullName, proteinsToAnalyze.Select(a => a.Locus).ToList());

            foreach (MyProtein p in proteinsToAnalyze)
            {
                if (!storedFastaIdentifiers.Contains(p.Locus))
                {
                    FastaItem item = new FastaItem(p.Locus, p.Sequence, p.Description);
                    MyFastaItems.Add(item);

                    // Track the identifier exactly as the stored item exposes it, so the
                    // test above compares against the same value a scan of MyFastaItems would.
                    storedFastaIdentifiers.Add(item.SequenceIdentifier);
                }
            }
            // End storing fasta items.
        }
    }

    Process();
    PreparePeptideProteinDictionary();

    if (myClusteringParameters.RetainOptimal)
    {
        RetainOptimalSignal();
    }

    Console.WriteLine("Done");
}
/// <summary>
/// Fuses all loaded result packages into a single SEPro package, removes decoy proteins
/// when <c>MyParameters.EliminateDecoys</c> is set, and rebuilds <c>theIndex</c> with one
/// entry per remaining protein.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the same locus appears more than once among the proteins to be indexed.
/// </exception>
public void ProcessParsedData()
{
    // Generate a SEPro fusion.
    ResultPackage seprofusion = FusionSEPro(MyResultPackages.Select(a => a.MyResultPackage).ToList());

    // If we wish to eliminate decoy proteins.
    if (MyParameters.EliminateDecoys)
    {
        seprofusion.MyProteins.RemoveDecoyProteins(MyParameters.DecoyTag);
    }

    // This method only builds the protein index.
    List<MyProtein> myProteins = seprofusion.MyProteins.MyProteinList;
    if (UseMaxParsimony)
    {
        myProteins = seprofusion.MaxParsimonyList();
    }

    List<TMPProt> result = (from prot in myProteins
                            select new TMPProt(prot.Locus, prot.Description, prot.ContainsUniquePeptide)).ToList();

    // If we only want proteins having unique peptides.
    if (MyParameters.MyProteinType == ProteinOutputType.SpecCountsOfUniquePeptides)
    {
        result = (from r in result
                  where r.UniquePeptides > 0
                  select r).ToList();
    }

    theIndex = new List<ProteinIndexStruct>(result.Count);

    // Loci already indexed; HashSet.Add returns false for a repeated locus, which replaces
    // a full scan of theIndex for every protein.
    HashSet<string> indexedLoci = new HashSet<string>();

    for (int i = 0; i < result.Count; i++)
    {
        if (!indexedLoci.Add(result[i].Locus))
        {
            throw new Exception("Problems generating SEPro fusion file");
        }

        ProteinIndexStruct p = new ProteinIndexStruct();
        p.Locus = result[i].Locus;
        p.Description = result[i].Description;
        p.Index = i; // Position of the protein in the (possibly filtered) result list.
        theIndex.Add(p);
    }
}