// Loads every "*.sepr" file found under each supplied directory (sub-directories included)
// and stores one ResultEntry per file in MyResultPackages.
// The supplied list is also kept in MyDirectoryDescriptionDictionary.
public void ParseDirs(List<DirectoryClassDescription> myDirectoryDescriptionDictionary)
{
    MyDirectoryDescriptionDictionary = myDirectoryDescriptionDictionary;
    MyResultPackages = new List<ResultEntry>();

    foreach (DirectoryClassDescription description in myDirectoryDescriptionDictionary)
    {
        // Collect the SEPro files of this directory and of all directories below it
        DirectoryInfo root = new DirectoryInfo(description.MyDirectoryFullName);
        FileInfo[] seproFiles = root.GetFiles("*.sepr", SearchOption.AllDirectories);

        for (int i = 0; i < seproFiles.Length; i++)
        {
            FileInfo seproFile = seproFiles[i];

            // Deserialize the package
            Console.WriteLine("Loading .. " + seproFile.FullName);
            ResultPackage package = ResultPackage.Load(seproFile.FullName);

            // Drop the spectra references to free up some memory
            foreach (SQTScan scan in package.MyProteins.AllSQTScans)
            {
                scan.MSLight = null;
            }

            ResultEntry entry = new ResultEntry(package, seproFile, description.ClassLabel);
            MyResultPackages.Add(entry);
        }

        // Printed once per directory
        Console.WriteLine("Done loading.");
    }
}
// Loads every "*.sepr" file located directly inside the given directory (no recursion)
// and returns the loaded packages in the order Directory.GetFiles reported them.
private List<ResultPackage> LoadSEProFiles(string directory)
{
    List<ResultPackage> loaded = new List<ResultPackage>();
    string[] paths = Directory.GetFiles(directory, "*.sepr");

    for (int i = 0; i < paths.Length; i++)
    {
        string path = paths[i];
        Console.WriteLine("Loading file: " + path);

        ResultPackage package = ResultPackage.Load(path);
        loaded.Add(package);
    }

    return loaded;
}
// Single-directory loader: registers the directory as the only entry of
// MyDirectoryDescriptionDictionary, then loads every "*.sepr" file below it
// (sub-directories included) into MyResultPackages.
public void ParseDir(DirectoryClassDescription dir)
{
    MyDirectoryDescriptionDictionary = new List<DirectoryClassDescription>();
    MyDirectoryDescriptionDictionary.Add(dir);
    MyResultPackages = new List<ResultEntry>();

    DirectoryInfo root = new DirectoryInfo(dir.MyDirectoryFullName);
    FileInfo[] seproFiles = root.GetFiles("*.sepr", SearchOption.AllDirectories);

    for (int i = 0; i < seproFiles.Length; i++)
    {
        FileInfo seproFile = seproFiles[i];

        // Deserialize the package
        ResultPackage package = ResultPackage.Load(seproFile.FullName);

        // Drop the spectra references to free up some memory
        foreach (SQTScan scan in package.MyProteins.AllSQTScans)
        {
            scan.MSLight = null;
        }

        ResultEntry entry = new ResultEntry(package, seproFile, dir.ClassLabel);
        MyResultPackages.Add(entry);
    }
}
// Builds a tab-separated summary (one header line, then one line per directory followed by
// one line per SEPro file found under it) in a SEProSummary window and shows it modally.
private void buttonGenerateSummary_Click(object sender, EventArgs e)
{
    SEProSummary summary = new SEProSummary();
    summary.MyRichTextBox.AppendText("\tFileName\t\tMass Spectra\t(unique/decoy/total)Peptides\tProteins\tMax Parsimony Proteins\n\n");

    foreach (DirectoryClassDescription cdd in multipleDirectorySelector1.MyDirectoryDescriptionDictionary)
    {
        DirectoryInfo directory = new DirectoryInfo(cdd.MyDirectoryFullName);

        summary.MyRichTextBox.AppendText(directory.Name + "\t" + cdd.Description + "\n");
        Console.WriteLine("SEPro files found in directory: " + directory.Name);

        FileInfo[] seproFiles = directory.GetFiles("*.sepr", SearchOption.AllDirectories);
        for (int i = 0; i < seproFiles.Length; i++)
        {
            FileInfo seproFile = seproFiles[i];
            Console.WriteLine("\tLoading " + seproFile.Name);
            ResultPackage sepro = ResultPackage.Load(seproFile.FullName);

            // Gather the report pieces in the same order the report line lists them
            var spectraLabel = sepro.MyFDRResult.SpectraFDRLabel;
            var uniquePeptides = sepro.MyProteins.MyPeptideList.Count(a => a.MyMapableProteins.Count == 1);
            var peptideLabel = sepro.MyFDRResult.PeptideFDRLabel;
            var proteinLabel = sepro.MyFDRResult.ProteinFDRLabel;
            var maxParsimonyCount = sepro.MaxParsimonyList().Count;

            summary.MyRichTextBox.AppendText(
                "\t" + seproFile.Name +
                "\t\t" + spectraLabel +
                "\t" + uniquePeptides + "/" + peptideLabel +
                "\t" + proteinLabel +
                "\t" + maxParsimonyCount +
                "\n");
        }
    }

    summary.ShowDialog();
}
// Loads the SEPro result package stored at fileName into the pckg field,
// logging to the console before and after the load.
public void LoadSepro(string fileName)
{
    string startMessage = "Loading: " + fileName;
    Console.WriteLine(startMessage);

    pckg = ResultPackage.Load(fileName);

    const string doneMessage = "Done loading SEProFile";
    Console.WriteLine(doneMessage);
}
// Quantifies every MS file referenced by the SEPro files listed in SEProFiles.
//
// For each SEPro file, the distinct file names found in its scans are matched (ignoring
// extension, via RemoveExtension) against the .ms1 / .RAW / .mzML files sitting in the same
// directory. Each matching file is opened through XICGet5, Core35.Quant is run for every scan
// of that file, and the resulting quants are accumulated in one QuantPackage2 per MS file.
// Packages are appended to myQuantPkgs; GenerateAssociationItems() is called once at the end.
//
// Throws FileNotFoundException when no .ms1 / .RAW / .mzML file matches a name referenced
// by the SEPro file.
public void Process()
{
    myQuantPkgs = new List<QuantPackage2>();
    SignalGenerator isotopicSignal = new SignalGenerator();

    foreach (SEProFileInfo sfi in SEProFiles)
    {
        foreach (string file in sfi.MyFilesFullPath)
        {
            Console.WriteLine("Processing for " + file);
            ResultPackage rp = ResultPackage.Load(file);
            List<string> filesInSEPro = (from sqt in rp.MyProteins.AllSQTScans select sqt.FileName).Distinct().ToList();
            FileInfo fi = new FileInfo(file);

            // The candidate MS files are the same for every entry of filesInSEPro, so list them once
            List<string> ms1OrRawOrmzMLFiles = fi.Directory.GetFiles("*.ms1")
                .Concat(fi.Directory.GetFiles("*.RAW"))
                .Concat(fi.Directory.GetFiles("*.mzML"))
                .Select(a => a.Name)
                .ToList();

            foreach (string msFile in filesInSEPro)
            {
                QuantPackage2 qp = new QuantPackage2(msFile, fi.Directory.FullName, sfi.ClassLabel);
                Console.WriteLine("\t" + msFile);

                int fileToRead = ms1OrRawOrmzMLFiles.FindIndex(a => RemoveExtension(a).Equals(RemoveExtension(msFile)));
                if (fileToRead < 0)
                {
                    // Previously this surfaced as an index-out-of-range error on the list access below
                    throw new FileNotFoundException("No .ms1, .mzML, or Thermo .RAW file matching " + msFile + " was found in directory:\n" + fi.DirectoryName);
                }

                XICGet5 xic = new XICGet5(fi.DirectoryName + "/" + ms1OrRawOrmzMLFiles[fileToRead]);

                List<SQTScan> scansTMP = rp.MyProteins.AllSQTScans.FindAll(a => a.FileName.Equals(msFile));

                if (MyClusterParams.OnlyUniquePeptides)
                {
                    // Keep only the unique scans: the ones to discard are those that are NOT unique.
                    // (The predicate used to be inverted and threw away exactly the unique ones.)
                    int removedForNotUnique = scansTMP.RemoveAll(a => !a.IsUnique);
                    Console.WriteLine("Scans removed for not dealing with unique peptides: " + removedForNotUnique);
                }

                List<SQTLight> scans = scansTMP.Select(a => new SQTLight(a)).ToList();

                for (int i = 0; i < scans.Count; i++)
                {
                    Dictionary<string, List<Quant>> theseQuants = Core35.Quant(xic, scans[i], scans[i].TheoreticalMH, isotopicSignal, scans[i].ScanNumber, MyClusterParams);

                    foreach (var kvp in theseQuants)
                    {
                        // Discard quants whose MyIons second dimension is below the configured minimum
                        kvp.Value.RemoveAll(a => a.MyIons.GetLength(1) < MyClusterParams.MinMS1Counts);

                        if (kvp.Value.Count > 0)
                        {
                            if (qp.MyQuants.ContainsKey(kvp.Key))
                            {
                                qp.MyQuants[kvp.Key].AddRange(kvp.Value);
                            }
                            else
                            {
                                qp.MyQuants.Add(kvp.Key, kvp.Value);
                            }
                        }
                    }

                    // "\r" rewrites the same console line to show progress
                    Console.Write("\rScans Processed: " + (i + 1) + "/" + scans.Count);
                }

                //Store them
                myQuantPkgs.Add(qp);
                Console.WriteLine("\nTotal quants stored so far = " + myQuantPkgs.Sum(a => a.MyQuants.Count));
                Console.WriteLine("Total files analyzed so far = " + myQuantPkgs.Count);
                Console.WriteLine("Done procesing :" + msFile);

                // Explicit collection kept from the original implementation; presumably there to
                // release the per-file data before the next file is opened - TODO confirm it is still needed
                System.GC.Collect();
                System.GC.WaitForPendingFinalizers();
                System.GC.Collect();
            }
        }
    }

    GenerateAssociationItems();
}
// Builds the quantitation core for a set of class directories.
//
// For every "*.sepr" file found under each directory (sub-directories included) the constructor:
//   * requires that its directory contains exactly one SEPro file,
//   * records the protein loci of that directory in IdentifiedSequencesInFullDirDict,
//   * adds each not-yet-seen protein to MyFastaItems,
//   * checks that every file name referenced by the SEPro scans has a .ms1 / .RAW / .mzML
//     counterpart (compared without extension) in the same directory.
// It then runs Process(), PreparePeptideProteinDictionary() and, when requested by
// RetainOptimal, RetainOptimalSignal().
//
// Throws Exception when a directory holds more than one SEPro file or an MS file is missing.
public Core35(List<DirectoryClassDescription> dcds, XQuantClusteringParameters myClusteringParameters)
{
    MyClusterParams = myClusteringParameters;
    IdentifiedSequencesInFullDirDict = new Dictionary<string, List<string>>();
    MyFastaItems = new List<FastaItem>();
    SEProFiles = new List<SEProFileInfo>();

    // Load all SEPro files and collect the proteins they report
    foreach (DirectoryClassDescription classDirectory in dcds)
    {
        DirectoryInfo root = new DirectoryInfo(classDirectory.MyDirectoryFullName);
        FileInfo[] seproFiles = root.GetFiles("*.sepr", SearchOption.AllDirectories);

        List<string> seproPaths = seproFiles.Select(a => a.FullName).ToList();
        SEProFiles.Add(new SEProFileInfo(classDirectory.MyDirectoryFullName, classDirectory.ClassLabel, classDirectory.Description, seproPaths));

        for (int fileIndex = 0; fileIndex < seproFiles.Length; fileIndex++)
        {
            FileInfo seproFile = seproFiles[fileIndex];
            DirectoryInfo seproDirectory = seproFile.Directory;

            // Only one SEPro file is allowed per directory
            int seproFilesInDirectory = seproDirectory.GetFiles("*.sepr").Length;
            if (seproFilesInDirectory != 1)
            {
                throw new Exception("There can be only one SEPro file per directory; error in directory:\n" + seproFile.DirectoryName);
            }

            Console.WriteLine("Loading " + seproFile.FullName);
            ResultPackage package = ResultPackage.Load(seproFile.FullName);

            // File names referenced by the scans, without extension
            List<string> referencedNames = (from sqt in package.MyProteins.AllSQTScans select sqt.FileName).Distinct().ToList();
            List<string> filesInSEPro = RemoveExtensions(referencedNames);

            // MS files available in the SEPro directory, without extension
            IEnumerable<string> ms1Names = seproDirectory.GetFiles("*.ms1").Select(a => a.Name);
            IEnumerable<string> rawNames = seproDirectory.GetFiles("*.RAW").Select(a => a.Name);
            IEnumerable<string> mzMLNames = seproDirectory.GetFiles("*.mzML").Select(a => a.Name);
            List<string> availableMsFiles = RemoveExtensions(ms1Names.Concat(rawNames).Concat(mzMLNames).ToList());

            // Store the fasta items
            List<MyProtein> proteinsToAnalyze;
            if (myClusteringParameters.MaxParsimony)
            {
                proteinsToAnalyze = package.MaxParsimonyList();
            }
            else
            {
                proteinsToAnalyze = package.MyProteins.MyProteinList;
            }

            List<string> loci = proteinsToAnalyze.Select(a => a.Locus).ToList();
            IdentifiedSequencesInFullDirDict.Add(seproDirectory.FullName, loci);

            foreach (MyProtein protein in proteinsToAnalyze)
            {
                bool alreadyStored = MyFastaItems.Exists(a => a.SequenceIdentifier.Equals(protein.Locus));
                if (alreadyStored)
                {
                    continue;
                }

                MyFastaItems.Add(new FastaItem(protein.Locus, protein.Sequence, protein.Description));
            }

            // Every referenced file must have its MS counterpart in the directory
            for (int nameIndex = 0; nameIndex < filesInSEPro.Count; nameIndex++)
            {
                string referenced = filesInSEPro[nameIndex];
                if (!availableMsFiles.Contains(referenced))
                {
                    throw new Exception("All .ms1, .mzML, or Thermo .RAW files must be placed in each corresponding SEPro directory. Error in directory:\n" + seproFile.DirectoryName + "\nfor file:" + referenced);
                }
            }
        }
    }

    Process();
    PreparePeptideProteinDictionary();

    if (myClusteringParameters.RetainOptimal)
    {
        RetainOptimalSignal();
    }

    Console.WriteLine("Done");
}
// Patches a SEPro file with the ions of its MS3 (multinotch) spectra and saves the result
// under a name chosen by the user.
//
// For every file name referenced by the SEPro scans, the ".ms3" file with the same base name
// is read from the directory typed in textBoxMS3. For each scan, the MS3 item whose
// MS2PrecursorScan equals the scan number is looked up; when exactly one is found its ions
// are written to (or merged, sorted by m/z, into) the scan's MSLight spectrum.
private void buttonGo_Click(object sender, EventArgs e)
{
    if (!File.Exists(textBoxSeproFile.Text))
    {
        MessageBox.Show("Please enter an input SEPro file to receive the multinotch patch.");
        return;
    }

    FileInfo fi = new FileInfo(textBoxSeproFile.Text);
    saveFileDialog1.InitialDirectory = fi.Directory.FullName;
    saveFileDialog1.FileName = "MN_" + fi.Name;

    // The filter text used to be chain-assigned into openFileDialog1.FileName as well, which
    // stored a filter string as the open dialog's selected file name.
    // NOTE(review): if the intent was to also restrict openFileDialog1 to *.sepr, set its Filter where that dialog is shown.
    saveFileDialog1.Filter = "SEPro file (*.sepr)|*.sepr";

    if (saveFileDialog1.ShowDialog() != DialogResult.Cancel)
    {
        Console.WriteLine("Loading SEpro file.");
        ResultPackage sepro = ResultPackage.Load(textBoxSeproFile.Text);

        // Keyed by the file name exactly as stored in the scans, so the per-scan lookup below
        // never depends on converting the name back from ".ms3" to ".sqt"
        Dictionary<string, List<MultiNotchMS3Item>> myMS3MNDict = new Dictionary<string, List<MultiNotchMS3Item>>();
        List<string> sqtFileNames = sepro.MyProteins.AllSQTScans.Select(a => a.FileName).Distinct().ToList();

        foreach (string sqtFile in sqtFileNames)
        {
            // Escaped and anchored: only a trailing ".sqt" extension is replaced. The previous
            // pattern ".sqt" let the dot match any character anywhere in the name.
            string ms3File = Regex.Replace(sqtFile, @"\.sqt$", ".ms3");

            Console.WriteLine("Loading MS3 file :" + ms3File);
            List<MSLight> theMS3 = PatternTools.MSParserLight.ParserLight.ParseLightMS2(Path.Combine(textBoxMS3.Text, ms3File));
            List<MultiNotchMS3Item> myMNItems = theMS3.Select(a => new MultiNotchMS3Item(a)).ToList();
            myMS3MNDict.Add(sqtFile, myMNItems);
        }

        foreach (SQTScan sqt in sepro.MyProteins.AllSQTScans)
        {
            List<MultiNotchMS3Item> theItems = myMS3MNDict[sqt.FileName].FindAll(a => a.MS2PrecursorScan == sqt.ScanNumber);

            if (theItems.Count == 1)
            {
                //check if the spectrum is null
                if (object.ReferenceEquals(sqt.MSLight, null))
                {
                    Console.WriteLine("Creating and providing quant data to spectrum: " + sqt.FileNameWithScanNumberAndChargeState);
                    sqt.MSLight = new MSLight();
                    sqt.MSLight.ScanNumber = sqt.ScanNumber;

                    foreach (Ion i in theItems[0].MyIons)
                    {
                        sqt.MSLight.MZ.Add(i.MZ);
                        sqt.MSLight.Intensity.Add(i.Intensity);
                    }
                }
                else
                {
                    Console.WriteLine("Providing uant data to spectrum: " + sqt.FileNameWithScanNumberAndChargeState);

                    // Merge existing and MS3 ions, then rebuild both parallel lists sorted by m/z
                    List<Ion> newIons = sqt.MSLight.Ions.Concat(theItems[0].MyIons).ToList();
                    newIons.Sort((a, b) => a.MZ.CompareTo(b.MZ));

                    sqt.MSLight.MZ.Clear();
                    sqt.MSLight.Intensity.Clear();
                    sqt.MSLight.MZ = newIons.Select(a => a.MZ).ToList();
                    sqt.MSLight.Intensity = newIons.Select(a => a.Intensity).ToList();
                }
            }
            else
            {
                // WriteLine (was Write) so consecutive warnings do not run together on one line
                Console.WriteLine("Trouble finding MS3 for spectrum " + sqt.FileNameWithScanNumberAndChargeState);
            }
        }

        Console.WriteLine("Saving the new result package");
        sepro.Save(saveFileDialog1.FileName);
    }

    Console.WriteLine("Done.");
}
// Patches every SEPro file of the configured directory with medium/heavy MS1 intensities.
//
// For each scan, the MS1 spectra of the matching ".ms1" file that precede the scan are ordered
// by distance (in scan numbers) to it. For up to SearchSpaceSize of the closest ones, the
// intensities of the ions within ppmTolerance of the medium and heavy precursor m/z are summed
// and stored as one [medium, heavy] pair in scan.Quantitation. The SEPro file is then saved
// back over itself.
private void buttonGO_Click(object sender, EventArgs e)
{
    double ppmTolerance = 65;
    AcetylationParams myParams = GetParamsFromGUI();
    string[] seproFiles = Directory.GetFiles(myParams.MS1SEProDirectory, "*.sepr");

    foreach (string seproFile in seproFiles)
    {
        Console.WriteLine("Loading SEPro File: " + seproFile);
        ResultPackage sepro = ResultPackage.Load(seproFile);
        Console.WriteLine("Done loading SEPro File");

        //Lets sort sqt scans by their file name and then load an ms1 file at a time
        List<string> files = Directory.GetFiles(myParams.MS1SEProDirectory, "*.ms1").ToList();

        foreach (string file in files)
        {
            Console.WriteLine("Loading " + file);
            List<MSLight> theMS1Spectra = PatternTools.MSParserLight.ParserLight.ParseLightMS2(file);
            List<SQTScan> scans = sepro.MyProteins.AllSQTScans.FindAll(a => file.Contains(Regex.Replace(a.FileName, "sqt", "ms1")));

            foreach (SQTScan scan in scans)
            {
                // MS1 spectra acquired before this scan, closest first
                List<MSLight> mstmp = theMS1Spectra.FindAll(a => a.ScanNumber < scan.ScanNumber);
                mstmp.Sort((a, b) => Math.Abs(a.ScanNumber - scan.ScanNumber).CompareTo(Math.Abs(scan.ScanNumber - b.ScanNumber)));

                //Check if we are dealing with a medium or heavy peptide
                bool isMedium = scan.PeptideSequenceCleaned.StartsWith("(" + textBoxMediumMarker.Text + ")");

                //Find the delta: one DeltaHighMedium per "(" found in the sequence, divided by the charge
                int noMods = Regex.Matches(scan.PeptideSequenceCleaned, Regex.Escape("(")).Count;
                double delta = ((double)noMods * myParams.DeltaHighMedium) / (double)scan.ChargeState;

                double chargedPrecursor = (scan.MeasuredMH + ((double)(scan.ChargeState - 1) * 1.0078)) / (double)(scan.ChargeState);

                // The identified precursor is the medium form for medium peptides and the heavy
                // form otherwise; the partner sits one delta away
                double mediumMZ = isMedium ? chargedPrecursor : chargedPrecursor - delta;
                double heavyMZ = isMedium ? chargedPrecursor + delta : chargedPrecursor;

                scan.Quantitation = new List<List<double>>();

                Console.WriteLine("Scan: " + scan.ScanNumber + " +" + scan.ChargeState + " " + scan.PeptideSequence);

                // Scans near the start of a run can have fewer preceding MS1 spectra than the
                // requested search space; stop at what is available instead of indexing past the end
                for (int i = 0; i < myParams.SearchSpaceSize && i < mstmp.Count; i++)
                {
                    //Obtain ratios from MS1
                    double mediumIntensity = mstmp[i].Ions.FindAll(a => Math.Abs(pTools.PPM(a.MZ, mediumMZ)) < ppmTolerance).Sum(a => a.Intensity);
                    double heavyIntensity = mstmp[i].Ions.FindAll(a => Math.Abs(pTools.PPM(a.MZ, heavyMZ)) < ppmTolerance).Sum(a => a.Intensity);

                    scan.Quantitation.Add(new List<double>() { mediumIntensity, heavyIntensity });

                    Console.WriteLine("\tRatio: " + scan.ScanNumber + ": " + Math.Round(mediumIntensity / heavyIntensity, 2));
                    Console.WriteLine("\tIntensities: " + mediumIntensity + " " + heavyIntensity);
                }

                Console.WriteLine("");
            }
        }

        Console.WriteLine("Patching Sepro File, " + seproFile + " with quantitative data");
        sepro.Save(seproFile);
    }

    Console.WriteLine("Done!");
}
// Compares the quantitation of the peptides shared by two SEPro files.
//
// Steps: load both packages; build the distinct list of peptide sequences; accumulate the
// per-channel totals over the shared peptides; divide every quantitation value by those totals;
// average the ratios of each shared peptide into one (x, y) point; plot the points on chart1,
// show mean +- standard deviation in labelL1 / labelL2 and bind the points to dataGridViewPoints.
private void Old()
{
    //Load the SEPro files
    PatternTools.LstSquQuadRegr regression = new PatternTools.LstSquQuadRegr();

    Console.WriteLine("Loading SEPro1");
    ResultPackage first = ResultPackage.Load(textBoxSEProL1.Text);
    Console.WriteLine("Loading SEPro2");
    ResultPackage second = ResultPackage.Load(textBoxSEProL2.Text);

    // Looks a peptide up by its cleaned sequence; yields null when the package does not have it
    Func<ResultPackage, string, PeptideResult> findPeptide =
        (package, sequence) => package.MyProteins.MyPeptideList.Find(a => a.CleanedPeptideSequence.Equals(sequence));

    //Generate a list of all clean peptide sequences
    List<string> sequences = first.AllPeptideSequences;
    sequences.AddRange(second.AllPeptideSequences);
    sequences = sequences.Distinct().ToList();
    Console.WriteLine(" Peptides loaded: " + sequences.Count);

    List<GraphData> points = new List<GraphData>(sequences.Count);

    //First pass: channel totals used for the normalization
    double sumChannel0First = 0;
    double sumChannel0Second = 0;
    double sumChannel1First = 0;
    double sumChannel1Second = 0;

    foreach (string sequence in sequences)
    {
        PeptideResult inFirst = findPeptide(first, sequence);
        PeptideResult inSecond = findPeptide(second, sequence);

        // Called before the null check, exactly as before
        CorrectIsotopic(inFirst);
        CorrectIsotopic(inSecond);

        if (inFirst != null && inSecond != null)
        {
            sumChannel0First += ExtractValues(inFirst, 0).Sum();
            sumChannel1First += ExtractValues(inFirst, 1).Sum();
            sumChannel0Second += ExtractValues(inSecond, 0).Sum();
            sumChannel1Second += ExtractValues(inSecond, 1).Sum();
        }
    }

    //Second pass: divide every quantitation value by the total of its channel
    foreach (string sequence in sequences)
    {
        PeptideResult inFirst = findPeptide(first, sequence);
        PeptideResult inSecond = findPeptide(second, sequence);

        if (inFirst == null || inSecond == null)
        {
            continue;
        }

        // Result unused; call kept so the sequence of calls is unchanged
        ExtractRatios(inFirst);

        foreach (SQTScan scan in inFirst.MyScans)
        {
            foreach (List<double> quantitation in scan.Quantitation)
            {
                quantitation[0] /= sumChannel0First;
                quantitation[1] /= sumChannel1First;
            }
        }

        foreach (SQTScan scan in inSecond.MyScans)
        {
            foreach (List<double> quantitation in scan.Quantitation)
            {
                quantitation[0] /= sumChannel0Second;
                quantitation[1] /= sumChannel1Second;
            }
        }

        // Result unused; call kept so the sequence of calls is unchanged
        ExtractRatios(inFirst);
    }
    //finished Normalizing

    //Third pass: one point per shared peptide, average ratio in each package
    foreach (string sequence in sequences)
    {
        PeptideResult inFirst = findPeptide(first, sequence);
        PeptideResult inSecond = findPeptide(second, sequence);

        if (inFirst == null || inSecond == null)
        {
            continue;
        }

        List<double> ratiosFirst = ExtractRatios(inFirst);
        List<double> ratiosSecond = ExtractRatios(inSecond);

        bool bothHaveRatios = ratiosFirst.Count != 0 && ratiosSecond.Count != 0;
        if (!bothHaveRatios)
        {
            //Most likely we are here because this peptide is a false positive as it has two different labels in it.
            Console.Write("x");
            continue;
        }

        // Both lists are non-empty here, so the averages are always defined
        double x = ratiosFirst.Average();
        double y = ratiosSecond.Average();

        points.Add(new GraphData(x, y));
        Console.Write(".");
    }

    //And now, print the data to the graph
    chart1.Series[0].Points.Clear();
    chart1.ChartAreas[0].AxisX.Crossing = 0;
    chart1.ChartAreas[0].AxisY.Crossing = 0;
    chart1.ChartAreas[0].AxisY.IsStartedFromZero = true;
    chart1.ChartAreas[0].AxisX.IsStartedFromZero = true;

    for (int i = 0; i < points.Count; i++)
    {
        GraphData point = points[i];
        regression.AddPoints(point.X, point.Y);
        chart1.Series[0].Points.AddXY(point.X, point.Y);
    }

    //Finally, update the labels
    var meanX = Math.Round(points.Average(a => a.X), 3);
    var deviationX = Math.Round(PatternTools.pTools.Stdev(points.Select(a => a.X).ToList(), true), 3);
    labelL1.Text = meanX.ToString() + " +- " + deviationX;

    var meanY = Math.Round(points.Average(a => a.Y), 3);
    var deviationY = Math.Round(PatternTools.pTools.Stdev(points.Select(a => a.Y).ToList(), true), 3);
    labelL2.Text = meanY.ToString() + " +- " + deviationY;

    dataGridViewPoints.DataSource = points;
}
// Generates the isobaric quantitation report for the SEPro (.sepr) or PepExplorer (.mpex)
// file typed in textBoxitraqSEPro.
//
// Outline:
//   1. Validate the form input (output directory, isobaric masses, class labels, input file).
//   2. When channel-signal normalization is checked, read the MS2 spectra of every *.RAW file
//      next to the input file, accumulate the total isobaric signal per file and, for .sepr
//      input, attach the spectra to the matching scans.
//   3. Load the scans to analyze (from the .sepr package or rebuilt from the .mpex alignments).
//   4. Optionally remove the multiplexed spectra listed by the Yada results.
//   5. Extract (and optionally purity-correct / normalize) the isobaric signal of every scan.
//   6. Write either the peptide quantitation text report or two PatternLab projects
//      (peptide level and protein level) into the output directory.
//
// Throws Exception when the input file is neither .sepr nor .mpex.
private void buttonGo_Click(object sender, EventArgs e)
{
    //Verify write permission to directory
    if (!Directory.Exists(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    if (!Regex.IsMatch(textBoxIsobaricMasses.Text, "[0-9]+ [0-9]+"))
    {
        MessageBox.Show("Please fill out the masses of the isobaric tags.");
        return;
    }

    if (!PatternTools.pTools.HasWriteAccessToFolder(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    //Obtain class labels
    if (textBoxClassLabels.Text.Length == 0)
    {
        MessageBox.Show("Please input the class labels (eg., for iTRAQ 1,2,3,4");
        return;
    }

    List<int> labels = Regex.Split(textBoxClassLabels.Text, " ").Select(a => int.Parse(a)).ToList();

    //Obtain the isobaric masses
    string[] im = Regex.Split(textBoxIsobaricMasses.Text, " ");
    List<double> isobaricMasses = im.Select(a => double.Parse(a)).ToList();

    if (labels.Count != isobaricMasses.Count)
    {
        MessageBox.Show("Please make sure that the class labels and isobaric masses match");
        return;
    }

    // Checked here, with the rest of the validation: it used to run only after the file had
    // already been used below and after the button had been switched to "Working..."
    if (!File.Exists(textBoxitraqSEPro.Text))
    {
        MessageBox.Show("Unable to find SEPro file");
        return;
    }

    buttonGo.Text = "Working...";
    this.Update();
    richTextBoxLog.Clear();

    //--------------------------------------------

    //Get signal from all
    signalAllNormalizationDictionary = new Dictionary<string, double[]>();

    FileInfo fi = new FileInfo(textBoxitraqSEPro.Text);
    bool extractSignal = false;
    ResultPackage rp = null;

    if (checkBoxNormalizationChannelSignal.Checked)
    {
        //We should get the MS info and merge it with the sepro package
        if (fi.Extension.Equals(".sepr"))
        {
            rp = ResultPackage.Load(textBoxitraqSEPro.Text);
            extractSignal = true;
        }

        List<FileInfo> rawFiles = fi.Directory.GetFiles("*.RAW").ToList();

        foreach (FileInfo rawFile in rawFiles)
        {
            Console.WriteLine("Extracting data for " + rawFile.Name);
            PatternTools.RawReader.RawReaderParams rParams = new PatternTools.RawReader.RawReaderParams();
            rParams.ExtractMS1 = false;
            rParams.ExtractMS2 = true;
            rParams.ExtractMS3 = false;

            PatternTools.RawReader.Reader reader = new PatternTools.RawReader.Reader(rParams);

            List<MSLight> theMS2 = reader.GetSpectra(rawFile.FullName, new List<int>(), false);
            theMS2.RemoveAll(a => a.Ions == null);

            double[] totalSignal = new double[isobaricMasses.Count];

            List<SQTScan> theScans = null;
            //Update the sepro result package with the signal
            if (extractSignal)
            {
                //Get all the scans from this file (names compared without their 4-character extension)
                string rawName = rawFile.Name.Substring(0, rawFile.Name.Length - 4);
                theScans = rp.MyProteins.AllSQTScans.FindAll(a => a.FileName.Substring(0, a.FileName.Length - 4).Equals(rawName));
            }

            foreach (MSLight ms in theMS2)
            {
                double[] thisQuantitation = GetIsobaricSignal(ms.Ions, isobaricMasses);

                if (extractSignal)
                {
                    SQTScan scn = theScans.Find(a => a.ScanNumber == ms.ScanNumber);
                    if (scn != null)
                    {
                        scn.MSLight = ms;
                        // Only ions with m/z up to 400 are kept on the attached spectrum
                        scn.MSLight.Ions.RemoveAll(a => a.MZ > 400);
                    }
                }

                for (int i = 0; i < thisQuantitation.Length; i++)
                {
                    totalSignal[i] += thisQuantitation[i];
                }
            }

            // Key the totals by the corresponding ".sqt" file name
            string theName = rawFile.Name.Substring(0, rawFile.Name.Length - 3);
            theName += "sqt";

            signalAllNormalizationDictionary.Add(theName, totalSignal);
        }
    }

    Console.WriteLine("Loading SEPro File");

    #region Load the spero or pepexplorer file
    theScansToAnalyze = new List<SQTScan>();
    List<FastaItem> theFastaItems = new List<FastaItem>();

    if (fi.Extension.Equals(".sepr"))
    {
        Console.WriteLine("Loading SEPro file");

        if (!extractSignal)
        {
            rp = ResultPackage.Load(textBoxitraqSEPro.Text);
        }

        rp.MyProteins.AllSQTScans.RemoveAll(a => a.MSLight == null);
        theScansToAnalyze = rp.MyProteins.AllSQTScans;
        Console.WriteLine("Done reading SEPro result");

        theFastaItems = rp.MyProteins.MyProteinList.Select(a => new FastaItem(a.Locus, a.Sequence, a.Description)).ToList();
    }
    else if (fi.Extension.Equals(".mpex"))
    {
        Console.WriteLine("Loading PepExplorer file....");
        PepExplorer2.Result2.ResultPckg2 result = PepExplorer2.Result2.ResultPckg2.DeserializeResultPackage(textBoxitraqSEPro.Text);
        theFastaItems = result.MyFasta;

        theScansToAnalyze = new List<SQTScan>();

        foreach (PepExplorer2.Result2.AlignmentResult al in result.Alignments)
        {
            foreach (var dnr in al.DeNovoRegistries)
            {
                SQTScan sqt = new SQTScan();
                sqt.ScanNumber = dnr.ScanNumber;
                sqt.FileName = dnr.FileName;
                sqt.PeptideSequence = dnr.PtmSequence;
                theScansToAnalyze.Add(sqt);
            }
        }

        //And now we need to retrieve the mass spectra. For this, the raw files should be inside the directory containing the mpex file
        List<string> rawFiles = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

        for (int i = 0; i < rawFiles.Count; i++)
        {
            // Swap the 3-character extension for "raw"
            rawFiles[i] = rawFiles[i].Remove(rawFiles[i].Length - 3, 3) + "raw";
        }

        foreach (string fn in rawFiles)
        {
            Console.WriteLine("Retrieving spectra for file: " + fn);
            ParserUltraLightRAW parser = new ParserUltraLightRAW();

            string tmpFile = fn.Substring(0, fn.Length - 3);
            List<SQTScan> scansForThisFile = theScansToAnalyze.FindAll(a => Regex.IsMatch(tmpFile, a.FileName.Substring(0, a.FileName.Length - 3), RegexOptions.IgnoreCase)).ToList();

            List<int> scnNumbers = scansForThisFile.Select(a => a.ScanNumber).ToList();

            FileInfo theInputFile = new FileInfo(textBoxitraqSEPro.Text);

            List<MSUltraLight> theSpectra = parser.ParseFile(theInputFile.DirectoryName + "/" + fn, -1, 2, scnNumbers);

            foreach (SQTScan sqt in scansForThisFile)
            {
                MSUltraLight spec = theSpectra.Find(a => a.ScanNumber == sqt.ScanNumber);
                sqt.MSLight = new MSLight();
                sqt.MSLight.MZ = spec.Ions.Select(a => (double)a.Item1).ToList();
                sqt.MSLight.Intensity = spec.Ions.Select(a => (double)a.Item2).ToList();
            }

            Console.WriteLine("\tDone processing this file.");
        }
    }
    else
    {
        throw new Exception("This file format is not supported.");
    }
    #endregion

    //Obtaining multiplexed spectra
    SEProQ.IsobaricQuant.YadaMultiplexCorrection.YMC ymc = null;
    if (textBoxCorrectedYadaDirectory.Text.Length > 0)
    {
        Console.WriteLine("Reading Yada results");
        ymc = new IsobaricQuant.YadaMultiplexCorrection.YMC(new DirectoryInfo(textBoxCorrectedYadaDirectory.Text));
        Console.WriteLine("Done loading Yada results");
    }

    //Remove multiplexed spectra from sepro results
    if (textBoxCorrectedYadaDirectory.Text.Length > 0)
    {
        int removedCounter = 0;

        foreach (KeyValuePair<string, List<int>> kvp in ymc.fileNameScanNumberMultiplexDictionary)
        {
            Console.WriteLine("Removing multiplexed spectra for file :: " + kvp.Key);
            richTextBoxLog.AppendText("Removing multiplexed spectra for file :: " + kvp.Key + "\n");

            string cleanName = kvp.Key.Substring(0, kvp.Key.Length - 4);
            cleanName += ".sqt";

            foreach (int scnNo in kvp.Value)
            {
                int index = theScansToAnalyze.FindIndex(a => a.ScanNumber == scnNo && a.FileName.Equals(cleanName));
                if (index >= 0)
                {
                    Console.Write(theScansToAnalyze[index].ScanNumber + " ");
                    richTextBoxLog.AppendText(theScansToAnalyze[index].ScanNumber + " ");
                    removedCounter++;
                    theScansToAnalyze.RemoveAt(index);
                }
            }

            Console.WriteLine("\n");
            richTextBoxLog.AppendText("\n");
        }

        Console.WriteLine("Done removing multiplexed spectra :: " + removedCounter);
    }

    PatternTools.CSML.Matrix correctionMatrix = new PatternTools.CSML.Matrix();
    if (checkBoxApplyPurityCorrection.Checked)
    {
        List<List<double>> correctionData = GetPurityCorrectionsFromForm();
        correctionMatrix = IsobaricQuant.IsobaricImpurityCorrection.GenerateInverseCorrectionMatrix(correctionData);
    }

    //--------------------------------------------------------------------------------------------------------------------

    //Prepare normalization Dictionary: one accumulator per file, one slot per isobaric mass
    signalIdentifiedNormalizationDictionary = new Dictionary<string, double[]>();

    List<string> fileNames = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

    foreach (string fileName in fileNames)
    {
        signalIdentifiedNormalizationDictionary.Add(fileName, new double[isobaricMasses.Count]);
    }

    //-------------------------------------

    //If necessary, correct for impurity and feed global signal dictionary
    foreach (SQTScan scn in theScansToAnalyze)
    {
        double[] thisQuantitation = GetIsobaricSignal(scn.MSLight.Ions, isobaricMasses);

        double maxSignal = thisQuantitation.Max();

        //We can only correct for signal for those that have quantitation values in all places
        if (checkBoxApplyPurityCorrection.Checked && (thisQuantitation.Count(a => a > maxSignal * (double)numericUpDownIonCountThreshold.Value) == isobaricMasses.Count))
        {
            thisQuantitation = IsobaricQuant.IsobaricImpurityCorrection.CorrectForSignal(correctionMatrix, thisQuantitation).ToArray();
        }

        if (checkBoxNormalizationChannelSignal.Checked)
        {
            for (int k = 0; k < thisQuantitation.Length; k++)
            {
                signalIdentifiedNormalizationDictionary[scn.FileName][k] += thisQuantitation[k];
            }
        }

        scn.Quantitation = new List<List<double>>() { thisQuantitation.ToList() };
    }

    //And now normalize -------------------

    if (checkBoxNormalizationChannelSignal.Checked)
    {
        Console.WriteLine("Performing channel signal normalization for " + theScansToAnalyze.Count + " scans.");

        foreach (SQTScan scn2 in theScansToAnalyze)
        {
            for (int m = 0; m < isobaricMasses.Count; m++)
            {
                scn2.Quantitation[0][m] /= signalIdentifiedNormalizationDictionary[scn2.FileName][m];
            }

            if (scn2.Quantitation[0].Contains(double.NaN))
            {
                Console.WriteLine("Problems on signal of scan " + scn2.FileNameWithScanNumberAndChargeState);
            }
        }
    }

    comboBoxSelectFileForGraphs.Items.Clear();
    foreach (string file in signalIdentifiedNormalizationDictionary.Keys.ToList())
    {
        comboBoxSelectFileForGraphs.Items.Add(file);
    }

    tabControlMain.SelectedIndex = 1;

    if (radioButtonAnalysisPeptideReport.Checked)
    {
        //Peptide Analysis

        //Write Peptide Analysis; the using block closes the file even when writing fails
        using (StreamWriter sw = new StreamWriter(textBoxOutputDirectory.Text + "/" + "PeptideQuantitationReport.txt"))
        {
            //Eliminate problematic quants
            int removed = theScansToAnalyze.RemoveAll(a => Object.ReferenceEquals(a.Quantitation, null));
            Console.WriteLine("Problematic scans removed: " + removed);

            var pepDic = from scn in theScansToAnalyze
                         group scn by scn.PeptideSequenceCleaned into groupedSequences
                         select new { PeptideSequence = groupedSequences.Key, TheScans = groupedSequences.ToList() };

            foreach (var pep in pepDic)
            {
                sw.WriteLine("Peptide:" + pep.PeptideSequence + "\tSpecCounts:" + pep.TheScans.Count);

                foreach (SQTScan sqt in pep.TheScans)
                {
                    sw.WriteLine(sqt.FileNameWithScanNumberAndChargeState + "\t" + string.Join("\t", sqt.Quantitation[0]));
                }
            }

            //And now write the Fasta
            sw.WriteLine("#Fasta Items");
            foreach (FastaItem fastaItem in theFastaItems)
            {
                sw.WriteLine(">" + fastaItem.SequenceIdentifier + " " + fastaItem.Description);
                sw.WriteLine(fastaItem.Sequence);
            }
        }
    }
    else
    {
        // NOTE(review): the package is loaded from disk again at this point; it looks like the
        // quantitation computed above on the previously loaded scans is not carried over to
        // this instance - confirm whether that is intended.
        rp = ResultPackage.Load(textBoxitraqSEPro.Text);

        //Peptide Level
        {
            PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

            List<PeptideResult> peptides = rp.MyProteins.MyPeptideList;

            if (checkBoxOnlyUniquePeptides.Checked)
            {
                int removedPeptides = peptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                Console.WriteLine("Removing {0} peptides for not being unique.", removedPeptides);
            }

            for (int i = 0; i < peptides.Count; i++)
            {
                SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                index.Name = peptides[i].PeptideSequence;
                index.Description = string.Join(" ", peptides[i].MyMapableProteins);
                index.ID = i;

                ip.Add(index, true);
            }

            SparseMatrix sm = new SparseMatrix();

            List<int> dims = ip.allIDs();

            for (int l = 0; l < labels.Count; l++)
            {
                // Negative labels mark channels to be left out
                if (labels[l] < 0) { continue; }

                sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                List<double> values = new List<double>(dims.Count);
                List<int> dimsWithValues = new List<int>();

                foreach (int d in dims)
                {
                    List<SQTScan> scns = peptides[d].MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                    if (scns.Count > 0)
                    {
                        double signalSum = scns.FindAll(a => !double.IsNaN(a.Quantitation[0][l])).Sum(a => a.Quantitation[0][l]);
                        values.Add(signalSum);
                        dimsWithValues.Add(d);
                    }
                }

                smr.Dims = dimsWithValues;
                smr.Values = values;
                smr.FileName = isobaricMasses[l].ToString();

                sm.addRow(smr);
            }

            PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
            plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectPeptides.plp");
        }

        //Protein Level
        {
            //Generate Index
            PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

            List<MyProtein> theProteins = rp.MyProteins.MyProteinList;

            if (checkBoxOnlyUniquePeptides.Checked)
            {
                int removedProteins = theProteins.RemoveAll(a => !a.PeptideResults.Exists(b => b.NoMyMapableProteins == 1));
                Console.WriteLine("{0} removed proteins for not having unique peptides", removedProteins);
            }

            for (int i = 0; i < theProteins.Count; i++)
            {
                SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                index.ID = i;
                index.Name = theProteins[i].Locus;
                index.Description = theProteins[i].Description;

                ip.Add(index, false);
            }

            //SparseMatrix
            SparseMatrix sm = new SparseMatrix();

            List<int> dims = ip.allIDs();

            for (int l = 0; l < labels.Count; l++)
            {
                // Negative labels mark channels to be left out
                if (labels[l] < 0) { continue; }

                if (!sm.ClassDescriptionDictionary.ContainsKey(labels[l]))
                {
                    sm.ClassDescriptionDictionary.Add(labels[l], labels[l].ToString());
                }

                sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                List<double> values = new List<double>(dims.Count);
                List<int> dimsToInclude = new List<int>();

                foreach (int d in dims)
                {
                    double signalSum = 0;

                    List<PeptideResult> thePeptides = theProteins[d].PeptideResults;

                    if (checkBoxOnlyUniquePeptides.Checked)
                    {
                        thePeptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                    }

                    foreach (PeptideResult pr in thePeptides)
                    {
                        List<SQTScan> scns = pr.MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                        foreach (SQTScan sqt in scns)
                        {
                            if (!double.IsNaN(sqt.Quantitation[0][l]) && !double.IsInfinity(sqt.Quantitation[0][l]))
                            {
                                signalSum += sqt.Quantitation[0][l];
                            }
                        }
                    }

                    if (signalSum > 0)
                    {
                        dimsToInclude.Add(d);
                        values.Add(signalSum);
                    }
                    else
                    {
                        Console.WriteLine("No signal found for " + theProteins[d].Locus + " on marker " + l);
                    }
                }

                // values only holds an entry for the proteins listed in dimsToInclude, so that is
                // the list that must go with it (the full dims list was assigned here before,
                // leaving dims and values with different lengths whenever a protein had no signal)
                smr.Dims = dimsToInclude;
                smr.Values = values;
                smr.FileName = isobaricMasses[l].ToString();

                sm.addRow(smr);
            }

            PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
            plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectProteins.plp");
        }
    }

    comboBoxSelectFileForGraphs.Enabled = true;
    tabControlMain.SelectedIndex = 2;
    Console.WriteLine("Done");
    buttonGo.Text = "Generate Report";
}
// Lists the scans of a SEPro file whose cleaned peptide sequence contains "79", together with
// the p-value returned by xdScore for each of them, and then the lowest p-value per peptide.
// The scans can first be narrowed by number of "79" occurrences and by charge state
// (a value of 0 in the corresponding numeric box disables that filter).
private void buttonGetGoodHitsFromSEPro_Click(object sender, EventArgs e)
{
    string seproPath = textBoxSEProFileToExtractGoodPeptides.Text;
    if (!File.Exists(seproPath))
    {
        MessageBox.Show("Please enter a valid SEPro file.");
        return;
    }

    ResultPackage sepro = ResultPackage.Load(seproPath);

    richTextBoxResultScans.Clear();
    richTextBoxPeptides.Clear();
    richTextBoxResultScans.AppendText("File Name\tCharge\tScan Number\tSequence\tp-value\n");

    Dictionary<string, List<double>> scoresByPeptide = new Dictionary<string, List<double>>();

    // Work on a copy so the filters below never touch the package's own list
    List<SQTScan> candidates = sepro.MyProteins.AllSQTScans.Select(a => a).ToList();

    if (numericUpDownNoPhosphoSites.Value > 0)
    {
        // Keep the scans whose sequence shows exactly the requested number of "79" occurrences
        Regex phosphoMarker = new Regex("79");
        candidates = candidates.FindAll(a => phosphoMarker.Matches(a.PeptideSequence).Count == numericUpDownNoPhosphoSites.Value);
    }

    if (numericUpDownModelChargeState.Value > 0)
    {
        candidates = candidates.FindAll(a => a.ChargeState == numericUpDownModelChargeState.Value);
    }

    foreach (SQTScan scan in candidates)
    {
        if (!scan.PeptideSequenceCleaned.Contains("79"))
        {
            continue;
        }

        double pValue = xdScore.RetrievePValueForScanNumber(scan.ScanNumber, (double)numericUpDownXCorrPrime.Value);

        if (pValue == -1)
        {
            Console.WriteLine("Shouldn´t be here");
        }

        // Fetch the score list of this peptide, creating it on first sight
        List<double> scores;
        if (!scoresByPeptide.TryGetValue(scan.PeptideSequenceCleaned, out scores))
        {
            scores = new List<double>();
            scoresByPeptide.Add(scan.PeptideSequenceCleaned, scores);
        }
        scores.Add(pValue);

        richTextBoxResultScans.AppendText(
            scan.FileName + "\t" +
            scan.ChargeState + "\t" +
            scan.ScanNumber + "\t" +
            scan.PeptideSequenceCleaned + "\t" +
            Math.Round(pValue, 4) + "\n");
    }

    foreach (KeyValuePair<string, List<double>> entry in scoresByPeptide)
    {
        // After sorting, the first element is the lowest p-value of the peptide
        List<double> scores = entry.Value;
        scores.Sort();
        richTextBoxPeptides.AppendText(entry.Key + "\t" + scores[0] + "\n");
    }
}
// Fuses every "*.sepr" file found under the input directory (sub-directories included) into a
// single SEPro file.
//
// The scans and protein identifiers of all packages are pooled, one FastaItem is kept per
// distinct locus, and the parameters and database of the last package loaded are reused.
// A ProteinManager is built from the pooled data and, unless the save dialog is cancelled,
// a new ResultPackage is written to the chosen file.
private void buttonGo_Click(object sender, EventArgs e)
{
    if (!Directory.Exists(textBoxInputDirectory.Text))
    {
        richTextBoxLog.AppendText("Directory " + textBoxInputDirectory.Text + " does not exist");
        return;
    }

    DirectoryInfo inputDirectory = new DirectoryInfo(textBoxInputDirectory.Text);
    List<FileInfo> seproFiles = inputDirectory.GetFiles("*.sepr", SearchOption.AllDirectories).ToList();

    if (seproFiles.Count == 0)
    {
        MessageBox.Show("No SEPro files found in the provided directory.");
        return;
    }

    // Accumulators filled while the packages are loaded
    List<SQTScan> pooledScans = new List<SQTScan>();
    List<string> pooledProteinIDs = new List<string>();
    List<FastaItem> fastaItems = new List<FastaItem>();
    Parameters fusedParameters = new Parameters();
    string database = "";

    for (int i = 0; i < seproFiles.Count; i++)
    {
        FileInfo seproFile = seproFiles[i];

        richTextBoxLog.AppendText("Loading " + seproFile.Name + "\n");
        this.Update();

        ResultPackage package = ResultPackage.Load(seproFile.FullName);

        pooledScans.AddRange(package.MyProteins.AllSQTScans);
        fusedParameters = package.MyParameters;
        pooledProteinIDs.AddRange(package.MyProteins.MyProteinList.Select(a => a.Locus).ToList());

        foreach (MyProtein protein in package.MyProteins.MyProteinList)
        {
            bool alreadyStored = fastaItems.Exists(a => a.SequenceIdentifier.Equals(protein.Locus));
            if (alreadyStored)
            {
                continue;
            }

            FastaItem fasta = new FastaItem();
            fasta.Description = protein.Description;
            fasta.Sequence = protein.Sequence;
            fasta.SequenceIdentifier = protein.Locus;
            fastaItems.Add(fasta);
        }

        database = package.Database;
    }

    richTextBoxLog.AppendText("Generating new SEPro file\n");
    this.Update();

    List<string> fusedPaths = seproFiles.Select(a => a.FullName).ToList();
    fusedParameters.SeachResultDirectoy = "Fusion SEPro file of" + string.Join(",", fusedPaths);

    List<string> distinctProteinIDs = pooledProteinIDs.Distinct().ToList();
    ProteinManager manager = new ProteinManager(pooledScans, fusedParameters, distinctProteinIDs);
    manager.CalculateProteinCoverage(fastaItems);
    manager.GroupProteinsHavingCommonPeptides(1);

    saveFileDialog1.Filter = "SEPro files (*.sepr)|*.sepr";
    saveFileDialog1.InitialDirectory = textBoxInputDirectory.Text;
    saveFileDialog1.FileName = "Fusion_";

    bool saveConfirmed = saveFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.Cancel;
    if (saveConfirmed)
    {
        ResultPackage fused = new ResultPackage(manager, fusedParameters, database, fusedParameters.SeachResultDirectoy, false);
        fused.MyProteins.RebuildProteinsFromScans();
        fused.Save(saveFileDialog1.FileName);
    }

    // Shown whether or not the file was saved, as before
    MessageBox.Show("Done.");
}