/// <summary>
/// Builds a protein index from the FASTA entries of every result package and a sparse matrix
/// holding, for each package, how many alignments reference each indexed protein.
/// </summary>
/// <returns>
/// A pair whose key is the index (one entry per distinct FASTA sequence identifier, in first-seen
/// order) and whose value is the sparse matrix (one row per entry of <c>MyResultPackages</c>).
/// </returns>
public KeyValuePair<SparseMatrixIndexParserV2, SparseMatrix> GenerateIndexAndSparseMatrix()
{
    // Register every distinct protein once, using the description of the first FASTA item that
    // carries the identifier. A single pass with a set replaces re-scanning all packages per ID.
    SparseMatrixIndexParserV2 smi = new SparseMatrixIndexParserV2();
    HashSet<string> seenFastaIDs = new HashSet<string>();

    foreach (ThePackage pckg in MyResultPackages)
    {
        foreach (FastaItem fst in pckg.MyPackage.MyFasta)
        {
            if (seenFastaIDs.Add(fst.SequenceIdentifier))
            {
                smi.Add(fst.SequenceIdentifier, fst.Description);
            }
        }
    }

    SparseMatrix sm = new SparseMatrix();

    foreach (DirectoryClassDescription myDir in MyDirectoryDescriptionDictionary)
    {
        sm.ClassDescriptionDictionary.Add(myDir.ClassLabel, myDir.Description);
    }

    foreach (ThePackage rp in MyResultPackages)
    {
        sparseMatrixRow smr = new sparseMatrixRow(rp.MyClassLabel);
        smr.FileName = rp.MyFileInfo.FullName;

        List<int> dims = new List<int>();
        List<double> values = new List<double>();

        for (int i = 0; i < smi.TheIndexes.Count; i++)
        {
            string proteinName = smi.TheIndexes[i].Name;
            int count = rp.MyPackage.Alignments.Count(a => a.ProtIDs.Contains(proteinName));

            // Only proteins with at least one alignment get an entry in the row.
            if (count > 0)
            {
                // Dimensions are written 1-based (position in the index + 1).
                dims.Add(i + 1);
                values.Add(count);
            }
        }

        smr.Dims = dims;
        smr.Values = values;
        sm.addRow(smr);
    }

    return new KeyValuePair<SparseMatrixIndexParserV2, SparseMatrix>(smi, sm);
}
/// <summary>
/// Handles the "Export to PLP" menu item: asks for a destination file, builds an index of the
/// proteins in <c>protPepDict</c> that have more peptide quants than the value of
/// <c>IntegerUpDown</c>, sums the marker intensities per protein for every non-negative class
/// label, and saves the result as a PatternLab project.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void MenuItemExporToPLP_Click(object sender, RoutedEventArgs e)
{
    SaveFileDialog sfd = new SaveFileDialog();
    // The default extension must agree with the only filter offered by the dialog.
    sfd.DefaultExt = ".plp";
    sfd.Filter = "PatternLab Project (*.plp)|*.plp";

    Nullable<bool> result = sfd.ShowDialog();

    // Nothing to do when the dialog was cancelled.
    if (result != true)
    {
        return;
    }

    // Build the index; orderedKeys remembers the proteins in index order.
    SparseMatrixIndexParserV2 smi = new SparseMatrixIndexParserV2();
    int counter = 0;
    List<FastaItem> orderedKeys = new List<FastaItem>();

    foreach (KeyValuePair<FastaItem, List<PepQuant>> kvp in protPepDict)
    {
        if (kvp.Value.Count > IntegerUpDown.Value)
        {
            counter++;
            SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
            index.ID = counter;
            index.Name = kvp.Key.SequenceIdentifier;
            index.Description = kvp.Key.Description;
            smi.Add(index);
            orderedKeys.Add(kvp.Key);
        }
    }

    SparseMatrix sm = new SparseMatrix();
    sm.ClassDescriptionDictionary = new Dictionary<int, string>();

    // Labels are space separated; empty tokens (repeated or trailing spaces) are skipped so
    // they do not make int.Parse throw.
    List<int> labels = TextBoxClassLabel.Text
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(a => int.Parse(a))
        .ToList();

    for (int i = 0; i < labels.Count; i++)
    {
        // A negative label excludes the channel at this position.
        if (labels[i] < 0)
        {
            continue;
        }

        // NOTE(review): the class key and the row label are the channel position i, not the
        // parsed value labels[i] - confirm this is intended.
        sm.ClassDescriptionDictionary.Add(i, (i).ToString());

        List<int> dims = new List<int>();
        List<double> values = new List<double>();

        for (int j = 0; j < orderedKeys.Count; j++)
        {
            FastaItem fi = orderedKeys[j];
            List<PepQuant> thePepQuants = protPepDict[fi];

            double theIntensitySum = 0;
            foreach (PepQuant pq in thePepQuants)
            {
                theIntensitySum += pq.MyQuants.Sum(a => a.MarkerIntensities[i]);
            }

            // Only proteins with signal in this channel get an entry; j + 1 equals the ID
            // assigned to the protein when the index was built above.
            if (theIntensitySum > 0)
            {
                dims.Add(j + 1);
                values.Add(theIntensitySum);
            }
        }

        sparseMatrixRow smr = new sparseMatrixRow(i, dims, values);
        sm.theMatrixInRows.Add(smr);
    }

    PatternLabProject plp = new PatternLabProject(sm, smi, "Isobaric Quant Project");
    plp.Save(sfd.FileName);

    MessageBox.Show("PLP file was saved");
    Console.WriteLine("PLP file was saved.");
}
/// <summary>
/// Handles the "Go" button. Validates the form, loads the identified scans from a SEPro
/// (.sepr) or PepExplorer (.mpex) file, extracts the isobaric signal of every scan, optionally
/// removes multiplexed spectra, applies purity correction and per-file channel normalization,
/// and finally writes either a peptide quantitation text report or peptide- and protein-level
/// PatternLab projects into the output directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
/// <exception cref="NotSupportedException">
/// The input file has an extension other than ".sepr" or ".mpex".
/// </exception>
private void buttonGo_Click(object sender, EventArgs e)
{
    //---------------------------------------------------------------------------------------
    // Input validation: nothing is modified until all checks pass.

    if (!Directory.Exists(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    if (!Regex.IsMatch(textBoxIsobaricMasses.Text, "[0-9]+ [0-9]+"))
    {
        MessageBox.Show("Please fill out the masses of the isobaric tags.");
        return;
    }

    //Verify write permission to directory
    if (!PatternTools.pTools.HasWriteAccessToFolder(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    //Obtain class labels
    if (textBoxClassLabels.Text.Length == 0)
    {
        // Labels are split on spaces below, so the hint shows a space-separated example.
        MessageBox.Show("Please input the class labels (e.g., for iTRAQ: 1 2 3 4)");
        return;
    }

    // Empty tokens (repeated or trailing spaces) are skipped so they do not make Parse throw.
    char[] separators = { ' ' };

    List<int> labels = textBoxClassLabels.Text
        .Split(separators, StringSplitOptions.RemoveEmptyEntries)
        .Select(a => int.Parse(a))
        .ToList();

    //Obtain the isobaric masses
    string[] im = textBoxIsobaricMasses.Text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    List<double> isobaricMasses = im.Select(a => double.Parse(a)).ToList();

    if (labels.Count != isobaricMasses.Count)
    {
        MessageBox.Show("Please make sure that the class labels and isobaric masses match");
        return;
    }

    // Checked before any work starts so that a missing input file neither throws while the
    // raw files are being located nor leaves the button stuck on "Working...".
    if (!File.Exists(textBoxitraqSEPro.Text))
    {
        MessageBox.Show("Unable to find SEPro file");
        return;
    }

    buttonGo.Text = "Working...";
    this.Update();

    try
    {
        richTextBoxLog.Clear();

        //--------------------------------------------
        //Get signal from all
        signalAllNormalizationDictionary = new Dictionary<string, double[]>();

        FileInfo fi = new FileInfo(textBoxitraqSEPro.Text);
        bool extractSignal = false;
        ResultPackage rp = null;

        if (checkBoxNormalizationChannelSignal.Checked)
        {
            //We should get the MS info and merge it into the SEPro package
            if (fi.Extension.Equals(".sepr"))
            {
                rp = ResultPackage.Load(textBoxitraqSEPro.Text);
                extractSignal = true;
            }

            // The raw files are looked up in the directory that contains the input file.
            List<FileInfo> rawFiles = fi.Directory.GetFiles("*.RAW").ToList();

            foreach (FileInfo rawFile in rawFiles)
            {
                Console.WriteLine("Extracting data for " + rawFile.Name);

                PatternTools.RawReader.RawReaderParams rParams = new PatternTools.RawReader.RawReaderParams();
                rParams.ExtractMS1 = false;
                rParams.ExtractMS2 = true;
                rParams.ExtractMS3 = false;

                PatternTools.RawReader.Reader reader = new PatternTools.RawReader.Reader(rParams);

                List<MSLight> theMS2 = reader.GetSpectra(rawFile.FullName, new List<int>(), false);
                theMS2.RemoveAll(a => a.Ions == null);

                double[] totalSignal = new double[isobaricMasses.Count];

                List<SQTScan> theScans = null;

                //Update the sepro result package with the signal
                if (extractSignal)
                {
                    //Get all the scans from this file (names compared without their 4-char extension)
                    string rawName = rawFile.Name.Substring(0, rawFile.Name.Length - 4);
                    theScans = rp.MyProteins.AllSQTScans.FindAll(a => a.FileName.Substring(0, a.FileName.Length - 4).Equals(rawName));
                }

                foreach (MSLight ms in theMS2)
                {
                    double[] thisQuantitation = GetIsobaricSignal(ms.Ions, isobaricMasses);

                    if (extractSignal)
                    {
                        SQTScan scn = theScans.Find(a => a.ScanNumber == ms.ScanNumber);
                        if (scn != null)
                        {
                            // Attach the spectrum to the identified scan, dropping ions above m/z 400.
                            scn.MSLight = ms;
                            scn.MSLight.Ions.RemoveAll(a => a.MZ > 400);
                        }
                    }

                    for (int i = 0; i < thisQuantitation.Length; i++)
                    {
                        totalSignal[i] += thisQuantitation[i];
                    }
                }

                // The total signal is stored under the raw file name with its extension replaced by "sqt".
                string theName = rawFile.Name.Substring(0, rawFile.Name.Length - 3);
                theName += "sqt";

                signalAllNormalizationDictionary.Add(theName, totalSignal);
            }
        }

        Console.WriteLine("Loading SEPro File");

        #region Load the sepro or pepexplorer file

        theScansToAnalyze = new List<SQTScan>();
        List<FastaItem> theFastaItems = new List<FastaItem>();

        if (fi.Extension.Equals(".sepr"))
        {
            Console.WriteLine("Loading SEPro file");

            if (!extractSignal)
            {
                rp = ResultPackage.Load(textBoxitraqSEPro.Text);
            }

            // Only scans that carry a spectrum can be quantified.
            rp.MyProteins.AllSQTScans.RemoveAll(a => a.MSLight == null);
            theScansToAnalyze = rp.MyProteins.AllSQTScans;
            Console.WriteLine("Done reading SEPro result");

            theFastaItems = rp.MyProteins.MyProteinList.Select(a => new FastaItem(a.Locus, a.Sequence, a.Description)).ToList();
        }
        else if (fi.Extension.Equals(".mpex"))
        {
            Console.WriteLine("Loading PepExplorer file....");
            PepExplorer2.Result2.ResultPckg2 result = PepExplorer2.Result2.ResultPckg2.DeserializeResultPackage(textBoxitraqSEPro.Text);
            theFastaItems = result.MyFasta;

            theScansToAnalyze = new List<SQTScan>();

            foreach (PepExplorer2.Result2.AlignmentResult al in result.Alignments)
            {
                foreach (var dnr in al.DeNovoRegistries)
                {
                    SQTScan sqt = new SQTScan();
                    sqt.ScanNumber = dnr.ScanNumber;
                    sqt.FileName = dnr.FileName;
                    sqt.PeptideSequence = dnr.PtmSequence;
                    theScansToAnalyze.Add(sqt);
                }
            }

            //And now we need to retrieve the mass spectra. For this, the raw files should be inside the directory containing the mpex file
            List<string> rawFiles = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

            // Swap the 3-character extension of every file name for "raw".
            for (int i = 0; i < rawFiles.Count; i++)
            {
                rawFiles[i] = rawFiles[i].Remove(rawFiles[i].Length - 3, 3) + "raw";
            }

            foreach (string fn in rawFiles)
            {
                Console.WriteLine("Retrieving spectra for file: " + fn);
                ParserUltraLightRAW parser = new ParserUltraLightRAW();

                string tmpFile = fn.Substring(0, fn.Length - 3);

                // The scan's file name is matched as literal text (case-insensitive); escaping keeps
                // characters such as '.', '+' or '(' in a file name from being read as regex syntax.
                List<SQTScan> scansForThisFile = theScansToAnalyze.FindAll(a => Regex.IsMatch(tmpFile, Regex.Escape(a.FileName.Substring(0, a.FileName.Length - 3)), RegexOptions.IgnoreCase));
                List<int> scnNumbers = scansForThisFile.Select(a => a.ScanNumber).ToList();

                FileInfo theInputFile = new FileInfo(textBoxitraqSEPro.Text);

                List<MSUltraLight> theSpectra = parser.ParseFile(theInputFile.DirectoryName + "/" + fn, -1, 2, scnNumbers);

                foreach (SQTScan sqt in scansForThisFile)
                {
                    MSUltraLight spec = theSpectra.Find(a => a.ScanNumber == sqt.ScanNumber);
                    sqt.MSLight = new MSLight();
                    sqt.MSLight.MZ = spec.Ions.Select(a => (double)a.Item1).ToList();
                    sqt.MSLight.Intensity = spec.Ions.Select(a => (double)a.Item2).ToList();
                }

                Console.WriteLine("\tDone processing this file.");
            }
        }
        else
        {
            throw new NotSupportedException("This file format is not supported.");
        }

        #endregion

        //Obtaining multiplexed spectra and removing them from the scans to analyze
        SEProQ.IsobaricQuant.YadaMultiplexCorrection.YMC ymc = null;

        if (textBoxCorrectedYadaDirectory.Text.Length > 0)
        {
            Console.WriteLine("Reading Yada results");
            ymc = new IsobaricQuant.YadaMultiplexCorrection.YMC(new DirectoryInfo(textBoxCorrectedYadaDirectory.Text));
            Console.WriteLine("Done loading Yada results");

            int removedCounter = 0;

            foreach (KeyValuePair<string, List<int>> kvp in ymc.fileNameScanNumberMultiplexDictionary)
            {
                Console.WriteLine("Removing multiplexed spectra for file :: " + kvp.Key);
                richTextBoxLog.AppendText("Removing multiplexed spectra for file :: " + kvp.Key + "\n");

                // The dictionary key has its last 4 characters replaced by ".sqt" to match scan file names.
                string cleanName = kvp.Key.Substring(0, kvp.Key.Length - 4);
                cleanName += ".sqt";

                foreach (int scnNo in kvp.Value)
                {
                    int index = theScansToAnalyze.FindIndex(a => a.ScanNumber == scnNo && a.FileName.Equals(cleanName));
                    if (index >= 0)
                    {
                        Console.Write(theScansToAnalyze[index].ScanNumber + " ");
                        richTextBoxLog.AppendText(theScansToAnalyze[index].ScanNumber + " ");

                        removedCounter++;
                        theScansToAnalyze.RemoveAt(index);
                    }
                }

                Console.WriteLine("\n");
                richTextBoxLog.AppendText("\n");
            }

            Console.WriteLine("Done removing multiplexed spectra :: " + removedCounter);
        }

        PatternTools.CSML.Matrix correctionMatrix = new PatternTools.CSML.Matrix();
        if (checkBoxApplyPurityCorrection.Checked)
        {
            List<List<double>> correctionData = GetPurityCorrectionsFromForm();
            correctionMatrix = IsobaricQuant.IsobaricImpurityCorrection.GenerateInverseCorrectionMatrix(correctionData);
        }

        //--------------------------------------------------------------------------------------------------------------------

        //Prepare normalization Dictionary: one accumulator per file, one slot per isobaric mass
        signalIdentifiedNormalizationDictionary = new Dictionary<string, double[]>();

        List<string> fileNames = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

        foreach (string fileName in fileNames)
        {
            signalIdentifiedNormalizationDictionary.Add(fileName, new double[isobaricMasses.Count]);
        }

        //-------------------------------------

        //If necessary, correct for impurity and feed global signal dictionary
        foreach (SQTScan scn in theScansToAnalyze)
        {
            double[] thisQuantitation = GetIsobaricSignal(scn.MSLight.Ions, isobaricMasses);

            double maxSignal = thisQuantitation.Max();

            //We can only correct for signal for those that have quantitation values in all places
            if (checkBoxApplyPurityCorrection.Checked && (thisQuantitation.Count(a => a > maxSignal * (double)numericUpDownIonCountThreshold.Value) == isobaricMasses.Count))
            {
                thisQuantitation = IsobaricQuant.IsobaricImpurityCorrection.CorrectForSignal(correctionMatrix, thisQuantitation).ToArray();
            }

            if (checkBoxNormalizationChannelSignal.Checked)
            {
                for (int k = 0; k < thisQuantitation.Length; k++)
                {
                    signalIdentifiedNormalizationDictionary[scn.FileName][k] += thisQuantitation[k];
                }
            }

            scn.Quantitation = new List<List<double>>() { thisQuantitation.ToList() };
        }

        //And now normalize -------------------

        if (checkBoxNormalizationChannelSignal.Checked)
        {
            Console.WriteLine("Performing channel signal normalization for " + theScansToAnalyze.Count + " scans.");

            foreach (SQTScan scn2 in theScansToAnalyze)
            {
                // Each channel is divided by the summed signal of that channel over the scan's file.
                for (int m = 0; m < isobaricMasses.Count; m++)
                {
                    scn2.Quantitation[0][m] /= signalIdentifiedNormalizationDictionary[scn2.FileName][m];
                }

                if (scn2.Quantitation[0].Contains(double.NaN))
                {
                    Console.WriteLine("Problems on signal of scan " + scn2.FileNameWithScanNumberAndChargeState);
                }
            }
        }

        comboBoxSelectFileForGraphs.Items.Clear();
        foreach (string file in signalIdentifiedNormalizationDictionary.Keys.ToList())
        {
            comboBoxSelectFileForGraphs.Items.Add(file);
        }

        tabControlMain.SelectedIndex = 1;

        if (radioButtonAnalysisPeptideReport.Checked)
        {
            //Peptide Analysis

            //Write Peptide Analysis; the using block closes the file even if writing fails
            using (StreamWriter sw = new StreamWriter(textBoxOutputDirectory.Text + "/" + "PeptideQuantitationReport.txt"))
            {
                //Eliminate problematic quants
                int removed = theScansToAnalyze.RemoveAll(a => Object.ReferenceEquals(a.Quantitation, null));
                Console.WriteLine("Problematic scans removed: " + removed);

                var pepDic = from scn in theScansToAnalyze
                             group scn by scn.PeptideSequenceCleaned into groupedSequences
                             select new { PeptideSequence = groupedSequences.Key, TheScans = groupedSequences.ToList() };

                foreach (var pep in pepDic)
                {
                    sw.WriteLine("Peptide:" + pep.PeptideSequence + "\tSpecCounts:" + pep.TheScans.Count);

                    foreach (SQTScan sqt in pep.TheScans)
                    {
                        sw.WriteLine(sqt.FileNameWithScanNumberAndChargeState + "\t" + string.Join("\t", sqt.Quantitation[0]));
                    }
                }

                //And now write the Fasta
                sw.WriteLine("#Fasta Items");
                foreach (FastaItem fastaItem in theFastaItems)
                {
                    sw.WriteLine(">" + fastaItem.SequenceIdentifier + " " + fastaItem.Description);
                    sw.WriteLine(fastaItem.Sequence);
                }
            }
        }
        else
        {
            // NOTE(review): the package is loaded again here, so the scans reached through rp below
            // are not the instances in theScansToAnalyze unless Load returns shared objects - confirm
            // that the Quantitation values computed above are visible on them.
            rp = ResultPackage.Load(textBoxitraqSEPro.Text);

            //Peptide Level
            {
                PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

                List<PeptideResult> peptides = rp.MyProteins.MyPeptideList;

                if (checkBoxOnlyUniquePeptides.Checked)
                {
                    int removedPeptides = peptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                    Console.WriteLine("Removing {0} peptides for not being unique.", removedPeptides);
                }

                // Index IDs equal the position of the peptide in the list.
                for (int i = 0; i < peptides.Count; i++)
                {
                    SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                    index.Name = peptides[i].PeptideSequence;
                    index.Description = string.Join(" ", peptides[i].MyMapableProteins);
                    index.ID = i;

                    ip.Add(index, true);
                }

                SparseMatrix sm = new SparseMatrix();

                List<int> dims = ip.allIDs();

                for (int l = 0; l < labels.Count; l++)
                {
                    // A negative label excludes the channel at this position.
                    if (labels[l] < 0) { continue; }

                    sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                    List<double> values = new List<double>(dims.Count);
                    List<int> dimsWithValues = new List<int>();

                    foreach (int d in dims)
                    {
                        List<SQTScan> scns = peptides[d].MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                        if (scns.Count > 0)
                        {
                            double signalSum = scns.FindAll(a => !double.IsNaN(a.Quantitation[0][l])).Sum(a => a.Quantitation[0][l]);
                            values.Add(signalSum);
                            dimsWithValues.Add(d);
                        }
                    }

                    smr.Dims = dimsWithValues;
                    smr.Values = values;
                    smr.FileName = isobaricMasses[l].ToString();

                    sm.addRow(smr);
                }

                PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
                plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectPeptides.plp");
            }

            //Protein Level
            {
                //Generate Index
                PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

                List<MyProtein> theProteins = rp.MyProteins.MyProteinList;

                if (checkBoxOnlyUniquePeptides.Checked)
                {
                    int removedProteins = theProteins.RemoveAll(a => !a.PeptideResults.Exists(b => b.NoMyMapableProteins == 1));
                    Console.WriteLine("{0} removed proteins for not having unique peptides", removedProteins);
                }

                // Index IDs equal the position of the protein in the list.
                for (int i = 0; i < theProteins.Count; i++)
                {
                    SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                    index.ID = i;
                    index.Name = theProteins[i].Locus;
                    index.Description = theProteins[i].Description;

                    ip.Add(index, false);
                }

                //SparseMatrix
                SparseMatrix sm = new SparseMatrix();

                List<int> dims = ip.allIDs();

                for (int l = 0; l < labels.Count; l++)
                {
                    // A negative label excludes the channel at this position.
                    if (labels[l] < 0) { continue; }

                    if (!sm.ClassDescriptionDictionary.ContainsKey(labels[l]))
                    {
                        sm.ClassDescriptionDictionary.Add(labels[l], labels[l].ToString());
                    }

                    sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                    List<double> values = new List<double>(dims.Count);
                    List<int> dimsToInclude = new List<int>();

                    foreach (int d in dims)
                    {
                        double signalSum = 0;

                        List<PeptideResult> thePeptides = theProteins[d].PeptideResults;

                        if (checkBoxOnlyUniquePeptides.Checked)
                        {
                            thePeptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                        }

                        foreach (PeptideResult pr in thePeptides)
                        {
                            List<SQTScan> scns = pr.MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                            foreach (SQTScan sqt in scns)
                            {
                                if (!double.IsNaN(sqt.Quantitation[0][l]) && !double.IsInfinity(sqt.Quantitation[0][l]))
                                {
                                    signalSum += sqt.Quantitation[0][l];
                                }
                            }
                        }

                        if (signalSum > 0)
                        {
                            dimsToInclude.Add(d);
                            values.Add(signalSum);
                        }
                        else
                        {
                            Console.WriteLine("No signal found for " + theProteins[d].Locus + " on marker " + l);
                        }
                    }

                    // Only the dimensions that received a value go into the row, so Dims and Values
                    // stay the same length and stay aligned.
                    smr.Dims = dimsToInclude;
                    smr.Values = values;
                    smr.FileName = isobaricMasses[l].ToString();

                    sm.addRow(smr);
                }

                PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
                plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectProteins.plp");
            }
        }

        comboBoxSelectFileForGraphs.Enabled = true;
        tabControlMain.SelectedIndex = 2;
        Console.WriteLine("Done");
    }
    finally
    {
        // Restore the caption on success and on failure alike.
        buttonGo.Text = "Generate Report";
    }
}