/// <summary>
/// Builds an <see cref="MSUltraLight"/> spectrum from a textual peak list.
/// </summary>
/// <param name="textBoxInput">
/// Text with one peak per line: m/z followed by intensity, separated by spaces and/or tabs.
/// Only lines whose first character is a digit are considered; everything else is ignored.
/// </param>
/// <param name="minimumIntensity">Peaks whose intensity is below this value are discarded.</param>
/// <returns>
/// A spectrum holding the retained peaks and a single precursor built from the m/z of the
/// last entry of <c>Ions</c> with charge 2.
/// </returns>
/// <exception cref="InvalidOperationException">No peak survived parsing and filtering.</exception>
public static MSUltraLight RichTextBox2MS(string textBoxInput, double minimumIntensity)
{
    MSUltraLight myMS = new MSUltraLight();
    List<Tuple<float, float>> myIons = new List<Tuple<float, float>>();

    // '\r' is treated as a separator so that Windows line endings never end up inside a column.
    char[] separators = { ' ', '\t', '\r' };

    foreach (string line in Regex.Split(textBoxInput, @"\n"))
    {
        if (!Regex.IsMatch(line, "^[0-9]"))
        {
            continue;
        }

        // RemoveEmptyEntries tolerates repeated separators (e.g. two spaces or space + tab),
        // which previously produced empty columns and a FormatException.
        string[] entries = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        if (entries.Length < 2)
        {
            // A digit-leading line without an intensity column is not a peak; skip it.
            continue;
        }

        float intensity = float.Parse(entries[1]);

        if (intensity >= minimumIntensity)
        {
            // The m/z column is only parsed for peaks that are actually kept.
            float thisMZ = float.Parse(entries[0]);
            myIons.Add(new Tuple<float, float>(thisMZ, intensity));
        }
    }

    myMS.UpdateIons(myIons);

    if (!myMS.Ions.Any())
    {
        // Same exception type that Last() would raise on an empty sequence, with a usable message.
        throw new InvalidOperationException("No peaks with intensity >= " + minimumIntensity + " were found in the input; cannot derive a precursor.");
    }

    // The precursor m/z is taken from the last ion of the spectrum; the charge is fixed to 2.
    Tuple<float, short> precursor = new Tuple<float, short>(myMS.Ions.Last().Item1, 2);
    myMS.Precursors = new List<Tuple<float, short>>() { precursor };

    return myMS;
}
/// <summary>
/// Plots the spectrum with the given scan number in the spectrum viewer and prints its peaks
/// into the peaks text box.
/// </summary>
/// <param name="scanNumber">Scan number used to look the spectrum up in <c>MyMS2</c>.</param>
private void ShowSpectrum(int scanNumber)
{
    MSUltraLight theMS = MyMS2.Find(candidate => candidate.ScanNumber == scanNumber);

    // Convert every (m/z, intensity) pair into an Ion tagged with this scan number.
    List<Ion> ions = new List<Ion>();
    foreach (var peak in theMS.Ions)
    {
        ions.Add(new Ion(peak.Item1, peak.Item2, 0, scanNumber));
    }

    // The plotted range spans from the first to the last ion of the list.
    var firstMZ = ions[0].MZ;
    var lastMZ = ions[ions.Count - 1].MZ;
    SpectrumEye1.Plot(ions, firstMZ, lastMZ, 0);

    // An older MyMSViewer-based rendering path used to live here and was disabled by the author.

    TextBoxMSPeaks.Text = PatternTools.MSParserLight.MSUltraLightPrinter.PrintSpectrum(theMS);
}
/// <summary>
/// Creates a full MS2 spectrum from an <see cref="MSUltraLight"/> one, copying its ions,
/// first precursor, retention time, instrument/activation information, scan number and Z lines.
/// </summary>
/// <param name="ms2">Source spectrum; its first precursor entry is read unconditionally.</param>
public MSFull(MSUltraLight ms2)
{
    // Every (m/z, intensity) pair becomes an Ion stamped with the scan's retention time and number.
    List<Ion> convertedIons = new List<Ion>();
    foreach (var peak in ms2.Ions)
    {
        convertedIons.Add(new Ion(peak.Item1, peak.Item2, ms2.CromatographyRetentionTime, ms2.ScanNumber));
    }
    MSData = convertedIons;

    isMS2 = true;
    ActivationType = ms2.ActivationType.ToString();

    // Only the first precursor is carried over.
    var firstPrecursor = ms2.Precursors[0];
    ChargedPrecursor = firstPrecursor.Item1;

    // Original author's note: these lines are not working for velos data.
    Charges = new List<int>() { firstPrecursor.Item2 };

    CromatographyRetentionTime = ms2.CromatographyRetentionTime;
    InstrumentType = ms2.InstrumentType.ToString();
    ScanNumber = ms2.ScanNumber;
    ZLines = ms2.GetZLines();
}
/// <summary>
/// Parses an MS2 MGF file into a list of spectra.
/// </summary>
/// <param name="file">Path of the MGF file to read.</param>
/// <returns>
/// One <see cref="MSUltraLight"/> per <c>BEGIN IONS</c> section, in file order. Each spectrum has
/// MS level 2; activation and instrument types are -1 unless the TITLE line contains
/// <c>Fragmentation:hcd</c>, in which case the activation type is 3.
/// </returns>
/// <exception cref="Exception">
/// An ion line could not be parsed; the message names the file and the offending line and the
/// original failure is available as the inner exception.
/// </exception>
public static List<MSUltraLight> ParseMGFFile(string file)
{
    // MGF is a machine-readable format that uses '.' as the decimal separator, so numbers are
    // parsed with the invariant culture rather than whatever culture the thread happens to use.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    List<MSUltraLight> theMS2 = new List<MSUltraLight>();
    MSUltraLight ms = new MSUltraLight();

    // NOTE(review): charges are remembered across spectra and are only applied when a PEPMASS line
    // is read, so a CHARGE line that follows PEPMASS affects the next spectrum instead - confirm
    // against the files this parser is fed.
    List<double> charges = new List<double>();

    // 'using' guarantees the file handle is released even when a parse error aborts the loop.
    using (System.IO.StreamReader sr = new System.IO.StreamReader(file))
    {
        string line;

        while ((line = sr.ReadLine()) != null)
        {
            if (line.StartsWith("#", StringComparison.Ordinal) || line.StartsWith("_", StringComparison.Ordinal) || line.Equals(""))
            {
                // Header / blank line: nothing to do.
            }
            else if (line.Equals("BEGIN IONS"))
            {
                // Store the spectrum built so far and start a new one. The very first stored
                // spectrum is the empty placeholder created above; it is dropped at the end.
                theMS2.Add(ms);
                ms = new MSUltraLight();
                ms.MSLevel = 2;
                ms.ActivationType = -1;
                ms.InstrumentType = -1;
            }
            else if (line.StartsWith("CHARGE", StringComparison.Ordinal))
            {
                string[] cols = Regex.Split(line, "=");
                string c = cols[1].Replace(@"+", "");

                // Accept a single charge ("2") or any comma-separated list ("2,3", "2, 3, 4").
                List<double> parsedCharges = new List<double>();
                foreach (string token in c.Split(','))
                {
                    parsedCharges.Add(int.Parse(token, invariant));
                }
                charges = parsedCharges;
            }
            else if (line.StartsWith("PEPMASS", StringComparison.Ordinal))
            {
                string[] cols = Regex.Split(line, "=");
                string[] nums = Regex.Split(cols[1], " ");

                // A leading space after '=' yields an empty first token; use the second one then.
                string massText = nums[0].Equals("") ? nums[1] : nums[0];
                float chargedPrecursor = float.Parse(massText, invariant);

                // One precursor entry is produced per known charge state.
                // NOTE(review): the mass formula is kept exactly as it was - confirm it matches
                // what PEPMASS holds in the files this parser is used with.
                foreach (short z in charges)
                {
                    float mh = (chargedPrecursor + (z * 1.007276466f)) / (float)z;
                    ms.Precursors.Add(new Tuple<float, short>(mh, z));
                }
            }
            else if (line.StartsWith("RTINSECONDS", StringComparison.Ordinal))
            {
                string[] cols = Regex.Split(line, "=");

                // A range such as "12.3-14.5" is reduced to its first value.
                string rtText = cols[1].Contains("-") ? Regex.Split(cols[1], "-")[0] : cols[1];
                ms.CromatographyRetentionTime = float.Parse(rtText, invariant);
            }
            else if (line.StartsWith("SCANS", StringComparison.Ordinal))
            {
                string[] cols = Regex.Split(line, "=");

                // A range such as "100-102" is reduced to its first value.
                string scanText = cols[1].Contains("-") ? Regex.Split(cols[1], "-")[0] : cols[1];
                double scan = double.Parse(scanText, invariant);
                ms.ScanNumber = (int)scan;
            }
            else if (line.StartsWith("TITLE", StringComparison.Ordinal))
            {
                if (line.Contains("Fragmentation:hcd"))
                {
                    ms.ActivationType = 3;
                    ms.InstrumentType = -1;
                }
            }
            else if (Regex.IsMatch(line, "^[0-9]+"))
            {
                // If the line begins with a number it is an ion line: "<m/z> <intensity>".
                string[] cols = Regex.Split(line, "\t| ");

                try
                {
                    Tuple<float, float> i = new Tuple<float, float>(float.Parse(cols[0], invariant), float.Parse(cols[1], invariant));
                    ms.Ions.Add(i);
                }
                catch (Exception ex)
                {
                    // Keep the original failure as inner exception so the root cause is not lost.
                    throw new Exception("An inconsistency has been found in file: " + file + "\nThe line reads:\n" + line, ex);
                }
            }
        }
    }

    // Store the spectrum that was being built when the file ended.
    theMS2.Add(ms);

    // The first one is always bogus (it is the placeholder created before any BEGIN IONS).
    if (theMS2.Count > 0)
    {
        theMS2.RemoveAt(0);
    }

    return theMS2;
}
/// <summary>
/// Handles the "Go" button. Validates the form, loads the identification results (a SEPro
/// <c>.sepr</c> or a PepExplorer <c>.mpex</c> file), obtains the isobaric signal of every scan,
/// optionally removes multiplexed spectra, applies purity correction and per-channel
/// normalization, and finally writes either a peptide quantitation report or two PatternLab
/// projects (peptide and protein level) into the output directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void buttonGo_Click(object sender, EventArgs e)
{
    //Verify write permission to directory
    if (!Directory.Exists(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    if (!Regex.IsMatch(textBoxIsobaricMasses.Text, "[0-9]+ [0-9]+"))
    {
        MessageBox.Show("Please fill out the masses of the isobaric tags.");
        return;
    }

    if (!PatternTools.pTools.HasWriteAccessToFolder(textBoxOutputDirectory.Text))
    {
        MessageBox.Show("Please specify a valid output directory");
        return;
    }

    //Obtain class labels
    if (textBoxClassLabels.Text.Length == 0)
    {
        MessageBox.Show("Please input the class labels (eg., for iTRAQ 1,2,3,4");
        return;
    }

    List<int> labels = Regex.Split(textBoxClassLabels.Text, " ").Select(a => int.Parse(a)).ToList();

    //Obtain the isobaric masses
    string[] im = Regex.Split(textBoxIsobaricMasses.Text, " ");
    List<double> isobaricMasses = im.Select(a => double.Parse(a)).ToList();

    if (labels.Count != isobaricMasses.Count)
    {
        MessageBox.Show("Please make sure that the class labels and isobaric masses match");
        return;
    }

    // The input file must exist before anything derives a FileInfo / directory listing from it.
    // Checking here (instead of after the RAW extraction) also keeps the button caption untouched
    // when the check fails.
    if (!File.Exists(textBoxitraqSEPro.Text))
    {
        MessageBox.Show("Unable to find SEPro file");
        return;
    }

    buttonGo.Text = "Working...";
    this.Update();
    richTextBoxLog.Clear();

    try
    {
        //--------------------------------------------
        //Get signal from all
        signalAllNormalizationDictionary = new Dictionary<string, double[]>();

        FileInfo fi = new FileInfo(textBoxitraqSEPro.Text);
        bool extractSignal = false;
        ResultPackage rp = null;

        if (checkBoxNormalizationChannelSignal.Checked)
        {
            //We should get the MS info and merge it with the sepro package
            if (fi.Extension.Equals(".sepr"))
            {
                rp = ResultPackage.Load(textBoxitraqSEPro.Text);
                extractSignal = true;
            }

            List<FileInfo> rawFiles = fi.Directory.GetFiles("*.RAW").ToList();

            foreach (FileInfo rawFile in rawFiles)
            {
                Console.WriteLine("Extracting data for " + rawFile.Name);

                PatternTools.RawReader.RawReaderParams rParams = new PatternTools.RawReader.RawReaderParams();
                rParams.ExtractMS1 = false;
                rParams.ExtractMS2 = true;
                rParams.ExtractMS3 = false;

                PatternTools.RawReader.Reader reader = new PatternTools.RawReader.Reader(rParams);

                List<MSLight> theMS2 = reader.GetSpectra(rawFile.FullName, new List<int>(), false);
                theMS2.RemoveAll(a => a.Ions == null);

                // Summed reporter signal of every MS2 in this raw file, one slot per isobaric mass.
                double[] totalSignal = new double[isobaricMasses.Count];

                List<SQTScan> theScans = null;

                //Update the sepro result package with the signal
                if (extractSignal)
                {
                    //Get all the scans from this file (names are compared without their extension)
                    string rawName = rawFile.Name.Substring(0, rawFile.Name.Length - 4);
                    theScans = rp.MyProteins.AllSQTScans.FindAll(a => a.FileName.Substring(0, a.FileName.Length - 4).Equals(rawName));
                }

                foreach (MSLight ms in theMS2)
                {
                    double[] thisQuantitation = GetIsobaricSignal(ms.Ions, isobaricMasses);

                    if (extractSignal)
                    {
                        SQTScan scn = theScans.Find(a => a.ScanNumber == ms.ScanNumber);
                        if (scn != null)
                        {
                            // Attach the spectrum to the identified scan, keeping only ions up to m/z 400.
                            scn.MSLight = ms;
                            scn.MSLight.Ions.RemoveAll(a => a.MZ > 400);
                        }
                    }

                    for (int i = 0; i < thisQuantitation.Length; i++)
                    {
                        totalSignal[i] += thisQuantitation[i];
                    }
                }

                // The dictionary is keyed by the raw file name with its extension replaced by "sqt".
                string theName = rawFile.Name.Substring(0, rawFile.Name.Length - 3);
                theName += "sqt";

                signalAllNormalizationDictionary.Add(theName, totalSignal);
            }
        }

        Console.WriteLine("Loading SEPro File");

        #region Load the spero or pepexplorer file

        theScansToAnalyze = new List<SQTScan>();
        List<FastaItem> theFastaItems = new List<FastaItem>();

        if (fi.Extension.Equals(".sepr"))
        {
            Console.WriteLine("Loading SEPro file");

            if (!extractSignal)
            {
                rp = ResultPackage.Load(textBoxitraqSEPro.Text);
            }

            rp.MyProteins.AllSQTScans.RemoveAll(a => a.MSLight == null);
            theScansToAnalyze = rp.MyProteins.AllSQTScans;
            Console.WriteLine("Done reading SEPro result");

            theFastaItems = rp.MyProteins.MyProteinList.Select(a => new FastaItem(a.Locus, a.Sequence, a.Description)).ToList();
        }
        else if (fi.Extension.Equals(".mpex"))
        {
            Console.WriteLine("Loading PepExplorer file....");
            PepExplorer2.Result2.ResultPckg2 result = PepExplorer2.Result2.ResultPckg2.DeserializeResultPackage(textBoxitraqSEPro.Text);
            theFastaItems = result.MyFasta;

            theScansToAnalyze = new List<SQTScan>();

            foreach (PepExplorer2.Result2.AlignmentResult al in result.Alignments)
            {
                foreach (var dnr in al.DeNovoRegistries)
                {
                    SQTScan sqt = new SQTScan();
                    sqt.ScanNumber = dnr.ScanNumber;
                    sqt.FileName = dnr.FileName;
                    sqt.PeptideSequence = dnr.PtmSequence;
                    theScansToAnalyze.Add(sqt);
                }
            }

            //And now we need to retrieve the mass spectra. For this, the raw files should be
            //inside the directory containing the mpex file
            List<string> rawFiles = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

            for (int i = 0; i < rawFiles.Count; i++)
            {
                // Swap the three-character extension for "raw".
                rawFiles[i] = rawFiles[i].Remove(rawFiles[i].Length - 3, 3) + "raw";
            }

            foreach (string fn in rawFiles)
            {
                Console.WriteLine("Retrieving spectra for file: " + fn);
                ParserUltraLightRAW parser = new ParserUltraLightRAW();

                string tmpFile = fn.Substring(0, fn.Length - 3);

                // NOTE(review): the scan's file name is used as a regular-expression pattern here,
                // so names containing regex metacharacters may match unexpectedly - confirm.
                List<SQTScan> scansForThisFile = theScansToAnalyze.FindAll(a => Regex.IsMatch(tmpFile, a.FileName.Substring(0, a.FileName.Length - 3), RegexOptions.IgnoreCase)).ToList();

                List<int> scnNumbers = scansForThisFile.Select(a => a.ScanNumber).ToList();

                FileInfo theInputFile = new FileInfo(textBoxitraqSEPro.Text);

                List<MSUltraLight> theSpectra = parser.ParseFile(theInputFile.DirectoryName + "/" + fn, -1, 2, scnNumbers);

                foreach (SQTScan sqt in scansForThisFile)
                {
                    // NOTE(review): Find returns null when the raw file lacks this scan, which
                    // would fail on the next statement - confirm this cannot happen in practice.
                    MSUltraLight spec = theSpectra.Find(a => a.ScanNumber == sqt.ScanNumber);
                    sqt.MSLight = new MSLight();
                    sqt.MSLight.MZ = spec.Ions.Select(a => (double)a.Item1).ToList();
                    sqt.MSLight.Intensity = spec.Ions.Select(a => (double)a.Item2).ToList();
                }

                Console.WriteLine("\tDone processing this file.");
            }
        }
        else
        {
            throw new Exception("This file format is not supported.");
        }

        #endregion

        //Obtaining multiplexed spectra
        SEProQ.IsobaricQuant.YadaMultiplexCorrection.YMC ymc = null;
        if (textBoxCorrectedYadaDirectory.Text.Length > 0)
        {
            Console.WriteLine("Reading Yada results");
            ymc = new IsobaricQuant.YadaMultiplexCorrection.YMC(new DirectoryInfo(textBoxCorrectedYadaDirectory.Text));
            Console.WriteLine("Done loading Yada results");
        }

        //Remove multiplexed spectra from sepro results
        if (textBoxCorrectedYadaDirectory.Text.Length > 0)
        {
            int removedCounter = 0;

            foreach (KeyValuePair<string, List<int>> kvp in ymc.fileNameScanNumberMultiplexDictionary)
            {
                Console.WriteLine("Removing multiplexed spectra for file :: " + kvp.Key);
                richTextBoxLog.AppendText("Removing multiplexed spectra for file :: " + kvp.Key + "\n");

                // Scans are matched against the key with its last four characters replaced by ".sqt".
                string cleanName = kvp.Key.Substring(0, kvp.Key.Length - 4);
                cleanName += ".sqt";

                foreach (int scnNo in kvp.Value)
                {
                    int index = theScansToAnalyze.FindIndex(a => a.ScanNumber == scnNo && a.FileName.Equals(cleanName));
                    if (index >= 0)
                    {
                        Console.Write(theScansToAnalyze[index].ScanNumber + " ");
                        richTextBoxLog.AppendText(theScansToAnalyze[index].ScanNumber + " ");

                        removedCounter++;
                        theScansToAnalyze.RemoveAt(index);
                    }
                }

                Console.WriteLine("\n");
                richTextBoxLog.AppendText("\n");
            }

            Console.WriteLine("Done removing multiplexed spectra :: " + removedCounter);
        }

        PatternTools.CSML.Matrix correctionMatrix = new PatternTools.CSML.Matrix();
        if (checkBoxApplyPurityCorrection.Checked)
        {
            List<List<double>> correctionData = GetPurityCorrectionsFromForm();
            correctionMatrix = IsobaricQuant.IsobaricImpurityCorrection.GenerateInverseCorrectionMatrix(correctionData);
        }

        //--------------------------------------------------------------------------------------------------------------------
        //Prepare normalization Dictionary
        signalIdentifiedNormalizationDictionary = new Dictionary<string, double[]>();

        List<string> fileNames = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

        foreach (string fileName in fileNames)
        {
            signalIdentifiedNormalizationDictionary.Add(fileName, new double[isobaricMasses.Count]);
        }

        //-------------------------------------
        //If necessary, correct for impurity and feed global signal dictionary
        foreach (SQTScan scn in theScansToAnalyze)
        {
            double[] thisQuantitation = GetIsobaricSignal(scn.MSLight.Ions, isobaricMasses);

            double maxSignal = thisQuantitation.Max();

            //We can only correct for signal for those that have quantitation values in all places
            if (checkBoxApplyPurityCorrection.Checked && (thisQuantitation.Count(a => a > maxSignal * (double)numericUpDownIonCountThreshold.Value) == isobaricMasses.Count))
            {
                thisQuantitation = IsobaricQuant.IsobaricImpurityCorrection.CorrectForSignal(correctionMatrix, thisQuantitation).ToArray();
            }

            if (checkBoxNormalizationChannelSignal.Checked)
            {
                for (int k = 0; k < thisQuantitation.Length; k++)
                {
                    signalIdentifiedNormalizationDictionary[scn.FileName][k] += thisQuantitation[k];
                }
            }

            scn.Quantitation = new List<List<double>>() { thisQuantitation.ToList() };
        }

        //And now normalize -------------------
        if (checkBoxNormalizationChannelSignal.Checked)
        {
            Console.WriteLine("Performing channel signal normalization for " + theScansToAnalyze.Count + " scans.");

            foreach (SQTScan scn2 in theScansToAnalyze)
            {
                // Each channel is divided by the summed signal of that channel over the scan's file.
                for (int m = 0; m < isobaricMasses.Count; m++)
                {
                    scn2.Quantitation[0][m] /= signalIdentifiedNormalizationDictionary[scn2.FileName][m];
                }

                if (scn2.Quantitation[0].Contains(double.NaN))
                {
                    Console.WriteLine("Problems on signal of scan " + scn2.FileNameWithScanNumberAndChargeState);
                }
            }
        }

        comboBoxSelectFileForGraphs.Items.Clear();
        foreach (string file in signalIdentifiedNormalizationDictionary.Keys.ToList())
        {
            comboBoxSelectFileForGraphs.Items.Add(file);
        }

        tabControlMain.SelectedIndex = 1;

        if (radioButtonAnalysisPeptideReport.Checked)
        {
            //Peptide Analysis

            //Write Peptide Analysis; 'using' closes the report even if writing fails midway.
            using (StreamWriter sw = new StreamWriter(textBoxOutputDirectory.Text + "/" + "PeptideQuantitationReport.txt"))
            {
                //Eliminate problematic quants
                int removed = theScansToAnalyze.RemoveAll(a => Object.ReferenceEquals(a.Quantitation, null));
                Console.WriteLine("Problematic scans removed: " + removed);

                var pepDic = from scn in theScansToAnalyze
                             group scn by scn.PeptideSequenceCleaned into groupedSequences
                             select new { PeptideSequence = groupedSequences.Key, TheScans = groupedSequences.ToList() };

                foreach (var pep in pepDic)
                {
                    sw.WriteLine("Peptide:" + pep.PeptideSequence + "\tSpecCounts:" + pep.TheScans.Count);

                    foreach (SQTScan sqt in pep.TheScans)
                    {
                        sw.WriteLine(sqt.FileNameWithScanNumberAndChargeState + "\t" + string.Join("\t", sqt.Quantitation[0]));
                    }
                }

                //And now write the Fasta
                sw.WriteLine("#Fasta Items");
                foreach (FastaItem fastaItem in theFastaItems)
                {
                    sw.WriteLine(">" + fastaItem.SequenceIdentifier + " " + fastaItem.Description);
                    sw.WriteLine(fastaItem.Sequence);
                }
            }
        }
        else
        {
            // NOTE(review): the result package is loaded again here; whether the quantitation
            // values assigned to the scans above are visible through this fresh instance cannot
            // be determined from this method - confirm.
            rp = ResultPackage.Load(textBoxitraqSEPro.Text);

            //Peptide Level
            {
                PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

                List<PeptideResult> peptides = rp.MyProteins.MyPeptideList;

                if (checkBoxOnlyUniquePeptides.Checked)
                {
                    int removedPeptides = peptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                    Console.WriteLine("Removing {0} peptides for not being unique.", removedPeptides);
                }

                // One index entry per peptide; the entry ID is the peptide's position in the list.
                for (int i = 0; i < peptides.Count; i++)
                {
                    SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                    index.Name = peptides[i].PeptideSequence;
                    index.Description = string.Join(" ", peptides[i].MyMapableProteins);
                    index.ID = i;

                    ip.Add(index, true);
                }

                SparseMatrix sm = new SparseMatrix();

                List<int> dims = ip.allIDs();

                for (int l = 0; l < labels.Count; l++)
                {
                    // Channels with a negative class label are left out of the project.
                    if (labels[l] < 0)
                    {
                        continue;
                    }

                    sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                    List<double> values = new List<double>(dims.Count);
                    List<int> dimsWithValues = new List<int>();

                    foreach (int d in dims)
                    {
                        List<SQTScan> scns = peptides[d].MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                        if (scns.Count > 0)
                        {
                            double signalSum = scns.FindAll(a => !double.IsNaN(a.Quantitation[0][l])).Sum(a => a.Quantitation[0][l]);
                            values.Add(signalSum);
                            dimsWithValues.Add(d);
                        }
                    }

                    smr.Dims = dimsWithValues;
                    smr.Values = values;
                    smr.FileName = isobaricMasses[l].ToString();

                    sm.addRow(smr);
                }

                PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
                plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectPeptides.plp");
            }

            //Protein Level
            {
                //Generate Index
                PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

                List<MyProtein> theProteins = rp.MyProteins.MyProteinList;

                if (checkBoxOnlyUniquePeptides.Checked)
                {
                    int removedProteins = theProteins.RemoveAll(a => !a.PeptideResults.Exists(b => b.NoMyMapableProteins == 1));
                    Console.WriteLine("{0} removed proteins for not having unique peptides", removedProteins);
                }

                for (int i = 0; i < theProteins.Count; i++)
                {
                    SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                    index.ID = i;
                    index.Name = theProteins[i].Locus;
                    index.Description = theProteins[i].Description;

                    ip.Add(index, false);
                }

                //SparseMatrix
                SparseMatrix sm = new SparseMatrix();

                List<int> dims = ip.allIDs();

                for (int l = 0; l < labels.Count; l++)
                {
                    if (labels[l] < 0)
                    {
                        continue;
                    }

                    if (!sm.ClassDescriptionDictionary.ContainsKey(labels[l]))
                    {
                        sm.ClassDescriptionDictionary.Add(labels[l], labels[l].ToString());
                    }

                    sparseMatrixRow smr = new sparseMatrixRow(labels[l]);
                    List<double> values = new List<double>(dims.Count);
                    List<int> dimsToInclude = new List<int>();

                    foreach (int d in dims)
                    {
                        double signalSum = 0;

                        List<PeptideResult> thePeptides = theProteins[d].PeptideResults;

                        if (checkBoxOnlyUniquePeptides.Checked)
                        {
                            thePeptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                        }

                        foreach (PeptideResult pr in thePeptides)
                        {
                            List<SQTScan> scns = pr.MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                            foreach (SQTScan sqt in scns)
                            {
                                if (!double.IsNaN(sqt.Quantitation[0][l]) && !double.IsInfinity(sqt.Quantitation[0][l]))
                                {
                                    signalSum += sqt.Quantitation[0][l];
                                }
                            }
                        }

                        if (signalSum > 0)
                        {
                            dimsToInclude.Add(d);
                            values.Add(signalSum);
                        }
                        else
                        {
                            Console.WriteLine("No signal found for " + theProteins[d].Locus + " on marker " + l);
                        }
                    }

                    // Dims must be the filtered list that runs parallel to 'values'; assigning the
                    // full 'dims' list left the row with more dimensions than values whenever a
                    // protein had no signal.
                    smr.Dims = dimsToInclude;
                    smr.Values = values;
                    smr.FileName = isobaricMasses[l].ToString();

                    sm.addRow(smr);
                }

                PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
                plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectProteins.plp");
            }
        }

        comboBoxSelectFileForGraphs.Enabled = true;
        tabControlMain.SelectedIndex = 2;

        Console.WriteLine("Done");
    }
    finally
    {
        // Restore the caption on every exit path so a failure does not leave the button at "Working...".
        buttonGo.Text = "Generate Report";
    }
}