/// <summary>
/// Walks every scan of <c>myMsDataFile</c> (one-based scan numbers 1..NumSpectra) in parallel and
/// shifts each peak's m/z by the error predicted by the supplied regression models.
/// </summary>
/// <param name="ms1predictor">Model used for scans without a precursor and for the precursor-related values of MS2 scans.</param>
/// <param name="ms2predictor">Model used for the fragment peaks of scans that have a precursor.</param>
private void CalibrateSpectra(RegressionForestModel ms1predictor, RegressionForestModel ms2predictor)
{
    // Predictor input layout: m/z, retention time, log(TIC), log(injection time) or NaN, log(intensity).
    double[] BuildFeatures(double mz, double intensity, double retentionTime, double totalIonCurrent, double? injectionTime)
    {
        double logInjectionTime = injectionTime.HasValue ? Math.Log(injectionTime.Value) : double.NaN;
        return new double[] { mz, retentionTime, Math.Log(totalIonCurrent), logInjectionTime, Math.Log(intensity) };
    }

    Parallel.ForEach(Partitioner.Create(1, myMsDataFile.NumSpectra + 1), scanRange =>
    {
        for (int scanNumber = scanRange.Item1; scanNumber < scanRange.Item2; scanNumber++)
        {
            var scan = myMsDataFile.GetOneBasedScan(scanNumber);

            if (!(scan is IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> ms2Scan))
            {
                // Scan without precursor information: correct its peaks with the MS1 model.
                Func<IPeak, double> correctMs1Peak = peak =>
                    peak.X - ms1predictor.Predict(BuildFeatures(peak.X, peak.Y, scan.RetentionTime, scan.TotalIonCurrent, scan.InjectionTime));
                scan.MassSpectrum.ReplaceXbyApplyingFunction(correctMs1Peak);
                continue;
            }

            // NOTE(review): .Value throws if the scan has no precursor scan number; other code in this
            // project guards this with HasValue - confirm every scan reaching this point has one.
            // NOTE(review): the precursor scan may be rewritten by another partition while it is read
            // here - confirm whether the ordering matters.
            var precursorScan = myMsDataFile.GetOneBasedScan(ms2Scan.OneBasedPrecursorScanNumber.Value);

            // Fill in the monoisotopic peak intensity only when a monoisotopic m/z guess exists
            // and no intensity has been recorded for it yet.
            bool intensityMissing = !ms2Scan.SelectedIonMonoisotopicGuessIntensity.HasValue;
            if (intensityMissing && ms2Scan.SelectedIonMonoisotopicGuessMz.HasValue)
            {
                ms2Scan.ComputeMonoisotopicPeakIntensity(precursorScan.MassSpectrum);
            }

            // Fragment peaks use the MS2 model with this scan's own acquisition values.
            double correctFragmentPeak(IPeak peak) =>
                peak.X - ms2predictor.Predict(BuildFeatures(peak.X, peak.Y, scan.RetentionTime, scan.TotalIonCurrent, scan.InjectionTime));

            // Precursor-related values use the MS1 model with the precursor scan's acquisition values.
            double correctPrecursorPeak(IPeak peak) =>
                peak.X - ms1predictor.Predict(BuildFeatures(peak.X, peak.Y, precursorScan.RetentionTime, precursorScan.TotalIonCurrent, precursorScan.InjectionTime));

            ms2Scan.TransformMzs(correctFragmentPeak, correctPrecursorPeak);
        }
    });
}
/// <summary>
/// Annotates every identification with its matched fragment ions and mass errors per product type,
/// then, for identifications with exactly one compact peptide, scores the peptide once per residue
/// position with the precursor mass difference localized at that position.
/// </summary>
/// <returns>A <c>LocalizationEngineResults</c> wrapping this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    // Pass 1: matched ions and mass errors for every identification, one entry per product type.
    foreach (var psm in allResultingIdentifications)
    {
        psm.MatchedIonDictOnlyMatches = new Dictionary<ProductType, double[]>();
        psm.ProductMassErrorDa = new Dictionary<ProductType, double[]>();
        psm.ProductMassErrorPpm = new Dictionary<ProductType, double[]>();

        var scan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double precursorMass = psm.ScanPrecursorMass;

        foreach (var productType in lp)
        {
            var theoreticalMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { productType });
            Array.Sort(theoreticalMasses);

            var matchedMasses = new List<double>();
            var massErrorsDa = new List<double>();
            var massErrorsPpm = new List<double>();
            MatchIons(scan, fragmentTolerance, theoreticalMasses, matchedMasses, massErrorsDa, massErrorsPpm, precursorMass, dissociationTypes, addCompIons);

            psm.MatchedIonDictOnlyMatches.Add(productType, matchedMasses.ToArray());
            psm.ProductMassErrorDa.Add(productType, massErrorsDa.ToArray());
            psm.ProductMassErrorPpm.Add(productType, massErrorsPpm.ToArray());
        }
    }

    // Pass 2: localization scores, only where a single compact peptide was identified.
    foreach (var psm in allResultingIdentifications.Where(b => b.NumDifferentCompactPeptides == 1))
    {
        // The scan is fetched before the sequence check, exactly as in the original statement order.
        var scan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double precursorMass = psm.ScanPrecursorMass;
        if (psm.FullSequence == null)
        {
            continue;
        }

        var representative = psm.CompactPeptides.First().Value.Item2.First();

        psm.LocalizedScores = Enumerable.Range(0, representative.Length)
            .Select(position =>
            {
                // Place the whole precursor mass difference on this position and re-score.
                PeptideWithSetModifications shifted = representative.Localize(position, psm.ScanPrecursorMass - representative.MonoisotopicMass);
                var shiftedMasses = shifted.CompactPeptide(terminusType).ProductMassesMightHaveDuplicatesAndNaNs(lp);
                Array.Sort(shiftedMasses);
                double positionScore = CalculatePeptideScore(scan, fragmentTolerance, shiftedMasses, precursorMass, dissociationTypes, addCompIons, 0);
                return positionScore;
            })
            .ToList();
    }

    return new LocalizationEngineResults(this);
}
/// <summary>
/// Returns the combined scan stored for <paramref name="oneBasedScanNumber"/>, building and caching it
/// in <c>Scans</c> on first request. The combined spectrum is produced by <c>CombinePeaks</c> from the
/// raw scans numbered <c>oneBasedScanNumber</c> through <c>oneBasedScanNumber + numScansToAverage - 1</c>;
/// the remaining metadata is copied from the scan in the middle of that window.
/// </summary>
/// <param name="oneBasedScanNumber">One-based number of the requested scan.</param>
/// <returns>The cached or newly built scan.</returns>
/// <exception cref="MzLibException">The representative raw scan is not MS1 or is not centroided.</exception>
public override IMsDataScan<IMzSpectrum<IMzPeak>> GetOneBasedScan(int oneBasedScanNumber)
{
    var slot = oneBasedScanNumber - 1;
    if (Scans[slot] != null)
    {
        return Scans[slot];
    }

    // The middle scan of the window supplies polarity, retention time, analyzer and scan window.
    var representativeScanNumber = oneBasedScanNumber + (numScansToAverage - 1) / 2;
    var representative = raw.GetOneBasedScan(representativeScanNumber);
    if (representative.MsnOrder != 1)
    {
        throw new MzLibException("Scan " + representativeScanNumber + " is not MS1 scan");
    }
    if (!representative.IsCentroid)
    {
        throw new MzLibException("Scan " + representativeScanNumber + " is not centroid scan");
    }

    Polarity polarity = representative.Polarity;
    double retentionTime = representative.RetentionTime;
    MZAnalyzerType mzAnalyzer = representative.MzAnalyzer;

    var lastScanNumberInWindow = oneBasedScanNumber + numScansToAverage - 1;
    var spectraInWindow = raw
        .Where(b => b.OneBasedScanNumber >= oneBasedScanNumber && b.OneBasedScanNumber <= lastScanNumberInWindow)
        .Select(b => b.MassSpectrum)
        .ToList();
    IMzSpectrum<IMzPeak> peaks = CombinePeaks(spectraInWindow, ppmToleranceForPeakCombination);

    MzRange scanWindowRange = representative.ScanWindowRange;

    // Total ion current comes from the combined peaks; injection time and noise data are not carried over.
    double totalIonCurrent = peaks.SumOfAllY;
    double[,] noiseData = null;

    Scans[slot] = new MsDataScan<IMzSpectrum<IMzPeak>>(peaks, oneBasedScanNumber, 1, true, polarity, retentionTime, scanWindowRange, null, mzAnalyzer, totalIonCurrent, double.NaN, noiseData, "scan=" + oneBasedScanNumber);
    return Scans[slot];
}
/// <summary>
/// Lazily yields one <c>Ms2ScanWithSpecificMass</c> per (precursor m/z, charge) candidate for every
/// scan with precursor information in <paramref name="myMSDataFile"/>. Candidates come from
/// deconvolution of the precursor spectrum and/or from the precursor values stored on the scan.
/// </summary>
/// <param name="myMSDataFile">Data file whose scans are enumerated.</param>
/// <param name="fullFilePath">Path passed through to every yielded scan.</param>
/// <param name="doPrecursorDeconvolution">Whether to add candidates from <c>GetIsolatedMassesAndCharges</c>.</param>
/// <param name="useProvidedPrecursorInfo">Whether to add the scan's own precursor m/z and charge guess.</param>
/// <param name="deconvolutionIntensityRatio">Intensity ratio forwarded to the deconvolution call.</param>
/// <param name="deconvolutionMaxAssumedChargeState">Highest charge forwarded to the deconvolution call (lowest is 1).</param>
/// <param name="deconvolutionMassTolerance">Tolerance used for deconvolution and for detecting duplicate candidates.</param>
/// <returns>A deferred sequence; nothing is computed until it is enumerated.</returns>
public static IEnumerable<Ms2ScanWithSpecificMass> GetMs2Scans(
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMSDataFile,
    string fullFilePath,
    bool doPrecursorDeconvolution,
    bool useProvidedPrecursorInfo,
    double deconvolutionIntensityRatio,
    int deconvolutionMaxAssumedChargeState,
    Tolerance deconvolutionMassTolerance)
{
    foreach (var ms2scan in myMSDataFile.OfType<IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>>())
    {
        var candidates = new List<(double Mz, int Charge)>();

        if (ms2scan.OneBasedPrecursorScanNumber.HasValue)
        {
            var precursorSpectrum = myMSDataFile.GetOneBasedScan(ms2scan.OneBasedPrecursorScanNumber.Value);

            ms2scan.RefineSelectedMzAndIntensity(precursorSpectrum.MassSpectrum);
            if (ms2scan.SelectedIonMonoisotopicGuessMz.HasValue)
            {
                ms2scan.ComputeMonoisotopicPeakIntensity(precursorSpectrum.MassSpectrum);
            }

            if (doPrecursorDeconvolution)
            {
                var envelopes = ms2scan.GetIsolatedMassesAndCharges(precursorSpectrum.MassSpectrum, 1, deconvolutionMaxAssumedChargeState, deconvolutionMassTolerance.Value, deconvolutionIntensityRatio);
                foreach (var envelope in envelopes)
                {
                    candidates.Add((envelope.monoisotopicMass.ToMz(envelope.charge), envelope.charge));
                }
            }
        }

        if (useProvidedPrecursorInfo && ms2scan.SelectedIonChargeStateGuess.HasValue)
        {
            var providedCharge = ms2scan.SelectedIonChargeStateGuess.Value;

            // Prefer the monoisotopic guess; otherwise fall back to the selected ion m/z.
            var providedMz = ms2scan.SelectedIonMonoisotopicGuessMz.HasValue
                ? ms2scan.SelectedIonMonoisotopicGuessMz.Value
                : ms2scan.SelectedIonMZ;

            // Add only when no existing candidate has a mass within tolerance of the provided one.
            bool isNewMass = candidates.All(existing => !deconvolutionMassTolerance.Within(providedMz.ToMass(providedCharge), existing.Mz.ToMass(existing.Charge)));
            if (isNewMass)
            {
                candidates.Add((providedMz, providedCharge));
            }
        }

        foreach (var candidate in candidates)
        {
            yield return new Ms2ScanWithSpecificMass(ms2scan, candidate.Mz, candidate.Charge, fullFilePath);
        }
    }
}
/// <summary>
/// Builds an mzML document from <paramref name="myMsDataFile"/> and serializes it to
/// <paramref name="outputFile"/>. Every spectrum gets its m/z and intensity arrays; scans that carry
/// noise data additionally get three non-standard noise arrays.
/// </summary>
/// <param name="myMsDataFile">Data file whose scans (one-based numbers 1..NumSpectra) are written.</param>
/// <param name="outputFile">Path of the file to create or overwrite.</param>
/// <param name="writeIndexed">Must be <c>false</c>; indexed output is not implemented.</param>
/// <exception cref="NotImplementedException"><paramref name="writeIndexed"/> is <c>true</c>.</exception>
public static void CreateAndWriteMyMzmlWithCalibratedSpectra(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string outputFile, bool writeIndexed)
{
    // Fail fast: previously the whole document was built before this was rejected.
    if (writeIndexed)
    {
        throw new NotImplementedException("Writing indexed mzMLs not yet supported");
    }

    // Completes a binary data array whose payload is already set: encoded length, the three
    // descriptive cvParams (array kind, 64-bit float, no compression) and, for the non-standard
    // noise arrays, the custom-type userParam.
    Generated.BinaryDataArrayType DescribeBinaryArray(Generated.BinaryDataArrayType array, string arrayAccession, string arrayName, string customType)
    {
        // Length of the payload once base64-encoded: 4 output characters per started group of 3 bytes.
        array.encodedLength = (4 * Math.Ceiling(((double)array.binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
        array.cvParam = new Generated.CVParamType[3];
        array.cvParam[0] = new Generated.CVParamType() { accession = arrayAccession, name = arrayName };
        array.cvParam[1] = new Generated.CVParamType() { accession = "MS:1000523", name = "64-bit float" };
        array.cvParam[2] = new Generated.CVParamType() { accession = "MS:1000576", name = "no compression" };
        if (customType != null)
        {
            array.userParam = new Generated.UserParamType[1];
            array.userParam[0] = new Generated.UserParamType() { name = "kelleherCustomType", value = customType };
        }
        return array;
    }

    var mzML = new Generated.mzMLType()
    {
        version = "1",
        cvList = new Generated.CVListType()
    };
    mzML.cvList.count = "1";
    mzML.cvList.cv = new Generated.CVType[1];
    mzML.cvList.cv[0] = new Generated.CVType()
    {
        URI = @"https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo",
        fullName = "Proteomics Standards Initiative Mass Spectrometry Ontology",
        id = "MS"
    };

    mzML.fileDescription = new Generated.FileDescriptionType()
    {
        fileContent = new Generated.ParamGroupType()
    };
    mzML.fileDescription.fileContent.cvParam = new Generated.CVParamType[2];
    mzML.fileDescription.fileContent.cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000579" // MS1 Data
    };
    mzML.fileDescription.fileContent.cvParam[1] = new Generated.CVParamType()
    {
        accession = "MS:1000580" // MSn Data
    };

    mzML.softwareList = new Generated.SoftwareListType()
    {
        count = "1",
        software = new Generated.SoftwareType[1]
    };

    // TODO: add the raw file fields
    mzML.softwareList.software[0] = new Generated.SoftwareType()
    {
        id = "mzLib",
        version = "1",
        cvParam = new Generated.CVParamType[1]
    };
    mzML.softwareList.software[0].cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000799",
        value = "mzLib"
    };

    // Leaving empty. Can't figure out the configurations.
    // ToDo: read instrumentConfigurationList from mzML file
    mzML.instrumentConfigurationList = new Generated.InstrumentConfigurationListType();

    mzML.dataProcessingList = new Generated.DataProcessingListType()
    {
        count = "1",
        dataProcessing = new Generated.DataProcessingType[1]
    };

    // Only writing mine! Might have had some other data processing (but not if it is a raw file)
    // ToDo: read dataProcessingList from mzML file
    mzML.dataProcessingList.dataProcessing[0] = new Generated.DataProcessingType()
    {
        id = "mzLibProcessing"
    };

    mzML.run = new Generated.RunType()
    {
        chromatogramList = new Generated.ChromatogramListType()
        {
            count = "1",
            chromatogram = new Generated.ChromatogramType[1]
        }
    };

    // ToDo: Finish the chromatogram writing!
    mzML.run.chromatogramList.chromatogram[0] = new Generated.ChromatogramType();

    int numSpectra = myMsDataFile.NumSpectra;
    mzML.run.spectrumList = new Generated.SpectrumListType()
    {
        count = numSpectra.ToString(CultureInfo.InvariantCulture),
        defaultDataProcessingRef = "mzLibProcessing",
        spectrum = new Generated.SpectrumType[numSpectra]
    };

    // Loop over all spectra
    for (int i = 1; i <= numSpectra; i++)
    {
        // Fetch the scan once per iteration instead of once per written field.
        var scan = myMsDataFile.GetOneBasedScan(i);
        var scanWithPrecursor = scan as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;
        var massSpectrum = scan.MassSpectrum;

        var spectrum = new Generated.SpectrumType()
        {
            defaultArrayLength = massSpectrum.Size,
            index = i.ToString(CultureInfo.InvariantCulture),
            id = scan.OneBasedScanNumber.ToString(),
            cvParam = new Generated.CVParamType[8]
        };

        spectrum.cvParam[0] = new Generated.CVParamType();
        if (scan.MsnOrder == 1)
        {
            spectrum.cvParam[0].accession = "MS:1000579";
        }
        else if (scanWithPrecursor != null)
        {
            spectrum.cvParam[0].accession = "MS:1000580";

            // So needs a precursor!
            var selectedIon = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[3]
            };

            // Selected ion MZ
            selectedIon.cvParam[0] = new Generated.CVParamType()
            {
                name = "selected ion m/z",
                value = scanWithPrecursor.SelectedIonMZ.ToString(CultureInfo.InvariantCulture),
                accession = "MS:1000744"
            };

            // Charge State
            if (scanWithPrecursor.SelectedIonChargeStateGuess.HasValue)
            {
                selectedIon.cvParam[1] = new Generated.CVParamType()
                {
                    name = "charge state",
                    value = scanWithPrecursor.SelectedIonChargeStateGuess.Value.ToString(CultureInfo.InvariantCulture),
                    accession = "MS:1000041"
                };
            }

            // Selected ion intensity
            if (scanWithPrecursor.SelectedIonIntensity.HasValue)
            {
                selectedIon.cvParam[2] = new Generated.CVParamType()
                {
                    name = "peak intensity",
                    value = scanWithPrecursor.SelectedIonIntensity.Value.ToString(CultureInfo.InvariantCulture),
                    accession = "MS:1000042"
                };
            }

            // The isolation window is written as its mean with half the width on either side.
            MzRange isolationRange = scanWithPrecursor.IsolationRange;
            var isolationWindow = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[3]
            };
            isolationWindow.cvParam[0] = new Generated.CVParamType()
            {
                accession = "MS:1000827",
                name = "isolation window target m/z",
                value = isolationRange.Mean.ToString(CultureInfo.InvariantCulture)
            };
            isolationWindow.cvParam[1] = new Generated.CVParamType()
            {
                accession = "MS:1000828",
                name = "isolation window lower offset",
                value = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
            };
            isolationWindow.cvParam[2] = new Generated.CVParamType()
            {
                accession = "MS:1000829",
                name = "isolation window upper offset",
                value = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
            };

            DissociationType dissociationType = scanWithPrecursor.DissociationType;
            var activation = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[1]
            };
            activation.cvParam[0] = new Generated.CVParamType();
            activation.cvParam[0].accession = DissociationTypeAccessions[dissociationType];
            activation.cvParam[0].name = DissociationTypeNames[dissociationType];

            var precursor = new Generated.PrecursorType();
            precursor.spectrumRef = scanWithPrecursor.OneBasedPrecursorScanNumber.ToString();
            precursor.selectedIonList = new Generated.SelectedIonListType()
            {
                count = 1.ToString(),
                selectedIon = new Generated.ParamGroupType[] { selectedIon }
            };
            precursor.isolationWindow = isolationWindow;
            precursor.activation = activation;

            spectrum.precursorList = new Generated.PrecursorListType()
            {
                count = 1.ToString(),
                precursor = new Generated.PrecursorType[] { precursor }
            };
        }

        spectrum.cvParam[1] = new Generated.CVParamType()
        {
            name = "ms level",
            accession = "MS:1000511",
            value = scan.MsnOrder.ToString(CultureInfo.InvariantCulture)
        };
        spectrum.cvParam[2] = new Generated.CVParamType()
        {
            name = CentroidNames[scan.IsCentroid],
            accession = CentroidAccessions[scan.IsCentroid]
        };

        // Polarity is written only when both lookup tables know it; otherwise the slot stays null.
        if (PolarityNames.TryGetValue(scan.Polarity, out string polarityName)
            && PolarityAccessions.TryGetValue(scan.Polarity, out string polarityAccession))
        {
            spectrum.cvParam[3] = new Generated.CVParamType()
            {
                name = polarityName,
                accession = polarityAccession
            };
        }

        // Spectrum title
        spectrum.cvParam[4] = new Generated.CVParamType()
        {
            name = "spectrum title",
            accession = "MS:1000796",
            value = scan.OneBasedScanNumber.ToString()
        };

        if (massSpectrum.Size > 0)
        {
            // Lowest observed mz
            spectrum.cvParam[5] = new Generated.CVParamType()
            {
                name = "lowest observed m/z",
                accession = "MS:1000528",
                value = massSpectrum.FirstX.ToString(CultureInfo.InvariantCulture)
            };

            // Highest observed mz
            spectrum.cvParam[6] = new Generated.CVParamType()
            {
                name = "highest observed m/z",
                accession = "MS:1000527",
                value = massSpectrum.LastX.ToString(CultureInfo.InvariantCulture)
            };
        }

        // Total ion current
        spectrum.cvParam[7] = new Generated.CVParamType()
        {
            name = "total ion current",
            accession = "MS:1000285",
            value = scan.TotalIonCurrent.ToString(CultureInfo.InvariantCulture)
        };

        var scanEntry = new Generated.ScanType()
        {
            cvParam = new Generated.CVParamType[3]
        };

        // Retention time
        scanEntry.cvParam[0] = new Generated.CVParamType()
        {
            name = "scan start time",
            accession = "MS:1000016",
            value = scan.RetentionTime.ToString(CultureInfo.InvariantCulture),
            unitCvRef = "UO",
            unitAccession = "UO:0000031",
            unitName = "minute"
        };
        scanEntry.cvParam[1] = new Generated.CVParamType()
        {
            name = "filter string",
            accession = "MS:1000512",
            value = scan.ScanFilter
        };
        if (scan.InjectionTime.HasValue)
        {
            scanEntry.cvParam[2] = new Generated.CVParamType()
            {
                name = "ion injection time",
                accession = "MS:1000927",
                value = scan.InjectionTime.Value.ToString(CultureInfo.InvariantCulture)
            };
        }

        // The monoisotopic guess is stored as a userParam on the scan element. This check is
        // independent of the MS level branch above.
        if (scanWithPrecursor != null && scanWithPrecursor.SelectedIonMonoisotopicGuessMz.HasValue)
        {
            scanEntry.userParam = new Generated.UserParamType[1];
            scanEntry.userParam[0] = new Generated.UserParamType()
            {
                name = "[mzLib]Monoisotopic M/Z:",
                value = scanWithPrecursor.SelectedIonMonoisotopicGuessMz.Value.ToString(CultureInfo.InvariantCulture)
            };
        }

        scanEntry.scanWindowList = new Generated.ScanWindowListType()
        {
            count = 1,
            scanWindow = new Generated.ParamGroupType[1]
        };
        scanEntry.scanWindowList.scanWindow[0] = new Generated.ParamGroupType()
        {
            cvParam = new Generated.CVParamType[2]
        };
        scanEntry.scanWindowList.scanWindow[0].cvParam[0] = new Generated.CVParamType()
        {
            name = "scan window lower limit",
            accession = "MS:1000501",
            value = scan.ScanWindowRange.Minimum.ToString(CultureInfo.InvariantCulture)
        };
        scanEntry.scanWindowList.scanWindow[0].cvParam[1] = new Generated.CVParamType()
        {
            name = "scan window upper limit",
            accession = "MS:1000500",
            value = scan.ScanWindowRange.Maximum.ToString(CultureInfo.InvariantCulture)
        };

        spectrum.scanList = new Generated.ScanListType()
        {
            count = "1",
            scan = new Generated.ScanType[] { scanEntry }
        };

        // ONLY WRITING M/Z AND INTENSITY DATA, NOT THE CHARGE! (but can add charge info later)
        // CHARGE (and other stuff) CAN BE IMPORTANT IN ML APPLICATIONS!!!!!
        // The declared count and the array size follow the arrays actually written: two, or five
        // when noise data is present. The count used to be fixed at 2 in both cases.
        bool hasNoiseData = scan.NoiseData != null;
        int binaryArrayCount = hasNoiseData ? 5 : 2;
        var binaryArrays = new Generated.BinaryDataArrayType[binaryArrayCount];

        // M/Z Data
        binaryArrays[0] = DescribeBinaryArray(
            new Generated.BinaryDataArrayType() { binary = massSpectrum.Get64BitXarray() },
            "MS:1000514", "m/z array", null);

        // Intensity Data
        binaryArrays[1] = DescribeBinaryArray(
            new Generated.BinaryDataArrayType() { binary = massSpectrum.Get64BitYarray() },
            "MS:1000515", "intensity array", null);

        if (hasNoiseData)
        {
            // NOTE(review): the labels below are reproduced unchanged. Noise values are labelled
            // "noise baseline" and baseline values "noise intensity" - confirm against the reader
            // before changing, since the reader may rely on this pairing.

            // mass
            binaryArrays[2] = DescribeBinaryArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataMass() },
                "MS:1000786", "non-standard data array", "noise m/z");

            // noise
            binaryArrays[3] = DescribeBinaryArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataNoise() },
                "MS:1000786", "non-standard data array", "noise baseline");

            // baseline
            binaryArrays[4] = DescribeBinaryArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataBaseline() },
                "MS:1000786", "non-standard data array", "noise intensity");
        }

        spectrum.binaryDataArrayList = new Generated.BinaryDataArrayListType()
        {
            count = binaryArrayCount.ToString(CultureInfo.InvariantCulture),
            binaryDataArray = binaryArrays
        };

        mzML.run.spectrumList.spectrum[i - 1] = spectrum;
    }

    using (TextWriter writer = new StreamWriter(outputFile))
    {
        mzmlSerializer.Serialize(writer, mzML);
    }
}
/// <summary>
/// Collects labeled MS1 and MS2 data points from <c>goodIdentifications</c>, processing the
/// identifications in parallel. MS2 points are gathered for every accepted identification; the
/// MS1 search runs only for the first identification seen with a given full sequence.
/// </summary>
/// <returns>A <c>DataPointAquisitionResults</c> holding both point lists and the four counters.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    Status("Extracting data points:");

    // The final training point lists and their bookkeeping counters
    var ms1DataPoints = new List<LabeledMs1DataPoint>();
    var ms2DataPoints = new List<LabeledMs2DataPoint>();
    int ms1CombinationsConsidered = 0;
    int ms1CombinationsIgnoredTooManyPeaks = 0;
    int ms2CombinationsConsidered = 0;
    int ms2CombinationsIgnoredTooManyPeaks = 0;

    // ms2Gate guards the MS2 list, its counters and the seen-sequence set;
    // ms1Gate guards the MS1 list and its counters.
    var seenSequences = new HashSet<string>();
    object ms1Gate = new object();
    object ms2Gate = new object();

    int identificationCount = goodIdentifications.Count;

    // Loop over identifications
    Parallel.ForEach(Partitioner.Create(0, identificationCount), indexRange =>
    {
        for (int matchIndex = indexRange.Item1; matchIndex < indexRange.Item2; matchIndex++)
        {
            PeptideSpectralMatch identification = goodIdentifications[matchIndex];

            // Each identification has an MS2 spectrum attached to it.
            int ms2scanNumber = identification.ScanNumber;
            int peptideCharge = identification.ScanPrecursorCharge;
            if (identification.FullSequence == null)
            {
                continue;
            }

            var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

            // Get the peptide, don't forget to add the modifications!!!!
            var sequenceWithFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
            if (sequenceWithFormulas == null)
            {
                continue;
            }

            // Skip peptides carrying a modification whose neutral losses are anything but a single zero.
            bool hasUnsupportedNeutralLoss = representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0);
            if (hasUnsupportedNeutralLoss)
            {
                continue;
            }

            Proteomics.Peptide coolPeptide = new Proteomics.Peptide(sequenceWithFormulas);

            var ms2Scan = myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;
            var ms2tuple = SearchMS2Spectrum(ms2Scan, coolPeptide, peptideCharge, identification);

            // If MS2 has low evidence for peptide, skip and go to next one
            if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
            {
                continue;
            }

            bool firstTimeSequenceSeen;
            lock (ms2Gate)
            {
                ms2DataPoints.AddRange(ms2tuple.Item1);
                ms2CombinationsConsidered += ms2tuple.Item2;
                ms2CombinationsIgnoredTooManyPeaks += ms2tuple.Item3;

                // HashSet.Add reports whether the sequence was new, replacing Contains + Add.
                firstTimeSequenceSeen = seenSequences.Add(identification.FullSequence);
            }
            if (!firstTimeSequenceSeen)
            {
                continue; // Do not search same sequence multiple times in MS1 scans
            }

            // Calculate isotopic distribution of the full peptide
            var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);
            double[] theoreticalMasses = dist.Masses.ToArray();
            double[] theoreticalIntensities = dist.Intensities.ToArray();

            // Order both arrays by descending intensity (intensities are the sort keys).
            var descending = Comparer<double>.Create((x, y) => y.CompareTo(x));
            Array.Sort(theoreticalIntensities, theoreticalMasses, descending);

            // Search the MS1 scans in both directions from the MS2 scan (-1 and +1).
            var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);
            var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

            lock (ms1Gate)
            {
                ms1DataPoints.AddRange(ms1tupleBack.Item1);
                ms1CombinationsConsidered += ms1tupleBack.Item2;
                ms1CombinationsIgnoredTooManyPeaks += ms1tupleBack.Item3;

                ms1DataPoints.AddRange(ms1tupleForward.Item1);
                ms1CombinationsConsidered += ms1tupleForward.Item2;
                ms1CombinationsIgnoredTooManyPeaks += ms1tupleForward.Item3;
            }
        }
    });

    return new DataPointAquisitionResults(
        this,
        ms1DataPoints,
        ms2DataPoints,
        ms1CombinationsConsidered,
        ms1CombinationsIgnoredTooManyPeaks,
        ms2CombinationsConsidered,
        ms2CombinationsIgnoredTooManyPeaks);
}
/// <summary>
/// Runs a classic search on one spectra file, keeps the identifications that pass the
/// q-value filter, annotates them with matched fragment ions and extracts calibration data
/// points from them.
/// </summary>
/// <param name="myMsDataFile">Spectra file to search.</param>
/// <param name="currentDataFile">Path of that file; its name without extension is used in the nested ids.</param>
/// <param name="variableModifications">Variable modifications passed to the search engines.</param>
/// <param name="fixedModifications">Fixed modifications passed to the search engines.</param>
/// <param name="proteinList">Proteins to search against.</param>
/// <param name="taskId">Task id, first element of every nested-id list.</param>
/// <param name="combinedParameters">Ion-type flags, deconvolution and digestion settings.</param>
/// <param name="initPrecTol">Precursor tolerance; a <c>PpmTolerance</c> selects the ppm search mode, anything else the absolute one.</param>
/// <param name="initProdTol">Product (fragment) tolerance.</param>
/// <returns>
/// The identifications with notch q-value below 0.01 that are not decoys and have a full sequence,
/// together with the data point acquisition results. When no identification qualifies, an empty
/// list and <c>null</c>.
/// </returns>
private (List<PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string currentDataFile, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
    var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);

    // Every engine and log call gets its own fresh list, as before, so no list instance is shared.
    List<string> NestedIds() => new List<string> { taskId, "Individual Spectra Files", fileNameWithoutExtension };

    MassDiffAcceptor searchMode;
    if (initPrecTol is PpmTolerance)
    {
        searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
    }
    else
    {
        searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
    }

    // The fragment-type flags and the product-type list are driven by the same four switches,
    // so they are built together instead of in two parallel if-chains.
    FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;
    List<ProductType> lp = new List<ProductType>();
    if (combinedParameters.BIons)
    {
        fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
        lp.Add(ProductType.B);
    }
    if (combinedParameters.YIons)
    {
        fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
        lp.Add(ProductType.Y);
    }
    if (combinedParameters.CIons)
    {
        fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
        lp.Add(ProductType.C);
    }
    if (combinedParameters.ZdotIons)
    {
        fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
        lp.Add(ProductType.Zdot);
    }

    var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance)
        .OrderBy(b => b.PrecursorMass)
        .ToArray();

    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

    Log("Searching with searchMode: " + searchMode, NestedIds());
    Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchMode, false, combinedParameters, initProdTol, NestedIds()).Run();

    List<PeptideSpectralMatch> allPsms = allPsmsArray.ToList();

    var sequencesToProteinsResults = (SequencesToActualProteinPeptidesEngineResults)new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, lp, new List<IDigestionParams> { combinedParameters.DigestionParams }, combinedParameters.ReportAllAmbiguity, NestedIds()).Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatching = sequencesToProteinsResults.CompactPeptideToProteinPeptideMatching;

    // Slots of scans that produced no match are null.
    foreach (var psm in allPsms)
    {
        if (psm != null)
        {
            psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
        }
    }

    // Best score first, ties broken by the smaller precursor mass error, then keep one PSM per
    // (file, scan number, peptide mass) combination.
    allPsms = allPsms.Where(b => b != null)
        .OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, searchMode.NumNotches, false, NestedIds()).Run();

    List<PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

    if (!goodIdentifications.Any())
    {
        Warn("No PSMs below 1% FDR observed!");
        return (new List<PeptideSpectralMatch>(), null);
    }

    var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(lp);

    // Only the good identifications are used and returned from here on, so fragment matching is
    // limited to them; previously it ran over every PSM and the rest of the work was discarded.
    foreach (var psm in goodIdentifications)
    {
        var theScan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double thePrecursorMass = psm.ScanPrecursorMass;

        foreach (var productType in lp)
        {
            var ionMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { productType });
            Array.Sort(ionMasses);

            List<double> matchedIonMassesList = new List<double>();
            List<double> productMassErrorDaList = new List<double>();
            List<double> productMassErrorPpmList = new List<double>();
            LocalizationEngine.MatchIons(theScan, initProdTol, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, false);

            psm.MatchedIonDictOnlyMatches.Add(productType, matchedIonMassesList.ToArray());
            psm.ProductMassErrorDa.Add(productType, productMassErrorDaList.ToArray());
            psm.ProductMassErrorPpm.Add(productType, productMassErrorPpmList.ToArray());
        }
    }

    DataPointAquisitionResults currentResult = (DataPointAquisitionResults)new DataPointAcquisitionEngine(
        goodIdentifications,
        myMsDataFile,
        initPrecTol,
        initProdTol,
        CalibrationParameters.NumFragmentsNeededForEveryIdentification,
        CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
        CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
        fragmentTypesForCalibration,
        NestedIds()).Run();

    return (goodIdentifications, currentResult);
}