/// <summary>
/// Finds the one-based position of the spectrum that the given spectrum references as its precursor.
/// </summary>
/// <remarks>
/// The reference is the <c>spectrumRef</c> of the first precursor entry. The search walks backwards,
/// starting at the spectrum immediately before <paramref name="oneBasedSpectrumNumber"/>, and stops at
/// the first spectrum whose <c>id</c> equals that reference.
/// </remarks>
/// <param name="_mzMLConnection">Deserialized mzML document that holds the spectrum list.</param>
/// <param name="oneBasedSpectrumNumber">One-based position of the spectrum whose precursor is wanted.</param>
/// <returns>One-based position of the matching earlier spectrum.</returns>
/// <exception cref="InvalidOperationException">
/// No spectrum before <paramref name="oneBasedSpectrumNumber"/> has the referenced id.
/// </exception>
private static int GetOneBasedPrecursorScanNumber(Generated.mzMLType _mzMLConnection, int oneBasedSpectrumNumber)
{
    var spectra = _mzMLConnection.run.spectrumList.spectrum;
    string precursorID = spectra[oneBasedSpectrumNumber - 1].precursorList.precursor[0].spectrumRef;

    // Stop at the first spectrum instead of running off the front of the array.
    for (int candidate = oneBasedSpectrumNumber - 1; candidate >= 1; candidate--)
    {
        if (precursorID.Equals(spectra[candidate - 1].id))
        {
            return candidate;
        }
    }

    throw new InvalidOperationException(
        "No spectrum preceding spectrum " + oneBasedSpectrumNumber + " has the precursor id '" + precursorID + "'");
}
/// <summary>
/// Determines the mass analyzer type, first from a scan filter string and otherwise from the first
/// instrument configuration of the document.
/// </summary>
/// <param name="_mzMLConnection">Deserialized mzML document.</param>
/// <param name="filter">Scan filter string; may be null.</param>
/// <returns>
/// The analyzer that <c>analyzerDictionary</c> maps to the text matched by <c>MZAnalyzerTypeRegex</c>,
/// else the analyzer mapped to the accession of the first cvParam of the first instrument
/// configuration, else <see cref="MZAnalyzerType.Unknown"/>.
/// </returns>
private static MZAnalyzerType GetMzAnalyzer(Generated.mzMLType _mzMLConnection, string filter)
{
    if (filter != null)
    {
        // A failed match has no captures, so indexing Captures[0] would throw; check Success first
        // and fall through to the instrument configuration when the filter is not recognised.
        var analyzerMatch = MZAnalyzerTypeRegex.Match(filter);
        if (analyzerMatch.Success
            && analyzerDictionary.TryGetValue(analyzerMatch.Value, out MZAnalyzerType analyzerFromFilter))
        {
            return analyzerFromFilter;
        }
    }

    // Maybe in the beginning of the file, there is a single analyzer?
    // Gets the first analyzer used.
    var configurations = _mzMLConnection.instrumentConfigurationList?.instrumentConfiguration;
    if (configurations == null || configurations.Length == 0)
    {
        return MZAnalyzerType.Unknown;
    }

    var configurationParams = configurations[0]?.cvParam;
    if (configurationParams == null || configurationParams.Length == 0)
    {
        return MZAnalyzerType.Unknown;
    }

    // A null key would make the dictionary lookup throw.
    string accession = configurationParams[0]?.accession;
    if (accession != null
        && analyzerDictionary.TryGetValue(accession, out MZAnalyzerType analyzerFromConfiguration))
    {
        return analyzerFromConfiguration;
    }

    return MZAnalyzerType.Unknown;
}
/// <summary>
/// Builds an mzML document from every scan of <paramref name="myMsDataFile"/> and serializes it to
/// <paramref name="outputFile"/> with <c>mzmlSerializer</c>.
/// </summary>
/// <remarks>
/// Per spectrum the method writes the spectrum-level cvParams, an optional precursor entry, one scan
/// entry with its scan window, and the m/z and intensity arrays. When the scan has noise data, three
/// additional non-standard arrays are written.
/// The instrument configuration list is left empty and the chromatogram is an empty placeholder.
/// </remarks>
/// <param name="myMsDataFile">Source of the scans to write.</param>
/// <param name="outputFile">Path of the file to create.</param>
/// <param name="writeIndexed">Must be false; indexed output is not implemented.</param>
/// <exception cref="NotImplementedException"><paramref name="writeIndexed"/> is true.</exception>
public static void CreateAndWriteMyMzmlWithCalibratedSpectra(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string outputFile, bool writeIndexed)
{
    // Indexed output is not supported, so reject it before doing the work of building the document.
    if (writeIndexed)
    {
        throw new NotImplementedException("Writing indexed mzMLs not yet supported");
    }

    // Completes a binary data array whose payload is already set: encoded length, the three cvParams
    // shared by every array written here, and an optional custom-type userParam.
    Generated.BinaryDataArrayType DescribeDoubleArray(Generated.BinaryDataArrayType array, string arrayAccession, string arrayName, string customType)
    {
        // 4 * ceil(bytes / 3) is the length of the payload once it is base64-encoded.
        array.encodedLength = (4 * Math.Ceiling(((double)array.binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
        array.cvParam = new Generated.CVParamType[3];
        array.cvParam[0] = new Generated.CVParamType()
        {
            accession = arrayAccession,
            name = arrayName
        };
        array.cvParam[1] = new Generated.CVParamType()
        {
            accession = "MS:1000523",
            name = "64-bit float"
        };
        array.cvParam[2] = new Generated.CVParamType()
        {
            accession = "MS:1000576",
            name = "no compression"
        };
        if (customType != null)
        {
            array.userParam = new Generated.UserParamType[1];
            array.userParam[0] = new Generated.UserParamType()
            {
                name = "kelleherCustomType",
                value = customType
            };
        }
        return array;
    }

    var mzML = new Generated.mzMLType()
    {
        version = "1",
        cvList = new Generated.CVListType()
        {
            count = "1",
            cv = new Generated.CVType[1]
        }
    };
    mzML.cvList.cv[0] = new Generated.CVType()
    {
        URI = @"https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo",
        fullName = "Proteomics Standards Initiative Mass Spectrometry Ontology",
        id = "MS"
    };

    mzML.fileDescription = new Generated.FileDescriptionType()
    {
        fileContent = new Generated.ParamGroupType()
        {
            cvParam = new Generated.CVParamType[2]
        }
    };
    mzML.fileDescription.fileContent.cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000579" // MS1 Data
    };
    mzML.fileDescription.fileContent.cvParam[1] = new Generated.CVParamType()
    {
        accession = "MS:1000580" // MSn Data
    };

    // TODO: add the raw file fields
    mzML.softwareList = new Generated.SoftwareListType()
    {
        count = "1",
        software = new Generated.SoftwareType[1]
    };
    mzML.softwareList.software[0] = new Generated.SoftwareType()
    {
        id = "mzLib",
        version = "1",
        cvParam = new Generated.CVParamType[1]
    };
    mzML.softwareList.software[0].cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000799",
        value = "mzLib"
    };

    // Leaving empty. Can't figure out the configurations.
    // TODO: read instrumentConfigurationList from mzML file
    mzML.instrumentConfigurationList = new Generated.InstrumentConfigurationListType();

    // Only this library's processing step is recorded; the source might have had other data
    // processing (but not if it is a raw file).
    // TODO: read dataProcessingList from mzML file
    mzML.dataProcessingList = new Generated.DataProcessingListType()
    {
        count = "1",
        dataProcessing = new Generated.DataProcessingType[1]
    };
    mzML.dataProcessingList.dataProcessing[0] = new Generated.DataProcessingType()
    {
        id = "mzLibProcessing"
    };

    // TODO: Finish the chromatogram writing!
    mzML.run = new Generated.RunType()
    {
        chromatogramList = new Generated.ChromatogramListType()
        {
            count = "1",
            chromatogram = new Generated.ChromatogramType[1]
        }
    };
    mzML.run.chromatogramList.chromatogram[0] = new Generated.ChromatogramType();

    int spectrumCount = myMsDataFile.NumSpectra;
    mzML.run.spectrumList = new Generated.SpectrumListType()
    {
        count = spectrumCount.ToString(CultureInfo.InvariantCulture),
        defaultDataProcessingRef = "mzLibProcessing",
        spectrum = new Generated.SpectrumType[spectrumCount]
    };

    // Loop over all spectra
    for (int i = 1; i <= spectrumCount; i++)
    {
        // Fetch the scan once per iteration instead of once per field.
        var scan = myMsDataFile.GetOneBasedScan(i);
        var scanWithPrecursor = scan as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;
        var massSpectrum = scan.MassSpectrum;

        // NOTE(review): the mzML schema describes index as zero-based, while this writes the
        // one-based position. Left unchanged; confirm with consumers before altering.
        var spectrum = new Generated.SpectrumType()
        {
            defaultArrayLength = massSpectrum.Size,
            index = i.ToString(CultureInfo.InvariantCulture),
            id = scan.OneBasedScanNumber.ToString(),
            cvParam = new Generated.CVParamType[8]
        };
        mzML.run.spectrumList.spectrum[i - 1] = spectrum;

        // Slots of the cvParam array that are never assigned stay null.
        spectrum.cvParam[0] = new Generated.CVParamType();
        if (scan.MsnOrder == 1)
        {
            spectrum.cvParam[0].accession = "MS:1000579";
        }
        else if (scanWithPrecursor != null)
        {
            spectrum.cvParam[0].accession = "MS:1000580";

            // So needs a precursor!
            string precursorID = scanWithPrecursor.OneBasedPrecursorScanNumber.ToString();

            var selectedIon = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[3]
            };

            // Selected ion MZ
            selectedIon.cvParam[0] = new Generated.CVParamType()
            {
                name = "selected ion m/z",
                value = scanWithPrecursor.SelectedIonMZ.ToString(CultureInfo.InvariantCulture),
                accession = "MS:1000744"
            };

            // Charge State
            if (scanWithPrecursor.SelectedIonChargeStateGuess.HasValue)
            {
                selectedIon.cvParam[1] = new Generated.CVParamType()
                {
                    name = "charge state",
                    value = scanWithPrecursor.SelectedIonChargeStateGuess.Value.ToString(CultureInfo.InvariantCulture),
                    accession = "MS:1000041"
                };
            }

            // Selected ion intensity
            if (scanWithPrecursor.SelectedIonIntensity.HasValue)
            {
                selectedIon.cvParam[2] = new Generated.CVParamType()
                {
                    name = "peak intensity",
                    value = scanWithPrecursor.SelectedIonIntensity.Value.ToString(CultureInfo.InvariantCulture),
                    accession = "MS:1000042"
                };
            }

            // Both offsets are written as half of the isolation range width.
            MzRange isolationRange = scanWithPrecursor.IsolationRange;
            var isolationWindow = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[3]
            };
            isolationWindow.cvParam[0] = new Generated.CVParamType()
            {
                accession = "MS:1000827",
                name = "isolation window target m/z",
                value = isolationRange.Mean.ToString(CultureInfo.InvariantCulture)
            };
            isolationWindow.cvParam[1] = new Generated.CVParamType()
            {
                accession = "MS:1000828",
                name = "isolation window lower offset",
                value = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
            };
            isolationWindow.cvParam[2] = new Generated.CVParamType()
            {
                accession = "MS:1000829",
                name = "isolation window upper offset",
                value = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
            };

            DissociationType dissociationType = scanWithPrecursor.DissociationType;
            var activation = new Generated.ParamGroupType()
            {
                cvParam = new Generated.CVParamType[1]
            };
            activation.cvParam[0] = new Generated.CVParamType()
            {
                accession = DissociationTypeAccessions[dissociationType],
                name = DissociationTypeNames[dissociationType]
            };

            spectrum.precursorList = new Generated.PrecursorListType()
            {
                count = "1",
                precursor = new Generated.PrecursorType[1]
            };
            spectrum.precursorList.precursor[0] = new Generated.PrecursorType()
            {
                spectrumRef = precursorID,
                selectedIonList = new Generated.SelectedIonListType()
                {
                    count = "1",
                    selectedIon = new Generated.ParamGroupType[] { selectedIon }
                },
                isolationWindow = isolationWindow,
                activation = activation
            };
        }

        spectrum.cvParam[1] = new Generated.CVParamType()
        {
            name = "ms level",
            accession = "MS:1000511",
            value = scan.MsnOrder.ToString(CultureInfo.InvariantCulture)
        };

        spectrum.cvParam[2] = new Generated.CVParamType()
        {
            name = CentroidNames[scan.IsCentroid],
            accession = CentroidAccessions[scan.IsCentroid]
        };

        // Polarity is written only when both lookup tables know it.
        if (PolarityNames.TryGetValue(scan.Polarity, out string polarityName)
            && PolarityAccessions.TryGetValue(scan.Polarity, out string polarityAccession))
        {
            spectrum.cvParam[3] = new Generated.CVParamType()
            {
                name = polarityName,
                accession = polarityAccession
            };
        }

        // Spectrum title
        spectrum.cvParam[4] = new Generated.CVParamType()
        {
            name = "spectrum title",
            accession = "MS:1000796",
            value = scan.OneBasedScanNumber.ToString()
        };

        if (massSpectrum.Size > 0)
        {
            // Lowest observed mz
            spectrum.cvParam[5] = new Generated.CVParamType()
            {
                name = "lowest observed m/z",
                accession = "MS:1000528",
                value = massSpectrum.FirstX.ToString(CultureInfo.InvariantCulture)
            };

            // Highest observed mz
            spectrum.cvParam[6] = new Generated.CVParamType()
            {
                name = "highest observed m/z",
                accession = "MS:1000527",
                value = massSpectrum.LastX.ToString(CultureInfo.InvariantCulture)
            };
        }

        // Total ion current
        spectrum.cvParam[7] = new Generated.CVParamType()
        {
            name = "total ion current",
            accession = "MS:1000285",
            value = scan.TotalIonCurrent.ToString(CultureInfo.InvariantCulture)
        };

        // Retention time, filter string and injection time live on the single scan entry.
        var scanEntry = new Generated.ScanType()
        {
            cvParam = new Generated.CVParamType[3]
        };
        spectrum.scanList = new Generated.ScanListType()
        {
            count = "1",
            scan = new Generated.ScanType[] { scanEntry }
        };
        scanEntry.cvParam[0] = new Generated.CVParamType()
        {
            name = "scan start time",
            accession = "MS:1000016",
            value = scan.RetentionTime.ToString(CultureInfo.InvariantCulture),
            unitCvRef = "UO",
            unitAccession = "UO:0000031",
            unitName = "minute"
        };
        scanEntry.cvParam[1] = new Generated.CVParamType()
        {
            name = "filter string",
            accession = "MS:1000512",
            value = scan.ScanFilter
        };
        if (scan.InjectionTime.HasValue)
        {
            scanEntry.cvParam[2] = new Generated.CVParamType()
            {
                name = "ion injection time",
                accession = "MS:1000927",
                value = scan.InjectionTime.Value.ToString(CultureInfo.InvariantCulture)
            };
        }

        // The monoisotopic guess has no cvParam here, so it is stored as a userParam.
        if (scanWithPrecursor != null && scanWithPrecursor.SelectedIonMonoisotopicGuessMz.HasValue)
        {
            scanEntry.userParam = new Generated.UserParamType[1];
            scanEntry.userParam[0] = new Generated.UserParamType()
            {
                name = "[mzLib]Monoisotopic M/Z:",
                value = scanWithPrecursor.SelectedIonMonoisotopicGuessMz.Value.ToString(CultureInfo.InvariantCulture)
            };
        }

        var scanWindow = new Generated.ParamGroupType()
        {
            cvParam = new Generated.CVParamType[2]
        };
        scanEntry.scanWindowList = new Generated.ScanWindowListType()
        {
            count = 1,
            scanWindow = new Generated.ParamGroupType[] { scanWindow }
        };
        scanWindow.cvParam[0] = new Generated.CVParamType()
        {
            name = "scan window lower limit",
            accession = "MS:1000501",
            value = scan.ScanWindowRange.Minimum.ToString(CultureInfo.InvariantCulture)
        };
        scanWindow.cvParam[1] = new Generated.CVParamType()
        {
            name = "scan window upper limit",
            accession = "MS:1000500",
            value = scan.ScanWindowRange.Maximum.ToString(CultureInfo.InvariantCulture)
        };

        // ONLY WRITING M/Z AND INTENSITY DATA, NOT THE CHARGE! (but can add charge info later)
        // CHARGE (and other stuff) CAN BE IMPORTANT IN ML APPLICATIONS!!!!!
        // The declared count and the array size follow the number of arrays actually written:
        // two, plus three noise arrays when the scan carries noise data.
        bool hasNoiseData = scan.NoiseData != null;
        int binaryArrayCount = hasNoiseData ? 5 : 2;
        var binaryArrays = new Generated.BinaryDataArrayType[binaryArrayCount];
        spectrum.binaryDataArrayList = new Generated.BinaryDataArrayListType()
        {
            count = binaryArrayCount.ToString(CultureInfo.InvariantCulture),
            binaryDataArray = binaryArrays
        };

        // M/Z Data
        binaryArrays[0] = DescribeDoubleArray(
            new Generated.BinaryDataArrayType() { binary = massSpectrum.Get64BitXarray() },
            "MS:1000514", "m/z array", null);

        // Intensity Data
        binaryArrays[1] = DescribeDoubleArray(
            new Generated.BinaryDataArrayType() { binary = massSpectrum.Get64BitYarray() },
            "MS:1000515", "intensity array", null);

        if (hasNoiseData)
        {
            // NOTE(review): the labels below are kept exactly as they were. The array from
            // Get64BitNoiseDataNoise is tagged "noise baseline" and the one from
            // Get64BitNoiseDataBaseline is tagged "noise intensity" - confirm this is intended.

            // mass
            binaryArrays[2] = DescribeDoubleArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataMass() },
                "MS:1000786", "non-standard data array", "noise m/z");

            // noise
            binaryArrays[3] = DescribeDoubleArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataNoise() },
                "MS:1000786", "non-standard data array", "noise baseline");

            // baseline
            binaryArrays[4] = DescribeDoubleArray(
                new Generated.BinaryDataArrayType() { binary = scan.Get64BitNoiseDataBaseline() },
                "MS:1000786", "non-standard data array", "noise intensity");
        }
    }

    using (TextWriter writer = new StreamWriter(outputFile))
    {
        mzmlSerializer.Serialize(writer, mzML);
    }
}
/// <summary>
/// Converts one spectrum of a deserialized mzML document into an <see cref="MsDataScan"/>.
/// </summary>
/// <remarks>
/// Reads the analyzer from the instrument configuration, the spectrum-level cvParams (MS level,
/// centroid flag, total ion current, polarity), the binary m/z and intensity arrays, the scan window
/// and the scan-level cvParams. For spectra whose MS level is not 1 it also reads the first precursor:
/// selected ion, isolation window, activation and monoisotopic m/z userParam.
/// </remarks>
/// <param name="_mzMLConnection">Deserialized mzML document.</param>
/// <param name="oneBasedIndex">One-based position of the spectrum in the spectrum list.</param>
/// <param name="filterParams">Optional peak filtering settings; null disables filtering.</param>
/// <returns>The scan built from the spectrum.</returns>
/// <exception cref="MzLibException">
/// The spectrum is in profile mode, or it lacks the MS level or the centroid cvParam.
/// </exception>
private static MsDataScan GetMsDataOneBasedScanFromConnection(Generated.mzMLType _mzMLConnection, int oneBasedIndex, IFilteringParams filterParams)
{
    // Instrument configurations are declared once for the whole document (in mzML, at the start).
    var configs = _mzMLConnection.instrumentConfigurationList.instrumentConfiguration;
    var defaultInstrumentConfig = _mzMLConnection.run.defaultInstrumentConfigurationRef; // May be null!

    var spectrum = _mzMLConnection.run.spectrumList.spectrum[oneBasedIndex - 1];
    var scan = spectrum.scanList.scan[0];
    var scanSpecificInstrumentConfig = scan.instrumentConfigurationRef;

    MZAnalyzerType analyzer = default(MZAnalyzerType);
    if (scanSpecificInstrumentConfig == null || scanSpecificInstrumentConfig == defaultInstrumentConfig)
    {
        // Use the default: the first analyzer of the first configuration, when it lists components.
        if (configs[0].componentList != null
            && analyzerDictionary.TryGetValue(configs[0].componentList.analyzer[0].cvParam[0].accession, out MZAnalyzerType defaultAnalyzer))
        {
            analyzer = defaultAnalyzer;
        }
    }
    else
    {
        // Use the scan-specific configuration; an unrecognised accession leaves the default value.
        foreach (var config in configs)
        {
            if (config.id.Equals(scanSpecificInstrumentConfig))
            {
                analyzerDictionary.TryGetValue(config.componentList.analyzer[0].cvParam[0].accession, out analyzer);
            }
        }
    }

    string nativeId = spectrum.id;

    int? msOrder = null;
    bool? isCentroid = null;
    Polarity polarity = Polarity.Unknown;
    double tic = double.NaN;

    foreach (Generated.CVParamType cv in spectrum.cvParam)
    {
        if (cv.accession.Equals(_msnOrderAccession))
        {
            msOrder = int.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_centroidSpectrum))
        {
            isCentroid = true;
        }
        if (cv.accession.Equals(_profileSpectrum))
        {
            throw new MzLibException("Reading profile mode mzmls not supported");
        }
        if (cv.accession.Equals(_totalIonCurrent))
        {
            tic = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        // Assign only on a hit: a failed TryGetValue would otherwise overwrite polarity with the
        // enum's default value.
        if (polarity.Equals(Polarity.Unknown)
            && polarityDictionary.TryGetValue(cv.accession, out Polarity recognisedPolarity))
        {
            polarity = recognisedPolarity;
        }
    }

    if (!msOrder.HasValue || !isCentroid.HasValue)
    {
        throw new MzLibException("!msOrder.HasValue || !isCentroid.HasValue");
    }

    double[] masses = new double[0];
    double[] intensities = new double[0];

    foreach (Generated.BinaryDataArrayType binaryData in spectrum.binaryDataArrayList.binaryDataArray)
    {
        bool compressed = false;
        bool mzArray = false;
        bool intensityArray = false;
        // Treated as 32-bit unless a 64-bit cvParam says otherwise.
        bool is32bit = true;

        foreach (Generated.CVParamType cv in binaryData.cvParam)
        {
            compressed |= cv.accession.Equals(_zlibCompression);
            is32bit &= !cv.accession.Equals(_64bit);
            is32bit |= cv.accession.Equals(_32bit);
            mzArray |= cv.accession.Equals(_mzArray);
            intensityArray |= cv.accession.Equals(_intensityArray);
        }

        double[] data = ConvertBase64ToDoubles(binaryData.binary, compressed, is32bit);
        if (mzArray)
        {
            masses = data;
        }
        if (intensityArray)
        {
            intensities = data;
        }
    }

    // Scan window limits stay NaN when the scan declares no window list.
    double high = double.NaN;
    double low = double.NaN;
    var aScanWindowList = scan.scanWindowList;
    if (aScanWindowList != null)
    {
        foreach (Generated.CVParamType cv in aScanWindowList.scanWindow[0].cvParam)
        {
            if (cv.accession.Equals(_scanWindowLowerLimit))
            {
                low = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            else if (cv.accession.Equals(_scanWindowUpperLimit))
            {
                high = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    bool trimThisLevel = filterParams != null
        && ((filterParams.ApplyTrimmingToMs1 && msOrder.Value == 1)
            || (filterParams.ApplyTrimmingToMsMs && msOrder.Value > 1));
    if (trimThisLevel && intensities.Length > 0)
    {
        WindowModeHelper(ref intensities, ref masses, filterParams, low, high);
    }

    // Order both arrays by ascending m/z before building the spectrum.
    Array.Sort(masses, intensities);
    var mzmlMzSpectrum = new MzSpectrum(masses, intensities, false);

    double rtInMinutes = double.NaN;
    string scanFilter = null;
    double? injectionTime = null;
    int oneBasedScanNumber = oneBasedIndex;
    if (scan.cvParam != null)
    {
        foreach (Generated.CVParamType cv in scan.cvParam)
        {
            if (cv.accession.Equals(_retentionTime))
            {
                rtInMinutes = double.Parse(cv.value, CultureInfo.InvariantCulture);
                if (cv.unitName == "second")
                {
                    rtInMinutes /= 60;
                }
            }
            if (cv.accession.Equals(_filterString))
            {
                scanFilter = cv.value;
            }
            if (cv.accession.Equals(_ionInjectionTime))
            {
                injectionTime = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            // Get the real one-based spectrum number if available; otherwise spectra are assumed
            // to be in order. This is present in .mgf->.mzml conversions from MSConvert.
            if (cv.accession.Equals(_oneBasedScanNumber))
            {
                oneBasedScanNumber = int.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    if (msOrder.Value == 1)
    {
        return new MsDataScan(
            mzmlMzSpectrum,
            oneBasedScanNumber,
            msOrder.Value,
            isCentroid.Value,
            polarity,
            rtInMinutes,
            new MzRange(low, high),
            scanFilter,
            analyzer,
            tic,
            injectionTime,
            null,
            nativeId);
    }

    var precursor = spectrum.precursorList.precursor[0];

    double selectedIonMz = double.NaN;
    int? selectedIonCharge = null;
    double? selectedIonIntensity = null;
    foreach (Generated.CVParamType cv in precursor.selectedIonList.selectedIon[0].cvParam)
    {
        if (cv.accession.Equals(_selectedIonMz))
        {
            selectedIonMz = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_precursorCharge))
        {
            selectedIonCharge = int.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_peakIntensity))
        {
            selectedIonIntensity = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
    }

    double? isolationMz = null;
    double lowIsolation = double.NaN;
    double highIsolation = double.NaN;
    if (precursor.isolationWindow != null)
    {
        foreach (Generated.CVParamType cv in precursor.isolationWindow.cvParam)
        {
            if (cv.accession.Equals(_isolationWindowTargetMZ))
            {
                isolationMz = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            if (cv.accession.Equals(_isolationWindowLowerOffset))
            {
                lowIsolation = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            if (cv.accession.Equals(_isolationWindowUpperOffset))
            {
                highIsolation = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    DissociationType dissociationType = DissociationType.Unknown;
    if (precursor.activation.cvParam != null)
    {
        foreach (Generated.CVParamType cv in precursor.activation.cvParam)
        {
            // Passing dissociationType directly as the out argument would reset it on every
            // activation cvParam that is not a dissociation method, so keep the last recognised
            // entry instead.
            if (dissociationDictionary.TryGetValue(cv.accession, out DissociationType recognisedDissociation))
            {
                dissociationType = recognisedDissociation;
            }
        }
    }

    double? monoisotopicMz = null;
    if (scan.userParam != null)
    {
        foreach (var userParam in scan.userParam)
        {
            // Ordinal comparison: the suffix is a fixed identifier, not linguistic text.
            if (userParam.name.EndsWith("Monoisotopic M/Z:", StringComparison.Ordinal))
            {
                monoisotopicMz = double.Parse(userParam.value, CultureInfo.InvariantCulture);
            }
        }
    }

    int? precursorScanNumber = null;
    if (precursor.spectrumRef != null)
    {
        precursorScanNumber = GetOneBasedPrecursorScanNumber(_mzMLConnection, oneBasedIndex);
    }

    // NOTE(review): this passes oneBasedIndex while the MS1 branch passes oneBasedScanNumber.
    // Left unchanged; the precursor scan number above is also position-based - confirm the intent.
    return new MsDataScan(
        mzmlMzSpectrum,
        oneBasedIndex,
        msOrder.Value,
        isCentroid.Value,
        polarity,
        rtInMinutes,
        new MzRange(low, high),
        scanFilter,
        analyzer,
        tic,
        injectionTime,
        null,
        nativeId,
        selectedIonMz,
        selectedIonCharge,
        selectedIonIntensity,
        isolationMz,
        lowIsolation + highIsolation, // isolation width is the sum of the two offsets
        dissociationType,
        precursorScanNumber,
        monoisotopicMz);
}
/// <summary>
/// Converts one spectrum of a deserialized mzML document into an <see cref="MsDataScan"/>.
/// </summary>
/// <remarks>
/// Reads the analyzer from the instrument configuration, the spectrum-level and scan-level cvParams,
/// the binary m/z and intensity arrays and the scan window. Peaks with intensity below 0.01 are
/// removed unless every peak is below that threshold. For spectra whose MS level is not 1 the first
/// precursor is read as well; an activation listing both ETD and HCD is reported as EThcD.
/// When the MS level or the centroid cvParam is missing, a scan with a null spectrum is returned.
/// </remarks>
/// <param name="_mzMLConnection">Deserialized mzML document.</param>
/// <param name="oneBasedIndex">One-based position of the spectrum in the spectrum list.</param>
/// <param name="filterParams">Optional peak filtering settings; null disables filtering.</param>
/// <returns>The scan built from the spectrum, or a placeholder scan with a null spectrum.</returns>
/// <exception cref="MzLibException">The spectrum is in profile mode.</exception>
private static MsDataScan GetMsDataOneBasedScanFromConnection(Generated.mzMLType _mzMLConnection, int oneBasedIndex, IFilteringParams filterParams)
{
    // Instrument configurations are declared once for the whole document (in mzML, at the start).
    var configs = _mzMLConnection.instrumentConfigurationList.instrumentConfiguration;
    var defaultInstrumentConfig = _mzMLConnection.run.defaultInstrumentConfigurationRef; // May be null!

    var spectrum = _mzMLConnection.run.spectrumList.spectrum[oneBasedIndex - 1];
    var scan = spectrum.scanList.scan[0];
    var scanSpecificInstrumentConfig = scan.instrumentConfigurationRef;

    MZAnalyzerType analyzer = default(MZAnalyzerType);
    if (scanSpecificInstrumentConfig == null || scanSpecificInstrumentConfig == defaultInstrumentConfig)
    {
        // Use the default: the first analyzer of the first configuration, when it lists components.
        if (configs[0].componentList != null
            && AnalyzerDictionary.TryGetValue(configs[0].componentList.analyzer[0].cvParam[0].accession, out MZAnalyzerType defaultAnalyzer))
        {
            analyzer = defaultAnalyzer;
        }
    }
    else
    {
        // Use the scan-specific configuration; an unrecognised accession leaves the default value.
        foreach (var config in configs)
        {
            if (config.id.Equals(scanSpecificInstrumentConfig))
            {
                AnalyzerDictionary.TryGetValue(config.componentList.analyzer[0].cvParam[0].accession, out analyzer);
            }
        }
    }

    string nativeId = spectrum.id;

    int? msOrder = null;
    bool? isCentroid = null;
    Polarity polarity = Polarity.Unknown;
    double tic = double.NaN;

    foreach (Generated.CVParamType cv in spectrum.cvParam)
    {
        if (cv.accession.Equals(_msnOrderAccession))
        {
            msOrder = int.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_centroidSpectrum))
        {
            isCentroid = true;
        }
        if (cv.accession.Equals(_profileSpectrum))
        {
            throw new MzLibException("Reading profile mode mzmls not supported");
        }
        if (cv.accession.Equals(_totalIonCurrent))
        {
            tic = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        // Assign only on a hit: a failed TryGetValue would otherwise overwrite polarity with the
        // enum's default value.
        if (polarity.Equals(Polarity.Unknown)
            && PolarityDictionary.TryGetValue(cv.accession, out Polarity recognisedPolarity))
        {
            polarity = recognisedPolarity;
        }
    }

    double rtInMinutes = double.NaN;
    string scanFilter = null;
    double? injectionTime = null;
    int oneBasedScanNumber = oneBasedIndex;
    if (scan.cvParam != null)
    {
        foreach (Generated.CVParamType cv in scan.cvParam)
        {
            if (cv.accession.Equals(_retentionTime))
            {
                rtInMinutes = double.Parse(cv.value, CultureInfo.InvariantCulture);
                if (cv.unitName == "second")
                {
                    rtInMinutes /= 60;
                }
            }
            if (cv.accession.Equals(_filterString))
            {
                scanFilter = cv.value;
            }
            if (cv.accession.Equals(_ionInjectionTime))
            {
                injectionTime = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            // Get the real one-based spectrum number if available; otherwise spectra are assumed
            // to be in order. This is present in .mgf->.mzml conversions from MSConvert.
            if (cv.accession.Equals(_oneBasedScanNumber))
            {
                oneBasedScanNumber = int.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    if (!msOrder.HasValue || !isCentroid.HasValue)
    {
        // One instance when this condition is true is when there is no m/z / intensity data, so a
        // scan with a null spectrum is returned. Scans with null spectra are checked later and
        // their scan numbers are reported to the reader.
        // msOrder itself may be the missing value, so it must not be read through .Value here;
        // GetValueOrDefault() yields 0 in that case.
        return new MsDataScan(
            null,
            oneBasedScanNumber,
            msOrder.GetValueOrDefault(),
            false, // have to return a value here because it is not nullable
            polarity,
            rtInMinutes,
            null,
            scanFilter,
            analyzer,
            tic,
            injectionTime,
            null,
            nativeId);
    }

    double[] masses = new double[0];
    double[] intensities = new double[0];

    foreach (Generated.BinaryDataArrayType binaryData in spectrum.binaryDataArrayList.binaryDataArray)
    {
        bool compressed = false;
        bool mzArray = false;
        bool intensityArray = false;
        // Treated as 32-bit unless a 64-bit cvParam says otherwise.
        bool is32bit = true;

        foreach (Generated.CVParamType cv in binaryData.cvParam)
        {
            compressed |= cv.accession.Equals(_zlibCompression);
            is32bit &= !cv.accession.Equals(_64bit);
            is32bit |= cv.accession.Equals(_32bit);
            mzArray |= cv.accession.Equals(_mzArray);
            intensityArray |= cv.accession.Equals(_intensityArray);
        }

        // In the future we may see scans with no data and there will be a crash here. If that
        // happens, an MsDataScan with null as the spectrum can be returned; scans with no spectra
        // are reported to the reader and left out of the scan list.
        double[] data = ConvertBase64ToDoubles(binaryData.binary, compressed, is32bit);
        if (mzArray)
        {
            masses = data;
        }
        if (intensityArray)
        {
            intensities = data;
        }
    }

    // Scan window limits stay NaN when the scan declares no window list.
    double high = double.NaN;
    double low = double.NaN;
    var aScanWindowList = scan.scanWindowList;
    if (aScanWindowList != null)
    {
        foreach (Generated.CVParamType cv in aScanWindowList.scanWindow[0].cvParam)
        {
            if (cv.accession.Equals(_scanWindowLowerLimit))
            {
                low = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            else if (cv.accession.Equals(_scanWindowUpperLimit))
            {
                high = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    // Remove zero intensity peaks. Nothing is removed when every peak is below the threshold.
    double zeroEquivalentIntensity = 0.01;
    int zeroIntensityCount = intensities.Count(intensity => intensity < zeroEquivalentIntensity);
    int intensityValueCount = intensities.Length;
    if (zeroIntensityCount > 0 && zeroIntensityCount < intensityValueCount)
    {
        // Sorting by intensity moves the near-zero peaks to the front so they can be cut off as
        // one prefix; afterwards the m/z order is restored.
        int keptCount = intensityValueCount - zeroIntensityCount;
        Array.Sort(intensities, masses);
        intensities = intensities.SubArray(zeroIntensityCount, keptCount);
        masses = masses.SubArray(zeroIntensityCount, keptCount);
        Array.Sort(masses, intensities);
    }

    bool trimThisLevel = filterParams != null
        && ((filterParams.ApplyTrimmingToMs1 && msOrder.Value == 1)
            || (filterParams.ApplyTrimmingToMsMs && msOrder.Value > 1));
    if (trimThisLevel && intensities.Length > 0)
    {
        WindowModeHelper(ref intensities, ref masses, filterParams, low, high);
    }

    // Order both arrays by ascending m/z before building the spectrum.
    Array.Sort(masses, intensities);
    var mzmlMzSpectrum = new MzSpectrum(masses, intensities, false);

    if (msOrder.Value == 1)
    {
        return new MsDataScan(
            mzmlMzSpectrum,
            oneBasedScanNumber,
            msOrder.Value,
            isCentroid.Value,
            polarity,
            rtInMinutes,
            new MzRange(low, high),
            scanFilter,
            analyzer,
            tic,
            injectionTime,
            null,
            nativeId);
    }

    var precursor = spectrum.precursorList.precursor[0];

    double selectedIonMz = double.NaN;
    int? selectedIonCharge = null;
    double? selectedIonIntensity = null;
    foreach (Generated.CVParamType cv in precursor.selectedIonList.selectedIon[0].cvParam)
    {
        if (cv.accession.Equals(_selectedIonMz))
        {
            selectedIonMz = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_precursorCharge))
        {
            selectedIonCharge = int.Parse(cv.value, CultureInfo.InvariantCulture);
        }
        if (cv.accession.Equals(_peakIntensity))
        {
            selectedIonIntensity = double.Parse(cv.value, CultureInfo.InvariantCulture);
        }
    }

    double? isolationMz = null;
    double lowIsolation = double.NaN;
    double highIsolation = double.NaN;
    if (precursor.isolationWindow != null)
    {
        foreach (Generated.CVParamType cv in precursor.isolationWindow.cvParam)
        {
            if (cv.accession.Equals(_isolationWindowTargetMZ))
            {
                isolationMz = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            if (cv.accession.Equals(_isolationWindowLowerOffset))
            {
                lowIsolation = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
            if (cv.accession.Equals(_isolationWindowUpperOffset))
            {
                highIsolation = double.Parse(cv.value, CultureInfo.InvariantCulture);
            }
        }
    }

    DissociationType dissociationType = DissociationType.Unknown;
    if (precursor.activation.cvParam != null)
    {
        // For EThcD scans the dissociation type is not listed as EThcD. The list holds two
        // different dissociation types, one ETD and one HCD, so that case is interpreted as EThcD.
        List<DissociationType> scanDissociationTypes = new List<DissociationType>();
        foreach (Generated.CVParamType cv in precursor.activation.cvParam)
        {
            if (DissociationDictionary.TryGetValue(cv.accession, out var scanDissociationType))
            {
                scanDissociationTypes.Add(scanDissociationType);
            }
        }

        if (scanDissociationTypes.Contains(DissociationType.ETD) && scanDissociationTypes.Contains(DissociationType.HCD))
        {
            dissociationType = DissociationType.EThcD;
        }
        else if (scanDissociationTypes.Count > 0)
        {
            // Otherwise the first recognised entry wins.
            dissociationType = scanDissociationTypes[0];
        }
    }

    double? monoisotopicMz = null;
    if (scan.userParam != null)
    {
        foreach (var userParam in scan.userParam)
        {
            // Ordinal comparison: the suffix is a fixed identifier, not linguistic text.
            if (userParam.name.EndsWith("Monoisotopic M/Z:", StringComparison.Ordinal))
            {
                monoisotopicMz = double.Parse(userParam.value, CultureInfo.InvariantCulture);
            }
        }
    }

    int? precursorScanNumber = null;
    if (precursor.spectrumRef != null)
    {
        precursorScanNumber = GetOneBasedPrecursorScanNumber(_mzMLConnection, oneBasedIndex);
    }

    // NOTE(review): this passes oneBasedIndex while the MS1 branch passes oneBasedScanNumber.
    // Left unchanged; the precursor scan number above is also position-based - confirm the intent.
    return new MsDataScan(
        mzmlMzSpectrum,
        oneBasedIndex,
        msOrder.Value,
        isCentroid.Value,
        polarity,
        rtInMinutes,
        new MzRange(low, high),
        scanFilter,
        analyzer,
        tic,
        injectionTime,
        null,
        nativeId,
        selectedIonMz,
        selectedIonCharge,
        selectedIonIntensity,
        isolationMz,
        lowIsolation + highIsolation, // isolation width is the sum of the two offsets
        dissociationType,
        precursorScanNumber,
        monoisotopicMz);
}
/// <summary>
/// Builds a scan object from the spectrum stored at a one-based position in a deserialized mzML document.
/// </summary>
/// <remarks>
/// The method decodes the binary m/z and intensity arrays, then reads the spectrum-level cvParams
/// (MS level, centroid/profile flag, polarity, total ion current), the first scan's cvParams
/// (retention time, filter string, ion injection time) and the first scan window's limits.
/// For spectra whose MS level is not 1 it additionally reads the first precursor's selected ion,
/// isolation window and activation cvParams, plus a "Monoisotopic M/Z:" userParam when one exists.
/// All numeric text is parsed with the invariant culture so the result does not depend on the
/// machine's regional settings.
/// <c>msOrder.Value</c> and <c>isCentroid.Value</c> throw <c>InvalidOperationException</c> when the
/// corresponding cvParam is absent; so does <c>isolationMz.Value</c> for non-MS1 spectra whose
/// isolation window has no target m/z entry.
/// </remarks>
/// <param name="_mzMLConnection">Deserialized mzML document to read from.</param>
/// <param name="oneBasedSpectrumNumber">One-based index into <c>run.spectrumList.spectrum</c>.</param>
/// <returns>
/// An <c>MzmlScan</c> when the MS level equals 1; otherwise an <c>MzmlScanWithPrecursor</c>.
/// </returns>
private static IMzmlScan GetMsDataOneBasedScanFromConnection(Generated.mzMLType _mzMLConnection, int oneBasedSpectrumNumber)
{
    // Numeric values are parsed culture-independently; a comma-decimal locale must not change the result.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Resolve the spectrum once instead of re-walking the object graph for every access.
    var spectrum = _mzMLConnection.run.spectrumList.spectrum[oneBasedSpectrumNumber - 1];

    double[] masses = null;
    double[] intensities = null;

    foreach (Generated.BinaryDataArrayType binaryData in spectrum.binaryDataArrayList.binaryDataArray)
    {
        bool compressed = false;
        bool mzArray = false;
        bool intensityArray = false;
        bool is32bit = true;

        foreach (Generated.CVParamType cv in binaryData.cvParam)
        {
            compressed |= cv.accession.Equals(_zlibCompression);
            // 32-bit is assumed until a 64-bit marker is seen; an explicit 32-bit marker re-enables it.
            is32bit &= !cv.accession.Equals(_64bit);
            is32bit |= cv.accession.Equals(_32bit);
            mzArray |= cv.accession.Equals(_mzArray);
            intensityArray |= cv.accession.Equals(_intensityArray);
        }

        double[] data = ConvertBase64ToDoubles(binaryData.binary, compressed, is32bit);
        if (mzArray)
        {
            masses = data;
        }
        if (intensityArray)
        {
            intensities = data;
        }
    }

    var ok = new MzmlMzSpectrum(masses, intensities, false);

    int? msOrder = null;
    bool? isCentroid = null;
    Polarity polarity = Polarity.Unknown;
    double tic = double.NaN;

    foreach (Generated.CVParamType cv in spectrum.cvParam)
    {
        if (cv.accession.Equals(_msnOrderAccession))
        {
            msOrder = int.Parse(cv.value, invariant);
        }
        if (cv.accession.Equals(_centroidSpectrum))
        {
            isCentroid = true;
        }
        if (cv.accession.Equals(_profileSpectrum))
        {
            isCentroid = false;
        }
        if (cv.accession.Equals(_totalIonCurrent))
        {
            tic = double.Parse(cv.value, invariant);
        }

        // TryGetValue writes default(T) to its out argument on a miss, so look up into a
        // temporary and only commit on a hit; otherwise any cvParam following the polarity
        // entry would erase the polarity that was already found.
        if (polarityDictionary.TryGetValue(cv.accession, out Polarity matchedPolarity))
        {
            polarity = matchedPolarity;
        }
    }

    var firstScan = spectrum.scanList.scan[0];

    double rtInMinutes = double.NaN;
    string scanFilter = null;
    double? injectionTime = null;

    if (firstScan.cvParam != null)
    {
        foreach (Generated.CVParamType cv in firstScan.cvParam)
        {
            if (cv.accession.Equals(_retentionTime))
            {
                rtInMinutes = double.Parse(cv.value, invariant);
                // Values whose unit is "second" are converted to minutes.
                if (cv.unitName == "second")
                {
                    rtInMinutes /= 60;
                }
            }
            if (cv.accession.Equals(_filterString))
            {
                scanFilter = cv.value;
            }
            if (cv.accession.Equals(_ionInjectionTime))
            {
                injectionTime = double.Parse(cv.value, invariant);
            }
        }
    }

    double high = double.NaN;
    double low = double.NaN;

    if (firstScan.scanWindowList != null)
    {
        foreach (Generated.CVParamType cv in firstScan.scanWindowList.scanWindow[0].cvParam)
        {
            if (cv.accession.Equals(_scanWindowLowerLimit))
            {
                low = double.Parse(cv.value, invariant);
            }
            if (cv.accession.Equals(_scanWindowUpperLimit))
            {
                high = double.Parse(cv.value, invariant);
            }
        }
    }

    if (msOrder.Value == 1)
    {
        return new MzmlScan(
            oneBasedSpectrumNumber,
            ok,
            msOrder.Value,
            isCentroid.Value,
            polarity,
            rtInMinutes,
            new MzRange(low, high),
            scanFilter,
            GetMzAnalyzer(_mzMLConnection, scanFilter),
            tic,
            injectionTime);
    }

    // Everything below applies only to spectra that are not MS1; only the first precursor is read.
    var precursor = spectrum.precursorList.precursor[0];

    double selectedIonMz = double.NaN;
    int? selectedIonCharge = null;
    double? selectedIonIntensity = null;

    foreach (Generated.CVParamType cv in precursor.selectedIonList.selectedIon[0].cvParam)
    {
        if (cv.accession.Equals(_selectedIonMz))
        {
            selectedIonMz = double.Parse(cv.value, invariant);
        }
        if (cv.accession.Equals(_precursorCharge))
        {
            selectedIonCharge = int.Parse(cv.value, invariant);
        }
        if (cv.accession.Equals(_peakIntensity))
        {
            selectedIonIntensity = double.Parse(cv.value, invariant);
        }
    }

    double? isolationMz = null;
    double lowIsolation = double.NaN;
    double highIsolation = double.NaN;

    foreach (Generated.CVParamType cv in precursor.isolationWindow.cvParam)
    {
        if (cv.accession.Equals(_isolationWindowTargetMZ))
        {
            isolationMz = double.Parse(cv.value, invariant);
        }
        if (cv.accession.Equals(_isolationWindowLowerOffset))
        {
            lowIsolation = double.Parse(cv.value, invariant);
        }
        if (cv.accession.Equals(_isolationWindowUpperOffset))
        {
            highIsolation = double.Parse(cv.value, invariant);
        }
    }

    DissociationType dissociationType = DissociationType.Unknown;

    // An activation element without cvParams leaves the dissociation type as Unknown.
    if (precursor.activation.cvParam != null)
    {
        foreach (Generated.CVParamType cv in precursor.activation.cvParam)
        {
            // Commit only on a hit (see the polarity lookup above in this method): a miss must
            // not reset a dissociation type recognised from an earlier cvParam.
            if (dissociationDictionary.TryGetValue(cv.accession, out DissociationType matchedDissociation))
            {
                dissociationType = matchedDissociation;
            }
        }
    }

    double? monoisotopicMz = null;

    if (firstScan.userParam != null)
    {
        foreach (var userParam in firstScan.userParam)
        {
            if (userParam.name.EndsWith("Monoisotopic M/Z:", System.StringComparison.Ordinal))
            {
                monoisotopicMz = double.Parse(userParam.value, invariant);
            }
        }
    }

    return new MzmlScanWithPrecursor(
        oneBasedSpectrumNumber,
        ok,
        msOrder.Value,
        isCentroid.Value,
        polarity,
        rtInMinutes,
        new MzRange(low, high),
        scanFilter,
        GetMzAnalyzer(_mzMLConnection, scanFilter),
        tic,
        selectedIonMz,
        selectedIonCharge,
        selectedIonIntensity,
        isolationMz.Value,
        // The isolation width passed on is the sum of the lower and upper offsets.
        lowIsolation + highIsolation,
        dissociationType,
        GetOneBasedPrecursorScanNumber(_mzMLConnection, oneBasedSpectrumNumber),
        monoisotopicMz,
        injectionTime);
}