/// <summary>
/// Handle a single binaryDataArray element and child nodes
/// Called by ReadBinaryDataArrayList (xml hierarchy)
/// </summary>
/// <param name="reader">XmlReader that is only valid for the scope of the single binaryDataArray element; it is closed before a normal return</param>
/// <param name="defaultLength">Default array length, coming from spectrum attribute; used unless the element carries a positive "arrayLength" attribute</param>
/// <returns>A BinaryDataArray with its ArrayLength, ArrayType, Precision and decoded Data set from the element</returns>
/// <exception cref="System.IO.InvalidDataException">The decoded payload does not hold exactly ArrayLength values of the declared precision</exception>
private BinaryDataArray ReadBinaryDataArray(XmlReader reader, int defaultLength)
{
    reader.MoveToContent();
    BinaryDataArray bda = new BinaryDataArray();
    bda.ArrayLength = defaultLength;

    // Override the default; a missing attribute converts to 0 and leaves the default in place
    int arrLength = Convert.ToInt32(reader.GetAttribute("arrayLength"), System.Globalization.CultureInfo.InvariantCulture);
    if (arrLength > 0)
    {
        bda.ArrayLength = arrLength;
    }

    bool compressed = false;
    reader.ReadStartElement("binaryDataArray"); // Throws exception if we are not at the "binaryDataArray" tag.
    List<Param> paramList = new List<Param>();
    while (reader.ReadState == ReadState.Interactive)
    {
        // Handle exiting out properly at EndElement tags
        if (reader.NodeType != XmlNodeType.Element)
        {
            reader.Read();
            continue;
        }

        switch (reader.Name)
        {
            case "referenceableParamGroupRef":
                // Schema requirements: zero to many instances of this element
                string rpgRef = reader.GetAttribute("ref");
                paramList.AddRange(_referenceableParamGroups[rpgRef]);
                reader.Read();
                break;
            case "cvParam":
                // Schema requirements: zero to many instances of this element
                paramList.Add(ReadCvParam(reader.ReadSubtree()));
                reader.Read(); // Consume the cvParam element (no child nodes)
                break;
            case "userParam":
                // Schema requirements: zero to many instances of this element
                paramList.Add(ReadUserParam(reader.ReadSubtree()));
                reader.Read();
                break;
            case "binary":
                // Process the ParamList first: the params collected so far decide how the payload is decoded.
                foreach (Param param in paramList)
                {
                    /*
                     * MUST supply a *child* term of MS:1000572 (binary data compression type) only once
                     *   e.g.: MS:1000574 (zlib compression)
                     *   e.g.: MS:1000576 (no compression)
                     * MUST supply a *child* term of MS:1000513 (binary data array) only once
                     *   e.g.: MS:1000514 (m/z array)
                     *   e.g.: MS:1000515 (intensity array)
                     *   e.g.: MS:1000516 (charge array)
                     *   e.g.: MS:1000517 (signal to noise array)
                     *   e.g.: MS:1000595 (time array)
                     *   e.g.: MS:1000617 (wavelength array)
                     *   e.g.: MS:1000786 (non-standard data array)
                     *   e.g.: MS:1000820 (flow rate array)
                     *   e.g.: MS:1000821 (pressure array)
                     *   e.g.: MS:1000822 (temperature array)
                     * MUST supply a *child* term of MS:1000518 (binary data type) only once
                     *   e.g.: MS:1000521 (32-bit float)
                     *   e.g.: MS:1000523 (64-bit float)
                     */
                    switch (param.Accession)
                    {
                        // Binary data compression type
                        case "MS:1000574": // zlib compression
                            compressed = true;
                            break;
                        case "MS:1000576": // no compression
                            compressed = false;
                            break;

                        // Binary data array (what the values represent)
                        case "MS:1000514": // m/z array
                            bda.ArrayType = ArrayType.m_z_array;
                            break;
                        case "MS:1000515": // intensity array
                            bda.ArrayType = ArrayType.intensity_array;
                            break;
                        case "MS:1000516": // charge array
                            bda.ArrayType = ArrayType.charge_array;
                            break;
                        case "MS:1000517": // signal to noise array
                            bda.ArrayType = ArrayType.signal_to_noise_array;
                            break;
                        case "MS:1000595": // time array
                            bda.ArrayType = ArrayType.time_array;
                            break;
                        case "MS:1000617": // wavelength array
                            bda.ArrayType = ArrayType.wavelength_array;
                            break;
                        case "MS:1000786": // non-standard data array
                            bda.ArrayType = ArrayType.non_standard_data_array;
                            break;
                        case "MS:1000820": // flow rate array
                            bda.ArrayType = ArrayType.flow_rate_array;
                            break;
                        case "MS:1000821": // pressure array
                            bda.ArrayType = ArrayType.pressure_array;
                            break;
                        case "MS:1000822": // temperature array
                            bda.ArrayType = ArrayType.temperature_array;
                            break;

                        // Binary data type
                        case "MS:1000521": // 32-bit float
                            bda.Precision = Precision.Precision32;
                            break;
                        case "MS:1000523": // 64-bit float
                            bda.Precision = Precision.Precision64;
                            break;
                    }
                }

                // Anything other than Precision32 is decoded as 8-byte values.
                int dataSize = 8;
                if (bda.Precision == Precision.Precision32)
                {
                    dataSize = 4;
                }

                byte[] bytes = Convert.FromBase64String(reader.ReadElementContentAsString()); // Consumes the start and end elements.
                if (compressed)
                {
                    bytes = DecompressZLib(bytes, bda.ArrayLength * dataSize);
                }

                // Fail with a clear message instead of an index/argument error inside the decode loop
                // (payload too long or not a whole number of values) or a silently zero-padded array (payload too short).
                if ((long)bda.ArrayLength * dataSize != bytes.Length)
                {
                    throw new System.IO.InvalidDataException(
                        "binaryDataArray payload is " + bytes.Length + " bytes after decoding, but " +
                        bda.ArrayLength + " values of " + dataSize + " bytes each were expected.");
                }

                // mzML binary data should always be Little Endian. Some other data formats may use Big Endian, which would require a byte swap
                bda.Data = new double[bda.ArrayLength];
                for (int i = 0; i < bda.ArrayLength; i++)
                {
                    int offset = i * dataSize;
                    if (dataSize == 4)
                    {
                        bda.Data[i] = BitConverter.ToSingle(bytes, offset);
                    }
                    else
                    {
                        bda.Data[i] = BitConverter.ToDouble(bytes, offset);
                    }
                }
                break;
            default:
                reader.Skip();
                break;
        }
    }
    reader.Close();
    return bda;
}
/// <summary>
/// Return the result of calling ToArray() on the <c>data</c> member of a binary data array
/// </summary>
/// <param name="binaryDataArray">Binary data array whose <c>data</c> member is converted</param>
/// <returns>The array produced by <c>data.ToArray()</c></returns>
private static double[] ToArray(BinaryDataArray binaryDataArray)
{
    // NOTE(review): the reader methods in this file populate "Data" (capital D); confirm "data" is the intended member.
    var values = binaryDataArray.data;
    return values.ToArray();
}
/// <summary>
/// Handle a single spectrum element and child nodes
/// Called by ReadSpectrumList (xml hierarchy)
/// </summary>
/// <param name="reader">XmlReader that is only valid for the scope of the single spectrum element; it is closed once the element has been read</param>
/// <param name="includePeaks">Whether to read binary data arrays</param>
/// <returns>A ProductSpectrum when the element carries the "MSn spectrum" term, otherwise a Spectrum; MsLevel, ElutionTime, NativeId and TotalIonCurrent are set on either</returns>
private Spectrum ReadSpectrum(XmlReader reader, bool includePeaks = true)
{
    reader.MoveToContent();
    // This is correct for Thermo files converted by msConvert, but need to implement for others as well
    string spectrumId = reader.GetAttribute("id"); // Native ID in mzML_1.1.0; unique identifier in mzML_1.0.0, often same as nativeID
    string nativeId = spectrumId;
    if (_version == MzML_Version.mzML1_0_0)
    {
        nativeId = reader.GetAttribute("nativeID"); // Native ID in mzML_1.0.0
    }

    int scanNum;
    if (_randomAccess)
    {
        // If a random access reader, there is already a scan number stored, based on the order of the index. Use it instead.
        scanNum = (int)(_spectrumOffsets.NativeToIdMap[nativeId]);
    }
    else
    {
        // The artificial counter advances for every spectrum, even when the native ID can be interpreted.
        scanNum = (int)(_artificialScanNum++);
        // Interpret the NativeID (if the format has an interpreter) and use it instead of the artificial number.
        // TODO: Better handling than the artificial ID for other nativeIDs (ones currently not supported)
        int num = 0;
        if (NativeIdConversion.TryGetScanNumberInt(nativeId, out num))
        {
            scanNum = num;
        }
    }

    // XML numeric attributes are culture-independent, so parse them with the invariant culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    int defaultArraySize = Convert.ToInt32(reader.GetAttribute("defaultArrayLength"), invariant);
    reader.ReadStartElement("spectrum"); // Throws exception if we are not at the "spectrum" tag.

    bool isMsMs = false;
    int msLevel = 0;
    bool centroided = false;
    double tic = 0;
    List<Precursor> precursors = new List<Precursor>();
    List<ScanData> scans = new List<ScanData>();
    List<BinaryDataArray> bdas = new List<BinaryDataArray>();
    while (reader.ReadState == ReadState.Interactive)
    {
        // Handle exiting out properly at EndElement tags
        if (reader.NodeType != XmlNodeType.Element)
        {
            reader.Read();
            continue;
        }

        // ------------------------------------------------------------------------------------
        // MS1 Spectra: only need Spectrum data: scanNum, MSLevel, ElutionTime, mzArray, IntensityArray
        //
        // MS2 Spectra: use ProductSpectrum; adds ActivationMethod and IsolationWindow
        // ------------------------------------------------------------------------------------
        switch (reader.Name)
        {
            case "referenceableParamGroupRef":
                // Schema requirements: zero to many instances of this element
                reader.Skip();
                break;
            case "cvParam":
                // Schema requirements: zero to many instances of this element
                /* MAY supply a *child* term of MS:1000465 (scan polarity) only once
                 *   e.g.: MS:1000129 (negative scan)
                 *   e.g.: MS:1000130 (positive scan)
                 * MUST supply a *child* term of MS:1000559 (spectrum type) only once
                 *   e.g.: MS:1000322 (charge inversion mass spectrum)
                 *   e.g.: MS:1000325 (constant neutral gain spectrum)
                 *   e.g.: MS:1000326 (constant neutral loss spectrum)
                 *   e.g.: MS:1000328 (e/2 mass spectrum)
                 *   e.g.: MS:1000341 (precursor ion spectrum)
                 *   e.g.: MS:1000579 (MS1 spectrum)
                 *   e.g.: MS:1000580 (MSn spectrum)
                 *   e.g.: MS:1000581 (CRM spectrum)
                 *   e.g.: MS:1000582 (SIM spectrum)
                 *   e.g.: MS:1000583 (SRM spectrum)
                 *   e.g.: MS:1000620 (PDA spectrum)
                 *   e.g.: MS:1000627 (selected ion current chromatogram)
                 *   e.g.: MS:1000789 (enhanced multiply charged spectrum)
                 *   e.g.: MS:1000790 (time-delayed fragmentation spectrum)
                 *   et al.
                 * MUST supply term MS:1000525 (spectrum representation) or any of its children only once
                 *   e.g.: MS:1000127 (centroid spectrum)
                 *   e.g.: MS:1000128 (profile spectrum)
                 * MAY supply a *child* term of MS:1000499 (spectrum attribute) one or more times
                 *   e.g.: MS:1000285 (total ion current)
                 *   e.g.: MS:1000497 (zoom scan)
                 *   e.g.: MS:1000504 (base peak m/z)
                 *   e.g.: MS:1000505 (base peak intensity)
                 *   e.g.: MS:1000511 (ms level)
                 *   e.g.: MS:1000527 (highest observed m/z)
                 *   e.g.: MS:1000528 (lowest observed m/z)
                 *   e.g.: MS:1000618 (highest observed wavelength)
                 *   e.g.: MS:1000619 (lowest observed wavelength)
                 *   e.g.: MS:1000796 (spectrum title)
                 *   et al.
                 */
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000127": // name="centroid spectrum"
                        centroided = true;
                        break;
                    case "MS:1000128": // name="profile spectrum"
                        centroided = false;
                        break;
                    case "MS:1000511": // name="ms level"
                        msLevel = Convert.ToInt32(reader.GetAttribute("value"), invariant);
                        break;
                    case "MS:1000579": // name="MS1 spectrum"
                        isMsMs = false;
                        break;
                    case "MS:1000580": // name="MSn spectrum"
                        isMsMs = true;
                        break;
                    case "MS:1000285": // name="total ion current"
                        // Invariant culture: a comma-decimal current culture would otherwise misread or reject the value.
                        tic = Convert.ToDouble(reader.GetAttribute("value"), invariant);
                        break;
                }
                reader.Read(); // Consume the cvParam element (no child nodes)
                break;
            case "userParam":
                // Schema requirements: zero to many instances of this element
                reader.Skip();
                break;
            case "spectrumDescription": // mzML_1.0.0 compatibility
                // Schema requirements: one instance of this element
                ReadSpectrumDescription(reader.ReadSubtree(), ref scans, ref precursors, out centroided);
                reader.ReadEndElement(); // "spectrumDescription" must have child nodes
                break;
            case "scanList":
                // Schema requirements: zero to one instances of this element
                scans.AddRange(ReadScanList(reader.ReadSubtree()));
                reader.ReadEndElement(); // "scanList" must have child nodes
                break;
            case "precursorList":
                // Schema requirements: zero to one instances of this element
                precursors.AddRange(ReadPrecursorList(reader.ReadSubtree()));
                reader.ReadEndElement(); // "precursorList" must have child nodes
                break;
            case "productList":
                // Schema requirements: zero to one instances of this element
                reader.Skip();
                break;
            case "binaryDataArrayList":
                // Schema requirements: zero to one instances of this element
                if (includePeaks)
                {
                    bdas.AddRange(ReadBinaryDataArrayList(reader.ReadSubtree(), defaultArraySize));
                    reader.ReadEndElement(); // "binaryDataArrayList" must have child nodes
                }
                else
                {
                    reader.Skip();
                }
                break;
            default:
                reader.Skip();
                break;
        }
    }
    reader.Close();

    // Process the spectrum data: pick out the m/z and intensity arrays (the last of each kind wins).
    BinaryDataArray mzs = new BinaryDataArray();
    BinaryDataArray intensities = new BinaryDataArray();
    foreach (var bda in bdas)
    {
        if (bda.ArrayType == ArrayType.m_z_array)
        {
            mzs = bda;
        }
        else if (bda.ArrayType == ArrayType.intensity_array)
        {
            intensities = bda;
        }
    }

    if (!centroided && includePeaks)
    {
        // Not flagged as centroided: run the peaks through Centroider and keep its output.
        var centroider = new Centroider(mzs.Data, intensities.Data);
        double[] centroidedMzs, centroidedIntensities;
        centroider.GetCentroidedData(out centroidedMzs, out centroidedIntensities);
        mzs.Data = centroidedMzs;
        intensities.Data = centroidedIntensities;
    }

    // TODO: Should do something else to appropriately handle combinations of several scans; the first one is used.
    ScanData scan = new ScanData();
    if (scans.Count > 0)
    {
        scan = scans[0];
    }

    Spectrum spectrum;
    if (isMsMs)
    {
        // TODO: Should do something else to appropriately handle multiple precursors; the first one is used.
        Precursor precursor = new Precursor();
        if (precursors.Count > 0)
        {
            precursor = precursors[0];
        }

        // TODO: Should do something else to appropriately handle multiple selected ions; the first one is used.
        SelectedIon ion = new SelectedIon();
        if (precursor.Ions.Count > 0)
        {
            ion = precursor.Ions[0];
        }

        var pspectrum = new ProductSpectrum(mzs.Data, intensities.Data, scanNum);
        pspectrum.ActivationMethod = precursor.Activation;
        // Select mz value to use based on presence of a Thermo-specific user param.
        // The user param has a slightly higher precision, if that matters.
        double mz = scan.MonoisotopicMz == 0.0 ? ion.SelectedIonMz : scan.MonoisotopicMz;
        pspectrum.IsolationWindow = new IsolationWindow(
            precursor.IsolationWindowTargetMz,
            precursor.IsolationWindowLowerOffset,
            precursor.IsolationWindowUpperOffset,
            mz,
            ion.Charge);
        spectrum = pspectrum;
    }
    else
    {
        spectrum = new Spectrum(mzs.Data, intensities.Data, scanNum);
    }

    spectrum.MsLevel = msLevel;
    spectrum.ElutionTime = scan.StartTime;
    spectrum.NativeId = nativeId;
    spectrum.TotalIonCurrent = tic;
    return spectrum;
}