/// <summary>
/// Create an MzMLReader for the given file
/// </summary>
/// <param name="filePath">Path to the mzML file</param>
/// <param name="randomAccess">True to configure the reader for random access</param>
/// <param name="tryReducingMemoryUsage">True to try to avoid holding all spectra in memory. This reduces memory usage for a non-random access MzMLReader, as long as ReadMassSpectrum(int) isn't used.</param>
public MzMLReader(string filePath, bool randomAccess = false, bool tryReducingMemoryUsage = true)
{
    // Reader mode flags, taken directly from the caller
    _reduceMemoryUsage = tryReducingMemoryUsage;
    _randomAccess = randomAccess;

    // Defaults that stay in place until the file contents say otherwise
    _version = MzML_Version.mzML1_1_0;
    _instrument = Instrument.Unknown;

    // The unzipped path starts out identical to the supplied path
    _filePath = filePath;
    _unzippedFilePath = _filePath;

    // Must run last: all fields above are assigned before the file handles are configured
    ConfigureFileHandles();
}
/// <summary>
/// Read and parse a .mzML file.
/// Files are commonly larger than 100 MB, so a streaming reader is used instead of a DOM reader.
/// What gets read depends on configuration: the call returns immediately in random access mode
/// when both the index and the metadata are already available, and reading stops after the
/// "run" element when random access or reduced memory usage is enabled.
/// </summary>
/// <remarks>
/// <see cref="XmlReader.ReadStartElement(string)"/> throws if the reader is not positioned on the
/// "mzML" start tag. That is treated as a critical error and is deliberately not caught here.
/// </remarks>
private void ReadMzMl()
{
    // Nothing left to do: random access already has both the index and the metadata
    if (_randomAccess && _haveIndex && _haveMetaData)
    {
        return;
    }

    // NOTE(review): this reader is only closed at the end of the method when _reduceMemoryUsage
    // is false; presumably ReadRunData keeps using its subtree reader afterwards - confirm.
    var reader = XmlReader.Create(_fileReader, _xSettings);

    // Guarantee a move to the root node
    reader.MoveToContent();

    // Captured only after the first read from _fileReader
    // (presumably so that the detected encoding is reported - confirm)
    if (_encoding == null)
    {
        _encoding = _fileReader.CurrentEncoding;
    }

    // Non-null only for indexed mzML; holds the outermost reader so that it can be closed at the end
    XmlReader indexReader = null;
    if (reader.Name == "indexedmzML")
    {
        indexReader = reader;

        // Read to the mzML root tag, and ignore the extra indexedmzML data
        reader.ReadToDescendant("mzML");

        if (_randomAccess && !_haveIndex)
        {
            // Read the index from the end of the file first; the rest of the file is then
            // read only for the data that is still needed
            ReadIndexFromEnd();
        }

        // From here on only the mzML element and its children are visible.
        // The trailing indexedmzML elements are not parsed by this method.
        reader = reader.ReadSubtree();
        reader.MoveToContent();
    }

    // mzML 1.1.0 is assumed unless the schema location names the 1.0.0 schema.
    // GetAttribute returns null when the attribute is absent; keep the current version in that case.
    string schemaName = reader.GetAttribute("xsi:schemaLocation");
    if (schemaName != null && schemaName.Contains("mzML1.0.0.xsd"))
    {
        _version = MzML_Version.mzML1_0_0;
    }

    // Consume the mzML root tag.
    // Throws an exception if we are not at the "mzML" tag.
    // This is a critical error; we want to stop processing for this file if we encounter it.
    reader.ReadStartElement("mzML");

    bool continueReading = true;

    // The reader is now on the first child node; handle each 1st level element as a chunk
    while (reader.ReadState == ReadState.Interactive && continueReading)
    {
        // Step over anything that is not a start element (end tags, whitespace, ...)
        if (reader.NodeType != XmlNodeType.Element)
        {
            reader.Read();
            continue;
        }

        switch (reader.Name)
        {
            case "fileDescription":
                // Schema requirements: one instance of this element
                if (!_randomAccess || !_haveMetaData)
                {
                    ReadFileDescription(reader.ReadSubtree());
                    reader.ReadEndElement(); // "fileDescription" must have child nodes
                }
                else
                {
                    // Metadata was already read on an earlier pass
                    reader.Skip();
                }
                break;

            case "referenceableParamGroupList":
                // Schema requirements: zero to one instances of this element
                if (!_randomAccess || !_haveMetaData)
                {
                    ReadReferenceableParamGroupList(reader.ReadSubtree());
                    reader.ReadEndElement(); // "referenceableParamGroupList" must have child nodes
                }
                else
                {
                    // Metadata was already read on an earlier pass
                    reader.Skip();
                }
                break;

            case "run":
                // Schema requirements: one instance of this element
                // ReadSubtree() provides an XmlReader that is only valid for the element and its child nodes
                ReadRunData(reader.ReadSubtree());
                if (_randomAccess || _reduceMemoryUsage)
                {
                    // Stop reading here. The rest of "run" is deliberately not skipped,
                    // since skipping it can take some time.
                    continueReading = false;
                }
                else
                {
                    // "run" might not have any child nodes.
                    // This consumes either the EndElement, or the same element that was passed
                    // to ReadRunData (in case of no child nodes).
                    reader.Read();
                }
                break;

            // Known elements that nothing is read from:
            case "cvList":                      // Schema requirements: one instance
            case "sampleList":                  // Schema requirements: zero to one instances
            case "softwareList":                // Schema requirements: one instance
            case "scanSettingsList":            // Schema requirements: zero to one instances
            case "instrumentConfigurationList": // Schema requirements: one instance
            case "dataProcessingList":          // Schema requirements: one instance
            case "acquisitionSettingsList":     // mzML 1.0.0 compatibility; zero to one instances
            default:
                // We are not reading anything out of the tag, so bypass it
                reader.Skip();
                break;
        }
    }

    _haveMetaData = true;

    // The whole file has been consumed only when neither early-exit mode is active
    if (!_randomAccess && !_reduceMemoryUsage)
    {
        _allRead = true;
    }

    if (!_reduceMemoryUsage)
    {
        // Don't worry about closing the subtree readers, just close the root reader.
        // "reader" is the root unless the file is an indexed mzML file, in which case
        // "indexReader" holds the root.
        (indexReader ?? reader).Close();
    }
}