예제 #1
0
        /// <summary>
        /// Creates an <see cref="MzMLReader"/> for the given file, stores the requested
        /// access options, and then configures the file handles.
        /// </summary>
        /// <param name="filePath">Path to the mzML file</param>
        /// <param name="randomAccess">True if the reader should be configured for random access</param>
        /// <param name="tryReducingMemoryUsage">True if the reader should try to avoid reading all spectra into memory. This reduces memory usage for a non-random access reader, as long as ReadMassSpectrum(int) isn't used.</param>
        public MzMLReader(string filePath, bool randomAccess = false, bool tryReducingMemoryUsage = true)
        {
            // Access-mode options requested by the caller
            _randomAccess = randomAccess;
            _reduceMemoryUsage = tryReducingMemoryUsage;

            // Starting values: unknown instrument, mzML 1.1.0 schema
            _version = MzML_Version.mzML1_1_0;
            _instrument = Instrument.Unknown;

            // The unzipped path starts out identical to the supplied path
            _filePath = filePath;
            _unzippedFilePath = filePath;

            ConfigureFileHandles();
        }
예제 #2
0
 /// <summary>
 /// Read and parse a .mzML file.
 /// Files are commonly larger than 100 MB, so a streaming reader is used instead of a DOM reader.
 /// How much is read depends on the configuration (random access and reduced memory usage flags).
 /// </summary>
 /// <remarks>
 /// The XML reader created here is only closed by this method when memory reduction is turned off.
 /// NOTE(review): presumably ReadRunData keeps using the reader for later streamed reads in the
 /// reduced-memory case - confirm before changing the reader's lifetime.
 /// </remarks>
 private void ReadMzMl()
 {
     // Random access with both the index and the metadata already loaded: nothing left to read
     if (_randomAccess && _haveIndex && _haveMetaData)
     {
         return;
     }

     var reader = XmlReader.Create(_fileReader, _xSettings);
     // Guarantee a move to the root node
     reader.MoveToContent();
     if (_encoding == null)
     {
         _encoding = _fileReader.CurrentEncoding;
     }

     // Non-null only for indexed mzML files, where it is the root reader wrapping the mzML subtree
     XmlReader indexReader = null;
     if (reader.Name == "indexedmzML")
     {
         indexReader = reader;
         // Read to the mzML root tag, and ignore the extra indexedmzML data
         reader.ReadToDescendant("mzML");
         if (_randomAccess && !_haveIndex)
         {
             // Run to the end of the file (using stream.position = stream.length) and jump backwards
             // to read the index first, and then read the file for needed data
             ReadIndexFromEnd();
         }
         reader = reader.ReadSubtree();
         reader.MoveToContent();
     }

     // We automatically assume the mzML 1.1.0 schema; only check for the old version.
     // GetAttribute returns null when the attribute is absent, so guard before searching it.
     string schemaName = reader.GetAttribute("xsi:schemaLocation");
     if (schemaName != null && schemaName.Contains("mzML1.0.0.xsd"))
     {
         _version = MzML_Version.mzML1_0_0;
     }

     // Consume the mzML root tag
     // Throws exception if we are not at the "mzML" tag.
     // This is a critical error; we want to stop processing for this file if we encounter this error
     reader.ReadStartElement("mzML");

     bool continueReading = true;
     // Read the next node - should be the first child node
     while (reader.ReadState == ReadState.Interactive && continueReading)
     {
         // Handle exiting out properly at EndElement tags
         if (reader.NodeType != XmlNodeType.Element)
         {
             reader.Read();
             continue;
         }

         // Handle each 1st level as a chunk
         switch (reader.Name)
         {
             case "fileDescription":
                 // Schema requirements: one instance of this element
                 // Parsed unless random access already has the metadata
                 if (!_randomAccess || !_haveMetaData)
                 {
                     ReadFileDescription(reader.ReadSubtree());
                     reader.ReadEndElement(); // "fileDescription" must have child nodes
                 }
                 else
                 {
                     reader.Skip();
                 }
                 break;
             case "referenceableParamGroupList":
                 // Schema requirements: zero to one instances of this element
                 // Parsed unless random access already has the metadata
                 if (!_randomAccess || !_haveMetaData)
                 {
                     ReadReferenceableParamGroupList(reader.ReadSubtree());
                     reader.ReadEndElement(); // "referenceableParamGroupList" must have child nodes
                 }
                 else
                 {
                     reader.Skip();
                 }
                 break;
             case "run":
                 // Schema requirements: one instance of this element
                 // Use reader.ReadSubtree() to provide an XmlReader that is only valid for the element and child nodes
                 ReadRunData(reader.ReadSubtree());
                 if (_randomAccess || _reduceMemoryUsage)
                 {
                     // Kill the read, since we already have a valid index.
                     // The rest of the element is intentionally not skipped, since that can take some time.
                     continueReading = false;
                 }
                 else
                 {
                     // "run" might not have any child nodes
                     // We will either consume the EndElement, or the same element that was passed to ReadRunData (in case of no child nodes)
                     reader.Read();
                 }
                 break;
             case "cvList":                      // Schema requirements: one instance
             case "sampleList":                  // Schema requirements: zero to one instances
             case "softwareList":                // Schema requirements: one instance
             case "scanSettingsList":            // Schema requirements: zero to one instances
             case "instrumentConfigurationList": // Schema requirements: one instance
             case "dataProcessingList":          // Schema requirements: one instance
             case "acquisitionSettingsList":     // mzML 1.0.0 compatibility; zero to one instances
             default:
                 // We are not reading anything out of the tag, so bypass it
                 reader.Skip();
                 break;
         }
     }

     _haveMetaData = true;
     if (!_randomAccess && !_reduceMemoryUsage)
     {
         _allRead = true;
     }

     if (!_reduceMemoryUsage)
     {
         // Don't worry about closing the subtree readers, just close the root reader.
         // reader is the root if it is not an indexed mzML file.
         if (indexReader == null)
         {
             reader.Close();
         }
         else
         {
             indexReader.Close();
         }
     }
 }