/// <summary>
/// Reads every sequence available from the supplied BioTextReader.
/// </summary>
/// <remarks>
/// Parsers that need to process file-scope metadata applying to all of the
/// sequences in the file should override this method.
/// </remarks>
/// <param name="bioReader">Reader over the text to parse; must not be null and must have lines.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded to every ParseOne call. On the data-virtualization path
/// it is also stored in the returned list's CreateSequenceAsReadOnly property.
/// </param>
/// <returns>
/// A VirtualSequenceList when the reader has a file name, data virtualization is
/// enabled and SidecarFileProvider.IsIndexFileExists reports true for that file;
/// otherwise a fully loaded list of the parsed sequences.
/// </returns>
/// <exception cref="ArgumentNullException">bioReader is null.</exception>
/// <exception cref="InvalidOperationException">bioReader has no lines.</exception>
protected IList<ISequence> Parse(BioTextReader bioReader, bool isReadOnly)
{
    // Per-parse bookkeeping is reset first, even when validation below fails.
    _lineCount = 0;
    _sequenceCount = 0;
    _lineLength = 0;
    _sequenceBeginsAt = 1;

    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }

    // Empty input is rejected: report it, then throw.
    if (!bioReader.HasLines)
    {
        string emptyInputMessage = Resource.Parser_NoTextErrorMessage;
        Trace.Report(emptyInputMessage);
        throw new InvalidOperationException(emptyInputMessage);
    }

    // Evaluated left to right with short-circuiting, exactly as a single condition would be.
    bool useVirtualization = !string.IsNullOrEmpty(bioReader.FileName)
        && IsDataVirtualizationEnabled
        && SidecarFileProvider.IsIndexFileExists(bioReader.FileName);

    if (!useVirtualization)
    {
        // Full load: keep every parsed sequence in memory.
        List<ISequence> loadedSequences = new List<ISequence>();
        while (bioReader.HasLines)
        {
            loadedSequences.Add(ParseOne(bioReader, isReadOnly));
        }

        return loadedSequences;
    }

    // Virtualized load: the ParseOne results are discarded; the virtual list below
    // is built from _sequencePointers (presumably populated by ParseOne - verify).
    while (bioReader.HasLines)
    {
        ParseOne(bioReader, isReadOnly);
    }

    SidecarFileProvider indexProvider = SidecarFileProvider.CreateIndexFile(bioReader.FileName, _sequencePointers);
    VirtualSequenceList virtualList = new VirtualSequenceList(indexProvider, this, _sequencePointers.Count);
    virtualList.CreateSequenceAsReadOnly = isReadOnly;

    _sequencePointers.Clear();
    return virtualList;
}
/// <summary>
/// Parses the file named by _fileName with data virtualization (DV) and returns a
/// virtual list of qualitative sequences backed by a sidecar file.
/// </summary>
/// <param name="isReadOnly">
/// Read-only flag forwarded to ParseOne and stored in the returned list's
/// CreateSequenceAsReadOnly property.
/// </param>
/// <returns>
/// A VirtualQualitativeSequenceList when a valid sidecar already exists or a new one
/// was created; null when the newly requested sidecar file does not exist or when
/// parsing / sidecar creation threw (the provider's Cleanup is called in that case).
/// </returns>
private VirtualQualitativeSequenceList ParseWithDV(bool isReadOnly)
{
    // Probe for an existing sidecar; the provider is closed before it is inspected.
    SidecarFileProvider sidecarFileProvider = new SidecarFileProvider(_fileName);
    sidecarFileProvider.Close();

    if (sidecarFileProvider.IsSidecarValid)
    {
        // A valid sidecar exists: build the virtual list directly on top of it.
        return new VirtualQualitativeSequenceList(sidecarFileProvider, this, sidecarFileProvider.Count)
        {
            CreateSequenceAsReadOnly = isReadOnly
        };
    }

    // No valid sidecar: create a new one.
    // NOTE(review): the list returned from inside this using block keeps a reference to a
    // provider that is disposed on exit; this mirrors the Close()-then-use pattern above,
    // so the provider is presumably usable after closing - confirm.
    using (sidecarFileProvider = new SidecarFileProvider(_fileName, true))
    {
        using (_mbfStreamReader = new MBFStreamReader(_fileName))
        {
            if (sidecarFileProvider.SidecarFileExists)
            {
                try
                {
                    // ParseOne results are discarded; the sidecar is written from _sequencePointers.
                    while (_mbfStreamReader.HasLines)
                    {
                        ParseOne(_mbfStreamReader, isReadOnly);
                    }

                    sidecarFileProvider.CreateSidecarFile(_mbfStreamReader.FileName, _sequencePointers);

                    // The count is read here, before the finally block clears the pointers.
                    return new VirtualQualitativeSequenceList(sidecarFileProvider, this, _sequencePointers.Count)
                    {
                        CreateSequenceAsReadOnly = isReadOnly
                    };
                }
                catch (Exception)
                {
                    // Best effort: discard the partial sidecar and report failure by returning null.
                    sidecarFileProvider.Cleanup();
                }
                finally
                {
                    // Clear on success AND on failure so pointers from an aborted parse
                    // cannot leak into a later sidecar build.
                    if (_sequencePointers != null)
                    {
                        _sequencePointers.Clear();
                    }
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Parses sequence alignment text from a file.
/// </summary>
/// <param name="fileName">Path of the file to parse; must not be null, empty or whitespace.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded to the reader-based Parse overload on the
/// non-virtualized path. It is not used on the data-virtualization path.
/// </param>
/// <returns>
/// A SequenceAlignmentMap. When data virtualization is enabled and the sidecar is
/// valid, the map wraps a virtual list of aligned sequences; otherwise the result of
/// the reader-based Parse overload is returned.
/// </returns>
/// <exception cref="ArgumentNullException">fileName is null, empty or whitespace.</exception>
public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    _fileName = fileName;

    // Decide whether data virtualization must be enforced for this file.
    FileInfo sourceFile = new FileInfo(_fileName);
    _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

    // The file length is only read when a non-zero size threshold is configured.
    bool reachesSizeThreshold = _enforceDataVirtualizationByFileSize != 0
        && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
    if (reachesSizeThreshold || _isDataVirtualizationEnforced)
    {
        EnforceDataVirtualization = true;
    }

    if (IsDataVirtualizationEnabled)
    {
        using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
        {
            SAMAlignmentHeader header = ParserSAMHeader(streamReader);

            _sidecarFileProvider = new SidecarFileProvider(fileName);

            // A sidecar file exists but is not valid: parse the sequences again
            // (presumably this rebuilds the sidecar - verify in ParseSequences).
            if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
            {
                ParseSequences(streamReader);
            }

            if (_sidecarFileProvider.IsSidecarValid)
            {
                VirtualAlignedSequenceList<SAMAlignedSequence> queries =
                    new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                return new SequenceAlignmentMap(header, queries);
            }
        }
    }

    // Non-virtualized path (also the fallback when no valid sidecar is available).
    using (MBFTextReader textReader = new MBFTextReader(fileName))
    {
        return Parse(textReader, isReadOnly);
    }
}
/// <summary>
/// Parses a list of biological sequence data from a file.
/// </summary>
/// <param name="filename">The name of a biological sequence file.</param>
/// <param name="isReadOnly">
/// Read-only flag: stored in the virtual list's CreateSequenceAsReadOnly property when
/// an indexed provider is available, otherwise forwarded to the reader-based Parse overload.
/// </param>
/// <returns>
/// A VirtualQualitativeSequenceList when data virtualization is enabled and
/// SidecarFileProvider.GetProvider returns a provider; otherwise the list produced by
/// the reader-based Parse overload.
/// </returns>
new public IList<IQualitativeSequence> Parse(string filename, bool isReadOnly)
{
    _fileName = filename;

    // Choose block settings: full-load defaults without a file name,
    // otherwise whatever FileLoadHelper computes for the file.
    if (filename == null)
    {
        _blockSize = FileLoadHelper.DefaultFullLoadBlockSize;
        _maxNumberOfBlocks = 0;
    }
    else
    {
        _fileLoadHelper = new FileLoadHelper(filename);
        _blockSize = _fileLoadHelper.BlockSize;
        _maxNumberOfBlocks = _fileLoadHelper.MaxNumberOfBlocks;

        if (_isDataVirtualizationForced)
        {
            _blockSize = FileLoadHelper.DefaultBlockSize;
        }
    }

    // Look for a sidecar-backed provider only when data virtualization is enabled.
    SidecarFileProvider sidecarProvider = null;
    if (IsDataVirtualizationEnabled)
    {
        try
        {
            sidecarProvider = SidecarFileProvider.GetProvider(filename);
        }
        catch (OperationCanceledException)
        {
            // A cancelled lookup is treated as "no provider"; parsing continues without one.
            sidecarProvider = null;
        }
    }

    if (sidecarProvider == null)
    {
        // No provider: parse through a text reader.
        using (BioTextReader bioReader = new BioTextReader(filename))
        {
            return Parse(bioReader, isReadOnly);
        }
    }

    // Provider available: hand back a virtual list built on it.
    VirtualQualitativeSequenceList virtualList =
        new VirtualQualitativeSequenceList(sidecarProvider, this, sidecarProvider.Count);
    virtualList.CreateSequenceAsReadOnly = isReadOnly;
    return virtualList;
}
/// <summary>
/// Parses a list of biological sequence data from a BioTextReader.
/// </summary>
/// <param name="bioReader">BioTextReader over the sequence data; must not be null and must have lines.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded to every ParseOne call. On the data-virtualization path
/// it is also stored in the returned list's CreateSequenceAsReadOnly property.
/// </param>
/// <returns>
/// A VirtualQualitativeSequenceList when the reader has a file name, data virtualization
/// is enabled and SidecarFileProvider.IsIndexFileExists reports true for that file;
/// otherwise a fully loaded list of the parsed sequences.
/// </returns>
/// <exception cref="ArgumentNullException">bioReader is null.</exception>
/// <exception cref="FileFormatException">bioReader has no lines.</exception>
new protected IList<IQualitativeSequence> Parse(BioTextReader bioReader, bool isReadOnly)
{
    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }

    // Empty input is rejected: report it, then throw.
    if (!bioReader.HasLines)
    {
        string emptyInputMessage = string.Format(
            CultureInfo.CurrentCulture,
            Resource.IOFormatErrorMessage,
            Name,
            Resource.IONoTextToParse);
        Trace.Report(emptyInputMessage);
        throw new FileFormatException(emptyInputMessage);
    }

    // Evaluated left to right with short-circuiting, exactly as a single condition would be.
    bool useVirtualization = !string.IsNullOrEmpty(bioReader.FileName)
        && IsDataVirtualizationEnabled
        && SidecarFileProvider.IsIndexFileExists(bioReader.FileName);

    if (!useVirtualization)
    {
        // Full load: keep every parsed sequence in memory.
        List<IQualitativeSequence> loadedSequences = new List<IQualitativeSequence>();
        while (bioReader.HasLines)
        {
            loadedSequences.Add(ParseOne(bioReader, isReadOnly));
        }

        return loadedSequences;
    }

    // Virtualized load: ParseOne results are discarded; the index file and the
    // virtual list are built from _sequencePointers.
    while (bioReader.HasLines)
    {
        ParseOne(bioReader, isReadOnly);
    }

    SidecarFileProvider indexProvider = SidecarFileProvider.CreateIndexFile(bioReader.FileName, _sequencePointers);
    VirtualQualitativeSequenceList virtualList =
        new VirtualQualitativeSequenceList(indexProvider, this, _sequencePointers.Count);
    virtualList.CreateSequenceAsReadOnly = isReadOnly;

    _sequencePointers.Clear();
    return virtualList;
}
/// <summary>
/// Parses a list of biological sequence texts from a file.
/// </summary>
/// <param name="filename">The name of a biological sequence file.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded to ParseOne / the reader-based Parse overload and stored in
/// the virtual list's CreateSequenceAsReadOnly property on the data-virtualization path.
/// </param>
/// <returns>
/// A VirtualSequenceList when data virtualization is enabled and a valid sidecar exists
/// or could be created; otherwise the list produced by the reader-based Parse overload.
/// </returns>
public IList<ISequence> Parse(string filename, bool isReadOnly)
{
    // Start from full-load settings; they are only raised below when DV is required.
    _blockSize = FileLoadHelper.DefaultFullLoadBlockSize;
    _maxNumberOfBlocks = 0;

    if (filename != null)
    {
        _fileName = filename;

        FileInfo sourceFile = new FileInfo(_fileName);
        _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

        // The file length is only read when a non-zero size threshold is configured.
        bool reachesSizeThreshold = _enforceDataVirtualizationByFileSize != 0
            && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
        if (reachesSizeThreshold || _isDataVirtualizationEnforced)
        {
            _blockSize = FileLoadHelper.DefaultBlockSize;
            _maxNumberOfBlocks = FileLoadHelper.DefaultMaxNumberOfBlocks;
        }
    }

    if (IsDataVirtualizationEnabled)
    {
        // Probe for an existing sidecar; the provider is closed before it is inspected.
        _sidecarFileProvider = new SidecarFileProvider(_fileName);
        _sidecarFileProvider.Close();

        if (_sidecarFileProvider.IsSidecarValid)
        {
            VirtualSequenceList existingList =
                new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
            existingList.CreateSequenceAsReadOnly = isReadOnly;
            return existingList;
        }

        // No valid sidecar: request a new one and fill it by parsing the file.
        _sidecarFileProvider = new SidecarFileProvider(_fileName, true);
        if (_sidecarFileProvider.SidecarFileExists)
        {
            using (_mbfStreamReader = new MBFStreamReader(_fileName))
            {
                try
                {
                    // Results are discarded; the virtual list is served from the sidecar provider.
                    while (_mbfStreamReader.HasLines)
                    {
                        ParseOne(_mbfStreamReader, isReadOnly);
                    }

                    _sidecarFileProvider.Close();

                    VirtualSequenceList createdList =
                        new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                    createdList.CreateSequenceAsReadOnly = isReadOnly;
                    return createdList;
                }
                catch (Exception)
                {
                    // Best effort: clean up the sidecar and fall through to non-DV parsing.
                    _sidecarFileProvider.Cleanup();
                }
            }
        }
    }

    // Non-DV parsing (also the fallback when sidecar creation failed).
    using (MBFTextReader textReader = new MBFTextReader(filename))
    {
        return Parse(textReader, isReadOnly);
    }
}
/// <summary>
/// Parses sequence alignment text from a file.
/// </summary>
/// <param name="fileName">Path of the file to parse; must not be null, empty or whitespace.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded to the reader-based Parse overload on the
/// non-virtualized path. It is not used on the data-virtualization path.
/// </param>
/// <returns>
/// A SequenceAlignmentMap. When data virtualization is enabled and the sidecar is
/// valid, the map wraps a virtual list of aligned sequences; otherwise the result of
/// the reader-based Parse overload is returned.
/// </returns>
/// <exception cref="ArgumentNullException">fileName is null, empty or whitespace.</exception>
/// <exception cref="FileFormatException">
/// The header has no comments and no record fields, and parsing the current line as a
/// sequence raised IndexOutOfRangeException.
/// </exception>
public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    _fileName = fileName;

    // Decide whether data virtualization must be enforced for this file.
    FileInfo sourceFile = new FileInfo(_fileName);
    _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

    // The file length is only read when a non-zero size threshold is configured.
    bool reachesSizeThreshold = _enforceDataVirtualizationByFileSize != 0
        && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
    if (reachesSizeThreshold || _isDataVirtualizationEnforced)
    {
        EnforceDataVirtualization = true;
    }

    if (IsDataVirtualizationEnabled)
    {
        using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
        {
            SAMAlignmentHeader header = ParseSAMHeader(streamReader);

            bool headerIsEmpty = header.Comments.Count == 0 && header.RecordFields.Count == 0;
            if (headerIsEmpty)
            {
                try
                {
                    // Verify this is a valid SAM file by parsing a single sequence.
                    ParseSequence(streamReader.Line, true, Alphabet, Encoding, RefSequences);
                }
                catch (IndexOutOfRangeException)
                {
                    throw new FileFormatException(Resource.SAM_InvalidInputFile);
                }
            }

            _sidecarFileProvider = new SidecarFileProvider(fileName);

            // A sidecar file exists but is not valid: parse the sequences again
            // (presumably this rebuilds the sidecar - verify in ParseSequences).
            if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
            {
                ParseSequences(streamReader);
            }

            if (_sidecarFileProvider.IsSidecarValid)
            {
                VirtualAlignedSequenceList<SAMAlignedSequence> queries =
                    new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                return new SequenceAlignmentMap(header, queries);
            }
        }
    }

    // Non-virtualized path (also the fallback when no valid sidecar is available).
    using (MBFTextReader textReader = new MBFTextReader(fileName))
    {
        return Parse(textReader, isReadOnly);
    }
}