Ejemplo n.º 1
0
        /// <summary>
        /// Parses every sequence available from a BioTextReader.
        /// </summary>
        /// <remarks>
        /// This method should be overridden by any parsers that need to process file-scope
        /// metadata that applies to all of the sequences in the file.
        /// When the reader has a file name, data virtualization is enabled and
        /// SidecarFileProvider.IsIndexFileExists returns true for that file name, the sequences
        /// are parsed without being collected, an index file is created from _sequencePointers
        /// and a VirtualSequenceList is returned. Otherwise the parsed sequences are returned
        /// in an ordinary in-memory list.
        /// </remarks>
        /// <param name="bioReader">The reader to parse sequences from; must be non-null and have lines.</param>
        /// <param name="isReadOnly">
        /// Forwarded to ParseOne for every sequence. On the virtualized path it is also assigned
        /// to the CreateSequenceAsReadOnly property of the returned list.
        /// </param>
        /// <returns>The list of parsed ISequence objects.</returns>
        /// <exception cref="ArgumentNullException">Thrown when bioReader is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when bioReader has no lines.</exception>
        protected IList <ISequence> Parse(BioTextReader bioReader, bool isReadOnly)
        {
            // Per-parse bookkeeping is reset first, even before the arguments are validated.
            _lineCount        = 0;
            _sequenceCount    = 0;
            _lineLength       = 0;
            _sequenceBeginsAt = 1;

            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // An empty reader is reported to the trace and rejected.
            if (!bioReader.HasLines)
            {
                string emptyInputMessage = Resource.Parser_NoTextErrorMessage;
                Trace.Report(emptyInputMessage);
                throw new InvalidOperationException(emptyInputMessage);
            }

            bool useVirtualization = !string.IsNullOrEmpty(bioReader.FileName)
                                     && IsDataVirtualizationEnabled
                                     && SidecarFileProvider.IsIndexFileExists(bioReader.FileName);

            if (!useVirtualization)
            {
                // Plain path: every parsed sequence is kept in memory.
                List<ISequence> parsed = new List<ISequence>();
                while (bioReader.HasLines)
                {
                    ISequence next = ParseOne(bioReader, isReadOnly);
                    parsed.Add(next);
                }

                return parsed;
            }

            // Virtualized path: the parsed sequences are discarded; the virtual list is
            // backed by the index file built from the sequence pointers.
            // NOTE(review): ParseOne presumably fills _sequencePointers - confirm.
            while (bioReader.HasLines)
            {
                ParseOne(bioReader, isReadOnly);
            }

            SidecarFileProvider indexProvider =
                SidecarFileProvider.CreateIndexFile(bioReader.FileName, _sequencePointers);

            VirtualSequenceList virtualList =
                new VirtualSequenceList(indexProvider, this, _sequencePointers.Count);
            virtualList.CreateSequenceAsReadOnly = isReadOnly;

            // The pointers now live in the index file; drop the in-memory copy.
            _sequencePointers.Clear();

            return virtualList;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the file named by _fileName with data virtualization (DV) and returns a
        /// virtual list of qualitative sequences.
        /// </summary>
        /// <remarks>
        /// If a valid sidecar file already exists it is reused as is. Otherwise a new sidecar
        /// is created: every sequence is parsed with ParseOne, the sidecar file is written from
        /// _sequencePointers and a virtual list over it is returned.
        /// Sidecar creation is best effort: any exception raised while parsing or writing the
        /// sidecar is swallowed after the provider's Cleanup method has been called, and null
        /// is returned.
        /// </remarks>
        /// <param name="isReadOnly">Flag to indicate whether the sequences returned should be set to readonly or not.</param>
        /// <returns>
        /// The virtual sequence list, or null when no valid sidecar exists and a new one could
        /// not be created.
        /// </returns>
        private VirtualQualitativeSequenceList ParseWithDV(bool isReadOnly)
        {
            SidecarFileProvider sidecarFileProvider = new SidecarFileProvider(_fileName);
            sidecarFileProvider.Close();

            // A valid sidecar already exists: build the virtual list straight from it.
            if (sidecarFileProvider.IsSidecarValid)
            {
                return new VirtualQualitativeSequenceList(sidecarFileProvider, this, sidecarFileProvider.Count)
                {
                    CreateSequenceAsReadOnly = isReadOnly
                };
            }

            // Otherwise create a new sidecar.
            using (sidecarFileProvider = new SidecarFileProvider(_fileName, true))
            {
                using (_mbfStreamReader = new MBFStreamReader(_fileName))
                {
                    if (sidecarFileProvider.SidecarFileExists)
                    {
                        try
                        {
                            // The parsed sequences are discarded; only the pointers are needed.
                            // NOTE(review): ParseOne presumably fills _sequencePointers - confirm.
                            while (_mbfStreamReader.HasLines)
                            {
                                ParseOne(_mbfStreamReader, isReadOnly);
                            }

                            sidecarFileProvider.CreateSidecarFile(_mbfStreamReader.FileName, _sequencePointers);

                            return new VirtualQualitativeSequenceList(sidecarFileProvider, this, _sequencePointers.Count)
                            {
                                CreateSequenceAsReadOnly = isReadOnly
                            };
                        }
                        catch (Exception)
                        {
                            // Deliberate best effort: discard the partial sidecar and report
                            // failure to the caller through the null return below.
                            sidecarFileProvider.Cleanup();
                        }
                        finally
                        {
                            // Clear on failure as well as success, so that entries from an
                            // aborted attempt cannot leak into a later parse that reuses
                            // this parser instance.
                            _sequencePointers.Clear();
                        }
                    }
                }
            }

            return null;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses the sequence alignment text contained in a file.
        /// </summary>
        /// <remarks>
        /// When data virtualization is enabled the SAM header is parsed, the sidecar file is
        /// rebuilt through ParseSequences if one exists but is not valid, and - provided the
        /// sidecar is valid afterwards - a map backed by a virtual aligned-sequence list is
        /// returned. In every other case the file is parsed through an MBFTextReader.
        /// </remarks>
        /// <param name="fileName">Name of the file to parse; must not be null, empty or whitespace.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. It is forwarded to the reader-based Parse overload; the
        /// virtualized path in this method does not read it.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">Thrown when fileName is null, empty or whitespace.</exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Decide whether the file size (or an explicit request) forces data virtualization.
            FileInfo info = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool overSizeLimit = _enforceDataVirtualizationByFileSize != 0
                                 && info.Length >= _enforceDataVirtualizationByFileSize;
            if (overSizeLimit || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader samHeader = ParserSAMHeader(streamReader);

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar that exists but is not valid has to be recreated.
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList<SAMAlignedSequence> virtualQueries =
                            new VirtualAlignedSequenceList<SAMAlignedSequence>(
                                _sidecarFileProvider, this, _sidecarFileProvider.Count);

                        return new SequenceAlignmentMap(samHeader, virtualQueries);
                    }
                }
            }

            // No usable sidecar (or virtualization is off): parse the whole file.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses a list of biological sequence data from a file.
        /// </summary>
        /// <remarks>
        /// The block size and maximum number of blocks are taken from a FileLoadHelper when a
        /// file name is given, and fall back to the full-load defaults when it is null.
        /// When data virtualization is enabled and SidecarFileProvider.GetProvider yields a
        /// provider, a virtual list backed by that provider is returned; otherwise the file is
        /// parsed through a BioTextReader.
        /// </remarks>
        /// <param name="filename">The name of a biological sequence file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed IQualitativeSequence objects.</returns>
        new public IList <IQualitativeSequence> Parse(string filename, bool isReadOnly)
        {
            _fileName = filename;

            // Work out the block settings that determine whether DV is required.
            if (filename == null)
            {
                _blockSize         = FileLoadHelper.DefaultFullLoadBlockSize;
                _maxNumberOfBlocks = 0;
            }
            else
            {
                _fileLoadHelper    = new FileLoadHelper(filename);
                _blockSize         = _fileLoadHelper.BlockSize;
                _maxNumberOfBlocks = _fileLoadHelper.MaxNumberOfBlocks;

                if (_isDataVirtualizationForced)
                {
                    _blockSize = FileLoadHelper.DefaultBlockSize;
                }
            }

            // Look for a sidecar provider; it stays null when none can be used.
            SidecarFileProvider sidecar = null;
            if (IsDataVirtualizationEnabled)
            {
                try
                {
                    sidecar = SidecarFileProvider.GetProvider(filename);
                }
                catch (OperationCanceledException)
                {
                    // Treated the same as "no provider": sidecar is still null here.
                }
            }

            if (sidecar == null)
            {
                // No provider: parse the whole file through a text reader.
                using (BioTextReader reader = new BioTextReader(filename))
                {
                    return Parse(reader, isReadOnly);
                }
            }

            // A provider is available: hand back a virtual list over it.
            VirtualQualitativeSequenceList virtualList =
                new VirtualQualitativeSequenceList(sidecar, this, sidecar.Count);
            virtualList.CreateSequenceAsReadOnly = isReadOnly;

            return virtualList;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses a list of biological sequence data from a BioTextReader.
        /// </summary>
        /// <remarks>
        /// When the reader has a file name, data virtualization is enabled and
        /// SidecarFileProvider.IsIndexFileExists returns true for that file name, the sequences
        /// are parsed without being collected, an index file is created from _sequencePointers
        /// and a VirtualQualitativeSequenceList is returned. Otherwise the parsed sequences are
        /// returned in an ordinary in-memory list.
        /// </remarks>
        /// <param name="bioReader">BioTextReader instance for a biological sequence data.</param>
        /// <param name="isReadOnly">
        /// Forwarded to ParseOne for every sequence. On the virtualized path it is also assigned
        /// to the CreateSequenceAsReadOnly property of the returned list.
        /// </param>
        /// <returns>The list of parsed IQualitativeSequence objects.</returns>
        /// <exception cref="ArgumentNullException">Thrown when bioReader is null.</exception>
        /// <exception cref="FileFormatException">Thrown when bioReader has no lines.</exception>
        new protected IList <IQualitativeSequence> Parse(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // An empty reader is reported to the trace and rejected.
            if (!bioReader.HasLines)
            {
                string emptyInputMessage = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    Name,
                    Resource.IONoTextToParse);
                Trace.Report(emptyInputMessage);
                throw new FileFormatException(emptyInputMessage);
            }

            bool useVirtualization = !string.IsNullOrEmpty(bioReader.FileName)
                                     && IsDataVirtualizationEnabled
                                     && SidecarFileProvider.IsIndexFileExists(bioReader.FileName);

            if (!useVirtualization)
            {
                // Plain path: every parsed sequence is kept in memory.
                List<IQualitativeSequence> parsed = new List<IQualitativeSequence>();
                while (bioReader.HasLines)
                {
                    IQualitativeSequence next = ParseOne(bioReader, isReadOnly);
                    parsed.Add(next);
                }

                return parsed;
            }

            // Virtualized path: the parsed sequences are discarded; the virtual list is
            // backed by the index file built from the sequence pointers.
            // NOTE(review): ParseOne presumably fills _sequencePointers - confirm.
            while (bioReader.HasLines)
            {
                ParseOne(bioReader, isReadOnly);
            }

            SidecarFileProvider indexProvider =
                SidecarFileProvider.CreateIndexFile(bioReader.FileName, _sequencePointers);

            VirtualQualitativeSequenceList virtualList =
                new VirtualQualitativeSequenceList(indexProvider, this, _sequencePointers.Count);
            virtualList.CreateSequenceAsReadOnly = isReadOnly;

            // The pointers now live in the index file; drop the in-memory copy.
            _sequencePointers.Clear();

            return virtualList;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses a list of biological sequence texts from a file.
        /// </summary>
        /// <remarks>
        /// The parser starts from the full-load block settings and switches to the default
        /// block settings when the file reaches the configured size threshold or data
        /// virtualization is enforced. When data virtualization is enabled, a valid existing
        /// sidecar file is reused; otherwise a new sidecar is attempted by parsing every
        /// sequence through an MBFStreamReader. If that attempt throws, the provider's Cleanup
        /// method is called and the file is parsed without virtualization instead.
        /// </remarks>
        /// <param name="filename">The name of a biological sequence file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed ISequence objects.</returns>
        public IList <ISequence> Parse(string filename, bool isReadOnly)
        {
            // Start from the full-load settings; they are replaced below when DV is called for.
            _blockSize         = FileLoadHelper.DefaultFullLoadBlockSize;
            _maxNumberOfBlocks = 0;

            if (filename != null)
            {
                _fileName = filename;

                FileInfo info = new FileInfo(_fileName);
                _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

                bool overSizeLimit = _enforceDataVirtualizationByFileSize != 0
                                     && info.Length >= _enforceDataVirtualizationByFileSize;
                if (overSizeLimit || _isDataVirtualizationEnforced)
                {
                    _blockSize         = FileLoadHelper.DefaultBlockSize;
                    _maxNumberOfBlocks = FileLoadHelper.DefaultMaxNumberOfBlocks;
                }
            }

            if (IsDataVirtualizationEnabled)
            {
                _sidecarFileProvider = new SidecarFileProvider(_fileName);
                _sidecarFileProvider.Close();

                // Reuse an existing sidecar when it is valid.
                if (_sidecarFileProvider.IsSidecarValid)
                {
                    VirtualSequenceList existingList =
                        new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                    existingList.CreateSequenceAsReadOnly = isReadOnly;

                    return existingList;
                }

                // No valid sidecar: try to build a new one.
                _sidecarFileProvider = new SidecarFileProvider(_fileName, true);

                if (_sidecarFileProvider.SidecarFileExists)
                {
                    using (_mbfStreamReader = new MBFStreamReader(_fileName))
                    {
                        try
                        {
                            // The parsed sequences are discarded; the virtual list is
                            // served from the sidecar instead.
                            while (_mbfStreamReader.HasLines)
                            {
                                ParseOne(_mbfStreamReader, isReadOnly);
                            }

                            _sidecarFileProvider.Close();

                            VirtualSequenceList createdList =
                                new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                            createdList.CreateSequenceAsReadOnly = isReadOnly;

                            return createdList;
                        }
                        catch (Exception)
                        {
                            // Best effort: discard the sidecar and fall through to the
                            // non-virtualized parse below.
                            _sidecarFileProvider.Cleanup();
                        }
                    }
                }
            }

            // Parsing without data virtualization.
            using (MBFTextReader textReader = new MBFTextReader(filename))
            {
                return Parse(textReader, isReadOnly);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Parses the sequence alignment text contained in a file.
        /// </summary>
        /// <remarks>
        /// When data virtualization is enabled the SAM header is parsed first. If the header
        /// has neither comments nor record fields, a single sequence is parsed to verify that
        /// the input is a valid SAM file. The sidecar file is then rebuilt through
        /// ParseSequences if one exists but is not valid, and - provided the sidecar is valid
        /// afterwards - a map backed by a virtual aligned-sequence list is returned. In every
        /// other case the file is parsed through an MBFTextReader.
        /// </remarks>
        /// <param name="fileName">Name of the file to parse; must not be null, empty or whitespace.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. It is forwarded to the reader-based Parse overload; the
        /// virtualized path in this method does not read it.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">Thrown when fileName is null, empty or whitespace.</exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the single-sequence validity check raises an IndexOutOfRangeException.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Decide whether the file size (or an explicit request) forces data virtualization.
            FileInfo info = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool overSizeLimit = _enforceDataVirtualizationByFileSize != 0
                                 && info.Length >= _enforceDataVirtualizationByFileSize;
            if (overSizeLimit || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader samHeader = ParseSAMHeader(streamReader);

                    bool headerIsEmpty = samHeader.Comments.Count == 0 && samHeader.RecordFields.Count == 0;
                    if (headerIsEmpty)
                    {
                        // With nothing in the header, check that this is a valid SAM file
                        // by parsing one sequence from the reader's current line.
                        try
                        {
                            ParseSequence(streamReader.Line, true, Alphabet, Encoding, RefSequences);
                        }
                        catch (IndexOutOfRangeException)
                        {
                            throw new FileFormatException(Resource.SAM_InvalidInputFile);
                        }
                    }

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar that exists but is not valid has to be recreated.
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList<SAMAlignedSequence> virtualQueries =
                            new VirtualAlignedSequenceList<SAMAlignedSequence>(
                                _sidecarFileProvider, this, _sidecarFileProvider.Count);

                        return new SequenceAlignmentMap(samHeader, virtualQueries);
                    }
                }
            }

            // No usable sidecar (or virtualization is off): parse the whole file.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }