Ejemplo n.º 1
0
        /// <summary>
        /// Indexes the alignment lines of a SAM file for data virtualization.
        /// Starting at the reader's current line, a sequence pointer holding the
        /// line's start and end offsets is written to the sidecar file for every
        /// line, until the reader has no more lines or a line beginning with "@"
        /// is reached. Does nothing unless data virtualization is enabled and the
        /// sidecar file exists.
        /// </summary>
        /// <param name="mbfReader">A reader for the sequence alignment text.</param>
        private void ParseSequences(MBFStreamReader mbfReader)
        {
            // Nothing to index unless DV is on and there is a sidecar file to write to.
            if (!IsDataVirtualizationEnabled || !_sidecarFileProvider.SidecarFileExists)
            {
                return;
            }

            try
            {
                while (mbfReader.HasLines)
                {
                    // A line beginning with "@" ends the run of lines being indexed.
                    if (mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                    {
                        break;
                    }

                    SequencePointer pointer = new SequencePointer();
                    pointer.AlphabetName = Alphabets.DNA.Name;

                    // Offset at which the current line starts.
                    pointer.IndexOffsets[0] = mbfReader.CurrentLineStartingIndex;
                    // Offset just past the last character of the current line.
                    pointer.IndexOffsets[1] = mbfReader.CurrentLineStartingIndex + mbfReader.Line.Length;

                    // Persist the pointer right away instead of collecting pointers in memory.
                    _sidecarFileProvider.WritePointer(pointer);

                    mbfReader.GoToNextLine();
                    ++_lineCount;
                }

                _sidecarFileProvider.Close();
            }
            catch (Exception)
            {
                // Best effort: on any failure the sidecar is cleaned up and the error is not propagated.
                _sidecarFileProvider.Cleanup();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses file with DV and returns Virtual Qualitative Sequences list.
        /// A valid existing sidecar file is reused as-is; otherwise the file is parsed
        /// and a new sidecar file is created from the collected sequence pointers.
        /// </summary>
        /// <param name="isReadOnly">Flag to indicate whether the sequences returned should be set to readonly or not.</param>
        /// <returns>
        /// The virtual sequence list, or null when the new sidecar file does not exist
        /// or when parsing / sidecar creation throws.
        /// </returns>
        private VirtualQualitativeSequenceList ParseWithDV(bool isReadOnly)
        {
            SidecarFileProvider sidecarFileProvider = new SidecarFileProvider(_fileName);
            sidecarFileProvider.Close();

            // if valid sidecar file exists
            if (sidecarFileProvider.IsSidecarValid)
            {
                // Create virtual list and return
                return(new VirtualQualitativeSequenceList(sidecarFileProvider, this, sidecarFileProvider.Count)
                {
                    CreateSequenceAsReadOnly = isReadOnly
                });
            }

            // else create new sidecar
            using (sidecarFileProvider = new SidecarFileProvider(_fileName, true))
            {
                using (_mbfStreamReader = new MBFStreamReader(_fileName))
                {
                    if (sidecarFileProvider.SidecarFileExists)
                    {
                        try
                        {
                            // NOTE(review): ParseOne is presumed to append to _sequencePointers,
                            // since nothing else in this method fills it — confirm.
                            while (_mbfStreamReader.HasLines)
                            {
                                ParseOne(_mbfStreamReader, isReadOnly);
                            }

                            // Create sidecar
                            sidecarFileProvider.CreateSidecarFile(_mbfStreamReader.FileName, _sequencePointers);

                            VirtualQualitativeSequenceList virtualSequences =
                                new VirtualQualitativeSequenceList(sidecarFileProvider, this, _sequencePointers.Count)
                            {
                                CreateSequenceAsReadOnly = isReadOnly
                            };

                            return(virtualSequences);
                        }
                        catch (Exception)
                        {
                            // Best effort: discard the partial sidecar and report failure via null.
                            sidecarFileProvider.Cleanup();
                        }
                        finally
                        {
                            // Clear the scratch pointer list on failure as well as success, so a
                            // later parse never builds a sidecar from stale, partial pointers.
                            if (_sequencePointers != null)
                            {
                                _sequencePointers.Clear();
                            }
                        }
                    }
                }
            }

            return(null);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses a list of biological sequence texts from a file.
        /// When data virtualization is enabled, a valid existing sidecar file is reused,
        /// otherwise a new one is built while parsing; if that fails (or DV is not
        /// enabled) the file is parsed through a regular text reader instead.
        /// </summary>
        /// <param name="filename">The name of a biological sequence file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences' isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed ISequence objects.</returns>
        public IList <ISequence> Parse(string filename, bool isReadOnly)
        {
            // Start from the full-load settings; they are replaced below if DV is required.
            _blockSize         = FileLoadHelper.DefaultFullLoadBlockSize;
            _maxNumberOfBlocks = 0;

            // Decide whether DV is required for this file.
            if (filename != null)
            {
                _fileName = filename;

                FileInfo sourceFile = new FileInfo(_fileName);
                _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

                // The size check is evaluated before the enforcement flag is consulted.
                bool exceedsSizeLimit = _enforceDataVirtualizationByFileSize != 0 &&
                                        sourceFile.Length >= _enforceDataVirtualizationByFileSize;
                if (exceedsSizeLimit || _isDataVirtualizationEnforced)
                {
                    _blockSize         = FileLoadHelper.DefaultBlockSize;
                    _maxNumberOfBlocks = FileLoadHelper.DefaultMaxNumberOfBlocks;
                }
            }

            if (IsDataVirtualizationEnabled)
            {
                // Probe for an existing sidecar file.
                _sidecarFileProvider = new SidecarFileProvider(_fileName);
                _sidecarFileProvider.Close();

                if (_sidecarFileProvider.IsSidecarValid)
                {
                    // A valid sidecar already exists: serve the sequences straight from it.
                    VirtualSequenceList fromExistingSidecar =
                        new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                    fromExistingSidecar.CreateSequenceAsReadOnly = isReadOnly;
                    return fromExistingSidecar;
                }

                // No valid sidecar: start a new one.
                _sidecarFileProvider = new SidecarFileProvider(_fileName, true);

                if (_sidecarFileProvider.SidecarFileExists)
                {
                    using (_mbfStreamReader = new MBFStreamReader(_fileName))
                    {
                        try
                        {
                            // Parsed sequences are discarded here; DV tracks them through sequence pointers.
                            while (_mbfStreamReader.HasLines)
                            {
                                ParseOne(_mbfStreamReader, isReadOnly);
                            }

                            _sidecarFileProvider.Close();

                            return new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count)
                            {
                                CreateSequenceAsReadOnly = isReadOnly
                            };
                        }
                        catch (Exception)
                        {
                            // Best effort: drop the sidecar and fall through to non-DV parsing below.
                            _sidecarFileProvider.Cleanup();
                        }
                    }
                }
            }

            // Non-DV parsing.
            using (MBFTextReader textReader = new MBFTextReader(filename))
            {
                return Parse(textReader, isReadOnly);
            }
        }