Esempio n. 1
0
        /// <summary>
        /// Records a sequence pointer in the sidecar file for every line the reader
        /// yields, stopping at the end of input or at the first line beginning with '@'.
        /// Only does work when data virtualization is enabled and the sidecar file exists.
        /// </summary>
        /// <param name="mbfReader">A reader for the sequence alignment text.</param>
        private void ParseSequences(MBFStreamReader mbfReader)
        {
            // Nothing to do unless DV is on and there is a sidecar file to write into.
            if (!IsDataVirtualizationEnabled || !_sidecarFileProvider.SidecarFileExists)
            {
                return;
            }

            try
            {
                while (mbfReader.HasLines && !mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    SequencePointer pointer = new SequencePointer();
                    pointer.AlphabetName = Alphabets.DNA.Name;

                    // [0] is the offset where the current line starts,
                    // [1] is that offset plus the length of the line text.
                    pointer.IndexOffsets[0] = mbfReader.CurrentLineStartingIndex;
                    pointer.IndexOffsets[1] = mbfReader.CurrentLineStartingIndex + mbfReader.Line.Length;

                    // Persist the pointer immediately instead of collecting pointers in memory.
                    _sidecarFileProvider.WritePointer(pointer);

                    mbfReader.GoToNextLine();
                    _lineCount++;
                }

                _sidecarFileProvider.Close();
            }
            catch (Exception)
            {
                // On any failure the provider is asked to clean up; the exception is not rethrown.
                _sidecarFileProvider.Cleanup();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Validates the properties MBFStreamReader exposes right after a file has been
        /// opened with the second constructor argument (skip blank lines) set to true.
        /// Expected values are read from the test configuration xml.
        /// </summary>
        public void ValidateMBFStreamReaderProperties()
        {
            // Get values from xml
            string filePath = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.FilePathNode);
            string newLineCharsCount = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.NewLineCharacterCountNode);
            string pos = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.PositionNode);
            string startingIndex = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.CurrentLineStartingIndexNode);

            // The using block releases the reader even when an assertion below throws.
            using (MBFStreamReader streamReader = new MBFStreamReader(filePath, true))
            {
                // Validate Properties
                Assert.IsTrue(streamReader.CanRead);
                Assert.IsTrue(streamReader.SkipBlankLines);
                Assert.IsTrue(streamReader.HasLines);
                Assert.AreEqual(newLineCharsCount,
                                streamReader.NewLineCharacterCount.ToString());
                Assert.AreEqual(pos,
                                streamReader.Position.ToString());
                Assert.AreEqual(startingIndex,
                                streamReader.CurrentLineStartingIndex.ToString());

                Console.WriteLine("Validated the StreamReader properties successfully");
                ApplicationLog.WriteLine("Validated the StreamReader properties successfully");
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reads a field of the reader's current line through GetLineField and compares
        /// it with the expected string from the xml configuration.
        /// </summary>
        /// <param name="nodeName">Name of the xml node holding the data for this test case.</param>
        /// <param name="IsStartAndEndIndex">True to call GetLineField with both a start and an
        /// end index; false to call it with the start index only.</param>
        void ValidateSubString(string nodeName,
                               bool IsStartAndEndIndex)
        {
            // Test data from the xml configuration.
            string filePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expected = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedString);
            string startText = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.StartIndexNode);
            string endText = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.EndIndexNode);

            using (MBFStreamReader reader = new MBFStreamReader(filePath))
            {
                int start = Int32.Parse(startText, (IFormatProvider)null);

                // The end index is only parsed when the two-argument overload is requested.
                string actual = IsStartAndEndIndex
                    ? reader.GetLineField(start, Int32.Parse(endText, (IFormatProvider)null))
                    : reader.GetLineField(start);

                // Validate sub string of a line.
                Assert.AreEqual(expected, actual);
                Console.WriteLine("The expected substring is {0}", actual);
                ApplicationLog.WriteLine("Validated the substring successfully");
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the line stored at the start offset of the given sequence pointer and
        /// parses it into an aligned sequence.
        /// </summary>
        /// <param name="pointer">
        /// Sequence pointer whose IndexOffsets locate the sequence in the input file.
        /// </param>
        /// <returns>
        /// The parsed IAlignedSequence, or null when the pointer's start offset is not
        /// smaller than its end offset.
        /// </returns>
        /// <exception cref="ArgumentNullException">pointer is null.</exception>
        /// <exception cref="NotSupportedException">No input file name has been set.</exception>
        public IAlignedSequence ParseAlignedSequence(SequencePointer pointer)
        {
            if (pointer == null)
            {
                throw new ArgumentNullException("pointer");
            }

            if (string.IsNullOrEmpty(_fileName))
            {
                throw new NotSupportedException(Resource.DataVirtualizationNeedsInputFile);
            }

            // An empty or inverted offset range has nothing to parse.
            if (pointer.IndexOffsets[0] >= pointer.IndexOffsets[1])
            {
                return null;
            }

            // Create the reader on first use, or again when the current one cannot read.
            bool readerUsable = _mbfStreamReader != null && _mbfStreamReader.CanRead;
            if (!readerUsable)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            _mbfStreamReader.Seek(pointer.IndexOffsets[0], SeekOrigin.Begin);
            string recordLine = _mbfStreamReader.ReadLine();

            return ParseSequence(recordLine, _isReadOnly);
        }
Esempio n. 5
0
        /// <summary>
        /// Checks that an MBFStreamReader constructed from a file name reports that name
        /// through FileName and exposes the same first line a plain StreamReader reads.
        /// </summary>
        public void TestMBFTextReaderConstructors()
        {
            string fastaPath = @"TestUtils\Fasta\uniprot-dutpase.fasta";

            // Compare against a StreamReader opened on the same file.
            using (StreamReader plainReader = new StreamReader(fastaPath))
            using (MBFStreamReader mbfReader = new MBFStreamReader(fastaPath))
            {
                Assert.AreEqual(fastaPath, mbfReader.FileName);
                Assert.AreEqual(plainReader.ReadLine(), mbfReader.Line);
            }

            // MBFStreamReader(string) should read first line and set the Filename property.
            using (MBFStreamReader mbfReader = new MBFStreamReader(fastaPath))
            {
                Assert.AreEqual(fastaPath, mbfReader.FileName);
            }

            // Same check while a read-only FileStream on the file is open at the same time.
            using (Stream openStream = new FileStream(fastaPath, FileMode.Open, FileAccess.Read))
            using (MBFStreamReader mbfReader = new MBFStreamReader(fastaPath))
            {
                Assert.AreEqual(fastaPath, mbfReader.FileName);
            }

            using (MBFStreamReader mbfReader = new MBFStreamReader(fastaPath))
            {
                Assert.AreEqual(fastaPath, mbfReader.FileName);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Validates the characters returned by MBFStreamReader.ReadChars against the
        /// reader's current line.
        /// </summary>
        /// <param name="nodeName">Name of the node used for different test case.</param>
        private void ValidateChars(string nodeName)
        {
            // Get values from xml
            string filePath = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string startIndex = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.CharsStartIndexNode);
            string count = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.CharsCountNode);

            // The using block releases the reader even when parsing or an assertion fails.
            using (MBFStreamReader streamReader = new MBFStreamReader(filePath))
            {
                string currentLine = streamReader.Line;
                char[] charsArray  = streamReader.ReadChars(Int32.Parse(startIndex, (IFormatProvider)null),
                                                            Int32.Parse(count, (IFormatProvider)null));

                // Validate array: each returned char is compared with the char at the
                // same index of the current line.
                for (int i = 0; i < charsArray.Length; i++)
                {
                    Assert.AreEqual(currentLine[i], charsArray[i]);
                    Console.WriteLine("Validated the char {0} successfully", charsArray[i]);
                    ApplicationLog.WriteLine("Validated the char successfully");
                }
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Reads <paramref name="count"/> bytes from the input file, beginning
        /// <paramref name="startIndex"/> positions after the start offset of the sequence.
        /// </summary>
        /// <param name="startIndex">Zero-based index within the sequence at which reading begins.</param>
        /// <param name="count">Number of symbols to read.</param>
        /// <param name="seqPointer">Sequence pointer whose first index offset marks the sequence start.</param>
        /// <returns>The bytes returned by the underlying reader's ReadBytes call.</returns>
        /// <exception cref="NotSupportedException">No input file name has been set.</exception>
        /// <exception cref="ArgumentNullException">seqPointer is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">startIndex is negative or count is not positive.</exception>
        public byte[] ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (string.IsNullOrEmpty(_fileName))
            {
                throw new NotSupportedException(Resource.DataVirtualizationNeedsInputFile);
            }

            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            if (0 > startIndex)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (0 >= count)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Create the reader on first use, or again when the current one cannot read.
            bool readerUsable = _mbfStreamReader != null && _mbfStreamReader.CanRead;
            if (!readerUsable)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            // Translate the index inside the sequence into a position in the file.
            long absoluteOffset = startIndex + seqPointer.IndexOffsets[0];

            return _mbfStreamReader.ReadBytes(absoluteOffset, count);
        }
Esempio n. 8
0
        /// <summary>
        /// Validates the characters returned by MBFStreamReader.ReadChars against the
        /// reader's current line.
        /// </summary>
        /// <param name="nodeName">Name of the xml node holding the data for this test case.</param>
        void ValidateChars(string nodeName)
        {
            // Test data from the xml configuration.
            string filePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string startText = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.CharsStartIndexNode);
            string countText = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.CharsCountNode);

            using (MBFStreamReader reader = new MBFStreamReader(filePath))
            {
                string lineText = reader.Line;

                int start  = Int32.Parse(startText, (IFormatProvider)null);
                int length = Int32.Parse(countText, (IFormatProvider)null);
                char[] readChars = reader.ReadChars(start, length);

                // Each returned char is compared with the char at the same index of the line.
                int position = 0;
                foreach (char readChar in readChars)
                {
                    Assert.AreEqual(lineText[position], readChar);
                    Console.WriteLine("Validated the char {0} successfully", readChar);
                    ApplicationLog.WriteLine("Validated the char successfully");
                    position++;
                }
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Validates that MBFStreamReader yields the expected lines, in order, when it is
        /// constructed from the kind of input selected by <paramref name="inputType"/>.
        /// </summary>
        /// <param name="nodeName">Name of the node used for different test case.</param>
        /// <param name="inputType">Different streaming inputs used for different test cases.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// inputType is not one of the handled StreamReaderInputType values.
        /// </exception>
        void ValidateMBFStreamReader(string nodeName,
                                     StreamReaderInputType inputType)
        {
            // Get values from xml
            string filePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);

            string[] expectedOutput = _utilityObj._xmlUtil.GetTextValues(
                nodeName, Constants.ExpectedLinesNode);

            // For the stream-based cases the stream must outlive the reader built on it,
            // so it is owned here and released in the finally block rather than in a
            // using block that would end before the lines are read.
            Stream inputStream = null;
            MBFStreamReader streamReader = null;

            try
            {
                // Read Fasta file.
                switch (inputType)
                {
                case StreamReaderInputType.FileName:
                    streamReader = new MBFStreamReader(filePath);
                    break;

                case StreamReaderInputType.FileNameWithSkipBlankLines:
                    streamReader = new MBFStreamReader(filePath, true);
                    break;

                case StreamReaderInputType.Stream:
                    inputStream = new FileStream(filePath, FileMode.Open,
                                                 FileAccess.ReadWrite);
                    streamReader = new MBFStreamReader(inputStream);
                    break;

                case StreamReaderInputType.StreamWithSkipBlankLines:
                    inputStream = new FileStream(filePath, FileMode.Open,
                                                 FileAccess.ReadWrite);
                    streamReader = new MBFStreamReader(inputStream, true);
                    break;

                default:
                    // Fail with a clear message instead of a NullReferenceException below.
                    throw new ArgumentOutOfRangeException("inputType");
                }

                for (int i = 0; i < expectedOutput.Length; i++)
                {
                    Assert.AreEqual(expectedOutput[i], streamReader.Line);

                    Console.WriteLine("Validated the line {0} successfully", streamReader.Line);
                    ApplicationLog.WriteLine("Validated the MBF StreamReader successfully");

                    // Move to next line
                    streamReader.GoToNextLine();
                }
            }
            finally
            {
                if (streamReader != null)
                {
                    streamReader.Dispose();
                }

                // Stream.Dispose may be called more than once, so this is safe whether
                // or not the reader already disposed the stream it was given.
                if (inputStream != null)
                {
                    inputStream.Dispose();
                }
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Builds a virtual list of qualitative sequences backed by a sidecar file,
        /// reusing a valid existing sidecar or creating a new one from the input file.
        /// </summary>
        /// <param name="isReadOnly">Flag to indicate whether the sequences returned should be set to readonly or not.</param>
        /// <returns>
        /// The virtual sequence list, or null when the new sidecar file does not exist
        /// or an exception occurs while it is being built.
        /// </returns>
        private VirtualQualitativeSequenceList ParseWithDV(bool isReadOnly)
        {
            // Open a provider for the input file and close it straight away;
            // it is only queried from here on.
            SidecarFileProvider sidecarFileProvider = new SidecarFileProvider(_fileName);
            sidecarFileProvider.Close();

            // A valid sidecar already exists: build the virtual list directly on top of it.
            if (sidecarFileProvider.IsSidecarValid)
            {
                VirtualQualitativeSequenceList existingList =
                    new VirtualQualitativeSequenceList(sidecarFileProvider, this, sidecarFileProvider.Count);
                existingList.CreateSequenceAsReadOnly = isReadOnly;
                return existingList;
            }

            // Otherwise create a new sidecar.
            using (sidecarFileProvider = new SidecarFileProvider(_fileName, true))
            using (_mbfStreamReader = new MBFStreamReader(_fileName))
            {
                if (!sidecarFileProvider.SidecarFileExists)
                {
                    return null;
                }

                try
                {
                    // Parse every record in the file.
                    while (_mbfStreamReader.HasLines)
                    {
                        ParseOne(_mbfStreamReader, isReadOnly);
                    }

                    // Create sidecar from the collected sequence pointers.
                    sidecarFileProvider.CreateSidecarFile(_mbfStreamReader.FileName, _sequencePointers);

                    VirtualQualitativeSequenceList createdList =
                        new VirtualQualitativeSequenceList(sidecarFileProvider, this, _sequencePointers.Count);
                    createdList.CreateSequenceAsReadOnly = isReadOnly;

                    _sequencePointers.Clear();
                    return createdList;
                }
                catch (Exception)
                {
                    // Any failure: let the provider clean up and report "no result".
                    sidecarFileProvider.Cleanup();
                    return null;
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Parses the sequence alignment text contained in a file.
        /// </summary>
        /// <param name="fileName">Name of the file to parse.</param>
        /// <param name="isReadOnly">
        /// Passed through to the text-reader based Parse overload, which is used when the
        /// data virtualization path does not produce a result.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">fileName is null, empty or white space.</exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Decide whether data virtualization has to be enforced for this file.
            FileInfo inputFile = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool reachesSizeLimit = _enforceDataVirtualizationByFileSize != 0
                                    && inputFile.Length >= _enforceDataVirtualizationByFileSize;
            if (reachesSizeLimit || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader header = ParserSAMHeader(streamReader);

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar file is present but not valid: rebuild it from the input.
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList<SAMAlignedSequence> queries =
                            new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                        return new SequenceAlignmentMap(header, queries);
                    }
                }
            }

            // Either DV is off or no valid sidecar is available: parse through the text reader.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Exercises the core MBFStreamReader members (HasLines, Line, GetLineField,
        /// GoToNextLine, Seek, Position, ReadChars) and compares their results with a
        /// plain StreamReader opened on the same FASTA file.
        /// </summary>
        public void TestMBFTextReaderCoreFunctionality()
        {
            string testFileFullName = @"TestUtils\Fasta\5_sequences.fasta";

            using (StreamReader streamReader = new StreamReader(testFileFullName))
            using (MBFStreamReader mbfReader = new MBFStreamReader(testFileFullName))
            {
                // Test line access members.
                Assert.IsTrue(mbfReader.HasLines);

                // Test line reads
                string streamLine = streamReader.ReadLine();
                Assert.AreEqual(streamLine, mbfReader.Line);

                // Test getting of line fields. The expected values below treat the
                // GetLineField arguments as 1-based, inclusive positions.
                Assert.AreEqual(streamLine.Substring(26, 10), mbfReader.GetLineField(27, 36));
                Assert.AreEqual(streamLine.Substring(14), mbfReader.GetLineField(15));

                // Test moving to next line
                mbfReader.GoToNextLine();
                Assert.AreEqual(streamReader.ReadLine(), mbfReader.Line);

                char[] streamBuffer = new char[10];
                char[] bioBuffer;

                // Test seeking to a position in the stream. The StreamReader's internal
                // buffer is discarded first so its next read starts at the new position.
                streamReader.DiscardBufferedData();
                streamReader.BaseStream.Seek(100, SeekOrigin.Begin);
                mbfReader.Seek(100, SeekOrigin.Begin);
                Assert.AreEqual(streamReader.BaseStream.Position, mbfReader.Position);

                // Test character reading: every character must match, not only the first.
                streamReader.ReadBlock(streamBuffer, 0, 10);
                bioBuffer = mbfReader.ReadChars(100, 10);
                Assert.AreEqual(streamBuffer.Length, bioBuffer.Length);
                for (int i = 0; i < streamBuffer.Length; i++)
                {
                    Assert.AreEqual(streamBuffer[i], bioBuffer[i]);
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Parses a single FastQ record from an MBFStreamReader.
        /// </summary>
        /// <param name="mbfReader">MBFStreamReader instance for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Read-only flag forwarded to the FastQ-specific parsing routine.
        /// </param>
        /// <returns>The parsed IQualitativeSequence object.</returns>
        /// <exception cref="FileFormatException">The reader has no lines to parse.</exception>
        private IQualitativeSequence ParseOne(MBFStreamReader mbfReader, bool isReadOnly)
        {
            // Normal path: there is text available, so hand over to the format-specific parser.
            if (mbfReader.HasLines)
            {
                return ParseOneWithFastQFormat(mbfReader, isReadOnly);
            }

            // Empty input is not allowed: report it and fail.
            string message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, Resource.IONoTextToParse);
            Trace.Report(message);
            throw new FileFormatException(message);
        }
Esempio n. 14
0
        /// <summary>
        /// Parses a range of sequence items starting from the specified index in the sequence.
        /// </summary>
        /// <param name="startIndex">The zero-based index at which to begin parsing.</param>
        /// <param name="count">
        /// The number of symbols to parse. It is reduced when the requested range would
        /// reach or pass the end of the sequence.
        /// </param>
        /// <param name="seqPointer">The sequence pointer of that sequence.</param>
        /// <returns>
        /// The parsed sequence, marked read-only, or null when the start index lies at or
        /// beyond the end offset of the sequence.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">startIndex is negative or count is not positive.</exception>
        /// <exception cref="ArgumentNullException">seqPointer is null.</exception>
        public ISequence ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (0 > startIndex)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (0 >= count)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            long sequenceStart = seqPointer.IndexOffsets[0];
            long sequenceEnd   = seqPointer.IndexOffsets[1];

            // if the start index exceeds the sequence boundary
            if (sequenceStart + startIndex >= sequenceEnd)
            {
                return(null);
            }

            IAlphabet alphabet = Alphabets.All.Single(A => A.Name.Equals(seqPointer.AlphabetName));
            Sequence  sequence = new Sequence(alphabet)
            {
                // Must be writable while the symbols are inserted below.
                IsReadOnly = false
            };

            if (_mbfStreamReader == null || !_mbfStreamReader.CanRead)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            long filePosition   = sequenceStart + startIndex;
            long sequenceLength = sequenceEnd - sequenceStart;

            // Clamp the request to the end of the sequence. The comparison is done in
            // 64-bit arithmetic so that count + startIndex cannot wrap around to a
            // negative value and skip the clamp. The cast back to int is safe: inside
            // the branch 0 < sequenceLength - startIndex <= count.
            if ((long)count + startIndex >= sequenceLength)
            {
                count = (int)(sequenceLength - startIndex);
            }

            char[] buffer = _mbfStreamReader.ReadChars(filePosition, count);
            sequence.InsertRange(0, new string(buffer));

            // default for partial load
            sequence.IsReadOnly = true;

            return(sequence);
        }
Esempio n. 15
0
        /// <summary>
        /// Parses SAM alignment header from specified MBFStreamReader.
        /// Consecutive lines beginning with '@' are consumed; the reader is left on the
        /// first line that does not begin with '@'.
        /// </summary>
        /// <param name="mbfReader">MBF stream reader over the SAM text.</param>
        /// <returns>
        /// The parsed header. It has no record fields or comments added when the current
        /// line does not begin with '@'.
        /// </returns>
        /// <exception cref="FormatException">
        /// The header's IsValid check returned a non-empty message.
        /// </exception>
        private static SAMAlignmentHeader ParseSAMHeader(MBFStreamReader mbfReader)
        {
            _headerLength = 0;
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

            if (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                while (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    string line = mbfReader.Line;

                    // Accumulates the length of the line text only.
                    _headerLength += line.Length;

                    // Validate the header format before the line is taken apart.
                    ValidateHeaderLineFormat(line);

                    string[] tokens         = line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);
                    // Record type code is the first token without its leading '@'.
                    string   recordTypecode = tokens[0].Substring(1);

                    if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        SAMRecordField headerLine = new SAMRecordField(recordTypecode);
                        for (int i = 1; i < tokens.Length; i++)
                        {
                            // First two characters are the tag name; the value starts at
                            // index 3, skipping the single character in between.
                            string tagToken = tokens[i];
                            string tagName  = tagToken.Substring(0, 2);
                            headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                        }

                        samHeader.RecordFields.Add(headerLine);
                    }
                    else
                    {
                        // Comment line: keep everything from index 4 onwards.
                        samHeader.Comments.Add(line.Substring(4));
                    }

                    mbfReader.GoToNextLine();
                }

                string message = samHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new FormatException(message);
                }
            }

            return(samHeader);
        }
Esempio n. 16
0
        /// <summary>
        /// Parses one sequence from the given MBFStreamReader and remembers the reader's
        /// file name in this parser.
        /// This method is only used in data virtualization scenarios.
        /// </summary>
        /// <param name="mbfReader">The MBFStreamReader of the file to be parsed.</param>
        /// <param name="isReadOnly">Read-only flag forwarded to the format-specific parser.</param>
        /// <returns>The parsed sequence.</returns>
        /// <exception cref="InvalidOperationException">The reader has no lines to parse.</exception>
        private ISequence ParseOne(MBFStreamReader mbfReader, bool isReadOnly)
        {
            _fileName = mbfReader.FileName;

            // Normal path: text is available, delegate to the format-specific parser.
            if (mbfReader.HasLines)
            {
                return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
            }

            // Empty input is not allowed: report it and fail.
            string message = Resource.Parser_NoTextErrorMessage;
            Trace.Report(message);
            throw new InvalidOperationException(message);
        }
Esempio n. 17
0
 /// <summary>
 /// Parses a single biological sequence from a file, reading it through an
 /// MBFStreamReader when data virtualization is enabled and through an
 /// MBFTextReader otherwise.
 /// </summary>
 /// <param name="filename">The name of a biological sequence file.</param>
 /// <param name="isReadOnly">
 /// Read-only flag forwarded to the reader-based ParseOne overload.
 /// </param>
 /// <returns>The parsed IQualitativeSequence object.</returns>
 public IQualitativeSequence ParseOne(string filename, bool isReadOnly)
 {
     // Without data virtualization the text reader overload is used.
     if (!IsDataVirtualizationEnabled)
     {
         using (MBFTextReader textReader = new MBFTextReader(filename))
         {
             return ParseOne(textReader, isReadOnly);
         }
     }

     // Data virtualization: use the stream reader overload.
     using (MBFStreamReader streamReader = new MBFStreamReader(filename))
     {
         return ParseOne(streamReader, isReadOnly);
     }
 }
Esempio n. 18
0
        /// <summary>
        /// Validates that MBFStreamReader.Position reports the position that was set
        /// through Seek relative to the beginning of the file.
        /// </summary>
        public void ValidatePosition()
        {
            // Get values from xml
            string filePath = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.FilePathNode);
            string pos = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastAStreamReaderNode, Constants.PositionNode);

            // The using block releases the reader even when parsing or the assertion fails.
            using (MBFStreamReader reader = new MBFStreamReader(filePath))
            {
                // Set the position relative to the beginning of the file.
                reader.Seek(Int32.Parse(pos, (IFormatProvider)null), SeekOrigin.Begin);

                // Validate the set position
                Assert.AreEqual(pos, reader.Position.ToString());

                Console.WriteLine("Validate the position successfulyy");
                ApplicationLog.WriteLine("Validated the position successfully");
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Reads the sequence ID for the given sequence pointer from the input file and
        /// stores it in the pointer's Id property.
        /// </summary>
        /// <param name="pointer">
        /// A sequence pointer representing the sequence whose ID is to be retrieved.
        /// </param>
        /// <returns>The value of pointer.Id after it has been updated.</returns>
        /// <exception cref="ArgumentNullException">pointer is null.</exception>
        public string GetSequenceID(SequencePointer pointer)
        {
            if (pointer == null)
            {
                throw new ArgumentNullException("pointer");
            }

            // Create the reader on first use, or again when the current one cannot read.
            bool readerUsable = _mbfStreamReader != null && _mbfStreamReader.CanRead;
            if (!readerUsable)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            // Step back from the sequence start by pointer.StartingLine and read the line found there.
            _mbfStreamReader.Seek(
                pointer.IndexOffsets[0] - pointer.StartingLine,
                SeekOrigin.Begin);
            _mbfStreamReader.ReadLine();

            // The ID is taken from field position 2 of that line onwards
            // (presumably skipping a one-character line marker - TODO confirm).
            pointer.Id = _mbfStreamReader.GetLineField(2);

            return pointer.Id;
        }
Esempio n. 20
0
        /// <summary>
        /// Parses a range of symbols starting from the specified index in the sequence.
        /// </summary>
        /// <param name="startIndex">The zero-based index at which to begin parsing.</param>
        /// <param name="count">
        /// The number of symbols to parse. It is reduced when the requested range would
        /// reach or pass the end of the sequence.
        /// </param>
        /// <param name="seqPointer">The sequence pointer of that sequence.</param>
        /// <returns>
        /// The bytes returned by the underlying reader's ReadBytes call, or null when the
        /// start index lies at or beyond the end offset of the sequence.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">startIndex is negative or count is not positive.</exception>
        /// <exception cref="ArgumentNullException">seqPointer is null.</exception>
        public byte[] ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (0 > startIndex)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (0 >= count)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            long sequenceStart = seqPointer.IndexOffsets[0];
            long sequenceEnd   = seqPointer.IndexOffsets[1];

            // if the start index exceeds the sequence boundary
            if (sequenceStart + startIndex >= sequenceEnd)
            {
                return(null);
            }

            if (_mbfStreamReader == null || !_mbfStreamReader.CanRead)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            long filePosition   = sequenceStart + startIndex;
            long sequenceLength = sequenceEnd - sequenceStart;

            // Clamp the request to the end of the sequence. The comparison is done in
            // 64-bit arithmetic so that count + startIndex cannot wrap around to a
            // negative value and skip the clamp. The cast back to int is safe: inside
            // the branch 0 < sequenceLength - startIndex <= count.
            if ((long)count + startIndex >= sequenceLength)
            {
                count = (int)(sequenceLength - startIndex);
            }

            return(_mbfStreamReader.ReadBytes(filePosition, count));
        }
Esempio n. 21
0
        /// <summary>
        /// Parses a range of sequence items starting from the specified index in the sequence.
        /// </summary>
        /// <param name="startIndex">The zero-based index at which to begin parsing.</param>
        /// <param name="count">The number of symbols to parse.</param>
        /// <param name="seqPointer">The sequence pointer of the specified sequence.</param>
        /// <returns>The parsed sequence, marked as read-only.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when no input file name is set on this parser.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="startIndex"/> is negative or <paramref name="count"/>
        /// is not positive.
        /// </exception>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="seqPointer"/> is null.
        /// </exception>
        public ISequence ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (string.IsNullOrEmpty(_fileName))
            {
                throw new NotSupportedException(Resource.DataVirtualizationNeedsInputFile);
            }

            if (startIndex < 0)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (count <= 0)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Fail with a descriptive argument exception instead of a NullReferenceException
            // raised from inside the alphabet lookup below.
            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            // Resolve the alphabet by the name recorded on the pointer; Single throws when
            // there is not exactly one alphabet with that name.
            IAlphabet alphabet = Alphabets.All.Single(a => a.Name.Equals(seqPointer.AlphabetName));
            Sequence  sequence = new Sequence(alphabet)
            {
                IsReadOnly = false
            };

            if (_mbfStreamReader == null || !_mbfStreamReader.CanRead)
            {
                _mbfStreamReader = new MBFStreamReader(_fileName);
            }

            long fileIndex = startIndex + seqPointer.IndexOffsets[0];

            char[] buffer = _mbfStreamReader.ReadChars(fileIndex, count);
            sequence.InsertRange(0, new string(buffer));

            // default for partial load
            sequence.IsReadOnly = true;

            return(sequence);
        }
Esempio n. 22
0
        /// <summary>
        /// Validates the line field returned by MBFStreamReader.GetLineField against the
        /// expected string configured for the given test node.
        /// </summary>
        /// <param name="nodeName">Name of the xml node that holds the values for this test case.</param>
        /// <param name="IsStartAndEndIndex">True to read the field using both the start and end
        /// index, false to read it using the start index only.</param>
        private void ValidateSubString(string nodeName,
                                       bool IsStartAndEndIndex)
        {
            // Get values from xml
            string filePath = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedString = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedString);
            string startIndex = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.StartIndexNode);
            string endIndex = Utility._xmlUtil.GetTextValue(
                nodeName, Constants.EndIndexNode);

            MBFStreamReader streamReader = new MBFStreamReader(filePath);

            try
            {
                string subString;

                if (IsStartAndEndIndex)
                {
                    subString = streamReader.GetLineField(Int32.Parse(startIndex),
                                                          Int32.Parse(endIndex));
                }
                else
                {
                    subString = streamReader.GetLineField(Int32.Parse(startIndex));
                }

                // Validate sub string of a line.
                Assert.AreEqual(expectedString, subString);
                Console.WriteLine("The expected substring is {0}", subString);
                ApplicationLog.WriteLine("Validated the substring successfully");
            }
            finally
            {
                // Release the reader even when parsing or the assertion throws, so a failing
                // test does not leave the input file open.
                streamReader.Close();
                streamReader.Dispose();
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Reads one four-line FASTQ record (header, sequence, quality header, quality scores)
        /// from the reader and builds a QualitativeSequence from it.
        /// A sequence pointer for the record is added to the parser's pointer list and a
        /// FileVirtualQualitativeSequenceProvider is attached to the returned sequence.
        /// </summary>
        /// <param name="mbfReader">A reader positioned on the header line of a FASTQ record.</param>
        /// <param name="isReadOnly">
        /// Value assigned to the IsReadOnly property of the resulting QualitativeSequence.
        /// </param>
        /// <returns>A new QualitativeSequence instance containing the parsed data.</returns>
        /// <exception cref="FileFormatException">
        /// Thrown when any of the four lines is missing or malformed, when the quality score
        /// line length differs from the sequence line length, or when no alphabet can be
        /// identified for the sequence line.
        /// </exception>
        private IQualitativeSequence ParseOneWithFastQFormat(MBFStreamReader mbfReader, bool isReadOnly)
        {
            SequencePointer pointer = new SequencePointer();

            string error;
            string detail;

            // Line 1: the header must start with '@'.
            bool hasHeaderLine = mbfReader.HasLines && mbfReader.Line.StartsWith("@", StringComparison.Ordinal);
            if (!hasHeaderLine)
            {
                error = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, Name);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            string sequenceId = mbfReader.GetLineField(2).Trim();

            // Record the reader position as the start offset of the sequence.
            pointer.IndexOffsets[0] = mbfReader.Position;

            // Line 2: the sequence itself, which must not be empty.
            mbfReader.GoToNextLine();
            bool hasSequenceLine = mbfReader.HasLines && !string.IsNullOrEmpty(mbfReader.Line);
            if (!hasSequenceLine)
            {
                detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidSequenceLine, sequenceId);
                error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            string sequenceText = mbfReader.Line;

            // End offset is the start offset plus the length of the sequence line.
            pointer.IndexOffsets[1] = pointer.IndexOffsets[0] + sequenceText.Length;

            // Line 3: the quality score header must start with '+'.
            mbfReader.GoToNextLine();
            bool hasQualityHeader = mbfReader.HasLines && mbfReader.Line.StartsWith("+", StringComparison.Ordinal);
            if (!hasQualityHeader)
            {
                detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderLine, sequenceId);
                error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            // When the quality header repeats an ID it has to match the one from line 1.
            string qualityHeaderId = mbfReader.GetLineField(2).Trim();
            bool   idsAgree        = string.IsNullOrEmpty(qualityHeaderId) || sequenceId.Equals(qualityHeaderId);
            if (!idsAgree)
            {
                detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderData, sequenceId);
                error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            // Line 4: the quality scores, which must not be empty.
            mbfReader.GoToNextLine();
            bool hasQualityLine = mbfReader.HasLines && !string.IsNullOrEmpty(mbfReader.Line);
            if (!hasQualityLine)
            {
                detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_EmptyQualityScoreLine, sequenceId);
                error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            string qualityLine   = mbfReader.Line;
            byte[] qualityScores = ASCIIEncoding.ASCII.GetBytes(qualityLine);

            // There has to be exactly one quality score character per sequence character.
            if (sequenceText.Length != qualityLine.Length)
            {
                detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoresLength, sequenceId);
                error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                Trace.Report(error);
                throw new FileFormatException(error);
            }

            // Move past the record so the reader is on whatever follows it.
            mbfReader.GoToNextLine();

            // Use the configured alphabet, or identify one from the sequence line.
            IAlphabet resolvedAlphabet = Alphabet;
            if (resolvedAlphabet == null)
            {
                resolvedAlphabet = _commonSequenceParser.IdentifyAlphabet(resolvedAlphabet, sequenceText);

                if (resolvedAlphabet == null)
                {
                    detail = string.Format(CultureInfo.CurrentCulture, Resource.InvalidSymbolInString, sequenceText);
                    error  = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
                    Trace.Report(error);
                    throw new FileFormatException(error);
                }
            }

            // Use the configured FASTQ format type unless auto detection is switched on.
            FastQFormatType formatType = FastqType;
            if (AutoDetectFastQFormat)
            {
                formatType = IdentifyFastQFormatType(qualityScores);
            }

            QualitativeSequence result = Encoding == null
                ? new QualitativeSequence(resolvedAlphabet, formatType, sequenceText, qualityScores)
                : new QualitativeSequence(resolvedAlphabet, formatType, Encoding, sequenceText, qualityScores);

            result.ID         = sequenceId;
            result.IsReadOnly = isReadOnly;

            // Complete the pointer and register it with the parser.
            pointer.AlphabetName = result.Alphabet.Name;
            pointer.Id           = result.ID;
            _sequencePointers.Add(pointer);

            FileVirtualQualitativeSequenceProvider provider = new FileVirtualQualitativeSequenceProvider(this, pointer);
            provider.BlockSize         = _blockSize;
            provider.MaxNumberOfBlocks = _maxNumberOfBlocks;

            result.VirtualQualitativeSequenceProvider = provider;
            return result;
        }
Esempio n. 24
0
 /// <summary>
 /// Parses a single FASTQ record from the reader by delegating to ParseOneWithFastQFormat.
 /// </summary>
 /// <param name="mbfReader">A reader for a biological sequence text.</param>
 /// <param name="isReadOnly">
 /// Passed through unchanged to ParseOneWithFastQFormat, which assigns it to the
 /// IsReadOnly property of the resulting sequence.
 /// </param>
 /// <returns>The sequence produced by ParseOneWithFastQFormat.</returns>
 protected ISequence ParseOneWithSpecificFormat(MBFStreamReader mbfReader, bool isReadOnly)
 {
     ISequence parsed = ParseOneWithFastQFormat(mbfReader, isReadOnly);
     return parsed;
 }
Esempio n. 25
0
        /// <summary>
        /// Parses the biological sequences contained in a file.
        /// When data virtualization is enabled the result is a VirtualSequenceList backed by a
        /// sidecar file; otherwise the file is parsed through an MBFTextReader.
        /// </summary>
        /// <param name="filename">The name of a biological sequence file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences should be in readonly mode or not.
        /// It is assigned to CreateSequenceAsReadOnly on a virtual list, or forwarded to the
        /// reader based Parse overload.
        /// </param>
        /// <returns>The list of parsed ISequence objects.</returns>
        public IList<ISequence> Parse(string filename, bool isReadOnly)
        {
            // Begin with the full-load settings; they are replaced below when DV applies.
            _blockSize         = FileLoadHelper.DefaultFullLoadBlockSize;
            _maxNumberOfBlocks = 0;

            if (filename != null)
            {
                _fileName = filename;

                FileInfo inputFile = new FileInfo(_fileName);
                _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

                // DV applies when a non-zero size threshold is reached or when it is enforced.
                bool sizeThresholdReached = _enforceDataVirtualizationByFileSize != 0 &&
                                            inputFile.Length >= _enforceDataVirtualizationByFileSize;

                if (sizeThresholdReached || _isDataVirtualizationEnforced)
                {
                    _blockSize         = FileLoadHelper.DefaultBlockSize;
                    _maxNumberOfBlocks = FileLoadHelper.DefaultMaxNumberOfBlocks;
                }
            }

            if (IsDataVirtualizationEnabled)
            {
                // Probe for an existing sidecar file first.
                _sidecarFileProvider = new SidecarFileProvider(_fileName);
                _sidecarFileProvider.Close();

                if (_sidecarFileProvider.IsSidecarValid)
                {
                    // A valid sidecar already exists: hand back a virtual list right away.
                    VirtualSequenceList existingList =
                        new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                    existingList.CreateSequenceAsReadOnly = isReadOnly;
                    return existingList;
                }

                // No valid sidecar: create a new one.
                _sidecarFileProvider = new SidecarFileProvider(_fileName, true);

                if (_sidecarFileProvider.SidecarFileExists)
                {
                    using (_mbfStreamReader = new MBFStreamReader(_fileName))
                    {
                        try
                        {
                            // The parsed sequences are discarded here; the list returned below
                            // is built from the sidecar provider.
                            while (_mbfStreamReader.HasLines)
                            {
                                ParseOne(_mbfStreamReader, isReadOnly);
                            }

                            _sidecarFileProvider.Close();

                            VirtualSequenceList rebuiltList =
                                new VirtualSequenceList(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                            rebuiltList.CreateSequenceAsReadOnly = isReadOnly;
                            return rebuiltList;
                        }
                        catch (Exception)
                        {
                            // NOTE(review): the exception is swallowed and execution continues
                            // with the non-DV parse below; presumably an intended fallback - confirm.
                            _sidecarFileProvider.Cleanup();
                        }
                    }
                }
            }

            // non-DV parsing
            using (MBFTextReader textReader = new MBFTextReader(filename))
            {
                return Parse(textReader, isReadOnly);
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Parses a single FASTA sequence from a file using MBFStreamReader.
        /// This method is only used in data virtualization scenarios.
        /// No symbols are inserted into the returned sequence here; the method records the
        /// sequence's offsets in a SequencePointer, writes block indices and the pointer to
        /// the sidecar file when a sidecar provider is set, and attaches a
        /// FileVirtualSequenceProvider to the sequence.
        /// </summary>
        /// <param name="mbfReader">The MBFStreamReader of the file to be parsed.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence should be in read-only mode.
        /// If this flag is set to true then the resulting sequence's IsReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The parsed sequence.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="mbfReader"/> is null.
        /// </exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the current line does not start with ">" or when no alphabet can be
        /// identified for a sequence line.
        /// </exception>
        protected ISequence ParseOneWithSpecificFormat(MBFStreamReader mbfReader, bool isReadOnly)
        {
            SequencePointer sequencePointer = new SequencePointer();

            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            string message;

            // The record has to begin on a header line, i.e. a line starting with ">".
            // NOTE(review): Line is read without checking HasLines first - confirm that
            // callers only invoke this while the reader still has lines.
            if (!mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(CultureInfo.InvariantCulture,
                                        Resource.INVALID_INPUT_FILE,
                                        Resource.FASTA_NAME);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Process header line.
            Sequence sequence;
            string   id = mbfReader.GetLineField(2).Trim();

            // save initial start and end indices
            // StartingLine holds the distance between the reader position and the start of
            // the header line; both offsets start at the reader position and the end offset
            // grows as sequence lines are consumed below.
            sequencePointer.StartingLine    = (int)(mbfReader.Position - mbfReader.CurrentLineStartingIndex);
            sequencePointer.IndexOffsets[0] = mbfReader.Position;
            sequencePointer.IndexOffsets[1] = mbfReader.Position;

            mbfReader.GoToNextLine();

            // Use the configured alphabet, or identify one from the first sequence line.
            IAlphabet alphabet = Alphabet;

            if (alphabet == null)
            {
                alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, mbfReader.Line);

                if (alphabet == null)
                {
                    message = string.Format(CultureInfo.InvariantCulture,
                                            Resource.InvalidSymbolInString,
                                            mbfReader.Line);
                    Trace.Report(message);
                    throw new FileFormatException(message);
                }
            }

            if (Encoding == null)
            {
                sequence = new Sequence(alphabet);
            }
            else
            {
                sequence = new Sequence(alphabet, Encoding, string.Empty)
                {
                    IsReadOnly = false
                };
            }

            // Bookkeeping for the block indices written to the sidecar file:
            //   currentBlockSize         - amount of the current block already accounted for
            //                              (line lengths plus new line characters)
            //   symbolCount              - running symbol total passed to WriteBlockIndex;
            //                              starts at -1, presumably so that it is the zero-based
            //                              index of the last symbol in a block - TODO confirm
            //   prenewLineCharacterCount - new line characters carried over into the next block
            int currentBlockSize         = 0;
            int symbolCount              = -1;
            int newLineCharacterCount    = mbfReader.NewLineCharacterCount;
            int prenewLineCharacterCount = 0;
            int lineLength = mbfReader.Line.Length;

            sequence.ID = id;

            // Consume sequence lines until the next header line or the end of the input.
            while (mbfReader.HasLines && !mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                // Only the line length is added; new line characters are not included here.
                sequencePointer.IndexOffsets[1] += mbfReader.Line.Length;
                if (Alphabet == null)
                {
                    // No alphabet was configured, so re-identify it for every line, starting
                    // from the alphabet the sequence currently has.
                    alphabet = _commonSequenceParser.IdentifyAlphabet(sequence.Alphabet, mbfReader.Line);

                    if (alphabet == null)
                    {
                        message = string.Format(CultureInfo.InvariantCulture,
                                                Resource.InvalidSymbolInString,
                                                mbfReader.Line);
                        Trace.Report(message);
                        throw new FileFormatException(message);
                    }

                    // The identified alphabet changed: replace the sequence with a new one
                    // built from the old sequence using the new alphabet.
                    if (sequence.Alphabet != alphabet)
                    {
                        Sequence seq = new Sequence(alphabet, Encoding, sequence)
                        {
                            IsReadOnly = false
                        };
                        sequence.Clear();
                        sequence = seq;
                    }
                }

                newLineCharacterCount = mbfReader.NewLineCharacterCount;
                lineLength            = mbfReader.Line.Length;

                // Distribute the current line over blocks of _blockSize; this only runs when
                // a sidecar provider is available.
                while (lineLength != 0 && _sidecarFileProvider != null)
                {
                    if (lineLength + currentBlockSize + newLineCharacterCount <= _blockSize)
                    {
                        // The whole line, including its new line characters, fits into the
                        // current block.
                        symbolCount      += lineLength;
                        currentBlockSize += lineLength + newLineCharacterCount;
                        lineLength        = 0;
                    }
                    else
                    {
                        // The line does not fit: account for the space left in the block and
                        // keep the remainder of the line for the next iteration.
                        symbolCount += _blockSize - currentBlockSize;
                        lineLength   = lineLength - (_blockSize - currentBlockSize);
                        if (lineLength <= 0)
                        {
                            // All symbols fit and only new line characters crossed the block
                            // boundary: correct symbolCount by the (non-positive) remainder and
                            // carry the overflowing new line characters into the next block.
                            symbolCount += lineLength;
                            prenewLineCharacterCount = newLineCharacterCount + lineLength;
                            lineLength = 0;
                        }

                        currentBlockSize = _blockSize;
                    }

                    if (currentBlockSize == _blockSize)
                    {
                        // write to file.
                        // The next block starts with any carried-over new line characters.
                        _sidecarFileProvider.WriteBlockIndex(symbolCount);
                        currentBlockSize         = prenewLineCharacterCount;
                        prenewLineCharacterCount = 0;
                    }
                }

                mbfReader.GoToNextLine();
            }

            if (_sidecarFileProvider != null)
            {
                // Write a final block index: the running symbol count when the sequence spans
                // more than one block and the last block still holds data, otherwise 0.
                if (sequencePointer.IndexOffsets[1] - sequencePointer.IndexOffsets[0] > _blockSize &&
                    currentBlockSize - newLineCharacterCount > 0)
                {
                    _sidecarFileProvider.WriteBlockIndex(symbolCount);
                }
                else
                {
                    _sidecarFileProvider.WriteBlockIndex(0);
                }
            }

            if (sequence.MoleculeType == MoleculeType.Invalid)
            {
                sequence.MoleculeType = CommonSequenceParser.GetMoleculeType(sequence.Alphabet);
            }

            sequence.IsReadOnly = isReadOnly;

            sequencePointer.AlphabetName = sequence.Alphabet.Name;
            sequencePointer.Id           = sequence.ID;

            if (_sidecarFileProvider != null)
            {
                // Write each sequence pointer to the sidecar file immediately
                _sidecarFileProvider.WritePointer(sequencePointer);
            }

            // Attach a provider built from this parser and the pointer to the sequence.
            FileVirtualSequenceProvider dataprovider = new FileVirtualSequenceProvider(this, sequencePointer)
            {
                BlockSize         = _blockSize,
                MaxNumberOfBlocks = _maxNumberOfBlocks
            };

            sequence.VirtualSequenceProvider = dataprovider;
            return(sequence);
        }
Esempio n. 27
0
        /// <summary>
        /// Parses the sequence alignment text contained in a file.
        /// When data virtualization is enabled and a valid sidecar file is available, the
        /// returned map is backed by a virtual list of aligned sequences; otherwise the file
        /// is parsed through an MBFTextReader.
        /// </summary>
        /// <param name="fileName">Name of the file to parse.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. It is forwarded to the reader based Parse overload; the
        /// data virtualization path does not use it.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or white space.
        /// </exception>
        /// <exception cref="FileFormatException">
        /// Thrown on the data virtualization path when the header is empty and parsing the
        /// current line as a sequence raises an IndexOutOfRangeException.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Decide whether data virtualization has to be switched on for this file.
            FileInfo inputFile = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool sizeThresholdReached = _enforceDataVirtualizationByFileSize != 0 &&
                                        inputFile.Length >= _enforceDataVirtualizationByFileSize;

            if (sizeThresholdReached || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader samHeader = ParseSAMHeader(streamReader);

                    bool headerIsEmpty = samHeader.Comments.Count == 0 && samHeader.RecordFields.Count == 0;
                    if (headerIsEmpty)
                    {
                        try
                        {
                            // Without any header content, check that this is a SAM file at all
                            // by parsing the current line as a sequence.
                            ParseSequence(streamReader.Line, true, Alphabet, Encoding, RefSequences);
                        }
                        catch (IndexOutOfRangeException)
                        {
                            throw new FileFormatException(Resource.SAM_InvalidInputFile);
                        }
                    }

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar file that exists but is not valid gets rebuilt.
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList<SAMAlignedSequence> virtualQueries =
                            new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);

                        return new SequenceAlignmentMap(samHeader, virtualQueries);
                    }
                }
            }

            // Reached when DV is disabled or no valid sidecar file could be obtained.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }