Exemplo n.º 1
0
        /// <summary>
        /// Shared validation routine for CommonSequenceParser alphabet identification.
        /// Asks a new parser to identify the alphabet of <paramref name="sequence"/>,
        /// starting from the alphabet selected by <paramref name="addParam"/>, and asserts
        /// that the identified alphabet is that same alphabet.
        /// </summary>
        /// <param name="sequence">Sequence string handed to IdentifyAlphabet.</param>
        /// <param name="addParam">
        /// Scenario selector. ParseRNA and ParseProtein are handled; any other value
        /// performs no assertion and only writes the log messages.
        /// </param>
        static void CommonSequenceParserGeneralTestCases(
            string sequence,
            CommonSequenceParserAttributes addParam)
        {
            IAlphabet            sequenceAlphabet = null;
            CommonSequenceParser parser           = new CommonSequenceParser();

            switch (addParam)
            {
            case CommonSequenceParserAttributes.ParseRNA:
                sequenceAlphabet = parser.IdentifyAlphabet(Alphabets.RNA, sequence);
                Assert.IsNotNull(sequenceAlphabet);
                // Expected value first, actual second, so a failure message labels them correctly.
                Assert.AreEqual(Alphabets.RNA, sequenceAlphabet);
                break;

            case CommonSequenceParserAttributes.ParseProtein:
                sequenceAlphabet = parser.IdentifyAlphabet(Alphabets.Protein, sequence);
                Assert.IsNotNull(sequenceAlphabet);
                // Expected value first, actual second, so a failure message labels them correctly.
                Assert.AreEqual(Alphabets.Protein, sequenceAlphabet);
                break;
            }

            ApplicationLog.WriteLine(
                "CommonSequenceParser BVT : All the features validated successfully.");
            Console.WriteLine(
                "CommonSequenceParser BVT : All the features validated successfully.");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Validates CommonSequenceParser.GetMoleculeType(string): for each known molecule
        /// name (and for the empty string) the result must have a non-empty string form
        /// and must equal the expected MoleculeType value.
        /// </summary>
        public void CommonSequenceParserValidateGetMoleculeTypeString()
        {
            // Inputs and the molecule types they must map to; the two arrays are index-aligned.
            string[] moleculeNames =
            {
                Constants.DNA,
                Constants.NA,
                Constants.RNA,
                Constants.TRNA,
                Constants.RRNA,
                Constants.MRNA,
                Constants.URNA,
                Constants.SNRNA,
                Constants.SNORNA,
                Constants.PROTEIN,
                String.Empty
            };

            MoleculeType[] expectedTypes =
            {
                MoleculeType.DNA,
                MoleculeType.NA,
                MoleculeType.RNA,
                MoleculeType.tRNA,
                MoleculeType.rRNA,
                MoleculeType.mRNA,
                MoleculeType.uRNA,
                MoleculeType.snRNA,
                MoleculeType.snoRNA,
                MoleculeType.Protein,
                MoleculeType.Invalid
            };

            for (int caseIndex = 0; caseIndex < moleculeNames.Length; caseIndex++)
            {
                string name = moleculeNames[caseIndex];

                // First the string form must be non-empty, then the value itself must match.
                string textForm = CommonSequenceParser.GetMoleculeType(name).ToString();
                Assert.IsNotNullOrEmpty(textForm);
                Assert.AreEqual(expectedTypes[caseIndex],
                                CommonSequenceParser.GetMoleculeType(name));
            }

            const string completionMessage =
                "CommonSequenceParser BVT : All the features validated successfully.";

            ApplicationLog.WriteLine(completionMessage);
            Console.WriteLine(completionMessage);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Negative test for CommonSequenceParser.GetMoleculeType(IAlphabet): a null
        /// alphabet must produce MoleculeType.Invalid.
        /// </summary>
        public void InvalidateCommonSeqParserGetMoleculeTypeAlphabet()
        {
            // Typed as IAlphabet so the call binds to the alphabet overload, not the string one.
            IAlphabet missingAlphabet = null;

            Assert.IsNotNull(CommonSequenceParser.GetMoleculeType(missingAlphabet));
            Assert.AreEqual(MoleculeType.Invalid,
                            CommonSequenceParser.GetMoleculeType(missingAlphabet));

            const string completionMessage =
                "CommonSequenceParser P2 : All the features invalidated successfully.";

            ApplicationLog.WriteLine(completionMessage);
            Console.WriteLine(completionMessage);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Negative test for CommonSequenceParser.GetMoleculeType(string): a null string
 /// must raise ArgumentNullException. The test fails if the call returns normally.
 /// </summary>
 public void InvalidateCommonSeqParserGetMoleculeTypeStr()
 {
     // Typed as string so the call binds to the string overload.
     string missingName = null;

     try
     {
         CommonSequenceParser.GetMoleculeType(missingName);
     }
     catch (ArgumentNullException)
     {
         const string completionMessage =
             "CommonSequenceParser P2 : All the features invalidated successfully.";

         ApplicationLog.WriteLine(completionMessage);
         Console.WriteLine(completionMessage);
         return;
     }

     // Reached only when no exception was thrown by the call above.
     Assert.Fail();
 }
Exemplo n.º 5
0
        /// <summary>
        /// Validates CommonSequenceParser.GetAlphabet(MoleculeType): NA must map to the DNA
        /// alphabet, snoRNA to the RNA alphabet, Protein to the Protein alphabet, and
        /// Invalid to null. RNA is only checked for a non-empty string form.
        /// </summary>
        public void CommonSequenceParserValidateGetAlphabets()
        {
            Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
                                        MoleculeType.NA).ToString());
            Assert.AreEqual(Alphabets.DNA,
                            CommonSequenceParser.GetAlphabet(MoleculeType.NA));
            Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
                                        MoleculeType.snoRNA).ToString());
            Assert.AreEqual(Alphabets.RNA,
                            CommonSequenceParser.GetAlphabet(MoleculeType.snoRNA));
            Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
                                        MoleculeType.RNA).ToString());

            // Protein gets the same two-step check (non-empty string form, then equality)
            // as the other molecule types, so both assertions target the same input.
            Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
                                        MoleculeType.Protein).ToString());
            Assert.AreEqual(Alphabets.Protein,
                            CommonSequenceParser.GetAlphabet(MoleculeType.Protein));
            Assert.IsNull(
                CommonSequenceParser.GetAlphabet(MoleculeType.Invalid));

            ApplicationLog.WriteLine(
                "CommonSequenceParser BVT : All the features validated successfully.");
            Console.WriteLine(
                "CommonSequenceParser BVT : All the features validated successfully.");
        }
Exemplo n.º 6
0
 /// <summary>
 /// Creates a FastQParser that uses the supplied encoding. FASTQ format
 /// auto-detection is switched on and a CommonSequenceParser helper is created.
 /// </summary>
 /// <param name="encoding">The encoding to use for the parsed IQualitativeSequence objects.</param>
 public FastQParser(IEncoding encoding)
 {
     this.AutoDetectFastQFormat = true;
     this.Encoding = encoding;
     this._commonSequenceParser = new CommonSequenceParser();
 }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a FastQParser without assigning an encoding. FASTQ format
 /// auto-detection is switched on and a CommonSequenceParser helper is created.
 /// </summary>
 public FastQParser()
 {
     this.AutoDetectFastQFormat = true;
     this._commonSequenceParser = new CommonSequenceParser();
 }
Exemplo n.º 8
0
 /// <summary>
 /// Initializes a new instance of the NexusParser class.
 /// No encoding is assigned here; the constructor only creates the
 /// CommonSequenceParser helper held in _basicParser.
 /// </summary>
 public NexusParser()
 {
     this._basicParser = new CommonSequenceParser();
 }
Exemplo n.º 9
0
 /// <summary>
 /// Initializes a new instance of the PhylipParser class.
 /// No encoding is assigned here; the constructor only creates the
 /// CommonSequenceParser helper held in _basicParser.
 /// </summary>
 public PhylipParser()
 {
     this._basicParser = new CommonSequenceParser();
 }
Exemplo n.º 10
0
 /// <summary>
 /// Creates a FastaParser that uses the supplied encoding. A CommonSequenceParser
 /// helper is created first, then the encoding is stored.
 /// </summary>
 /// <param name="encoding">The encoding to use for parsed ISequence objects.</param>
 public FastaParser(IEncoding encoding)
 {
     this._commonSequenceParser = new CommonSequenceParser();
     this.Encoding = encoding;
 }
Exemplo n.º 11
0
 /// <summary>
 /// Creates a FastaParser without assigning an encoding; only the
 /// CommonSequenceParser helper is created.
 /// </summary>
 public FastaParser()
 {
     this._commonSequenceParser = new CommonSequenceParser();
 }
Exemplo n.º 12
0
        /// <summary>
        /// Parses a single FASTA sequence from a file using MBFStreamReader.
        /// This method is only used in data virtualization scenarios: it does not insert
        /// any symbols into the returned sequence. Instead it walks the record's lines to
        /// determine the alphabet, records the record's offsets in a SequencePointer,
        /// writes block indices and the pointer to the sidecar file provider (when one is
        /// set), and attaches a FileVirtualSequenceProvider to the sequence.
        /// </summary>
        /// <param name="mbfReader">
        /// The MBFStreamReader of the file to be parsed. Its current line must be a FASTA
        /// header, i.e. start with ">".
        /// </param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence should be in read-only mode.
        /// If this flag is set to true then the resulting sequence's IsReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The parsed sequence, backed by a FileVirtualSequenceProvider.</returns>
        /// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the current line does not start with ">", or when no alphabet can
        /// be identified for a sequence line while the parser's Alphabet is not set.
        /// </exception>
        protected ISequence ParseOneWithSpecificFormat(MBFStreamReader mbfReader, bool isReadOnly)
        {
            SequencePointer sequencePointer = new SequencePointer();

            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            string message;

            // A FASTA record must begin with a ">" header line.
            if (!mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(CultureInfo.InvariantCulture,
                                        Resource.INVALID_INPUT_FILE,
                                        Resource.FASTA_NAME);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Process header line.
            Sequence sequence;
            string   id = mbfReader.GetLineField(2).Trim();

            // Save initial start and end indices; the end index is advanced line by line below.
            // NOTE(review): StartingLine receives a difference of reader positions rather than
            // a line counter here - confirm this is what consumers of SequencePointer expect.
            sequencePointer.StartingLine    = (int)(mbfReader.Position - mbfReader.CurrentLineStartingIndex);
            sequencePointer.IndexOffsets[0] = mbfReader.Position;
            sequencePointer.IndexOffsets[1] = mbfReader.Position;

            mbfReader.GoToNextLine();

            IAlphabet alphabet = Alphabet;

            // No alphabet configured on the parser: identify one from the first line after the header.
            if (alphabet == null)
            {
                alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, mbfReader.Line);

                if (alphabet == null)
                {
                    message = string.Format(CultureInfo.InvariantCulture,
                                            Resource.InvalidSymbolInString,
                                            mbfReader.Line);
                    Trace.Report(message);
                    throw new FileFormatException(message);
                }
            }

            // Create the (empty) sequence, with the configured encoding when there is one.
            if (Encoding == null)
            {
                sequence = new Sequence(alphabet);
            }
            else
            {
                sequence = new Sequence(alphabet, Encoding, string.Empty)
                {
                    IsReadOnly = false
                };
            }

            // Bookkeeping for the block index written to the sidecar file:
            //   currentBlockSize         - amount of the current block already accounted for
            //                              (line lengths plus new-line characters).
            //   symbolCount              - running value passed to WriteBlockIndex; starts at -1,
            //                              presumably to make it a zero-based index - TODO confirm.
            //   prenewLineCharacterCount - carry-over used to seed the next block's size after a
            //                              line ends exactly at or within a block boundary.
            int currentBlockSize         = 0;
            int symbolCount              = -1;
            int newLineCharacterCount    = mbfReader.NewLineCharacterCount;
            int prenewLineCharacterCount = 0;
            int lineLength = mbfReader.Line.Length;

            sequence.ID = id;

            // Walk every line of this record, stopping at the next header or at end of input.
            while (mbfReader.HasLines && !mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                // Advance the end offset of the record by the length of this line.
                sequencePointer.IndexOffsets[1] += mbfReader.Line.Length;
                if (Alphabet == null)
                {
                    // Re-identify the alphabet with each line, starting from the sequence's current one.
                    alphabet = _commonSequenceParser.IdentifyAlphabet(sequence.Alphabet, mbfReader.Line);

                    if (alphabet == null)
                    {
                        message = string.Format(CultureInfo.InvariantCulture,
                                                Resource.InvalidSymbolInString,
                                                mbfReader.Line);
                        Trace.Report(message);
                        throw new FileFormatException(message);
                    }

                    // The identified alphabet changed: replace the sequence with one built on the new alphabet.
                    if (sequence.Alphabet != alphabet)
                    {
                        Sequence seq = new Sequence(alphabet, Encoding, sequence)
                        {
                            IsReadOnly = false
                        };
                        sequence.Clear();
                        sequence = seq;
                    }
                }

                newLineCharacterCount = mbfReader.NewLineCharacterCount;
                lineLength            = mbfReader.Line.Length;

                // Only runs when a sidecar file provider is set. Consumes the current line's
                // length, splitting it across blocks of _blockSize when it does not fit.
                while (lineLength != 0 && _sidecarFileProvider != null)
                {
                    if (lineLength + currentBlockSize + newLineCharacterCount <= _blockSize)
                    {
                        // The whole line and its new-line characters fit in the current block.
                        symbolCount      += lineLength;
                        currentBlockSize += lineLength + newLineCharacterCount;
                        lineLength        = 0;
                    }
                    else
                    {
                        // Fill the remainder of the current block; what is left of the line
                        // (if anything) is handled by the next iteration.
                        symbolCount += _blockSize - currentBlockSize;
                        lineLength   = lineLength - (_blockSize - currentBlockSize);
                        if (lineLength <= 0)
                        {
                            // The line itself fit; only (part of) its new-line characters spill
                            // over. Undo the overshoot and carry the spill into the next block.
                            symbolCount += lineLength;
                            prenewLineCharacterCount = newLineCharacterCount + lineLength;
                            lineLength = 0;
                        }

                        currentBlockSize = _blockSize;
                    }

                    if (currentBlockSize == _blockSize)
                    {
                        // Block is full: write its index to the sidecar file and start the next
                        // block, seeded with any carried-over new-line characters.
                        _sidecarFileProvider.WriteBlockIndex(symbolCount);
                        currentBlockSize         = prenewLineCharacterCount;
                        prenewLineCharacterCount = 0;
                    }
                }

                mbfReader.GoToNextLine();
            }

            if (_sidecarFileProvider != null)
            {
                // Write the index for the trailing partial block when the record spans more than
                // one block and the last block holds more than new-line characters; otherwise write 0.
                if (sequencePointer.IndexOffsets[1] - sequencePointer.IndexOffsets[0] > _blockSize &&
                    currentBlockSize - newLineCharacterCount > 0)
                {
                    _sidecarFileProvider.WriteBlockIndex(symbolCount);
                }
                else
                {
                    _sidecarFileProvider.WriteBlockIndex(0);
                }
            }

            // Derive the molecule type from the alphabet when the sequence does not have one.
            if (sequence.MoleculeType == MoleculeType.Invalid)
            {
                sequence.MoleculeType = CommonSequenceParser.GetMoleculeType(sequence.Alphabet);
            }

            sequence.IsReadOnly = isReadOnly;

            sequencePointer.AlphabetName = sequence.Alphabet.Name;
            sequencePointer.Id           = sequence.ID;

            if (_sidecarFileProvider != null)
            {
                // Write each sequence pointer to the sidecar file immediately
                _sidecarFileProvider.WritePointer(sequencePointer);
            }

            // Attach a provider built from this parser and the record's pointer, configured
            // with the parser's block size and block limit.
            FileVirtualSequenceProvider dataprovider = new FileVirtualSequenceProvider(this, sequencePointer)
            {
                BlockSize         = _blockSize,
                MaxNumberOfBlocks = _maxNumberOfBlocks
            };

            sequence.VirtualSequenceProvider = dataprovider;
            return(sequence);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Reads one FASTA record through an MBFTextReader and loads all of its symbols
        /// into an in-memory Sequence (the non-data-virtualization path).
        /// </summary>
        /// <param name="mbfReader">
        /// The MBFTextReader of the file to be parsed. Its current line must be a FASTA
        /// header, i.e. start with ">".
        /// </param>
        /// <param name="isReadOnly">
        /// Value assigned to the IsReadOnly property of the returned sequence once all
        /// lines of the record have been inserted.
        /// </param>
        /// <returns>The parsed sequence.</returns>
        /// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the current line does not start with ">", or when no alphabet can
        /// be identified for a sequence line while the parser's Alphabet is not set.
        /// </exception>
        protected ISequence ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // A FASTA record must begin with a ">" header line.
            if (!mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                string formatError = string.Format(CultureInfo.InvariantCulture,
                                                   Resource.INVALID_INPUT_FILE,
                                                   Resource.FASTA_NAME);
                Trace.Report(formatError);
                throw new FileFormatException(formatError);
            }

            // Take the identifier from the header, then step onto the first data line.
            string recordId = mbfReader.GetLineField(2).Trim();
            mbfReader.GoToNextLine();

            // Use the parser's configured alphabet, or identify one from the first data line.
            IAlphabet detected = Alphabet;
            if (detected == null)
            {
                detected = _commonSequenceParser.IdentifyAlphabet(detected, mbfReader.Line);
                if (detected == null)
                {
                    string symbolError = string.Format(CultureInfo.InvariantCulture,
                                                       Resource.InvalidSymbolInString,
                                                       mbfReader.Line);
                    Trace.Report(symbolError);
                    throw new FileFormatException(symbolError);
                }
            }

            Sequence result;
            if (Encoding != null)
            {
                result = new Sequence(detected, Encoding, string.Empty);
                result.IsReadOnly = false;
            }
            else
            {
                result = new Sequence(detected);
            }

            result.ID = recordId;

            // Consume data lines until the next header or the end of the input.
            for (;
                 mbfReader.HasLines && !mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase);
                 mbfReader.GoToNextLine())
            {
                if (Alphabet == null)
                {
                    // Re-identify with every line, starting from the sequence's current alphabet.
                    detected = _commonSequenceParser.IdentifyAlphabet(result.Alphabet, mbfReader.Line);
                    if (detected == null)
                    {
                        string symbolError = string.Format(CultureInfo.InvariantCulture,
                                                           Resource.InvalidSymbolInString,
                                                           mbfReader.Line);
                        Trace.Report(symbolError);
                        throw new FileFormatException(symbolError);
                    }

                    // Alphabet changed: rebuild the sequence on the new alphabet and drop the old one.
                    if (result.Alphabet != detected)
                    {
                        Sequence rebuilt = new Sequence(detected, Encoding, result);
                        rebuilt.IsReadOnly = false;
                        result.Clear();
                        result = rebuilt;
                    }
                }

                // Append this line's symbols at the end of the sequence.
                result.InsertRange(result.Count, mbfReader.Line);
            }

            // Derive the molecule type from the alphabet when the sequence does not have one.
            if (result.MoleculeType == MoleculeType.Invalid)
            {
                result.MoleculeType = CommonSequenceParser.GetMoleculeType(result.Alphabet);
            }

            result.IsReadOnly = isReadOnly;
            return result;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Parses a single FASTA record from a BioTextReader into a sequence.
        /// When _blockSize is zero or negative the record's symbols are inserted into the
        /// sequence; otherwise only line counts and lengths are tracked. Unless _blockSize
        /// equals FileLoadHelper.DefaultFullLoadBlockSize, a FileVirtualSequenceProvider is
        /// attached to the sequence before it is returned.
        /// </summary>
        /// <param name="bioReader">
        /// Reader whose current line must be a FASTA header, i.e. start with ">".
        /// </param>
        /// <param name="isReadOnly">
        /// Value assigned to the IsReadOnly property of the returned sequence once the
        /// record's lines have been processed.
        /// </param>
        /// <returns>A new Sequence instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when bioReader is null.</exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the current line does not start with ">", or when no alphabet can
        /// be identified for a sequence line while the parser's Alphabet is not set.
        /// </exception>
        protected ISequence ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // A FASTA record must begin with a ">" header line.
            if (!bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                string formatError = string.Format(CultureInfo.InvariantCulture,
                                                   Resource.INVAILD_INPUT_FILE,
                                                   Resource.FASTA_NAME);
                Trace.Report(formatError);
                throw new FileFormatException(formatError);
            }

            // Take the identifier from the header line.
            string recordId = bioReader.GetLineField(2).Trim();

            // A pointer is only created when the block size exceeds the full-load default;
            // the header line is counted into the running line totals in that case.
            SequencePointer pointer = null;
            if (_blockSize > FileLoadHelper.DefaultFullLoadBlockSize)
            {
                _lineCount++;
                _lineLength += bioReader.Line.Length;
                pointer = new SequencePointer();
                pointer.StartingLine = _lineCount;
            }

            bioReader.GoToNextLine();

            // Use the parser's configured alphabet, or identify one from the first data line.
            IAlphabet detected = Alphabet;
            if (detected == null)
            {
                detected = _commonSequenceParser.IdentifyAlphabet(detected, bioReader.Line);
                if (detected == null)
                {
                    string symbolError = string.Format(CultureInfo.InvariantCulture,
                                                       Resource.InvalidSymbolInString,
                                                       bioReader.Line);
                    Trace.Report(symbolError);
                    throw new FileFormatException(symbolError);
                }
            }

            Sequence result;
            if (Encoding != null)
            {
                result = new Sequence(detected, Encoding, string.Empty);
                result.IsReadOnly = false;
            }
            else
            {
                result = new Sequence(detected);
            }

            // Becomes true once the start offset of this record has been captured.
            bool startCaptured = false;

            result.ID = recordId;

            // Consume data lines until the next header or the end of the input.
            for (;
                 bioReader.HasLines && !bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase);
                 bioReader.GoToNextLine())
            {
                if (Alphabet == null)
                {
                    // Re-identify with every line, starting from the sequence's current alphabet.
                    detected = _commonSequenceParser.IdentifyAlphabet(result.Alphabet, bioReader.Line);
                    if (detected == null)
                    {
                        string symbolError = string.Format(CultureInfo.InvariantCulture,
                                                           Resource.InvalidSymbolInString,
                                                           bioReader.Line);
                        Trace.Report(symbolError);
                        throw new FileFormatException(symbolError);
                    }

                    // Alphabet changed: rebuild the sequence on the new alphabet and drop the old one.
                    if (result.Alphabet != detected)
                    {
                        Sequence rebuilt = new Sequence(detected, Encoding, result);
                        rebuilt.IsReadOnly = false;
                        result.Clear();
                        result = rebuilt;
                    }
                }

                if (_blockSize > 0)
                {
                    // Block-based load: remember where the record starts, then only
                    // accumulate the running length and line count.
                    if (!startCaptured)
                    {
                        _sequenceBeginsAt = _lineLength;
                        startCaptured     = true;
                    }

                    _lineLength += bioReader.Line.Length;
                    _lineCount++;
                }
                else
                {
                    // Full load: append this line's symbols at the end of the sequence.
                    result.InsertRange(result.Count, bioReader.Line);
                }
            }

            // Derive the molecule type from the alphabet when the sequence does not have one.
            if (result.MoleculeType == MoleculeType.Invalid)
            {
                result.MoleculeType = CommonSequenceParser.GetMoleculeType(result.Alphabet);
            }

            result.IsReadOnly = isReadOnly;

            // Full load: nothing further to attach.
            if (_blockSize == FileLoadHelper.DefaultFullLoadBlockSize)
            {
                return result;
            }

            // Complete and register the pointer when one was created for this record.
            if (pointer != null)
            {
                pointer.AlphabetName  = result.Alphabet.Name;
                pointer.Id            = result.ID;
                pointer.StartingIndex = _sequenceBeginsAt;
                pointer.EndingIndex   = _lineLength;
                _sequencePointers.Add(pointer);
            }

            _sequenceCount++;

            // Attach a provider built from this parser and the pointer, configured with
            // the parser's block size and block limit.
            FileVirtualSequenceProvider provider = new FileVirtualSequenceProvider(this, pointer);
            provider.BlockSize         = _blockSize;
            provider.MaxNumberOfBlocks = _maxNumberOfBlocks;

            result.VirtualSequenceProvider = provider;
            return result;
        }
Exemplo n.º 15
0
 /// <summary>
 /// Initializes a new instance of the ClustalWParser class.
 /// No encoding is assigned here; the constructor only creates the
 /// CommonSequenceParser helper held in _basicParser.
 /// </summary>
 public ClustalWParser()
 {
     this._basicParser = new CommonSequenceParser();
 }