Пример #1
0
 /// <summary>
 /// Returns the encoded default quality score for a FastQ format: the format's
 /// ASCII base value plus DefaultQualScore, cast to a byte.
 /// </summary>
 /// <param name="type">FastQ format type.</param>
 /// <returns>Encoded default quality score. Any type other than Sanger or Solexa
 /// uses the Illumina base value.</returns>
 public static byte GetDefaultQualScore(FastQFormatType type)
 {
     switch (type)
     {
         case FastQFormatType.Sanger:
             return (byte)(SangerAsciiBaseValue + DefaultQualScore);

         case FastQFormatType.Solexa:
             return (byte)(SolexaAsciiBaseValue + DefaultQualScore);

         default:
             // Everything that is neither Sanger nor Solexa is treated as Illumina.
             return (byte)(IlluminaAsciiBaseValue + DefaultQualScore);
     }
 }
Пример #2
0
 /// <summary>
 /// Checks a single quality score against the inclusive minimum/maximum limits
 /// of the given FastQ format.
 /// </summary>
 /// <param name="qualScore">Quality score to check.</param>
 /// <param name="type">FastQ format type whose limits apply.</param>
 /// <returns>True when the score lies within the limits, otherwise false. Any type
 /// other than Sanger or Solexa is checked against the Illumina limits.</returns>
 private static bool ValidateQualScore(byte qualScore, FastQFormatType type)
 {
     switch (type)
     {
         case FastQFormatType.Sanger:
             return qualScore >= SangerMinQualScore && qualScore <= SangerMaxQualScore;

         case FastQFormatType.Solexa:
             return qualScore >= SolexaMinQualScore && qualScore <= SolexaMaxQualScore;

         default:
             return qualScore >= IlluminaMinQualScore && qualScore <= IlluminaMaxQualScore;
     }
 }
Пример #3
0
 /// <summary>
 /// Decodes an ASCII encoded quality score by subtracting the ASCII base value
 /// of the given FastQ format.
 /// </summary>
 /// <param name="qualScore">ASCII encoded quality score.</param>
 /// <param name="type">FastQ format type.</param>
 /// <returns>The decoded quality score. Any type other than Sanger or Solexa
 /// uses the Illumina base value.</returns>
 private static int GetQualScore(byte qualScore, FastQFormatType type)
 {
     switch (type)
     {
         case FastQFormatType.Sanger:
             return qualScore - SangerAsciiBaseValue;

         case FastQFormatType.Solexa:
             return qualScore - SolexaAsciiBaseValue;

         default:
             return qualScore - IlluminaAsciiBaseValue;
     }
 }
Пример #4
0
 /// <summary>
 /// Encodes a quality score by adding the ASCII base value of the given FastQ
 /// format and casting the sum to a byte.
 /// </summary>
 /// <param name="qualScore">Quality score to encode.</param>
 /// <param name="type">FastQ format type.</param>
 /// <returns>The ASCII encoded quality score. Any type other than Sanger or Solexa
 /// uses the Illumina base value.</returns>
 private static byte GetEncodedQualScore(int qualScore, FastQFormatType type)
 {
     switch (type)
     {
         case FastQFormatType.Sanger:
             return (byte)(qualScore + SangerAsciiBaseValue);

         case FastQFormatType.Solexa:
             return (byte)(qualScore + SolexaAsciiBaseValue);

         default:
             return (byte)(qualScore + IlluminaAsciiBaseValue);
     }
 }
Пример #5
0
 /// <summary>
 /// Returns the maximum quality score constant for the given FastQ format.
 /// </summary>
 /// <param name="type">FastQ format type.</param>
 /// <returns>Maximum quality score. Any type other than Solexa or Sanger
 /// yields the Illumina maximum.</returns>
 public static byte GetMaxQualScore(FastQFormatType type)
 {
     switch (type)
     {
         case FastQFormatType.Solexa:
             return SolexaMaxQualScore;

         case FastQFormatType.Sanger:
             return SangerMaxQualScore;

         default:
             return IlluminaMaxQualScore;
     }
 }
Пример #6
0
        /// <summary>
        /// Checks every quality score in the array against the inclusive limits of the given FastQ format.
        /// </summary>
        /// <param name="qualScores">Quality scores to check.</param>
        /// <param name="type">FastQ format type whose limits apply.</param>
        /// <returns>False as soon as one score falls outside the limits; true otherwise.
        /// A type other than Sanger, Solexa or Illumina is not checked at all and yields true.</returns>
        private static bool ValidateQualScore(byte[] qualScores, FastQFormatType type)
        {
            switch (type)
            {
                case FastQFormatType.Sanger:
                    foreach (byte score in qualScores)
                    {
                        if (score < SangerMinQualScore || score > SangerMaxQualScore)
                        {
                            return false;
                        }
                    }

                    break;

                case FastQFormatType.Solexa:
                    foreach (byte score in qualScores)
                    {
                        if (score < SolexaMinQualScore || score > SolexaMaxQualScore)
                        {
                            return false;
                        }
                    }

                    break;

                case FastQFormatType.Illumina:
                    foreach (byte score in qualScores)
                    {
                        if (score < IlluminaMinQualScore || score > IlluminaMaxQualScore)
                        {
                            return false;
                        }
                    }

                    break;
            }

            // Either every score passed, or the format type had no case above.
            return true;
        }
Пример #7
0
        /// <summary>
        /// Gets the FastQFormatType enum value corresponding to a given string value.
        /// The string is parsed case-insensitively with Enum.Parse.
        /// </summary>
        /// <param name="formatAsString">Fastq format as a string.</param>
        /// <returns>FastQ format type enum.</returns>
        /// <exception cref="ArgumentNullException">formatAsString is null.</exception>
        /// <exception cref="ArgumentException">formatAsString is rejected by Enum.Parse
        /// (for example, it is empty or does not name a FastQFormatType member).</exception>
        public static FastQFormatType GetQualityFormatType(string formatAsString)
        {
            if (formatAsString == null)
            {
                // The single-string ArgumentNullException constructor expects the parameter name.
                throw new ArgumentNullException("formatAsString");
            }

            // Let Enum.Parse's own ArgumentException propagate: re-wrapping it would discard
            // the original stack trace without adding any information.
            return (FastQFormatType)Enum.Parse(typeof(FastQFormatType), formatAsString, true);
        }
 /// <summary>
 /// Lazily yields the sequences parsed from the gzip-compressed file named by Filename.
 /// </summary>
 /// <returns>One QualitativeSequence per non-null result of ParseOne.</returns>
 public override System.Collections.Generic.IEnumerable <Bio.QualitativeSequence> Parse()
 {
     using (GZipStream decompressed = new GZipStream(new FileInfo(Filename).OpenRead(), CompressionMode.Decompress))
     using (StreamReader reader = new StreamReader(decompressed))
     {
         FastQFormatType formatType = this.FormatType;

         // ParseOne is always attempted at least once, even when the stream is empty.
         while (true)
         {
             var parsed = ParseOne(reader, formatType);
             if (parsed != null)
             {
                 yield return(parsed);
             }

             if (reader.EndOfStream)
             {
                 break;
             }
         }
     }
 }
Пример #9
0
        /// <summary>
        /// General method to invalidate the FastQ parser: parses the file configured for
        /// the given xml node and fails the test unless the parser throws.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="IsParseOne">True for FastQParseOne validations, else false</param>
        void InValidateFastQParser(string nodeName, bool IsParseOne)
        {
            // Gets the input file path and format type from the Xml
            string filePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));

            // Create a FastQ Parser object.
            using (FastQParser fastQParserObj = new FastQParser())
            {
                fastQParserObj.AutoDetectFastQFormat = true;
                fastQParserObj.FastqType             = expectedFormatType;

                bool exceptionThrown = false;
                try
                {
                    if (IsParseOne)
                    {
                        fastQParserObj.ParseOne(filePath);
                    }
                    else
                    {
                        fastQParserObj.Parse(filePath);
                    }
                }
                catch (Exception)
                {
                    exceptionThrown = true;
                    ApplicationLog.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                    Console.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                }

                // The failure must be raised outside the try block: Assert.Fail reports by
                // throwing, so a surrounding catch (Exception) would swallow it and the
                // test could never fail.
                if (!exceptionThrown)
                {
                    Assert.Fail("FastQ Parser P2 : Expected the parser to throw an exception.");
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Creates a QualitativeSequence from raw symbol bytes and raw quality score bytes.
        /// Both arrays are copied, so the new instance does not share storage with the caller.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">An array of bytes representing the symbols.</param>
        /// <param name="qualityScores">An array of bytes representing the quality scores.</param>
        /// <param name="validate">When true, the symbols are validated against the alphabet and the
        /// quality scores against the format limits; when false, no validation is performed.</param>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, byte[] qualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (qualityScores == null)
            {
                throw new ArgumentNullException("qualityScores");
            }

            this.Alphabet   = alphabet;
            this.ID         = string.Empty;
            this.FormatType = fastQFormatType;

            // Symbols are checked first, then the quality scores.
            if (validate && !this.Alphabet.ValidateSequence(sequence, 0, sequence.LongLength))
            {
                throw new ArgumentOutOfRangeException("sequence");
            }

            if (validate && !ValidateQualScore(qualityScores, this.FormatType))
            {
                throw new ArgumentOutOfRangeException("qualityScores");
            }

            // Private copies of the caller's arrays.
            this.sequenceData  = (byte[])sequence.Clone();
            this.qualityScores = (byte[])qualityScores.Clone();

            this.Count = this.sequenceData.LongLength;
        }
Пример #11
0
        /// <summary>
        /// Creates a QualitativeSequence from a symbol string and a quality score string.
        /// Both strings are converted to bytes using ASCII encoding.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">A string representing the symbols.</param>
        /// <param name="qualityScores">A string representing the quality scores.</param>
        /// <param name="validate">When true, the symbols are validated against the alphabet and the
        /// quality scores against the format limits; when false, no validation is performed.</param>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string qualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID       = string.Empty;

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (qualityScores == null)
            {
                throw new ArgumentNullException("qualityScores");
            }

            this.FormatType = fastQFormatType;

            byte[] symbolBytes = ASCIIEncoding.ASCII.GetBytes(sequence);
            byte[] scoreBytes  = ASCIIEncoding.ASCII.GetBytes(qualityScores);
            this.sequenceData  = symbolBytes;
            this.qualityScores = scoreBytes;

            // Symbols are checked first, then the quality scores.
            if (validate && !this.Alphabet.ValidateSequence(symbolBytes, 0, symbolBytes.LongLength))
            {
                throw new ArgumentOutOfRangeException("sequence");
            }

            if (validate && !ValidateQualScore(scoreBytes, this.FormatType))
            {
                throw new ArgumentOutOfRangeException("qualityScores");
            }

            this.Count = symbolBytes.LongLength;
        }
Пример #12
0
        /// <summary>
        /// Maps a format name to its FastQFormatType value.
        /// </summary>
        /// <param name="formatType">Illumina/Sanger/Solexa</param>
        /// <returns>The matching FastQFormatType; Illumina_v1_3 for "Illumina" and for any
        /// name that is not recognised.</returns>
        public static FastQFormatType GetFastQFormatType(string formatType)
        {
            switch (formatType)
            {
                case "Sanger":
                    return FastQFormatType.Sanger;

                case "Solexa":
                    return FastQFormatType.Solexa_Illumina_v1_0;

                default:
                    // Covers "Illumina" as well as every unrecognised name.
                    return FastQFormatType.Illumina_v1_3;
            }
        }
Пример #13
0
        /// <summary>
        ///     Gets the IEnumerable of QualitativeSequences from the stream being parsed.
        ///     The stream is read lazily, while the returned sequence is enumerated.
        /// </summary>
        /// <param name="stream">Stream to be parsed.</param>
        /// <returns>Returns the QualitativeSequences.</returns>
        /// <exception cref="ArgumentNullException">stream is null. Thrown immediately by this call,
        /// not on first enumeration.</exception>
        public IEnumerable <IQualitativeSequence> Parse(Stream stream)
        {
            // Validate here rather than in the iterator: the body of a method containing
            // "yield" does not run until enumeration starts, which would delay this check.
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            return this.ParseIterator(stream);
        }

        /// <summary>
        ///     Iterator behind Parse(Stream): yields every non-null sequence returned by ParseOne
        ///     until the reader has no more characters.
        /// </summary>
        /// <param name="stream">Non-null stream to be parsed.</param>
        /// <returns>Returns the QualitativeSequences.</returns>
        private IEnumerable <IQualitativeSequence> ParseIterator(Stream stream)
        {
            // Read when enumeration starts, matching the timing of the original single-method iterator.
            FastQFormatType formatType = this.FormatType;

            using (StreamReader reader = stream.OpenRead())
            {
                while (reader.Peek() != -1)
                {
                    IQualitativeSequence seq = ParseOne(reader, formatType);
                    if (seq != null)
                    {
                        yield return(seq);
                    }
                }
            }
        }
Пример #14
0
        /// <summary>
        /// Creates a QualitativeSequence from the SimpleDnaSangerNode test data and validates its
        /// reversed, reverse-complemented and complemented sequences against the expected
        /// values stored in the Xml.
        /// </summary>
        public void ValidateSangerFormatTypeDnaReverseComplement()
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.SimpleDnaSangerNode, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(Constants.SimpleDnaSangerNode, Constants.FastQFormatType));
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleDnaSangerNode, Constants.inputSequenceNode);
            string compSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleDnaSangerNode, Constants.ComplementQualSeqNode);
            string expectedRevCompSeq = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleDnaSangerNode, Constants.RevComplement);
            string expectedRevSeq = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleDnaSangerNode, Constants.ReverseQualSeq);
            string inputQuality = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleDnaSangerNode, Constants.InputByteArrayNode);

            QualitativeSequence createdQualitativeSequence =
                new QualitativeSequence(alphabet, expectedFormatType,
                                        inputSequence, inputQuality);

            ISequence revSeq     = createdQualitativeSequence.GetReversedSequence();
            ISequence revCompSeq = createdQualitativeSequence.GetReverseComplementedSequence();
            ISequence compSeq    = createdQualitativeSequence.GetComplementedSequence();

            // Each derived sequence is turned back into a string for comparison.
            Assert.AreEqual(expectedRevSeq, new string(revSeq.Select(a => (char)a).ToArray()));
            Assert.AreEqual(expectedRevCompSeq, new string(revCompSeq.Select(a => (char)a).ToArray()));
            Assert.AreEqual(compSequence, new string(compSeq.Select(a => (char)a).ToArray()));

            ApplicationLog.WriteLine("Qualitative BVT: Successfully validated Reverse, Complement and ReverseComplement sequence");
            Console.WriteLine("Qualitative BVT: Successfully validated Reverse, Complement and ReverseComplement sequence");
        }
Пример #15
0
        /// <summary>
        /// Maps a format name to its FastQFormatType value.
        /// </summary>
        /// <param name="formatType">Illumina/Sanger/Solexa</param>
        /// <returns>The matching FastQFormatType; Illumina for "Illumina" and for any
        /// name that is not recognised.</returns>
        internal static FastQFormatType GetFastQFormatType(string formatType)
        {
            if (formatType == "Sanger")
            {
                return FastQFormatType.Sanger;
            }

            if (formatType == "Solexa")
            {
                return FastQFormatType.Solexa;
            }

            // "Illumina" and every unrecognised name share the same result.
            return FastQFormatType.Illumina;
        }
Пример #16
0
        /// <summary>
        /// General method to create a QualitativeSequence from the Xml test data of the given
        /// node and validate its quality scores, alphabet, symbols, counts and format type.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="parameters">Selects which constructor is exercised: Score uses the
        /// string based constructor, ByteArray the byte array based one.</param>
        void GeneralQualitativeSequence(
            string nodeName, QualitativeSequenceParameters parameters)
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         nodeName, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
            string expectedScore = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedScore);
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.inputSequenceNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string expectedSequenceCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.QSequenceCount);
            string inputQuality = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InputByteArrayNode);

            Byte[] inputScoreArray = encodingObj.GetBytes(inputQuality);

            // Create the Qualitative Sequence with the requested constructor.
            QualitativeSequence createdQualitativeSequence = null;
            switch (parameters)
            {
            case QualitativeSequenceParameters.Score:
                createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                                                                     inputSequence, inputQuality);
                break;

            case QualitativeSequenceParameters.ByteArray:
                byte[] scoreValue = ASCIIEncoding.ASCII.GetBytes(inputSequence);
                createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                                                                     scoreValue, inputScoreArray);
                break;

            default:
                break;
            }

            // Must come before the first dereference; otherwise an unsupported parameter value
            // surfaces as a NullReferenceException instead of an assertion failure.
            Assert.IsNotNull(createdQualitativeSequence);

            // Validate score
            int index = 0;
            foreach (byte qualScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(inputScoreArray[index], qualScore);
                index++;
            }

            string qualitativeSequence = new string(createdQualitativeSequence.Select(a => (char)a).ToArray());

            // Validate createdSequence qualitative sequence.
            Assert.AreEqual(alphabet, createdQualitativeSequence.Alphabet);
            Assert.AreEqual(expectedSequence, qualitativeSequence);
            Assert.AreEqual(expectedSequenceCount, createdQualitativeSequence.Count.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedScore, createdQualitativeSequence.QualityScores.Count().ToString((IFormatProvider)null));
            Assert.AreEqual(expectedFormatType, createdQualitativeSequence.FormatType);

            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "Qualitative Sequence BVT:Qualitative Sequence {0} is as expected.",
                                            qualitativeSequence));

            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "Qualitative Sequence BVT:Qualitative Sequence Score {0} is as expected.",
                                            createdQualitativeSequence.Count().ToString((IFormatProvider)null)));

            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "Qualitative Sequence BVT:Qualitative format type {0} is as expected.",
                                            createdQualitativeSequence.FormatType));
        }
 /// <summary>
 /// Initializes a new instance of the CompactSAMSequence class with specified alphabet, quality score type,
 /// byte array representing symbols and integer array representing base quality scores
 /// (Phred or Solexa base according to the FastQ format type).
 /// All arguments are passed unchanged to the base class constructor; this constructor adds no logic of its own.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this instance should conform.</param>
 /// <param name="fastQFormatType">FastQ format type.</param>
 /// <param name="sequence">An array of bytes representing the symbols.</param>
 /// <param name="qualityScores">An array of integers representing the base quality scores
 /// (Phred or Solexa base according to the FastQ format type).</param>
 /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
 /// else validation will be skipped.</param>
 public CompactSAMSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, int[] qualityScores, bool validate)
     : base(alphabet, fastQFormatType, sequence, qualityScores, validate)
 {
 }
Пример #18
0
        /// <summary>
        /// Parses sequence data and quality values and stores the resulting sequence in the
        /// QuerySequence property of the given SAMAlignedSequence.
        /// "." symbols are replaced with N and "=" symbols with the symbol found at the same index
        /// of the reference sequence; the positions of both are recorded on the aligned sequence.
        /// Nothing is done when the sequence data is "*"; a quality value of "*" produces a
        /// Sequence instead of a QualitativeSequence.
        /// </summary>
        /// <param name="alignedSeq">SAM aligned Sequence.</param>
        /// <param name="alphabet">Alphabet of the sequence to be created.</param>
        /// <param name="Encoding">Encoding to use while creating sequence; may be null.</param>
        /// <param name="sequencedata">Sequence data.</param>
        /// <param name="qualitydata">Quality values.</param>
        /// <param name="refSeq">Reference sequence if known; required when the sequence data contains "=".</param>
        /// <param name="isReadOnly">Value assigned to IsReadOnly of the created sequence.</param>
        public static void ParseQualityNSequence(SAMAlignedSequence alignedSeq, IAlphabet alphabet, IEncoding Encoding, string sequencedata, string qualitydata, ISequence refSeq, bool isReadOnly)
        {
            if (alignedSeq == null)
            {
                throw new ArgumentNullException("alignedSeq");
            }

            if (string.IsNullOrWhiteSpace(sequencedata))
            {
                throw new ArgumentNullException("sequencedata");
            }

            if (string.IsNullOrWhiteSpace(qualitydata))
            {
                throw new ArgumentNullException("qualitydata");
            }

            FastQFormatType fastQType = QualityFormatType;

            // "*" as sequence data: nothing to parse.
            if (sequencedata.Equals("*"))
            {
                return;
            }

            // "*" as quality data: build a plain sequence without quality scores.
            bool hasQualityScores = !qualitydata.Equals("*");
            byte[] encodedScores = null;

            if (hasQualityScores)
            {
                encodedScores = ASCIIEncoding.ASCII.GetBytes(qualitydata);

                // One quality value is required per symbol.
                if (sequencedata.Length != qualitydata.Length)
                {
                    string detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoresLength, alignedSeq.QName);
                    string errorText = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Resource.SAM_NAME, detail);
                    Trace.Report(errorText);
                    throw new FileFormatException(errorText);
                }
            }

            // Record the position of every "." symbol.
            for (int dotPos = sequencedata.IndexOf('.', 0); dotPos > -1; dotPos = sequencedata.IndexOf('.', dotPos + 1))
            {
                alignedSeq.DotSymbolIndexes.Add(dotPos);
            }

            // replace "." with N
            if (alignedSeq.DotSymbolIndexes.Count > 0)
            {
                sequencedata = sequencedata.Replace('.', 'N');
            }

            // Record the position of every "=" symbol.
            for (int equalPos = sequencedata.IndexOf('=', 0); equalPos > -1; equalPos = sequencedata.IndexOf('=', equalPos + 1))
            {
                alignedSeq.EqualSymbolIndexes.Add(equalPos);
            }

            // replace "=" with corresponding symbol from refSeq.
            if (alignedSeq.EqualSymbolIndexes.Count > 0)
            {
                if (refSeq == null)
                {
                    throw new ArgumentException(Resource.RefSequenceNofFound);
                }

                for (int n = 0; n < alignedSeq.EqualSymbolIndexes.Count; n++)
                {
                    int position = alignedSeq.EqualSymbolIndexes[n];
                    sequencedata = sequencedata
                        .Remove(position, 1)
                        .Insert(position, refSeq[position].Symbol.ToString());
                }
            }

            ISequence parsedSequence;
            if (hasQualityScores)
            {
                QualitativeSequence withQuality = Encoding == null
                    ? new QualitativeSequence(alphabet, fastQType, sequencedata, encodedScores)
                    : new QualitativeSequence(alphabet, fastQType, Encoding, sequencedata, encodedScores);

                withQuality.ID = alignedSeq.QName;
                withQuality.IsReadOnly = isReadOnly;
                parsedSequence = withQuality;
            }
            else
            {
                Sequence withoutQuality = Encoding == null
                    ? new Sequence(alphabet, sequencedata)
                    : new Sequence(alphabet, Encoding, sequencedata);

                withoutQuality.ID = alignedSeq.QName;
                withoutQuality.IsReadOnly = isReadOnly;
                parsedSequence = withoutQuality;
            }

            alignedSeq.QuerySequence = parsedSequence;
        }
Пример #19
0
        /// <summary>
        /// General method to validate the non-gap index lookups of a Qualitative Sequence
        /// against the expected indices stored in the Xml.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="indexParam">Selects which index lookup is validated.</param>
        void ValidateGeneralQualitativeSeqItemIndices(
            string nodeName, QualitativeSequenceParameters indexParam)
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         nodeName, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.inputSequenceNode);
            string firstIndexText = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstItemIndex);
            string lastIndexText = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.LastItemIndex);
            string gapIndexText = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IndexOfGap);

            // The quality string is the default quality score of the format, as one character.
            string defaultQuality =
                ((char)QualitativeSequence.GetDefaultQualScore(expectedFormatType)).ToString();
            QualitativeSequence sequenceUnderTest = new QualitativeSequence(
                alphabet, expectedFormatType, inputSequence, defaultQuality);

            // Run the selected lookup and compare it with the expected index.
            switch (indexParam)
            {
            case QualitativeSequenceParameters.IndexOfNonGap:
            {
                long firstNonGap = sequenceUnderTest.IndexOfNonGap();
                Assert.AreEqual(firstNonGap, Convert.ToInt32(firstIndexText, (IFormatProvider)null));
                break;
            }

            case QualitativeSequenceParameters.IndexOfNonGapWithParam:
            {
                long nonGapFromFive = sequenceUnderTest.IndexOfNonGap(5);
                Assert.AreEqual(nonGapFromFive, Convert.ToInt32(gapIndexText, (IFormatProvider)null));
                break;
            }

            case QualitativeSequenceParameters.LastIndexOf:
            {
                long lastNonGap = sequenceUnderTest.LastIndexOfNonGap();
                Assert.AreEqual(lastNonGap, Convert.ToInt32(lastIndexText, (IFormatProvider)null));
                break;
            }

            case QualitativeSequenceParameters.LastIndexOfWithPam:
            {
                long lastNonGapFromFive = sequenceUnderTest.LastIndexOfNonGap(5);
                Assert.AreEqual(lastNonGapFromFive, Convert.ToInt32(gapIndexText, (IFormatProvider)null));
                break;
            }

            default:
                break;
            }

            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine("Qualitative Sequence P1 : Qualitative SequenceItems indices validation completed successfully.");
        }
Пример #20
0
        /// <summary>
        /// Builds a string of the requested length in which every character is the
        /// default encoded quality score of the given FastQ format.
        /// </summary>
        /// <param name="formatType">Fastq format type.</param>
        /// <param name="length">No of quality scores required.</param>
        /// <returns>String holding <paramref name="length"/> default encoded quality scores.</returns>
        public static string GetDefaultEncodedQualityScores(FastQFormatType formatType, int length)
        {
            char[] buffer = new char[length];
            int position = 0;
            while (position < buffer.Length)
            {
                // The default score is a byte; each one is stored as the character with that code.
                buffer[position] = (char)QualitativeSequence.GetDefaultQualScore(formatType);
                position++;
            }

            return new string(buffer);
        }
Пример #21
0
 /// <summary>
 /// Constructor for performing quality score-level QC when the maximum read
 /// length and the total number of reads are supplied by the caller.
 /// </summary>
 /// <param name="sequences">Sequence parser; forwarded to the base constructor.</param>
 /// <param name="readLengthMax">Maximum read length</param>
 /// <param name="count">Total number of reads</param>
 /// <param name="format">FastQ Format Type.</param>
 /// <param name="filename">Input filename; forwarded to the base constructor.</param>
 public QualityScoreAnalyzer(ISequenceParser sequences, long readLengthMax, long count, FastQFormatType format, string filename)
     : base(sequences, filename)
 {
     // Hand the format and the caller-supplied read statistics to Initialize.
     Initialize(format, readLengthMax, count);
 }
Пример #22
0
        /// <summary>
        /// General method to validate creation of Qualitative sequence.
        /// Reads the test inputs and expected values from the XML node, builds a
        /// QualitativeSequence through the constructor selected by
        /// <paramref name="parameters"/>, and asserts its scores, alphabet,
        /// sequence text, count and format type.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="parameters">Different Qualitative Sequence parameters.</param>
        void GeneralQualitativeSequence(
            string nodeName, QualitativeSequenceParameters parameters)
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         nodeName, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
            QualitativeSequence createdQualitativeSequence = null;
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.inputSequenceNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string expectedSequenceCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.QSequenceCount);
            string inputScoreforIUPAC = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MaxScoreNode);
            string inputQuality = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InputByteArrayNode);

            byte[] byteArray = Encoding.UTF8.GetBytes(inputQuality);
            int    index     = 0;

            // Create and validate Qualitative Sequence.
            // All assertions below pass the expected value first so that a failure
            // message reports "expected" and "actual" the right way round.
            switch (parameters)
            {
            case QualitativeSequenceParameters.Score:
                createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                                                                     inputSequence, Utility.GetDefaultEncodedQualityScores(expectedFormatType, inputSequence.Length));
                // Validate score: every encoded score must equal the value configured in the XML.
                foreach (byte qualScore in createdQualitativeSequence.GetEncodedQualityScores())
                {
                    Assert.AreEqual(Convert.ToInt32(inputScoreforIUPAC, (IFormatProvider)null), qualScore);
                }
                break;

            case QualitativeSequenceParameters.ByteArray:
                createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                                                                     Encoding.UTF8.GetBytes(inputSequence), byteArray);

                // Validate score: the sequence must return the input bytes in the same order.
                foreach (byte qualScore in createdQualitativeSequence.GetEncodedQualityScores())
                {
                    Assert.AreEqual(Convert.ToInt32(byteArray[index], (IFormatProvider)null), qualScore);
                    index++;
                }
                break;

            default:
                // No sequence is created for other values; the IsNotNull assertion below then fails.
                break;
            }

            // Validate createdSequence qualitative sequence.
            Assert.IsNotNull(createdQualitativeSequence);
            Assert.AreEqual(alphabet, createdQualitativeSequence.Alphabet);
            Assert.AreEqual(expectedSequence, createdQualitativeSequence.ConvertToString());
            Assert.AreEqual(expectedSequenceCount, createdQualitativeSequence.Count.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(string.Format(null, "Qualitative Sequence P1:Qualitative Sequence {0} is as expected.", createdQualitativeSequence));

            // Join the individual scores; formatting the collection itself would only log its type name.
            ApplicationLog.WriteLine(string.Format(null, "Qualitative Sequence P1:Qualitative Sequence Score {0} is as expected.", string.Join(",", createdQualitativeSequence.GetEncodedQualityScores())));
            Assert.AreEqual(expectedFormatType, createdQualitativeSequence.FormatType);
            ApplicationLog.WriteLine(string.Format(null, "Qualitative Sequence P1:Qualitative format type {0} is as expected.", createdQualitativeSequence.FormatType));
        }
Пример #23
0
        /// <summary>
        /// Looks up the minimum encoded quality score constant for a FastQ format.
        /// </summary>
        /// <param name="formatType">FastQ format type.</param>
        /// <returns>
        /// The minimum encoded quality score for <paramref name="formatType"/>; any format
        /// without its own case yields the Illumina v1.8 value.
        /// </returns>
        public static byte GetMinEncodedQualScore(FastQFormatType formatType)
        {
            switch (formatType)
            {
                case FastQFormatType.Sanger:
                    return Sanger_MinEncodedQualScore;

                case FastQFormatType.Solexa_Illumina_v1_0:
                    return Solexa_Illumina_v1_0_MinEncodedQualScore;

                case FastQFormatType.Illumina_v1_3:
                    return Illumina_v1_3_MinEncodedQualScore;

                case FastQFormatType.Illumina_v1_5:
                    return Illumina_v1_5_MinEncodedQualScore;

                default:
                    return Illumina_v1_8_MinEncodedQualScore;
            }
        }
Пример #24
0
        /// <summary>
        /// Converts an array of encoded quality scores from one FastQ format to another.
        /// The input array is never modified; a new array is always returned.
        /// </summary>
        /// <param name="fromFormatType">from fastq format.</param>
        /// <param name="toFormatType">to fastq format.</param>
        /// <param name="encodedqualScores">Encoded quality scores.</param>
        /// <returns>New array holding the scores encoded for <paramref name="toFormatType"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encodedqualScores"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A score fails validation against <paramref name="fromFormatType"/>.
        /// </exception>
        public static byte[] ConvertEncodedQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, byte[] encodedqualScores)
        {
            if (encodedqualScores == null)
            {
                throw new ArgumentNullException("encodedqualScores");
            }

            byte offendingScore;
            bool allScoresValid = ValidateQualScores(encodedqualScores, fromFormatType, out offendingScore);
            if (!allScoresValid)
            {
                // The offending score is reported as its character form.
                throw new ArgumentOutOfRangeException(
                    "encodedqualScores",
                    string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore,(char) offendingScore));
            }

            if (fromFormatType != toFormatType)
            {
                // Decode, convert the numeric scores, then encode for the target format.
                int[] decodedScores = GetDecodedQualScores(encodedqualScores, fromFormatType);
                int[] convertedScores = ConvertQualityScores(fromFormatType, toFormatType, decodedScores);
                return GetEncodedQualScores(convertedScores, toFormatType);
            }

            // Same format on both sides: return a copy of the input.
            byte[] duplicate = new byte[encodedqualScores.GetLongLength()];
            Helper.Copy(encodedqualScores, duplicate, encodedqualScores.GetLongLength());
            return duplicate;
        }
Пример #25
0
        /// <summary>
        /// Converts an array of quality scores from one FastQ format to another.
        /// The input array is never modified; a new array is always returned.
        /// </summary>
        /// <param name="fromFormatType">from fastq format.</param>
        /// <param name="toFormatType">to fastq format.</param>
        /// <param name="qualScores">Quality scores.</param>
        /// <returns>New array holding the scores for <paramref name="toFormatType"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="qualScores"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A score fails validation against <paramref name="fromFormatType"/>.
        /// </exception>
        public static sbyte[] ConvertQualityScores(FastQFormatType fromFormatType, FastQFormatType toFormatType, sbyte[] qualScores)
        {
            if (qualScores == null)
            {
                throw new ArgumentNullException("qualScores");
            }

            sbyte offendingScore;
            if (!ValidateQualScores(qualScores, fromFormatType, out offendingScore))
            {
                throw new ArgumentOutOfRangeException(
                    "qualScores",
                    string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, offendingScore));
            }

            if (fromFormatType != toFormatType)
            {
                BaseQualityScoreType sourceScoreType = GetQualityScoreType(fromFormatType);
                BaseQualityScoreType targetScoreType = GetQualityScoreType(toFormatType);

                // A real conversion is only needed when the two formats use different score types.
                if (sourceScoreType != targetScoreType)
                {
                    return Convert(sourceScoreType, targetScoreType, qualScores);
                }
            }

            // Identical format, or different formats sharing one score type: return a copy.
            sbyte[] duplicate = new sbyte[qualScores.GetLongLength()];
            Helper.Copy(qualScores, duplicate, qualScores.GetLongLength());
            return duplicate;
        }
Пример #26
0
        /// <summary>
        /// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ
        /// format type, the symbol bytes and the base quality scores given as integers
        /// (Phred or Solexa base according to the FastQ format type).
        /// Both arrays are copied; each quality score is narrowed to a signed byte.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">An array of bytes representing the symbols.</param>
        /// <param name="qualityScores">An array of integers representing the base quality scores
        /// (Phred or Solexa base according to the FastQ format type).</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="alphabet"/>, <paramref name="sequence"/> or <paramref name="qualityScores"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Validation is requested and the two arrays differ in length.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Validation is requested and a quality score is invalid for the format type.
        /// </exception>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, int[] qualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (qualityScores == null)
            {
                throw new ArgumentNullException("qualityScores");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;
            this.FormatType = fastQFormatType;

            long symbolCount = sequence.GetLongLength();
            long scoreCount = qualityScores.GetLongLength();

            if (validate)
            {
                // One quality score is required per symbol.
                if (symbolCount != scoreCount)
                {
                    throw new ArgumentException(
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                            symbolCount,
                            scoreCount));
                }

                // Validate sequence data
                bool symbolsAreValid = this.Alphabet.ValidateSequence(sequence, 0, symbolCount);
                if (!symbolsAreValid)
                {
                    throw Helper.GenerateAlphabetCheckFailureException(this.Alphabet, sequence);
                }

                // Validate quality scores
                int offendingScore;
                if (!ValidateQualScores(qualityScores, this.FormatType, out offendingScore))
                {
                    throw new ArgumentOutOfRangeException(
                        "qualityScores",
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.InvalidQualityScoreFound,
                            offendingScore,
                            this.FormatType));
                }
            }

            // Keep private copies so later changes to the caller's arrays do not affect this instance.
            this.sequenceData = new byte[symbolCount];
            Helper.Copy(sequence, this.sequenceData, symbolCount);

            this.qualityScores = new sbyte[scoreCount];
            for (long position = 0; position < scoreCount; position++)
            {
                this.qualityScores[position] = (sbyte)qualityScores[position];
            }

            this.Count = this.sequenceData.GetLongLength();
        }
Пример #27
0
        /// <summary>
        /// Converts a single quality score from one FastQ format to another.
        /// </summary>
        /// <param name="fromFormatType">from fastq format.</param>
        /// <param name="toFormatType">to fastq format.</param>
        /// <param name="qualScore">Quality score.</param>
        /// <returns>
        /// The score for <paramref name="toFormatType"/>. The input is returned unchanged
        /// when both formats are equal or use the same score type.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The score fails validation against <paramref name="fromFormatType"/>.
        /// </exception>
        public static int ConvertQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, int qualScore)
        {
            // Validation works on arrays, so wrap the single score.
            int offendingScore;
            if (!ValidateQualScores(new int[] { qualScore }, fromFormatType, out offendingScore))
            {
                throw new ArgumentOutOfRangeException(
                    "qualScore",
                    string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, offendingScore));
            }

            if (fromFormatType == toFormatType)
            {
                return qualScore;
            }

            BaseQualityScoreType sourceScoreType = GetQualityScoreType(fromFormatType);
            BaseQualityScoreType targetScoreType = GetQualityScoreType(toFormatType);
            if (sourceScoreType == targetScoreType)
            {
                return qualScore;
            }

            return Convert(sourceScoreType, targetScoreType, qualScore);
        }
Пример #28
0
        /// <summary>
        /// Controls execution of QC steps. Sets up the output directory, switches the
        /// process working directory to the folder of the input file, and creates the
        /// requested analysis modules.
        /// </summary>
        /// <param name="parser">ISequenceParser object holding the input sequence data</param>
        /// <param name="filename">Input filename</param>
        /// <param name="runSequenceQc">Indicates whether the sequence-level QC module should be initialized</param>
        /// <param name="runQualityScoreQc">Indicates whether the quality score-level QC module should be initialized</param>
        /// <param name="runBlast">Indicates whether the sequence contamination finder module should be initialized</param>
        /// <param name="format">FastQ Format Type, if applicable. Otherwise use 'null'.</param>
        /// <param name="dir">Output directory. When null, a folder named after the file prefix is used
        /// inside the directory of <paramref name="filename"/>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="parser"/> or <paramref name="filename"/> is null, or
        /// <paramref name="format"/> is null while quality score QC is requested for a
        /// parser that is not a FastAParser.
        /// </exception>
        public Seqcos(ISequenceParser parser, string filename, bool runSequenceQc, bool runQualityScoreQc, bool runBlast, string format, string dir = null)
        {
            if (parser == null)
            {
                throw new ArgumentNullException("parser");
            }

            if (filename == null)
            {
                throw new ArgumentNullException("filename");
            }

            // Quality score QC is never initialized for FastAParser input.
            bool initializeQualityScoreQc = runQualityScoreQc && !(parser is FastAParser);

            // Reject a missing format before touching the file system or the working directory.
            if (initializeQualityScoreQc && format == null)
            {
                throw new ArgumentNullException("format");
            }

            // (deprecated) Register AssemblyResolve event handler - for dealing with Sho libaries that are located
            // externally from this application's install folder
            //AppDomain currentDomain = AppDomain.CurrentDomain;
            //currentDomain.AssemblyResolve += new ResolveEventHandler(OnAssemblyResolveEventHandler);

            this.myFilenames    = new Filenames(filename, Resource.ChartFormat);
            this.SelectedParser = parser;

            string inputDirectory = Path.GetDirectoryName(filename);

            // Path.Combine inserts the platform's separator instead of a hard-coded backslash.
            this.OutputDirectory = dir ?? Path.Combine(inputDirectory, myFilenames.Prefix);
            this.InitialWorkingDirectory = inputDirectory;
            Directory.SetCurrentDirectory(this.InitialWorkingDirectory);

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(this.OutputDirectory);

            // Initialize SequenceAnalyzer
            this.SequenceQc = runSequenceQc ? new SequenceAnalyzer(this.SelectedParser, myFilenames.FileName) : null;

            // Initialize QualityScoreAnalyzer
            if (initializeQualityScoreQc)
            {
                FastQFormatType myFormat = BioHelper.GetQualityFormatType(format);

                if (runSequenceQc && this.SequenceQc != null)
                {
                    // Pass on the read statistics already held by the sequence-level analyzer.
                    this.QualityScoreQc = new QualityScoreAnalyzer(this.SelectedParser, this.SequenceQc.ReadLengthMax, this.SequenceQc.Count, myFormat, myFilenames.FileName);
                }
                else
                {
                    this.QualityScoreQc = new QualityScoreAnalyzer(this.SelectedParser, myFormat, myFilenames.FileName);
                }
            }
            else
            {
                this.QualityScoreQc = null;
            }

            // Initialize ContaminationFinder
            this.ContaminationFinder = runBlast ? new SequenceContaminationFinder(this.SelectedParser) : null;

            this.HasPlottedSequenceStats     = false;
            this.HasPlottedQualityScoreStats = false;
        }
Пример #29
0
        /// <summary>
        /// Converts a single encoded quality score from one FastQ format to another.
        /// </summary>
        /// <param name="fromFormatType">from fastq format.</param>
        /// <param name="toFormatType">to fastq format.</param>
        /// <param name="encodedqualScore">Encoded quality score.</param>
        /// <returns>
        /// The score encoded for <paramref name="toFormatType"/>; the input itself when both formats are equal.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The score fails validation against <paramref name="fromFormatType"/>.
        /// </exception>
        public static byte ConvertEncodedQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, byte encodedqualScore)
        {
            // Validation works on arrays, so wrap the single score.
            byte offendingScore;
            if (!ValidateQualScores(new byte[] {encodedqualScore}, fromFormatType, out offendingScore))
            {
                // The offending score is reported as its character form.
                throw new ArgumentOutOfRangeException(
                    "encodedqualScore",
                    string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, (char)offendingScore));
            }

            if (fromFormatType == toFormatType)
            {
                return encodedqualScore;
            }

            // Decode, convert the numeric score, then encode for the target format.
            int decodedScore = GetDecodedQualScore(encodedqualScore, fromFormatType);
            int convertedScore = ConvertQualityScore(fromFormatType, toFormatType, decodedScore);
            return GetEncodedQualScore(convertedScore, toFormatType);
        }
Пример #30
0
        /// <summary>
        /// Parses sequence data and quality values and updates the SAMAlignedSequence instance.
        /// A sequence value of "*" leaves <paramref name="alignedSeq"/> untouched; a quality value of "*"
        /// produces a plain Sequence, otherwise a QualitativeSequence is created.
        /// </summary>
        /// <param name="alignedSeq">SAM aligned Sequence.</param>
        /// <param name="alphabet">Alphabet of the sequence to be created.</param>
        /// <param name="sequencedata">Sequence data.</param>
        /// <param name="qualitydata">Quality values.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="alignedSeq"/> is null, or <paramref name="sequencedata"/> or
        /// <paramref name="qualitydata"/> is null, empty or whitespace.
        /// </exception>
        /// <exception cref="FileFormatException">
        /// Quality values are present but their length differs from the sequence length.
        /// </exception>
        public static void ParseQualityNSequence(SAMAlignedSequence alignedSeq, IAlphabet alphabet, string sequencedata, string qualitydata)
        {
            if (alignedSeq == null)
            {
                throw new ArgumentNullException("alignedSeq");
            }

            if (string.IsNullOrWhiteSpace(sequencedata))
            {
                throw new ArgumentNullException("sequencedata");
            }

            if (string.IsNullOrWhiteSpace(qualitydata))
            {
                throw new ArgumentNullException("qualitydata");
            }

            FastQFormatType formatType = QualityFormatType;

            // "*" as the sequence value: nothing is assigned to the aligned sequence.
            if (sequencedata.Equals("*"))
            {
                return;
            }

            ISequence querySequence;
            if (qualitydata.Equals("*"))
            {
                // "*" as the quality value: build a sequence without quality scores.
                querySequence    = new Sequence(alphabet, sequencedata);
                querySequence.ID = alignedSeq.QName;
            }
            else
            {
                byte[] encodedScores = ASCIIEncoding.ASCII.GetBytes(qualitydata);

                // Check for sequence length and quality score length.
                if (sequencedata.Length != qualitydata.Length)
                {
                    string lengthDetail = string.Format(CultureInfo.CurrentCulture, Properties.Resource.FastQ_InvalidQualityScoresLength, alignedSeq.QName);
                    string errorText = string.Format(CultureInfo.CurrentCulture, Properties.Resource.IOFormatErrorMessage, Properties.Resource.SAM_NAME, lengthDetail);
                    Trace.Report(errorText);
                    throw new FileFormatException(errorText);
                }

                // The scores are handed over as the ASCII round trip of the quality text.
                QualitativeSequence qualitative = new QualitativeSequence(alphabet, formatType, sequencedata, ASCIIEncoding.ASCII.GetString(encodedScores));
                qualitative.ID = alignedSeq.QName;
                querySequence  = qualitative;
            }

            alignedSeq.QuerySequence = querySequence;
        }
Пример #31
0
 /// <summary>
 /// Gets the default quality score, encoded for the specified FastQFormatType.
 /// </summary>
 /// <param name="type">FastQ format type.</param>
 /// <returns>Quality score.</returns>
 public static byte GetDefaultQualScore(FastQFormatType type)
 {
     byte encodedDefault = (byte)GetEncodedQualScore(DefaultQualScore, type);
     return encodedDefault;
 }
Пример #32
0
        /// <summary>
        /// Looks up the Ascii base value constant for a FastQ format.
        /// </summary>
        /// <param name="formatType">FastQ format.</param>
        /// <returns>
        /// The Ascii base value for <paramref name="formatType"/>; any format without its
        /// own case yields the Illumina v1.8 value.
        /// </returns>
        private static int GetAsciiBaseValue(FastQFormatType formatType)
        {
            switch (formatType)
            {
                case FastQFormatType.Sanger:
                    return Sanger_AsciiBaseValue;

                case FastQFormatType.Solexa_Illumina_v1_0:
                    return Solexa_Illumina_v1_0_AsciiBaseValue;

                case FastQFormatType.Illumina_v1_3:
                    return Illumina_v1_3_AsciiBaseValue;

                case FastQFormatType.Illumina_v1_5:
                    return Illumina_v1_5_AsciiBaseValue;

                default:
                    return Illumina_v1_8_AsciiBaseValue;
            }
        }
Пример #33
0
        /// <summary>
        /// Creates a new QualitativeSequence holding this instance's data with its
        /// quality scores converted to the specified FastQ format type.
        /// </summary>
        /// <param name="formatType">FastQ format type to convert.</param>
        /// <returns>
        /// New instance carrying this instance's ID and the same metadata reference.
        /// </returns>
        public QualitativeSequence ConvertTo(FastQFormatType formatType)
        {
            sbyte[] targetScores = ConvertQualityScores(this.FormatType, formatType, this.qualityScores);

            // Validation is switched off for the new instance (last constructor argument).
            QualitativeSequence converted = new QualitativeSequence(this.Alphabet, formatType, this.sequenceData, targetScores, false)
            {
                ID = this.ID,
                metadata = this.metadata
            };

            return converted;
        }
Пример #34
0
        /// <summary>
        /// Gets the quality score type for the specified format.
        /// </summary>
        /// <param name="formatType">FastQ format.</param>
        /// <returns>
        /// SolexaBaseQualityScore for Solexa_Illumina_v1_0; PhredBaseQualityScore for every other format.
        /// </returns>
        private static BaseQualityScoreType GetQualityScoreType(FastQFormatType formatType)
        {
            if (formatType == FastQFormatType.Solexa_Illumina_v1_0)
            {
                return BaseQualityScoreType.SolexaBaseQualityScore;
            }

            return BaseQualityScoreType.PhredBaseQualityScore;
        }
Пример #35
0
        /// <summary>
        /// General method to validate default score for different FastQ
        /// format with different sequence. Reads the inputs from the XML node, builds a
        /// QualitativeSequence through the constructor selected by
        /// <paramref name="parameters"/>, and asserts that the encoded quality scores
        /// it returns match the scores it was created with.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="parameters">Different Qualitative Score method parameter.</param>
        void ValidateFastQDefaultScores(string nodeName, QualitativeSequenceParameters parameters)
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         nodeName, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.inputSequenceNode);
            string expectedMaxScore = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DefualtMaxScore);
            string expectedMinScore = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DefaultMinScore);

            QualitativeSequence createdQualitativeSequence = null;
            string qualityScoresString = Utility.GetDefaultEncodedQualityScores(expectedFormatType, inputSequence.Length);

            byte[] expectedMaxScores = Utility.GetEncodedQualityScores((byte)int.Parse(expectedMaxScore, null as IFormatProvider), inputSequence.Length);
            byte[] expectedMinScores = Utility.GetEncodedQualityScores((byte)int.Parse(expectedMinScore, null as IFormatProvider), inputSequence.Length);
            int    i = 0;

            // All assertions below pass the expected value first so that a failure
            // message reports "expected" and "actual" the right way round.
            switch (parameters)
            {
            // Both values run the same check through the string-based constructor,
            // so they share one body.
            case QualitativeSequenceParameters.DefaultScoreWithAlphabets:
            case QualitativeSequenceParameters.DefaultScoreWithSequence:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, inputSequence,
                    qualityScoresString);

                // Validate default score.
                i = 0;
                foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
                {
                    Assert.AreEqual((byte)(qualityScoresString[i]),
                                    qualitativeScore);
                    i++;
                }

                // Log VSTest GUI.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "Qualitative Sequence P1:Qualitative Sequence Default score {0} is as expected.",
                                                       qualityScoresString[0]));
                break;

            case QualitativeSequenceParameters.MaxDefaultScore:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, Encoding.UTF8.GetBytes(inputSequence),
                    expectedMaxScores);

                // Validate default maximum score.
                i = 0;
                foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
                {
                    Assert.AreEqual(expectedMaxScores[i],
                                    qualitativeScore);
                    i++;
                }

                // Log VSTest GUI.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "Qualitative Sequence P1:Qualitative Sequence Maximum score {0} is as expected.",
                                                       QualitativeSequence.GetMaxEncodedQualScore(expectedFormatType)));
                break;

            case QualitativeSequenceParameters.MinDefaultScore:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, Encoding.UTF8.GetBytes(inputSequence),
                    expectedMinScores);

                // Validate default minimum score.
                i = 0;
                foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
                {
                    Assert.AreEqual(expectedMinScores[i],
                                    qualitativeScore);
                    i++;
                }

                // Log VSTest GUI.
                ApplicationLog.WriteLine(string.Format(null, "Qualitative Sequence P1:Qualitative Sequence Minimum score {0} is as expected.",
                                                       QualitativeSequence.GetMinEncodedQualScore(expectedFormatType)));
                break;

            default:
                break;
            }
        }
Пример #36
0
 /// <summary>
 /// Decodes a single ASCII encoded quality score using the Ascii base value
 /// of the given FastQ format.
 /// </summary>
 /// <param name="encodedQualScore">ASCII Encoded quality score.</param>
 /// <param name="formatType">FastQ format type.</param>
 /// <returns>Returns quality score.</returns>
 private static int GetDecodedQualScore(byte encodedQualScore, FastQFormatType formatType)
 {
     var asciiBaseValue = GetAsciiBaseValue(formatType);
     return DecodeQualityScore(encodedQualScore, asciiBaseValue);
 }
Пример #37
0
 /// <summary>
 /// Creates a quality score analyzer for use from the GUI. This is the standard
 /// constructor for the case where SequenceAnalyzer has run beforehand and has
 /// already calculated the maximum read length and the read count.
 /// </summary>
 /// <param name="sequences">Sequence parser</param>
 /// <param name="readLengthMax">Maximum read length</param>
 /// <param name="count">Total number of reads</param>
 /// <param name="format">FastQ Format Type</param>
 /// <param name="filename">Input filename</param>
 /// <param name="worker">Background worker</param>
 /// <param name="e">Background worker event args</param>
 public QualityScoreAnalyzer(
     ISequenceParser sequences,
     long readLengthMax,
     long count,
     FastQFormatType format,
     string filename,
     BackgroundWorker worker,
     DoWorkEventArgs e)
     : base(sequences, filename, worker, e)
 {
     // Parser, file name and worker state are handed to the base class;
     // the format and the precomputed statistics are applied here.
     Initialize(format, readLengthMax, count);
 }
Пример #38
0
        /// <summary>
        /// Decodes an array of ASCII encoded quality scores into signed bytes.
        /// </summary>
        /// <param name="encodedQualScores">ASCII encoded quality scores.</param>
        /// <param name="formatType">FastQ format type whose ASCII base value is used for decoding.</param>
        /// <returns>A new array with one decoded score per input element, in the same order.</returns>
        private static sbyte[] GetDecodedQualScoresInSignedBytes(byte[] encodedQualScores, FastQFormatType formatType)
        {
            // The base value depends only on the format, so it is resolved once up front.
            int asciiOffset = GetAsciiBaseValue(formatType);
            long total = encodedQualScores.GetLongLength();
            sbyte[] decoded = new sbyte[total];

            long position = 0;
            while (position < total)
            {
                var score = DecodeQualityScore(encodedQualScores[position], asciiOffset);
                decoded[position] = (sbyte)score;
                position++;
            }

            return decoded;
        }
Пример #39
0
        /// <summary>
        /// General method to invalidate the FastQ formatter. Parses one sequence from
        /// the file configured under Constants.MultiSeqSangerRnaProNode and calls
        /// FastQFormatter.Format with a null sequence and/or a null writer; the call
        /// is expected to throw.
        /// </summary>
        /// <param name="param">Selects which Format call is exercised.</param>
        void InValidateFastQFormatter(FastQFormatParameters param)
        {
            // Gets the expected sequence from the Xml
            string filepath = _utilityObj._xmlUtil.GetTextValue(
                Constants.MultiSeqSangerRnaProNode, Constants.FilePathNode);
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                _utilityObj._xmlUtil.GetTextValue(Constants.MultiSeqSangerRnaProNode,
                                                  Constants.FastQFormatType));

            // Parse a FastQ file.
            using (FastQParser fastQParser = new FastQParser())
            {
                fastQParser.AutoDetectFastQFormat = true;
                fastQParser.FastqType             = expectedFormatType;

                IQualitativeSequence sequence = fastQParser.ParseOne(filepath);
                FastQFormatter fastQFormatter = new FastQFormatter();

                // Deliberately left null: every Format call below receives a null writer.
                TextWriter txtWriter = null;

                // The expectation is checked AFTER the try/catch. An Assert.Fail() placed
                // inside the try block throws an exception that catch (Exception) would
                // swallow, so the test could never fail.
                bool exceptionThrown = false;

                try
                {
                    switch (param)
                    {
                    case FastQFormatParameters.TextWriter:
                        // Parsed sequence, null writer.
                        fastQFormatter.Format(sequence, null as TextWriter);
                        break;

                    case FastQFormatParameters.Sequence:
                        // Null ISequence, null writer.
                        fastQFormatter.Format(null as ISequence, txtWriter);
                        break;

                    case FastQFormatParameters.QualitativeSequence:
                        // Null IQualitativeSequence, null writer.
                        fastQFormatter.Format(null as IQualitativeSequence, txtWriter);
                        break;

                    default:
                        // Parsed sequence viewed as QualitativeSequence, null writer.
                        fastQFormatter.Format(sequence as QualitativeSequence, null as TextWriter);
                        break;
                    }
                }
                catch (Exception)
                {
                    exceptionThrown = true;
                    ApplicationLog.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                    Console.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                }

                if (!exceptionThrown)
                {
                    Assert.Fail(
                        "FastQ Parser P2 : Expected FastQFormatter.Format to throw, but no exception was thrown");
                }
            }
        }
Пример #40
0
 /// <summary>
 /// Encodes a single quality score as an ASCII encoded quality score for the given FastQ format.
 /// </summary>
 /// <param name="qualScore">Quality score to encode.</param>
 /// <param name="formatType">FastQ format type whose ASCII base value is used for encoding.</param>
 /// <returns>The result of EncodeQualityScore for the score and the format's ASCII base value.</returns>
 private static byte GetEncodedQualScore(int qualScore, FastQFormatType formatType)
 {
     // Resolve the format specific ASCII base value first, then encode against it.
     var asciiBaseValue = GetAsciiBaseValue(formatType);
     return EncodeQualityScore(qualScore, asciiBaseValue);
 }
Пример #41
0
        /// <summary>
        /// General method to validate default score for different FastQ
        /// format with different sequence.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="parameters">Different Qualitative Score method parameter.</param>
        void ValidateFastQDefaultScores(
            string nodeName, QualitativeSequenceParameters parameters)
        {
            // Gets the actual sequence and the alphabet from the Xml
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         nodeName, Constants.AlphabetNameNode));
            FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
            string inputSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.inputSequenceNode);
            string expectedMaxScore = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DefualtMaxScore);
            string expectedMinScore = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DefaultMinScore);

            QualitativeSequence createdQualitativeSequence = null;

            switch (parameters)
            {
            // Both parameters build the sequence the same way and share one validation.
            case QualitativeSequenceParameters.DefaultScoreWithAlphabets:
            case QualitativeSequenceParameters.DefaultScoreWithSequence:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, inputSequence,
                    ((char)QualitativeSequence.GetDefaultQualScore(expectedFormatType)).ToString());

                // Validate default score (expected value first, actual value second).
                foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
                {
                    Assert.AreEqual(QualitativeSequence.GetDefaultQualScore(expectedFormatType),
                                    qualitativeScore);
                }

                // Log Nunit GUI.
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "Qualitative Sequence P1:Qualitative Sequence Default score {0} is as expected.",
                                                QualitativeSequence.GetDefaultQualScore(expectedFormatType)));
                break;

            case QualitativeSequenceParameters.MaxDefaultScore:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, encodingObj.GetBytes(inputSequence),
                    new byte[] { byte.Parse(expectedMaxScore, (IFormatProvider)null) });

                // Validate default maximum score (expected value first, actual value second).
                foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
                {
                    Assert.AreEqual(QualitativeSequence.GetMaxQualScore(expectedFormatType),
                                    qualitativeScore);
                }

                // Log Nunit GUI.
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "Qualitative Sequence P1:Qualitative Sequence Maximum score {0} is as expected.",
                                                QualitativeSequence.GetMaxQualScore(expectedFormatType)));
                break;

            case QualitativeSequenceParameters.MinDefaultScore:
                createdQualitativeSequence = new QualitativeSequence(
                    alphabet, expectedFormatType, encodingObj.GetBytes(inputSequence),
                    new byte[] { byte.Parse(expectedMinScore, (IFormatProvider)null) });

                // Validate default minimum score (expected value first, actual value second).
                foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
                {
                    Assert.AreEqual(QualitativeSequence.GetMinQualScore(expectedFormatType),
                                    qualitativeScore);
                }

                // Log Nunit GUI.
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "Qualitative Sequence P1:Qualitative Sequence Minimum score {0} is as expected.",
                                                QualitativeSequence.GetMinQualScore(expectedFormatType)));
                break;

            default:
                break;
            }
        }
Пример #42
0
        /// <summary>
        /// Encodes an array of quality scores as ASCII encoded quality scores.
        /// </summary>
        /// <param name="qualScores">Quality scores to encode.</param>
        /// <param name="formatType">FastQ format type whose ASCII base value is used for encoding.</param>
        /// <returns>A new array with one encoded score per input element, in the same order.</returns>
        private static byte[] GetEncodedQualScores(sbyte[] qualScores, FastQFormatType formatType)
        {
            // The base value depends only on the format, so it is resolved once up front.
            int asciiOffset = GetAsciiBaseValue(formatType);
            long total = qualScores.GetLongLength();
            byte[] encoded = new byte[total];

            long position = 0;
            while (position < total)
            {
                encoded[position] = EncodeQualityScore(qualScores[position], asciiOffset);
                position++;
            }

            return encoded;
        }
 /// <summary>
 /// Initializes a new instance of the CompactSAMSequence class with specified alphabet, quality score type,
 /// string representing symbols and encoded quality scores.
 /// All arguments are forwarded to the base class constructor with its final argument set to true.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this instance should conform.</param>
 /// <param name="fastQFormatType">FastQ format type.</param>
 /// <param name="sequence">A string representing the symbols.</param>
 /// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
 public CompactSAMSequence(
     IAlphabet alphabet,
     FastQFormatType fastQFormatType,
     string sequence,
     string encodedQualityScores)
     : base(alphabet, fastQFormatType, sequence, encodedQualityScores, true)
 {
     // Nothing to do here: the base constructor performs all initialization.
 }
Пример #44
0
        /// <summary>
        /// Validates whether the specified encoded quality scores are within the
        /// minimum and maximum encoded quality score of the given FastQ format type.
        /// </summary>
        /// <param name="encodedQualScore">Encoded quality scores to validate.</param>
        /// <param name="formatType">FastQ format type.</param>
        /// <param name="invalidQualScore">
        /// Receives the first score found outside the limits, or 0 when every score is valid.
        /// </param>
        /// <returns>Returns true if all scores are within the limits, otherwise false.</returns>
        /// <exception cref="ArgumentNullException">Thrown when encodedQualScore is null.</exception>
        public static bool ValidateQualScores(byte[] encodedQualScore, FastQFormatType formatType, out byte invalidQualScore)
        {
            // Public entry point: reject null explicitly instead of failing later
            // with a less descriptive exception.
            if (encodedQualScore == null)
            {
                throw new ArgumentNullException("encodedQualScore");
            }

            bool result = true;
            invalidQualScore = 0;
            int minScore = GetMinEncodedQualScore(formatType);
            int maxScore = GetMaxEncodedQualScore(formatType);
            long count = encodedQualScore.GetLongLength();
            for (long index = 0; index < count; index++)
            {
                byte qualScore = encodedQualScore[index];
                if (qualScore < minScore || qualScore > maxScore)
                {
                    // Report only the first offending score and stop scanning.
                    result = false;
                    invalidQualScore = qualScore;
                    break;
                }
            }

            return result;
        }
Пример #45
0
        /// <summary>
        ///     Returns a single QualitativeSequence from the FASTQ data.
        ///     One record is read as four lines: an '@' header line, the sequence line,
        ///     a '+' line (optionally repeating the id) and the quality score line.
        /// </summary>
        /// <param name="reader">Reader to be parsed.</param>
        /// <param name="formatType">FASTQ format type.</param>
        /// <returns>
        ///     Returns a QualitativeSequence, or null when the reader is already at the end of the stream.
        /// </returns>
        /// <exception cref="Exception">
        ///     Thrown when the header line does not start with '@', the sequence or quality score
        ///     line is null or empty, the third line does not start with '+', the id on the '+' line
        ///     differs from the header id, the sequence and quality score lengths differ, or the
        ///     alphabet cannot be detected or does not validate the sequence.
        /// </exception>
        private IQualitativeSequence ParseOne(StreamReader reader, FastQFormatType formatType)
        {
            // Nothing left to read: signal the end of input with null instead of throwing.
            if (reader.EndOfStream)
            {
                return(null);
            }

            string line = ReadNextLine(reader, true);

            // First line of a record must be the '@' header line.
            if (line == null || !line.StartsWith("@", StringComparison.Ordinal))
            {
                string message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                throw new Exception(message);
            }

            // Process header line: the id is everything after the leading '@', trimmed.
            string id = line.Substring(1).Trim();

            line = ReadNextLine(reader, true);
            if (string.IsNullOrEmpty(line))
            {
                string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidSequenceLine, id);
                string message = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    this.Name,
                    details);
                throw new Exception(message);
            }

            // Get sequence from second line.
            byte[] sequenceData = Encoding.ASCII.GetBytes(line);

            // Goto third line.
            line = ReadNextLine(reader, true);

            // Check for '+' symbol in the third line.
            if (line == null || !line.StartsWith("+", StringComparison.Ordinal))
            {
                string details = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.FastQ_InvalidQualityScoreHeaderLine,
                    id);
                string message = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    this.Name,
                    details);
                throw new Exception(message);
            }

            string qualScoreId = line.Substring(1).Trim();

            // The id after '+' is optional, but when present it must match the header id.
            if (!string.IsNullOrEmpty(qualScoreId) && !id.Equals(qualScoreId))
            {
                string details = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.FastQ_InvalidQualityScoreHeaderData,
                    id);
                string message = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    this.Name,
                    details);
                throw new Exception(message);
            }

            // Goto fourth line.
            line = ReadNextLine(reader, true);

            if (string.IsNullOrEmpty(line))
            {
                string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_EmptyQualityScoreLine, id);
                string message = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    this.Name,
                    details);
                throw new Exception(message);
            }

            // Get the quality scores from the fourth line.
            byte[] qualScores = Encoding.ASCII.GetBytes(line);

            // Check for sequence length and quality score length.
            if (sequenceData.GetLongLength() != qualScores.GetLongLength())
            {
                string details = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.FastQ_InvalidQualityScoresLength,
                    id);
                string message = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    this.Name,
                    details);
                throw new Exception(message);
            }

            // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet
            IAlphabet alphabet = this.Alphabet;

            if (alphabet == null)
            {
                alphabet = Alphabets.AutoDetectAlphabet(sequenceData, 0, sequenceData.GetLongLength(), alphabet);
                if (alphabet == null)
                {
                    throw new Exception(Resource.CouldNotIdentifyAlphabetType);
                }
            }
            else
            {
                if (!alphabet.ValidateSequence(sequenceData, 0, sequenceData.GetLongLength()))
                {
                    throw new Exception(Resource.InvalidAlphabetType);
                }
            }

            // NOTE(review): the trailing 'false' is presumably the constructor's validate flag,
            // skipped here because the symbols were already checked above - confirm against
            // the QualitativeSequence constructor.
            return(new QualitativeSequence(alphabet, formatType, sequenceData, qualScores, false)
            {
                ID = id
            });
        }
Пример #46
0
        /// <summary>
        /// Checks that every decoded quality score lies within the limits of the given FastQ format type.
        /// </summary>
        /// <param name="qualScores">Quality scores in base type.</param>
        /// <param name="formatType">Fastq format type.</param>
        /// <param name="invalidQualScore">
        /// Receives the first score found outside the limits, or 0 when every score is valid.
        /// </param>
        /// <returns>Returns true if all scores are within the limits, otherwise false.</returns>
        private static bool ValidateQualScores(int[] qualScores, FastQFormatType formatType, out int invalidQualScore)
        {
            invalidQualScore = 0;

            // The limits are the decoded forms of the format's min/max encoded scores.
            int lowerBound = GetDecodedQualScore(GetMinEncodedQualScore(formatType), formatType);
            int upperBound = GetDecodedQualScore(GetMaxEncodedQualScore(formatType), formatType);

            long total = qualScores.GetLongLength();
            for (long position = 0; position < total; position++)
            {
                int candidate = qualScores[position];
                bool withinLimits = candidate >= lowerBound && candidate <= upperBound;
                if (!withinLimits)
                {
                    // Stop at the first offending score and report it to the caller.
                    invalidQualScore = candidate;
                    return false;
                }
            }

            return true;
        }
Пример #47
0
 /// <summary>
 /// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ format type,
 /// a string of symbols and a string of encoded quality scores.
 /// Delegates to the five-argument constructor with validation enabled, so the sequence and the
 /// quality scores are validated against the alphabet and the FastQ format respectively.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this instance should conform.</param>
 /// <param name="fastQFormatType">FastQ format type.</param>
 /// <param name="sequence">A string representing the symbols.</param>
 /// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
 public QualitativeSequence(
     IAlphabet alphabet,
     FastQFormatType fastQFormatType,
     string sequence,
     string encodedQualityScores)
     : this(alphabet, fastQFormatType, sequence, encodedQualityScores, true)
 {
     // Nothing to do here: the chained constructor performs all initialization.
 }
Пример #48
0
        /// <summary>
        /// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ format type,
        /// a string of symbols and a string of encoded quality scores, optionally validating the input.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">A string representing the symbols.</param>
        /// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
        /// <param name="validate">When true, the lengths, the symbols and the encoded quality scores are
        /// validated; when false, validation is skipped.</param>
        /// <exception cref="ArgumentNullException">alphabet, sequence or encodedQualityScores is null.</exception>
        /// <exception cref="ArgumentException">validate is true and the number of symbols differs from the
        /// number of quality scores.</exception>
        /// <exception cref="ArgumentOutOfRangeException">validate is true and an encoded quality score is
        /// outside the limits of the FastQ format.</exception>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string encodedQualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (encodedQualityScores == null)
            {
                throw new ArgumentNullException("encodedQualityScores");
            }

            this.FormatType = fastQFormatType;

            // Both strings are stored as UTF-8 bytes.
            this.sequenceData = UTF8Encoding.UTF8.GetBytes(sequence);
            byte[] encodedScoreBytes = UTF8Encoding.UTF8.GetBytes(encodedQualityScores);
            long symbolCount = this.sequenceData.GetLongLength();

            if (validate)
            {
                // Every symbol needs exactly one quality score.
                long scoreCount = encodedScoreBytes.GetLongLength();
                if (symbolCount != scoreCount)
                {
                    throw new ArgumentException(
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                            symbolCount,
                            scoreCount));
                }

                // Validate sequence data
                bool symbolsAreValid = this.Alphabet.ValidateSequence(this.sequenceData, 0, symbolCount);
                if (!symbolsAreValid)
                {
                    throw Helper.GenerateAlphabetCheckFailureException(this.Alphabet, sequenceData);
                }

                // Validate quality scores
                byte offendingScore;
                bool scoresAreValid = ValidateQualScores(encodedScoreBytes, this.FormatType, out offendingScore);
                if (!scoresAreValid)
                {
                    string outOfRangeMessage = string.Format(
                        CultureInfo.CurrentUICulture,
                        Properties.Resource.InvalidEncodedQualityScoreFound,
                        (char)offendingScore,
                        this.FormatType);
                    throw new ArgumentOutOfRangeException("encodedQualityScores", outOfRangeMessage);
                }
            }

            // Scores are kept in decoded form, as signed bytes.
            this.qualityScores = GetDecodedQualScoresInSignedBytes(encodedScoreBytes, this.FormatType);
            this.Count = symbolCount;
        }
Пример #49
0
        /// <summary>
        /// Parses a single FASTQ text from a reader into a QualitativeSequence.
        /// One record is read as four lines: an '@' header line, the sequence line,
        /// a '+' line (optionally repeating the id) and the quality score line.
        /// A SequencePointer for the record is added to _sequencePointers and a
        /// FileVirtualQualitativeSequenceProvider is attached to the returned sequence.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new QualitativeSequence instance containing parsed data.</returns>
        /// <exception cref="FileFormatException">
        /// Thrown when the header line does not start with '@', the sequence or quality score line is
        /// missing or empty, the third line does not start with '+', the id on the '+' line differs
        /// from the header id, the sequence and quality score lengths differ, or the alphabet cannot
        /// be identified.
        /// </exception>
        private IQualitativeSequence ParseOneWithFastQFormat(MBFStreamReader mbfReader, bool isReadOnly)
        {
            SequencePointer sequencePointer = new SequencePointer();

            string message;

            // Check for '@' symbol at the first line.
            if (!mbfReader.HasLines || !mbfReader.Line.StartsWith("@", StringComparison.Ordinal))
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, Name);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Process header line.
            // NOTE(review): GetLineField(2) presumably returns the line text after the leading '@' - confirm.
            string id = mbfReader.GetLineField(2).Trim();

            // save sequence starting index
            sequencePointer.IndexOffsets[0] = mbfReader.Position;

            // Go to second line.
            mbfReader.GoToNextLine();
            if (!mbfReader.HasLines || string.IsNullOrEmpty(mbfReader.Line))
            {
                string message1 = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidSequenceLine, id);
                message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Get sequence from second line.
            string sequenceLine = mbfReader.Line;

            // save sequence ending index (starting index plus the length of the sequence line)
            sequencePointer.IndexOffsets[1] = sequencePointer.IndexOffsets[0] + mbfReader.Line.Length;

            // Goto third line.
            mbfReader.GoToNextLine();

            // Check for '+' symbol in the third line.
            if (!mbfReader.HasLines || !mbfReader.Line.StartsWith("+", StringComparison.Ordinal))
            {
                string message1 = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderLine, id);
                message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            string qualScoreId = mbfReader.GetLineField(2).Trim();

            // The id after '+' is optional, but when present it must match the header id.
            if (!string.IsNullOrEmpty(qualScoreId) && !id.Equals(qualScoreId))
            {
                string message1 = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderData, id);
                message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Goto fourth line.
            mbfReader.GoToNextLine();
            if (!mbfReader.HasLines || string.IsNullOrEmpty(mbfReader.Line))
            {
                string message1 = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_EmptyQualityScoreLine, id);
                message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Get the quality scores from the fourth line.
            byte[] qualScores = ASCIIEncoding.ASCII.GetBytes(mbfReader.Line);

            // Check for sequence length and quality score length.
            if (sequenceLine.Length != mbfReader.Line.Length)
            {
                string message1 = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoresLength, id);
                message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Move the reader past the quality score line, i.e. past this record.
            mbfReader.GoToNextLine();

            IAlphabet alphabet = Alphabet;

            // Identify alphabet if it is not specified.
            if (alphabet == null)
            {
                alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, sequenceLine);

                if (alphabet == null)
                {
                    string message1 = string.Format(CultureInfo.CurrentCulture, Resource.InvalidSymbolInString, sequenceLine);
                    message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, message1);
                    Trace.Report(message);
                    throw new FileFormatException(message);
                }
            }

            FastQFormatType fastQType = FastqType;

            // Identify fastq format type if AutoDetectFastQFormat property is set to true.
            if (AutoDetectFastQFormat)
            {
                fastQType = IdentifyFastQFormatType(qualScores);
            }

            QualitativeSequence sequence = null;

            // Pass the parser's Encoding to the sequence only when one has been set.
            if (Encoding == null)
            {
                sequence = new QualitativeSequence(alphabet, fastQType, sequenceLine, qualScores);
            }
            else
            {
                sequence = new QualitativeSequence(alphabet, fastQType, Encoding, sequenceLine, qualScores);
            }

            sequence.ID         = id;
            sequence.IsReadOnly = isReadOnly;

            // Record this sequence's pointer (offsets, alphabet name and id) on the parser.
            sequencePointer.AlphabetName = sequence.Alphabet.Name;
            sequencePointer.Id           = sequence.ID;
            _sequencePointers.Add(sequencePointer);

            // Attach a file-backed provider configured with this parser's block settings.
            FileVirtualQualitativeSequenceProvider dataProvider = new FileVirtualQualitativeSequenceProvider(this, sequencePointer)
            {
                BlockSize         = _blockSize,
                MaxNumberOfBlocks = _maxNumberOfBlocks
            };

            sequence.VirtualQualitativeSequenceProvider = dataProvider;
            return(sequence);
        }
        /// <summary>
        /// Validates that the quality score conversion methods of QualitativeSequence
        /// reject invalid input when called with the given pair of FastQ format types.
        /// </summary>
        /// <param name="type1">FastQ format type passed as the first argument of each conversion call.</param>
        /// <param name="type2">FastQ format type passed as the second argument of each conversion call.</param>
        void ConvertTypeToType(FastQFormatType type1, FastQFormatType type2)
        {
            int[] scoreArray = { -12, 24 };
            int qualScore = -12;

            // A null encoded quality score input is expected to raise ArgumentNullException.
            Assert.Throws<ArgumentNullException>(() =>
                QualitativeSequence.ConvertEncodedQualityScore(type1, type2, null));

            // The score array is expected to be rejected as out of range.
            Assert.Throws<ArgumentOutOfRangeException>(() =>
                QualitativeSequence.ConvertQualityScores(type1, type2, scoreArray));

            // The single score is expected to be rejected as out of range.
            Assert.Throws<ArgumentOutOfRangeException>(() =>
                QualitativeSequence.ConvertQualityScore(type1, type2, qualScore));
        }