/// <summary>
/// Returns the ASCII-encoded default quality score for the given FastQ format.
/// </summary>
/// <param name="type">FastQ format type.</param>
/// <returns>
/// The ASCII base value of the format plus DefaultQualScore, narrowed to a byte.
/// </returns>
public static byte GetDefaultQualScore(FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Sanger:
            return (byte)(SangerAsciiBaseValue + DefaultQualScore);

        case FastQFormatType.Solexa:
            return (byte)(SolexaAsciiBaseValue + DefaultQualScore);

        default:
            // Every remaining format value is treated as Illumina.
            return (byte)(IlluminaAsciiBaseValue + DefaultQualScore);
    }
}
/// <summary>
/// Checks a single quality score against the inclusive minimum/maximum
/// bounds of the given FastQ format.
/// </summary>
/// <param name="qualScore">Quality score to check.</param>
/// <param name="type">FastQ format type whose bounds apply.</param>
/// <returns>True when the score lies within the bounds; otherwise false.</returns>
private static bool ValidateQualScore(byte qualScore, FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Sanger:
            return SangerMinQualScore <= qualScore && qualScore <= SangerMaxQualScore;

        case FastQFormatType.Solexa:
            return SolexaMinQualScore <= qualScore && qualScore <= SolexaMaxQualScore;

        default:
            // Every remaining format value is checked against the Illumina bounds.
            return IlluminaMinQualScore <= qualScore && qualScore <= IlluminaMaxQualScore;
    }
}
/// <summary>
/// Decodes an ASCII-encoded quality score by subtracting the ASCII base
/// value of the given FastQ format.
/// </summary>
/// <param name="qualScore">ASCII-encoded quality score.</param>
/// <param name="type">FastQ format type.</param>
/// <returns>The decoded quality score.</returns>
private static int GetQualScore(byte qualScore, FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Sanger:
            return qualScore - SangerAsciiBaseValue;

        case FastQFormatType.Solexa:
            return qualScore - SolexaAsciiBaseValue;

        default:
            // Every remaining format value uses the Illumina base.
            return qualScore - IlluminaAsciiBaseValue;
    }
}
/// <summary>
/// Encodes a quality score as ASCII by adding the ASCII base value of the
/// given FastQ format.
/// </summary>
/// <param name="qualScore">Quality score to encode.</param>
/// <param name="type">FastQ format type.</param>
/// <returns>The encoded score, narrowed to a byte.</returns>
private static byte GetEncodedQualScore(int qualScore, FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Sanger:
            return (byte)(qualScore + SangerAsciiBaseValue);

        case FastQFormatType.Solexa:
            return (byte)(qualScore + SolexaAsciiBaseValue);

        default:
            // Every remaining format value uses the Illumina base.
            return (byte)(qualScore + IlluminaAsciiBaseValue);
    }
}
/// <summary>
/// Returns the maximum quality score constant of the given FastQ format.
/// </summary>
/// <param name="type">FastQ format type.</param>
/// <returns>Maximum quality score for the format.</returns>
public static byte GetMaxQualScore(FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Solexa:
            return SolexaMaxQualScore;

        case FastQFormatType.Sanger:
            return SangerMaxQualScore;

        default:
            // Every remaining format value reports the Illumina maximum.
            return IlluminaMaxQualScore;
    }
}
/// <summary>
/// Checks every quality score in the array against the inclusive
/// minimum/maximum bounds of the given FastQ format.
/// </summary>
/// <param name="qualScores">Quality scores to check.</param>
/// <param name="type">FastQ format type whose bounds apply.</param>
/// <returns>
/// False as soon as one score falls outside the bounds; true otherwise.
/// A format value other than Sanger, Solexa or Illumina performs no check
/// and yields true.
/// </returns>
private static bool ValidateQualScore(byte[] qualScores, FastQFormatType type)
{
    switch (type)
    {
        case FastQFormatType.Sanger:
            foreach (byte score in qualScores)
            {
                if (score < SangerMinQualScore || score > SangerMaxQualScore)
                {
                    return false;
                }
            }

            break;

        case FastQFormatType.Solexa:
            foreach (byte score in qualScores)
            {
                if (score < SolexaMinQualScore || score > SolexaMaxQualScore)
                {
                    return false;
                }
            }

            break;

        case FastQFormatType.Illumina:
            foreach (byte score in qualScores)
            {
                if (score < IlluminaMinQualScore || score > IlluminaMaxQualScore)
                {
                    return false;
                }
            }

            break;
    }

    return true;
}
/// <summary>
/// Gets the FastQFormatType enum value corresponding to a given string value.
/// The comparison against enum member names is case-insensitive.
/// </summary>
/// <param name="formatAsString">FastQ format as a string.</param>
/// <returns>The matching FastQFormatType value.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="formatAsString"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="formatAsString"/> is empty, whitespace, or does not name
/// a FastQFormatType member.
/// </exception>
public static FastQFormatType GetQualityFormatType(string formatAsString)
{
    if (formatAsString == null)
    {
        // Report the caller's parameter name rather than an error message
        // in the paramName slot.
        throw new ArgumentNullException("formatAsString");
    }

    // Enum.Parse already throws ArgumentException for invalid names; letting
    // it propagate keeps the original message and stack trace intact.
    return (FastQFormatType)Enum.Parse(typeof(FastQFormatType), formatAsString, true);
}
/// <summary>
/// Lazily parses the gzip-compressed file named by Filename and yields each
/// sequence that ParseOne produces.
/// </summary>
/// <returns>The parsed qualitative sequences, in file order.</returns>
public override System.Collections.Generic.IEnumerable<Bio.QualitativeSequence> Parse()
{
    using (GZipStream decompressed = new GZipStream(new FileInfo(Filename).OpenRead(), CompressionMode.Decompress))
    using (StreamReader reader = new StreamReader(decompressed))
    {
        FastQFormatType formatType = this.FormatType;

        // ParseOne is attempted at least once, then repeated until the
        // reader reports end of stream.
        bool moreData = true;
        while (moreData)
        {
            var parsed = ParseOne(reader, formatType);
            if (parsed != null)
            {
                yield return parsed;
            }

            moreData = !reader.EndOfStream;
        }
    }
}
/// <summary>
/// General method to validate that the FastQ parser throws an exception
/// for the input file configured under the given xml node.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="IsParseOne">True for FastQParseOne validations, else false</param>
void InValidateFastQParser(string nodeName, bool IsParseOne)
{
    // Gets the file path and the format type from the Xml
    string filePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));

    // Create a FastQ Parser object.
    using (FastQParser fastQParserObj = new FastQParser())
    {
        fastQParserObj.AutoDetectFastQFormat = true;
        fastQParserObj.FastqType = expectedFormatType;

        // The failure assertion must live outside the try block: an
        // Assert.Fail() inside it is itself an exception and would be
        // swallowed by catch (Exception), so the test could never fail.
        bool exceptionThrown = false;
        try
        {
            if (IsParseOne)
            {
                fastQParserObj.ParseOne(filePath);
            }
            else
            {
                fastQParserObj.Parse(filePath);
            }
        }
        catch (Exception)
        {
            exceptionThrown = true;
            ApplicationLog.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
            Console.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
        }

        Assert.IsTrue(exceptionThrown);
    }
}
/// <summary>
/// Creates a QualitativeSequence from symbol bytes and quality-score bytes.
/// Both input arrays are copied, so later changes to the caller's arrays do
/// not affect this instance.
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">An array of bytes representing the symbols.</param>
/// <param name="qualityScores">An array of bytes representing the quality scores.</param>
/// <param name="validate">When true, the symbols are validated against the
/// alphabet and the quality scores against the format bounds; when false,
/// both checks are skipped.</param>
public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, byte[] qualityScores, bool validate)
{
    if (alphabet == null)
    {
        throw new ArgumentNullException("alphabet");
    }

    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    if (qualityScores == null)
    {
        throw new ArgumentNullException("qualityScores");
    }

    this.Alphabet = alphabet;
    this.ID = string.Empty;
    this.FormatType = fastQFormatType;

    // Symbols are checked before quality scores.
    if (validate && !this.Alphabet.ValidateSequence(sequence, 0, sequence.LongLength))
    {
        throw new ArgumentOutOfRangeException("sequence");
    }

    if (validate && !ValidateQualScore(qualityScores, this.FormatType))
    {
        throw new ArgumentOutOfRangeException("qualityScores");
    }

    // Keep private copies of both arrays.
    this.sequenceData = (byte[])sequence.Clone();
    this.qualityScores = (byte[])qualityScores.Clone();
    this.Count = this.sequenceData.LongLength;
}
/// <summary>
/// Creates a QualitativeSequence from a symbol string and a quality-score
/// string. Both strings are converted to bytes with ASCII encoding.
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">A string representing the symbols.</param>
/// <param name="qualityScores">A string representing the quality scores.</param>
/// <param name="validate">When true, the symbols are validated against the
/// alphabet and the quality scores against the format bounds; when false,
/// both checks are skipped.</param>
public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string qualityScores, bool validate)
{
    if (alphabet == null)
    {
        throw new ArgumentNullException("alphabet");
    }

    this.Alphabet = alphabet;
    this.ID = string.Empty;

    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    if (qualityScores == null)
    {
        throw new ArgumentNullException("qualityScores");
    }

    this.FormatType = fastQFormatType;

    byte[] symbolBytes = ASCIIEncoding.ASCII.GetBytes(sequence);
    this.sequenceData = symbolBytes;
    byte[] scoreBytes = ASCIIEncoding.ASCII.GetBytes(qualityScores);
    this.qualityScores = scoreBytes;

    // Symbols are checked before quality scores.
    if (validate && !this.Alphabet.ValidateSequence(this.sequenceData, 0, this.sequenceData.LongLength))
    {
        throw new ArgumentOutOfRangeException("sequence");
    }

    if (validate && !ValidateQualScore(this.qualityScores, this.FormatType))
    {
        throw new ArgumentOutOfRangeException("qualityScores");
    }

    this.Count = this.sequenceData.LongLength;
}
/// <summary>
/// Maps a format name to its FastQFormatType value.
/// </summary>
/// <param name="formatType">Illumina/Sanger/Solexa</param>
/// <returns>
/// Sanger for "Sanger", Solexa_Illumina_v1_0 for "Solexa", and
/// Illumina_v1_3 for "Illumina" or any other input (including null).
/// </returns>
public static FastQFormatType GetFastQFormatType(string formatType)
{
    switch (formatType)
    {
        case "Sanger":
            return FastQFormatType.Sanger;

        case "Solexa":
            return FastQFormatType.Solexa_Illumina_v1_0;

        default:
            // "Illumina" and unrecognised names share the same result.
            return FastQFormatType.Illumina_v1_3;
    }
}
/// <summary>
/// Gets the IEnumerable of QualitativeSequences from the stream being parsed.
/// The argument is validated immediately; the actual parsing is deferred
/// until the returned sequence is enumerated.
/// </summary>
/// <param name="stream">Stream to be parsed.</param>
/// <returns>Returns the QualitativeSequences.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public IEnumerable<IQualitativeSequence> Parse(Stream stream)
{
    // Validate here rather than inside the iterator: an iterator body does
    // not run until the first MoveNext, which would delay this exception
    // to an unrelated call site.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    return this.ParseSequencesFromStream(stream);
}

/// <summary>
/// Iterator behind Parse(Stream): reads sequences one at a time until the
/// reader has no more characters, skipping null results from ParseOne.
/// </summary>
/// <param name="stream">Non-null stream to be parsed.</param>
/// <returns>Returns the QualitativeSequences.</returns>
private IEnumerable<IQualitativeSequence> ParseSequencesFromStream(Stream stream)
{
    FastQFormatType formatType = this.FormatType;
    using (StreamReader reader = stream.OpenRead())
    {
        while (reader.Peek() != -1)
        {
            IQualitativeSequence seq = ParseOne(reader, formatType);
            if (seq != null)
            {
                yield return seq;
            }
        }
    }
}
public void ValidateSangerFormatTypeDnaReverseComplement()
{
    // Builds a QualitativeSequence from the SimpleDnaSangerNode test data and
    // checks its reversed, reverse-complemented and complemented sequences
    // against the expected strings stored in the same Xml node.

    // Gets the actual sequence and the alphabet from the Xml
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(Constants.SimpleDnaSangerNode, Constants.FastQFormatType));
    string inputSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.inputSequenceNode);
    string compSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.ComplementQualSeqNode);
    string expectedRevCompSeq = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.RevComplement);
    string expectedRevSeq = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.ReverseQualSeq);
    string inputQuality = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleDnaSangerNode, Constants.InputByteArrayNode);

    QualitativeSequence createdQualitativeSequence =
        new QualitativeSequence(alphabet, expectedFormatType, inputSequence, inputQuality);

    ISequence revSeq = createdQualitativeSequence.GetReversedSequence();
    ISequence revCompSeq = createdQualitativeSequence.GetReverseComplementedSequence();
    ISequence compSeq = createdQualitativeSequence.GetComplementedSequence();

    Assert.AreEqual(expectedRevSeq, new string(revSeq.Select(a => (char)a).ToArray()));
    Assert.AreEqual(expectedRevCompSeq, new string(revCompSeq.Select(a => (char)a).ToArray()));
    Assert.AreEqual(compSequence, new string(compSeq.Select(a => (char)a).ToArray()));

    ApplicationLog.WriteLine("Qualitative BVT: Successfully validated Reverse, Complement and ReverseComplement sequence");
    Console.WriteLine("Qualitative BVT: Successfully validated Reverse, Complement and ReverseComplement sequence");
}
/// <summary>
/// Maps a format name to its FastQFormatType value.
/// </summary>
/// <param name="formatType">Illumina/Sanger/Solexa</param>
/// <returns>
/// Sanger for "Sanger", Solexa for "Solexa", and Illumina for "Illumina"
/// or any other input (including null).
/// </returns>
internal static FastQFormatType GetFastQFormatType(string formatType)
{
    if (formatType == "Sanger")
    {
        return FastQFormatType.Sanger;
    }

    if (formatType == "Solexa")
    {
        return FastQFormatType.Solexa;
    }

    // "Illumina" and unrecognised names share the same result.
    return FastQFormatType.Illumina;
}
/// <summary>
/// General method to validate creation of a Qualitative sequence from the
/// test data stored under the given xml node.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="parameters">Selects which constructor is exercised:
/// Score uses the string overload, ByteArray uses the byte-array overload.</param>
void GeneralQualitativeSequence(
    string nodeName, QualitativeSequenceParameters parameters)
{
    // Gets the actual sequence and the alphabet from the Xml
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string expectedScore = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedScore);
    QualitativeSequence createdQualitativeSequence = null;
    string inputSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.inputSequenceNode);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.QSequenceCount);
    string inputQuality = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InputByteArrayNode);
    Byte[] inputScoreArray = encodingObj.GetBytes(inputQuality);

    // Create and validate Qualitative Sequence.
    switch (parameters)
    {
        case QualitativeSequenceParameters.Score:
            createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                inputSequence, inputQuality);
            int count = 0;

            // Validate score
            foreach (byte qualScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(inputScoreArray[count], qualScore);
                count++;
            }

            break;

        case QualitativeSequenceParameters.ByteArray:
            byte[] scoreValue = ASCIIEncoding.ASCII.GetBytes(inputSequence);
            int index = 0;
            createdQualitativeSequence = new QualitativeSequence(alphabet, expectedFormatType,
                scoreValue, inputScoreArray);

            // Validate score
            foreach (byte qualScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(inputScoreArray[index], qualScore);
                index++;
            }

            break;

        default:
            break;
    }

    // Check for null before the first dereference so an unhandled
    // 'parameters' value fails as an assertion, not a NullReferenceException.
    Assert.IsNotNull(createdQualitativeSequence);

    string qualitativeSequence = new string(createdQualitativeSequence.Select(a => (char)a).ToArray());

    // Validate createdSequence qualitative sequence.
    Assert.AreEqual(alphabet, createdQualitativeSequence.Alphabet);
    Assert.AreEqual(expectedSequence, qualitativeSequence);
    Assert.AreEqual(expectedSequenceCount, createdQualitativeSequence.Count.ToString((IFormatProvider)null));
    Assert.AreEqual(expectedScore, createdQualitativeSequence.QualityScores.Count().ToString((IFormatProvider)null));
    Assert.AreEqual(expectedFormatType, createdQualitativeSequence.FormatType);

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine(string.Format((IFormatProvider)null,
        "Qualitative Sequence BVT:Qualitative Sequence {0} is as expected.",
        qualitativeSequence));

    Console.WriteLine(string.Format((IFormatProvider)null,
        "Qualitative Sequence BVT:Qualitative Sequence Score {0} is as expected.",
        createdQualitativeSequence.Count().ToString((IFormatProvider)null)));

    Console.WriteLine(string.Format((IFormatProvider)null,
        "Qualitative Sequence BVT:Qualitative format type {0} is as expected.",
        createdQualitativeSequence.FormatType));
}
/// <summary>
/// Initializes a new instance of the CompactSAMSequence class with specified alphabet, quality score type,
/// byte array representing symbols and integer array representing base quality scores
/// (Phred or Solexa base according to the FastQ format type).
/// All arguments are forwarded unchanged to the base class constructor.
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">An array of bytes representing the symbols.</param>
/// <param name="qualityScores">An array of integers representing the base quality scores
/// (Phred or Solexa base according to the FastQ format type).</param>
/// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
/// else validation will be skipped.</param>
public CompactSAMSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, int[] qualityScores, bool validate)
    : base(alphabet, fastQFormatType, sequence, qualityScores, validate) { }
/// <summary>
/// Parses sequence data and quality values and stores the resulting
/// sequence in the QuerySequence property of the SAMAlignedSequence.
/// Positions of "." and "=" symbols are recorded on the aligned sequence;
/// "." is replaced with N and "=" with the symbol at the same index of the
/// reference sequence.
/// </summary>
/// <param name="alignedSeq">SAM aligned Sequence.</param>
/// <param name="alphabet">Alphabet of the sequence to be created.</param>
/// <param name="Encoding">Encoding to use while creating sequence; may be null.</param>
/// <param name="sequencedata">Sequence data. When it equals "*" the method returns without creating a sequence.</param>
/// <param name="qualitydata">Quality values. When it equals "*" a plain Sequence without quality scores is created.</param>
/// <param name="refSeq">Reference sequence if known; required when the sequence data contains "=".</param>
/// <param name="isReadOnly">Flag to indicate whether the new sequence is required to be read-only or not.</param>
public static void ParseQualityNSequence(SAMAlignedSequence alignedSeq, IAlphabet alphabet, IEncoding Encoding, string sequencedata, string qualitydata, ISequence refSeq, bool isReadOnly)
{
    if (alignedSeq == null)
    {
        throw new ArgumentNullException("alignedSeq");
    }

    if (string.IsNullOrWhiteSpace(sequencedata))
    {
        throw new ArgumentNullException("sequencedata");
    }

    if (string.IsNullOrWhiteSpace(qualitydata))
    {
        throw new ArgumentNullException("qualitydata");
    }

    FastQFormatType fastQType = QualityFormatType;

    // Nothing to build when the sequence field is "*".
    if (sequencedata.Equals("*"))
    {
        return;
    }

    bool hasQualityScores = !qualitydata.Equals("*");
    byte[] qualScores = null;

    if (hasQualityScores)
    {
        qualScores = ASCIIEncoding.ASCII.GetBytes(qualitydata);

        // Sequence and quality strings must have the same length.
        if (sequencedata.Length != qualitydata.Length)
        {
            string detail = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoresLength, alignedSeq.QName);
            string message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Resource.SAM_NAME, detail);
            Trace.Report(message);
            throw new FileFormatException(message);
        }
    }

    // Record the position of every "." symbol.
    for (int dotAt = sequencedata.IndexOf('.', 0); dotAt > -1; dotAt = sequencedata.IndexOf('.', dotAt + 1))
    {
        alignedSeq.DotSymbolIndexes.Add(dotAt);
    }

    // Replace "." with N.
    if (alignedSeq.DotSymbolIndexes.Count > 0)
    {
        sequencedata = sequencedata.Replace('.', 'N');
    }

    // Record the position of every "=" symbol.
    for (int equalAt = sequencedata.IndexOf('=', 0); equalAt > -1; equalAt = sequencedata.IndexOf('=', equalAt + 1))
    {
        alignedSeq.EqualSymbolIndexes.Add(equalAt);
    }

    // Replace "=" with the corresponding symbol from refSeq.
    if (alignedSeq.EqualSymbolIndexes.Count > 0)
    {
        if (refSeq == null)
        {
            throw new ArgumentException(Resource.RefSequenceNofFound);
        }

        for (int i = 0; i < alignedSeq.EqualSymbolIndexes.Count; i++)
        {
            int position = alignedSeq.EqualSymbolIndexes[i];
            sequencedata = sequencedata.Remove(position, 1)
                                       .Insert(position, refSeq[position].Symbol.ToString());
        }
    }

    ISequence querySequence;
    if (hasQualityScores)
    {
        QualitativeSequence qualSeq = Encoding == null
            ? new QualitativeSequence(alphabet, fastQType, sequencedata, qualScores)
            : new QualitativeSequence(alphabet, fastQType, Encoding, sequencedata, qualScores);

        qualSeq.ID = alignedSeq.QName;
        qualSeq.IsReadOnly = isReadOnly;
        querySequence = qualSeq;
    }
    else
    {
        Sequence plainSeq = Encoding == null
            ? new Sequence(alphabet, sequencedata)
            : new Sequence(alphabet, Encoding, sequencedata);

        plainSeq.ID = alignedSeq.QName;
        plainSeq.IsReadOnly = isReadOnly;
        querySequence = plainSeq;
    }

    alignedSeq.QuerySequence = querySequence;
}
/// <summary>
/// General method to validate the gap-related index lookups of a
/// Qualitative Sequence built from the given xml node.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="indexParam">Selects which index lookup is validated.</param>
void ValidateGeneralQualitativeSeqItemIndices(
    string nodeName, QualitativeSequenceParameters indexParam)
{
    // Read the inputs and the expected indices from the Xml.
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string inputSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.inputSequenceNode);
    string expectedFirstItemIdex = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FirstItemIndex);
    string expectedLastItemIdex = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.LastItemIndex);
    string expectedGapIndex = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.IndexOfGap);

    // Create a qualitative Sequence, passing the format's default quality
    // score as a one-character string.
    string defaultScore =
        ((char)QualitativeSequence.GetDefaultQualScore(expectedFormatType)).ToString();
    QualitativeSequence createdQualitativeSequence = new QualitativeSequence(
        alphabet, expectedFormatType, inputSequence, defaultScore);

    // Look up the requested index and compare it with the expected value.
    long observedIndex;
    switch (indexParam)
    {
        case QualitativeSequenceParameters.IndexOfNonGap:
            observedIndex = createdQualitativeSequence.IndexOfNonGap();
            Assert.AreEqual(observedIndex,
                Convert.ToInt32(expectedFirstItemIdex, (IFormatProvider)null));
            break;

        case QualitativeSequenceParameters.IndexOfNonGapWithParam:
            observedIndex = createdQualitativeSequence.IndexOfNonGap(5);
            Assert.AreEqual(observedIndex,
                Convert.ToInt32(expectedGapIndex, (IFormatProvider)null));
            break;

        case QualitativeSequenceParameters.LastIndexOf:
            observedIndex = createdQualitativeSequence.LastIndexOfNonGap();
            Assert.AreEqual(observedIndex,
                Convert.ToInt32(expectedLastItemIdex, (IFormatProvider)null));
            break;

        case QualitativeSequenceParameters.LastIndexOfWithPam:
            observedIndex = createdQualitativeSequence.LastIndexOfNonGap(5);
            Assert.AreEqual(observedIndex,
                Convert.ToInt32(expectedGapIndex, (IFormatProvider)null));
            break;

        default:
            break;
    }

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine("Qualitative Sequence P1 : Qualitative SequenceItems indices validation completed successfully.");
}
/// <summary>
/// Gets default encoded quality scores.
/// </summary>
/// <param name="formatType">Fastq format type.</param>
/// <param name="length">No of quality scores required.</param>
/// <returns>
/// A string of <paramref name="length"/> characters, each being the default
/// encoded quality score of <paramref name="formatType"/>.
/// </returns>
public static string GetDefaultEncodedQualityScores(FastQFormatType formatType, int length)
{
    // The default score depends only on the format, so look it up once
    // instead of once per character.
    char defaultScore = (char)QualitativeSequence.GetDefaultQualScore(formatType);

    return new string(defaultScore, length);
}
/// <summary>
/// Constructor for performing quality score-level QC.
/// Passes the parser and filename to the base class, then calls
/// Initialize with the format, maximum read length and read count.
/// </summary>
/// <param name="sequences">Sequence parser</param>
/// <param name="readLengthMax">Maximum read length</param>
/// <param name="count">Total number of reads</param>
/// <param name="format">FastQ Format Type.</param>
/// <param name="filename">input filename</param>
public QualityScoreAnalyzer(ISequenceParser sequences, long readLengthMax, long count, FastQFormatType format, string filename)
    : base(sequences, filename)
{
    Initialize(format, readLengthMax, count);
}
/// <summary>
/// General method to validate creation of a Qualitative sequence from the
/// test data stored under the given xml node.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="parameters">Selects which constructor is exercised:
/// Score uses the string overload with default encoded quality scores,
/// ByteArray uses the byte-array overload.</param>
void GeneralQualitativeSequence(
    string nodeName, QualitativeSequenceParameters parameters)
{
    // Read the inputs and expectations from the Xml.
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string inputSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.inputSequenceNode);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.QSequenceCount);
    string inputScoreforIUPAC = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.MaxScoreNode);
    string inputQuality = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InputByteArrayNode);
    byte[] qualityBytes = Encoding.UTF8.GetBytes(inputQuality);

    QualitativeSequence createdQualitativeSequence = null;

    // Create the sequence and validate its encoded quality scores.
    if (parameters == QualitativeSequenceParameters.Score)
    {
        string defaultScores =
            Utility.GetDefaultEncodedQualityScores(expectedFormatType, inputSequence.Length);
        createdQualitativeSequence = new QualitativeSequence(
            alphabet, expectedFormatType, inputSequence, defaultScores);

        foreach (byte qualScore in createdQualitativeSequence.GetEncodedQualityScores())
        {
            Assert.AreEqual(qualScore, Convert.ToInt32(inputScoreforIUPAC, (IFormatProvider)null));
        }
    }
    else if (parameters == QualitativeSequenceParameters.ByteArray)
    {
        byte[] symbolBytes = Encoding.UTF8.GetBytes(inputSequence);
        createdQualitativeSequence = new QualitativeSequence(
            alphabet, expectedFormatType, symbolBytes, qualityBytes);

        int position = 0;
        foreach (byte qualScore in createdQualitativeSequence.GetEncodedQualityScores())
        {
            Assert.AreEqual(qualScore, Convert.ToInt32(qualityBytes[position], (IFormatProvider)null));
            position++;
        }
    }

    // Validate the created qualitative sequence.
    Assert.IsNotNull(createdQualitativeSequence);
    Assert.AreEqual(createdQualitativeSequence.Alphabet, alphabet);
    Assert.AreEqual(createdQualitativeSequence.ConvertToString(), expectedSequence);
    Assert.AreEqual(createdQualitativeSequence.Count.ToString((IFormatProvider)null), expectedSequenceCount);

    ApplicationLog.WriteLine(string.Format(null,
        "Qualitative Sequence P1:Qualitative Sequence {0} is as expected.",
        createdQualitativeSequence));
    ApplicationLog.WriteLine(string.Format(null,
        "Qualitative Sequence P1:Qualitative Sequence Score {0} is as expected.",
        createdQualitativeSequence.GetEncodedQualityScores()));

    Assert.AreEqual(createdQualitativeSequence.FormatType, expectedFormatType);

    ApplicationLog.WriteLine(string.Format(null,
        "Qualitative Sequence P1:Qualitative format type {0} is as expected.",
        createdQualitativeSequence.FormatType));
}
/// <summary>
/// Returns the minimum encoded quality score constant of the given
/// FastQ format.
/// </summary>
/// <param name="formatType">FastQ format type.</param>
/// <returns>Minimum encoded quality score for the format.</returns>
public static byte GetMinEncodedQualScore(FastQFormatType formatType)
{
    switch (formatType)
    {
        case FastQFormatType.Sanger:
            return Sanger_MinEncodedQualScore;

        case FastQFormatType.Solexa_Illumina_v1_0:
            return Solexa_Illumina_v1_0_MinEncodedQualScore;

        case FastQFormatType.Illumina_v1_3:
            return Illumina_v1_3_MinEncodedQualScore;

        case FastQFormatType.Illumina_v1_5:
            return Illumina_v1_5_MinEncodedQualScore;

        default:
            // Every remaining format value reports the Illumina v1.8 minimum.
            return Illumina_v1_8_MinEncodedQualScore;
    }
}
/// <summary>
/// Converts encoded quality scores from one FastQ format to another.
/// The input is validated against the source format first. When both
/// formats are the same, a copy of the input is returned.
/// </summary>
/// <param name="fromFormatType">Source FastQ format.</param>
/// <param name="toFormatType">Target FastQ format.</param>
/// <param name="encodedqualScores">Encoded quality scores.</param>
/// <returns>A new array holding the scores encoded for the target format.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encodedqualScores"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">A score is not valid for the source format.</exception>
public static byte[] ConvertEncodedQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, byte[] encodedqualScores)
{
    if (encodedqualScores == null)
    {
        throw new ArgumentNullException("encodedqualScores");
    }

    byte invalidQualScore;
    bool allValid = ValidateQualScores(encodedqualScores, fromFormatType, out invalidQualScore);
    if (!allValid)
    {
        string message = string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, (char)invalidQualScore);
        throw new ArgumentOutOfRangeException("encodedqualScores", message);
    }

    if (fromFormatType != toFormatType)
    {
        // Decode, convert between score types, then re-encode.
        int[] decoded = GetDecodedQualScores(encodedqualScores, fromFormatType);
        int[] converted = ConvertQualityScores(fromFormatType, toFormatType, decoded);
        return GetEncodedQualScores(converted, toFormatType);
    }

    // Same format: hand back an independent copy.
    long length = encodedqualScores.GetLongLength();
    byte[] copy = new byte[length];
    Helper.Copy(encodedqualScores, copy, length);
    return copy;
}
/// <summary>
/// Converts quality scores from one FastQ format to another.
/// The input is validated against the source format first. When the two
/// formats are equal, or map to the same base quality score type, a copy
/// of the input is returned.
/// </summary>
/// <param name="fromFormatType">Source FastQ format.</param>
/// <param name="toFormatType">Target FastQ format.</param>
/// <param name="qualScores">Quality scores.</param>
/// <returns>A new array holding the converted scores.</returns>
/// <exception cref="ArgumentNullException"><paramref name="qualScores"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">A score is not valid for the source format.</exception>
public static sbyte[] ConvertQualityScores(FastQFormatType fromFormatType, FastQFormatType toFormatType, sbyte[] qualScores)
{
    if (qualScores == null)
    {
        throw new ArgumentNullException("qualScores");
    }

    sbyte invalidQualScore;
    bool allValid = ValidateQualScores(qualScores, fromFormatType, out invalidQualScore);
    if (!allValid)
    {
        string message = string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, invalidQualScore);
        throw new ArgumentOutOfRangeException("qualScores", message);
    }

    if (fromFormatType != toFormatType)
    {
        BaseQualityScoreType sourceType = GetQualityScoreType(fromFormatType);
        BaseQualityScoreType targetType = GetQualityScoreType(toFormatType);

        // A real conversion is only needed when the score types differ.
        if (sourceType != targetType)
        {
            return Convert(sourceType, targetType, qualScores);
        }
    }

    // No conversion needed: hand back an independent copy.
    long length = qualScores.GetLongLength();
    sbyte[] copy = new sbyte[length];
    Helper.Copy(qualScores, copy, length);
    return copy;
}
/// <summary>
/// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ format type,
/// a byte array of symbols and an integer array of base quality scores
/// (Phred or Solexa base according to the FastQ format type).
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">An array of bytes representing the symbols.</param>
/// <param name="qualityScores">An array of integers representing the base quality scores
/// (Phred or Solexa base according to the FastQ format type).</param>
/// <param name="validate">When true, the lengths, the symbols and the quality scores are checked
/// before the data is copied; when false, no checks are made.</param>
/// <exception cref="ArgumentNullException">alphabet, sequence or qualityScores is null.</exception>
/// <exception cref="ArgumentException">validate is true and the two arrays differ in length.</exception>
/// <exception cref="ArgumentOutOfRangeException">validate is true and a quality score is rejected
/// for the given format.</exception>
public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, int[] qualityScores, bool validate)
{
    if (alphabet == null)
    {
        throw new ArgumentNullException("alphabet");
    }

    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    if (qualityScores == null)
    {
        throw new ArgumentNullException("qualityScores");
    }

    this.Alphabet = alphabet;
    this.ID = string.Empty;
    this.FormatType = fastQFormatType;

    long symbolCount = sequence.GetLongLength();
    long scoreCount = qualityScores.GetLongLength();

    if (validate)
    {
        if (symbolCount != scoreCount)
        {
            throw new ArgumentException(
                string.Format(
                    CultureInfo.CurrentUICulture,
                    Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                    symbolCount,
                    scoreCount));
        }

        // Symbols must belong to the alphabet.
        if (!this.Alphabet.ValidateSequence(sequence, 0, symbolCount))
        {
            throw Helper.GenerateAlphabetCheckFailureException(this.Alphabet, sequence);
        }

        // Scores must be in range for the format.
        int rejectedScore;
        if (!ValidateQualScores(qualityScores, this.FormatType, out rejectedScore))
        {
            throw new ArgumentOutOfRangeException(
                "qualityScores",
                string.Format(
                    CultureInfo.CurrentUICulture,
                    Properties.Resource.InvalidQualityScoreFound,
                    rejectedScore,
                    this.FormatType));
        }
    }

    // Keep private copies so later changes to the caller's arrays do not leak in.
    this.sequenceData = new byte[symbolCount];
    this.qualityScores = new sbyte[scoreCount];
    Helper.Copy(sequence, this.sequenceData, symbolCount);

    long position = 0;
    while (position < scoreCount)
    {
        // Scores are stored as signed bytes; the int value is narrowed by cast.
        this.qualityScores[position] = (sbyte)qualityScores[position];
        position++;
    }

    this.Count = this.sequenceData.GetLongLength();
}
/// <summary>
/// Converts a single quality score from one FastQ format to another.
/// </summary>
/// <param name="fromFormatType">FastQ format the score currently belongs to.</param>
/// <param name="toFormatType">FastQ format to convert the score to.</param>
/// <param name="qualScore">Quality score.</param>
/// <returns>
/// The unchanged score when the formats, or their base quality score types, are equal;
/// otherwise the score converted between the two base quality score types.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// ValidateQualScores rejects the score for the source format.
/// </exception>
public static int ConvertQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, int qualScore)
{
    // The validator works on arrays, so wrap the single score.
    int[] singleScore = new int[] { qualScore };
    int rejectedScore;
    if (!ValidateQualScores(singleScore, fromFormatType, out rejectedScore))
    {
        throw new ArgumentOutOfRangeException(
            "qualScore",
            string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, rejectedScore));
    }

    if (fromFormatType == toFormatType)
    {
        return qualScore;
    }

    BaseQualityScoreType sourceScale = GetQualityScoreType(fromFormatType);
    BaseQualityScoreType targetScale = GetQualityScoreType(toFormatType);
    if (sourceScale == targetScale)
    {
        return qualScore;
    }

    return Convert(sourceScale, targetScale, qualScore);
}
/// <summary>
/// Controls execution of QC steps: prepares the output directory, switches the process
/// working directory to the input file's directory and creates the requested QC modules.
/// </summary>
/// <param name="parser">ISequenceParser object holding the input sequence data</param>
/// <param name="filename">Input filename</param>
/// <param name="runSequenceQc">Indicates whether the sequence-level QC module should be initialized</param>
/// <param name="runQualityScoreQc">Indicates whether the quality score-level QC module should be initialized
/// (ignored when the parser is a FastAParser)</param>
/// <param name="runBlast">Indicates whether the sequence contamination finder module should be initialized</param>
/// <param name="format">FastQ Format Type. Required when the quality score module is initialized;
/// otherwise it is not read and may be null.</param>
/// <param name="dir">Output directory. When null, a directory named after the filename prefix is used
/// next to the input file.</param>
/// <exception cref="ArgumentNullException">parser or filename is null, or format is null while the
/// quality score module is being initialized.</exception>
public Seqcos(ISequenceParser parser, string filename, bool runSequenceQc, bool runQualityScoreQc, bool runBlast, string format, string dir = null)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (filename == null)
    {
        throw new ArgumentNullException("filename");
    }

    this.myFilenames = new Filenames(filename, Resource.ChartFormat);
    this.SelectedParser = parser;

    // NOTE(review): Path.GetDirectoryName returns an empty string for a bare file name,
    // which Directory.SetCurrentDirectory rejects - confirm callers always pass a path
    // that includes a directory part.
    string inputDirectory = Path.GetDirectoryName(filename);

    // Use the platform separator instead of a hard-coded backslash so the default
    // output path is also valid on non-Windows systems.
    this.OutputDirectory = (dir == null)
        ? inputDirectory + Path.DirectorySeparatorChar + myFilenames.Prefix
        : dir;

    this.InitialWorkingDirectory = inputDirectory;
    Directory.SetCurrentDirectory(this.InitialWorkingDirectory);

    if (!Directory.Exists(this.OutputDirectory))
    {
        Directory.CreateDirectory(this.OutputDirectory);
    }

    // Initialize SequenceAnalyzer
    this.SequenceQc = runSequenceQc
        ? new SequenceAnalyzer(this.SelectedParser, myFilenames.FileName)
        : null;

    // Initialize QualityScoreAnalyzer
    if (runQualityScoreQc && !(parser is FastAParser))
    {
        if (format == null)
        {
            throw new ArgumentNullException("format");
        }

        FastQFormatType myFormat = BioHelper.GetQualityFormatType(format);

        if (runSequenceQc && this.SequenceQc != null)
        {
            // Reuse the read length and count already gathered by the sequence-level module.
            this.QualityScoreQc = new QualityScoreAnalyzer(
                this.SelectedParser,
                this.SequenceQc.ReadLengthMax,
                this.SequenceQc.Count,
                myFormat,
                myFilenames.FileName);
        }
        else
        {
            this.QualityScoreQc = new QualityScoreAnalyzer(this.SelectedParser, myFormat, myFilenames.FileName);
        }
    }
    else
    {
        this.QualityScoreQc = null;
    }

    // Initialize ContaminationFinder
    this.ContaminationFinder = runBlast
        ? new SequenceContaminationFinder(this.SelectedParser)
        : null;

    this.HasPlottedSequenceStats = false;
    this.HasPlottedQualityScoreStats = false;
}
/// <summary>
/// Converts a single ASCII encoded quality score from one FastQ format to another.
/// </summary>
/// <param name="fromFormatType">FastQ format the score is currently encoded in.</param>
/// <param name="toFormatType">FastQ format the score should be encoded in.</param>
/// <param name="encodedqualScore">Encoded quality score.</param>
/// <returns>
/// The unchanged score when both formats are equal; otherwise the score decoded,
/// converted and re-encoded for the target format.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// ValidateQualScores rejects the score for the source format.
/// </exception>
public static byte ConvertEncodedQualityScore(FastQFormatType fromFormatType, FastQFormatType toFormatType, byte encodedqualScore)
{
    // The validator works on arrays, so wrap the single score.
    byte[] singleScore = new byte[] { encodedqualScore };
    byte rejectedScore;
    if (!ValidateQualScores(singleScore, fromFormatType, out rejectedScore))
    {
        throw new ArgumentOutOfRangeException(
            "encodedqualScore",
            string.Format(CultureInfo.CurrentUICulture, Properties.Resource.InvalidQualityScore, (char)rejectedScore));
    }

    if (fromFormatType == toFormatType)
    {
        return encodedqualScore;
    }

    int decodedScore = GetDecodedQualScore(encodedqualScore, fromFormatType);
    int convertedScore = ConvertQualityScore(fromFormatType, toFormatType, decodedScore);
    return GetEncodedQualScore(convertedScore, toFormatType);
}
/// <summary>
/// Parses sequence data and quality values and stores the resulting sequence
/// in the QuerySequence of the given SAMAlignedSequence.
/// A sequence of "*" leaves the aligned sequence untouched; a quality string of "*"
/// produces a plain Sequence instead of a QualitativeSequence.
/// </summary>
/// <param name="alignedSeq">SAM aligned Sequence.</param>
/// <param name="alphabet">Alphabet of the sequence to be created.</param>
/// <param name="sequencedata">Sequence data.</param>
/// <param name="qualitydata">Quality values.</param>
/// <exception cref="ArgumentNullException">alignedSeq is null, or sequencedata or qualitydata
/// is null, empty or white space.</exception>
/// <exception cref="FileFormatException">Quality values are present but their length differs
/// from the length of the sequence data.</exception>
public static void ParseQualityNSequence(SAMAlignedSequence alignedSeq, IAlphabet alphabet, string sequencedata, string qualitydata)
{
    if (alignedSeq == null)
    {
        throw new ArgumentNullException("alignedSeq");
    }

    if (string.IsNullOrWhiteSpace(sequencedata))
    {
        throw new ArgumentNullException("sequencedata");
    }

    if (string.IsNullOrWhiteSpace(qualitydata))
    {
        throw new ArgumentNullException("qualitydata");
    }

    // "*" in the sequence field: nothing to build.
    if (sequencedata.Equals("*"))
    {
        return;
    }

    ISequence parsedSequence;
    if (qualitydata.Equals("*"))
    {
        // No quality values available: fall back to a plain sequence.
        parsedSequence = new Sequence(alphabet, sequencedata);
        parsedSequence.ID = alignedSeq.QName;
    }
    else
    {
        FastQFormatType scoreFormat = QualityFormatType;
        byte[] qualityBytes = ASCIIEncoding.ASCII.GetBytes(qualitydata);

        // Every symbol needs exactly one quality value.
        if (sequencedata.Length != qualitydata.Length)
        {
            string detail = string.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.FastQ_InvalidQualityScoresLength,
                alignedSeq.QName);
            string report = string.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.IOFormatErrorMessage,
                Properties.Resource.SAM_NAME,
                detail);
            Trace.Report(report);
            throw new FileFormatException(report);
        }

        // The quality text is passed through an ASCII encode/decode round trip before use.
        QualitativeSequence qualitative = new QualitativeSequence(
            alphabet,
            scoreFormat,
            sequencedata,
            ASCIIEncoding.ASCII.GetString(qualityBytes));
        qualitative.ID = alignedSeq.QName;
        parsedSequence = qualitative;
    }

    alignedSeq.QuerySequence = parsedSequence;
}
/// <summary>
/// Gets the default quality score, ASCII encoded for the specified FastQFormatType.
/// </summary>
/// <param name="type">FastQ format type.</param>
/// <returns>DefaultQualScore encoded for the given format.</returns>
public static byte GetDefaultQualScore(FastQFormatType type)
{
    byte encodedDefault = GetEncodedQualScore(DefaultQualScore, type);
    return encodedDefault;
}
/// <summary>
/// Gets the ASCII base value for the specified FastQ format.
/// </summary>
/// <param name="formatType">FastQ format.</param>
/// <returns>
/// The base value constant of the given format; any format without its own case
/// gets the Illumina v1.8 base value.
/// </returns>
private static int GetAsciiBaseValue(FastQFormatType formatType)
{
    switch (formatType)
    {
        case FastQFormatType.Sanger:
            return Sanger_AsciiBaseValue;

        case FastQFormatType.Solexa_Illumina_v1_0:
            return Solexa_Illumina_v1_0_AsciiBaseValue;

        case FastQFormatType.Illumina_v1_3:
            return Illumina_v1_3_AsciiBaseValue;

        case FastQFormatType.Illumina_v1_5:
            return Illumina_v1_5_AsciiBaseValue;

        default:
            return Illumina_v1_8_AsciiBaseValue;
    }
}
/// <summary>
/// Creates a new QualitativeSequence holding this instance's data with the
/// quality scores converted to the specified FastQ format type.
/// </summary>
/// <param name="formatType">FastQ format type to convert to.</param>
/// <returns>
/// A new QualitativeSequence, built without validation, that carries over this
/// instance's ID and metadata (the metadata member is assigned as-is).
/// </returns>
public QualitativeSequence ConvertTo(FastQFormatType formatType)
{
    sbyte[] rescaledScores = ConvertQualityScores(this.FormatType, formatType, this.qualityScores);

    return new QualitativeSequence(this.Alphabet, formatType, this.sequenceData, rescaledScores, false)
    {
        ID = this.ID,
        metadata = this.metadata
    };
}
/// <summary>
/// Gets the base quality score type used by the specified FastQ format.
/// </summary>
/// <param name="formatType">FastQ format.</param>
/// <returns>
/// SolexaBaseQualityScore for Solexa_Illumina_v1_0; PhredBaseQualityScore for every other format.
/// </returns>
private static BaseQualityScoreType GetQualityScoreType(FastQFormatType formatType)
{
    if (formatType == FastQFormatType.Solexa_Illumina_v1_0)
    {
        return BaseQualityScoreType.SolexaBaseQualityScore;
    }

    return BaseQualityScoreType.PhredBaseQualityScore;
}
/// <summary>
/// General method to validate default, maximum and minimum scores for different FastQ
/// formats: builds a QualitativeSequence from the values configured under the given
/// xml node and checks that its encoded quality scores match the expected ones.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="parameters">Different Qualitative Score method parameter.</param>
void ValidateFastQDefaultScores(string nodeName, QualitativeSequenceParameters parameters)
{
    // Gets the actual sequence and the alphabet from the Xml
    IAlphabet alphabet = Utility.GetAlphabet(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string inputSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.inputSequenceNode);
    string expectedMaxScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DefualtMaxScore);
    string expectedMinScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DefaultMinScore);

    QualitativeSequence createdQualitativeSequence = null;
    string qualityScoresString = Utility.GetDefaultEncodedQualityScores(expectedFormatType, inputSequence.Length);
    byte[] expectedMaxScores = Utility.GetEncodedQualityScores(
        (byte)int.Parse(expectedMaxScore, null as IFormatProvider), inputSequence.Length);
    byte[] expectedMinScores = Utility.GetEncodedQualityScores(
        (byte)int.Parse(expectedMinScore, null as IFormatProvider), inputSequence.Length);

    int i = 0;
    switch (parameters)
    {
        // Both parameters exercise the same constructor and expectations.
        case QualitativeSequenceParameters.DefaultScoreWithAlphabets:
        case QualitativeSequenceParameters.DefaultScoreWithSequence:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet, expectedFormatType, inputSequence, qualityScoresString);

            // Validate default score (expected value first, actual second).
            i = 0;
            foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
            {
                Assert.AreEqual((byte)qualityScoresString[i], qualitativeScore);
                i++;
            }

            // Log VSTest GUI.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Default score {0} is as expected.",
                qualityScoresString[0]));
            break;

        case QualitativeSequenceParameters.MaxDefaultScore:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet, expectedFormatType, Encoding.UTF8.GetBytes(inputSequence), expectedMaxScores);

            // Validate default maximum score.
            i = 0;
            foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
            {
                Assert.AreEqual(expectedMaxScores[i], qualitativeScore);
                i++;
            }

            // Log VSTest GUI.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Maximum score {0} is as expected.",
                QualitativeSequence.GetMaxEncodedQualScore(expectedFormatType)));
            break;

        case QualitativeSequenceParameters.MinDefaultScore:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet, expectedFormatType, Encoding.UTF8.GetBytes(inputSequence), expectedMinScores);

            // Validate default minimum score.
            i = 0;
            foreach (byte qualitativeScore in createdQualitativeSequence.GetEncodedQualityScores())
            {
                Assert.AreEqual(expectedMinScores[i], qualitativeScore);
                i++;
            }

            // Log VSTest GUI.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Minimum score {0} is as expected.",
                QualitativeSequence.GetMinEncodedQualScore(expectedFormatType)));
            break;

        default:
            break;
    }
}
/// <summary>
/// Decodes a single ASCII encoded quality score using the ASCII base value of the given format.
/// </summary>
/// <param name="encodedQualScore">ASCII Encoded quality score.</param>
/// <param name="formatType">FastQ format type.</param>
/// <returns>Returns the decoded quality score.</returns>
private static int GetDecodedQualScore(byte encodedQualScore, FastQFormatType formatType)
{
    int asciiBase = GetAsciiBaseValue(formatType);
    return DecodeQualityScore(encodedQualScore, asciiBase);
}
/// <summary>
/// Constructor used when called from the GUI after SequenceAnalyzer has already
/// calculated the maximum read length and the read count. The parser, filename and
/// background worker arguments are forwarded to the base class; the remaining values
/// are passed to Initialize.
/// </summary>
/// <param name="sequences">Sequence parser</param>
/// <param name="readLengthMax">Maximum read length</param>
/// <param name="count">Total number of reads</param>
/// <param name="format">FastQ Format Type</param>
/// <param name="filename">input filename</param>
/// <param name="worker">Background worker</param>
/// <param name="e">Background Worker event args</param>
public QualityScoreAnalyzer(
    ISequenceParser sequences,
    long readLengthMax,
    long count,
    FastQFormatType format,
    string filename,
    BackgroundWorker worker,
    DoWorkEventArgs e)
    : base(sequences, filename, worker, e)
{
    Initialize(format, readLengthMax, count);
}
/// <summary>
/// Decodes ASCII encoded quality scores into signed bytes using the ASCII base value
/// of the given format.
/// </summary>
/// <param name="encodedQualScores">ASCII Encoded quality scores.</param>
/// <param name="formatType">FastQ format type.</param>
/// <returns>Returns a new array with one decoded score per input score.</returns>
private static sbyte[] GetDecodedQualScoresInSignedBytes(byte[] encodedQualScores, FastQFormatType formatType)
{
    // The base value is the same for every score, so look it up once.
    int asciiBase = GetAsciiBaseValue(formatType);
    sbyte[] decodedScores = new sbyte[encodedQualScores.GetLongLength()];

    long position = 0;
    foreach (byte encodedScore in encodedQualScores)
    {
        decodedScores[position] = (sbyte)DecodeQualityScore(encodedScore, asciiBase);
        position++;
    }

    return decodedScores;
}
/// <summary>
/// General method to invalidate the FastQ formatter: parses one sequence from the
/// configured file, calls Format with a null sequence or a null writer (selected by
/// <paramref name="param"/>) and fails the test when no exception is thrown.
/// </summary>
/// <param name="param">FastQ Formatter different parameters</param>
void InValidateFastQFormatter(FastQFormatParameters param)
{
    // Gets the expected sequence from the Xml
    string filepath = _utilityObj._xmlUtil.GetTextValue(
        Constants.MultiSeqSangerRnaProNode, Constants.FilePathNode);
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        _utilityObj._xmlUtil.GetTextValue(Constants.MultiSeqSangerRnaProNode, Constants.FastQFormatType));

    // Parse a FastQ file.
    using (FastQParser fastQParser = new FastQParser())
    {
        fastQParser.AutoDetectFastQFormat = true;
        fastQParser.FastqType = expectedFormatType;

        IQualitativeSequence sequence = fastQParser.ParseOne(filepath);
        FastQFormatter fastQFormatter = new FastQFormatter();
        TextWriter txtWriter = null;

        // Assert.Fail reports failure by throwing, so it must not be called inside the
        // try block: catch (Exception) would swallow it and the test could never fail.
        // Record the outcome and assert after the handler instead.
        bool exceptionThrown = false;
        try
        {
            switch (param)
            {
                case FastQFormatParameters.TextWriter:
                    fastQFormatter.Format(sequence, null as TextWriter);
                    break;

                case FastQFormatParameters.Sequence:
                    fastQFormatter.Format(null as ISequence, txtWriter);
                    break;

                case FastQFormatParameters.QualitativeSequence:
                    fastQFormatter.Format(null as IQualitativeSequence, txtWriter);
                    break;

                default:
                    fastQFormatter.Format(sequence as QualitativeSequence, null as TextWriter);
                    break;
            }
        }
        catch (Exception)
        {
            exceptionThrown = true;
            ApplicationLog.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
            Console.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
        }

        if (!exceptionThrown)
        {
            Assert.Fail();
        }
    }
}
/// <summary>
/// Encodes a single quality score as ASCII using the ASCII base value of the given format.
/// </summary>
/// <param name="qualScore">Quality Score.</param>
/// <param name="formatType">FastQ format type.</param>
/// <returns>ASCII encoded quality score.</returns>
private static byte GetEncodedQualScore(int qualScore, FastQFormatType formatType)
{
    int asciiBase = GetAsciiBaseValue(formatType);
    return EncodeQualityScore(qualScore, asciiBase);
}
/// <summary>
/// General method to validate default, maximum and minimum scores for different FastQ
/// formats: builds a QualitativeSequence from the values configured under the given
/// xml node and checks each of its quality scores against the expected score.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="parameters">Different Qualitative Score method parameter.</param>
void ValidateFastQDefaultScores(
    string nodeName, QualitativeSequenceParameters parameters)
{
    // Gets the actual sequence and the alphabet from the Xml
    IAlphabet alphabet = Utility.GetAlphabet(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string inputSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.inputSequenceNode);
    string expectedMaxScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DefualtMaxScore);
    string expectedMinScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DefaultMinScore);

    QualitativeSequence createdQualitativeSequence = null;

    switch (parameters)
    {
        // Both parameters exercise the same constructor and expectations.
        case QualitativeSequenceParameters.DefaultScoreWithAlphabets:
        case QualitativeSequenceParameters.DefaultScoreWithSequence:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet,
                expectedFormatType,
                inputSequence,
                ((char)QualitativeSequence.GetDefaultQualScore(expectedFormatType)).ToString());

            // Validate default score (expected value first, actual second).
            foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(
                    QualitativeSequence.GetDefaultQualScore(expectedFormatType),
                    qualitativeScore);
            }

            // Log Nunit GUI.
            Console.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Default score {0} is as expected.",
                QualitativeSequence.GetDefaultQualScore(expectedFormatType)));
            break;

        case QualitativeSequenceParameters.MaxDefaultScore:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet,
                expectedFormatType,
                encodingObj.GetBytes(inputSequence),
                new byte[] { byte.Parse(expectedMaxScore, (IFormatProvider)null) });

            // Validate default maximum score.
            foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(
                    QualitativeSequence.GetMaxQualScore(expectedFormatType),
                    qualitativeScore);
            }

            // Log Nunit GUI.
            Console.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Maximum score {0} is as expected.",
                QualitativeSequence.GetMaxQualScore(expectedFormatType)));
            break;

        case QualitativeSequenceParameters.MinDefaultScore:
            createdQualitativeSequence = new QualitativeSequence(
                alphabet,
                expectedFormatType,
                encodingObj.GetBytes(inputSequence),
                new byte[] { byte.Parse(expectedMinScore, (IFormatProvider)null) });

            // Validate default minimum score.
            foreach (byte qualitativeScore in createdQualitativeSequence.QualityScores)
            {
                Assert.AreEqual(
                    QualitativeSequence.GetMinQualScore(expectedFormatType),
                    qualitativeScore);
            }

            // Log Nunit GUI.
            Console.WriteLine(string.Format((IFormatProvider)null,
                "Qualitative Sequence P1:Qualitative Sequence Minimum score {0} is as expected.",
                QualitativeSequence.GetMinQualScore(expectedFormatType)));
            break;

        default:
            break;
    }
}
/// <summary>
/// Encodes quality scores as ASCII using the ASCII base value of the given format.
/// </summary>
/// <param name="qualScores">Quality scores.</param>
/// <param name="formatType">FastQ format type.</param>
/// <returns>A new array with one ASCII encoded score per input score.</returns>
private static byte[] GetEncodedQualScores(sbyte[] qualScores, FastQFormatType formatType)
{
    // The base value is the same for every score, so look it up once.
    int asciiBase = GetAsciiBaseValue(formatType);
    byte[] encodedScores = new byte[qualScores.GetLongLength()];

    long position = 0;
    foreach (sbyte score in qualScores)
    {
        encodedScores[position] = EncodeQualityScore(score, asciiBase);
        position++;
    }

    return encodedScores;
}
/// <summary>
/// Initializes a new instance of the CompactSAMSequence class with the specified alphabet,
/// FastQ format type, symbols and encoded quality scores.
/// All arguments are forwarded to the base constructor with its final argument set to true.
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">A string representing the symbols.</param>
/// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
public CompactSAMSequence(
    IAlphabet alphabet,
    FastQFormatType fastQFormatType,
    string sequence,
    string encodedQualityScores)
    : base(alphabet, fastQFormatType, sequence, encodedQualityScores, true)
{
}
/// <summary>
/// Validates whether the specified encoded quality scores are within the minimum and
/// maximum encoded quality score of the given FastQ format.
/// </summary>
/// <param name="encodedQualScore">Encoded quality scores.</param>
/// <param name="formatType">FastQ format type.</param>
/// <param name="invalidQualScore">Receives the first out-of-range score found;
/// 0 when every score is in range.</param>
/// <returns>Returns true if all scores are within the limits, otherwise false.</returns>
/// <exception cref="ArgumentNullException">encodedQualScore is null.</exception>
public static bool ValidateQualScores(byte[] encodedQualScore, FastQFormatType formatType, out byte invalidQualScore)
{
    // Public entry point: report a null array as an argument error, in line with the
    // other public methods of this type, instead of failing on the first dereference.
    if (encodedQualScore == null)
    {
        throw new ArgumentNullException("encodedQualScore");
    }

    invalidQualScore = 0;
    int minScore = GetMinEncodedQualScore(formatType);
    int maxScore = GetMaxEncodedQualScore(formatType);

    foreach (byte qualScore in encodedQualScore)
    {
        if (qualScore < minScore || qualScore > maxScore)
        {
            // Stop at the first offender and hand it back to the caller.
            invalidQualScore = qualScore;
            return false;
        }
    }

    return true;
}
/// <summary>
/// Reads one four-line FASTQ record from the reader and returns it as a QualitativeSequence.
/// </summary>
/// <param name="reader">Reader to be parsed.</param>
/// <param name="formatType">FASTQ format type.</param>
/// <returns>
/// The parsed sequence (constructed without validation), or null when the reader is
/// already at the end of the stream.
/// </returns>
/// <exception cref="Exception">
/// A line of the record is missing or malformed, the quality score count differs from
/// the symbol count, or the alphabet cannot be detected or does not accept the symbols.
/// </exception>
private IQualitativeSequence ParseOne(StreamReader reader, FastQFormatType formatType)
{
    if (reader.EndOfStream)
    {
        return null;
    }

    // First line: '@' followed by the sequence id.
    string headerLine = ReadNextLine(reader, true);
    if (headerLine == null || !headerLine.StartsWith("@", StringComparison.Ordinal))
    {
        throw new Exception(
            string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name));
    }

    string id = headerLine.Substring(1).Trim();

    // Second line: the symbols.
    string sequenceLine = ReadNextLine(reader, true);
    if (string.IsNullOrEmpty(sequenceLine))
    {
        throw this.BuildFastQRecordException(Resource.FastQ_InvalidSequenceLine, id);
    }

    byte[] sequenceData = Encoding.ASCII.GetBytes(sequenceLine);

    // Third line: '+' optionally followed by a repeat of the id.
    string separatorLine = ReadNextLine(reader, true);
    if (separatorLine == null || !separatorLine.StartsWith("+", StringComparison.Ordinal))
    {
        throw this.BuildFastQRecordException(Resource.FastQ_InvalidQualityScoreHeaderLine, id);
    }

    string repeatedId = separatorLine.Substring(1).Trim();
    if (!string.IsNullOrEmpty(repeatedId) && !id.Equals(repeatedId))
    {
        throw this.BuildFastQRecordException(Resource.FastQ_InvalidQualityScoreHeaderData, id);
    }

    // Fourth line: the encoded quality scores.
    string qualityLine = ReadNextLine(reader, true);
    if (string.IsNullOrEmpty(qualityLine))
    {
        throw this.BuildFastQRecordException(Resource.FastQ_EmptyQualityScoreLine, id);
    }

    byte[] qualScores = Encoding.ASCII.GetBytes(qualityLine);

    // Every symbol needs exactly one quality score.
    if (sequenceData.GetLongLength() != qualScores.GetLongLength())
    {
        throw this.BuildFastQRecordException(Resource.FastQ_InvalidQualityScoresLength, id);
    }

    // Validate against the configured alphabet, or auto detect one when none is set.
    IAlphabet alphabet = this.Alphabet;
    if (alphabet != null)
    {
        if (!alphabet.ValidateSequence(sequenceData, 0, sequenceData.GetLongLength()))
        {
            throw new Exception(Resource.InvalidAlphabetType);
        }
    }
    else
    {
        alphabet = Alphabets.AutoDetectAlphabet(sequenceData, 0, sequenceData.GetLongLength(), alphabet);
        if (alphabet == null)
        {
            throw new Exception(Resource.CouldNotIdentifyAlphabetType);
        }
    }

    QualitativeSequence parsedRecord = new QualitativeSequence(alphabet, formatType, sequenceData, qualScores, false);
    parsedRecord.ID = id;
    return parsedRecord;
}

/// <summary>
/// Builds the exception reported for a malformed FASTQ record: formats the detail text
/// with the record id and wraps it in the IOFormatErrorMessage text together with the
/// parser name.
/// </summary>
/// <param name="detailsFormat">Format string of the detail text; receives the record id.</param>
/// <param name="id">Id of the record being parsed.</param>
/// <returns>The exception to throw.</returns>
private Exception BuildFastQRecordException(string detailsFormat, string id)
{
    string details = string.Format(CultureInfo.CurrentCulture, detailsFormat, id);
    string message = string.Format(
        CultureInfo.CurrentCulture,
        Resource.IOFormatErrorMessage,
        this.Name,
        details);
    return new Exception(message);
}
/// <summary>
/// Validates whether the specified quality scores are within the limits of the FastQ format.
/// The limits are the decoded values of the format's minimum and maximum encoded scores.
/// </summary>
/// <param name="qualScores">Quality scores in base type.</param>
/// <param name="formatType">Fastq format type.</param>
/// <param name="invalidQualScore">Receives the first out-of-range score found;
/// 0 when every score is in range.</param>
/// <returns>Returns true if all scores are within the limits, otherwise false.</returns>
private static bool ValidateQualScores(int[] qualScores, FastQFormatType formatType, out int invalidQualScore)
{
    invalidQualScore = 0;

    int lowestAllowed = GetDecodedQualScore(GetMinEncodedQualScore(formatType), formatType);
    int highestAllowed = GetDecodedQualScore(GetMaxEncodedQualScore(formatType), formatType);

    long total = qualScores.GetLongLength();
    for (long position = 0; position < total; position++)
    {
        int score = qualScores[position];
        if (score >= lowestAllowed && score <= highestAllowed)
        {
            continue;
        }

        // Stop at the first offender and hand it back to the caller.
        invalidQualScore = score;
        return false;
    }

    return true;
}
/// <summary>
/// Initializes a new instance of the QualitativeSequence class with the specified alphabet,
/// FastQ format type, symbols and encoded quality scores.
/// Delegates to the five-argument constructor with validate set to true, so the sequence
/// and the quality scores are validated.
/// </summary>
/// <param name="alphabet">Alphabet to which this instance should conform.</param>
/// <param name="fastQFormatType">FastQ format type.</param>
/// <param name="sequence">A string representing the symbols.</param>
/// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
public QualitativeSequence(
    IAlphabet alphabet,
    FastQFormatType fastQFormatType,
    string sequence,
    string encodedQualityScores)
    : this(alphabet, fastQFormatType, sequence, encodedQualityScores, true)
{
}
/// <summary>
/// Creates a QualitativeSequence from a symbol string and an encoded quality score string,
/// optionally validating both against the alphabet and the FastQ format type.
/// </summary>
/// <param name="alphabet">Alphabet the new instance must conform to.</param>
/// <param name="fastQFormatType">FastQ format type of the quality scores.</param>
/// <param name="sequence">String holding the sequence symbols.</param>
/// <param name="encodedQualityScores">String holding the encoded quality scores.</param>
/// <param name="validate">
/// When true, the lengths, the sequence symbols and the encoded quality scores are checked;
/// when false, all of these checks are skipped.
/// </param>
public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string encodedQualityScores, bool validate)
{
    if (alphabet == null)
    {
        throw new ArgumentNullException("alphabet");
    }

    Alphabet = alphabet;
    ID = string.Empty;

    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    if (encodedQualityScores == null)
    {
        throw new ArgumentNullException("encodedQualityScores");
    }

    FormatType = fastQFormatType;
    sequenceData = UTF8Encoding.UTF8.GetBytes(sequence);
    byte[] encodedScoreBytes = UTF8Encoding.UTF8.GetBytes(encodedQualityScores);

    if (validate)
    {
        long symbolCount = sequenceData.GetLongLength();
        long scoreCount = encodedScoreBytes.GetLongLength();

        // Every symbol needs exactly one quality score.
        if (symbolCount != scoreCount)
        {
            throw new ArgumentException(
                string.Format(
                    CultureInfo.CurrentUICulture,
                    Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                    symbolCount,
                    scoreCount));
        }

        // The symbols must belong to the requested alphabet.
        bool symbolsAreValid = Alphabet.ValidateSequence(sequenceData, 0, symbolCount);
        if (!symbolsAreValid)
        {
            throw Helper.GenerateAlphabetCheckFailureException(Alphabet, sequenceData);
        }

        // The encoded scores must be acceptable for the FastQ format type.
        byte offendingScore;
        bool scoresAreValid = ValidateQualScores(encodedScoreBytes, FormatType, out offendingScore);
        if (!scoresAreValid)
        {
            throw new ArgumentOutOfRangeException(
                "encodedQualityScores",
                string.Format(
                    CultureInfo.CurrentUICulture,
                    Properties.Resource.InvalidEncodedQualityScoreFound,
                    (char)offendingScore,
                    FormatType));
        }
    }

    qualityScores = GetDecodedQualScoresInSignedBytes(encodedScoreBytes, FormatType);
    Count = sequenceData.GetLongLength();
}
/// <summary>
/// Reads one four-line FASTQ record from the reader and builds a QualitativeSequence from it.
/// The record is expected to be: an '@' header line, a sequence line, a '+' line
/// (optionally repeating the id) and a quality score line of the same length as the sequence.
/// Any deviation is reported through Trace and raised as a FileFormatException.
/// </summary>
/// <param name="mbfReader">Reader positioned on the header line of the record.</param>
/// <param name="isReadOnly">Value assigned to the IsReadOnly property of the resulting sequence.</param>
/// <returns>The QualitativeSequence built from the parsed record.</returns>
private IQualitativeSequence ParseOneWithFastQFormat(MBFStreamReader mbfReader, bool isReadOnly)
{
    SequencePointer pointer = new SequencePointer();

    // First line: the header must start with '@'.
    bool headerFound = mbfReader.HasLines && mbfReader.Line.StartsWith("@", StringComparison.Ordinal);
    if (!headerFound)
    {
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, Name);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    // The id is taken from the header line.
    string id = mbfReader.GetLineField(2).Trim();

    // Record the starting offset of the sequence.
    pointer.IndexOffsets[0] = mbfReader.Position;

    // Second line: the sequence itself, which must not be empty.
    mbfReader.GoToNextLine();
    bool sequenceLineFound = mbfReader.HasLines && !string.IsNullOrEmpty(mbfReader.Line);
    if (!sequenceLineFound)
    {
        string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidSequenceLine, id);
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    string sequenceLine = mbfReader.Line;

    // Record the ending offset of the sequence.
    pointer.IndexOffsets[1] = pointer.IndexOffsets[0] + sequenceLine.Length;

    // Third line: the quality score header must start with '+'.
    mbfReader.GoToNextLine();
    bool scoreHeaderFound = mbfReader.HasLines && mbfReader.Line.StartsWith("+", StringComparison.Ordinal);
    if (!scoreHeaderFound)
    {
        string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderLine, id);
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    // If the '+' line carries an id, it has to match the one from the header.
    string qualScoreId = mbfReader.GetLineField(2).Trim();
    bool idMismatch = !string.IsNullOrEmpty(qualScoreId) && !id.Equals(qualScoreId);
    if (idMismatch)
    {
        string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoreHeaderData, id);
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    // Fourth line: the quality scores, which must not be empty.
    mbfReader.GoToNextLine();
    bool scoreLineFound = mbfReader.HasLines && !string.IsNullOrEmpty(mbfReader.Line);
    if (!scoreLineFound)
    {
        string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_EmptyQualityScoreLine, id);
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    string qualityLine = mbfReader.Line;
    byte[] qualScores = ASCIIEncoding.ASCII.GetBytes(qualityLine);

    // There must be exactly one quality score per sequence symbol.
    if (sequenceLine.Length != qualityLine.Length)
    {
        string details = string.Format(CultureInfo.CurrentCulture, Resource.FastQ_InvalidQualityScoresLength, id);
        string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
        Trace.Report(errorMessage);
        throw new FileFormatException(errorMessage);
    }

    // Move the reader past this record.
    mbfReader.GoToNextLine();

    // Use the configured alphabet, or try to identify one from the sequence line.
    IAlphabet alphabet = Alphabet;
    if (alphabet == null)
    {
        alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, sequenceLine);
        if (alphabet == null)
        {
            string details = string.Format(CultureInfo.CurrentCulture, Resource.InvalidSymbolInString, sequenceLine);
            string errorMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, details);
            Trace.Report(errorMessage);
            throw new FileFormatException(errorMessage);
        }
    }

    // Use the configured FastQ type unless auto-detection is switched on.
    FastQFormatType fastQType = AutoDetectFastQFormat
        ? IdentifyFastQFormatType(qualScores)
        : FastqType;

    // Pick the constructor overload depending on whether an encoding is configured.
    QualitativeSequence sequence = Encoding == null
        ? new QualitativeSequence(alphabet, fastQType, sequenceLine, qualScores)
        : new QualitativeSequence(alphabet, fastQType, Encoding, sequenceLine, qualScores);

    sequence.ID = id;
    sequence.IsReadOnly = isReadOnly;

    // Complete the pointer for this record and register it.
    pointer.AlphabetName = sequence.Alphabet.Name;
    pointer.Id = sequence.ID;
    _sequencePointers.Add(pointer);

    // Attach a file-backed provider that refers back to this parser and the pointer.
    FileVirtualQualitativeSequenceProvider provider = new FileVirtualQualitativeSequenceProvider(this, pointer);
    provider.BlockSize = _blockSize;
    provider.MaxNumberOfBlocks = _maxNumberOfBlocks;
    sequence.VirtualQualitativeSequenceProvider = provider;

    return sequence;
}
/// <summary>
/// Verifies that the QualitativeSequence conversion methods reject invalid input when
/// converting from <paramref name="type1"/> to <paramref name="type2"/>:
/// ConvertEncodedQualityScore must throw ArgumentNullException for a null third argument, and
/// ConvertQualityScores / ConvertQualityScore must throw ArgumentOutOfRangeException
/// for the scores used here.
/// </summary>
/// <param name="type1">FastQ format type passed as the first argument of each conversion call.</param>
/// <param name="type2">FastQ format type passed as the second argument of each conversion call.</param>
void ConvertTypeToType(FastQFormatType type1, FastQFormatType type2)
{
    int[] scoreArray = { -12, 24 };
    int qualScore = -12;

    // A null score argument must be rejected.
    Assert.Throws<ArgumentNullException>(
        () => QualitativeSequence.ConvertEncodedQualityScore(type1, type2, null));

    // The score array is expected to be rejected as out of range.
    Assert.Throws<ArgumentOutOfRangeException>(
        () => QualitativeSequence.ConvertQualityScores(type1, type2, scoreArray));

    // The single score is expected to be rejected as out of range.
    // Only the exception type is asserted; the message is not inspected.
    Assert.Throws<ArgumentOutOfRangeException>(
        () => QualitativeSequence.ConvertQualityScore(type1, type2, qualScore));
}