/// <summary>
/// Chooses a sequence formatter for a file by inspecting the file name.
/// </summary>
/// <param name="fileName">File name for which the formatter is required.</param>
/// <returns>
/// A new formatter instance for the first check that accepts the name, tried in this
/// order: Helper.IsGenBank, the GFF file extension, Helper.IsFasta, Helper.IsFastQ.
/// Returns null when no check matches or when <paramref name="fileName"/> is null or empty.
/// </returns>
public static ISequenceFormatter FindFormatterByFile(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (Helper.IsGenBank(fileName))
    {
        return new GenBankFormatter();
    }

    // GFF is recognised purely by its extension, ignoring case.
    if (fileName.EndsWith(Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
    {
        return new GffFormatter();
    }

    if (Helper.IsFasta(fileName))
    {
        return new FastaFormatter();
    }

    if (Helper.IsFastQ(fileName))
    {
        return new FastQFormatter();
    }

    return null;
}
/// <summary>
/// Returns the formatter that has the given name, set up for the given file.
/// </summary>
/// <param name="fileName">File name the formatter is created for (or opened on).</param>
/// <param name="formatterName">Name of the formatter to use.</param>
/// <returns>
/// The matching formatter as ISequenceFormatter; null when <paramref name="fileName"/>
/// is null or empty, or when no formatter carries <paramref name="formatterName"/>.
/// </returns>
public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (formatterName == Properties.Resource.FastAName)
    {
        return new FastAFormatter(fileName);
    }

    if (formatterName == Properties.Resource.FastQName)
    {
        return new FastQFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankFormatter(fileName);
    }

    // Not one of the names handled above: search the known formatters so that
    // custom formatters added through an add-in are picked up as well.
    ISequenceFormatter match = All.FirstOrDefault(candidate => candidate.Name == formatterName);

    // The formatters created above receive the file name in their constructor;
    // a formatter found by name is opened on the file explicitly instead.
    if (match != null)
    {
        match.Open(fileName);
    }

    return match;
}
// Verifies that the formatters exposed by SequenceFormatters report the names stored
// in the test XML, and that SequenceFormatters.All is not null.
public void ValidateSeqFormatterProperties()
{
    // Expected formatter names, read from the XML configuration.
    string expectedFastaName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.FastAFileParserNode, Constants.ParserNameNode);
    string expectedGenBankName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileParserNode, Constants.ParserNameNode);
    string expectedGffName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.GffFileParserNode, Constants.ParserNameNode);
    string expectedFastQName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.FastQFileParserNode, Constants.ParserNameNode);

    // Formatter instances published by the SequenceFormatters class.
    FastAFormatter fasta = SequenceFormatters.Fasta;
    IReadOnlyList<ISequenceFormatter> everyFormatter = SequenceFormatters.All;
    GenBankFormatter genBank = SequenceFormatters.GenBank;
    FastQFormatter fastQ = SequenceFormatters.FastQ;
    GffFormatter gff = SequenceFormatters.Gff;

    // Each formatter must report the configured name.
    Assert.AreEqual(expectedFastaName, fasta.Name);
    Assert.AreEqual(expectedGenBankName, genBank.Name);
    Assert.AreEqual(expectedGffName, gff.Name);
    Assert.AreEqual(expectedFastQName, fastQ.Name);
    Assert.IsNotNull(everyFormatter);

    ApplicationLog.WriteLine("Type of the parser is validated successfully");
}
/// <summary>
/// Creates the formatter that has the given name for the given file.
/// </summary>
/// <param name="fileName">File name passed to the formatter's constructor.</param>
/// <param name="formatterName">Name of the formatter to use.</param>
/// <returns>
/// A new FastA, FastQ, GenBank or GFF formatter when <paramref name="formatterName"/>
/// equals the corresponding resource name; null for any other name, or when
/// <paramref name="fileName"/> is null or empty.
/// </returns>
public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (formatterName == Properties.Resource.FastAName)
    {
        return new FastAFormatter(fileName);
    }

    if (formatterName == Properties.Resource.FastQName)
    {
        return new FastQFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GFF_NAME)
    {
        return new GffFormatter(fileName);
    }

    return null;
}
// Verifies that the formatters exposed by SequenceFormatters report the names stored
// in the test XML, and that SequenceFormatters.All holds exactly four formatters.
public void ValidateSeqFormatterProperties()
{
    // Expected formatter names, read from the XML configuration.
    string expectedFastaName = _utilityObj._xmlUtil.GetTextValue(
        Constants.FastAFileParserNode, Constants.ParserNameNode);
    string expectedGenBankName = _utilityObj._xmlUtil.GetTextValue(
        Constants.GenBankFileParserNode, Constants.ParserNameNode);
    string expectedGffName = _utilityObj._xmlUtil.GetTextValue(
        Constants.GffFileParserNode, Constants.ParserNameNode);
    string expectedFastQName = _utilityObj._xmlUtil.GetTextValue(
        Constants.FastQFileParserNode, Constants.ParserNameNode);

    // Formatter instances published by the SequenceFormatters class.
    FastaFormatter fasta = SequenceFormatters.Fasta;
    IList<ISequenceFormatter> everyFormatter = SequenceFormatters.All;
    GenBankFormatter genBank = SequenceFormatters.GenBank;
    FastQFormatter fastQ = SequenceFormatters.FastQ;
    GffFormatter gff = SequenceFormatters.Gff;

    // Names and formatter count must match the expectations.
    Assert.AreEqual(expectedFastaName, fasta.Name);
    Assert.AreEqual(4, everyFormatter.Count);
    Assert.AreEqual(expectedGenBankName, genBank.Name);
    Assert.AreEqual(expectedGffName, gff.Name);
    Assert.AreEqual(expectedFastQName, fastQ.Name);

    // The message has no placeholders, so it is written directly.
    Console.WriteLine("SequenceFormatter : Type of the parser is validated successfully");
    ApplicationLog.WriteLine("Type of the parser is validated successfully");
}
// Round-trip test: parses a FASTQ file, writes every sequence to a temp file with
// FastQFormatter, parses the temp file again and compares IDs, symbols and encoded
// quality scores record by record.
public void FastQFormatter()
{
    string sourcePath = @"TestUtils\FASTQ\SRR002012_5.fastq";
    Assert.IsTrue(File.Exists(sourcePath));

    IList<QualitativeSequence> expected = null;
    string roundTripPath = Path.GetTempFileName();

    using (FastQParser sourceParser = new FastQParser())
    {
        sourceParser.Open(sourcePath);

        // Load the original file.
        expected = sourceParser.Parse().ToList();
        Assert.IsNotNull(expected);

        // Write the original sequences to the temp file through the formatter.
        using (FastQFormatter writer = new FastQFormatter(roundTripPath))
        {
            foreach (QualitativeSequence item in expected)
            {
                writer.Write(item);
            }
        }
    }

    // Load the freshly written file and compare it with the original.
    using (FastQParser roundTripParser = new FastQParser(roundTripPath))
    {
        IList<QualitativeSequence> actual = roundTripParser.Parse().ToList();
        Assert.IsNotNull(actual);

        int expectedCount = expected.Count;
        int actualCount = actual.Count;
        Assert.AreEqual(expectedCount, actualCount);

        for (int index = 0; index < expectedCount; index++)
        {
            QualitativeSequence before = expected[index];
            QualitativeSequence after = actual[index];

            Assert.AreEqual(before.ID, after.ID);

            string beforeSymbols = Encoding.ASCII.GetString(before.ToArray());
            string afterSymbols = Encoding.ASCII.GetString(after.ToArray());
            string beforeScores = Encoding.ASCII.GetString(before.GetEncodedQualityScores());
            string afterScores = Encoding.ASCII.GetString(after.GetEncodedQualityScores());

            Assert.AreEqual(beforeSymbols, afterSymbols);
            Assert.AreEqual(beforeScores, afterScores);
        }

        // Everything matched, so remove the temp file. When an Assert fails the
        // file is intentionally left behind so it can be inspected while debugging.
        File.Delete(roundTripPath);
    }
}
/// <summary>
/// General method to validate FastQ Formatter on a Stream.
/// Parses the FASTQ file configured for the node, writes its first sequence through a
/// StreamWriter into a temp file, parses the temp file back and compares the sequence
/// text and ID with the expected values from the XML.
/// </summary>
/// <param name="nodeName">xml node name.</param>
void ValidateFastQFormatterOnAStream(string nodeName)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.SequenceIdNode);
    string tempFileName1 = System.IO.Path.GetTempFileName();
    string parsedValue = string.Empty;
    string parsedID = string.Empty;

    // Parse the source FastQ file.
    using (FastQParser fastQParserObj = new FastQParser(filePath))
    {
        fastQParserObj.AutoDetectFastQFormat = false;
        IEnumerable<QualitativeSequence> qualSequence = fastQParserObj.Parse();

        // Write the first parsed sequence to the temp file via a stream writer.
        // The formatter is disposed before the writer, as in the nested form.
        using (StreamWriter writer = new StreamWriter(tempFileName1))
        using (FastQFormatter fastQFormatter = new FastQFormatter())
        {
            fastQFormatter.Open(writer);
            fastQFormatter.Write(qualSequence.ElementAt(0));
        }

        using (FastQParser fastQParserObjTemp = new FastQParser(tempFileName1))
        {
            // Fetch the first record once instead of calling ElementAt(0) twice on
            // the parse result, which would enumerate the parser output twice.
            QualitativeSequence roundTripped = fastQParserObjTemp.Parse().ElementAt(0);
            parsedValue = new string(roundTripped.Select(a => (char)a).ToArray());
            parsedID = roundTripped.ID.ToString((IFormatProvider)null);
        }

        // Validate qualitative parsing temporary file.
        // Expected value goes first so failure messages label the values correctly.
        Assert.AreEqual(expectedQualitativeSequence, parsedValue);
        Assert.AreEqual(expectedSequenceId, parsedID);

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedValue));

        // Logs to the VSTest GUI (Console.Out) window
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedValue));
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedID));

        File.Delete(tempFileName1);
    }
}
// Negative test helper: passes a null sequence to FastQFormatter.Write and expects an
// ArgumentNullException. "param" selects which null variant is written.
void InValidateFastQFormatter(FastQFormatParameters param)
{
    // Gets the expected sequence from the Xml
    string filepath = utilityObj.xmlUtil.GetTextValue(
        Constants.MultiSeqSangerRnaProNode,
        Constants.FilePathNode);

    // Parse a FastQ file.
    using (FastQParser fastQParser = new FastQParser(filepath))
    {
        fastQParser.AutoDetectFastQFormat = true;
        FastQFormatter fastQFormatter = null;

        try
        {
            switch (param)
            {
                case FastQFormatParameters.Sequence:
                    {
                        fastQFormatter = new FastQFormatter(filepath);
                        fastQFormatter.Write(null as ISequence);
                        break;
                    }

                default:
                    {
                        IEnumerable<QualitativeSequence> sequence = fastQParser.Parse();
                        fastQFormatter = new FastQFormatter(Constants.FastQTempFileName);

                        // The "as" cast of the parse result is what gets written here;
                        // presumably it evaluates to null, which is the case under test.
                        fastQFormatter.Write(sequence as QualitativeSequence);
                        break;
                    }
            }

            // Write() returned without throwing: the validation is missing.
            Assert.Fail();
        }
        catch (ArgumentNullException)
        {
            ApplicationLog.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
            Console.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
        }
        finally
        {
            // Close on every path, including the Assert.Fail() path. The null check
            // covers the case where the constructor itself threw.
            if (fastQFormatter != null)
            {
                fastQFormatter.Close();
            }
        }
    }
}
// Generates two synthetic FASTQ files from a reference FASTA file:
//   RefSeq.fastq    - overlapping fixed-length reads tiled over a sub-sequence of the
//                     first FASTA record;
//   NonRefSeq.fastq - the same tiling over a copy whose middle base is set to byte 65 ('A').
// Every read is written "coverage" times with a constant quality score.
static void Main(string[] args)
{
    const int coverage = 50;
    const int len = 75;

    // One identical quality byte (33 + 35) per base of a read.
    byte[] QualScores = Enumerable.Range(0, len).Select(x => (byte)(33 + 35)).ToArray();

    FastAParser fap = new FastAParser(@"D:\TestMixing\CRS.rn.fasta");
    try
    {
        // Direct casts: an unexpected type fails here with InvalidCastException
        // rather than later with a NullReferenceException.
        Sequence seq = (Sequence)fap.Parse().First();
        seq = (Sequence)seq.GetSubSequence(1000, 300);

        WriteTiledReads(@"D:\TestMixing\RefSeq.fastq", seq, "REF-", seq.Count - len, len, coverage, QualScores);

        // Now mutate one base (the middle one) and go again.
        byte[] newSeq = new byte[seq.Count];
        seq.CopyTo(newSeq, 0, seq.Count);
        newSeq[newSeq.Length / 2] = 65;
        Sequence nonRef = new Sequence(Alphabets.DNA, newSeq, true);

        WriteTiledReads(@"D:\TestMixing\NonRefSeq.fastq", nonRef, "NONREF-", seq.Count - len, len, coverage, QualScores);
    }
    finally
    {
        // Release the input file even if parsing or writing fails.
        fap.Close();
    }
}

// Writes reads of "readLength" bases starting at offsets 0..windowCount-1 of "source"
// to a FASTQ file at "outputPath"; each read is written "coverage" times with the ID
// idPrefix + offset. The formatter is closed even when a write throws.
private static void WriteTiledReads(string outputPath, Sequence source, string idPrefix, long windowCount, int readLength, int coverage, byte[] qualScores)
{
    FastQFormatter faq = new FastQFormatter(outputPath);
    try
    {
        for (int i = 0; i < windowCount; i++)
        {
            string s = source.ConvertToString(i, readLength);
            byte[] sb = Encoding.UTF8.GetBytes(s);

            for (int j = 0; j < coverage; j++)
            {
                QualitativeSequence qs = new QualitativeSequence(Alphabets.DNA, FastQFormatType.Illumina_v1_8, sb, qualScores);
                qs.ID = idPrefix + i.ToString();
                faq.Write(qs);
            }
        }
    }
    finally
    {
        faq.Close();
    }
}
// Checks that the FastQ parser and formatter expose the name, description and file
// types defined in the resources.
public void FastQProperties()
{
    // Expected value first, actual second, so that a failure message labels the
    // two values correctly.
    ISequenceParser parser = new FastQParser();
    Assert.AreEqual(Properties.Resource.FASTQ_NAME, parser.Name);
    Assert.AreEqual(Properties.Resource.FASTQPARSER_DESCRIPTION, parser.Description);
    Assert.AreEqual(Properties.Resource.FASTQ_FILEEXTENSION, parser.FileTypes);

    ISequenceFormatter formatter = new FastQFormatter();
    Assert.AreEqual(Properties.Resource.FASTQ_NAME, formatter.Name);
    Assert.AreEqual(Properties.Resource.FASTQFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Properties.Resource.FASTQ_FILEEXTENSION, formatter.FileTypes);
}
// Checks that the FastQ parser and formatter expose the name, description and
// supported file types defined in the resources.
public void FastQProperties()
{
    // Expected value first, actual second, so that a failure message labels the
    // two values correctly.
    FastQParser parser = new FastQParser();
    Assert.AreEqual(Resource.FastQName, parser.Name);
    Assert.AreEqual(Resource.FASTQPARSER_DESCRIPTION, parser.Description);
    Assert.AreEqual(Resource.FASTQ_FILEEXTENSION, parser.SupportedFileTypes);

    FastQFormatter formatter = new FastQFormatter();
    Assert.AreEqual(Resource.FastQName, formatter.Name);
    Assert.AreEqual(Resource.FASTQFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Resource.FASTQ_FILEEXTENSION, formatter.SupportedFileTypes);
}
// Negative test helper: calls FastQFormatter.Format with a null argument and expects
// an ArgumentNullException. "param" selects which argument is null.
private void InValidateFastQFormatter(FastQFormatParameters param)
{
    // Gets the expected sequence from the Xml
    string filepath = utilityObj.xmlUtil.GetTextValue(
        Constants.MultiSeqSangerRnaProNode,
        Constants.FilePathNode);

    // Parse a FastQ file.
    var fastQParser = new FastQParser();
    using (fastQParser.Open(filepath))
    {
        FastQFormatter fastQFormatter = null;

        switch (param)
        {
            case FastQFormatParameters.Sequence:
                try
                {
                    // Null sequence, valid file name.
                    fastQFormatter = new FastQFormatter();
                    fastQFormatter.Format(null as ISequence, filepath);
                    Assert.Fail();
                }
                catch (ArgumentNullException)
                {
                    ApplicationLog.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                }
                finally
                {
                    // Close on every path: previously the formatter stayed open
                    // when Format() did not throw and Assert.Fail() was reached.
                    if (fastQFormatter != null)
                    {
                        fastQFormatter.Close();
                    }
                }

                break;

            default:
                try
                {
                    // Valid sequences, null file name.
                    IEnumerable<IQualitativeSequence> sequence = fastQParser.Parse();
                    fastQFormatter = new FastQFormatter();
                    fastQFormatter.Format(sequence, null);
                    Assert.Fail();
                }
                catch (ArgumentNullException)
                {
                    ApplicationLog.WriteLine("FastQ Parser P2 : Successfully validated the exception");
                }

                break;
        }
    }
}
// Round-trip test on an in-memory record: parses one FASTQ record from a string with
// FastQParser.ParseOne, formats it back with FastQFormatter.FormatString and asserts
// that the formatted text equals the input string exactly.
// NOTE(review): the verbatim string below presumably spanned several lines originally
// (ID line, sequence, '+' line, quality scores); its exact whitespace is significant
// for the equality assert - confirm against the original file before reformatting.
public void TestFormatingString() { string str = @"@SRR002012.1 Oct4:5:1:871:340 length=26 GGCGCACTTACACCCTACATCCATTG +SRR002012.1 Oct4:5:1:871:340 length=26 IIIIG1?II;IIIII1IIII1%.I7I "; StringReader sr = new StringReader(str); FastQParser parser = new FastQParser(); IQualitativeSequence seq = parser.ParseOne(sr); FastQFormatter formatter = new FastQFormatter(); string formatterStr = formatter.FormatString(seq); Assert.AreEqual(str, formatterStr); }
/// <summary>
/// General method to validate FastQ Formatter.
/// Parses one sequence from the configured FASTQ file, formats it into a temp file,
/// parses the temp file back and compares the sequence text and ID with the XML values.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="fileExtension">Different temporary file extensions (not read by this method).</param>
private void ValidateFastQFormatter(string nodeName, FastQFileParameters fileExtension)
{
    // Expected values and the input file location come from the Xml.
    string sourcePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string expectedId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceIdNode);
    string roundTripPath = Path.GetTempFileName();

    // Read a single sequence from the source file with ParseOne.
    var sourceParser = new FastQParser();
    using (sourceParser.Open(sourcePath))
    {
        IQualitativeSequence original = sourceParser.ParseOne();

        // Write that sequence to the temp file.
        var writer = new FastQFormatter();
        using (writer.Open(roundTripPath))
        {
            writer.Format(original);
        }

        // Read the temp file back and capture the text and ID of its first sequence.
        string actualSequenceText;
        string actualId;
        var roundTripParser = new FastQParser();
        using (roundTripParser.Open(roundTripPath))
        {
            IQualitativeSequence reparsed = roundTripParser.Parse().First();
            actualSequenceText = reparsed.ConvertToString();
            actualId = reparsed.ID;
        }

        // The round-tripped values must match the expectations.
        Assert.AreEqual(expectedSequenceText, actualSequenceText);
        Assert.AreEqual(expectedId, actualId);

        ApplicationLog.WriteLine(string.Format(
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            actualSequenceText));
        ApplicationLog.WriteLine(string.Format(
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            actualId));

        File.Delete(roundTripPath);
    }
}
/// <summary>
/// Create a SequenceFormatter object based on the given format type.
/// </summary>
/// <param name="parserName">
/// Name of the requested format; compared with the names of SequenceFormatters.Fasta
/// and SequenceFormatters.FastQ.
/// </param>
/// <param name="outputFilename">File name passed to the created formatter.</param>
/// <returns>
/// A FastAFormatter or FastQFormatter for <paramref name="outputFilename"/>, or null
/// when the file name is null or empty, or when <paramref name="parserName"/> is null
/// or matches neither format.
/// </returns>
internal ISequenceFormatter DetermineSequenceFormatter(string parserName, string outputFilename)
{
    // Null arguments previously caused a NullReferenceException on the Equals calls;
    // they are now treated like "no formatter available".
    if (string.IsNullOrEmpty(outputFilename) || parserName == null)
    {
        return null;
    }

    if (parserName.Equals(SequenceFormatters.Fasta.Name))
    {
        return new FastAFormatter(outputFilename);
    }

    if (parserName.Equals(SequenceFormatters.FastQ.Name))
    {
        return new FastQFormatter(outputFilename);
    }

    return null;
}
/// <summary>
/// Chooses a sequence formatter for a file by inspecting the file name.
/// </summary>
/// <param name="fileName">File name for which the formatter is required.</param>
/// <returns>
/// A formatter created for (or opened on) the file: FastA, FastQ, GenBank or GFF when
/// the corresponding check accepts the name, otherwise the first known formatter whose
/// SupportedFileTypes contains the file's extension. Null when nothing matches or when
/// <paramref name="fileName"/> is null or empty.
/// </returns>
public static ISequenceFormatter FindFormatterByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (IsFasta(fileName))
    {
        return new FastAFormatter(fileName);
    }

    if (IsFastQ(fileName))
    {
        return new FastQFormatter(fileName);
    }

    if (IsGenBank(fileName))
    {
        return new GenBankFormatter(fileName);
    }

    if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
    {
        return new GffFormatter(fileName);
    }

    // None of the checks above matched: search the known formatters by extension so
    // that custom formatters added through an add-in are picked up as well.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        return null;
    }

    ISequenceFormatter match = All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));

    // The formatters created above receive the file name in their constructor;
    // a formatter found by extension is opened on the file explicitly instead.
    if (match != null)
    {
        match.Open(fileName);
    }

    return match;
}
// Parses every *.fastq file under testdata\FASTQ with a single FastQParser instance
// and asserts that at least three sequences were read in total.
public void FastQParserForManyFiles()
{
    string path = @"testdata\FASTQ";
    Assert.IsTrue(Directory.Exists(path));

    int count = 0;
    FastQParser parser = new FastQParser();

    // The FastQFormatter that used to be created here was never used and has been removed.
    DirectoryInfo di = new DirectoryInfo(path);
    foreach (FileInfo fi in di.GetFiles("*.fastq"))
    {
        using (StreamReader reader = File.OpenText(fi.FullName))
        {
            // Only the number of parsed sequences matters; enumerating drives the parser.
            foreach (IQualitativeSequence seq in parser.Parse(reader))
            {
                count++;
            }
        }
    }

    Assert.IsTrue(count >= 3);
}
// Command-line entry point. Expects three arguments: the input BAM file, a minimum
// read-quality threshold in [0,1], and the name of the FASTQ file to create. Reads
// whose ReadQuality is greater than the threshold are written to the output file in
// Sanger format; all other reads are counted as filtered.
public static void Main(string[] args)
{
    try
    {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        if (args.Length > 3)
        {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
        }
        else if (args.Length > 0 && (args[0] == "h" || args[0] == "help" || args[0] == "?" || args[0] == "-h"))
        {
            // Help is checked before the argument count so a lone "help" is honoured.
            DisplayHelp();
        }
        else if (args.Length < 3)
        {
            // All three arguments are read below; the former "< 2" check let a
            // two-argument call through and then failed on args[2].
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
        }
        else
        {
            string bam_name = args[0];
            string threshold = args[1];
            string output = args[2];

            if (!File.Exists(bam_name))
            {
                Console.WriteLine("Can't find file: " + bam_name);
                return;
            }

            double min_rq;
            bool converted = Double.TryParse(threshold, out min_rq);
            if (!converted)
            {
                Console.WriteLine("Could not parse minimum threshold from : " + threshold + " expected decimal number in [0,1] interval.");
                return;
            }

            if (min_rq < 0.0 || min_rq > 1.0)
            {
                Console.WriteLine("Minimum RQ value: " + min_rq + " was not in [0,1] interval.");
                return;
            }

            if (File.Exists(output))
            {
                Console.WriteLine("The output file already exists, please specify a new name or delete the old one.");
                return;
            }

            var fastq = new FastQFormatter();
            fastq.FormatType = FastQFormatType.Sanger;

            // Filter and output
            PacBioCCSBamReader bamreader = new PacBioCCSBamReader();
            int numRead = 0;
            int numFiltered = 0;

            // "using" closes the output stream even when reading or formatting throws.
            using (var os = new FileStream(output, FileMode.CreateNew))
            {
                foreach (var read in bamreader.Parse(bam_name))
                {
                    numRead++;

                    // Direct cast: an unexpected read type fails with a descriptive
                    // InvalidCastException instead of a NullReferenceException.
                    var ccs = (PacBioCCSRead)read;
                    if (ccs.ReadQuality > min_rq)
                    {
                        fastq.Format(os, read);
                    }
                    else
                    {
                        numFiltered++;
                    }
                }
            }

            Console.WriteLine("Parsed " + numRead + " reads and filtered out " + numFiltered + " for RQ < " + min_rq);
        }
    }
    catch (DllNotFoundException thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine("A shared library was not found. To solve this, please add the folder" +
            " with the downloaded file libMonoPosixHelper" +
            " to your environmental variables (LD_LIBRARY_PATH and DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    }
    catch (Exception thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the FASTQ File");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);

        // Report the whole chain of inner exceptions.
        for (Exception inner = thrown.InnerException; inner != null; inner = inner.InnerException)
        {
            Console.WriteLine("Inner Exception: " + inner.Message);
        }
    }
}
// Command-line entry point. Expects three arguments: the input BAM file, a minimum
// read-quality threshold in [0,1], and the name of the FASTQ file to create. Reads
// whose ReadQuality is greater than the threshold are written to the output file in
// Sanger format; all other reads are counted as filtered.
public static void Main(string[] args)
{
    try
    {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        bool helpRequested = args.Length > 0 &&
            (args[0] == "h" || args[0] == "help" || args[0] == "?" || args[0] == "-h");

        if (args.Length > 3)
        {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
            return;
        }

        // Help is checked before the argument count so a lone "help" is honoured.
        if (helpRequested)
        {
            DisplayHelp();
            return;
        }

        // All three arguments are read below; the former "< 2" check let a
        // two-argument call through and then failed on args[2].
        if (args.Length < 3)
        {
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
            return;
        }

        string bam_name = args[0];
        string threshold = args[1];
        string output = args[2];

        if (!File.Exists(bam_name))
        {
            Console.WriteLine("Can't find file: " + bam_name);
            return;
        }

        double min_rq;
        if (!Double.TryParse(threshold, out min_rq))
        {
            Console.WriteLine("Could not parse minimum threshold from : " + threshold + " expected decimal number in [0,1] interval.");
            return;
        }

        if (min_rq < 0.0 || min_rq > 1.0)
        {
            Console.WriteLine("Minimum RQ value: " + min_rq + " was not in [0,1] interval.");
            return;
        }

        if (File.Exists(output))
        {
            Console.WriteLine("The output file already exists, please specify a new name or delete the old one.");
            return;
        }

        var fastq = new FastQFormatter();
        fastq.FormatType = FastQFormatType.Sanger;

        // Filter and output
        PacBioCCSBamReader bamreader = new PacBioCCSBamReader();
        int numRead = 0;
        int numFiltered = 0;

        // "using" closes the output stream even when reading or formatting throws.
        using (var os = new FileStream(output, FileMode.CreateNew))
        {
            foreach (var read in bamreader.Parse(bam_name))
            {
                numRead++;

                // Direct cast: an unexpected read type fails with a descriptive
                // InvalidCastException instead of a NullReferenceException.
                var ccs = (PacBioCCSRead)read;
                if (ccs.ReadQuality > min_rq)
                {
                    fastq.Format(os, read);
                }
                else
                {
                    numFiltered++;
                }
            }
        }

        Console.WriteLine("Parsed " + numRead + " reads and filtered out " + numFiltered + " for RQ < " + min_rq);
    }
    catch (DllNotFoundException thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine("A shared library was not found. To solve this, please add the folder" +
            " with the downloaded file libMonoPosixHelper" +
            " to your environmental variables (LD_LIBRARY_PATH and DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    }
    catch (Exception thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the FASTQ File");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);

        // Report the whole chain of inner exceptions.
        for (Exception inner = thrown.InnerException; inner != null; inner = inner.InnerException)
        {
            Console.WriteLine("Inner Exception: " + inner.Message);
        }
    }
}
// General method to validate the FastQ formatter: parses the FASTQ file configured
// for "nodeName", writes its first sequence to a temp file, parses that file back and
// compares the sequence text and ID with the expected values from the Xml.
// For values of "fileExtension" other than FastQ and Fq nothing is written, so the
// parsed values stay null and the asserts fail.
void ValidateFastQFormatter(string nodeName, FastQFileParameters fileExtension)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.SequenceIdNode);

    // One temp file is enough: both handled cases run the same round trip.
    string tempFileName = System.IO.Path.GetTempFileName();

    // Parse the source FastQ file.
    using (FastQParser fastQParserObj = new FastQParser(filePath))
    {
        fastQParserObj.AutoDetectFastQFormat = false;
        IEnumerable<ISequence> qualSequence = fastQParserObj.Parse();

        string parsedValue = null;
        string parsedID = null;

        switch (fileExtension)
        {
            case FastQFileParameters.FastQ:
            case FastQFileParameters.Fq:
                {
                    // Only the formatter that is actually used gets created, and it is
                    // closed on every path, so no open handle survives until the
                    // File.Delete below (this used to need GC.Collect()).
                    FastQFormatter fastQFormatter = new FastQFormatter(tempFileName);
                    try
                    {
                        fastQFormatter.Write(qualSequence.ElementAt(0));
                    }
                    finally
                    {
                        fastQFormatter.Close();
                    }

                    using (FastQParser fastQParserObjTemp = new FastQParser(tempFileName))
                    {
                        // Fetch the first record once rather than enumerating twice.
                        ISequence roundTripped = fastQParserObjTemp.Parse().ElementAt(0);
                        parsedValue = new string(roundTripped.Select(a => (char)a).ToArray());
                        parsedID = roundTripped.ID.ToString((IFormatProvider)null);
                    }

                    break;
                }

            default:
                break;
        }

        // Validate qualitative parsing temporary file.
        // Expected value goes first so failure messages label the values correctly.
        Assert.AreEqual(expectedQualitativeSequence, parsedValue);
        Assert.AreEqual(expectedSequenceId, parsedID);

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedValue));

        // Logs to the NUnit GUI (Console.Out) window
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedValue));
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedID));

        File.Delete(tempFileName);
    }
}
// General method to validate the FastQ formatter: parses the FASTQ file configured
// for "nodeName", writes its first sequence to a temp file, parses that file back and
// compares the sequence text and ID with the expected values from the Xml.
// For values of "fileExtension" other than FastQ and Fq nothing is written, so the
// parsed values stay null and the asserts fail.
private void ValidateFastQFormatter(string nodeName, FastQFileParameters fileExtension)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.SequenceIdNode);
    string tempFileName = Path.GetTempFileName();

    // Parse the source FastQ file.
    using (var fastQParserObj = new FastQParser(filePath))
    {
        IEnumerable<QualitativeSequence> qualSequence = fastQParserObj.Parse();

        string parsedValue = null;
        string parsedID = null;

        switch (fileExtension)
        {
            // Both cases ran byte-for-byte identical code, so they share one body.
            case FastQFileParameters.FastQ:
            case FastQFileParameters.Fq:
                using (var fastQFormatter = new FastQFormatter(tempFileName))
                {
                    fastQFormatter.Write(qualSequence.ElementAt(0));
                }

                using (var fastQParserObjTemp = new FastQParser(tempFileName))
                {
                    // Fetch the first record once rather than enumerating twice.
                    QualitativeSequence roundTripped = fastQParserObjTemp.Parse().ElementAt(0);
                    parsedValue = new string(roundTripped.Select(a => (char)a).ToArray());
                    parsedID = roundTripped.ID.ToString(null);
                }

                break;

            default:
                break;
        }

        // Validate qualitative parsing temporary file.
        // Expected value goes first so failure messages label the values correctly.
        Assert.AreEqual(expectedQualitativeSequence, parsedValue);
        Assert.AreEqual(expectedSequenceId, parsedID);

        ApplicationLog.WriteLine(string.Format(null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedValue));
        ApplicationLog.WriteLine(string.Format(null,
            "FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.",
            parsedID));

        File.Delete(tempFileName);
    }
}
// Negative test helper: calls FastQFormatter.Format with a null argument and expects
// an ArgumentNullException. "param" selects which argument is null.
private void InValidateFastQFormatter(FastQFormatParameters param)
{
    // Gets the expected sequence from the Xml
    string filepath = utilityObj.xmlUtil.GetTextValue(
        Constants.MultiSeqSangerRnaProNode,
        Constants.FilePathNode);

    // Parse a FastQ file.
    var fastQParser = new FastQParser();
    using (fastQParser.Open(filepath))
    {
        FastQFormatter fastQFormatter = null;

        switch (param)
        {
            case FastQFormatParameters.Sequence:
                try
                {
                    // Null sequence, valid file name.
                    fastQFormatter = new FastQFormatter();
                    fastQFormatter.Format(null as ISequence, filepath);
                    Assert.Fail();
                }
                catch (ArgumentNullException)
                {
                    ApplicationLog.WriteLine(
                        "FastQ Parser P2 : Successfully validated the exception");
                }
                finally
                {
                    // Close on every path: previously the formatter stayed open
                    // when Format() did not throw and Assert.Fail() was reached.
                    if (fastQFormatter != null)
                    {
                        fastQFormatter.Close();
                    }
                }

                break;

            default:
                try
                {
                    // Valid sequences, null file name.
                    IEnumerable<IQualitativeSequence> sequence = fastQParser.Parse();
                    fastQFormatter = new FastQFormatter();
                    fastQFormatter.Format(sequence, null);
                    Assert.Fail();
                }
                catch (ArgumentNullException)
                {
                    ApplicationLog.WriteLine("FastQ Parser P2 : Successfully validated the exception");
                }

                break;
        }
    }
}
/// <summary>
/// General method to validate FastQ formatting of a Qualitative Sequence.
/// Builds a QualitativeSequence from the Xml data (every quality score is '}'),
/// formats it to a temp file, parses the file back and checks the sequence text
/// and that the ID is empty.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateFastQFormatByFormattingQualSeqeunce(string nodeName)
{
    // Gets the actual sequence and the alphabet from the Xml
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNodeV2));
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FastQFormatType));
    string qualSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

    // One '}' score per symbol, built in a single allocation instead of by
    // repeated string concatenation.
    string qualityScores = new string('}', qualSequence.Length);

    byte[] seq = Encoding.UTF8.GetBytes(qualSequence);
    byte[] qScore = Encoding.UTF8.GetBytes(qualityScores);
    string tempFileName = Path.GetTempFileName();

    // Create a Qualitative Sequence.
    var qualSeq = new QualitativeSequence(alphabet, expectedFormatType, seq, qScore);

    var formatter = new FastQFormatter();
    using (formatter.Open(tempFileName))
    {
        formatter.Format(qualSeq);

        // Explicit close kept so the file is complete before it is parsed below.
        formatter.Close();
    }

    var fastQParserObj = new FastQParser();
    using (fastQParserObj.Open(tempFileName))
    {
        // Read the new file and validate Sequences.
        var seqsNew = fastQParserObj.Parse();
        var firstSequence = seqsNew.First();

        // Validate qualitative Sequence upon parsing FastQ file.
        Assert.AreEqual(expectedQualitativeSequence, firstSequence.ConvertToString());
        Assert.IsTrue(string.IsNullOrEmpty(firstSequence.ID));

        ApplicationLog.WriteLine(string.Format("FastQ Parser P1: The FASTQ sequence '{0}' validation after Parse() is found to be as expected.", firstSequence));
    }

    // Deleted only after both the formatter and the parser have released the file.
    File.Delete(tempFileName);
}
/// <summary>
/// General method to invalidate the FastQ Formatter: calls Format() with a
/// null argument and fails the test unless an exception is thrown.
/// </summary>
/// <param name="param">FastQ Formatter different parameters</param>
void InValidateFastQFormatter(FastQFormatParameters param)
{
    // Gets the expected sequence from the Xml
    string filepath = _utilityObj._xmlUtil.GetTextValue(
        Constants.MultiSeqSangerRnaProNode, Constants.FilePathNode);
    FastQFormatType expectedFormatType = Utility.GetFastQFormatType(
        _utilityObj._xmlUtil.GetTextValue(Constants.MultiSeqSangerRnaProNode,
        Constants.FastQFormatType));

    // Parse a FastQ file.
    using (FastQParser fastQParser = new FastQParser())
    {
        fastQParser.AutoDetectFastQFormat = true;
        fastQParser.FastqType = expectedFormatType;

        IQualitativeSequence sequence = fastQParser.ParseOne(filepath);
        FastQFormatter fastQFormatter = new FastQFormatter();
        TextWriter txtWriter = null;

        // The flag is checked outside the try block. Calling Assert.Fail()
        // inside it would be swallowed by catch (Exception), so the test
        // could never fail.
        bool exceptionThrown = false;
        try
        {
            switch (param)
            {
                case FastQFormatParameters.TextWriter:
                    fastQFormatter.Format(sequence, null as TextWriter);
                    break;
                case FastQFormatParameters.Sequence:
                    fastQFormatter.Format(null as ISequence, txtWriter);
                    break;
                case FastQFormatParameters.QualitativeSequence:
                    fastQFormatter.Format(null as IQualitativeSequence, txtWriter);
                    break;
                default:
                    fastQFormatter.Format(sequence as QualitativeSequence, null as TextWriter);
                    break;
            }
        }
        catch (Exception)
        {
            exceptionThrown = true;
            ApplicationLog.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
            Console.WriteLine(
                "FastQ Parser P2 : Successfully validated the exception");
        }

        if (!exceptionThrown)
        {
            Assert.Fail();
        }
    }
}
/// <summary>
/// General method to validate the FastQ Formatter. Parses the file configured
/// for the given node, writes the parsed sequence(s) to a temporary file, then
/// parses the first sequence back and compares it with the expected values.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="writeMultipleSequences">
/// True to write every parsed sequence; false to write only the first one.
/// </param>
private void ValidateFastQFormatter(string nodeName, bool writeMultipleSequences)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceIdNode);

    string tempFileName = Path.GetTempFileName();
    try
    {
        // Parse a FastQ file.
        var fastQParserObj = new FastQParser();
        using (fastQParserObj.Open(filePath))
        {
            IEnumerable<IQualitativeSequence> qualSequenceList = fastQParserObj.Parse();

            var fastQFormatter = new FastQFormatter();
            using (fastQFormatter.Open(tempFileName))
            {
                if (writeMultipleSequences)
                {
                    foreach (IQualitativeSequence newQualSeq in qualSequenceList)
                    {
                        fastQFormatter.Format(newQualSeq);
                    }
                }
                else
                {
                    fastQFormatter.Format(qualSequenceList.First());
                }
            } // temp file is closed.

            // Read the new file and validate the first Sequence.
            FastQParser fastQParserObjNew = new FastQParser();
            IQualitativeSequence firstSequence = fastQParserObjNew.ParseOne(tempFileName);

            // Validate qualitative Sequence upon parsing FastQ file.
            Assert.AreEqual(expectedQualitativeSequence, firstSequence.ConvertToString());
            Assert.AreEqual(expectedSequenceId, firstSequence.ID);

            ApplicationLog.WriteLine(string.Format("FastQ Parser P1: The FASTQ sequence '{0}' validation after Parse() is found to be as expected.", firstSequence));
        }
    }
    finally
    {
        // Remove the temporary file even when an assertion or the parse fails.
        File.Delete(tempFileName);
    }
}
/// <summary>
/// General method to validate BasicSequence Parser. Parses the configured
/// FastQ file through the parser found for a ".fq" file name, writes the first
/// sequence to a temporary file and validates the first parsed sequence
/// against the expected values from the Xml.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateBasicSequenceParser(string nodeName)
{
    // Gets the expected sequence from the Xml
    string filepathOriginal = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceIdNode);
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    Assert.IsTrue(File.Exists(filepathOriginal));

    string tempPath = Path.GetTempFileName();
    try
    {
        ISequenceParser fastQParserObj = SequenceParsers.FindParserByFileName("temp.fq");

        // Read the original file
        IEnumerable<ISequence> seqsOriginal = fastQParserObj.Parse(filepathOriginal);
        Assert.IsNotNull(seqsOriginal);

        // Fetch the first sequence once; every ElementAt(0) call on the
        // parse result enumerates it again from the start.
        ISequence firstOriginal = seqsOriginal.ElementAt(0);

        // Use the formatter to write the original sequences to a temp file
        var formatter = new FastQFormatter();
        formatter.Format(firstOriginal, tempPath);

        // Read the new file
        var fastQParserObjNew = new FastQParser();
        IEnumerable<IQualitativeSequence> seqsNew = fastQParserObjNew.Parse(tempPath);
        Assert.IsNotNull(seqsNew);
        // NOTE(review): seqsNew is only null-checked and never enumerated, so the
        // content written to the temp file is not compared - confirm whether the
        // assertions below were meant to run against seqsNew.

        // Validate qualitative Sequence upon parsing FastQ file.
        Assert.AreEqual(expectedQualitativeSequence, new string(firstOriginal.Select(a => (char) a).ToArray()));
        Assert.AreEqual(expectedSequenceId, firstOriginal.ID.ToString(null));
        Assert.AreEqual(alphabet.Name, firstOriginal.Alphabet.Name);

        ApplicationLog.WriteLine(string.Format("FastQ Parser P1: The FASTQ sequence '{0}' validation after Parse() is found to be as expected.", firstOriginal));
    }
    finally
    {
        File.Delete(tempPath);
    }
}
/// <summary>
/// Validates that the Name, Description and SupportedFileTypes properties of
/// the FastQ parser and the FastQ formatter match the resource strings.
/// </summary>
public void FastQProperties()
{
    // Assert.AreEqual takes (expected, actual): the resource string is the expectation.
    FastQParser parser = new FastQParser();
    Assert.AreEqual(Resource.FastQName, parser.Name);
    Assert.AreEqual(Resource.FASTQPARSER_DESCRIPTION, parser.Description);
    Assert.AreEqual(Resource.FASTQ_FILEEXTENSION, parser.SupportedFileTypes);

    FastQFormatter formatter = new FastQFormatter();
    Assert.AreEqual(Resource.FastQName, formatter.Name);
    Assert.AreEqual(Resource.FASTQFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Resource.FASTQ_FILEEXTENSION, formatter.SupportedFileTypes);
}
/// <summary>
/// Entry point of the discard utility. Reads sequences from the first file in
/// the command line file list, saves the non-discarded sequences to the second
/// file and, when a discarded file is given, saves the discarded sequences there.
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    #region Discarding

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.FileList[0]);
    input.DetermineParserUtil();

    // Create a sequence formatter object
    ISequenceFormatter filteredFormatter;
    ISequenceFormatter discardedFormatter = null;

    // If the format is FASTA, then output will be FASTA.
    // Everything else (assuming quality scores are available)
    // will be outputted to FASTQ.
    if (input.Parser is FastAParser)
    {
        filteredFormatter = new FastAFormatter(myArgs.FileList[1]);

        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
        }
    }
    else
    {
        filteredFormatter = new FastQFormatter(myArgs.FileList[1]);

        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
        }
    }

    // Initialize a Trimmer object
    Discarder myDiscarder = null;

    // By now, we should have sanity checked the command line arguments. So we should be able to
    // figure out what mode is being used simply by checking the properties.
    if (myArgs.DiscardByLength > 0)
    {
        myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
    }
    else if (myArgs.DiscardByQuality > 0)
    {
        if (!(input.Parser is FastQParser))
        {
            Console.Error.WriteLine("Input file must be in FASTQ format.");
            Environment.Exit(-1);
        }

        myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
    }
    else
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    myDiscarder.DiscardReads();

    #endregion

    if (myArgs.Verbose)
    {
        Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
        Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));

        if (myArgs.DiscardedFile != null)
        {
            Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
        }

        Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
    }

    // Each resource is closed exactly once here, whether or not Verbose is set.
    input.Parser.Close();
    filteredFormatter.Close();

    if (discardedFormatter != null)
    {
        discardedFormatter.Close();
    }
}
/// <summary>
/// usage: SeqcosTrimmerUtil.exe [options] &lt;input&gt; &lt;output&gt;
/// Picks a trimmer from the command line options (by length, by quality or
/// by regular expression), runs it over the input file and closes the
/// parser and the formatters.
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());
    CommandLineOptions options = ProcessCommandLine(args);

    #region Trimming

    // Work out which parser handles the input file.
    InputSubmission submission = new InputSubmission(options.InputFile);
    submission.DetermineParserUtil();

    // FASTA input produces FASTA output; anything else (assuming quality
    // scores are available) is written as FASTQ.
    bool fastaInput = submission.Parser is FastAParser;

    if (fastaInput && options.TrimByQuality > 0)
    {
        Console.Error.WriteLine("Cannot trim by quality using a FASTA file.");
        Environment.Exit(-1);
    }

    // The discarded formatter (optional) is created before the filtered one.
    bool keepDiscarded = options.DiscardedFile != null;
    ISequenceFormatter discardedFormatter = null;
    ISequenceFormatter filteredFormatter;

    if (fastaInput)
    {
        if (keepDiscarded)
        {
            discardedFormatter = new FastAFormatter(options.DiscardedFile);
        }

        filteredFormatter = new FastAFormatter(options.OutputFile);
    }
    else
    {
        if (keepDiscarded)
        {
            discardedFormatter = new FastQFormatter(options.DiscardedFile);
        }

        filteredFormatter = new FastQFormatter(options.OutputFile);
    }

    // The command line arguments have been sanity checked by now, so the
    // trimming mode follows directly from which option is set.
    Trimmer trimmer = null;

    if (options.TrimByLength > 0)
    {
        Console.Error.WriteLine("Trimming reads to length {0}", options.TrimByLength);
        trimmer = new TrimByLength(
            submission.Parser,
            filteredFormatter,
            discardedFormatter,
            options.TrimByLength,
            options.Left);
    }
    else if (options.TrimByQuality > 0)
    {
        bool fastqInput = submission.Parser is FastQParser;
        if (!fastqInput)
        {
            throw new ArgumentException("Input file must be in FASTQ format.");
        }

        Console.Error.WriteLine("Trimming reads based on quality score {0}", options.TrimByQuality);
        trimmer = new TrimByQuality(
            submission.Parser,
            filteredFormatter,
            discardedFormatter,
            (byte)options.TrimByQuality,
            options.Left,
            (int)Math.Round(options.TrimByLength));
    }
    else if (options.TrimByRegex != null)
    {
        Console.Error.WriteLine("Trimming reads based on the regular expression pattern {0}", options.TrimByRegex);
        trimmer = new TrimByRegex(
            submission.Parser,
            filteredFormatter,
            discardedFormatter,
            options.TrimByRegex);
    }
    else
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    trimmer.TrimAll();

    #endregion

    if (options.Verbose)
    {
        Console.Error.WriteLine("Trimmed {0}/{1} sequences.", trimmer.TrimCount, trimmer.Counted);
        Console.Error.WriteLine("Discarded {0}/{1} sequences.", trimmer.DiscardCount, trimmer.Counted);
        Console.Error.WriteLine("Output saved in {0}.", Path.GetFullPath(options.OutputFile));
        Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
    }

    // Release the parser first, then the formatters.
    submission.Parser.Close();
    filteredFormatter.Close();

    if (discardedFormatter != null)
    {
        discardedFormatter.Close();
    }
}
/// <summary>
/// General method to validate FastQ Formatter on a Stream. Parses one sequence
/// from the configured file, writes it to a temporary file, parses it back and
/// compares the sequence and its ID with the expected values.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateFastQFormatterOnAStream(string nodeName)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedQualitativeSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string expectedSequenceId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceIdNode);

    string tempFileName1 = Path.GetTempFileName();
    try
    {
        // Parse a FastQ file using parseOne method.
        var fastQParserObj = new FastQParser();
        using (fastQParserObj.Open(filePath))
        {
            var oneSequence = fastQParserObj.ParseOne();

            // New Sequence after formatting file.
            var fastQFormatter = new FastQFormatter();
            using (fastQFormatter.Open(tempFileName1))
            {
                fastQFormatter.Format(oneSequence);
            }

            var fastQParserObjTemp = new FastQParser();
            string parsedValue, parsedId;
            using (fastQParserObjTemp.Open(tempFileName1))
            {
                oneSequence = fastQParserObjTemp.Parse().First();
                parsedValue = oneSequence.ConvertToString();
                parsedId = oneSequence.ID;
            }

            // Validate qualitative parsing temporary file.
            Assert.AreEqual(expectedQualitativeSequence, parsedValue);
            Assert.AreEqual(expectedSequenceId, parsedId);

            ApplicationLog.WriteLine(string.Format("FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.", parsedValue));
            ApplicationLog.WriteLine(string.Format("FastQ Formatter BVT: The FASTQ sequence '{0}' validation after Write() and Parse() is found to be as expected.", parsedId));
        }
    }
    finally
    {
        // Remove the temporary file even when an assertion or the parse fails.
        File.Delete(tempFileName1);
    }
}