/// <summary>
/// Parses the bundled PacBio CCS BAM file and verifies the number of reads as well as the
/// metadata and base sequence of the read at index 4.
/// </summary>
public static void SimpleCCSTest ()
{
    string fname = System.IO.Path.Combine ("TestUtils", "PacBio", "ccs.bam");
    var csp = new PacBioCCSBamReader ();

    // Cast<T> raises InvalidCastException on an unexpected record type instead of producing
    // a null entry that would only surface later as a NullReferenceException.
    var seqs = csp.Parse (fname).Cast<PacBioCCSRead> ().ToList ();

    // Checked before indexing so a short file fails with an assertion message
    // (expected value first) rather than an out-of-range exception.
    Assert.AreEqual (7, seqs.Count);

    var seq4 = seqs [4];
    Assert.AreEqual (273, seq4.HoleNumber);
    Assert.AreEqual (12, seq4.NumPasses);
    Assert.AreEqual (1, seq4.ReadCountBadZscore);
    Assert.AreEqual (1946, seq4.Sequence.Count);
    Assert.AreEqual (14, seq4.ZScores.Length);
    Assert.AreEqual ("m150930_045019_42194_c100916310150000001823201204291662_s1_p0/273/ccs", seq4.Sequence.ID);

    // Expected bases of the read; written as two adjacent literals purely to keep the
    // lines manageable - the concatenation is one contiguous sequence.
    string expected =
        "TGTCACTCATCTGAGTGATCCCGCGAAATTAATACGACTCACTATAGGGGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATTTTGTTTAACTTTAAGAAGGAGATATACATATGAAACACATGCCACGTAAAATGTATTCCTGCGACTTTGAGACTACACCAAGGTTGAAGATTGCCGCGTATGGGCATACGGTTACATGAACATCGAAGACCACTCCGAGTATAAGATTGGTAACTCCCTGGATGAATTTATGGCTTGGGTTCTGAAAGTTCAGGCTGACCTGTACTTCCACAATCTGAAATTTGATGGCGCATTCATCATCAACTGGCTGGAACGTAACGGTTTTAAATGGTCCGCAGATGGTCTGCCAAATACCTACAACACCATCATTTCTCGCATGGGCCAGTGGTATATGATTGATATTTGCCTGGGTTACAAGGGTAAACGCAAGATCCACACCGTGATCTACGACTCTCTGAAGAAACTGCCGTTTCCGGTTAAGAAAATTGCGAAAGACTTTAAGCTGACGGTACTGAAAGGCGACATCGACTATCATAAGGAGCGCCCGGTCGTTACAAAATCACCCGGAAGAATATGCCTACATTAAAAACGATATTCAGATTATCGCAGAAGCTCTGCTGATCCAGTTCAAGCAGGGTCTGGATCGTATGACGGCAGGTTCTGACTCTCTGAAAGGCTTCAAAGACATTATCACCACCAAGAAGTTTAAAAAGGTTTCCCGACCCTGAGCCTGGGTCTGGACAAGGAAGTTCGTTATGCCTACCGTGGTGGTTTCACCTGGCTGAATGACCGTTTTAAAGAAAAAGAGATCGGCGAAGGTATGGTTTTTGATGTTAATTCCCTGTACCCAGCGCAAATGTACTCTCGCCTGCTGCCGTACGGCGAGCCGATCGTATTCGAGGGTAAATACGTCTGGGACGAGGACTACCCTCTGCACATTCAGCACATTCGTTGTGAATTTGAACTGAAGGAAGGCTACATCCCGACCATCCAGATCAATCGTTCCCGTTTCTACAAGGGTAACGAATACCTGAAATCTTCCGGCGGTGAAATTGCTGACCTGTGGCTGTCTAATGTTGATCTGGAACTGATGAAAGAGCACTACGACCTGTACAATGTTGAATATATCTCTGGTCTGAAGTTCAAAGCAACCACTGGCCTGTTCAAGGACTTTATCGACAAATGGACGTATATCAAAACTACCTCTGAAGGCGCCATCAAACAGCTGGCGAAGCTGATCCTGAACAGCCTGTACGGTAAATTCGCGTCCAACCCGGACGTTACCGGTAAAGTGCCATACCTGAAAGAGAACGGT" +
        "GCTCTGGGTTTTCGTCTGGGTGAGGAGGAAACGAAAGACCCTGTATATACCCGATGGGTGTCTTTATCACGGCCTGGGCACGCTATACGACCATCACGGCAGCGCAGGCTTGTTATGATCGTATTATCTACTGCGATACCGATTCTATTCACCTGACTGGTACTGAAATTCCGGACGTTATCAAAGACATCGTAGACCCGAAGAAACTGGGCTACTGGGCGCACGAATCTACTTTTAAGCGTGCAAAATATCTGCGTCAGAAAACCTACATCCAGGATATTTACATGAAAGAAGTAGACGGCAAATTGGTAGAGGGCTCTCCTGACGACTACACTGACATCAAGTTCTCTGTGAAATGCGCAGGCATGACGGACAAAATCAAAAAGGAAGTGACTTTCGAAAACTTCAAAGTGGGTTTTTCTCGTAAAATGAAACCGAAGCCTGTTCAGGTACCGGGTGGCGTAGTGCTGGTTGATGACACTTTTACTATCAAATAACTCGAGCTGCAGGAATTCAAGCTTGGATCCGGCTGCTAACAAAGCCCGAAAGGAAGCTGAGTTGGCTGCTGCCACCGCTGAGCAATAACTTGTCACTCATCTGAGT";

    var seq = new Sequence (DnaAlphabet.Instance, seq4.Sequence.ToArray (), true);
    Assert.AreEqual (expected, seq.ConvertToString ());
}
/// <summary>
/// Parses the PacBio CCS BAM file located via the <c>TestDir()</c> helper and verifies the
/// number of reads as well as the metadata and base sequence of the read at index 4.
/// </summary>
public static void SimpleCCSTest()
{
    string fname = System.IO.Path.Combine("TestUtils", "PacBio", "ccs.bam").TestDir();
    var csp = new PacBioCCSBamReader();

    // Cast<T> raises InvalidCastException on an unexpected record type instead of producing
    // a null entry that would only surface later as a NullReferenceException.
    var seqs = csp.Parse(fname).Cast<PacBioCCSRead>().ToList();

    // Checked before indexing so a short file fails with an assertion message
    // (expected value first) rather than an out-of-range exception.
    Assert.AreEqual(7, seqs.Count);

    var seq4 = seqs [4];
    Assert.AreEqual(273, seq4.HoleNumber);
    Assert.AreEqual(12, seq4.NumPasses);
    Assert.AreEqual(1, seq4.ReadCountBadZscore);
    Assert.AreEqual(1946, seq4.Sequence.Count);
    Assert.AreEqual(14, seq4.ZScores.Length);
    Assert.AreEqual("m150930_045019_42194_c100916310150000001823201204291662_s1_p0/273/ccs", seq4.Sequence.ID);

    // Expected bases of the read; written as two adjacent literals purely to keep the
    // lines manageable - the concatenation is one contiguous sequence.
    string expected =
        "TGTCACTCATCTGAGTGATCCCGCGAAATTAATACGACTCACTATAGGGGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATTTTGTTTAACTTTAAGAAGGAGATATACATATGAAACACATGCCACGTAAAATGTATTCCTGCGACTTTGAGACTACACCAAGGTTGAAGATTGCCGCGTATGGGCATACGGTTACATGAACATCGAAGACCACTCCGAGTATAAGATTGGTAACTCCCTGGATGAATTTATGGCTTGGGTTCTGAAAGTTCAGGCTGACCTGTACTTCCACAATCTGAAATTTGATGGCGCATTCATCATCAACTGGCTGGAACGTAACGGTTTTAAATGGTCCGCAGATGGTCTGCCAAATACCTACAACACCATCATTTCTCGCATGGGCCAGTGGTATATGATTGATATTTGCCTGGGTTACAAGGGTAAACGCAAGATCCACACCGTGATCTACGACTCTCTGAAGAAACTGCCGTTTCCGGTTAAGAAAATTGCGAAAGACTTTAAGCTGACGGTACTGAAAGGCGACATCGACTATCATAAGGAGCGCCCGGTCGTTACAAAATCACCCGGAAGAATATGCCTACATTAAAAACGATATTCAGATTATCGCAGAAGCTCTGCTGATCCAGTTCAAGCAGGGTCTGGATCGTATGACGGCAGGTTCTGACTCTCTGAAAGGCTTCAAAGACATTATCACCACCAAGAAGTTTAAAAAGGTTTCCCGACCCTGAGCCTGGGTCTGGACAAGGAAGTTCGTTATGCCTACCGTGGTGGTTTCACCTGGCTGAATGACCGTTTTAAAGAAAAAGAGATCGGCGAAGGTATGGTTTTTGATGTTAATTCCCTGTACCCAGCGCAAATGTACTCTCGCCTGCTGCCGTACGGCGAGCCGATCGTATTCGAGGGTAAATACGTCTGGGACGAGGACTACCCTCTGCACATTCAGCACATTCGTTGTGAATTTGAACTGAAGGAAGGCTACATCCCGACCATCCAGATCAATCGTTCCCGTTTCTACAAGGGTAACGAATACCTGAAATCTTCCGGCGGTGAAATTGCTGACCTGTGGCTGTCTAATGTTGATCTGGAACTGATGAAAGAGCACTACGACCTGTACAATGTTGAATATATCTCTGGTCTGAAGTTCAAAGCAACCACTGGCCTGTTCAAGGACTTTATCGACAAATGGACGTATATCAAAACTACCTCTGAAGGCGCCATCAAACAGCTGGCGAAGCTGATCCTGAACAGCCTGTACGGTAAATTCGCGTCCAACCCGGACGTTACCGGTAAAGTGCCATACCTGAAAGAGAACGGTG" +
        "CTCTGGGTTTTCGTCTGGGTGAGGAGGAAACGAAAGACCCTGTATATACCCGATGGGTGTCTTTATCACGGCCTGGGCACGCTATACGACCATCACGGCAGCGCAGGCTTGTTATGATCGTATTATCTACTGCGATACCGATTCTATTCACCTGACTGGTACTGAAATTCCGGACGTTATCAAAGACATCGTAGACCCGAAGAAACTGGGCTACTGGGCGCACGAATCTACTTTTAAGCGTGCAAAATATCTGCGTCAGAAAACCTACATCCAGGATATTTACATGAAAGAAGTAGACGGCAAATTGGTAGAGGGCTCTCCTGACGACTACACTGACATCAAGTTCTCTGTGAAATGCGCAGGCATGACGGACAAAATCAAAAAGGAAGTGACTTTCGAAAACTTCAAAGTGGGTTTTTCTCGTAAAATGAAACCGAAGCCTGTTCAGGTACCGGGTGGCGTAGTGCTGGTTGATGACACTTTTACTATCAAATAACTCGAGCTGCAGGAATTCAAGCTTGGATCCGGCTGCTAACAAAGCCCGAAAGGAAGCTGAGTTGGCTGCTGCCACCGCTGAGCAATAACTTGTCACTCATCTGAGT";

    var seq = new Sequence(DnaAlphabet.Instance, seq4.Sequence.ToArray(), true);
    Assert.AreEqual(expected, seq.ConvertToString());
}
/// <summary>
/// Parses the bundled PacBio CCS BAM file and verifies the number of reads as well as the
/// metadata and base sequence of the read at index 4.
/// </summary>
public static void SimpleCCSTest()
{
    string fname = System.IO.Path.Combine("TestUtils", "PacBio", "ccs.bam");
    var csp = new PacBioCCSBamReader();
    var seqs = csp.Parse(fname).ToList();

    // Checked before indexing so a short file fails with an assertion message
    // (expected value first) rather than an out-of-range exception.
    Assert.AreEqual(7, seqs.Count);

    var seq4 = seqs [4];
    Assert.AreEqual(146331, seq4.HoleNumber);
    Assert.AreEqual(124, seq4.NumPasses);
    Assert.AreEqual(2, seq4.ReadCountBadZscore);
    Assert.AreEqual(136, seq4.Sequence.Count);
    Assert.AreEqual(128, seq4.ZScores.Length);
    Assert.AreEqual("m141008_060349_42194_c100704972550000001823137703241586_s1_p0/146331/ccs", seq4.Sequence.ID);

    // Rebuild the read as a plain DNA sequence so its bases can be compared as text.
    var seq = new Sequence(DnaAlphabet.Instance, seq4.Sequence.ToArray(), true);
    Assert.AreEqual(
        "CCCGGGGATCCTCTAGAATGCTCATACACTGGGGGATACATATACGGGGGGGGGCACATCATCTAGACAGACGACTTTTTTTTTTCGAGCGCAGCTTTTTGAGCGACGCACAAGCTTGCTGAGGACTAGTAGCTTC",
        seq.ConvertToString());
}
/// <summary>
/// Command-line entry point. Reads a PacBio CCS BAM file and writes every read whose
/// <c>ReadQuality</c> is strictly greater than the given threshold to a new Sanger FASTQ file.
/// </summary>
/// <param name="args">
/// Exactly three values: input BAM path, minimum read quality in [0,1], output FASTQ path
/// (which must not exist yet). "h", "help", "?" or "-h" as first value prints the help text.
/// </param>
public static void Main(string[] args)
{
    try {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        if (args.Length > 3) {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
        } else if (args.Length > 0 && (args [0] == "h" || args [0] == "help" || args [0] == "?" || args [0] == "-h")) {
            // Tested before the argument-count check so a lone help flag is honoured.
            DisplayHelp();
        } else if (args.Length < 3) {
            // All three arguments are read below, so fewer than three cannot be processed.
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
        } else {
            string bam_name = args [0];
            string threshold = args [1];
            string output = args [2];

            if (!File.Exists(bam_name)) {
                Console.WriteLine("Can't find file: " + bam_name);
                return;
            }

            double min_rq;
            bool converted = Double.TryParse(threshold, out min_rq);
            if (!converted) {
                Console.WriteLine("Could not parse minimum threshold from : " + threshold + " expected decimal number in [0,1] interval.");
                return;
            }
            if (min_rq < 0.0 || min_rq > 1.0) {
                Console.WriteLine("Minimum RQ value: " + min_rq + " was not in [0,1] interval.");
                return;
            }
            if (File.Exists(output)) {
                Console.WriteLine("The output file already exists, please specify a new name or delete the old one.");
                return;
            }

            var fastq = new FastQFormatter();
            fastq.FormatType = FastQFormatType.Sanger;

            // Filter and output
            PacBioCCSBamReader bamreader = new PacBioCCSBamReader();
            int numRead = 0;
            int numFiltered = 0;

            // The using block closes the output stream even when parsing or formatting throws.
            using (var os = new FileStream(output, FileMode.CreateNew)) {
                foreach (var read in bamreader.Parse(bam_name)) {
                    numRead++;
                    // Direct cast: an unexpected record type fails with InvalidCastException
                    // instead of a NullReferenceException on the next line.
                    var ccs = (PacBioCCSRead)read;
                    if (ccs.ReadQuality > min_rq) {
                        fastq.Format(os, read);
                    } else {
                        numFiltered++;
                    }
                }
            }

            Console.WriteLine("Parsed " + numRead + " reads and filtered out " + numFiltered + " for RQ < " + min_rq);
        }
    } catch (DllNotFoundException thrown) {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine("A shared library was not found. To solve this, please add the folder" +
                          " with the downloaded file libMonoPosixHelper" +
                          " to your environmental variables (LD_LIBRARY_PATH and DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    } catch (Exception thrown) {
        Console.WriteLine("Error thrown when attempting to generate the FASTQ File");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
        // Walk the chain so the root cause is printed as well.
        while (thrown.InnerException != null) {
            Console.WriteLine("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}
/// <summary>
/// Command-line entry point. Reads a PacBio CCS BAM file, optionally aligns each read to a
/// reference and calls variants, and feeds every read to the metric outputters that write
/// into a newly created output directory.
/// </summary>
/// <param name="args">
/// Two or three values: input BAM path, output directory (must not exist yet) and an optional
/// reference file. "h", "help", "?" or "-h" as first value prints the help text.
/// </param>
public static void Main(string[] args)
{
    try {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        if (args.Length > 3) {
            Console.WriteLine ("Too many arguments");
            DisplayHelp ();
        } else if (args.Length > 0 && (args [0] == "h" || args [0] == "help" || args [0] == "?" || args [0] == "-h")) {
            // Tested before the argument-count check so a lone help flag is honoured.
            DisplayHelp ();
        } else if (args.Length < 2) {
            Console.WriteLine ("Not enough arguments");
            DisplayHelp ();
        } else {
            string bam_name = args [0];
            string out_dir = args [1];
            string ref_name = args.Length > 2 ? args [2] : null;

            if (!File.Exists (bam_name)) {
                Console.WriteLine ("Can't find file: " + bam_name);
                return;
            }
            if (ref_name != null && !File.Exists (ref_name)) {
                Console.WriteLine ("Can't find file: " + ref_name);
                return;
            }
            if (Directory.Exists (out_dir)) {
                Console.WriteLine ("The output directory already exists, please specify a new directory or delete the old one.");
                return;
            }
            Directory.CreateDirectory (out_dir);

            List<CCSReadMetricsOutputter> outputters = new List<CCSReadMetricsOutputter> () {
                new ZmwOutputFile (out_dir),
                new ZScoreOutputter (out_dir),
                new VariantOutputter (out_dir),
                new SNROutputFile (out_dir),
                new QVCalibration (out_dir)
            };

            PacBioCCSBamReader bamreader = new PacBioCCSBamReader ();
            BWAPairwiseAligner bwa = null;
            // Alignment and variant calling only happen when a reference was supplied.
            bool callVariants = ref_name != null;
            if (callVariants) {
                bwa = new BWAPairwiseAligner (ref_name, false);
            }

            // Produce aligned reads with variants called in parallel.
            var reads = new BlockingCollection<Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>> ();
            Task producer = Task.Factory.StartNew (() => {
                try {
                    Parallel.ForEach (bamreader.Parse (bam_name), z => {
                        try {
                            BWAPairwiseAlignment aln = null;
                            List<Variant> variants = null;
                            if (callVariants) {
                                aln = bwa.AlignRead (z.Sequence) as BWAPairwiseAlignment;
                                if (aln != null) {
                                    variants = VariantCaller.CallVariants (aln);
                                    // Shift each variant by the alignment position and tag it with the reference name.
                                    variants.ForEach (p => {
                                        p.StartPosition += aln.AlignedSAMSequence.Pos;
                                        p.RefName = aln.Reference;
                                    });
                                }
                            }
                            var res = new Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>> (z, aln, variants);
                            reads.Add (res);
                        } catch (Exception thrown) {
                            // A single bad read is reported and skipped; the remaining reads continue.
                            Console.WriteLine ("CCS READ FAIL: " + z.Sequence.ID);
                            Console.WriteLine (thrown.Message);
                        }
                    });
                } catch (Exception thrown) {
                    Console.WriteLine ("Could not parse BAM file: " + thrown.Message);
                    while (thrown.InnerException != null) {
                        Console.WriteLine (thrown.InnerException.Message);
                        thrown = thrown.InnerException;
                    }
                } finally {
                    // Always signal completion, otherwise the consuming loop below would block forever.
                    reads.CompleteAdding ();
                }
            });

            try {
                // Consume them into output files.
                foreach (var r in reads.GetConsumingEnumerable ()) {
                    foreach (var outputter in outputters) {
                        outputter.ConsumeCCSRead (r.Item1, r.Item2, r.Item3);
                    }
                }
                // throw any exceptions (this should be used after putting the consumer on a separate thread)
                producer.Wait ();
            } finally {
                // Close the files, also when consuming a read failed part-way through.
                outputters.ForEach (o => o.Finish ());
            }
        }
    } catch (DllNotFoundException thrown) {
        Console.WriteLine ("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine ("A shared library was not found. To solve this, please add the folder" +
                           " with the downloaded files libbwasharp and libMonoPosixHelper" +
                           " to your environmental variables (LD_LIBRARY_PATH on Ubuntu, DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine ("Error: " + thrown.Message);
        Console.WriteLine (thrown.StackTrace);
    } catch (Exception thrown) {
        Console.WriteLine ("Error thrown when attempting to generate the CCS results");
        Console.WriteLine ("Error: " + thrown.Message);
        Console.WriteLine (thrown.StackTrace);
        // Walk the chain so the root cause is printed as well.
        while (thrown.InnerException != null) {
            Console.WriteLine ("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}
/// <summary>
/// Command-line entry point. Reads a PacBio CCS BAM file and writes every read whose
/// <c>ReadQuality</c> is strictly greater than the given threshold to a new Sanger FASTQ file.
/// </summary>
/// <param name="args">
/// Exactly three values: input BAM path, minimum read quality in [0,1], output FASTQ path
/// (which must not exist yet). "h", "help", "?" or "-h" as first value prints the help text.
/// </param>
public static void Main(string[] args)
{
    try {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        if (args.Length > 3) {
            Console.WriteLine ("Too many arguments");
            DisplayHelp ();
        } else if (args.Length > 0 && (args [0] == "h" || args [0] == "help" || args [0] == "?" || args [0] == "-h")) {
            // Tested before the argument-count check so a lone help flag is honoured.
            DisplayHelp ();
        } else if (args.Length < 3) {
            // All three arguments are read below, so fewer than three cannot be processed.
            Console.WriteLine ("Not enough arguments");
            DisplayHelp ();
        } else {
            string bam_name = args [0];
            string threshold = args [1];
            string output = args [2];

            if (!File.Exists (bam_name)) {
                Console.WriteLine ("Can't find file: " + bam_name);
                return;
            }

            double min_rq;
            bool converted = Double.TryParse (threshold, out min_rq);
            if (!converted) {
                Console.WriteLine ("Could not parse minimum threshold from : " + threshold + " expected decimal number in [0,1] interval.");
                return;
            }
            if (min_rq < 0.0 || min_rq > 1.0) {
                Console.WriteLine ("Minimum RQ value: " + min_rq + " was not in [0,1] interval.");
                return;
            }
            if (File.Exists (output)) {
                Console.WriteLine ("The output file already exists, please specify a new name or delete the old one.");
                return;
            }

            var fastq = new FastQFormatter ();
            fastq.FormatType = FastQFormatType.Sanger;

            // Filter and output
            PacBioCCSBamReader bamreader = new PacBioCCSBamReader ();
            int numRead = 0;
            int numFiltered = 0;

            // The using block closes the output stream even when parsing or formatting throws.
            using (var os = new FileStream (output, FileMode.CreateNew)) {
                foreach (var read in bamreader.Parse (bam_name)) {
                    numRead++;
                    // Direct cast: an unexpected record type fails with InvalidCastException
                    // instead of a NullReferenceException on the next line.
                    var ccs = (PacBioCCSRead)read;
                    if (ccs.ReadQuality > min_rq) {
                        fastq.Format (os, read);
                    } else {
                        numFiltered++;
                    }
                }
            }

            Console.WriteLine ("Parsed " + numRead + " reads and filtered out " + numFiltered + " for RQ < " + min_rq);
        }
    } catch (DllNotFoundException thrown) {
        Console.WriteLine ("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine ("A shared library was not found. To solve this, please add the folder" +
                           " with the downloaded file libMonoPosixHelper" +
                           " to your environmental variables (LD_LIBRARY_PATH and DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine ("Error: " + thrown.Message);
        Console.WriteLine (thrown.StackTrace);
    } catch (Exception thrown) {
        Console.WriteLine ("Error thrown when attempting to generate the FASTQ File");
        Console.WriteLine ("Error: " + thrown.Message);
        Console.WriteLine (thrown.StackTrace);
        // Walk the chain so the root cause is printed as well.
        while (thrown.InnerException != null) {
            Console.WriteLine ("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}
/// <summary>
/// Command-line entry point. Reads CCS reads from a BAM file (or from a FASTQ file when the
/// input name ends in ".fastq"), optionally aligns each read to a reference and calls
/// variants, and feeds every read to the metric outputters that write into a newly created
/// output directory.
/// </summary>
/// <param name="args">
/// Two or three values: input file path, output directory (must not exist yet) and an optional
/// reference file. "h", "help", "?" or "-h" as first value prints the help text.
/// </param>
public static void Main(string[] args)
{
    try {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        if (args.Length > 3) {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
        } else if (args.Length > 0 && (args [0] == "h" || args [0] == "help" || args [0] == "?" || args [0] == "-h")) {
            // Tested before the argument-count check so a lone help flag is honoured.
            DisplayHelp();
        } else if (args.Length < 2) {
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
        } else {
            string bam_name = args [0];
            string out_dir = args [1];
            string ref_name = args.Length > 2 ? args [2] : null;

            if (!File.Exists(bam_name)) {
                Console.WriteLine("Can't find file: " + bam_name);
                return;
            }
            if (ref_name != null && !File.Exists(ref_name)) {
                Console.WriteLine("Can't find file: " + ref_name);
                return;
            }
            if (Directory.Exists(out_dir)) {
                Console.WriteLine("The output directory already exists, please specify a new directory or delete the old one.");
                return;
            }
            Directory.CreateDirectory(out_dir);

            List<CCSReadMetricsOutputter> outputters = new List<CCSReadMetricsOutputter>() {
                new ZmwOutputFile(out_dir),
                new ZScoreOutputter(out_dir),
                new VariantOutputter(out_dir),
                new SNROutputFile(out_dir),
                new QVCalibration(out_dir)
            };

            // The input format is chosen from the file extension.
            ISequenceParser reader;
            if (bam_name.EndsWith(".fastq", StringComparison.OrdinalIgnoreCase)) {
                reader = new FastQCCSReader();
            } else {
                reader = new PacBioCCSBamReader();
            }

            BWAPairwiseAligner bwa = null;
            // Alignment and variant calling only happen when a reference was supplied.
            bool callVariants = ref_name != null;
            if (callVariants) {
                bwa = new BWAPairwiseAligner(ref_name, false);
            }

            // Produce aligned reads with variants called in parallel.
            var reads = new BlockingCollection<Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>>();
            Task producer = Task.Factory.StartNew(() => {
                try {
                    Parallel.ForEach(reader.Parse(bam_name), y => {
                        var z = y as PacBioCCSRead;
                        if (z == null) {
                            // Skip records of an unexpected type here: the handler below reads
                            // z.Sequence.ID and would otherwise throw again, aborting the whole loop.
                            Console.WriteLine("CCS READ FAIL: " + (y == null ? "<null>" : y.ID) + " is not a PacBioCCSRead");
                            return;
                        }
                        try {
                            BWAPairwiseAlignment aln = null;
                            List<Variant> variants = null;
                            if (callVariants) {
                                aln = bwa.AlignRead(z.Sequence) as BWAPairwiseAlignment;
                                if (aln != null) {
                                    variants = VariantCaller.CallVariants(aln);
                                    // Shift each variant by the alignment position and tag it with the reference name.
                                    variants.ForEach(p => {
                                        p.StartPosition += aln.AlignedSAMSequence.Pos;
                                        p.RefName = aln.Reference;
                                    });
                                }
                            }
                            var res = new Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>(z, aln, variants);
                            reads.Add(res);
                        } catch (Exception thrown) {
                            // A single bad read is reported and skipped; the remaining reads continue.
                            Console.WriteLine("CCS READ FAIL: " + z.Sequence.ID);
                            Console.WriteLine(thrown.Message);
                        }
                    });
                } catch (Exception thrown) {
                    Console.WriteLine("Could not parse BAM file: " + thrown.Message);
                    while (thrown.InnerException != null) {
                        Console.WriteLine(thrown.InnerException.Message);
                        thrown = thrown.InnerException;
                    }
                } finally {
                    // Always signal completion, otherwise the consuming loop below would block forever.
                    reads.CompleteAdding();
                }
            });

            try {
                // Consume them into output files.
                foreach (var r in reads.GetConsumingEnumerable()) {
                    foreach (var outputter in outputters) {
                        outputter.ConsumeCCSRead(r.Item1, r.Item2, r.Item3);
                    }
                }
                // throw any exceptions (this should be used after putting the consumer on a separate thread)
                producer.Wait();
            } finally {
                // Close the files, also when consuming a read failed part-way through.
                outputters.ForEach(o => o.Finish());
            }
        }
    } catch (DllNotFoundException thrown) {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine("A shared library was not found. To solve this, please add the folder" +
                          " with the downloaded files libbwasharp and libMonoPosixHelper" +
                          " to your environmental variables (LD_LIBRARY_PATH on Ubuntu, DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    } catch (Exception thrown) {
        Console.WriteLine("Error thrown when attempting to generate the CCS results");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
        // Walk the chain so the root cause is printed as well.
        while (thrown.InnerException != null) {
            Console.WriteLine("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}