/// <summary>
/// Verifies that deleting one base from a homopolymer run is called as a single
/// deletion variant with the expected quality, position and homopolymer annotations.
/// </summary>
/// <remarks>
/// The reference is "ATACCCCTT"; the query is the same sequence with one 'C' removed.
/// The query quality array carries a single low value (2), which the test expects to
/// be reported as the variant's QV.
/// </remarks>
public static void Test1BPDeletionCall()
{
    string seq1seq = "ATACCCCTT";
    // The dash only marks where the base was removed; it is stripped before use.
    string seq2seq = "ATA-CCCTT".Replace("-", String.Empty);
    int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30 };

    var refseq = new Sequence(AmbiguousDnaAlphabet.Instance, seq1seq, false);
    var query = new Sequence(AmbiguousDnaAlphabet.Instance, seq2seq, false);

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(refseq, query).First();

    // Need to add in the QV values.
    ConvertAlignedSequenceToQualSeq(aln, seq2qual);

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that failure messages read correctly.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(2, variant.QV);
    Assert.AreEqual(2, variant.StartPosition);
    Assert.AreEqual(VariantType.INDEL, variant.Type);

    var vi = variant as IndelVariant;
    // Fail with a clear assertion instead of a NullReferenceException below.
    Assert.IsNotNull(vi);
    Assert.AreEqual("C", vi.InsertedOrDeletedBases);
    Assert.AreEqual('C', vi.HomopolymerBase);
    Assert.AreEqual(4, vi.HomopolymerLengthInReference);
    Assert.AreEqual(true, vi.InHomopolymer);
    Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
}
/// <summary>
/// Verifies that a single mismatch is called as a SNP with the expected QV and bases
/// when both sequences are reverse complemented before being aligned.
/// </summary>
public static void TestTrickyQVInversions()
{
    // Tricky case: the QV value is normally flipped for a homopolymer, but here it
    // must not be. (The whole notion of "flipping" is poorly defined.)
    const string referenceBases = "ATTGC";
    const string queryBases = "ATAGC";
    int[] queryQuals = { 30, 30, 2, 30, 30 };

    // Build both sequences, then align their reverse complements.
    var referenceSequence = new Sequence(DnaAlphabet.Instance, referenceBases);
    var querySequence = new Sequence(DnaAlphabet.Instance, queryBases);
    var referenceRc = referenceSequence.GetReverseComplementedSequence();
    var queryRc = querySequence.GetReverseComplementedSequence();

    var nwAligner = new NeedlemanWunschAligner();
    var alignment = nwAligner.Align(referenceRc, queryRc).First();

    // The qualities are supplied in reversed order to match the reversed query.
    int[] reversedQuals = queryQuals.Reverse().ToArray();
    VariantCallTests.ConvertAlignedSequenceToQualSeq(alignment, reversedQuals);
    alignment.PairwiseAlignedSequences[0].Sequences[1].MarkAsReverseComplement();

    var calledVariants = VariantCaller.CallVariants(alignment);
    Assert.AreEqual(1, calledVariants.Count);

    var called = calledVariants.First();
    Assert.AreEqual(VariantType.SNP, called.Type);
    Assert.AreEqual(2, called.QV);

    var snp = called as SNPVariant;
    Assert.AreEqual('T', snp.AltBP);
    Assert.AreEqual('A', snp.RefBP);
}
/// <summary>
/// Verifies that a mismatch at the very first base is called as a single SNP and is
/// flagged as lying at the end of the alignment.
/// </summary>
/// <remarks>
/// Reference "CTCCCCCTT" and query "TTCCCCCTT" differ only at index 0, where the
/// query quality is 10; the test expects that value as the variant's QV.
/// </remarks>
public static void TestSNPCallAtStart()
{
    string seq1seq = "CTCCCCCTT";
    string seq2seq = "TTCCCCCTT";
    int[] seq2qual = new int[] { 10, 30, 30, 30, 5, 3, 30, 30, 10 };

    var refseq = new Sequence(DnaAlphabet.Instance, seq1seq);
    var query = new Sequence(DnaAlphabet.Instance, seq2seq);

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(refseq, query).First();
    ConvertAlignedSequenceToQualSeq(aln, seq2qual);

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that failure messages read correctly.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(10, variant.QV);
    Assert.AreEqual(0, variant.StartPosition);
    Assert.AreEqual(VariantType.SNP, variant.Type);

    var vi = variant as SNPVariant;
    // Fail with a clear assertion instead of a NullReferenceException below.
    Assert.IsNotNull(vi);
    Assert.AreEqual(1, vi.Length);
    Assert.AreEqual('T', vi.AltBP);
    Assert.AreEqual('C', vi.RefBP);
    Assert.AreEqual(VariantType.SNP, vi.Type);
    Assert.AreEqual(true, vi.AtEndOfAlignment);
}
/// <summary>
/// Verifies that a one-base homopolymer deletion is called correctly when both the
/// reference and the query are reverse complemented before alignment.
/// </summary>
/// <remarks>
/// The forward reference is "ATACCCCTTGCGC" and the query drops one 'C'. After reverse
/// complementing, the test expects the deleted base and homopolymer base to be 'G',
/// spanning positions 5 to 6, with the single low quality value (2) as the QV.
/// </remarks>
public static void TestReverseComplement1BPIndelCall()
{
    string seq1seq = "ATACCCCTTGCGC";
    // The dash only marks where the base was removed; it is stripped before use.
    string seq2seq = "ATA-CCCTTGCGC".Replace("-", String.Empty);
    int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30, 30, 30, 30, 30 };

    var refseq = new Sequence(DnaAlphabet.Instance, seq1seq);
    var query = new Sequence(DnaAlphabet.Instance, seq2seq);

    var s1rc = refseq.GetReverseComplementedSequence();
    var s2rc = query.GetReverseComplementedSequence();

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(s1rc, s2rc).First();

    // Qualities are reversed to line up with the reverse-complemented query.
    VariantCallTests.ConvertAlignedSequenceToQualSeq(aln, seq2qual.Reverse().ToArray());
    aln.PairwiseAlignedSequences[0].Sequences[1].MarkAsReverseComplement();

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that failure messages read correctly.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(2, variant.QV);
    Assert.AreEqual(5, variant.StartPosition);
    Assert.AreEqual(VariantType.INDEL, variant.Type);

    var vi = variant as IndelVariant;
    // Fail with a clear assertion instead of a NullReferenceException below.
    Assert.IsNotNull(vi);
    Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
    Assert.AreEqual('G', vi.HomopolymerBase);
    Assert.AreEqual(1, vi.Length);
    Assert.AreEqual(4, vi.HomopolymerLengthInReference);
    Assert.AreEqual(true, vi.InHomopolymer);
    Assert.AreEqual("G", vi.InsertedOrDeletedBases);
    Assert.AreEqual(false, vi.AtEndOfAlignment);
    Assert.AreEqual(6, vi.EndPosition);
}
/// <summary>
/// Command-line entry point. Reads CCS reads from the input file, optionally aligns
/// them to a reference and calls variants, and streams every read through a set of
/// metric outputters that write into a newly created output directory.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: input file (parsed as FASTQ when the name ends in ".fastq",
/// otherwise as a PacBio CCS BAM); <c>args[1]</c>: output directory, which must not
/// already exist; optional <c>args[2]</c>: reference file. When the reference is
/// given, reads are aligned and variants are called. Passing "h", "help", "?" or "-h"
/// as the first argument prints the help text.
/// </param>
public static void Main(string[] args)
{
    try
    {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        // Check for a help request before validating the argument count; otherwise
        // a lone "-h" is rejected as "Not enough arguments" and never reaches here.
        if (args.Length > 0 &&
            (args[0] == "h" || args[0] == "help" || args[0] == "?" || args[0] == "-h"))
        {
            DisplayHelp();
            return;
        }

        if (args.Length > 3)
        {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
            return;
        }

        if (args.Length < 2)
        {
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
            return;
        }

        string bam_name = args[0];
        string out_dir = args[1];
        string ref_name = args.Length > 2 ? args[2] : null;

        if (!File.Exists(bam_name))
        {
            Console.WriteLine("Can't find file: " + bam_name);
            return;
        }

        if (ref_name != null && !File.Exists(ref_name))
        {
            Console.WriteLine("Can't find file: " + ref_name);
            return;
        }

        // Refuse to write into an existing directory rather than mix or overwrite results.
        if (Directory.Exists(out_dir))
        {
            Console.WriteLine("The output directory already exists, please specify a new directory or delete the old one.");
            return;
        }

        Directory.CreateDirectory(out_dir);

        List<CCSReadMetricsOutputter> outputters = new List<CCSReadMetricsOutputter>()
        {
            new ZmwOutputFile(out_dir),
            new ZScoreOutputter(out_dir),
            new VariantOutputter(out_dir),
            new SNROutputFile(out_dir),
            new QVCalibration(out_dir)
        };

        // The parser is chosen from the input file's extension.
        ISequenceParser reader;
        if (bam_name.EndsWith(".fastq", StringComparison.OrdinalIgnoreCase))
        {
            reader = new FastQCCSReader();
        }
        else
        {
            reader = new PacBioCCSBamReader();
        }

        // Alignment and variant calling only happen when a reference was supplied.
        BWAPairwiseAligner bwa = null;
        bool callVariants = ref_name != null;
        if (callVariants)
        {
            bwa = new BWAPairwiseAligner(ref_name, false);
        }

        // Produce aligned reads with variants called in parallel.
        var reads = new BlockingCollection<Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>>();
        Task producer = Task.Factory.StartNew(() =>
        {
            try
            {
                Parallel.ForEach(reader.Parse(bam_name), y =>
                {
                    var z = y as PacBioCCSRead;
                    try
                    {
                        BWAPairwiseAlignment aln = null;
                        List<Variant> variants = null;
                        if (callVariants)
                        {
                            aln = bwa.AlignRead(z.Sequence) as BWAPairwiseAlignment;
                            if (aln != null)
                            {
                                variants = VariantCaller.CallVariants(aln);
                                // Shift positions by the alignment's Pos and tag each
                                // variant with the name of the reference it was called on.
                                variants.ForEach(p =>
                                {
                                    p.StartPosition += aln.AlignedSAMSequence.Pos;
                                    p.RefName = aln.Reference;
                                });
                            }
                        }
                        var res = new Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>(z, aln, variants);
                        reads.Add(res);
                    }
                    catch (Exception thrown)
                    {
                        // z comes from an 'as' cast and may be null; reporting the failure
                        // must not itself throw, or the whole parallel loop is aborted.
                        string readId = (z != null && z.Sequence != null) ? z.Sequence.ID : "(unknown read)";
                        Console.WriteLine("CCS READ FAIL: " + readId);
                        Console.WriteLine(thrown.Message);
                    }
                });
            }
            catch (Exception thrown)
            {
                Console.WriteLine("Could not parse BAM file: " + thrown.Message);
                while (thrown.InnerException != null)
                {
                    Console.WriteLine(thrown.InnerException.Message);
                    thrown = thrown.InnerException;
                }
            }
            finally
            {
                // Always signal completion; otherwise the consumer loop below would
                // block forever if anything above escaped.
                reads.CompleteAdding();
            }
        });

        // Consume them into output files.
        foreach (var r in reads.GetConsumingEnumerable())
        {
            foreach (var outputter in outputters)
            {
                outputter.ConsumeCCSRead(r.Item1, r.Item2, r.Item3);
            }
        }

        // Throw any exceptions (this should be used after putting the consumer on a separate thread).
        producer.Wait();

        // Close the files.
        outputters.ForEach(z => z.Finish());
    }
    catch (DllNotFoundException thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        Console.WriteLine("A shared library was not found. To solve this, please add the folder" +
            " with the downloaded files libbwasharp and libMonoPosixHelper" +
            " to your environmental variables (LD_LIBRARY_PATH on Ubuntu, DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    }
    catch (Exception thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
        // Walk the chain so wrapped causes (e.g. from task aggregation) are visible.
        while (thrown.InnerException != null)
        {
            Console.WriteLine("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}