/// <summary>
/// Runs the "contig" assembly scenario and checks that the first contig has the
/// consensus and the number of sequences recorded in the test configuration Xml.
/// </summary>
public void SequenceAssemblerWithContigMethod()
{
    IOverlapDeNovoAssembly assembly = GetSequenceAssembly("contig");

    // Expected values are read from the Xml configuration.
    string expectedConsensus = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.ContigConsensusNode);
    string expectedCountText = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.ContigSequencesCountNode);
    int expectedSequenceCount = int.Parse(expectedCountText, null);

    // Read the contig from Contig method.
    Contig firstContig = assembly.Contigs[0];

    // Log the required info.
    string consensusMessage = string.Format(
        (IFormatProvider)null,
        "SequenceAssembly BVT : Consensus read is '{0}'.",
        firstContig.Consensus.ToString());
    ApplicationLog.WriteLine(consensusMessage);
    Console.WriteLine(consensusMessage);

    // The consensus items are converted one by one to characters before comparing.
    char[] consensusChars = firstContig.Consensus.Select(item => (char)item).ToArray();
    Assert.AreEqual(expectedConsensus, new string(consensusChars));
    Assert.AreEqual(expectedSequenceCount, firstContig.Sequences.Count);

    const string successMessage = "SequenceAssembly BVT : Successfully read the Contig.";
    ApplicationLog.WriteLine(successMessage);
    Console.WriteLine(successMessage);
}
/// <summary>
/// Assembles two DNA sequences with an <c>OverlapDeNovoAssembler</c> configured with a
/// Needleman-Wunsch overlap aligner and checks the string form of the first contig
/// against a fixed expected value.
/// </summary>
public void TestContigToString()
{
    // test parameters
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false,
    };

    // seq1 is passed to the assembler before seq2.
    IOverlapDeNovoAssembly seqAssembly =
        (IOverlapDeNovoAssembly)assembler.Assemble(new List<ISequence> { seq1, seq2 });

    Contig contig0 = seqAssembly.Contigs[0];
    string actualString = contig0.ToString();

    //string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
    string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

    // Expected value goes first so that a failure message labels the values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Aligns a single read ("CCTTATCAG") against a single contig ("TCTGATAAGG") with
/// <c>ReadAlignment.ReadContigAlignment</c> and checks that the read is reported as
/// mapped at position 1, both reversed and complemented.
/// </summary>
public void MapContigToReverseComplementOfRead()
{
    IList<ISequence> contigs = new List<ISequence>();
    IList<ISequence> reads = new List<ISequence>();

    Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray());
    seq.ID = "1";
    contigs.Add(seq);

    Sequence read = new Sequence(Alphabets.DNA, "CCTTATCAG".Select(a => (byte)a).ToArray());
    read.ID = "2";
    reads.Add(read);

    const int kmerLength = 6;
    IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

    // All assertions pass the expected value first so that failure messages
    // label "expected" and "actual" correctly.
    Assert.AreEqual(contigs.Count, alignment.Count);

    Contig contig = alignment.First();
    Contig.AssembledSequence sequence = contig.Sequences.First();

    Assert.AreEqual(9, sequence.Length);
    Assert.AreEqual(1, sequence.Position);
    Assert.AreEqual(0, sequence.ReadPosition);
    Assert.AreEqual(reads.First(), sequence.Sequence);
    Assert.AreEqual(true, sequence.IsComplemented);
    Assert.AreEqual(true, sequence.IsReversed);
}
/// <summary>
/// Assembles two DNA sequences with an <c>OverlapDeNovoAssembler</c> configured with a
/// Needleman-Wunsch overlap aligner and checks the string form of the first contig
/// against a fixed expected value.
/// </summary>
public void TestContigToString()
{
    // test parameters
    int matchScore = 5;
    int mismatchScore = -4;
    int gapCost = -10;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new NeedlemanWunschAligner();

    // The scoring properties are set through the pairwise-aligner interface;
    // the cast is done once instead of once per property.
    IPairwiseSequenceAligner pairwiseAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    pairwiseAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    pairwiseAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    // seq1 is passed to the assembler before seq2.
    List<ISequence> inputs = new List<ISequence> { seq1, seq2 };

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    Contig contig0 = seqAssembly.Contigs[0];
    string actualString = contig0.ToString();
    string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";

    // Expected value goes first so that a failure message labels the values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Parses the Xsv sparse file named in the test configuration as a contig and checks
/// the contig length, consensus size, consensus ID and number of assembled sequences.
/// </summary>
public void XsvSparseParseContig()
{
    // Gets the expected file from the Xml
    string xsvFilePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleXsvSparseNodeName,
        Constants.FilePathNode);
    Assert.IsTrue(File.Exists(xsvFilePath));

    // Logs information to the log file
    string fileExistsMessage = string.Format(
        (IFormatProvider)null,
        "XsvSparse Formatter BVT: File Exists in the Path '{0}'.",
        xsvFilePath);
    ApplicationLog.WriteLine(fileExistsMessage);

    using (XsvContigParser contigParser = new XsvContigParser(xsvFilePath, Alphabets.DNA, ',', '#'))
    {
        contigParser.Parse();
        Contig parsedContig = contigParser.ParseContig();

        // Validate parsed temp file with original Xsv file.
        Assert.AreEqual(26048682, parsedContig.Length);
        Assert.AreEqual(26048682, parsedContig.Consensus.Count);
        Assert.AreEqual("Chr22+Chr22+Chr22+Chr22", parsedContig.Consensus.ID);
        Assert.AreEqual(56, parsedContig.Sequences.Count);
    }

    // Log to GUI.
    const string successMessage = "Successfully validated the ParseConting() method with Xsv file";
    Console.WriteLine(successMessage);
    ApplicationLog.WriteLine(successMessage);
}
/// <summary>
/// Aligns a single read ("CTGATAAGG") against a single contig ("TCTGATAAGG") with
/// <c>ReadAlignment.ReadContigAlignment</c> and checks that the read is reported as
/// mapped at position 1, neither reversed nor complemented.
/// </summary>
public void MapReadToContig()
{
    IList<ISequence> contigs = new List<ISequence>();
    IList<ISequence> reads = new List<ISequence>();

    Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG");
    seq.DisplayID = "1";
    contigs.Add(seq);

    Sequence read = new Sequence(Alphabets.DNA, "CTGATAAGG");
    read.DisplayID = "2";
    reads.Add(read);

    const int kmerLength = 6;
    IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

    // All assertions pass the expected value first so that failure messages
    // label "expected" and "actual" correctly.
    Assert.AreEqual(contigs.Count, alignment.Count);

    Contig contig = alignment.First();
    Contig.AssembledSequence sequence = contig.Sequences.First();

    Assert.AreEqual(9, sequence.Length);
    Assert.AreEqual(1, sequence.Position);
    Assert.AreEqual(0, sequence.ReadPosition);
    Assert.AreEqual(reads.First(), sequence.Sequence);
    Assert.AreEqual(false, sequence.IsComplemented);
    Assert.AreEqual(false, sequence.IsReversed);
}
/// <summary>
/// Clears the consensus of the first contig from the "consensus" assembly scenario,
/// rebuilds it with <c>OverlapDeNovoAssembler.MakeConsensus</c> and compares the result
/// with the consensus recorded in the test configuration Xml.
/// </summary>
public void SimpleConsensusWithMakeConsensusMethod()
{
    IOverlapDeNovoAssembly assembly = GetSequenceAssembly("consensus");

    // Expected consensus, threshold and alphabet come from the Xml configuration.
    string expectedConsensus = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.ContigConsensusNode);
    string thresholdText = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.ConsensusThresholdNode);
    double consensusThreshold = double.Parse(thresholdText, null);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

    // Read the contig from Contig method and discard its consensus so that
    // MakeConsensus has to produce it again.
    Contig targetContig = assembly.Contigs[0];
    targetContig.Consensus = null;

    OverlapDeNovoAssembler consensusBuilder = new OverlapDeNovoAssembler
    {
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
    };
    consensusBuilder.MakeConsensus(alphabet, targetContig);

    char[] rebuiltChars = targetContig.Consensus.Select(item => (char)item).ToArray();
    Assert.AreEqual(expectedConsensus, new string(rebuiltChars));

    // Log the required info.
    string consensusMessage = string.Format(
        (IFormatProvider)null,
        "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
        targetContig.Consensus.ToString());
    ApplicationLog.WriteLine(consensusMessage);
    Console.WriteLine(consensusMessage);
}
/// <summary>
/// Resets the view: releases the references to the sequence list, the aligned
/// sequence and the contig, then removes all child elements.
/// </summary>
public void Clear()
{
    // Drop every data reference first.
    contig = null;
    alignedSequence = null;
    sequences = null;

    // Then remove whatever is currently in the Children collection.
    Children.Clear();
}
/// <summary>
/// Formats a (sparse) contig to a character separated value file,
/// writing the consensus first, followed by the sequence separator,
/// and each assembled sequence followed by the sequence separator.
/// The consensus has an offset of 0, while the assembled sequences have the
/// offset as present in AssembledSequence.Position.
/// </summary>
/// <param name="contig">The contig to format as a set of sparse sequences.</param>
/// <param name="writer">The text writer to write the formatted output to.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contig"/> is null.</exception>
public void Format(Contig contig, TextWriter writer)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // on the first member access below.
    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    // Consensus goes first.
    Format(contig.Consensus, writer);

    // Each assembled sequence is written with its position as the offset.
    foreach (Contig.AssembledSequence aSeq in contig.Sequences)
    {
        Format(aSeq.Sequence, aSeq.Position, writer);
    }
}
/// <summary>
/// Makes an aligned sequence the data source of the panel.
/// Any previously set sequence list or contig is dropped and the panel's
/// measure is invalidated.
/// </summary>
/// <param name="alignedSequence">Alignment to plot</param>
/// <param name="maximumWidthInChars">Maximum span of the sequences</param>
public void SetDataSource(IAlignedSequence alignedSequence, int maximumWidthInChars)
{
    // Release the other kinds of data source.
    contigSequences = null;
    contig = null;
    sequences = null;

    maxLength = maximumWidthInChars;
    this.alignedSequence = alignedSequence;

    InvalidateMeasure();
}
/// <summary>
/// Makes a list of sequences the data source of the panel.
/// Any previously set aligned sequence or contig is dropped and the panel's
/// measure is invalidated.
/// </summary>
/// <param name="sourceList">List of sequences to plot</param>
/// <param name="maximumWidthInChars">Maximum span of the sequences</param>
public void SetDataSource(IList<ISequence> sourceList, int maximumWidthInChars)
{
    // Release the other kinds of data source.
    contigSequences = null;
    alignedSequence = null;
    contig = null;

    maxLength = maximumWidthInChars;
    sequences = sourceList;

    InvalidateMeasure();
}
/// <summary>
/// Makes a contig the data source of the panel.
/// Any previously set sequence list or aligned sequence is dropped, the contig's
/// sequences are ordered by position and the panel's measure is invalidated.
/// </summary>
/// <param name="contig">Contig to plot</param>
/// <param name="maximumWidthInChars">Maximum span of the sequences</param>
/// <param name="basePaddingLeft">Padding to be added to left of all sequences</param>
public void SetDataSource(Contig contig, int maximumWidthInChars, int basePaddingLeft)
{
    // Release the other kinds of data source.
    this.alignedSequence = null;
    this.sequences = null;

    this.contig = contig;
    this.maxLength = maximumWidthInChars;
    this.basePaddingLeft = basePaddingLeft;

    // The ordering is a LINQ query over contig.Sequences, keyed on Position.
    this.contigSequences = contig.Sequences.OrderBy(assembled => assembled.Position);

    InvalidateMeasure();
}
/// <summary>
/// Writes a (sparse) contig in character separated value form: the consensus
/// first, then every assembled sequence.
/// Each assembled sequence is written with the offset stored in its metadata
/// under <c>XsvSparseParser.MetadataOffsetKey</c>.
/// </summary>
/// <param name="contig">The contig to format as a set of sparse sequences.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contig"/> is null.</exception>
public void Write(Contig contig)
{
    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    Write(contig.Consensus);

    foreach (Contig.AssembledSequence assembled in contig.Sequences)
    {
        ISequence sparseSequence = assembled.Sequence;
        long offset = (long)sparseSequence.Metadata[XsvSparseParser.MetadataOffsetKey];
        Format(sparseSequence, offset);
    }
}
/// <summary>
/// This converts a list of sparse sequences read from the Text reader into a contig.
/// Assumes the first sequence is the consensus and the rest are assembled sequences.
/// The positions of the assembled sequences are the offsets of the sparse sequences in
/// the sequence start line. The positions of the sequence items are the same as their
/// position field value in each character separated line
/// (i.e. they are not incremented by the offset)
/// </summary>
/// <param name="reader">Text reader with the formatted contig</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequences in the contig should be in readonly mode or not.
/// If this flag is set to true then the resulting sequences's isReadOnly property
/// will be set to true, otherwise it will be set to false.
/// </param>
/// <returns>The parsed contig with consensus and assembled sequences, all represented
/// as SparseSequences.
/// Null if no lines were present in the reader. Exception if valid sparse sequences
/// were not present.
/// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
public Contig ParseContig(TextReader reader, bool isReadOnly)
{
    // Check input arguments
    if (reader == null)
    {
        throw new ArgumentNullException("reader", "Text reader to read contig from cannot be null");
    }

    // parse the consensus
    XsvSparseReader sparseReader = GetSparseReader(reader);
    ISequence consensus = ParseOne(sparseReader, isReadOnly);
    if (consensus == null)
    {
        return null;
    }

    // Everything after the consensus is treated as assembled sequences.
    Contig contig = new Contig();
    contig.Consensus = consensus;
    contig.Sequences = ParseAssembledSequence(sparseReader, isReadOnly);
    return contig;
}
/// <summary>
/// Assembles three short DNA sequences with an <c>OverlapDeNovoAssembler</c> that uses a
/// <c>PairwiseOverlapAligner</c>, and checks that they merge into a single contig with
/// the expected consensus and no unmerged sequences.
/// </summary>
public void TestSimpleSequenceAssembler()
{
    Trace.Set(Trace.AssemblyDetails); // turn on log dump

    // test parameters
    int matchScore = 1;
    int mismatchScore = -8;
    int gapCost = -8;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    Sequence seq1 = new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC");
    Sequence seq2 = new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA");
    Sequence seq3 = new Sequence(Alphabets.DNA, "TATAAAGCGCCAA");

    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //         GCCAAAATTTAGGC
    //                   AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

    // Scoring is configured through the pairwise-aligner interface of the overlap algorithm.
    IPairwiseSequenceAligner pairwiseAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    pairwiseAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    pairwiseAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence> { seq1, seq2, seq3 };
    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig firstContig = seqAssembly.Contigs[0];
    Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", firstContig.Consensus.ConvertToString());
    Assert.AreEqual(3, firstContig.Sequences.Count);
}
/// <summary>
/// Reads the file named by <c>Filename</c> and converts the sparse sequences in it
/// into a contig. The first sequence is taken as the consensus; the remaining ones are
/// taken as the assembled sequences.
/// </summary>
/// <returns>
/// The parsed contig, or null when no consensus sequence could be read.
/// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
/// </returns>
public Contig ParseContig()
{
    using (StreamReader fileReader = new StreamReader(this.Filename))
    {
        XsvSparseReader sparseReader = new XsvSparseReader(fileReader, separator, sequenceIdPrefix);

        // parse the consensus
        ISequence consensus = ParseOne(sparseReader);
        if (consensus != null)
        {
            // Whatever follows the consensus makes up the assembled sequences.
            return new Contig
            {
                Consensus = consensus,
                Sequences = ParseAssembledSequence(sparseReader)
            };
        }

        return null;
    }
}
/// <summary>
/// Creates (or overwrites) the named file and writes the contig to it with the
/// given formatter.
/// </summary>
/// <param name="formatter">Formatter</param>
/// <param name="contig">Contig to write</param>
/// <param name="filename">Filename</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="formatter"/> or <paramref name="contig"/> is null, or when
/// <paramref name="filename"/> is null, empty or white space.
/// </exception>
public static void Format(this XsvContigFormatter formatter, Contig contig, string filename)
{
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    if (string.IsNullOrWhiteSpace(filename))
    {
        throw new ArgumentNullException("filename");
    }

    // The file stream is disposed as soon as the contig has been written.
    using (FileStream output = File.Create(filename))
    {
        formatter.Write(output, contig);
    }
}
/// <summary>
/// Runs the "assemble" assembly scenario and checks the number of unmerged sequences,
/// the number of contigs, and the consensus and sequence count of the first contig
/// against the values recorded in the test configuration Xml.
/// </summary>
public void SequenceAssemblerWithAssembleMethod()
{
    IOverlapDeNovoAssembly assembly = GetSequenceAssembly("assemble");

    string expectedConsensus = utilityObj.xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName,
        Constants.ContigConsensusNode);
    int expectedSequenceCount = int.Parse(
        utilityObj.xmlUtil.GetTextValue(
            Constants.AssemblyAlgorithmNodeName,
            Constants.ContigSequencesCountNode),
        null);

    // Get the parameters from Xml for Assemble() method test cases.
    int expectedUnmergedCount = int.Parse(
        utilityObj.xmlUtil.GetTextValue(
            Constants.AssemblyAlgorithmNodeName,
            Constants.UnMergedSequencesCountNode),
        null);
    int expectedContigCount = int.Parse(
        utilityObj.xmlUtil.GetTextValue(
            Constants.AssemblyAlgorithmNodeName,
            Constants.ContigsCountNode),
        null);

    Assert.AreEqual(expectedUnmergedCount, assembly.UnmergedSequences.Count);
    Assert.AreEqual(expectedContigCount, assembly.Contigs.Count);

    Contig firstContig = assembly.Contigs[0];
    char[] consensusChars = firstContig.Consensus.Select(item => (char)item).ToArray();
    Assert.AreEqual(expectedConsensus, new string(consensusChars));
    Assert.AreEqual(expectedSequenceCount, firstContig.Sequences.Count);

    // Logs the consensus and the counts.
    string unmergedText = assembly.UnmergedSequences.Count.ToString((IFormatProvider)null);
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
        unmergedText));

    string contigCountText = assembly.Contigs.Count.ToString((IFormatProvider)null);
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "SequenceAssembly BVT : Contigs Count is '{0}'.",
        contigCountText));

    string sequenceCountText = firstContig.Sequences.Count.ToString((IFormatProvider)null);
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
        sequenceCountText));

    string consensusMessage = string.Format(
        (IFormatProvider)null,
        "SequenceAssembly BVT : Consensus read is '{0}'.",
        firstContig.Consensus.ToString());
    ApplicationLog.WriteLine(consensusMessage);
    Console.WriteLine(consensusMessage);
}
/// <summary>
/// Round-trip test for the Xsv contig formatter: parses the configured Xsv file into a
/// contig, writes that contig to a temporary file, parses the temporary file again and
/// checks that length, consensus size, consensus ID, sequence count and the sequence IDs
/// are unchanged.
/// </summary>
public void XsvSparseContigFormatterWrite()
{
    // Gets the expected sequence from the Xml
    string filePathObj = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleXsvSparseNodeName,
        Constants.FilePathNode).TestDir();

    Assert.IsTrue(File.Exists(filePathObj));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "Xsv Contig Formatter BVT: File Exists in the Path '{0}'.",
        filePathObj));

    XsvContigParser parserObj = new XsvContigParser(Alphabets.DNA, ',', '#');
    Contig contig = parserObj.ParseContig(filePathObj);

    // Every ID followed by a comma, in sequence order.
    string seqId = string.Concat(contig.Sequences.Select(seq => seq.Sequence.ID + ","));

    try
    {
        // Write Xsv file.
        XsvContigFormatter formatObj = new XsvContigFormatter(',', '#');
        formatObj.Format(contig, Constants.XsvTempFileName);

        XsvContigParser parserObjNew = new XsvContigParser(Alphabets.DNA, ',', '#');
        Contig expectedContig = parserObjNew.ParseContig(Constants.XsvTempFileName);
        string expectedseqId = string.Concat(
            expectedContig.Sequences.Select(seq => seq.Sequence.ID + ","));

        // Validate parsed temp file with original Xsv file.
        Assert.AreEqual(contig.Length, expectedContig.Length);
        Assert.AreEqual(contig.Consensus.Count, expectedContig.Consensus.Count);
        Assert.AreEqual(contig.Consensus.ID, expectedContig.Consensus.ID);
        Assert.AreEqual(contig.Sequences.Count, expectedContig.Sequences.Count);
        Assert.AreEqual(seqId.Length, expectedseqId.Length);
        Assert.AreEqual(seqId, expectedseqId);
    }
    finally
    {
        // Remove the temp file even when formatting, parsing or an assertion fails,
        // so a failed run does not leave it behind.
        File.Delete(Constants.XsvTempFileName);
    }

    ApplicationLog.WriteLine("Successfully validated the Write Xsv file");
}
/// <summary>
/// Writes a contig to the stream the formatter already has open.
/// </summary>
/// <param name="formatter">Formatter</param>
/// <param name="contig">Contig to write</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="formatter"/> or <paramref name="contig"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when no open stream is available for the formatter.
/// </exception>
public static void Format(this XsvContigFormatter formatter, Contig contig)
{
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    var fs = ParserFormatterExtensions<ISequenceFormatter>.GetOpenStream(formatter, true);
    if (fs == null)
    {
        // A missing stream means the formatter is in the wrong state for this call.
        // InvalidOperationException derives from Exception, so existing
        // catch (Exception) handlers keep working.
        throw new InvalidOperationException("You must open a formatter before calling Write.");
    }

    formatter.Write(fs, contig);
}
/// <summary>
/// Sets the data source of the control from the output of an assembly.
/// The contig's consensus becomes the reference sequence, and the assembled sequences
/// are added in order of position together with their alignment properties.
/// </summary>
/// <param name="contig">Contig retrieved after doing the assembly.</param>
public void SetDataSource(Contig contig)
{
    // Start from empty lists; both lists are filled in the same order below.
    this.sequenceList = new List<ISequence>();
    this.sequencePropertiesList = new List<SequenceProperties>();
    this.referenceSequence = contig.Consensus;

    var orderedByPosition = contig.Sequences.OrderBy(assembled => assembled.Position);
    foreach (Contig.AssembledSequence assembled in orderedByPosition)
    {
        sequenceList.Add(assembled.Sequence);

        var properties = new SequenceProperties();
        properties.AlignPosition = assembled.Position;
        properties.ReadStartAlignPosition = assembled.ReadPosition;

        // A length of zero falls back to the full length of the sequence.
        properties.AlignmentLength = assembled.Length == 0 ? assembled.Sequence.Count : assembled.Length;
        properties.IsComplemented = assembled.IsComplemented;
        properties.IsReversed = assembled.IsReversed;

        sequencePropertiesList.Add(properties);
    }

    DataSourceUpdated();
}
/// <summary>
/// Converts the sparse sequences read from a text reader into a contig.
/// The first sequence is taken as the consensus; the remaining ones are taken as the
/// assembled sequences.
/// </summary>
/// <param name="reader">Text reader with the formatted contig</param>
/// <param name="isReadOnly">
/// Read-only flag that is passed on to the parsing of the consensus and of the
/// assembled sequences.
/// </param>
/// <returns>
/// The parsed contig, or null when no consensus sequence could be read.
/// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
public Contig ParseContig(TextReader reader, bool isReadOnly)
{
    // Check input arguments
    if (reader == null)
    {
        throw new ArgumentNullException("reader", "Text reader to read contig from cannot be null");
    }

    XsvSparseReader sparseReader = GetSparseReader(reader);

    // parse the consensus
    ISequence consensus = ParseOne(sparseReader, isReadOnly);
    if (consensus != null)
    {
        // Whatever follows the consensus makes up the assembled sequences.
        return new Contig
        {
            Consensus = consensus,
            Sequences = ParseAssembledSequence(sparseReader, isReadOnly)
        };
    }

    return null;
}
/// <summary>
/// Writes a (sparse) contig to a stream in character-separated value form: the
/// consensus first, then every assembled sequence.
/// Each assembled sequence is written with the offset stored in its metadata
/// under <c>XsvSparseParser.MetadataOffsetKey</c>.
/// </summary>
/// <param name="stream">Stream to write to, it is left open at the end.</param>
/// <param name="contig">The contig to format as a set of sparse sequences.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="stream"/> or <paramref name="contig"/> is null.
/// </exception>
public void Write(Stream stream, Contig contig)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    // Write the consensus sequence out.
    base.Format(stream, contig.Consensus);

    // Write out the assembled sequences; the writer is opened with leaveOpen: true.
    using (StreamWriter sequenceWriter = stream.OpenWrite(leaveOpen: true))
    {
        foreach (Contig.AssembledSequence assembled in contig.Sequences)
        {
            ISequence sparseSequence = assembled.Sequence;
            long offset = (long)sparseSequence.Metadata[XsvSparseParser.MetadataOffsetKey];
            this.Write(sequenceWriter, sparseSequence, offset);
        }
    }
}
/// <summary>
/// Validates the Sequence Assembler for all the general test cases.
/// Reads the scoring parameters, input sequences and expected results from the given
/// Xml node, assembles the sequences with an OverlapDeNovoAssembler using a
/// PairwiseOverlapAligner, and asserts the outcome against the expected values.
/// </summary>
/// <param name="nodeName">Xml Node Name</param>
/// <param name="additionalParameter">
/// Additional Parameter based on which the validations are done:
/// Consensus re-creates the consensus of the first contig and checks it;
/// SimilarityMatrix loads the similarity matrix from the configured Blosum file;
/// every other value uses a diagonal similarity matrix and validates the assembly
/// counts, documentation and first contig.
/// </param>
/// <param name="isSeqAssemblyctr">
/// True if Default constructor is validated or else false.
/// This flag is not read by the method body.
/// </param>
private void ValidateSequenceAssemblerGeneral(string nodeName, AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
{
    // Get the parameters from Xml
    int matchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ConsensusThresholdNode), null);
    string[] sequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencesNode);
    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string documentation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DocumentaionNode);

    bool consensusOnly = additionalParameter == AssemblyParameters.Consensus;

    // The input sequences are built the same way for every mode; only the log text differs.
    string sequenceLogFormat = consensusOnly
        ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
        : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

    var inputs = new List<ISequence>();
    for (int i = 0; i < sequences.Length; i++)
    {
        // Logs the sequences
        ApplicationLog.WriteLine(string.Format(null, sequenceLogFormat, i, sequences[i]));
        inputs.Add(new Sequence(alphabet, sequences[i]));
    }

    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //         GCCAAAATTTAGGC
    //                   AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner()
    };

    // Only the SimilarityMatrix mode loads a matrix from file; every other mode
    // (including DiagonalSM) uses the diagonal matrix built from the Xml scores.
    if (additionalParameter == AssemblyParameters.SimilarityMatrix)
    {
        string blosumFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        assembler.OverlapAlgorithm.SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
    }
    else
    {
        assembler.OverlapAlgorithm.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    }

    assembler.OverlapAlgorithm.GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Set Documentation property.
    assembly.Documentation = documentation;

    // Get the parameters from Xml in general
    int contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
    string contigConsensus = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

    if (consensusOnly)
    {
        // Read the contig from Contig method, drop its consensus and let a fresh
        // assembler rebuild it so that MakeConsensus itself is what gets validated.
        Contig contigReadForConsensus = assembly.Contigs[0];
        contigReadForConsensus.Consensus = null;
        var simpleSeqAssembler = new OverlapDeNovoAssembler
        {
            ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
        };
        simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

        // Log the required info.
        ApplicationLog.WriteLine(string.Format(null,
            "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
            contigReadForConsensus.Consensus));
        Assert.AreEqual(contigConsensus,
            new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));
        return;
    }

    // Get the parameters from Xml for Assemble() method test cases.
    int unMergedCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.UnMergedSequencesCountNode), null);
    int contigsCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCountNode), null);

    Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
    Assert.AreEqual(contigsCount, assembly.Contigs.Count);
    Assert.AreEqual(documentation, assembly.Documentation);
    Contig contigRead = assembly.Contigs[0];

    // Logs the consensus
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
        assembly.UnmergedSequences.Count));
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Contigs Count is '{0}'.",
        assembly.Contigs.Count));
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
        contigRead.Sequences.Count));
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Consensus read is '{0}'.",
        contigRead.Consensus));

    Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
    Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
}
/// <summary>
/// Validates Contig.ToString() for two assemblies: one built from two sequences
/// read from the ToString Xml node with a NeedlemanWunschAligner, and one built
/// from three sequences read from the assembly-algorithm Xml node with a
/// PairwiseOverlapAligner.
/// </summary>
public void ValidateContigToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // ---- Part 1: two sequences, Needleman-Wunsch overlap ----
    string secondText = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
    string firstText = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);
    ISequence first = new Sequence(Alphabets.DNA, firstText);
    ISequence second = new Sequence(Alphabets.DNA, secondText);

    var needlemanAligner = new NeedlemanWunschAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
        GapOpenCost = gapCost
    };
    var needlemanAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = needlemanAligner,
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var needlemanAssembly =
        (IOverlapDeNovoAssembly)needlemanAssembler.Assemble(new List<ISequence> { first, second });
    string actualString = needlemanAssembly.Contigs[0].ToString();
    string expectedString = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName, Constants.OverlapDenovoExpectedNode);

    // The expected text carries escaped "\r\n" markers, the actual text real CR/LF pairs;
    // both are stripped before comparing.
    Assert.AreEqual(expectedString.Replace("\\r\\n", ""), actualString.Replace("\r\n", ""));

    // ---- Part 2: three sequences, parameters taken from Xml ----
    int xmlMatchScore = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
    int xmlMismatchScore = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode), null);
    int xmlGapCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode), null);
    double xmlMergeThreshold = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode), null);
    double xmlConsensusThreshold = double.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode), null);

    string textA = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode1);
    string textB = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode2);
    string textC = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode));

    var pairwiseInputs = new List<ISequence>
    {
        new Sequence(alphabet, textA),
        new Sequence(alphabet, textB),
        new Sequence(alphabet, textC)
    };

    var pairwiseAligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(xmlMatchScore, xmlMismatchScore),
        GapOpenCost = xmlGapCost
    };
    var pairwiseAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = xmlMergeThreshold,
        OverlapAlgorithm = pairwiseAligner,
        ConsensusResolver = new SimpleConsensusResolver(xmlConsensusThreshold),
        AssumeStandardOrientation = false
    };

    // Assembles all the sequences.
    var pairwiseAssembly = (IOverlapDeNovoAssembly)pairwiseAssembler.Assemble(pairwiseInputs);
    string actualString1 = pairwiseAssembly.Contigs[0].ToString();

    const string expectedString1 = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";
    Assert.AreEqual(expectedString1, actualString1);
}
/// <summary>
/// Aligns reads to contigs using kmer method of alignment.
/// A kmer dictionary is built from the reads; every kmer of every contig (or its
/// reverse complement) is looked up in that dictionary, and each read that shares
/// at least one kmer with a contig is mapped onto that contig by MapRead.
/// </summary>
/// <param name="contigs">List of contig sequences.</param>
/// <param name="reads">List of read sequences.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <returns>
/// List of Contig, one per input contig and in the same order as <paramref name="contigs"/>.
/// Each contig's Consensus is the input contig sequence and its Sequences are the mapped reads.
/// </returns>
public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

    // Index the contigs in parallel. AsOrdered keeps the results aligned with the
    // input list; a plain AsParallel query gives no ordering guarantee.
    IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
    {
        IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
        ContigIndex index = new ContigIndex(contig);

        foreach (ISequence kmer in kmers)
        {
            IList<KmerIndexer> positions;
            if (map.TryGetValue(kmer, out positions) ||
                map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
            {
                index.ContigReadMatchIndexes.Add(positions);
            }
            else
            {
                // Keep one entry per contig kmer even when no read contains it.
                index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
            }
        }

        return index;
    }).ToList();

    return contigDatas.Select(contigData =>
    {
        IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

        // Reads for which a task has been generated, in task order
        // (visitedReads[i] belongs to tasks[i]).
        IList<long> visitedReads = new List<long>();

        // Constant-time membership check for visitedReads.
        HashSet<long> queuedReads = new HashSet<long>();

        // Creates one task per distinct read, started at the first contig kmer that hits it.
        for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
        {
            int readPosition = index;
            foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
            {
                long readIndex = kmer.SequenceIndex;
                if (queuedReads.Add(readIndex))
                {
                    visitedReads.Add(readIndex);

                    // Parameterless delegate, so nothing is bound as a task state object.
                    // The results are collected below through Task.Result.
                    tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                        () => MapRead(readPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
                }
            }
        }

        Contig contigOutputStructure = new Contig();
        contigOutputStructure.Consensus = contigData.ContigSequence;

        for (int index = 0; index < visitedReads.Count; index++)
        {
            // Result blocks until the mapping task for this read has finished.
            foreach (ReadMap maps in tasks[index].Result)
            {
                Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                {
                    Length = maps.Length,
                    Position = maps.StartPositionOfContig,
                    ReadPosition = maps.StartPositionOfRead,
                    Sequence = reads.ElementAt(visitedReads[index])
                };

                // The read is taken as forward-oriented when the mapped stretch is
                // symbol-for-symbol identical in contig and read; otherwise it is
                // flagged as reversed and complemented.
                bool sameOrientation =
                    contigOutputStructure.Consensus
                        .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                        .SequenceEqual(
                            assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length));

                assembledSeq.IsComplemented = !sameOrientation;
                assembledSeq.IsReversed = !sameOrientation;

                contigOutputStructure.Sequences.Add(assembledSeq);
            }
        }

        return contigOutputStructure;
    }).ToList();
}
/// <summary>
/// Aligns reads to contigs using kmer method of alignment.
/// The reads are indexed by kmer; each contig kmer (or its reverse complement) is
/// looked up in that index, and every read sharing a kmer with a contig is mapped
/// onto it through MapRead.
/// </summary>
/// <param name="contigs">List of contig sequences.</param>
/// <param name="reads">List of read sequences.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <returns>
/// List of Contig, in the same order as <paramref name="contigs"/>; the Consensus of
/// each entry is the corresponding input contig and its Sequences are the mapped reads.
/// </returns>
public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

    // AsOrdered makes the parallel query return the contig indexes in input order;
    // without it the order of the results is not guaranteed.
    IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
    {
        IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
        ContigIndex index = new ContigIndex(contig);
        IList<KmerIndexer> positions;

        foreach (ISequence kmer in kmers)
        {
            bool found = map.TryGetValue(kmer, out positions)
                || map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions);

            // One entry is added per contig kmer; an empty list marks "no read has this kmer".
            index.ContigReadMatchIndexes.Add(found ? positions : new List<KmerIndexer>());
        }

        return index;
    }).ToList();

    return contigDatas.Select(contigData =>
    {
        IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

        // Reads that already have a mapping task; entry i pairs with tasks[i].
        IList<long> visitedReads = new List<long>();

        // Set mirror of visitedReads so the "already queued?" test is not a linear scan.
        HashSet<long> queuedReads = new HashSet<long>();

        // Creates a task for every distinct read hit by the kmers of this contig.
        for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
        {
            int readPosition = index;
            foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
            {
                long readIndex = kmer.SequenceIndex;
                if (!queuedReads.Add(readIndex))
                {
                    continue;
                }

                visitedReads.Add(readIndex);

                // The delegate takes no state argument, so no value is silently bound
                // as task state. Completion is awaited below via Task.Result.
                tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                    () => MapRead(readPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
            }
        }

        Contig contigOutputStructure = new Contig();
        contigOutputStructure.Consensus = contigData.ContigSequence;

        for (int index = 0; index < visitedReads.Count; index++)
        {
            // Blocks until the task for this read has produced its mappings.
            foreach (ReadMap maps in tasks[index].Result)
            {
                Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                {
                    Length = maps.Length,
                    Position = maps.StartPositionOfContig,
                    ReadPosition = maps.StartPositionOfRead,
                    Sequence = reads.ElementAt(visitedReads[index])
                };

                // Identical symbols over the mapped stretch mean the read lies on the
                // contig as-is; any difference flags it as reversed and complemented.
                bool sameOrientation =
                    contigOutputStructure.Consensus
                        .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                        .SequenceEqual(
                            assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length));

                assembledSeq.IsComplemented = !sameOrientation;
                assembledSeq.IsReversed = !sameOrientation;

                contigOutputStructure.Sequences.Add(assembledSeq);
            }
        }

        return contigOutputStructure;
    }).ToList();
}
/// <summary>
/// Assembles two long DNA sequences with an OverlapDeNovoAssembler, swapping in four
/// overlap aligners one after another (NeedlemanWunschAligner, SmithWatermanAligner,
/// PairwiseOverlapAligner, MUMmerAligner). For each aligner the test expects no
/// unmerged sequences, exactly one contig made of two sequences, and a fixed
/// consensus string.
/// </summary>
public void TestSimpleSequenceAssemblerWithSwineflu()
{
    Trace.Set(Trace.AssemblyDetails); // turn on log dump

    // test parameters
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // seq2 is the longer input; seq1 below is a slightly different, shorter sequence.
    ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    // The same assembler instance is reused for every aligner; only OverlapAlgorithm is replaced.
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler { MergeThreshold = mergeThreshold, OverlapAlgorithm = new NeedlemanWunschAligner { SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore), GapOpenCost = gapCost }, ConsensusResolver = new SimpleConsensusResolver(consensusThreshold), AssumeStandardOrientation = false };
    var inputs = new List <ISequence> { seq1, seq2 };

    // 1) NeedlemanWunschAligner (configured above with the diagonal matrix and gap cost).
    var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    Contig contig0 = seqAssembly.Contigs[0];
    string expected = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTYARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "NeedlemanWunschAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // 2) SmithWatermanAligner, created with its default settings.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "SmithwatermanAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // 3) PairwiseOverlapAligner, created with its default settings.
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "PairwiseOverlapAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);

    // 4) MUMmerAligner. 'expected' is intentionally not reassigned: the consensus is
    //    checked against the same string as in the PairwiseOverlapAligner step.
    assembler.OverlapAlgorithm = new MUMmerAligner();
    seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);
    contig0 = seqAssembly.Contigs[0];
    Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "MUMmerAligner");
    Assert.AreEqual(2, contig0.Sequences.Count);
}
/// <summary>
/// Generates a seeded pseudo-random master sequence, cuts it into randomly placed
/// (and randomly reverse-complemented) reads, assembles the reads with a
/// PairwiseOverlapAligner and expects one contig whose consensus equals the master
/// sequence, with two reads left unmerged.
/// </summary>
public void TestSimpleSequenceAssemblerWithRandomSequence()
{
    // Test parameters.
    //
    // In principle the test can pass whenever every position of the master
    // sequence is covered by at least one read. Some settings still make it
    // fail, for reasons such as:
    // 1. Short reads (a by-product of the strategy used to get full coverage
    //    at the ends) may not score well enough to merge.
    // 2. Random read generation can always leave positions uncovered
    //    (more reads make this less likely).
    // 3. The assembler may build the reverse or the complement (or both)
    //    of the master sequence.
    // 4. A merge threshold that is too low can cause incorrect merges, which
    //    the algorithm will not repair.
    const int MasterLength = 100;
    const int MinReadLength = 10;
    const int MaxReadLength = 30;
    const int NumReads = 200;
    const bool AssumeOrientedReads = true;
    int matchScore = 1;
    int mismatchScore = -8;
    int gapCost = -8;
    double mergeThreshold = 3;
    double consensusThreshold = 99;

    // Uncomment the next line to get assembly details in the log
    // (extremely verbose).
    // Trace.Set(Trace.AssemblyDetails);

    // Random master sequence. The fixed seed keeps the test repeatable;
    // use "new Random()" instead for a different test on every run.
    Random randGen = new Random(654321);

    // Each draw in [0, 8) selects one symbol; every base owns two of the eight slots.
    const string BaseBySlot = "AACCGGTT";
    StringBuilder randSeq = new StringBuilder();
    for (int i = 0; i < MasterLength; ++i)
    {
        randSeq.Append(BaseBySlot[randGen.Next(8)]);
    }

    Sequence master = new Sequence(Alphabets.AmbiguousDNA, randSeq.ToString());
    string masterText = master.ConvertToString();

    // create the reads
    List<ISequence> inputs = new List<ISequence>();
    for (int i = 0; i < NumReads; ++i)
    {
        // Aim for uniform coverage right up to the ends (this can produce short reads).
        // Negative start draws are clamped to 0.
        int start = Math.Max(0, randGen.Next(-MinReadLength, MasterLength - 1));
        int length = Math.Min(MasterLength - start, randGen.Next(MinReadLength, MaxReadLength + 1));
        string data = masterText.Substring(start, length);

        // Both orientation draws are always taken so the random stream stays in step.
        bool revcomp = randGen.Next(2) > 0;
        bool reverseDraw = randGen.Next(2) > 0;
        bool reverse = reverseDraw && !AssumeOrientedReads;

        ISequence read;
        if (!revcomp)
        {
            read = new Sequence(Alphabets.DNA, data);
        }
        else
        {
            Sequence tmp = new Sequence(Alphabets.DNA, data);
            string orientedText = reverse
                ? tmp.GetReversedSequence().ConvertToString()
                : tmp.GetReverseComplementedSequence().ConvertToString();
            read = new Sequence(Alphabets.DNA, orientedText);
        }

        ApplicationLog.WriteLine("read {0}: {1}", i, read);
        inputs.Add(read);
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

    IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    overlapAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = AssumeOrientedReads;

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    ApplicationLog.WriteLine(
        "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
        seqAssembly.Contigs.Count,
        seqAssembly.UnmergedSequences.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    string consensusText = contig0.Consensus.ConvertToString();
    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(master.ConvertToString());
    ApplicationLog.WriteLine(consensusText);

    Assert.AreEqual(2, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Tricky, especially without oriented reads: the consensus could be
    // reversed and/or complemented relative to the original.
    Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
}
/// <summary>
/// Maps five overlapping reads onto a single contig with
/// ReadAlignment.ReadContigAlignment and verifies, for every mapped read, its
/// length, its position on the contig, its read start position and that it is
/// neither complemented nor reversed.
/// </summary>
public void MapReadsToSingleContig()
{
    const int kmerLength = 6;

    // Read i gets the ID i.ToString(), i.e. "0" .. "4".
    string[] readTexts = { "GATGCCTC", "CCTCCTAT", "TCCTATC", "GCCTCCTAT", "TGCCTCCT" };
    IList<ISequence> readSeqs = new List<ISequence>();
    for (int i = 0; i < readTexts.Length; i++)
    {
        Sequence read = new Sequence(Alphabets.DNA, readTexts[i].Select(a => (byte)a).ToArray());
        read.ID = i.ToString();
        readSeqs.Add(read);
    }

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATGCCTCCTATC".Select(a => (byte)a).ToArray())
    };

    IList<Contig> maps = ReadAlignment.ReadContigAlignment(contigs, readSeqs, kmerLength);
    Contig contig = maps.First();
    Assert.AreEqual(contigs.First(), contig.Consensus);

    IList<Contig.AssembledSequence> readMap = Sort(contig.Sequences);

    // Expected placement of each entry of the sorted read map.
    var expectedPlacements = new[]
    {
        new { Length = 8, Position = 4 },
        new { Length = 8, Position = 0 },
        new { Length = 9, Position = 3 },
        new { Length = 7, Position = 6 },
        new { Length = 8, Position = 2 }
    };

    for (int i = 0; i < expectedPlacements.Length; i++)
    {
        // Expected value first, so a failing assertion reports the values the right way round.
        string entry = "readMap[" + i + "]";
        Assert.AreEqual(expectedPlacements[i].Length, readMap[i].Length, entry + ".Length");
        Assert.AreEqual(expectedPlacements[i].Position, readMap[i].Position, entry + ".Position");
        Assert.AreEqual(0, readMap[i].ReadPosition, entry + ".ReadPosition");

        // Every entry, including the last one, is checked for orientation.
        Assert.AreEqual(false, readMap[i].IsComplemented, entry + ".IsComplemented");
        Assert.AreEqual(false, readMap[i].IsReversed, entry + ".IsReversed");
    }
}
/// <summary>
/// Cuts five overlapping 10-base reads out of a seeded pseudo-random
/// 30-base DNA master sequence, assembles them with OverlapDeNovoAssembler
/// and expects exactly one contig, no unmerged sequences, and a consensus
/// equal to the master sequence.
/// </summary>
public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
{
    // Scoring and threshold parameters for the assembler.
    const int matchScore = 1;
    const int mismatchScore = -8;
    const int gapCost = -8;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // Shape of the generated data set.
    const int MasterLength = 30;
    const int ReadLength = 10;
    const int NumReads = 5;
    const bool AssumeOrientedReads = false;

    // While this call is active, assembly details appear in the log.
    // This is extremely verbose.
    Trace.Set(Trace.AssemblyDetails);

    // Build the master sequence from a fixed seed so the test is repeatable
    // (an unseeded "new Random()" would give a different test each run).
    Random randGen = new Random(654321);

    // A draw in [0, 8) selects a base: 0-1 -> A, 2-3 -> C, 4-5 -> G, 6-7 -> T.
    const string basesByDraw = "AACCGGTT";
    StringBuilder masterBuilder = new StringBuilder();
    for (int baseIndex = 0; baseIndex < MasterLength; baseIndex++)
    {
        masterBuilder.Append(basesByDraw[randGen.Next(8)]);
    }

    Sequence master = new Sequence(Alphabets.DNA, masterBuilder.ToString());
    string masterText = master.ConvertToString();

    // Create the reads: windows of ReadLength bases starting every 5 bases.
    List<ISequence> inputs = new List<ISequence>();
    for (int readIndex = 0; readIndex < NumReads; readIndex++)
    {
        string fragment = masterText.Substring(5 * readIndex, ReadLength);

        // Both draws are always taken, in this order, to keep the random stream stable.
        bool useRevComp = randGen.Next(2) > 0;
        bool useReverse = randGen.Next(2) > 0 && !AssumeOrientedReads;

        string readText;
        if (!useRevComp)
        {
            // Without the reverse-complement draw the fragment is used unchanged.
            readText = fragment;
        }
        else
        {
            Sequence forward = new Sequence(Alphabets.DNA, fragment);
            readText = useReverse
                ? forward.GetReversedSequence().ConvertToString()
                : forward.GetReverseComplementedSequence().ConvertToString();
        }

        ISequence read = new Sequence(Alphabets.DNA, readText);
        inputs.Add(read);
    }

    // Configure the assembler and its pairwise overlap aligner.
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

    IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    overlapAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = AssumeOrientedReads;

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Everything should merge into a single contig.
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    string consensusText = contig0.Consensus.ConvertToString();

    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(masterText);
    ApplicationLog.WriteLine(consensusText);

    // Note that this is tricky, especially without oriented reads: the consensus
    // could be reversed and/or complemented relative to the original.
    Assert.AreEqual(masterText, consensusText);
}