Ejemplo n.º 1
0
        public void SequenceAssemblerWithContigMethod()
        {
            IOverlapDeNovoAssembly assembly = GetSequenceAssembly("contig");
            string contigConsensus          = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                              Constants.ContigConsensusNode);
            int contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.ContigSequencesCountNode), null);

            // Read the contig from Contig method.
            Contig contigsRead = assembly.Contigs[0];

            // Log the required info.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                   contigsRead.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "SequenceAssembly BVT : Consensus read is '{0}'.",
                                            contigsRead.Consensus.ToString()));

            Assert.AreEqual(contigConsensus, new String(contigsRead.Consensus.Select(a => (char)a).ToArray()));
            Assert.AreEqual(contigSequencesCount, contigsRead.Sequences.Count);

            ApplicationLog.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");
            Console.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");
        }
Ejemplo n.º 2
0
        public void TestContigToString()
        {
            // test parameters
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost      = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false,
            };

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List <ISequence> {
                seq1, seq2
            });
            Contig contig0      = seqAssembly.Contigs[0];
            string actualString = contig0.ToString();
            //string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
            string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

            Assert.AreEqual(actualString, expectedString);
        }
Ejemplo n.º 3
0
        public void MapContigToReverseComplementOfRead()
        {
            IList <ISequence> contigs = new List <ISequence>();
            IList <ISequence> reads   = new List <ISequence>();
            Sequence          seq     = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray());

            seq.ID = "1";
            contigs.Add(seq);
            Sequence read = new Sequence(Alphabets.DNA, "CCTTATCAG".Select(a => (byte)a).ToArray());

            read.ID = "2";
            reads.Add(read);
            const int      kmerLength = 6;
            IList <Contig> alignment  = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

            Assert.AreEqual(alignment.Count, contigs.Count);
            Contig contig = alignment.First();

            Contig.AssembledSequence sequence = contig.Sequences.First();
            Assert.AreEqual(sequence.Length, 9);
            Assert.AreEqual(sequence.Position, 1);
            Assert.AreEqual(sequence.ReadPosition, 0);
            Assert.AreEqual(sequence.Sequence, reads.First());
            Assert.AreEqual(sequence.IsComplemented, true);
            Assert.AreEqual(sequence.IsReversed, true);
        }
Ejemplo n.º 4
0
        public void TestContigToString()
        {
            // test parameters
            int    matchScore         = 5;
            int    mismatchScore      = -4;
            int    gapCost            = -10;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;

            Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new NeedlemanWunschAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
            Contig contig0        = seqAssembly.Contigs[0];
            string actualString   = contig0.ToString();
            string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";

            Assert.AreEqual(actualString, expectedString);
        }
Ejemplo n.º 5
0
        public void XsvSparseParseContig()
        {
            // Gets the expected file from the Xml
            string filePathObj = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleXsvSparseNodeName,
                Constants.FilePathNode);

            Assert.IsTrue(File.Exists(filePathObj));
            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "XsvSparse Formatter BVT: File Exists in the Path '{0}'.",
                                                   filePathObj));

            using (XsvContigParser parserObj = new XsvContigParser(filePathObj,
                                                                   Alphabets.DNA, ',', '#'))
            {
                parserObj.Parse();

                Contig contig = parserObj.ParseContig();

                // Validate parsed temp file with original Xsv file.
                Assert.AreEqual(26048682, contig.Length);
                Assert.AreEqual(26048682, contig.Consensus.Count);
                Assert.AreEqual("Chr22+Chr22+Chr22+Chr22", contig.Consensus.ID);
                Assert.AreEqual(56, contig.Sequences.Count);
            }
            // Log to GUI.
            Console.WriteLine("Successfully validated the ParseConting() method with Xsv file");
            ApplicationLog.WriteLine("Successfully validated the ParseConting() method with Xsv file");
        }
Ejemplo n.º 6
0
        public void MapReadToContig()
        {
            IList <ISequence> contigs = new List <ISequence>();
            IList <ISequence> reads   = new List <ISequence>();
            Sequence          seq     = new Sequence(Alphabets.DNA, "TCTGATAAGG");

            seq.DisplayID = "1";
            contigs.Add(seq);
            Sequence read = new Sequence(Alphabets.DNA, "CTGATAAGG");

            read.DisplayID = "2";
            reads.Add(read);
            const int      kmerLength = 6;
            IList <Contig> alignment  = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

            Assert.AreEqual(alignment.Count, contigs.Count);
            Contig contig = alignment.First();

            Contig.AssembledSequence sequence = contig.Sequences.First();
            Assert.AreEqual(sequence.Length, 9);
            Assert.AreEqual(sequence.Position, 1);
            Assert.AreEqual(sequence.ReadPosition, 0);
            Assert.AreEqual(sequence.Sequence, reads.First());
            Assert.AreEqual(sequence.IsComplemented, false);
            Assert.AreEqual(sequence.IsReversed, false);
        }
Ejemplo n.º 7
0
        public void SimpleConsensusWithMakeConsensusMethod()
        {
            IOverlapDeNovoAssembly assembly = GetSequenceAssembly("consensus");

            string contigConsensus = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                     Constants.ContigConsensusNode);
            double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));
            // Read the contig from Contig method.
            Contig contigReadForConsensus = assembly.Contigs[0];

            contigReadForConsensus.Consensus = null;
            OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();

            simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
            simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

            Assert.AreEqual(contigConsensus, new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));

            // Log the required info.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                   contigReadForConsensus.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                            contigReadForConsensus.Consensus.ToString()));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Clear the view
        /// </summary>
        public void Clear()
        {
            this.sequences       = null;
            this.alignedSequence = null;
            this.contig          = null;

            this.Children.Clear();
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Formats a (sparse) contig to a charcter separated value file,
 /// writing the consensus first, followed by the sequence separator,
 /// and each assembled sequences followed by the sequence separator.
 /// The consensus has an offet of 0, while the assembed sequences have the
 /// offset as present in AssembledSequence.Position.
 /// </summary>
 /// <param name="contig">The contig to format as a set of sparse sequences.</param>
 /// <param name="writer">The text writer to write the formatted output to.</param>
 public void Format(Contig contig, TextWriter writer)
 {
     Format(contig.Consensus, writer);
     foreach (Contig.AssembledSequence aSeq in contig.Sequences)
     {
         Format(aSeq.Sequence, aSeq.Position, writer);
     }
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Set dource data for the panel
        /// This will trigger a redraw of this panel
        /// </summary>
        /// <param name="sourceList">Alignment to plot</param>
        /// <param name="maximumWidthInChars">Maximum span of the sequences</param>
        public void SetDataSource(IAlignedSequence alignedSequence, int maximumWidthInChars)
        {
            this.sequences       = null;
            this.contig          = null;
            this.contigSequences = null;
            this.alignedSequence = alignedSequence;
            this.maxLength       = maximumWidthInChars;

            InvalidateMeasure();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Set dource data for the panel
        /// This will trigger a redraw of this panel
        /// </summary>
        /// <param name="sourceList">List of seuqences to plot</param>
        /// <param name="maximumWidthInChars">Maximum span of the sequences</param>
        public void SetDataSource(IList <ISequence> sourceList, int maximumWidthInChars)
        {
            this.contig          = null;
            this.alignedSequence = null;
            this.contigSequences = null;
            this.sequences       = sourceList;
            this.maxLength       = maximumWidthInChars;

            InvalidateMeasure();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Set dource data for the panel
        /// This will trigger a redraw of this panel
        /// </summary>
        /// <param name="sourceList">Contig to plot</param>
        /// <param name="maximumWidthInChars">Maximum span of the sequences</param>
        /// <param name="basePaddingLeft">Padding to be added to left of all sequences</param>
        public void SetDataSource(Contig contig, int maximumWidthInChars, int basePaddingLeft)
        {
            this.sequences       = null;
            this.alignedSequence = null;
            this.contig          = contig;
            this.basePaddingLeft = basePaddingLeft;
            this.maxLength       = maximumWidthInChars;
            contigSequences      = contig.Sequences.OrderBy(s => s.Position);

            InvalidateMeasure();
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Formats a (sparse) contig to a charcter separated value file,
        /// writing the consensus first, followed by the sequence separator,
        /// and each assembled sequences followed by the sequence separator.
        /// The consensus has an offet of 0, while the assembed sequences have the
        /// offset as present in AssembledSequence.Position.
        /// </summary>
        /// <param name="contig">The contig to format as a set of sparse sequences.</param>
        public void Write(Contig contig)
        {
            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }

            Write(contig.Consensus);
            foreach (Contig.AssembledSequence aSeq in contig.Sequences)
            {
                Format(aSeq.Sequence, (long)aSeq.Sequence.Metadata[XsvSparseParser.MetadataOffsetKey]);
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// This converts a list of sparse sequences read from the Text reader into a contig.
        /// Assumes the first sequence is the consensus and the rest are assembled sequences.
        /// The positions of the assembed sequences are the offsets of the sparse sequences in
        /// the sequence start line. The positions of the sequence items are the same as their
        /// position field value in each character separated line
        /// (i.e. they are not incremented by the offset)
        /// </summary>
        /// <param name="reader">Text reader with the formatted contig</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the contig should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The parsed contig with consensus and assembled sequences, all represented
        /// as SparseSequences.
        /// Null if no lines were present in the reader. Exception if valid sparse sequences
        /// were not present.
        /// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
        /// </returns>
        public Contig ParseContig(TextReader reader, bool isReadOnly)
        {
            // parse the consensus
            XsvSparseReader sparseReader = GetSparseReader(reader);
            ISequence       consensus    = ParseOne(sparseReader, isReadOnly);

            if (consensus == null)
            {
                return(null);
            }

            Contig contig = new Contig();

            contig.Consensus = consensus;
            contig.Sequences = ParseAssembledSequence(sparseReader, isReadOnly);
            return(contig);
        }
Ejemplo n.º 15
0
        public void TestSimpleSequenceAssembler()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump

            // test parameters
            int    matchScore         = 1;
            int    mismatchScore      = -8;
            int    gapCost            = -8;
            double mergeThreshold     = 4;
            double consensusThreshold = 66;

            Sequence seq1 = new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC");
            Sequence seq2 = new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA");
            Sequence seq3 = new Sequence(Alphabets.DNA, "TATAAAGCGCCAA");

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);
            inputs.Add(seq3);

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", contig0.Consensus.ConvertToString());
            Assert.AreEqual(3, contig0.Sequences.Count);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// This converts a list of sparse sequences read from the Text reader into a contig.
        /// Assumes the first sequence is the consensus and the rest are assembled sequences.
        /// The positions of the assembed sequences are the offsets of the sparse sequences in
        /// the sequence start line. The positions of the sequence items are the same as their
        /// position field value in each character separated line
        /// (i.e. they are not incremented by the offset)
        /// </summary>
        /// <returns>The parsed contig with consensus and assembled sequences, all represented
        /// as SparseSequences.
        /// Null if no lines were present in the reader. Exception if valid sparse sequences
        /// were not present.
        /// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
        /// </returns>
        public Contig ParseContig()
        {
            // parse the consensus
            using (StreamReader reader = new StreamReader(this.Filename))
            {
                XsvSparseReader sparseReader = new XsvSparseReader(reader, separator, sequenceIdPrefix);

                ISequence consensus = ParseOne(sparseReader);
                if (consensus == null)
                {
                    return(null);
                }

                Contig contig = new Contig();
                contig.Consensus = consensus;
                contig.Sequences = ParseAssembledSequence(sparseReader);
                return(contig);
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Write out a set of contigs to the given file.
        /// </summary>
        /// <param name="formatter">Formatter</param>
        /// <param name="contig">Contig to write</param>
        /// <param name="filename">Filename</param>
        public static void Format(this XsvContigFormatter formatter, Contig contig, string filename)
        {
            if (formatter == null)
            {
                throw new ArgumentNullException("formatter");
            }
            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }
            if (string.IsNullOrWhiteSpace(filename))
            {
                throw new ArgumentNullException("filename");
            }

            using (var fs = File.Create(filename))
            {
                formatter.Write(fs, contig);
            }
        }
Ejemplo n.º 18
0
        public void SequenceAssemblerWithAssembleMethod()
        {
            IOverlapDeNovoAssembly assembly = GetSequenceAssembly("assemble");
            string contigConsensus          = utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                              Constants.ContigConsensusNode);
            int contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.ContigSequencesCountNode), null);

            // Get the parameters from Xml for Assemble() method test cases.
            int unMergedCount = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                              Constants.AssemblyAlgorithmNodeName,
                                              Constants.UnMergedSequencesCountNode), null);
            int contigsCount = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                             Constants.AssemblyAlgorithmNodeName,
                                             Constants.ContigsCountNode), null);

            Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
            Assert.AreEqual(contigsCount, assembly.Contigs.Count);
            Contig contigRead = assembly.Contigs[0];

            Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
            Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);

            // Logs the concensus
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                                                   assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SequenceAssembly BVT : Contigs Count is '{0}'.",
                                                   assembly.Contigs.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                                                   contigRead.Sequences.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                   contigRead.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "SequenceAssembly BVT : Consensus read is '{0}'.",
                                            contigRead.Consensus.ToString()));
        }
Ejemplo n.º 19
0
        public void XsvSparseContigFormatterWrite()
        {
            // Gets the expected sequence from the Xml
            string filePathObj = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleXsvSparseNodeName,
                Constants.FilePathNode).TestDir();

            Assert.IsTrue(File.Exists(filePathObj));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Xsv Contig Formatter BVT: File Exists in the Path '{0}'.",
                                                   filePathObj));
            Contig expectedContig;

            XsvContigParser parserObj = new XsvContigParser(Alphabets.DNA, ',', '#');
            Contig          contig    = parserObj.ParseContig(filePathObj);

            string seqId = contig.Sequences.Aggregate(string.Empty, (current, seq) => current + (seq.Sequence.ID + ","));

            // Write Xsv file.
            XsvContigFormatter formatObj = new XsvContigFormatter(',', '#');

            formatObj.Format(contig, Constants.XsvTempFileName);

            XsvContigParser parserObjNew = new XsvContigParser(Alphabets.DNA, ',', '#');

            expectedContig = parserObjNew.ParseContig(Constants.XsvTempFileName);

            string expectedseqId = expectedContig.Sequences.Aggregate(string.Empty, (current, seq) => current + (seq.Sequence.ID + ","));

            // Validate parsed temp file with original Xsv file.
            Assert.AreEqual(contig.Length, expectedContig.Length);
            Assert.AreEqual(contig.Consensus.Count, expectedContig.Consensus.Count);
            Assert.AreEqual(contig.Consensus.ID, expectedContig.Consensus.ID);
            Assert.AreEqual(contig.Sequences.Count, expectedContig.Sequences.Count);
            Assert.AreEqual(seqId.Length, expectedseqId.Length);
            Assert.AreEqual(seqId, expectedseqId);
            File.Delete(Constants.XsvTempFileName);
            ApplicationLog.WriteLine("Successfully validated the Write Xsv file");
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Write out a set of contigs to the given file.
        /// </summary>
        /// <param name="formatter">Formatter</param>
        /// <param name="contig">Contig to write</param>
        public static void Format(this XsvContigFormatter formatter, Contig contig)
        {
            if (formatter == null)
            {
                throw new ArgumentNullException("formatter");
            }
            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }
            var fs = ParserFormatterExtensions <ISequenceFormatter> .GetOpenStream(formatter, true);

            if (fs != null)
            {
                formatter.Write(fs, contig);
            }
            else
            {
                throw new Exception("You must open a formatter before calling Write.");
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Used to set datasource if the control is to display the output of an assembly.
        /// </summary>
        /// <param name="contig">Contig retrieved after doing the assembly.</param>
        public void SetDataSource(Contig contig)
        {
            this.sequenceList           = new List <ISequence>();
            this.sequencePropertiesList = new List <SequenceProperties>();

            this.referenceSequence = contig.Consensus;

            foreach (Contig.AssembledSequence currentSeq in contig.Sequences.OrderBy(s => s.Position))
            {
                sequenceList.Add(currentSeq.Sequence);
                sequencePropertiesList.Add(new SequenceProperties
                {
                    AlignPosition          = currentSeq.Position,
                    ReadStartAlignPosition = currentSeq.ReadPosition,
                    AlignmentLength        = currentSeq.Length == 0 ? currentSeq.Sequence.Count : currentSeq.Length,
                    IsComplemented         = currentSeq.IsComplemented,
                    IsReversed             = currentSeq.IsReversed
                });
            }

            DataSourceUpdated();
        }
Ejemplo n.º 22
0
        /// <summary>
        /// This converts a list of sparse sequences read from the Text reader into a contig.
        /// Assumes the first sequence is the consensus and the rest are assembled sequences.
        /// The positions of the assembed sequences are the offsets of the sparse sequences in
        /// the sequence start line. The positions of the sequence items are the same as their
        /// position field value in each character separated line
        /// (i.e. they are not incremented by the offset)
        /// </summary>
        /// <param name="reader">Text reader with the formatted contig</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the contig should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The parsed contig with consensus and assembled sequences, all represented
        /// as SparseSequences.
        /// Null if no lines were present in the reader. Exception if valid sparse sequences
        /// were not present.
        /// NOTE: This does not check if the assembled sequence positions are valid with respect to the consensus.
        /// </returns>
        public Contig ParseContig(TextReader reader, bool isReadOnly)
        {
            // Check input arguments
            if (reader == null)
            {
                throw new ArgumentNullException("reader", "Text reader to read contig from cannot be null");
            }

            // parse the consensus
            XsvSparseReader sparseReader = GetSparseReader(reader);
            ISequence       consensus    = ParseOne(sparseReader, isReadOnly);

            if (consensus == null)
            {
                return(null);
            }

            Contig contig = new Contig();

            contig.Consensus = consensus;
            contig.Sequences = ParseAssembledSequence(sparseReader, isReadOnly);
            return(contig);
        }
        /// <summary>
        /// Formats a (sparse) contig to a character-separated value file,
        /// writing the consensus first, followed by the sequence separator,
        /// and each assembled sequences followed by the sequence separator.
        /// The consensus has an offset of 0, while the assembled sequences have the
        /// offset as present in AssembledSequence.Position.
        /// </summary>
        /// <param name="stream">Stream to write to, it is left open at the end.</param>
        /// <param name="contig">The contig to format as a set of sparse sequences.</param>
        public void Write(Stream stream, Contig contig)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }

            // Write the consensus sequence out.
            base.Format(stream, contig.Consensus);

            // Write out the contigs.
            using (StreamWriter writer = stream.OpenWrite(leaveOpen: true))
            {
                foreach (Contig.AssembledSequence aSeq in contig.Sequences)
                {
                    this.Write(writer, aSeq.Sequence, (long)aSeq.Sequence.Metadata[XsvSparseParser.MetadataOffsetKey]);
                }
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        ///     Validates the Sequence Assembler for all the general test cases.
        /// </summary>
        /// <param name="nodeName">Xml Node Name</param>
        /// <param name="additionalParameter">
        ///     Additional Parameter based
        ///     on which the validations are done.
        /// </param>
        /// <param name="isSeqAssemblyctr">True if Default contructor is validated or else false.</param>
        private void ValidateSequenceAssemblerGeneral(string nodeName,
                                                      AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                       Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                          Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.ConsensusThresholdNode),
                                                     null);

            string[] sequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                                                                  Constants.SequencesNode);
            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));
            string documentation = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.DocumentaionNode);
            var info = new SerializationInfo(typeof(OverlapDeNovoAssembly),
                                             new FormatterConverter());
            var context = new StreamingContext(StreamingContextStates.All);

            var inputs = new List <ISequence>();

            switch (additionalParameter)
            {
            case AssemblyParameters.Consensus:
                for (int i = 0; i < sequences.Length; i++)
                {
                    // Logs the sequences
                    ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'.", i, sequences[i]));

                    var seq = new Sequence(alphabet, sequences[i]);
                    inputs.Add(seq);
                }
                break;

            default:
                for (int i = 0; i < sequences.Length; i++)
                {
                    // Logs the sequences
                    ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.", i, sequences[i]));
                    var seq = new Sequence(alphabet, sequences[i]);
                    inputs.Add(seq);
                }
                break;
            }

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new PairwiseOverlapAligner()
            };

            switch (additionalParameter)
            {
            case AssemblyParameters.DiagonalSM:
                (assembler.OverlapAlgorithm).SimilarityMatrix =
                    new DiagonalSimilarityMatrix(matchScore, mismatchScore);
                break;

            case AssemblyParameters.SimilarityMatrix:
                string blosumFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
                (assembler.OverlapAlgorithm).SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
                break;

            default:
                (assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
                break;
            }

            (assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            // Set Documentation property.
            assembly.Documentation = documentation;

            // Get the parameters from Xml in general
            int    contigSequencesCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
            string contigConsensus      = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

            switch (additionalParameter)
            {
            case AssemblyParameters.Consensus:
                // Read the contig from Contig method.
                Contig contigReadForConsensus = assembly.Contigs[0];
                contigReadForConsensus.Consensus = null;
                var simpleSeqAssembler = new OverlapDeNovoAssembler
                {
                    ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
                };
                simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                // Log the required info.
                ApplicationLog.WriteLine(string.Format(null, "SimpleConsensusMethod BVT : Consensus read is '{0}'.", contigReadForConsensus.Consensus));
                Assert.AreEqual(contigConsensus, new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));
                break;

            default:
                // Get the parameters from Xml for Assemble() method test cases.
                int unMergedCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                              Constants.UnMergedSequencesCountNode),
                                              null);
                int contigsCount = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                             Constants.ContigsCountNode), null);

                Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
                Assert.AreEqual(contigsCount, assembly.Contigs.Count);
                Assert.AreEqual(documentation, assembly.Documentation);
                Contig contigRead = assembly.Contigs[0];

                // Logs the consensus
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.", assembly.UnmergedSequences.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contigs Count is '{0}'.", assembly.Contigs.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Contig Sequences Count is '{0}'.", contigRead.Sequences.Count));
                ApplicationLog.WriteLine(string.Format(null, "SequenceAssembly BVT : Consensus read is '{0}'.", contigRead.Consensus));

                Assert.AreEqual(contigConsensus, new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
                Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
                break;
            }
        }
Ejemplo n.º 25
0
        public void ValidateContigToString()
        {
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;
            string       seq2Str            = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
            string       seq1Str            = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);

            ISequence seq1 = new Sequence(Alphabets.DNA, seq1Str);
            ISequence seq2 = new Sequence(Alphabets.DNA, seq2Str);

            var assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore,
                                                     mismatchScore),
                    GapOpenCost = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(new List <ISequence> {
                seq1, seq2
            });

            Contig contig0        = seqAssembly.Contigs[0];
            string actualString   = contig0.ToString();
            string expectedString = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                         Constants.OverlapDenovoExpectedNode);

            Assert.AreEqual(expectedString.Replace("\\r\\n", ""), actualString.Replace("\r\n", ""));

            // Get the parameters from Xml
            int matchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MatchScoreNode), null);
            int mismatchScore1 =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MisMatchScoreNode),
                    null);
            int gapCost1 =
                int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.GapCostNode),
                          null);
            double mergeThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode),
                    null);
            double consensusThreshold1 =
                double.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            string sequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode1);
            string sequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode2);
            string sequence3 = this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                    Constants.SequenceNode3);
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                         Constants.AlphabetNameNode));

            ISequence seq4 = new Sequence(alphabet, sequence1);
            ISequence seq5 = new Sequence(alphabet, sequence2);
            ISequence seq6 = new Sequence(alphabet, sequence3);

            var assembler1 = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold1,
                OverlapAlgorithm = new PairwiseOverlapAligner
                {
                    SimilarityMatrix =
                        new DiagonalSimilarityMatrix(matchScore1,
                                                     mismatchScore1),
                    GapOpenCost = gapCost1
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold1),
                AssumeStandardOrientation = false
            };

            // Assembles all the sequences.
            var seqAssembly1 = (IOverlapDeNovoAssembly)assembler1.Assemble(new List <ISequence> {
                seq4, seq5, seq6
            });
            Contig       contig1         = seqAssembly1.Contigs[0];
            string       actualString1   = contig1.ToString();
            const string expectedString1 = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";

            Assert.AreEqual(expectedString1, actualString1);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Aligns reads to contigs using kmer method of alignment.
        /// </summary>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>List of Contig.</returns>
        public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);
            IList<ContigIndex> contigDatas;
            contigDatas = contigs.AsParallel().Select(contig =>
            {
                IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
                ContigIndex index = new ContigIndex(contig);
                foreach (ISequence kmer in kmers)
                {
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        index.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
                    }
                }

                return index;
            }).ToList();
            
            return contigDatas.Select(contigData =>
            {
                IList<Task<IList<ReadMap>>> tasks =
                    new List<Task<IList<ReadMap>>>();

                // Stores information about contigs for which tasks has been generated.
                IList<long> visitedReads = new List<long>();

                // Creates Task for every read in nodes for a given contig.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    int readPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = kmer.SequenceIndex;
                        if (!visitedReads.Contains(contigIndex))
                        {
                            visitedReads.Add(contigIndex);
                            tasks.Add(
                                Task<IList<ReadMap>>.Factory.StartNew(t => MapRead(readPosition, contigData.ContigReadMatchIndexes, contigIndex, kmerLength), TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                for (int index = 0; index < visitedReads.Count; index++)
                {
                    foreach (ReadMap maps in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = maps.Length,
                            Position = maps.StartPositionOfContig,
                            ReadPosition = maps.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        if (new string(
                            contigOutputStructure.Consensus.GetSubSequence(
                            assembledSeq.Position, assembledSeq.Length).Select(a => (char)a).ToArray()).
                            Equals(new string(assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                            .Select(a => (char)a).ToArray())))
                        {
                            assembledSeq.IsComplemented = false;
                            assembledSeq.IsReversed = false;
                        }
                        else
                        {
                            assembledSeq.IsComplemented = true;
                            assembledSeq.IsReversed = true;
                        }

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList();
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Aligns reads to contigs using kmer method of alignment.
        /// </summary>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>List of Contig.</returns>
        public static IList <Contig> ReadContigAlignment(IList <ISequence> contigs, IList <ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);
            IList <ContigIndex>   contigDatas;

            contigDatas = contigs.AsParallel().Select(contig =>
            {
                IEnumerable <ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
                ContigIndex index             = new ContigIndex(contig);
                IList <KmerIndexer> positions;
                foreach (ISequence kmer in kmers)
                {
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        index.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        index.ContigReadMatchIndexes.Add(new List <KmerIndexer>());
                    }
                }

                return(index);
            }).ToList();

            return(contigDatas.Select(contigData =>
            {
                IList <Task <IList <ReadMap> > > tasks =
                    new List <Task <IList <ReadMap> > >();

                // Stores information about contigs for which tasks has been generated.
                IList <long> visitedReads = new List <long>();

                // Creates Task for every read in nodes for a given contig.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    int readPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = kmer.SequenceIndex;
                        if (!visitedReads.Contains(contigIndex))
                        {
                            visitedReads.Add(contigIndex);
                            tasks.Add(
                                Task <IList <ReadMap> > .Factory.StartNew(t => MapRead(readPosition, contigData.ContigReadMatchIndexes, contigIndex, kmerLength), TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                for (int index = 0; index < visitedReads.Count; index++)
                {
                    foreach (ReadMap maps in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = maps.Length,
                            Position = maps.StartPositionOfContig,
                            ReadPosition = maps.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        if (new string(
                                contigOutputStructure.Consensus.GetSubSequence(
                                    assembledSeq.Position, assembledSeq.Length).Select(a => (char)a).ToArray()).
                            Equals(new string(assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                                              .Select(a => (char)a).ToArray())))
                        {
                            assembledSeq.IsComplemented = false;
                            assembledSeq.IsReversed = false;
                        }
                        else
                        {
                            assembledSeq.IsComplemented = true;
                            assembledSeq.IsReversed = true;
                        }

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList());
        }
Ejemplo n.º 28
0
        public void TestSimpleSequenceAssemblerWithSwineflu()
        {
            Trace.Set(Trace.AssemblyDetails);   // turn on log dump
            // test parameters
            const int    matchScore         = 5;
            const int    mismatchScore      = -4;
            const int    gapCost            = -10;
            const double mergeThreshold     = 4;
            const double consensusThreshold = 66;

            ISequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
            ISequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
            {
                MergeThreshold   = mergeThreshold,
                OverlapAlgorithm = new NeedlemanWunschAligner
                {
                    SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
                    GapOpenCost      = gapCost
                },
                ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold),
                AssumeStandardOrientation = false
            };

            var inputs = new List <ISequence> {
                seq1, seq2
            };
            var seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0  = seqAssembly.Contigs[0];
            string expected = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTYARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";

            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "NeedlemanWunschAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new SmithWatermanAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0  = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "SmithwatermanAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0  = seqAssembly.Contigs[0];
            expected = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "PairwiseOverlapAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);

            assembler.OverlapAlgorithm = new MUMmerAligner();

            seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            contig0 = seqAssembly.Contigs[0];
            Assert.AreEqual(expected, contig0.Consensus.ConvertToString(), "MUMmerAligner");
            Assert.AreEqual(2, contig0.Sequences.Count);
        }
Ejemplo n.º 29
0
        public void TestSimpleSequenceAssemblerWithRandomSequence()
        {
            // Test parameters.
            //
            // In theory, as long as all positions in the master sequence are
            // covered by at least one read, we should be able to pass this test.
            // But some parameter settings will make the test fail, for
            // various reasons, including:
            // 1. Short reads, caused by the strategy used to ensure full coverage
            //  at the ends, might not score well enough to merge.
            // 2. Uncovered positions are always possible due to the random
            //  generation of reads. (Increasing the number of reads helps with this)
            // 3. The assembler might construct the reverse or complement (or both)
            //  of the master sequence.
            // 4. Too low a merge threshold could cause incorrect merges, which
            //  the algorithm will not repair.
            int        matchScore          = 1;
            int        mismatchScore       = -8;
            int        gapCost             = -8;
            double     mergeThreshold      = 3;
            double     consensusThreshold  = 99;
            const int  MasterLength        = 100;
            const int  MinReadLength       = 10;
            const int  MaxReadLength       = 30;
            const int  NumReads            = 200;
            const bool AssumeOrientedReads = true;

            // if this is uncommented, assembly details appear in log.
            // this is extremely verbose.
            // Trace.Set(Trace.AssemblyDetails);

            // make random master sequence
            // (use seed for repeatability, or omit seed for
            // different test each time)
            // Random randGen = new Random();
            Random randGen = new Random(654321);

            StringBuilder randSeq = new StringBuilder();

            for (int i = 0; i < MasterLength; ++i)
            {
                int randm = randGen.Next(8);
                if (randm < 2)
                {
                    randSeq.Append('A');
                }
                else if (randm < 4)
                {
                    randSeq.Append('C');
                }
                else if (randm < 6)
                {
                    randSeq.Append('G');
                }
                else
                {
                    randSeq.Append('T');
                }
            }

            Sequence master = new Sequence(Alphabets.AmbiguousDNA, randSeq.ToString());

            // create the reads
            List <ISequence> inputs = new List <ISequence>();

            for (int i = 0; i < NumReads; ++i)
            {
                // try for uniform coverage clear to the ends (this can lead to short reads, though)
                int       rndPos  = Math.Max(0, randGen.Next(-MinReadLength, MasterLength - 1));
                int       rndLen  = Math.Min(MasterLength - rndPos, randGen.Next(MinReadLength, MaxReadLength + 1));
                string    data    = master.ConvertToString().Substring(Math.Max(0, rndPos), rndLen);
                bool      revcomp = randGen.Next(2) > 0;
                bool      reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;
                ISequence read;
                if (reverse && revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReversedSequence().ConvertToString());
                }
                else if (revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReverseComplementedSequence().ConvertToString());
                }
                else
                {
                    read = new Sequence(Alphabets.DNA, data);
                }

                ApplicationLog.WriteLine("read {0}: {1}", i, read);
                inputs.Add(read);
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = AssumeOrientedReads;

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            ApplicationLog.WriteLine(
                "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
                seqAssembly.Contigs.Count,
                seqAssembly.UnmergedSequences.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(master.ConvertToString());
            ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

            Assert.AreEqual(2, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);

            // note that this is tricky, esp. without oriented reads - consensus
            // could be reversed and/or complemented relative to original
            Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
        }
Ejemplo n.º 30
0
        public void MapReadsToSingleContig()
        {
            const int kmerLength = 6;

            IList <ISequence> readSeqs = new List <ISequence>();
            Sequence          read     = new Sequence(Alphabets.DNA, "GATGCCTC".Select(a => (byte)a).ToArray());

            read.ID = "0";
            readSeqs.Add(read);
            read    = new Sequence(Alphabets.DNA, "CCTCCTAT".Select(a => (byte)a).ToArray());
            read.ID = "1";
            readSeqs.Add(read);
            read    = new Sequence(Alphabets.DNA, "TCCTATC".Select(a => (byte)a).ToArray());
            read.ID = "2";
            readSeqs.Add(read);
            read    = new Sequence(Alphabets.DNA, "GCCTCCTAT".Select(a => (byte)a).ToArray());
            read.ID = "3";
            readSeqs.Add(read);
            read    = new Sequence(Alphabets.DNA, "TGCCTCCT".Select(a => (byte)a).ToArray());
            read.ID = "4";
            readSeqs.Add(read);

            IList <ISequence> contigs = new List <ISequence> {
                new Sequence(Alphabets.DNA,
                             "GATGCCTCCTATC".Select(a => (byte)a).ToArray())
            };

            IList <Contig> maps   = ReadAlignment.ReadContigAlignment(contigs, readSeqs, kmerLength);
            Contig         contig = maps.First();

            Assert.AreEqual(contig.Consensus, contigs.First());
            IList <Contig.AssembledSequence> readMap = Sort(contig.Sequences);

            Assert.AreEqual(readMap[0].Length, 8);
            Assert.AreEqual(readMap[0].Position, 4);
            Assert.AreEqual(readMap[0].ReadPosition, 0);
            Assert.AreEqual(readMap[0].IsComplemented, false);
            Assert.AreEqual(readMap[0].IsReversed, false);

            Assert.AreEqual(readMap[1].Length, 8);
            Assert.AreEqual(readMap[1].Position, 0);
            Assert.AreEqual(readMap[1].ReadPosition, 0);
            Assert.AreEqual(readMap[1].IsComplemented, false);
            Assert.AreEqual(readMap[1].IsReversed, false);

            Assert.AreEqual(readMap[2].Length, 9);
            Assert.AreEqual(readMap[2].Position, 3);
            Assert.AreEqual(readMap[2].ReadPosition, 0);
            Assert.AreEqual(readMap[2].IsComplemented, false);
            Assert.AreEqual(readMap[2].IsReversed, false);

            Assert.AreEqual(readMap[3].Length, 7);
            Assert.AreEqual(readMap[3].Position, 6);
            Assert.AreEqual(readMap[3].ReadPosition, 0);
            Assert.AreEqual(readMap[3].IsComplemented, false);
            Assert.AreEqual(readMap[3].IsReversed, false);

            Assert.AreEqual(readMap[4].Length, 8);
            Assert.AreEqual(readMap[4].Position, 2);
            Assert.AreEqual(readMap[4].ReadPosition, 0);
            Assert.AreEqual(readMap[3].IsComplemented, false);
            Assert.AreEqual(readMap[3].IsReversed, false);
        }
Ejemplo n.º 31
0
        public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
        {
            // test parameters
            int        matchScore          = 1;
            int        mismatchScore       = -8;
            int        gapCost             = -8;
            double     mergeThreshold      = 4;
            double     consensusThreshold  = 66;
            const int  MasterLength        = 30;
            const int  ReadLength          = 10;
            const int  NumReads            = 5;
            const bool AssumeOrientedReads = false;

            // if this is uncommented, assembly details appear in log.
            // this is extremely verbose.
            Trace.Set(Trace.AssemblyDetails);

            // make random master sequence
            // (use seed for repeatability, or omit seed for
            // different test each time)
            // Random randGen = new Random();
            Random randGen = new Random(654321);

            StringBuilder randSeq = new StringBuilder();

            for (int i = 0; i < MasterLength; ++i)
            {
                int randm = randGen.Next(8);
                if (randm < 2)
                {
                    randSeq.Append('A');
                }
                else if (randm < 4)
                {
                    randSeq.Append('C');
                }
                else if (randm < 6)
                {
                    randSeq.Append('G');
                }
                else
                {
                    randSeq.Append('T');
                }
            }

            Sequence master = new Sequence(Alphabets.DNA, randSeq.ToString());

            // create the reads
            List <ISequence> inputs = new List <ISequence>();

            for (int i = 0; i < NumReads; ++i)
            {
                int       pos     = 5 * i;
                string    data    = master.ConvertToString().Substring(pos, ReadLength);
                bool      revcomp = randGen.Next(2) > 0;
                bool      reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;
                ISequence read;
                if (reverse && revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReversedSequence().ConvertToString());
                }
                else if (revcomp)
                {
                    Sequence tmp = new Sequence(Alphabets.DNA, data);
                    read = new Sequence(Alphabets.DNA, tmp.GetReverseComplementedSequence().ConvertToString());
                }
                else
                {
                    read = new Sequence(Alphabets.DNA, data);
                }

                inputs.Add(read);
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost      = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = AssumeOrientedReads;

            IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
            Assert.AreEqual(1, seqAssembly.Contigs.Count);
            Contig contig0 = seqAssembly.Contigs[0];

            ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
            ApplicationLog.WriteLine(master.ConvertToString());
            ApplicationLog.WriteLine(contig0.Consensus.ConvertToString());

            // note that this is tricky, esp. without oriented reads - consensus
            // could be reversed and/or complemented relative to original
            Assert.AreEqual(master.ConvertToString(), contig0.Consensus.ConvertToString());
        }