Esempio n. 1
0
        /// <summary>
        /// Assembles four short reads against a reference that lacks the "TGC"
        /// segment present in the expected contig, then checks the first contig returned.
        /// </summary>
        public void TestPcaStep3DeletionInReferenceTwo()
        {
            // Reference:       "CTACG" directly followed by "ATCGGGG".
            // Expected contig: "CTACG" + "TGC" + "ATCGGGG".
            ISequence reference = new Sequence(DnaAlphabet.Instance, "CTACGATCGGGG");

            var reads = new List<ISequence>
            {
                new Sequence(DnaAlphabet.Instance, "CTACGTGC"),
                new Sequence(DnaAlphabet.Instance, "GCATCG"),
                new Sequence(DnaAlphabet.Instance, "GGGG"),
                new Sequence(DnaAlphabet.Instance, "CATCG")
            };

            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 3;

            var contigs = assembler.Assemble(new List<ISequence> { reference }, reads);

            // Only the first contig is inspected; each symbol is converted to its character.
            char[] symbols = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();
            string assembled = new string(symbols);

            Assert.AreEqual("CTACGTGCATCGGGG", assembled);
        }
Esempio n. 2
0
        /// <summary>
        /// Assembles two reads whose IDs carry the ".F" and ".R" markers and the
        /// "TestPcaStep2WithTwoReads" library name, and checks the result through
        /// <c>TestPcaAssemble</c> against the two read strings.
        /// </summary>
        public void TestPcaStep2WithTwoReads()
        {
            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 9;
            assembler.KmerLength = 9;

            var firstRead = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC");
            firstRead.ID = ">read.F:TestPcaStep2WithTwoReads";

            var secondRead = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG");
            secondRead.ID = ">read.R:TestPcaStep2WithTwoReads";

            // The library name equals the text after ':' in both read IDs.
            CloneLibrary.Instance.AddLibrary("TestPcaStep2WithTwoReads", 61f, 1f);

            var references = new List<Sequence>
            {
                new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG", false)
            };
            var reads = new List<ISequence> { firstRead, secondRead };
            var expectedContigs = new List<string> { "AACCTTGGCC", "GGGGGGGGGG" };

            TestPcaAssemble(assembler, references, reads, expectedContigs);
        }
Esempio n. 3
0
        /// <summary>
        /// Assembles two reads (IDs tagged ".F" and ".R" for library "abc") against a
        /// reference and checks that exactly two contigs come back, in order,
        /// each equal to the corresponding read string.
        /// </summary>
        public void TestPcaStep3RepeatTest()
        {
            Sequence r = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG");

            Sequence q = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC");
            q.ID = ">read.F:abc";

            Sequence p = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG");
            p.ID = ">read.R:abc";

            CloneLibrary.Instance.AddLibrary("abc", (float)61, (float)1);

            ComparativeGenomeAssembler asm = new ComparativeGenomeAssembler()
            {
                LengthOfMum = 9
            };

            // Materialize once so the count check and the per-contig checks see the same result.
            var contigs = asm.Assemble(
                new List<ISequence> { r },
                new List<ISequence> { q, p }).ToList();

            string[] expectedResult = { "AACCTTGGCC", "GGGGGGGGGG" };

            // Without this check an empty result would pass, and an extra contig
            // would fail with an index error rather than an assertion failure.
            Assert.AreEqual(expectedResult.Length, contigs.Count, "Unexpected number of contigs.");

            for (int i = 0; i < expectedResult.Length; i++)
            {
                string actual = new string(contigs[i].Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedResult[i], actual);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs the assembler and verifies that the assembled sequences match the
        /// expected strings, ignoring order.
        /// </summary>
        /// <param name="asm">Comparative Genome Assembler.</param>
        /// <param name="reference">Reference sequences.</param>
        /// <param name="query">Query sequences (reads).</param>
        /// <param name="expected">Expected strings; each must be produced exactly as many times as it is listed.</param>
        private static void TestPcaAssemble(ComparativeGenomeAssembler asm, IEnumerable <ISequence> reference, IEnumerable <ISequence> query, IList <string> expected)
        {
            // Enumerate the assembler output exactly once; counting and then
            // iterating the raw result would enumerate it twice.
            List<string> actual = asm.Assemble(reference, query)
                .Select(contig => new string(contig.Select(a => (char)a).ToArray()))
                .ToList();

            Assert.AreEqual(expected.Count, actual.Count, "Unexpected number of assembled sequences.");

            // Work on a copy so the caller's list is left untouched. Removing each
            // match prevents a duplicated contig from masking a missing one.
            List<string> unmatched = new List<string>(expected);
            foreach (string actualStr in actual)
            {
                Assert.IsTrue(unmatched.Remove(actualStr), "Unexpected assembled sequence: " + actualStr);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Assembles two reads that share the "AAAA" overlap and expects a single
        /// merged sequence, verified through <c>TestPcaAssemble</c>.
        /// </summary>
        public void TestPcaStep4WithOverlappingReads()
        {
            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 3;

            var references = new List<Sequence>
            {
                new Sequence(DnaAlphabet.Instance, "AGAAAAGTTTTCA", false)
            };

            // The first read ends with "AAAA" and the second read starts with it.
            var reads = new List<ISequence>
            {
                new Sequence(DnaAlphabet.Instance, "AGAAAA", false),
                new Sequence(DnaAlphabet.Instance, "AAAAGTTTT", false)
            };

            var expectedContigs = new List<string> { "AGAAAAGTTTT" };

            TestPcaAssemble(assembler, references, reads, expectedContigs);
        }
Esempio n. 6
0
        /// <summary>
        /// Assembles one read against a reference that carries an extra
        /// "GTACGGATATTG" segment in the middle, and expects the read itself
        /// as the first contig.
        /// </summary>
        public void TestPcaStep3InsertionInReference()
        {
            // Reference: "AACCTTGGCCTA" + "GTACGGATATTG" + "CCCACGATCG"
            // Read:      "AACCTTGGCCTA"                  + "CCCACGATCG"
            ISequence reference = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTAGTACGGATATTGCCCACGATCG");
            ISequence read = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTACCCACGATCG");

            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 9;

            var contigs = assembler.Assemble(
                new List<ISequence> { reference },
                new List<ISequence> { read });

            // Only the first contig is inspected; each symbol is converted to its character.
            char[] symbols = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();
            string assembled = new string(symbols);

            Assert.AreEqual("AACCTTGGCCTACCCACGATCG", assembled);
        }
Esempio n. 7
0
        /// <summary>
        /// Assembles two reads against one reference and checks that the first two
        /// contigs returned equal the two reads, in order.
        /// </summary>
        public void TestPcaStep3InsertionInReferenceThree()
        {
            Sequence refSeq = new Sequence(DnaAlphabet.Instance, "AAAACCCGGGGTTTTTTACGTGACTGCA");
            Sequence q      = new Sequence(DnaAlphabet.Instance, "AAAAGGGG");
            Sequence r      = new Sequence(DnaAlphabet.Instance, "ACGTTGCA");

            ComparativeGenomeAssembler asm = new ComparativeGenomeAssembler()
            {
                LengthOfMum = 4
            };

            // Materialize once: calling ElementAt twice on the raw result would
            // enumerate the assembler output a second time.
            var output = asm.Assemble(
                new List<ISequence> { refSeq },
                new List<ISequence> { q, r }).ToList();

            // Fail with a clear message, rather than an index error, when a contig is missing.
            Assert.IsTrue(output.Count >= 2, "Expected at least two contigs.");

            string res = new string(output[0].Select(a => (char)a).ToArray());
            Assert.AreEqual("AAAAGGGG", res);

            res = new string(output[1].Select(a => (char)a).ToArray());
            Assert.AreEqual("ACGTTGCA", res);
        }
Esempio n. 8
0
        /// <summary>
        /// Assembles the reads in the query file against the reference file and writes the resulting contigs.
        /// </summary>
        /// <remarks>
        /// <c>FilePath</c> must hold exactly two entries: the reference file followed by the reads file.
        /// If it is null or has a different length, an error is written to standard error and the
        /// process exits with code -1. When <c>Verbose</c> is set, file sizes and step timings are
        /// written to standard error.
        /// </remarks>
        public virtual void AssembleSequences()
        {
            // A missing path array is reported the same way as a wrong number of paths,
            // instead of surfacing as a NullReferenceException.
            if (this.FilePath == null || this.FilePath.Length != 2)
            {
                Console.Error.WriteLine("\nError: A reference file and 1 query file are required.");
                Environment.Exit(-1);
            }

            Stopwatch runAlgorithm    = new Stopwatch();
            long      inputFileLength = new FileInfo(this.FilePath[0]).Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Parse the reference file.
            runAlgorithm.Restart();
            IEnumerable <ISequence> referenceSequences = new FastAParser(this.FilePath[0]).Parse();
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed reference file: {0}", Path.GetFullPath(this.FilePath[0]));
                Console.Error.WriteLine("            Read/Processing time: {0}", runAlgorithm.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", inputFileLength);
            }

            // Parse the reads file.
            inputFileLength = new FileInfo(this.FilePath[1]).Length;
            runAlgorithm.Restart();
            FastASequencePositionParser queryParser = new FastASequencePositionParser(this.FilePath[1], true);

            queryParser.CacheSequencesForRandomAccess();
            IEnumerable <ISequence> reads = queryParser.Parse();

            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed reads file: {0}", Path.GetFullPath(this.FilePath[1]));
                Console.Error.WriteLine("            Read/Processing time: {0}", runAlgorithm.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", inputFileLength);
            }

            // Validate the reads before assembling.
            runAlgorithm.Restart();
            ValidateAmbiguousReads(reads);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
            }

            // Configure the assembler from this object's settings and run it.
            runAlgorithm.Restart();
            ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();

            assembler.StatusChanged     += new EventHandler <StatusChangedEventArgs>(this.AssemblerStatusChanged);
            assembler.ScaffoldingEnabled = this.Scaffold;
            assembler.KmerLength         = this.KmerLength;
            assembler.LengthOfMum        = this.MumLength;
            IEnumerable <ISequence> assemblerResult = assembler.Assemble(referenceSequences, queryParser);

            runAlgorithm.Stop();
            TimeSpan assembleTime = runAlgorithm.Elapsed;

            // Write the contigs; this step is timed separately from assembly.
            runAlgorithm.Restart();

            if (this.OutputFile == null)
            {
                // Write output to console.
                this.WriteContigs(assemblerResult, Console.Out);
            }
            else
            {
                // Write output to the specified file.
                this.WriteContigs(assemblerResult, null);
                Console.WriteLine(Resources.OutPutWrittenToFileSpecified);
            }

            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine("  Assemble time: {0}", assembleTime);
                Console.Error.WriteLine("  Write() time: {0}", runAlgorithm.Elapsed);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Assembles the reads in the query file against the reference file and writes the resulting contigs.
        /// </summary>
        /// <remarks>
        /// <c>FilePath</c> must hold exactly two entries: the reference file followed by the reads file.
        /// If it is null or has a different length, an error is reported through <c>Output</c> and the
        /// method returns without doing any work. When <c>Verbose</c> is set, file sizes and step
        /// timings are reported at the verbose output level.
        /// </remarks>
        public virtual void AssembleSequences()
        {
            // A missing path array is reported the same way as a wrong number of paths,
            // instead of surfacing as a NullReferenceException.
            if (this.FilePath == null || this.FilePath.Length != 2)
            {
                Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
                return;
            }

            Stopwatch runAlgorithm    = new Stopwatch();
            long      inputFileLength = new FileInfo(this.FilePath[0]).Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Parse the reference file.
            runAlgorithm.Restart();
            IEnumerable <ISequence> referenceSequences = ParseFile(this.FilePath[0]);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(this.FilePath[0]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time : {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size            : {0}", inputFileLength);
            }

            // Parse the reads file.
            inputFileLength = new FileInfo(this.FilePath[1]).Length;
            runAlgorithm.Restart();
            FastASequencePositionParser queryParser = new FastASequencePositionParser(this.FilePath[1], true);

            queryParser.CacheSequencesForRandomAccess();
            IEnumerable <ISequence> reads = queryParser.Parse();

            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reads file   : {0}", Path.GetFullPath(this.FilePath[1]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", inputFileLength);
            }

            // Validate the reads before assembling.
            runAlgorithm.Restart();
            ValidateAmbiguousReads(reads);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose);
            }

            // Configure the assembler from this object's settings and run it.
            runAlgorithm.Restart();
            ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();

            assembler.StatusChanged     += this.AssemblerStatusChanged;
            assembler.ScaffoldingEnabled = this.Scaffold;
            assembler.KmerLength         = this.KmerLength;
            assembler.LengthOfMum        = this.MumLength;
            IEnumerable <ISequence> assemblerResult = assembler.Assemble(referenceSequences, queryParser);

            runAlgorithm.Stop();
            TimeSpan assembleTime = runAlgorithm.Elapsed;

            // Write the contigs; this step is timed separately from assembly.
            runAlgorithm.Restart();
            this.WriteContigs(assemblerResult);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", assembleTime);
                Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", runAlgorithm.Elapsed);
            }
        }