/// <summary>
/// Assembles four overlapping reads against a short reference and checks
/// that the first contig returned is "CTACGTGCATCGGGG".
/// </summary>
public void TestPcaStep3DeletionInReferenceTwo()
{
    // Relative to the expected contig, the reference lacks the bases "TGC"
    // between "CTACG" and "ATCGGGG".
    var reference = new Sequence(DnaAlphabet.Instance, "CTACGATCGGGG");

    // Read layout over the expected contig:
    //   CTACGTGCATCGGGG
    //   CTACGTGC
    //         GCATCG
    //          CATCG
    //              GGGG
    var reads = new List<ISequence>
    {
        new Sequence(DnaAlphabet.Instance, "CTACGTGC"),
        new Sequence(DnaAlphabet.Instance, "GCATCG"),
        new Sequence(DnaAlphabet.Instance, "GGGG"),
        new Sequence(DnaAlphabet.Instance, "CATCG"),
    };

    var assembler = new ComparativeGenomeAssembler { LengthOfMum = 3 };
    var contigs = assembler.Assemble(new List<ISequence> { reference }, reads);

    // Only the first contig is inspected.
    char[] symbols = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();
    string firstContig = new string(symbols);

    Assert.AreEqual("CTACGTGCATCGGGG", firstContig);
}
/// <summary>
/// Assembles a forward/reverse read pair registered under the clone library
/// "TestPcaStep2WithTwoReads" and expects each read back as a contig.
/// </summary>
public void TestPcaStep2WithTwoReads()
{
    var assembler = new ComparativeGenomeAssembler { LengthOfMum = 9, KmerLength = 9 };

    var forwardRead = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC");
    forwardRead.ID = ">read.F:TestPcaStep2WithTwoReads";

    var reverseRead = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG");
    reverseRead.ID = ">read.R:TestPcaStep2WithTwoReads";

    // Library registered with mean 61 and standard deviation 1 (argument order as in the call).
    CloneLibrary.Instance.AddLibrary("TestPcaStep2WithTwoReads", 61f, 1f);

    var references = new List<Sequence>
    {
        new Sequence(
            DnaAlphabet.Instance,
            "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG",
            false),
    };
    var reads = new List<ISequence> { forwardRead, reverseRead };
    var expectedContigs = new List<string> { "AACCTTGGCC", "GGGGGGGGGG" };

    TestPcaAssemble(assembler, references, reads, expectedContigs);
}
/// <summary>
/// Assembles a paired read (clone library "abc") against a reference and
/// compares the produced contigs, in order, with the two original reads.
/// </summary>
public void TestPcaStep3RepeatTest()
{
    var reference = new Sequence(
        DnaAlphabet.Instance,
        "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG");
    var forwardRead = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC") { ID = ">read.F:abc" };
    var reverseRead = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG") { ID = ">read.R:abc" };
    CloneLibrary.Instance.AddLibrary("abc", 61f, 1f);

    var assembler = new ComparativeGenomeAssembler { LengthOfMum = 9 };
    var contigs = assembler.Assemble(
        new List<ISequence> { reference },
        new List<ISequence> { forwardRead, reverseRead });

    string[] expectedContigs = { "AACCTTGGCC", "GGGGGGGGGG" };

    // Each produced contig is checked against the expectation at the same
    // position; the number of contigs itself is not asserted here.
    int position = 0;
    foreach (var contig in contigs)
    {
        char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
        Assert.AreEqual(expectedContigs[position], new string(symbols));
        position++;
    }
}
/// <summary>
/// Runs the assembler on the given reference and query sequences and verifies
/// that it yields exactly as many contigs as there are expected strings and
/// that every contig's text is one of the expected strings.
/// </summary>
/// <param name="asm">Comparative Genome Assembler.</param>
/// <param name="reference">Reference sequences.</param>
/// <param name="query">Query sequences (reads).</param>
/// <param name="expected">Expected contig strings; order is not significant.</param>
private static void TestPcaAssemble(ComparativeGenomeAssembler asm, IEnumerable<ISequence> reference, IEnumerable<ISequence> query, IList<string> expected)
{
    // Materialize once: the result is only known to be an IEnumerable, so
    // calling Count() and then iterating could run a deferred assembly twice.
    List<ISequence> contigs = asm.Assemble(reference, query).ToList();

    Assert.AreEqual(expected.Count, contigs.Count, "Unexpected number of contigs.");

    foreach (ISequence contig in contigs)
    {
        string actualStr = new string(contig.Select(a => (char)a).ToArray());
        Assert.IsTrue(expected.Contains(actualStr), "Unexpected contig: " + actualStr);
    }
}
/// <summary>
/// Assembles two overlapping reads against a single reference and expects
/// the single merged contig "AGAAAAGTTTT".
/// </summary>
public void TestPcaStep4WithOverlappingReads()
{
    var assembler = new ComparativeGenomeAssembler { LengthOfMum = 3 };

    var references = new List<Sequence>
    {
        new Sequence(DnaAlphabet.Instance, "AGAAAAGTTTTCA", false),
    };

    // The first read ends with "AAAA", which is also how the second read starts.
    var reads = new List<ISequence>
    {
        new Sequence(DnaAlphabet.Instance, "AGAAAA", false),
        new Sequence(DnaAlphabet.Instance, "AAAAGTTTT", false),
    };

    var expectedContigs = new List<string> { "AGAAAAGTTTT" };

    TestPcaAssemble(assembler, references, reads, expectedContigs);
}
/// <summary>
/// Assembles one read that skips a stretch of the reference and checks that
/// the first contig returned equals the read.
/// </summary>
public void TestPcaStep3InsertionInReference()
{
    // Read laid over the reference; the gap marks reference bases the read lacks:
    //   AACCTTGGCCTAGTACGGATATTGCCCACGATCG
    //   AACCTTGGCCTA            CCCACGATCG
    var reference = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTAGTACGGATATTGCCCACGATCG");
    var read = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTACCCACGATCG");

    var assembler = new ComparativeGenomeAssembler { LengthOfMum = 9 };
    var contigs = assembler.Assemble(
        new List<ISequence> { reference },
        new List<ISequence> { read });

    char[] symbols = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();
    string firstContig = new string(symbols);

    Assert.AreEqual("AACCTTGGCCTACCCACGATCG", firstContig);
}
/// <summary>
/// Assembles two reads against one reference and checks that the first two
/// contigs returned are "AAAAGGGG" and "ACGTTGCA", in that order.
/// </summary>
public void TestPcaStep3InsertionInReferenceThree()
{
    Sequence refSeq = new Sequence(DnaAlphabet.Instance, "AAAACCCGGGGTTTTTTACGTGACTGCA");
    Sequence q = new Sequence(DnaAlphabet.Instance, "AAAAGGGG");
    Sequence r = new Sequence(DnaAlphabet.Instance, "ACGTTGCA");

    ComparativeGenomeAssembler asm = new ComparativeGenomeAssembler() { LengthOfMum = 4 };

    // Materialize once: fetching two elements with ElementAt on a bare
    // enumerable would enumerate the assembler result once per call.
    var output = asm.Assemble(new List<ISequence> { refSeq }, new List<ISequence> { q, r }).ToList();

    string res = new string(output[0].Select(a => (char)a).ToArray());
    Assert.AreEqual("AAAAGGGG", res);

    res = new string(output[1].Select(a => (char)a).ToArray());
    Assert.AreEqual("ACGTTGCA", res);
}
/// <summary>
/// Runs the comparative assembly: parses the reference file (FilePath[0]) with
/// FastAParser and the reads file (FilePath[1]) with FastASequencePositionParser,
/// passes the reads to ValidateAmbiguousReads, assembles them and writes the
/// contigs. When Verbose is set, file and timing details go to standard error.
/// </summary>
/// <remarks>
/// Writes an error and exits the process with code -1 unless exactly two
/// file paths were supplied.
/// </remarks>
public virtual void AssembleSequences()
{
    if (this.FilePath.Length != 2)
    {
        Console.Error.WriteLine("\nError: A reference file and 1 query file are required.");
        Environment.Exit(-1);
    }

    var timer = new Stopwatch();

    // Reference file: its size is read before anything else is done with it.
    var referencePath = this.FilePath[0];
    long fileSize = new FileInfo(referencePath).Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    timer.Restart();
    IEnumerable<ISequence> referenceSequences = new FastAParser(referencePath).Parse();
    timer.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed reference file: {0}", Path.GetFullPath(referencePath));
        Console.Error.WriteLine(" Read/Processing time: {0}", timer.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    // Reads file.
    var readsPath = this.FilePath[1];
    fileSize = new FileInfo(readsPath).Length;

    timer.Restart();
    var queryParser = new FastASequencePositionParser(readsPath, true);
    queryParser.CacheSequencesForRandomAccess();
    IEnumerable<ISequence> reads = queryParser.Parse();
    timer.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed reads file: {0}", Path.GetFullPath(readsPath));
        Console.Error.WriteLine(" Read/Processing time: {0}", timer.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    timer.Restart();
    ValidateAmbiguousReads(reads);
    timer.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Time taken for Validating reads: {0}", timer.Elapsed);
    }

    // Assembly: the timer also covers construction and configuration of the assembler.
    timer.Restart();
    var assembler = new ComparativeGenomeAssembler();
    assembler.StatusChanged += this.AssemblerStatusChanged;
    assembler.ScaffoldingEnabled = this.Scaffold;
    assembler.KmerLength = this.KmerLength;
    assembler.LengthOfMum = this.MumLength;

    // The parser object itself (not the 'reads' enumerable) is handed to the assembler.
    IEnumerable<ISequence> contigs = assembler.Assemble(referenceSequences, queryParser);
    timer.Stop();
    TimeSpan assembleTime = timer.Elapsed;

    timer.Restart();
    if (this.OutputFile != null)
    {
        // Write output to the specified file.
        this.WriteContigs(contigs, null);
        Console.WriteLine(Resources.OutPutWrittenToFileSpecified);
    }
    else
    {
        // Write output to console.
        this.WriteContigs(contigs, Console.Out);
    }

    timer.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine(" Assemble time: {0}", assembleTime);
        Console.Error.WriteLine(" Write() time: {0}", timer.Elapsed);
    }
}
/// <summary>
/// Runs the comparative assembly: parses the reference file (FilePath[0]) via
/// ParseFile and the reads file (FilePath[1]) with FastASequencePositionParser,
/// passes the reads to ValidateAmbiguousReads, assembles them and writes the
/// contigs. When Verbose is set, file and timing details are reported through
/// Output at the Verbose level.
/// </summary>
/// <remarks>
/// Reports an error through Output and returns without doing any work unless
/// exactly two file paths were supplied.
/// </remarks>
public virtual void AssembleSequences()
{
    if (this.FilePath.Length != 2)
    {
        Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
        return;
    }

    var timer = new Stopwatch();

    // Reference file: its size is read before anything else is done with it.
    var referencePath = this.FilePath[0];
    long fileSize = new FileInfo(referencePath).Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    timer.Restart();
    IEnumerable<ISequence> referenceSequences = ParseFile(referencePath);
    timer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(referencePath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", timer.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    // Reads file.
    var readsPath = this.FilePath[1];
    fileSize = new FileInfo(readsPath).Length;

    timer.Restart();
    var queryParser = new FastASequencePositionParser(readsPath, true);
    queryParser.CacheSequencesForRandomAccess();
    IEnumerable<ISequence> reads = queryParser.Parse();
    timer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reads file : {0}", Path.GetFullPath(readsPath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", timer.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    timer.Restart();
    ValidateAmbiguousReads(reads);
    timer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", timer.Elapsed);
        Output.WriteLine(OutputLevel.Verbose);
    }

    // Assembly: the timer also covers construction and configuration of the assembler.
    timer.Restart();
    var assembler = new ComparativeGenomeAssembler();
    assembler.StatusChanged += this.AssemblerStatusChanged;
    assembler.ScaffoldingEnabled = this.Scaffold;
    assembler.KmerLength = this.KmerLength;
    assembler.LengthOfMum = this.MumLength;

    // The parser object itself (not the 'reads' enumerable) is handed to the assembler.
    IEnumerable<ISequence> contigs = assembler.Assemble(referenceSequences, queryParser);
    timer.Stop();
    TimeSpan assembleTime = timer.Elapsed;

    timer.Restart();
    this.WriteContigs(contigs);
    timer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", assembleTime);
        Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", timer.Elapsed);
    }
}