/// <summary>
/// Assembles four short reads against the reference "CTACGATCGGGG" with a MUM length of 3
/// and checks that the first assembled sequence is "CTACGTGCATCGGGG".
/// The expected contig carries the bases "TGC" between "CTACG" and "ATCGGGG",
/// which the reference does not contain.
/// </summary>
public void TestPcaStep3DeletionInReferenceTwo()
{
    var reference = new Sequence(DnaAlphabet.Instance, "CTACGATCGGGG");

    // Reads, listed in the order they are handed to the assembler.
    var reads = new List<ISequence>
    {
        new Sequence(DnaAlphabet.Instance, "CTACGTGC"),
        new Sequence(DnaAlphabet.Instance, "GCATCG"),
        new Sequence(DnaAlphabet.Instance, "GGGG"),
        new Sequence(DnaAlphabet.Instance, "CATCG")
    };

    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 3;

    var output = asm.Assemble(new List<ISequence> { reference }, reads);

    // Each sequence symbol is cast to a char to rebuild the contig text.
    char[] firstContigSymbols = output.ElementAt(0).Select(symbol => (char)symbol).ToArray();
    string res = new string(firstContigSymbols);

    Assert.AreEqual("CTACGTGCATCGGGG", res);
}
/// <summary>
/// Assembles two reads whose IDs carry the ".F" / ".R" suffixes and the library name
/// "TestPcaStep2WithTwoReads" (registered in <c>CloneLibrary</c> with the values 61 and 1),
/// using a MUM length and k-mer length of 9, and expects both reads back unchanged.
/// </summary>
public void TestPcaStep2WithTwoReads()
{
    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 9;
    asm.KmerLength = 9;

    var readF = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC")
    {
        ID = ">read.F:TestPcaStep2WithTwoReads"
    };
    var readR = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG")
    {
        ID = ">read.R:TestPcaStep2WithTwoReads"
    };

    // The library must be registered before the assembler runs.
    CloneLibrary.Instance.AddLibrary("TestPcaStep2WithTwoReads", 61f, 1f);

    var reference = new List<Sequence>
    {
        new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG", false)
    };
    var reads = new List<ISequence> { readF, readR };
    var expected = new List<string> { "AACCTTGGCC", "GGGGGGGGGG" };

    TestPcaAssemble(asm, reference, reads, expected);
}
/// <summary>
/// Validates Assemble method .Step 1-5.
/// Reads the assembler settings and the reference/read file paths from the given Xml node,
/// assembles the reads against the reference, then compares the sorted, concatenated
/// output (upper-cased) with the expected sequence (upper-cased).
/// </summary>
/// <param name="nodeName">Parent Node name in Xml</param>
/// <param name="isEcOli">
/// When true the expected sequence is fetched with <c>GetFileTextValue</c>;
/// otherwise it is fetched with <c>GetTextValue</c>.
/// </param>
public void ValidateComparativeAssembleMethod(string nodeName, bool isEcOli)
{
    var assemble = new ComparativeGenomeAssembler();
    var referenceSeqList = new List<ISequence>();

    // Raw (string) assembler settings from the configuration node.
    string mumLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string fixedSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode);
    string minimumScoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode);
    string separationFactorText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode);
    string maximumSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode);
    string breakLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BreakLengthNode);

    // Gets the reference sequence from the FastA file.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
    Assert.IsNotNull(filePath);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Comparative P1 : Successfully validated the File Path '{0}'.", filePath));

    using (var parser = new FastAParser(filePath))
    {
        // Copy the sequences out while the parser is still open.
        referenceSeqList.AddRange(parser.Parse());
    }

    // Get the reads file path from the configuration file.
    string readFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

    assemble.LengthOfMum = int.Parse(mumLengthText, CultureInfo.InvariantCulture);
    assemble.KmerLength = int.Parse(kmerLengthText, CultureInfo.InvariantCulture);
    assemble.FixedSeparation = int.Parse(fixedSeparationText, CultureInfo.InvariantCulture);
    assemble.MinimumScore = int.Parse(minimumScoreText, CultureInfo.InvariantCulture);
    assemble.SeparationFactor = float.Parse(separationFactorText, CultureInfo.InvariantCulture);
    assemble.MaximumSeparation = int.Parse(maximumSeparationText, CultureInfo.InvariantCulture);
    assemble.BreakLength = int.Parse(breakLengthText, CultureInfo.InvariantCulture);

    using (var queryparser = new FastASequencePositionParser(readFilePath))
    {
        IEnumerable<ISequence> outputAssemble = assemble.Assemble(referenceSeqList, queryparser);

        string expectedSequence = isEcOli
            ? utilityObj.xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequenceNode)
            : utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

        // Sorting makes the comparison independent of the order the contigs come back in.
        var sortedContigs = outputAssemble.Select(seq => seq.ConvertToString()).ToList();
        sortedContigs.Sort();

        string expectedUpper = expectedSequence.ToUpperInvariant();
        string actualUpper = string.Concat(sortedContigs).ToUpperInvariant();
        Assert.AreEqual(expectedUpper, actualUpper);
    }
}
/// <summary>
/// It assembles the sequences.
/// Expects exactly two entries in <c>FilePath</c>: the reference file followed by the reads
/// file. The reads are cached, checked for ambiguous alphabets, assembled against the
/// reference with <see cref="ComparativeGenomeAssembler"/>, and the result is passed to
/// <c>WriteContigs</c>. When <c>Verbose</c> is set, timing for each phase is written out.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when any read uses an alphabet that has ambiguity.
/// </exception>
public virtual void AssembleSequences()
{
    // A missing argument array is reported the same way as a wrong file count
    // instead of surfacing as a NullReferenceException.
    if (this.FilePath == null || this.FilePath.Length != 2)
    {
        Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
        return;
    }

    Stopwatch runAlgorithm = new Stopwatch();

    FileInfo inputFileinfo = new FileInfo(this.FilePath[0]);
    long inputFileLength = inputFileinfo.Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    // Parse the reference file.
    runAlgorithm.Restart();
    IEnumerable<ISequence> referenceSequences = ParseFile(this.FilePath[0]);
    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(this.FilePath[0]));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileLength);
    }

    inputFileinfo = new FileInfo(this.FilePath[1]);
    inputFileLength = inputFileinfo.Length;

    // Parse and cache the reads; the stream is closed once the cache is built.
    runAlgorithm.Restart();
    FastASequencePositionParser queryParser;
    using (var stream = File.OpenRead(this.FilePath[1]))
    {
        queryParser = new FastASequencePositionParser(stream, true);
        queryParser.CacheSequencesForRandomAccess();
    }

    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reads file : {0}", Path.GetFullPath(this.FilePath[1]));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileLength);
    }

    // Validate the reads inside their own timed region so the reported
    // "Validating reads" time covers the ambiguity check (it used to wrap nothing).
    runAlgorithm.Restart();
    var reads = queryParser.Parse();
    if (reads.Any(s => s.Alphabet.HasAmbiguity))
    {
        throw new ArgumentException(Resources.AmbiguousReadsNotSupported);
    }

    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose);
    }

    // Assemble.
    runAlgorithm.Restart();
    ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
    assembler.StatusChanged += this.AssemblerStatusChanged;
    assembler.ScaffoldingEnabled = this.Scaffold;
    assembler.KmerLength = this.KmerLength;
    assembler.LengthOfMum = this.MumLength;
    IEnumerable<ISequence> assemblerResult = assembler.Assemble(referenceSequences, queryParser);
    runAlgorithm.Stop();

    // Elapsed is captured by value, so restarting the stopwatch below does not alter it.
    TimeSpan timeSpan = runAlgorithm.Elapsed;

    // Write the contigs.
    runAlgorithm.Restart();
    this.WriteContigs(assemblerResult);
    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", timeSpan);
        Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", runAlgorithm.Elapsed);
    }
}
/// <summary>
/// Test Pca assemble.
/// Runs the assembler once and verifies that it produces exactly as many sequences as
/// there are expected strings, and that every produced sequence is one of the expected strings.
/// </summary>
/// <param name="asm">Comparative Genome Assembler.</param>
/// <param name="reference">Reference sequence.</param>
/// <param name="query">Query sequence.</param>
/// <param name="expected">Expected strings.</param>
private static void TestPcaAssemble(ComparativeGenomeAssembler asm, IEnumerable<ISequence> reference, IEnumerable<ISequence> query, IList<string> expected)
{
    // Materialize once: counting and then iterating the raw enumerable would
    // enumerate it twice, which may repeat the assembly if it is evaluated lazily.
    List<ISequence> result = asm.Assemble(reference, query).ToList();

    Assert.AreEqual(expected.Count, result.Count, "Unexpected number of assembled sequences.");

    foreach (ISequence act in result)
    {
        // Each sequence symbol is cast to a char to rebuild the contig text.
        string actualStr = new string(act.Select(a => (char)a).ToArray());
        Assert.IsTrue(expected.Contains(actualStr), "Unexpected assembled sequence: " + actualStr);
    }
}
/// <summary>
/// Assembles two reads that share the bases "AAAA" ("AGAAAA" ends with them and
/// "AAAAGTTTT" starts with them) against the reference "AGAAAAGTTTTCA" with a MUM length
/// of 3, and expects the single merged sequence "AGAAAAGTTTT".
/// </summary>
public void TestPcaStep4WithOverlappingReads()
{
    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 3;

    var reference = new List<Sequence>
    {
        new Sequence(DnaAlphabet.Instance, "AGAAAAGTTTTCA", false)
    };

    var reads = new List<ISequence>
    {
        new Sequence(DnaAlphabet.Instance, "AGAAAA", false),
        new Sequence(DnaAlphabet.Instance, "AAAAGTTTT", false)
    };

    var expected = new List<string> { "AGAAAAGTTTT" };

    TestPcaAssemble(asm, reference, reads, expected);
}
/// <summary>
/// Assembles two reads (IDs ">read.F:abc" and ">read.R:abc", library "abc" registered with
/// the values 61 and 1) against a reference that contains "AACCTTGGCC" both at its start
/// and again directly before its trailing run of G, using a MUM length of 9.
/// Each produced sequence is compared, in order, with "AACCTTGGCC" then "GGGGGGGGGG".
/// </summary>
public void TestPcaStep3RepeatTest()
{
    var reference = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG");
    var readF = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC") { ID = ">read.F:abc" };
    var readR = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG") { ID = ">read.R:abc" };

    // The library must be registered before the assembler runs.
    CloneLibrary.Instance.AddLibrary("abc", 61f, 1f);

    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 9;

    var res = asm.Assemble(
        new List<ISequence> { reference },
        new List<ISequence> { readF, readR });

    string[] expectedResult = { "AACCTTGGCC", "GGGGGGGGGG" };

    // NOTE(review): the number of produced sequences is not asserted, so an empty
    // result passes this loop without any comparison — confirm that is intended.
    int position = 0;
    foreach (var contig in res)
    {
        char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
        string actual = new string(symbols);
        Assert.AreEqual(expectedResult[position++], actual);
    }
}
/// <summary>
/// Assembles a single read against a reference that equals the read with the extra
/// twelve bases "GTACGGATATTG" between "AACCTTGGCCTA" and "CCCACGATCG", using a MUM
/// length of 9, and expects the read itself as the first assembled sequence.
/// </summary>
public void TestPcaStep3InsertionInReference()
{
    // Reference: AACCTTGGCCTA GTACGGATATTG CCCACGATCG
    // Read:      AACCTTGGCCTA              CCCACGATCG
    var reference = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTAGTACGGATATTGCCCACGATCG");
    var read = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTACCCACGATCG");

    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 9;

    var output = asm.Assemble(
        new List<ISequence> { reference },
        new List<ISequence> { read });

    // Each sequence symbol is cast to a char to rebuild the contig text.
    char[] firstContigSymbols = output.ElementAt(0).Select(symbol => (char)symbol).ToArray();
    string res = new string(firstContigSymbols);

    Assert.AreEqual("AACCTTGGCCTACCCACGATCG", res);
}
/// <summary>
/// Assembles the reads "AAAAGGGG" and "ACGTTGCA" against the reference
/// "AAAACCCGGGGTTTTTTACGTGACTGCA" with a MUM length of 4 and expects the two reads
/// back, in that order, as the first and second assembled sequences.
/// </summary>
public void TestPcaStep3InsertionInReferenceThree()
{
    var refSeq = new Sequence(DnaAlphabet.Instance, "AAAACCCGGGGTTTTTTACGTGACTGCA");
    var firstRead = new Sequence(DnaAlphabet.Instance, "AAAAGGGG");
    var secondRead = new Sequence(DnaAlphabet.Instance, "ACGTTGCA");

    var asm = new ComparativeGenomeAssembler();
    asm.LengthOfMum = 4;

    var output = asm.Assemble(
        new List<ISequence> { refSeq },
        new List<ISequence> { firstRead, secondRead });

    // Each sequence symbol is cast to a char to rebuild the contig text.
    char[] firstSymbols = output.ElementAt(0).Select(symbol => (char)symbol).ToArray();
    string res = new string(firstSymbols);
    Assert.AreEqual("AAAAGGGG", res);

    char[] secondSymbols = output.ElementAt(1).Select(symbol => (char)symbol).ToArray();
    res = new string(secondSymbols);
    Assert.AreEqual("ACGTTGCA", res);
}
/// <summary>
/// Validates Assemble method .Step 1-5.
/// Reads the assembler settings and the reference/read file paths from the given Xml node,
/// assembles the reads against the reference with scaffolding enabled, then compares the
/// ordered, concatenated output (upper-cased) with the expected sequence (upper-cased).
/// </summary>
/// <param name="nodeName">Parent Node name in Xml</param>
public void ValidateComparativeAssembleMethod(string nodeName)
{
    var assemble = new ComparativeGenomeAssembler();
    List<ISequence> referenceSeqList;
    var expectedSequence = new StringBuilder(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode));

    // Raw (string) assembler settings from the configuration node.
    string mumLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string fixedSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode);
    string minimumScoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode);
    string separationFactorText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode);
    string maximumSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode);
    string breakLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BreakLengthNode);

    // Gets the reference sequence from the FastA file.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
    Assert.IsNotNull(filePath);
    ApplicationLog.WriteLine(string.Format(null,
        "Comparative BVT : Successfully validated the File Path '{0}'.", filePath));

    using (var parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> referenceList = parser.Parse();
        Assert.IsNotNull(referenceList);

        // Copy the sequences out while the parser is still open.
        referenceSeqList = referenceList.ToList();
    }

    // Get the reads file path from the configuration file.
    string readFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

    assemble.LengthOfMum = int.Parse(mumLengthText, CultureInfo.InvariantCulture);
    assemble.KmerLength = int.Parse(kmerLengthText, CultureInfo.InvariantCulture);
    assemble.ScaffoldingEnabled = true;
    assemble.FixedSeparation = int.Parse(fixedSeparationText, CultureInfo.InvariantCulture);
    assemble.MinimumScore = int.Parse(minimumScoreText, CultureInfo.InvariantCulture);
    assemble.SeparationFactor = float.Parse(separationFactorText, CultureInfo.InvariantCulture);
    assemble.MaximumSeparation = int.Parse(maximumSeparationText, CultureInfo.InvariantCulture);
    assemble.BreakLength = int.Parse(breakLengthText, CultureInfo.InvariantCulture);

    using (var queryparser = new FastASequencePositionParser(readFilePath))
    {
        IEnumerable<ISequence> output = assemble.Assemble(referenceSeqList, queryparser);

        // Ordering makes the comparison independent of the order the contigs come back in.
        var orderedContigs = output.Select(seq => seq.ConvertToString()).OrderBy(c => c);
        string longOutput = string.Concat(orderedContigs);

        string expectedUpper = expectedSequence.ToString().ToUpperInvariant();
        string actualUpper = longOutput.ToUpperInvariant();
        Assert.AreEqual(expectedUpper, actualUpper);
    }
}