/// <summary>
/// Builds the overlap graph for the supplied contigs.
/// </summary>
/// <param name="contigs">Contig sequences the graph is built from.</param>
/// <returns>A new contig graph built from <paramref name="contigs"/> using this instance's k-mer length.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null.</exception>
protected ContigGraph GenerateContigOverlapGraph(IList<ISequence> contigs)
{
    if (contigs != null)
    {
        // The graph is handed to the caller, so it is deliberately not disposed here.
        var overlapGraph = new ContigGraph();
        overlapGraph.BuildContigGraph(contigs, this.kmerLength);
        return overlapGraph;
    }

    throw new ArgumentNullException("contigs");
}
/// <summary>
/// Runs the assembly steps (graph creation, dangling-link purging, redundant-path
/// purging, contig building, read-to-contig mapping, mate-pair mapping, orientation
/// filtering and distance calculation) on a fixed set of reads, then checks the
/// scaffold paths returned by <c>TracePath.FindPaths</c>.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Build the graph from the reads.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    // Configure and run error correction (dangling links, redundant paths).
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    // Build contigs and map the reads onto them.
    IList<ISequence> contigs = BuildContigs().ToList();
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    // Map mate pairs, filter them by orientation and calculate distances.
    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);
    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    // The graph is disposable; keep every use of it (including the node-sequence
    // lookups in the assertions) inside the using block.
    using (ContigGraph graph = new ContigGraph())
    {
        graph.BuildContigGraph(contigs, this.KmerLength);
        TracePath path = new TracePath();
        IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

        // Expected value first, actual second, so failure messages read correctly.
        Assert.AreEqual(3, paths.Count);

        ScaffoldPath scaffold = paths.First();
        Assert.AreEqual(3, scaffold.Count);
        Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
        Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
        Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
    }
}
/// <summary>
/// Validate assembled paths for a given set of input reads.
/// Reads the test parameters from the xml node, runs the assembly steps up to
/// scaffold path tracing and path purging, and asserts that every sequence
/// built from the remaining paths is listed among the expected sequences.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateAssembledPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
    string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

    // Parse each numeric setting once instead of re-parsing the same text at every use.
    int kmerLength = Int32.Parse(kmerLengthText, null);
    int danglingThreshold = Int32.Parse(danglingThresholdText, null);
    int redundantThreshold = Int32.Parse(redundantThresholdText, null);
    int expectedDepth = Int32.Parse(expectedDepthText, null);

    // Get the input reads. The parser is closed even when parsing throws.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Configure the assembler and build the graph from the reads.
    this.KmerLength = kmerLength;
    this.DanglingLinksThreshold = danglingThreshold;
    this.DanglingLinksPurger = new DanglingLinksPurger(danglingThreshold);
    this.RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    this.RedundantPathLengthThreshold = redundantThreshold;
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    this.CreateGraph();

    // The contig graph is disposable; everything that touches it stays inside the using block.
    using (ContigGraph graph = new ContigGraph())
    {
        // Remove dangling links.
        this.UnDangleGraph();

        // Remove redundant paths, then build contigs.
        this.ContigBuilder = new SimplePathContigBuilder();
        this.RemoveRedundancy();

        // Materialize the contigs once. SortContigsData receives its own copy so the
        // list handed to the contig graph below keeps the order BuildContigs produced.
        List<ISequence> contigs = this.BuildContigs().ToList();
        IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
        ReadContigMapper mapper = new ReadContigMapper();
        ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

        // Find map paired reads.
        CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
        MatePairMapper mapPairedReads = new MatePairMapper();
        ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

        // Filter contigs based on the orientation.
        OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
        ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

        DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
        dist.CalculateDistance();

        graph.BuildContigGraph(contigs, this.KmerLength);

        // Trace scaffold paths.
        TracePath trace = new TracePath();
        IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerLength, expectedDepth);

        // Assemble paths.
        PathPurger pathsAssembler = new PathPurger();
        pathsAssembler.PurgePath(paths);

        // Get sequences from assembled path.
        IList<ISequence> seqList = paths.Select(temp => temp.BuildSequenceFromPath(graph, kmerLength)).ToList();

        // Validate assembled sequence paths.
        foreach (string sequence in seqList.Select(t => t.ConvertToString()))
        {
            Assert.IsTrue(assembledPath.Contains(sequence), "Failed to locate " + sequence);
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : AssemblePath() validation for Padena step6:step7 completed successfully");
}
/// <summary>
/// Validate scaffold paths for a given set of input reads.
/// Reads the test parameters from the xml node, runs the assembly steps up to
/// scaffold path tracing, and asserts that every node sequence of the first
/// scaffold path (or its reverse complement) is listed among the expected nodes.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateScaffoldPath(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThresholdText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string expectedDepthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

    // Parse each numeric setting once instead of re-parsing the same text at every use.
    int kmerLength = Int32.Parse(kmerLengthText, null);
    int danglingThreshold = Int32.Parse(danglingThresholdText, null);
    int redundantThreshold = Int32.Parse(redundantThresholdText, null);
    int expectedDepth = Int32.Parse(expectedDepthText, null);

    // Get the input reads.
    using (FastAParser parser = new FastAParser(filePath))
    {
        // Materialize the reads once: the sequence is consumed three times below
        // (graph input, read mapping, mate-pair mapping) and would otherwise be
        // re-enumerated from the parser each time.
        IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();

        // Configure the assembler and build the graph from the reads.
        this.KmerLength = kmerLength;
        this.DanglingLinksThreshold = danglingThreshold;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingThreshold);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
        this.RedundantPathLengthThreshold = redundantThreshold;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();

        // The contig graph is disposable; everything that touches it stays inside the using block.
        using (ContigGraph graph = new ContigGraph())
        {
            // Remove dangling links.
            this.UnDangleGraph();

            // Remove redundant paths, then build contigs.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();

            // Materialize the contigs once. SortContigsData receives its own copy so the
            // list handed to the contig graph below keeps the order BuildContigs produced.
            List<ISequence> contigs = this.BuildContigs().ToList();
            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, null), float.Parse(stdDeviation, null));
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
            dist.CalculateDistance();

            graph.BuildContigGraph(contigs, this.KmerLength);

            // Trace scaffold paths and validate the first one.
            TracePath trace = new TracePath();
            IList<ScaffoldPath> paths = trace.FindPaths(graph, contigpairedReads, kmerLength, expectedDepth);
            ScaffoldPath scaffold = paths.First();

            foreach (KeyValuePair<Node, Edge> kvp in scaffold)
            {
                ISequence seq = graph.GetNodeSequence(kvp.Key);
                string sequence = seq.ConvertToString();
                string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

                // A node matches when either orientation is listed in the expected nodes.
                Assert.IsTrue(
                    expectedScaffoldNodes.Contains(sequence) || expectedScaffoldNodes.Contains(reversedSequence),
                    "Failed to find " + sequence + ", or " + reversedSequence);
            }
        }
    }

    ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
}
/// <summary>
/// Builds a contig graph from the k-mers of a known sequence, adds the first
/// eleven graph nodes to a scaffold path and checks that
/// <c>BuildSequenceFromPath</c> yields the original sequence text.
/// </summary>
public void ValidateBuildSequenceFromPath()
{
    const int kmerLength = 7;
    const string expectedSequence = "GATTCAAGGGCTGGGGG";

    // The 17-symbol sequence has 17 - 7 + 1 = 11 k-mers of length 7.
    const int nodeCount = 11;

    ISequence sequence = new Sequence(Alphabets.DNA, expectedSequence);
    ISequence sequenceNew;
    IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, kmerLength).ToList();

    using (ContigGraph graph = new ContigGraph())
    {
        graph.BuildContigGraph(contigsSequence, kmerLength);
        List<Node> contigs = graph.Nodes.ToList();

        ScaffoldPath path = new ScaffoldPath();
        foreach (Node node in contigs.GetRange(0, nodeCount))
        {
            path.Add(new KeyValuePair<Node, Edge>(node, new Edge(true)));
        }

        sequenceNew = path.BuildSequenceFromPath(graph, kmerLength);
    }

    Assert.IsNotNull(sequenceNew);

    // Expected value first, actual second, so failure messages read correctly.
    Assert.AreEqual(expectedSequence, new string(sequenceNew.Select(a => (char)a).ToArray()));
}