/// <summary>
/// Runs the assembler steps (graph creation, dangling-link purge, redundant-path
/// purge, contig building) on the scaffold test reads and then feeds the reads and
/// contigs to <see cref="GraphScaffoldBuilder"/>. The scaffold output is handed to
/// AlignmentHelpers.CompareSequenceLists together with eight expected strings.
/// </summary>
public void BuildScaffold()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundancyLimit = 7;

    List<ISequence> reads = new List<ISequence>(TestInputs.GetReadsForScaffolds());

    // Build the graph first; the purger settings are assigned afterwards,
    // in the same order the steps are subsequently invoked.
    KmerLength = kmerSize;
    SequenceReads.Clear();
    this.SetSequenceReads(reads);
    CreateGraph();

    DanglingLinksThreshold = danglingLimit;
    DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);
    RedundantPathLengthThreshold = redundancyLimit;
    RedundantPathsPurger = new RedundantPathsPurger(redundancyLimit);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> builtContigs = BuildContigs().ToList();
    CloneLibrary.Instance.AddLibrary("abc", 5, 20);

    HashSet<string> expectedScaffolds = new HashSet<string>
    {
        "ATGCCTCCTATCTTAGCGCGC",
        "CGCGCCGCGC",
        "TTTTTT",
        "CGCGCG",
        "TTTTAGC",
        "TTTTTA",
        "TTTAAA",
        "TTTTAA",
    };

    using (GraphScaffoldBuilder scaffoldBuilder = new GraphScaffoldBuilder())
    {
        IEnumerable<ISequence> scaffolds =
            scaffoldBuilder.BuildScaffold(reads, builtContigs, this.KmerLength, 3, 0);

        AlignmentHelpers.CompareSequenceLists(expectedScaffolds, scaffolds);
    }
}
/// <summary>
/// Runs graph creation, dangling-link purge, redundant-path purge and contig
/// building on the scaffold test reads, then builds scaffolds with
/// <see cref="GraphScaffoldBuilder"/>. Asserts that eight scaffolds come back and
/// that the first one reads "ATGCCTCCTATCTTAGCGCGC".
/// </summary>
public void BuildScaffold()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    List<ISequence> sequences = TestInputs.GetReadsForScaffolds();
    KmerLength = kmerLength;
    SequenceReads.Clear();
    this.AddSequenceReads(sequences);
    CreateGraph();

    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> contigs = BuildContigs();
    CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);

    GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder();
    IList<ISequence> scaffoldSeq = scaffold.BuildScaffold(
        sequences, contigs, this.KmerLength, 3, 0);

    // Expected value goes first so a failure reports "expected 8, got N"
    // rather than the reverse.
    Assert.AreEqual(8, scaffoldSeq.Count);

    // AreEqual (instead of IsTrue(Equals)) prints the actual sequence on failure.
    Assert.AreEqual("ATGCCTCCTATCTTAGCGCGC", scaffoldSeq[0].ToString());
}
/// <summary>
/// Runs graph creation, dangling-link purge, redundant-path purge and contig
/// building on the scaffold test reads, asserts that ten contigs were built, then
/// builds scaffolds with <see cref="GraphScaffoldBuilder"/>. Asserts that eight
/// scaffolds come back and that the first one reads "ATGCCTCCTATCTTAGCGCGC".
/// </summary>
public void BuildScaffold()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    List<ISequence> sequences = new List<ISequence>(TestInputs.GetReadsForScaffolds());
    KmerLength = kmerLength;
    SequenceReads.Clear();
    this.SetSequenceReads(sequences);
    CreateGraph();

    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> contigs = BuildContigs().ToList();
    Assert.AreEqual(10, contigs.Count);

    CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);

    // The using block guarantees the builder is disposed even when one of the
    // assertions below throws; a trailing Dispose() call would be skipped.
    using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
    {
        // Materialise once: the result is inspected twice (count and first item)
        // and the returned IEnumerable should not be enumerated repeatedly.
        IList<ISequence> scaffoldSeq = scaffold.BuildScaffold(
            sequences, contigs, this.KmerLength, 3, 0).ToList();

        Assert.AreEqual(8, scaffoldSeq.Count);

        // Each sequence item is cast to char to rebuild the text form.
        string firstScaffold = new string(scaffoldSeq[0].Select(a => (char)a).ToArray());
        Assert.AreEqual("ATGCCTCCTATCTTAGCGCGC", firstScaffold);
    }
}
/// <summary>
/// Builds a graph from the redundant-path test reads (k-mer length 6, redundant
/// path threshold 10), removes redundancy, and builds contigs with
/// <see cref="SimplePathContigBuilder"/>. Asserts that exactly one contig,
/// "ATGCCTCCTATCTTAGCGATGCGGTGT", is produced and that the graph's node count and
/// summed extension count are the same before and after contig building.
/// </summary>
public void TestContigBuilder2()
{
    // camelCase locals avoid shadowing the KmerLength member assigned below.
    const int kmerLength = 6;
    const int redundantThreshold = 10;

    List<ISequence> readSeqs = TestInputs.GetRedundantPathReads();
    SequenceReads.Clear();
    this.SetSequenceReads(readSeqs);
    this.KmerLength = kmerLength;
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    ContigBuilder = new SimplePathContigBuilder();
    CreateGraph();
    RemoveRedundancy();

    long graphCount = Graph.NodeCount;
    long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // Materialise immediately: the contigs are inspected more than once below,
    // and forcing enumeration here ensures contig building has actually run
    // before the graph is measured again (BuildContigs returns an IEnumerable,
    // which may or may not be lazily evaluated).
    IList<ISequence> contigs = BuildContigs().ToList();

    long contigsBuiltGraphCount = Graph.NodeCount;
    long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    Assert.AreEqual(1, contigs.Count);
    string s = new string(contigs[0].Select(a => (char)a).ToArray());
    Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", s);

    // Compare the two graphs
    Assert.AreEqual(graphCount, contigsBuiltGraphCount);
    Assert.AreEqual(graphEdges, contigsBuilt);
}
/// <summary>
/// Collects diverging path lists from the graph and returns those whose member
/// paths do not all contain the same number of nodes, after duplicate and
/// embedded entries have been filtered out.
/// </summary>
/// <param name="deBruijnGraph">Graph whose nodes are scanned.</param>
/// <returns>The filtered list of path lists.</returns>
private List<DeBruijnPathList> GetIndelPaths(DeBruijnGraph deBruijnGraph)
{
    List<DeBruijnPathList> divergingPaths = new List<DeBruijnPathList>();

    // NOTE(review): divergingPaths is shared by all parallel iterations;
    // presumably TraceDivergingExtensionPaths synchronises its writes - confirm.
    Parallel.ForEach(
        deBruijnGraph.GetNodes(),
        current =>
        {
            // Nodes that reference themselves are skipped entirely.
            if (current.ContainsSelfReference)
            {
                return;
            }

            // Ambiguity is checked on the right side first, then on the left.
            if (current.RightExtensionNodesCount > 1)
            {
                TraceDivergingExtensionPaths(
                    current,
                    current.GetRightExtensionNodesWithOrientation(),
                    true,
                    divergingPaths);
            }

            if (current.LeftExtensionNodesCount > 1)
            {
                TraceDivergingExtensionPaths(
                    current,
                    current.GetLeftExtensionNodesWithOrientation(),
                    false,
                    divergingPaths);
            }
        });

    RedundantPathsPurger.ValidatePathsAreFromSameDirection(divergingPaths, deBruijnGraph.KmerLength);

    // Keep only the path lists whose paths differ in node count.
    List<DeBruijnPathList> unequalLength = divergingPaths
        .Where(pathList => pathList.Paths
            .Select(path => path.PathNodes.Count)
            .Distinct()
            .Count() != 1)
        .ToList();

    // TODO: Could merge the two filters here
    List<DeBruijnPathList> withoutDuplicates = RemoveDuplicates(unequalLength);
    List<DeBruijnPathList> withoutEmbedded = RemoveEmbeddedPaths(withoutDuplicates);
    return withoutEmbedded;
}
/// <summary>
/// Builds a graph from the redundant-path test reads (k-mer length 6, redundant
/// path threshold 10), removes redundancy, and builds contigs with
/// <see cref="SimplePathContigBuilder"/>. Asserts that exactly one contig,
/// "ATGCCTCCTATCTTAGCGATGCGGTGT", is produced and that the graph's node count and
/// summed extension count are the same before and after contig building.
/// </summary>
public void TestContigBuilder2()
{
    const int kmerSize = 6;
    const int redundancyLimit = 10;

    List<ISequence> reads = TestInputs.GetRedundantPathReads();
    SequenceReads.Clear();
    this.AddSequenceReads(reads);

    this.KmerLength = kmerSize;
    RedundantPathLengthThreshold = redundancyLimit;
    RedundantPathsPurger = new RedundantPathsPurger(redundancyLimit);
    ContigBuilder = new SimplePathContigBuilder();

    CreateGraph();
    RemoveRedundancy();

    // Snapshot of the graph before contigs are built.
    int nodesBefore = Graph.Nodes.Count;
    int extensionsBefore = Graph.Nodes.Select(node => node.ExtensionsCount).Sum();

    IList<ISequence> builtContigs = BuildContigs();

    // Snapshot of the graph after contigs are built.
    int nodesAfter = Graph.Nodes.Count;
    int extensionsAfter = Graph.Nodes.Select(node => node.ExtensionsCount).Sum();

    Assert.AreEqual(1, builtContigs.Count);
    Assert.AreEqual("ATGCCTCCTATCTTAGCGATGCGGTGT", builtContigs[0].ToString());

    // The two snapshots must match.
    Assert.AreEqual(nodesBefore, nodesAfter);
    Assert.AreEqual(extensionsBefore, extensionsAfter);
}
/// <summary>
/// Builds a graph from the dangling test reads (k-mer length 11, dangle
/// threshold 3, redundant path threshold 10), purges dangling links and
/// redundant paths, and builds contigs with <see cref="SimplePathContigBuilder"/>.
/// Asserts that exactly one contig is produced, that it is
/// "ATCGCTAGCATCGAACGATCATT", and that the graph's node count and summed
/// extension count are the same before and after contig building.
/// </summary>
public void TestContigBuilder1()
{
    // camelCase locals avoid shadowing the KmerLength member assigned below.
    const int kmerLength = 11;
    const int dangleThreshold = 3;
    const int redundantThreshold = 10;

    List<ISequence> readSeqs = TestInputs.GetDanglingReads();
    this.SequenceReads.Clear();
    this.SetSequenceReads(readSeqs);
    this.KmerLength = kmerLength;
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    ContigBuilder = new SimplePathContigBuilder();
    CreateGraph();
    UnDangleGraph();
    RemoveRedundancy();

    long graphCount = Graph.NodeCount;
    long graphEdges = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    // Materialise immediately: the contigs are counted and then iterated below,
    // and forcing enumeration here ensures contig building has actually run
    // before the graph is measured again (BuildContigs returns an IEnumerable,
    // which may or may not be lazily evaluated).
    IList<ISequence> contigs = BuildContigs().ToList();

    long contigsBuiltGraphCount = this.Graph.NodeCount;
    long contigsBuilt = Graph.GetNodes().Select(n => n.ExtensionsCount).Sum();

    Assert.AreEqual(1, contigs.Count);

    HashSet<string> expectedContigs = new HashSet<string>()
    {
        "ATCGCTAGCATCGAACGATCATT"
    };

    foreach (ISequence contig in contigs)
    {
        // Each sequence item is cast to char to rebuild the text form.
        string s = new string(contig.Select(a => (char)a).ToArray());
        Assert.IsTrue(expectedContigs.Contains(s));
    }

    // Compare the two graphs
    Assert.AreEqual(graphCount, contigsBuiltGraphCount);
    Assert.AreEqual(graphEdges, contigsBuilt);
}
/// <summary>
/// Assembles sixteen hand-written DNA reads (k-mer length 6, dangle threshold 3,
/// redundant path threshold 7) into contigs, maps reads to contigs and to mate
/// pairs using library "abc", filters and distances the pairs, builds the contig
/// graph, and traces scaffold paths. Asserts that three paths are found, that the
/// first has three nodes, and that those nodes read "ATGCCTCCTATCTTAGC",
/// "TTAGCGCG" and "GCGCGC".
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    // Reads in their original order; only some carry a library-style DisplayID.
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = ">10.x1:abc" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = ">8.x1:abc" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { DisplayID = ">8.y1:abc" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { DisplayID = ">9.x1:abc" },
        new Sequence(Alphabets.DNA, "TTTTTT") { DisplayID = "7" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { DisplayID = "8" },
        new Sequence(Alphabets.DNA, "TAAAAA") { DisplayID = "9" },
        new Sequence(Alphabets.DNA, "TTTTAG") { DisplayID = "10" },
        new Sequence(Alphabets.DNA, "TTTAGC") { DisplayID = "11" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { DisplayID = "12" },
    };

    KmerLength = kmerLength;
    SequenceReads.Clear();
    AddSequenceReads(sequences);
    CreateGraph();

    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> contigs = BuildContigs();

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator();
    dist.CalculateDistance(overlap);

    Graph.BuildContigGraph(contigs, this.KmerLength);

    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

    // Expected value first, so failure messages are not reversed.
    Assert.AreEqual(3, paths.Count);
    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    // AreEqual (instead of IsTrue(Equals)) prints the actual sequence on failure.
    DeBruijnGraph graph = Graph;
    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
}
/// <summary>
/// Assembles sixteen hand-written DNA reads (k-mer length 5, dangle threshold 3,
/// redundant path threshold 6) into contigs, maps reads to contigs and to mate
/// pairs using library "abc", filters and distances the pairs, builds a
/// <see cref="ContigGraph"/>, and traces scaffold paths. Asserts that three paths
/// are found, that the first has three nodes, and that those nodes read
/// "ATGCCTCCTATCTTAGC", "TTAGCGCG" and "GCGCGC".
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();

    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();

    IList<ISequence> contigs = BuildContigs().ToList();

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

    // The calculator's result replaces the filtered pairs for path tracing.
    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);

    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value first, so failure messages are not reversed.
    Assert.AreEqual(3, paths.Count);
    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}