/// <summary>
/// Maps one read onto one contig and verifies the reported alignment.
/// The read "CCTTATCAG" is the reverse complement of the contig region
/// "CTGATAAGG", which starts at contig offset 1.
/// </summary>
public void MapReadToContig2()
{
    const int kmerLength = 6;

    // Arrange: a single contig and a single read.
    Sequence contig = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray()) { ID = "1" };
    Sequence read = new Sequence(Alphabets.DNA, "CCTTATCAG".Select(a => (byte)a).ToArray()) { ID = "2" };
    IList<ISequence> contigs = new List<ISequence> { contig };
    IList<ISequence> reads = new List<ISequence> { read };

    // Act
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap map = mapper.Map(contigs, reads, kmerLength);

    // Assert (expected value first, so failure messages read correctly).
    Assert.AreEqual(reads.Count, map.Count);

    Dictionary<ISequence, IList<ReadMap>> alignment = map[reads[0].ID];
    IList<ReadMap> readMap = alignment[contigs[0]];
    Assert.AreEqual(9, readMap[0].Length);
    Assert.AreEqual(1, readMap[0].StartPositionOfContig);
    Assert.AreEqual(0, readMap[0].StartPositionOfRead);
    Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
}
/// <summary>
/// Runs read mapping, mate-pair mapping, orientation filtering and distance
/// calculation for six reads over two contigs, then verifies the distance,
/// standard deviation and weight reported for the single contig pair.
/// </summary>
public void DistanceCalculationwithTwoContigs()
{
    const int KmerLength = 6;

    // Floating point results are compared with a tolerance instead of exact equality.
    const float Tolerance = 0.001f;

    IList<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.X1:0.5K"
        },
        new Sequence(Alphabets.DNA, "ATCTGATAAG")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.F:0.5K"
        },
        new Sequence(Alphabets.DNA, "TCTGATAAGG")
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.2:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTTTGATGG")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.Y1:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTTGATGGC")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.R:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTGATGGCA")
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.1:0.5K"
        }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
    };

    // Pipeline: map reads -> pair mates -> filter by orientation -> compute distances.
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, KmerLength);

    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairs = mapPairedReads.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairs);

    DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
    contigpairedReads = calc.CalculateDistance();

    Assert.AreEqual(1, contigpairedReads.Values.Count);
    Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));

    Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];
    Assert.IsTrue(map.ContainsKey(contigs[1]));

    ValidMatePair first = map[contigs[1]].First();
    Assert.AreEqual((float)478.000031, first.DistanceBetweenContigs[0], Tolerance);
    Assert.AreEqual((float)477.0, first.DistanceBetweenContigs[1], Tolerance);
    Assert.AreEqual((float)14.1421356, first.StandardDeviation[0], Tolerance);
    Assert.AreEqual((float)14.1421356, first.StandardDeviation[1], Tolerance);
    Assert.AreEqual(2, first.Weight);
}
/// <summary>
/// Same pipeline as the two-contig distance test, but the read IDs carry two
/// different suffixes ("2K" and "0.5K"). Verifies the distance, standard
/// deviation and weight reported for the single contig pair.
/// </summary>
public void DistanceCalculationwithTwoContigsWeightedMean()
{
    const int KmerLength = 6;

    // Floating point results are compared with a tolerance instead of exact equality.
    const float Tolerance = 0.001f;

    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                + "substrate 1 (IRS1) on chromosome 2.x1:2K"
        },
        new Sequence(Alphabets.DNA, "ATCTGATAAG".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                + "on chromosome 2.f:0.5K"
        },
        new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor"
                + "on chromosome 2.2:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTTTGATGG".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                + "substrate 1 (IRS1) on chromosome 2.y1:2K"
        },
        new Sequence(Alphabets.DNA, "TTTTGATGGC".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                + "on chromosome 2.r:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTGATGGCA".Select(a => (byte)a).ToArray())
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor"
                + "on chromosome 2.1:0.5K"
        }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG".Select(a => (byte)a).ToArray()),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA".Select(a => (byte)a).ToArray())
    };

    // Pipeline: map reads -> pair mates -> filter by orientation -> compute distances.
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, KmerLength);

    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads);

    DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
    contigpairedReads = calc.CalculateDistance();

    Assert.AreEqual(1, contigpairedReads.Values.Count);
    Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));

    Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];
    Assert.IsTrue(map.ContainsKey(contigs[1]));

    ValidMatePair first = map[contigs[1]].First();
    Assert.AreEqual((float)1228.0, first.DistanceBetweenContigs[0], Tolerance);
    Assert.AreEqual((float)1227.0, first.DistanceBetweenContigs[1], Tolerance);
    Assert.AreEqual((float)60, first.StandardDeviation[0], Tolerance);
    Assert.AreEqual((float)60, first.StandardDeviation[1], Tolerance);
    Assert.AreEqual(2, first.Weight);
}
/// <summary>
/// Maps six reads onto two contigs, pairs the mates and applies the
/// orientation based filter, then verifies the read start positions recorded
/// for the two mate pairs that link contig 0 to contig 1.
/// </summary>
public void FilterMatePairWithTwoContigs()
{
    const int kmerLength = 6;

    IList<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.X1:0.5K"
        },
        new Sequence(Alphabets.DNA, "ATCTGATAAG")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.F:0.5K"
        },
        new Sequence(Alphabets.DNA, "TCTGATAAGG")
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.2:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTTTGATGG")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.Y1:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTTGATGGC")
        {
            ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.R:0.5K"
        },
        new Sequence(Alphabets.DNA, "TTTGATGGCA")
        {
            ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.1:0.5K"
        }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
    };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);

    MatePairMapper mapPairedReads = new MatePairMapper();
    ContigMatePairs pairs = mapPairedReads.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairs);

    Assert.AreEqual(1, contigpairedReads.Values.Count);
    Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));

    Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];
    Assert.IsTrue(map.ContainsKey(contigs[1]));

    // Sort is a helper defined elsewhere in this file; the positional
    // assertions below rely on the order it produces.
    List<ValidMatePair> valid = Sort(map[contigs[1]], sequences);

    Assert.AreEqual(1, valid[0].ForwardReadStartPosition[0]);
    Assert.AreEqual(10, valid[0].ReverseReadReverseComplementStartPosition[0]);
    Assert.AreEqual(10, valid[0].ReverseReadStartPosition[0]);

    Assert.AreEqual(0, valid[1].ForwardReadStartPosition[0]);
    Assert.AreEqual(11, valid[1].ReverseReadReverseComplementStartPosition[0]);
    Assert.AreEqual(9, valid[1].ReverseReadStartPosition[0]);
}
/// <summary>
/// Maps six reads (three mate pairs, display IDs of the form "n.tag:abc")
/// onto two contigs and verifies the contig-to-contig mate pair map built by
/// <c>MatePairMapper.MapContigToMatePairs</c>.
/// </summary>
public void ContigPairReadMap()
{
    const int kmerLength = 6;

    IList<ISequence> readSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA") { DisplayID = "0.x1:abc" },
        new Sequence(Alphabets.DNA, "ATCTGATAAG") { DisplayID = "1.F:abc" },
        new Sequence(Alphabets.DNA, "TCTGATAAGG") { DisplayID = "2.2:abc" },
        new Sequence(Alphabets.DNA, "TTTTTGATGG") { DisplayID = "0.y1:abc" },
        new Sequence(Alphabets.DNA, "TTTTGATGGC") { DisplayID = "1.R:abc" },
        new Sequence(Alphabets.DNA, "TTTGATGGCA") { DisplayID = "2.1:abc" }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
    };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);

    MatePairMapper pair = new MatePairMapper();
    ContigMatePairs map = pair.MapContigToMatePairs(readSeqs, maps);

    Assert.AreEqual(2, map.Count);

    // Contig 0 -> contig 1: two mate pairs expected.
    Dictionary<ISequence, IList<ValidMatePair>> reverseContigs;
    Assert.IsTrue(map.TryGetValue(contigs[0], out reverseContigs));
    Assert.AreEqual(1, reverseContigs.Count);

    IList<ValidMatePair> matePairs;
    Assert.IsTrue(reverseContigs.TryGetValue(contigs[1], out matePairs));
    Assert.AreEqual(2, matePairs.Count);
    Assert.AreEqual(0, matePairs[0].ForwardReadStartPosition.First());
    Assert.AreEqual(9, matePairs[0].ReverseReadStartPosition.First());
    Assert.AreEqual(1, matePairs[1].ForwardReadStartPosition.First());
    Assert.AreEqual(10, matePairs[1].ReverseReadStartPosition.First());

    // Contig 1 -> contig 0: one mate pair expected.
    Assert.IsTrue(map.TryGetValue(contigs[1], out reverseContigs));
    Assert.AreEqual(1, reverseContigs.Count);
    Assert.IsTrue(reverseContigs.TryGetValue(contigs[0], out matePairs));
    Assert.AreEqual(1, matePairs.Count);
    Assert.AreEqual(2, matePairs[0].ForwardReadStartPosition.First());
    Assert.AreEqual(11, matePairs[0].ReverseReadStartPosition.First());
}
/// <summary>
/// Builds the map of contig pairs that are connected by mate-paired reads.
/// </summary>
/// <param name="reads">Reads whose display IDs are matched against the read-name pattern.</param>
/// <param name="alignment">
/// Read-to-contig alignment. The entries of every read that gets paired, and of
/// its mate, are removed from this map while it is processed.
/// </param>
/// <returns>Contig mate pair map.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="alignment"/> or <paramref name="reads"/> is null.
/// </exception>
public ContigMatePairs MapContigToMatePairs(IList <ISequence> reads, ReadContigMap alignment)
{
    if (alignment == null)
    {
        throw new ArgumentNullException("alignment");
    }

    if (reads == null)
    {
        throw new ArgumentNullException("reads");
    }

    ContigMatePairs contigMatePairs = new ContigMatePairs();

    foreach (ISequence read in reads)
    {
        Match match = _readExpression.Match(read.DisplayID);
        if (!match.Success)
        {
            continue;
        }

        String mateDisplayID = GenerateExpression(match);

        // Both the read and its mate must be present in the alignment.
        Dictionary <ISequence, IList <ReadMap> > readContigs;
        Dictionary <ISequence, IList <ReadMap> > mateContigs;
        if (!alignment.TryGetValue(read.DisplayID, out readContigs)
            || !alignment.TryGetValue(mateDisplayID, out mateContigs))
        {
            continue;
        }

        // Tags for which the current read is passed as the first member of the pair.
        bool readComesFirst;
        switch (match.Groups[2].Value)
        {
            case "X1":
            case "F":
            case "1":
            case "x1":
            case "f":
            case "a":
            case "A":
                readComesFirst = true;
                break;
            default:
                readComesFirst = false;
                break;
        }

        if (readComesFirst)
        {
            MatePair pair = new MatePair(read.DisplayID, mateDisplayID, match.Groups[3].Value);
            ContigMatePairMapper(readContigs, mateContigs, pair, contigMatePairs);
        }
        else
        {
            MatePair pair = new MatePair(mateDisplayID, read.DisplayID, match.Groups[3].Value);
            ContigMatePairMapper(mateContigs, readContigs, pair, contigMatePairs);
        }

        // Drop both entries so the pair is not processed again when the mate is visited.
        alignment.Remove(read.DisplayID);
        alignment.Remove(mateDisplayID);
    }

    return contigMatePairs;
}
/// <summary>
/// Builds the contig mate pair map by forwarding to
/// <c>mapPairedReads.MapContigToMatePairs</c>. Note that the callee takes the
/// arguments in the opposite order (reads first, then the read-contig map).
/// </summary>
/// <param name="readContigMaps">Map between reads and contigs.</param>
/// <param name="reads">Sequences of reads.</param>
/// <returns>Contig mate pair map produced by the mate pair mapper.</returns>
protected ContigMatePairs MapPairedReadsToContigs(ReadContigMap readContigMaps, IEnumerable<ISequence> reads)
{
    ContigMatePairs contigMatePairs = this.mapPairedReads.MapContigToMatePairs(reads, readContigMaps);
    return contigMatePairs;
}
/// <summary>
/// Builds the graph and contigs from sixteen reads, maps mate pairs onto the
/// contigs and traces scaffold paths. Expects three paths; the first has three
/// nodes and ends in "GCGCGC", which equals its own reverse complement.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLength = 6;
    const int dangleThreshold = 3;
    const int redundantThreshold = 7;

    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = ">10.x1:abc" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = ">8.x1:abc" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { DisplayID = ">8.y1:abc" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { DisplayID = ">9.x1:abc" },
        new Sequence(Alphabets.DNA, "TTTTTT") { DisplayID = "7" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { DisplayID = "8" },
        new Sequence(Alphabets.DNA, "TAAAAA") { DisplayID = "9" },
        new Sequence(Alphabets.DNA, "TTTTAG") { DisplayID = "10" },
        new Sequence(Alphabets.DNA, "TTTAGC") { DisplayID = "11" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { DisplayID = "12" }
    };

    // Assemble: build the graph, clean it up and extract contigs.
    KmerLength = kmerLength;
    SequenceReads.Clear();
    AddSequenceReads(sequences);
    CreateGraph();
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();
    IList<ISequence> contigs = BuildContigs();

    // Scaffold: map reads, pair mates, filter, compute distances, trace paths.
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator();
    dist.CalculateDistance(overlap);

    Graph.BuildContigGraph(contigs, this.KmerLength);
    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

    Assert.AreEqual(3, paths.Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    // AreEqual (rather than IsTrue on Equals) reports the actual sequence on failure.
    DeBruijnGraph graph = Graph;
    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
}
/// <summary>
/// Maps five reads onto a single contig and verifies, for every read, the
/// length of the mapping, its start position on the contig, its start position
/// on the read and that the overlap is reported as a full overlap.
/// </summary>
public void MapReadsWithSingleContigRightTraversal()
{
    const int kmerLength = 6;

    IList<ISequence> readSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATGCCTC".Select(a => (byte)a).ToArray()) { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT".Select(a => (byte)a).ToArray()) { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC".Select(a => (byte)a).ToArray()) { ID = "2" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT".Select(a => (byte)a).ToArray()) { ID = "3" },
        new Sequence(Alphabets.DNA, "TGCCTCCT".Select(a => (byte)a).ToArray()) { ID = "4" }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATGCCTCCTATC".Select(a => (byte)a).ToArray())
    };

    // Expected values, indexed by read position in readSeqs.
    int[] expectedLengths = { 8, 8, 7, 9, 8 };
    int[] expectedContigStarts = { 0, 4, 6, 3, 2 };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);

    Assert.AreEqual(readSeqs.Count, maps.Count);

    for (int i = 0; i < readSeqs.Count; i++)
    {
        string context = "read index " + i;
        Dictionary<ISequence, IList<ReadMap>> map = maps[readSeqs[i].ID];
        IList<ReadMap> readMap = map[contigs[0]];

        Assert.AreEqual(expectedLengths[i], readMap[0].Length, context);
        Assert.AreEqual(expectedContigStarts[i], readMap[0].StartPositionOfContig, context);
        Assert.AreEqual(0, readMap[0].StartPositionOfRead, context);
        Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap, context);
    }
}
/// <summary>
/// Builds the map of contig pairs that are connected by mate-paired reads.
/// </summary>
/// <param name="reads">Reads whose IDs are matched against the read-name pattern.</param>
/// <param name="alignment">
/// Read-to-contig alignment. The entries of every read that gets paired, and of
/// its mate, are removed from this map while it is processed.
/// </param>
/// <returns>Contig mate pair map.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="alignment"/> or <paramref name="reads"/> is null.
/// </exception>
public ContigMatePairs MapContigToMatePairs(IEnumerable<ISequence> reads, ReadContigMap alignment)
{
    if (alignment == null)
    {
        throw new ArgumentNullException("alignment");
    }

    if (reads == null)
    {
        throw new ArgumentNullException("reads");
    }

    ContigMatePairs contigMatePairs = new ContigMatePairs();

    foreach (ISequence read in reads)
    {
        Match match = this.readExpression.Match(read.ID);
        if (!match.Success)
        {
            continue;
        }

        string mateDisplayID = GenerateExpression(match);

        // Both the read and its mate must be present in the alignment.
        Dictionary<ISequence, IList<ReadMap>> readContigs;
        Dictionary<ISequence, IList<ReadMap>> mateContigs;
        if (!alignment.TryGetValue(read.ID, out readContigs)
            || !alignment.TryGetValue(mateDisplayID, out mateContigs))
        {
            continue;
        }

        // Tags for which the current read is passed as the first member of the pair.
        bool readComesFirst;
        switch (match.Groups[2].Value)
        {
            case "X1":
            case "F":
            case "1":
            case "x1":
            case "f":
            case "a":
            case "A":
                readComesFirst = true;
                break;
            default:
                readComesFirst = false;
                break;
        }

        if (readComesFirst)
        {
            MatePair pair = new MatePair(read.ID, mateDisplayID, match.Groups[3].Value);
            ContigMatePairMapper(readContigs, mateContigs, pair, contigMatePairs);
        }
        else
        {
            MatePair pair = new MatePair(mateDisplayID, read.ID, match.Groups[3].Value);
            ContigMatePairMapper(mateContigs, readContigs, pair, contigMatePairs);
        }

        // Drop both entries so the pair is not processed again when the mate is visited.
        alignment.Remove(read.ID);
        alignment.Remove(mateDisplayID);
    }

    return contigMatePairs;
}
/// <summary>
/// Maps six reads onto two contigs (reads 0-2 onto contig 0, reads 3-5 onto
/// contig 1) and verifies, for every read, the mapping length, the start
/// position on the contig, the start position on the read and the overlap type.
/// </summary>
public void MapReadsWithTwoContigRightTraversal()
{
    const int kmerLength = 6;

    IList<ISequence> readSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA") { DisplayID = "0" },
        new Sequence(Alphabets.DNA, "ATCTGATAAG") { DisplayID = "1" },
        new Sequence(Alphabets.DNA, "TCTGATAAGG") { DisplayID = "2" },
        new Sequence(Alphabets.DNA, "TTTTTGATGG") { DisplayID = "3" },
        new Sequence(Alphabets.DNA, "TTTTGATGGC") { DisplayID = "4" },
        new Sequence(Alphabets.DNA, "TTTGATGGCA") { DisplayID = "5" }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
    };

    // Expected values, indexed by read position in readSeqs.
    int[] expectedContigIndexes = { 0, 0, 0, 1, 1, 1 };
    int[] expectedContigStarts = { 0, 1, 2, 0, 1, 2 };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);

    Assert.AreEqual(readSeqs.Count, maps.Count);

    for (int i = 0; i < readSeqs.Count; i++)
    {
        string context = "read index " + i;
        Dictionary<ISequence, IList<ReadMap>> map = maps[readSeqs[i].DisplayID];
        IList<ReadMap> readMap = map[contigs[expectedContigIndexes[i]]];

        Assert.AreEqual(10, readMap[0].Length, context);
        Assert.AreEqual(expectedContigStarts[i], readMap[0].StartPositionOfContig, context);
        Assert.AreEqual(0, readMap[0].StartPositionOfRead, context);
        Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap, context);
    }
}
/// <summary>
/// Maps eight reads onto a single contig and verifies, for every read, the
/// mapping length, the start position on the contig, the start position on the
/// read and that the overlap is reported as a full overlap.
/// </summary>
public void MapReadsWithSingleContigLeftTraversal()
{
    const int kmerLength = 6;

    IList<ISequence> readSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = "7" }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCCTCCTATCTTAGCG")
    };

    // Expected values, indexed by read position in readSeqs.
    // Read 5 previously had no length assertion; every other fully overlapping
    // read is expected to map with its own length, so 9 is asserted for it too.
    int[] expectedLengths = { 7, 8, 7, 8, 8, 9, 7, 9 };
    int[] expectedContigStarts = { 0, 3, 5, 1, 9, 7, 11, 2 };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);

    Assert.AreEqual(readSeqs.Count, maps.Count);

    for (int i = 0; i < readSeqs.Count; i++)
    {
        string context = "read index " + i;
        Dictionary<ISequence, IList<ReadMap>> map = maps[readSeqs[i].DisplayID];
        IList<ReadMap> readMap = map[contigs[0]];

        Assert.AreEqual(expectedLengths[i], readMap[0].Length, context);
        Assert.AreEqual(expectedContigStarts[i], readMap[0].StartPositionOfContig, context);
        Assert.AreEqual(0, readMap[0].StartPositionOfRead, context);
        Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap, context);
    }
}
/// <summary>
/// Builds the graph and contigs from sixteen reads, runs the mate pair
/// pipeline and traces scaffold paths over a contig graph. Expects three
/// paths; the first has three nodes and ends in "GCGCGC", which equals its own
/// reverse complement.
/// </summary>
public void TracePathTestWithPalindromicContig()
{
    const int kmerLengthConst = 5;
    const int dangleThreshold = 3;
    const int redundantThreshold = 6;

    var sequences = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCCTC") { ID = "0" },
        new Sequence(Alphabets.DNA, "CCTCCTAT") { ID = "1" },
        new Sequence(Alphabets.DNA, "TCCTATC") { ID = "2" },
        new Sequence(Alphabets.DNA, "TGCCTCCT") { ID = "3" },
        new Sequence(Alphabets.DNA, "ATCTTAGC") { ID = "4" },
        new Sequence(Alphabets.DNA, "CTATCTTAG") { ID = "5" },
        new Sequence(Alphabets.DNA, "CTTAGCG") { ID = "6" },
        new Sequence(Alphabets.DNA, "GCCTCCTAT") { ID = "7" },
        new Sequence(Alphabets.DNA, "TAGCGCGCTA") { ID = "8" },
        new Sequence(Alphabets.DNA, "AGCGCGC") { ID = "9" },
        new Sequence(Alphabets.DNA, "TTTTTT") { ID = "10" },
        new Sequence(Alphabets.DNA, "TTTTTAAA") { ID = "11" },
        new Sequence(Alphabets.DNA, "TAAAAA") { ID = "12" },
        new Sequence(Alphabets.DNA, "TTTTAG") { ID = "13" },
        new Sequence(Alphabets.DNA, "TTTAGC") { ID = "14" },
        new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
    };

    // Assemble: build the graph, clean it up and extract contigs.
    KmerLength = kmerLengthConst;
    SequenceReads.Clear();
    SetSequenceReads(sequences);
    CreateGraph();
    DanglingLinksThreshold = dangleThreshold;
    DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
    RedundantPathLengthThreshold = redundantThreshold;
    RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
    UnDangleGraph();
    RemoveRedundancy();
    IList<ISequence> contigs = BuildContigs().ToList();

    // Scaffold: map reads, pair mates, filter, compute distances, trace paths.
    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, sequences, kmerLengthConst);

    MatePairMapper builder = new MatePairMapper();
    CloneLibrary.Instance.AddLibrary("abc", 5, 15);
    ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

    OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
    ContigMatePairs overlap = filter.FilterPairedReads(pairedReads, 0);

    DistanceCalculator dist = new DistanceCalculator(overlap);
    overlap = dist.CalculateDistance();

    ContigGraph graph = new ContigGraph();
    graph.BuildContigGraph(contigs, this.KmerLength);

    TracePath path = new TracePath();
    IList<ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

    // Expected value first, matching the string assertions below.
    Assert.AreEqual(3, paths.Count);

    ScaffoldPath scaffold = paths.First();
    Assert.AreEqual(3, scaffold.Count);

    Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
    Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
    Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
}
/// <summary>
/// Maps every read onto the contigs it shares k-mers with.
/// Reads are processed in parallel; for each read one mapping task is started
/// per distinct contig hit by any of the read's k-mers (directly or through
/// the k-mer's reverse complement).
/// </summary>
/// <param name="contigs">List of sequences of contigs.</param>
/// <param name="reads">List of input reads.</param>
/// <param name="kmerLength">Length of kmer.</param>
/// <returns>Contig Read Map keyed by read ID.</returns>
/// <exception cref="ArgumentException">
/// Raised inside the parallel loop when two reads share the same ID.
/// NOTE(review): callers presumably observe it wrapped by the parallel loop - confirm.
/// </exception>
public ReadContigMap Map(IList<ISequence> contigs, IEnumerable<ISequence> reads, int kmerLength)
{
    KmerIndexerDictionary contigKmers = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
    ReadContigMap readContigMap = new ReadContigMap();

    Parallel.ForEach(
        reads,
        readSequence =>
        {
            IEnumerable<ISequence> readKmers = SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength);
            ReadIndex readIndex = new ReadIndex(readSequence);

            // Collect, per k-mer of the read, the contig positions it occurs at.
            foreach (ISequence readKmer in readKmers)
            {
                IList<KmerIndexer> hits;
                bool found = contigKmers.TryGetValue(readKmer, out hits);
                if (!found)
                {
                    found = contigKmers.TryGetValue(readKmer.GetReverseComplementedSequence(), out hits);
                }

                if (found)
                {
                    readIndex.ContigReadMatchIndexes.Add(hits);
                }
            }

            IList<Task<IList<ReadMap>>> mappingTasks = new List<Task<IList<ReadMap>>>();

            // Contigs for which a task has already been started; kept in the
            // same order as mappingTasks so both can be indexed together below.
            IList<long> visitedContigs = new List<long>();

            for (int position = 0; position < readIndex.ContigReadMatchIndexes.Count; position++)
            {
                // Per-iteration copy so the task lambda does not capture the loop counter.
                int readPosition = position;

                foreach (KmerIndexer hit in readIndex.ContigReadMatchIndexes[position])
                {
                    long contigIndex = hit.SequenceIndex;
                    if (visitedContigs.Contains(contigIndex))
                    {
                        continue;
                    }

                    visitedContigs.Add(contigIndex);

                    // NOTE(review): the lambda takes one argument, so the second
                    // StartNew argument appears to bind to the state parameter
                    // rather than to task creation options - confirm intent.
                    mappingTasks.Add(
                        Task<IList<ReadMap>>.Factory.StartNew(
                            unusedState => MapRead(
                                readPosition,
                                readIndex.ContigReadMatchIndexes,
                                contigIndex,
                                readIndex.ReadSequence.Count,
                                kmerLength),
                            TaskCreationOptions.AttachedToParent));
                }
            }

            // Reading Result blocks until the corresponding task has finished.
            var overlapMaps = new Dictionary<ISequence, IList<ReadMap>>();
            for (int i = 0; i < visitedContigs.Count; i++)
            {
                overlapMaps.Add(contigs.ElementAt(visitedContigs[i]), mappingTasks[i].Result);
            }

            // The result map is shared by all parallel iterations.
            lock (readContigMap)
            {
                if (readContigMap.ContainsKey(readIndex.ReadSequence.ID))
                {
                    throw new ArgumentException(
                        string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, readIndex.ReadSequence.ID));
                }

                readContigMap.Add(readIndex.ReadSequence.ID, overlapMaps);
            }
        });

    return readContigMap;
}
/// <summary>
/// Builds a <c>ReadContigMap</c> from the reads of the configured FASTA file
/// and verifies that the IDs of the first reads (up to ten) are keys of the map.
/// </summary>
public void ValidateReadContigMap()
{
    // Read all the input sequences from xml config file
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SmallChromosomeReadsNode, Constants.FilePathNode);

    using (FastAParser parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> sequenceReads = parser.Parse();
        ReadContigMap map = new ReadContigMap(sequenceReads);
        Assert.IsNotNull(map);

        // The loop used to check ElementAt(0) ten times; check each of the
        // first ten reads instead. Materialize once to avoid re-enumerating.
        IList<ISequence> firstReads = sequenceReads.Take(10).ToList();
        Assert.IsTrue(firstReads.Count > 0);

        foreach (ISequence read in firstReads)
        {
            Assert.IsTrue(map.ContainsKey(read.ID));
        }
    }
}
/// <summary>
/// Builds a <c>ReadContigMap</c> from the reads of the configured FASTA file
/// and verifies that the IDs of the first reads (up to ten) are keys of the map.
/// </summary>
public void ValidateReadContigMap()
{
    // Read all the input sequences from xml config file
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SmallChromosomeReadsNode, Constants.FilePathNode);

    IEnumerable<ISequence> sequenceReads = null;
    FastAParser parser = new FastAParser ();
    parser.Open( filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Close the parser even when parsing throws.
        parser.Close ();
    }

    ReadContigMap map = new ReadContigMap(sequenceReads);
    Assert.IsNotNull(map);

    // The loop used to check ElementAt(0) ten times; check each of the
    // first ten reads instead.
    IList<ISequence> firstReads = sequenceReads.Take(10).ToList();
    Assert.IsTrue(firstReads.Count > 0);

    foreach (ISequence read in firstReads)
    {
        Assert.IsTrue(map.ContainsKey(read.ID));
    }
}