/// <summary>
/// Maps six reads onto two contigs, builds the contig mate pair map from
/// those alignments and verifies the recorded forward/reverse positions.
/// </summary>
public void ContigPairReadMap()
{
    const int kmerLength = 6;

    // The first three reads are substrings of contig 0 at offsets 0, 1 and 2;
    // the last three are substrings of contig 1 at offsets 0, 1 and 2.
    IList<ISequence> readSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAA") { DisplayID = "0.x1:abc" },
        new Sequence(Alphabets.DNA, "ATCTGATAAG") { DisplayID = "1.F:abc" },
        new Sequence(Alphabets.DNA, "TCTGATAAGG") { DisplayID = "2.2:abc" },
        new Sequence(Alphabets.DNA, "TTTTTGATGG") { DisplayID = "0.y1:abc" },
        new Sequence(Alphabets.DNA, "TTTTGATGGC") { DisplayID = "1.R:abc" },
        new Sequence(Alphabets.DNA, "TTTGATGGCA") { DisplayID = "2.1:abc" }
    };

    IList<ISequence> contigs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
        new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
    };

    ReadContigMapper mapper = new ReadContigMapper();
    ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);

    MatePairMapper pair = new MatePairMapper();
    ContigMatePairs map = pair.MapContigToMatePairs(readSeqs, maps);

    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the values the right way round.
    Assert.AreEqual(2, map.Count);

    // Links recorded with contig 0 as the forward contig.
    Dictionary<ISequence, IList<ValidMatePair>> reverseContigs;
    Assert.IsTrue(map.TryGetValue(contigs[0], out reverseContigs));
    Assert.AreEqual(1, reverseContigs.Count);

    IList<ValidMatePair> matePairs;
    Assert.IsTrue(reverseContigs.TryGetValue(contigs[1], out matePairs));
    Assert.AreEqual(2, matePairs.Count);
    Assert.AreEqual(0, matePairs[0].ForwardReadStartPosition.First());
    Assert.AreEqual(9, matePairs[0].ReverseReadStartPosition.First());
    Assert.AreEqual(1, matePairs[1].ForwardReadStartPosition.First());
    Assert.AreEqual(10, matePairs[1].ReverseReadStartPosition.First());

    // Links recorded with contig 1 as the forward contig.
    Assert.IsTrue(map.TryGetValue(contigs[1], out reverseContigs));
    Assert.AreEqual(1, reverseContigs.Count);
    Assert.IsTrue(reverseContigs.TryGetValue(contigs[0], out matePairs));
    Assert.AreEqual(1, matePairs.Count);
    Assert.AreEqual(2, matePairs[0].ForwardReadStartPosition.First());
    Assert.AreEqual(11, matePairs[0].ReverseReadStartPosition.First());
}
/// <summary>
/// Performs Breadth First Search to traverse through graph to generate scaffold paths.
/// </summary>
/// <param name="overlapGraph">Contig Overlap Graph.</param>
/// <param name="contigPairedReadMaps">InterContig Distances.</param>
/// <param name="lengthOfKmer">Length of Kmer.</param>
/// <param name="searchDepth">Depth to which graph is searched.</param>
/// <returns>List of paths/scaffold.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="overlapGraph"/> or <paramref name="contigPairedReadMaps"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="lengthOfKmer"/> or <paramref name="searchDepth"/> is zero or negative.
/// </exception>
public IList<ScaffoldPath> FindPaths(
    ContigGraph overlapGraph,
    ContigMatePairs contigPairedReadMaps,
    int lengthOfKmer,
    int searchDepth = 10)
{
    if (overlapGraph == null)
    {
        // Report the real parameter name so callers can tell which argument was null.
        throw new ArgumentNullException("overlapGraph");
    }

    if (contigPairedReadMaps == null)
    {
        throw new ArgumentNullException("contigPairedReadMaps");
    }

    if (lengthOfKmer <= 0)
    {
        throw new ArgumentException(Resource.KmerLength);
    }

    if (searchDepth <= 0)
    {
        throw new ArgumentException(Resource.Depth);
    }

    // Stored on the instance before traversal starts.
    this.graph = overlapGraph;
    this.kmerLength = lengthOfKmer;
    this.depth = searchDepth;

    List<ScaffoldPath> scaffoldPaths = new List<ScaffoldPath>();

    // Nodes are traversed in parallel; only nodes whose contig sequence has
    // an entry in contigPairedReadMaps are used as traversal starting points.
    Parallel.ForEach(
        overlapGraph.Nodes,
        (Node node) =>
        {
            Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap;
            if (contigPairedReadMaps.TryGetValue(overlapGraph.GetNodeSequence(node), out contigPairedReadMap))
            {
                List<ScaffoldPath> scaffoldPath = TraverseGraph(node, contigPairedReadMap);

                // List<T> is not safe for concurrent writes, so merging is serialized.
                lock (scaffoldPaths)
                {
                    scaffoldPaths.AddRange(scaffoldPath);
                }
            }
        });

    return scaffoldPaths;
}
/// <summary>
/// Records one mate pair in the contig-to-contig map: for every combination of
/// a contig hit by the forward read and a contig hit by the reverse read, one
/// <c>ValidMatePair</c> entry is added per (forward alignment, reverse alignment) pair.
/// </summary>
/// <param name="forwardContigs">Contigs aligning to forward read, with their read alignments.</param>
/// <param name="reverseContigs">Contigs aligning to reverse read, with their read alignments.</param>
/// <param name="pair">Mate pair the alignments belong to.</param>
/// <param name="contigMatePairs">Contig mate pair map that receives the new entries.</param>
private static void ContigMatePairMapper(
    Dictionary<ISequence, IList<ReadMap>> forwardContigs,
    Dictionary<ISequence, IList<ReadMap>> reverseContigs,
    MatePair pair,
    ContigMatePairs contigMatePairs)
{
    foreach (var forwardEntry in forwardContigs)
    {
        ISequence forwardKey = forwardEntry.Key;

        // Fetch (or lazily create) the per-forward-contig dictionary.
        Dictionary<ISequence, IList<ValidMatePair>> linksFromForward;
        if (!contigMatePairs.TryGetValue(forwardKey, out linksFromForward))
        {
            linksFromForward = new Dictionary<ISequence, IList<ValidMatePair>>();
            contigMatePairs.Add(forwardKey, linksFromForward);
        }

        foreach (var reverseEntry in reverseContigs)
        {
            ISequence reverseKey = reverseEntry.Key;

            // Fetch (or lazily create) the list of links to this reverse contig.
            IList<ValidMatePair> links;
            if (!linksFromForward.TryGetValue(reverseKey, out links))
            {
                links = new List<ValidMatePair>();
                linksFromForward.Add(reverseKey, links);
            }

            foreach (ReadMap forwardAlignment in forwardEntry.Value)
            {
                foreach (ReadMap reverseAlignment in reverseEntry.Value)
                {
                    var link = new ValidMatePair();
                    link.PairedRead = pair;
                    link.ForwardReadStartPosition.Add(forwardAlignment.StartPositionOfContig);

                    // Reverse read position is recorded as start + length - 1.
                    link.ReverseReadStartPosition.Add(
                        reverseAlignment.StartPositionOfContig + reverseAlignment.Length - 1);

                    // Same alignment start, counted from the other end of the reverse contig.
                    link.ReverseReadReverseComplementStartPosition.Add(
                        reverseKey.Count - reverseAlignment.StartPositionOfContig - 1);

                    links.Add(link);
                }
            }
        }
    }
}
/// <summary>
/// Creates the paired read contig map entries for a single mate pair.
/// Every forward contig is linked to every reverse contig, and each link
/// receives one entry per combination of forward and reverse read alignment.
/// </summary>
/// <param name="forwardContigs">Contigs aligning to forward read.</param>
/// <param name="reverseContigs">Contigs aligning to reverse read.</param>
/// <param name="pair">Mate Pair.</param>
/// <param name="contigMatePairs">Contig mate pair map updated in place.</param>
private static void ContigMatePairMapper(
    Dictionary<ISequence, IList<ReadMap>> forwardContigs,
    Dictionary<ISequence, IList<ReadMap>> reverseContigs,
    MatePair pair,
    ContigMatePairs contigMatePairs)
{
    foreach (KeyValuePair<ISequence, IList<ReadMap>> forwardHit in forwardContigs)
    {
        Dictionary<ISequence, IList<ValidMatePair>> byReverseContig;
        bool forwardKnown = contigMatePairs.TryGetValue(forwardHit.Key, out byReverseContig);
        if (!forwardKnown)
        {
            // First time this forward contig is seen: register an empty link table.
            byReverseContig = new Dictionary<ISequence, IList<ValidMatePair>>();
            contigMatePairs.Add(forwardHit.Key, byReverseContig);
        }

        foreach (KeyValuePair<ISequence, IList<ReadMap>> reverseHit in reverseContigs)
        {
            IList<ValidMatePair> collected;
            bool reverseKnown = byReverseContig.TryGetValue(reverseHit.Key, out collected);
            if (!reverseKnown)
            {
                // First link between this forward/reverse contig pair.
                collected = new List<ValidMatePair>();
                byReverseContig.Add(reverseHit.Key, collected);
            }

            foreach (ReadMap fwd in forwardHit.Value)
            {
                foreach (ReadMap rev in reverseHit.Value)
                {
                    ValidMatePair entry = new ValidMatePair { PairedRead = pair };

                    entry.ForwardReadStartPosition.Add(fwd.StartPositionOfContig);

                    // Stored as the alignment start plus the alignment length, minus one.
                    var reversePosition = rev.StartPositionOfContig + rev.Length - 1;
                    entry.ReverseReadStartPosition.Add(reversePosition);

                    // Stored as the reverse contig's Count minus the alignment start, minus one.
                    var reverseComplementPosition = reverseHit.Key.Count - rev.StartPositionOfContig - 1;
                    entry.ReverseReadReverseComplementStartPosition.Add(reverseComplementPosition);

                    collected.Add(entry);
                }
            }
        }
    }
}
/// <summary>
/// Filter Paired Read Based on Orientation.
/// The mate pair lists inside <paramref name="matePairMap"/> are modified in place
/// (cleared or passed to the orientation filter); the links that still hold at least
/// one mate pair afterwards are copied into a new map.
/// </summary>
/// <param name="matePairMap">Map between contigs using mate pair information.</param>
/// <param name="redundancy">Number of mate pairs required to create a link between two contigs.
/// Hierarchical Scaffolding With Bambus
/// by: Mihai Pop, Daniel S. Kosack, Steven L. Salzberg
/// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.</param>
/// <returns>
/// A new map holding only the contig links whose mate pair list is non-empty after filtering.
/// </returns>
public ContigMatePairs FilterPairedReads(ContigMatePairs matePairMap, int redundancy = 2)
{
    if (null == matePairMap)
    {
        throw new ArgumentNullException("matePairMap");
    }

    if (redundancy < 0)
    {
        throw new ArgumentException(Properties.Resource.NegativeRedundancy);
    }

    foreach (var source in matePairMap)
    {
        foreach (var link in source.Value)
        {
            // A contig linked to itself is always discarded.
            if (source.Key == link.Key)
            {
                link.Value.Clear();
                continue;
            }

            Dictionary<ISequence, IList<ValidMatePair>> oppositeLinks;
            IList<ValidMatePair> oppositePairs;
            if (matePairMap.TryGetValue(link.Key, out oppositeLinks)
                && oppositeLinks.TryGetValue(source.Key, out oppositePairs))
            {
                // The opposite direction is also recorded: let the orientation filter decide.
                OrientationFilter(oppositePairs, link.Value, redundancy);
            }
            else if (link.Value.Count < redundancy)
            {
                // One-directional link with too few supporting mate pairs.
                link.Value.Clear();
            }
        }
    }

    ContigMatePairs newMatePairMap = new ContigMatePairs();
    Parallel.ForEach(
        matePairMap,
        source =>
        {
            // Keep only the links that still carry mate pairs.
            var surviving = new Dictionary<ISequence, IList<ValidMatePair>>();
            foreach (var link in source.Value)
            {
                if (link.Value.Count > 0)
                {
                    surviving.Add(link.Key, link.Value);
                }
            }

            if (surviving.Count > 0)
            {
                // Serialize insertions into the shared result map.
                lock (newMatePairMap)
                {
                    newMatePairMap.Add(source.Key, surviving);
                }
            }
        });

    return newMatePairMap;
}