Esempio n. 1
0
        public void DistanceCalculationwithTwoContigs()
        {
            // Maps six paired reads onto two contigs, filters the mate pairs by orientation,
            // runs the distance calculation and checks the values reported for the contig pair.
            const int KmerLength = 6;
            IList<ISequence> sequences = new List<ISequence>()
            {
                new Sequence(Alphabets.DNA, "GATCTGATAA")
                {
                    ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.X1:0.5K"
                },
                new Sequence(Alphabets.DNA, "ATCTGATAAG")
                {
                    ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.F:0.5K"
                },
                new Sequence(Alphabets.DNA, "TCTGATAAGG")
                {
                    ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.2:0.5K"
                },
                new Sequence(Alphabets.DNA, "TTTTTGATGG")
                {
                    ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.Y1:0.5K"
                },
                new Sequence(Alphabets.DNA, "TTTTGATGGC")
                {
                    ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.R:0.5K"
                },
                new Sequence(Alphabets.DNA, "TTTGATGGCA")
                {
                    ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.1:0.5K"
                }
            };

            IList<ISequence> contigs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
                new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
            };

            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, KmerLength);
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairs = mapPairedReads.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairs);
            DistanceCalculator calc = new DistanceCalculator(contigpairedReads);

            contigpairedReads = calc.CalculateDistance();

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps
            // the failure message ("Expected ... Actual ...") truthful.
            Assert.AreEqual(1, contigpairedReads.Values.Count);
            Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];

            Assert.IsTrue(map.ContainsKey(contigs[1]));
            IList<ValidMatePair> valid = map[contigs[1]];

            // All remaining checks look at the first mate pair only.
            ValidMatePair first = valid.First();

            Assert.AreEqual((float)478.000031, first.DistanceBetweenContigs[0]);
            Assert.AreEqual((float)477.0, first.DistanceBetweenContigs[1]);
            Assert.AreEqual((float)14.1421356, first.StandardDeviation[0]);
            Assert.AreEqual((float)14.1421356, first.StandardDeviation[1]);
            Assert.AreEqual(2, first.Weight);
        }
Esempio n. 2
0
        public void DistanceCalculationwithTwoContigsWeightedMean()
        {
            // Same pipeline as the two-contig distance test, but the reads carry two different
            // library tags ("2K" and "0.5K") in their IDs; checks the resulting distance values.
            const int KmerLength = 6;
            List<ISequence> sequences = new List<ISequence>();
            Sequence seq = new Sequence(Alphabets.DNA, "GATCTGATAA".Select(a => (byte)a).ToArray());

            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                     + "substrate 1 (IRS1) on chromosome 2.x1:2K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "ATCTGATAAG".Select(a => (byte)a).ToArray());
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                     + "on chromosome 2.f:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray());
            seq.ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor"
                     + "on chromosome 2.2:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTTTGATGG".Select(a => (byte)a).ToArray());
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                     + "substrate 1 (IRS1) on chromosome 2.y1:2K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTTGATGGC".Select(a => (byte)a).ToArray());
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor"
                     + "on chromosome 2.r:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTGATGGCA".Select(a => (byte)a).ToArray());
            seq.ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor"
                     + "on chromosome 2.1:0.5K";
            sequences.Add(seq);

            IList<ISequence> contigs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GATCTGATAAGG".Select(a => (byte)a).ToArray()),
                new Sequence(Alphabets.DNA, "TTTTTGATGGCA".Select(a => (byte)a).ToArray())
            };

            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, KmerLength);

            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequences, maps);
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads);
            DistanceCalculator calc = new DistanceCalculator(contigpairedReads);

            contigpairedReads = calc.CalculateDistance();

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps
            // the failure message truthful.
            Assert.AreEqual(1, contigpairedReads.Values.Count);
            Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];

            Assert.IsTrue(map.ContainsKey(contigs[1]));
            IList<ValidMatePair> valid = map[contigs[1]];

            // All remaining checks look at the first mate pair only.
            ValidMatePair first = valid.First();

            Assert.AreEqual((float)1228.0, first.DistanceBetweenContigs[0]);
            Assert.AreEqual((float)1227.0, first.DistanceBetweenContigs[1]);
            Assert.AreEqual((float)60, first.StandardDeviation[0]);
            Assert.AreEqual((float)60, first.StandardDeviation[1]);
            Assert.AreEqual(2, first.Weight);
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a distance calculator that works on the supplied contig mate pair map.
        /// </summary>
        /// <param name="contigPairedReads">Contig mate pair map to store; must not be null.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigPairedReads"/> is null.</exception>
        public DistanceCalculator(ContigMatePairs contigPairedReads)
        {
            if (contigPairedReads != null)
            {
                // Valid input: remember the map for later use by this instance.
                this.contigPairedReads = contigPairedReads;
                return;
            }

            throw new ArgumentNullException("contigPairedReads");
        }
Esempio n. 4
0
        public void FilterMatePairWithTwoContigs()
        {
            // Maps six paired reads onto two contigs, applies the orientation based filter and
            // checks the read positions recorded for the two mate pairs that link the contigs.
            const int kmerLength = 6;
            IList<ISequence> sequences = new List<ISequence>();
            Sequence seq = new Sequence(Alphabets.DNA, "GATCTGATAA");

            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.X1:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "ATCTGATAAG");
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.F:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TCTGATAAGG");
            seq.ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.2:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTTTGATGG");
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor substrate 1 (IRS1) on chromosome 2.Y1:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTTGATGGC");
            seq.ID = ">gi|263191773|ref|NG_015830.1| H**o sapiens insulin receptor on chromosome 2.R:0.5K";
            sequences.Add(seq);
            seq = new Sequence(Alphabets.DNA, "TTTGATGGCA");
            seq.ID = ">gi|263191773|ref | H**o sapiens ........insulin receptor on chromosome 2.1:0.5K";
            sequences.Add(seq);

            IList<ISequence> contigs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
                new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
            };

            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);

            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairs = mapPairedReads.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairs);

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps
            // the failure message truthful.
            Assert.AreEqual(1, contigpairedReads.Values.Count);

            Assert.IsTrue(contigpairedReads.ContainsKey(contigs[0]));
            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[contigs[0]];

            Assert.IsTrue(map.ContainsKey(contigs[1]));

            // Sort is a helper defined elsewhere in the test class; it fixes the order of the
            // mate pairs so the index based checks below are stable.
            List<ValidMatePair> valid = Sort(map[contigs[1]], sequences);

            Assert.AreEqual(1, valid[0].ForwardReadStartPosition[0]);
            Assert.AreEqual(10, valid[0].ReverseReadReverseComplementStartPosition[0]);
            Assert.AreEqual(10, valid[0].ReverseReadStartPosition[0]);

            Assert.AreEqual(0, valid[1].ForwardReadStartPosition[0]);
            Assert.AreEqual(11, valid[1].ReverseReadReverseComplementStartPosition[0]);
            Assert.AreEqual(9, valid[1].ReverseReadStartPosition[0]);
        }
Esempio n. 5
0
        public void ContigPairReadMap()
        {
            // Maps six paired reads onto two contigs and checks the contig-to-contig mate pair
            // map produced by MatePairMapper, in both directions.
            const int kmerLength = 6;
            IList<ISequence> readSeqs = new List<ISequence>();
            Sequence read = new Sequence(Alphabets.DNA, "GATCTGATAA");

            read.DisplayID = "0.x1:abc";
            readSeqs.Add(read);
            read = new Sequence(Alphabets.DNA, "ATCTGATAAG");
            read.DisplayID = "1.F:abc";
            readSeqs.Add(read);
            read = new Sequence(Alphabets.DNA, "TCTGATAAGG");
            read.DisplayID = "2.2:abc";
            readSeqs.Add(read);
            read = new Sequence(Alphabets.DNA, "TTTTTGATGG");
            read.DisplayID = "0.y1:abc";
            readSeqs.Add(read);
            read = new Sequence(Alphabets.DNA, "TTTTGATGGC");
            read.DisplayID = "1.R:abc";
            readSeqs.Add(read);
            read = new Sequence(Alphabets.DNA, "TTTGATGGCA");
            read.DisplayID = "2.1:abc";
            readSeqs.Add(read);

            IList<ISequence> contigs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GATCTGATAAGG"),
                new Sequence(Alphabets.DNA, "TTTTTGATGGCA")
            };
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(contigs, readSeqs, kmerLength);
            MatePairMapper pair = new MatePairMapper();
            ContigMatePairs map = pair.MapContigToMatePairs(readSeqs, maps);

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps
            // the failure message truthful.
            Assert.AreEqual(2, map.Count);
            Dictionary<ISequence, IList<ValidMatePair>> reverseContigs;

            // Direction contigs[0] -> contigs[1]: two mate pairs expected.
            Assert.IsTrue(map.TryGetValue(contigs[0], out reverseContigs));
            Assert.AreEqual(1, reverseContigs.Count);
            IList<ValidMatePair> matePairs;

            Assert.IsTrue(reverseContigs.TryGetValue(contigs[1], out matePairs));
            Assert.AreEqual(2, matePairs.Count);
            Assert.AreEqual(0, matePairs[0].ForwardReadStartPosition.First());
            Assert.AreEqual(9, matePairs[0].ReverseReadStartPosition.First());
            Assert.AreEqual(1, matePairs[1].ForwardReadStartPosition.First());
            Assert.AreEqual(10, matePairs[1].ReverseReadStartPosition.First());

            // Direction contigs[1] -> contigs[0]: one mate pair expected.
            Assert.IsTrue(map.TryGetValue(contigs[1], out reverseContigs));
            Assert.AreEqual(1, reverseContigs.Count);
            Assert.IsTrue(reverseContigs.TryGetValue(contigs[0], out matePairs));
            Assert.AreEqual(1, matePairs.Count);
            Assert.AreEqual(2, matePairs[0].ForwardReadStartPosition.First());
            Assert.AreEqual(11, matePairs[0].ReverseReadStartPosition.First());
        }
Esempio n. 6
0
        /// <summary>
        /// Generates scaffold paths by traversing the contig overlap graph, starting from every
        /// node whose sequence has an entry in the contig mate pair map.
        /// </summary>
        /// <param name="overlapGraph">Contig Overlap Graph.</param>
        /// <param name="contigPairedReadMaps">InterContig Distances.</param>
        /// <param name="lengthOfKmer">Length of Kmer.</param>
        /// <param name="searchDepth">Depth to which graph is searched.</param>
        /// <returns>List of paths/scaffold.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="overlapGraph"/> or <paramref name="contigPairedReadMaps"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="lengthOfKmer"/> or <paramref name="searchDepth"/> is not positive.
        /// </exception>
        public IList<ScaffoldPath> FindPaths(
            ContigGraph overlapGraph,
            ContigMatePairs contigPairedReadMaps,
            int lengthOfKmer,
            int searchDepth = 10)
        {
            if (overlapGraph == null)
            {
                // Report the real parameter name (previously the stale name "deBruijnGraph").
                throw new ArgumentNullException("overlapGraph");
            }

            if (contigPairedReadMaps == null)
            {
                throw new ArgumentNullException("contigPairedReadMaps");
            }

            if (lengthOfKmer <= 0)
            {
                throw new ArgumentException(Resource.KmerLength);
            }

            if (searchDepth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            this.graph = overlapGraph;
            this.kmerLength = lengthOfKmer;
            this.depth = searchDepth;

            List<ScaffoldPath> scaffoldPaths = new List<ScaffoldPath>();
            Parallel.ForEach(
                overlapGraph.Nodes,
                (Node node) =>
                {
                    Dictionary<ISequence, IList<ValidMatePair>> contigPairedReadMap;
                    if (contigPairedReadMaps.TryGetValue(overlapGraph.GetNodeSequence(node), out contigPairedReadMap))
                    {
                        List<ScaffoldPath> scaffoldPath = TraverseGraph(node, contigPairedReadMap);

                        // The result list is shared by all worker threads, so appends are serialized.
                        lock (scaffoldPaths)
                        {
                            scaffoldPaths.AddRange(scaffoldPath);
                        }
                    }
                });

            return scaffoldPaths;
        }
Esempio n. 7
0
        /// <summary>
        /// Builds the contig mate pair map: for every read whose display ID matches the read
        /// expression and whose mate is also aligned, links the contigs of the two reads.
        /// </summary>
        /// <param name="reads">Input list of reads.</param>
        /// <param name="alignment">
        /// Reads contig alignment. The entries of every read and mate that get paired are removed from it.
        /// </param>
        /// <returns>Contig Mate pair map.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alignment"/> or <paramref name="reads"/> is null.
        /// </exception>
        public ContigMatePairs MapContigToMatePairs(IList <ISequence> reads, ReadContigMap alignment)
        {
            if (alignment == null)
            {
                throw new ArgumentNullException("alignment");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            ContigMatePairs result = new ContigMatePairs();

            foreach (ISequence read in reads)
            {
                string readId = read.DisplayID;
                Match match = _readExpression.Match(readId);
                if (!match.Success)
                {
                    continue;
                }

                string mateId = GenerateExpression(match);

                // Both the read and its mate must be aligned to at least one contig.
                Dictionary <ISequence, IList <ReadMap> > readContigs;
                if (!alignment.TryGetValue(readId, out readContigs))
                {
                    continue;
                }

                Dictionary <ISequence, IList <ReadMap> > mateContigs;
                if (!alignment.TryGetValue(mateId, out mateContigs))
                {
                    continue;
                }

                string library = match.Groups[3].Value;

                switch (match.Groups[2].Value)
                {
                    // These tags mark the current read as the forward read of the pair.
                    case "X1":
                    case "F":
                    case "1":
                    case "x1":
                    case "f":
                    case "a":
                    case "A":
                        ContigMatePairMapper(readContigs, mateContigs, new MatePair(readId, mateId, library), result);
                        break;

                    // Any other tag: the mate is treated as the forward read.
                    default:
                        ContigMatePairMapper(mateContigs, readContigs, new MatePair(mateId, readId, library), result);
                        break;
                }

                // Drop both entries so the pair is not processed again when the mate is visited.
                alignment.Remove(readId);
                alignment.Remove(mateId);
            }

            return result;
        }
Esempio n. 8
0
        /// <summary>
        /// Adds one valid mate pair entry to the contig mate pair map for every combination of
        /// a forward read mapping and a reverse read mapping, across all forward/reverse contigs.
        /// </summary>
        /// <param name="forwardContigs">Contigs aligning to forward read.</param>
        /// <param name="reverseContigs">Contigs aligning to reverse read.</param>
        /// <param name="pair">Mate Pair.</param>
        /// <param name="contigMatePairs">Contig mate pair map that receives the new entries.</param>
        private static void ContigMatePairMapper(
            Dictionary <ISequence, IList <ReadMap> > forwardContigs,
            Dictionary <ISequence, IList <ReadMap> > reverseContigs,
            MatePair pair,
            ContigMatePairs contigMatePairs)
        {
            foreach (KeyValuePair <ISequence, IList <ReadMap> > forwardEntry in forwardContigs)
            {
                ISequence forwardKey = forwardEntry.Key;

                // Fetch, or create on first use, the inner map of the forward contig.
                Dictionary <ISequence, IList <ValidMatePair> > pairsByReverseContig;
                if (!contigMatePairs.TryGetValue(forwardKey, out pairsByReverseContig))
                {
                    pairsByReverseContig = new Dictionary <ISequence, IList <ValidMatePair> >();
                    contigMatePairs.Add(forwardKey, pairsByReverseContig);
                }

                foreach (KeyValuePair <ISequence, IList <ReadMap> > reverseEntry in reverseContigs)
                {
                    ISequence reverseKey = reverseEntry.Key;

                    // Fetch, or create on first use, the mate pair list of this contig pair.
                    IList <ValidMatePair> bucket;
                    if (!pairsByReverseContig.TryGetValue(reverseKey, out bucket))
                    {
                        bucket = new List <ValidMatePair>();
                        pairsByReverseContig.Add(reverseKey, bucket);
                    }

                    foreach (ReadMap forwardMap in forwardEntry.Value)
                    {
                        foreach (ReadMap reverseMap in reverseEntry.Value)
                        {
                            ValidMatePair entry = new ValidMatePair { PairedRead = pair };

                            entry.ForwardReadStartPosition.Add(forwardMap.StartPositionOfContig);

                            // Last contig position covered by the reverse read.
                            entry.ReverseReadStartPosition.Add(
                                reverseMap.StartPositionOfContig + reverseMap.Length - 1);

                            // Same start position counted from the other end of the reverse contig.
                            entry.ReverseReadReverseComplementStartPosition.Add(
                                reverseKey.Count - reverseMap.StartPositionOfContig - 1);

                            bucket.Add(entry);
                        }
                    }
                }
            }
        }
Esempio n. 9
0
 /// <summary>
 /// Generates scaffold paths by delegating to the configured path tracer, using this
 /// instance's kmer length and search depth.
 /// </summary>
 /// <param name="contigGraph">Contig Graph.</param>
 /// <param name="contigMatePairs">Contig Mate Pair map.</param>
 /// <returns>List of Scaffold Paths.</returns>
 protected IList<ScaffoldPath> TracePath(ContigGraph contigGraph, ContigMatePairs contigMatePairs)
 {
     IList<ScaffoldPath> scaffoldPaths = this.tracePath.FindPaths(
         contigGraph,
         contigMatePairs,
         this.kmerLength,
         this.depthField);

     return scaffoldPaths;
 }
Esempio n. 10
0
        /// <summary>
        /// Runs the distance calculation between contigs using paired reads.
        /// </summary>
        /// <remarks>
        /// A distance calculator is created from <paramref name="contigMatePairs"/> only when
        /// none is set yet. When one already exists, the argument is only null-checked and the
        /// existing calculator is run as it is.
        /// </remarks>
        /// <param name="contigMatePairs">Contig Mate Pair map.</param>
        /// <returns>Number of entries in the map returned by the distance calculator.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigMatePairs"/> is null.</exception>
        protected int CalculateDistanceBetweenContigs(ContigMatePairs contigMatePairs)
        {
            if (contigMatePairs == null)
            {
                throw new ArgumentNullException("contigMatePairs");
            }

            // Lazily create the calculator; an already configured one is reused untouched.
            if (this.distanceCalculator == null)
            {
                this.distanceCalculator = new DistanceCalculator(contigMatePairs);
            }

            // The dictionary is updated in this step.
            ContigMatePairs updatedPairs = this.distanceCalculator.CalculateDistance();

            return updatedPairs.Count;
        }
Esempio n. 11
0
 /// <summary>
 /// Filters the mate pairs by delegating to the configured paired read filter, passing
 /// along this instance's redundancy setting.
 /// </summary>
 /// <param name="contigMatePairs">Contig Mate Pair map.</param>
 /// <returns>Contig Mate Pair map returned by the filter.</returns>
 protected ContigMatePairs FilterReadsBasedOnOrientation(ContigMatePairs contigMatePairs)
 {
     ContigMatePairs filteredPairs = this.pairedReadFilter.FilterPairedReads(
         contigMatePairs,
         this.redundancyField);

     return filteredPairs;
 }
Esempio n. 12
0
        public void TracePathTestWithPalindromicContig()
        {
            // Builds contigs from the reads below, maps the mate pairs onto them, calculates
            // contig distances and checks the scaffold paths found in the contig graph.
            const int kmerLength = 6;
            const int dangleThreshold = 3;
            const int redundantThreshold = 7;

            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "ATGCCTC") { DisplayID = ">10.x1:abc" },
                new Sequence(Alphabets.DNA, "CCTCCTAT") { DisplayID = "1" },
                new Sequence(Alphabets.DNA, "TCCTATC") { DisplayID = "2" },
                new Sequence(Alphabets.DNA, "TGCCTCCT") { DisplayID = "3" },
                new Sequence(Alphabets.DNA, "ATCTTAGC") { DisplayID = "4" },
                new Sequence(Alphabets.DNA, "CTATCTTAG") { DisplayID = "5" },
                new Sequence(Alphabets.DNA, "CTTAGCG") { DisplayID = "6" },
                new Sequence(Alphabets.DNA, "GCCTCCTAT") { DisplayID = ">8.x1:abc" },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA") { DisplayID = ">8.y1:abc" },
                new Sequence(Alphabets.DNA, "AGCGCGC") { DisplayID = ">9.x1:abc" },
                new Sequence(Alphabets.DNA, "TTTTTT") { DisplayID = "7" },
                new Sequence(Alphabets.DNA, "TTTTTAAA") { DisplayID = "8" },
                new Sequence(Alphabets.DNA, "TAAAAA") { DisplayID = "9" },
                new Sequence(Alphabets.DNA, "TTTTAG") { DisplayID = "10" },
                new Sequence(Alphabets.DNA, "TTTAGC") { DisplayID = "11" },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { DisplayID = "12" }
            };

            KmerLength = kmerLength;
            SequenceReads.Clear();
            AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold = dangleThreshold;
            DanglingLinksPurger = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList<ISequence> contigs = BuildContigs();
            ReadContigMapper mapper = new ReadContigMapper();

            ReadContigMap maps = mapper.Map(contigs, sequences, kmerLength);
            MatePairMapper builder = new MatePairMapper();

            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            ContigMatePairs overlap;
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist = new DistanceCalculator();

            dist.CalculateDistance(overlap);
            Graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath path = new TracePath();
            IList<ScaffoldPath> paths = path.FindPaths(Graph, overlap, kmerLength, 3);

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps
            // the failure message truthful.
            Assert.AreEqual(3, paths.Count);
            Assert.AreEqual(3, paths.First().Count);
            ScaffoldPath scaffold = paths.First();
            DeBruijnGraph graph = Graph;

            // AreEqual (rather than IsTrue on Equals) shows the actual sequence text on failure.
            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ToString());
        }
Esempio n. 13
0
        /// <summary>
        /// Records, for each forward contig / reverse contig combination, one valid mate pair
        /// per combination of forward read mapping and reverse read mapping.
        /// </summary>
        /// <param name="forwardContigs">Contigs aligning to forward read.</param>
        /// <param name="reverseContigs">Contigs aligning to reverse read.</param>
        /// <param name="pair">Mate Pair.</param>
        /// <param name="contigMatePairs">Contig mate pair map that is updated in place.</param>
        private static void ContigMatePairMapper(
            Dictionary<ISequence, IList<ReadMap>> forwardContigs,
            Dictionary<ISequence, IList<ReadMap>> reverseContigs,
            MatePair pair,
            ContigMatePairs contigMatePairs)
        {
            foreach (KeyValuePair<ISequence, IList<ReadMap>> forward in forwardContigs)
            {
                // Inner map of the forward contig, created the first time the contig is seen.
                Dictionary<ISequence, IList<ValidMatePair>> linkedContigs;
                if (!contigMatePairs.TryGetValue(forward.Key, out linkedContigs))
                {
                    linkedContigs = new Dictionary<ISequence, IList<ValidMatePair>>();
                    contigMatePairs.Add(forward.Key, linkedContigs);
                }

                foreach (KeyValuePair<ISequence, IList<ReadMap>> reverse in reverseContigs)
                {
                    // Mate pair list of this contig pair, created the first time it is seen.
                    IList<ValidMatePair> links;
                    if (!linkedContigs.TryGetValue(reverse.Key, out links))
                    {
                        links = new List<ValidMatePair>();
                        linkedContigs.Add(reverse.Key, links);
                    }

                    foreach (ReadMap forwardHit in forward.Value)
                    {
                        foreach (ReadMap reverseHit in reverse.Value)
                        {
                            // Last contig position covered by the reverse read.
                            var reverseEnd = reverseHit.StartPositionOfContig + reverseHit.Length - 1;

                            // Start position counted from the other end of the reverse contig.
                            var reverseComplementStart = reverse.Key.Count - reverseHit.StartPositionOfContig - 1;

                            ValidMatePair link = new ValidMatePair();
                            link.PairedRead = pair;
                            link.ForwardReadStartPosition.Add(forwardHit.StartPositionOfContig);
                            link.ReverseReadStartPosition.Add(reverseEnd);
                            link.ReverseReadReverseComplementStartPosition.Add(reverseComplementStart);
                            links.Add(link);
                        }
                    }
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Builds the contig mate pair map: for every read whose ID matches the read expression
        /// and whose mate is also aligned, links the contigs of the two reads.
        /// </summary>
        /// <param name="reads">Input list of reads.</param>
        /// <param name="alignment">
        /// Reads contig alignment. The entries of every read and mate that get paired are removed from it.
        /// </param>
        /// <returns>Contig Mate pair map.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alignment"/> or <paramref name="reads"/> is null.
        /// </exception>
        public ContigMatePairs MapContigToMatePairs(IEnumerable<ISequence> reads, ReadContigMap alignment)
        {
            if (alignment == null)
            {
                throw new ArgumentNullException("alignment");
            }

            if (reads == null)
            {
                throw new ArgumentNullException("reads");
            }

            ContigMatePairs contigMatePairs = new ContigMatePairs();
            foreach (ISequence read in reads)
            {
                string readId = read.ID;
                Match match = this.readExpression.Match(readId);
                if (!match.Success)
                {
                    continue;
                }

                string mateId = GenerateExpression(match);

                // Both the read and its mate must be aligned to at least one contig.
                Dictionary<ISequence, IList<ReadMap>> readContigs;
                if (!alignment.TryGetValue(readId, out readContigs))
                {
                    continue;
                }

                Dictionary<ISequence, IList<ReadMap>> mateContigs;
                if (!alignment.TryGetValue(mateId, out mateContigs))
                {
                    continue;
                }

                // These tags mark the current read as the forward read; with any other tag
                // the mate is treated as the forward read.
                string tag = match.Groups[2].Value;
                bool readIsForward = tag == "X1" || tag == "F" || tag == "1" || tag == "x1"
                                     || tag == "f" || tag == "a" || tag == "A";

                string forwardId = readIsForward ? readId : mateId;
                string reverseId = readIsForward ? mateId : readId;
                Dictionary<ISequence, IList<ReadMap>> forwardContigs = readIsForward ? readContigs : mateContigs;
                Dictionary<ISequence, IList<ReadMap>> reverseContigs = readIsForward ? mateContigs : readContigs;

                MatePair pair = new MatePair(forwardId, reverseId, match.Groups[3].Value);
                ContigMatePairMapper(forwardContigs, reverseContigs, pair, contigMatePairs);

                // Drop both entries so the pair is not processed again when the mate is visited.
                alignment.Remove(readId);
                alignment.Remove(mateId);
            }

            return contigMatePairs;
        }
Esempio n. 15
0
        /// <summary>
        /// End-to-end check of scaffold path tracing on a small, fixed read set
        /// (per the test name, one that yields a palindromic contig).
        /// Steps: build the graph from 16 reads, purge dangling links and redundant
        /// paths, build contigs, map reads to contigs, map mate pairs, filter them by
        /// orientation, calculate distances, build the contig graph and trace paths.
        /// Asserts that three paths are found and that the first path visits three
        /// contigs with the expected sequences.
        /// </summary>
        public void TracePathTestWithPalindromicContig()
        {
            const int kmerLengthConst    = 5;
            const int dangleThreshold    = 3;
            const int redundantThreshold = 6;

            // Input reads; IDs are simply the running index of each read.
            var sequences = new List <ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCCTC")      { ID = "0" },
                new Sequence(Alphabets.DNA, "CCTCCTAT")     { ID = "1" },
                new Sequence(Alphabets.DNA, "TCCTATC")      { ID = "2" },
                new Sequence(Alphabets.DNA, "TGCCTCCT")     { ID = "3" },
                new Sequence(Alphabets.DNA, "ATCTTAGC")     { ID = "4" },
                new Sequence(Alphabets.DNA, "CTATCTTAG")    { ID = "5" },
                new Sequence(Alphabets.DNA, "CTTAGCG")      { ID = "6" },
                new Sequence(Alphabets.DNA, "GCCTCCTAT")    { ID = "7" },
                new Sequence(Alphabets.DNA, "TAGCGCGCTA")   { ID = "8" },
                new Sequence(Alphabets.DNA, "AGCGCGC")      { ID = "9" },
                new Sequence(Alphabets.DNA, "TTTTTT")       { ID = "10" },
                new Sequence(Alphabets.DNA, "TTTTTAAA")     { ID = "11" },
                new Sequence(Alphabets.DNA, "TAAAAA")       { ID = "12" },
                new Sequence(Alphabets.DNA, "TTTTAG")       { ID = "13" },
                new Sequence(Alphabets.DNA, "TTTAGC")       { ID = "14" },
                new Sequence(Alphabets.DNA, "GCGCGCCGCGCG") { ID = "15" },
            };

            // Build the graph from the reads.
            KmerLength = kmerLengthConst;
            SequenceReads.Clear();

            SetSequenceReads(sequences);
            CreateGraph();

            // Configure and run the two graph clean-up passes.
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);

            UnDangleGraph();
            RemoveRedundancy();

            // Contigs and the read-to-contig map feed the mate pair mapping.
            IList <ISequence> contigs = BuildContigs().ToList();
            ReadContigMapper  mapper  = new ReadContigMapper();

            ReadContigMap  maps    = mapper.Map(contigs, sequences, kmerLengthConst);
            MatePairMapper builder = new MatePairMapper();

            // NOTE(review): registers library "abc" on the shared CloneLibrary instance;
            // 5 and 15 are presumably its mean length and deviation - confirm against AddLibrary.
            CloneLibrary.Instance.AddLibrary("abc", 5, 15);
            ContigMatePairs pairedReads = builder.MapContigToMatePairs(sequences, maps);

            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();

            // Redundancy 0 is passed explicitly instead of relying on the filter's default.
            ContigMatePairs    overlap = filter.FilterPairedReads(pairedReads, 0);
            DistanceCalculator dist    = new DistanceCalculator(overlap);

            overlap = dist.CalculateDistance();
            ContigGraph graph = new ContigGraph();

            graph.BuildContigGraph(contigs, this.KmerLength);
            TracePath            path  = new TracePath();
            // NOTE(review): the meaning of the trailing 3 is not visible here - confirm against FindPaths.
            IList <ScaffoldPath> paths = path.FindPaths(graph, overlap, kmerLengthConst, 3);

            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(3, paths.Count);
            ScaffoldPath scaffold = paths.First();

            Assert.AreEqual(3, scaffold.Count);

            Assert.AreEqual("ATGCCTCCTATCTTAGC", graph.GetNodeSequence(scaffold[0].Key).ConvertToString());
            Assert.AreEqual("TTAGCGCG", graph.GetNodeSequence(scaffold[1].Key).ConvertToString());
            Assert.AreEqual("GCGCGC", graph.GetNodeSequence(scaffold[2].Key).ConvertToString());
        }
        /// <summary>
        /// Filters the mate pairs that link contigs, based on orientation.
        /// Reference: "Hierarchical Scaffolding With Bambus"
        /// by Mihai Pop, Daniel S. Kosack, Steven L. Salzberg,
        /// Genome Research, Vol. 14, No. 1. (January 2004), pp. 149-159.
        /// </summary>
        /// <param name="matePairMap">Map between contigs using mate pair information.
        /// The mate pair lists stored in this map are cleared in place when a link is rejected.</param>
        /// <param name="redundancy">Number of mate pairs required to create a link between two contigs.</param>
        /// <returns>A new map that contains only the contig links which still hold
        /// at least one mate pair after filtering.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="matePairMap"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="redundancy"/> is negative.</exception>
        public ContigMatePairs FilterPairedReads(ContigMatePairs matePairMap, int redundancy = 2)
        {
            if (matePairMap == null)
            {
                throw new ArgumentNullException("matePairMap");
            }

            if (redundancy < 0)
            {
                throw new ArgumentException(Properties.Resource.NegativeRedundancy);
            }

            // Pass 1: examine every directed contig link and empty the rejected ones.
            foreach (KeyValuePair<ISequence, Dictionary<ISequence, IList<ValidMatePair>>> contigEntry in matePairMap)
            {
                foreach (KeyValuePair<ISequence, IList<ValidMatePair>> link in contigEntry.Value)
                {
                    IList<ValidMatePair> forwardPairs = link.Value;

                    // A contig linked to itself (same instance) is always dropped.
                    if (contigEntry.Key == link.Key)
                    {
                        forwardPairs.Clear();
                        continue;
                    }

                    Dictionary<ISequence, IList<ValidMatePair>> reverseLinks;
                    IList<ValidMatePair> reversePairs;
                    bool hasReverseLink =
                        matePairMap.TryGetValue(link.Key, out reverseLinks) &&
                        reverseLinks.TryGetValue(contigEntry.Key, out reversePairs);

                    if (hasReverseLink)
                    {
                        // Links exist in both directions: let the orientation filter decide.
                        OrientationFilter(reversePairs, forwardPairs, redundancy);
                    }
                    else if (forwardPairs.Count < redundancy)
                    {
                        // One-directional link without enough supporting mate pairs.
                        forwardPairs.Clear();
                    }
                }
            }

            // Pass 2: copy the links that still have mate pairs into a fresh map.
            ContigMatePairs filteredMap = new ContigMatePairs();
            Parallel.ForEach(matePairMap, contigEntry =>
            {
                var survivingLinks = contigEntry.Value
                    .Where(link => link.Value.Count > 0)
                    .ToDictionary(link => link.Key, link => link.Value);

                if (survivingLinks.Count == 0)
                {
                    return;
                }

                // Adds from the parallel loop bodies are serialized on the result map.
                lock (filteredMap)
                {
                    filteredMap.Add(contigEntry.Key, survivingLinks);
                }
            });

            return filteredMap;
        }