Esempio n. 1
0
        /// <summary>
        /// Checks that <c>CanvasSegment.RemapGenomicToBinCoordinates</c> turns two genomic BED entries on
        /// chr10 into intervals with the expected Start/End values for six adjacent bins.
        /// </summary>
        public void TestRemapGenomicToBinCoordinates()
        {
            // Six adjacent bins spanning chr10:1001-7000.
            var bins = new List<SampleGenomicBin>();
            bins.Add(new SampleGenomicBin("chr10", 1001, 2000, 0, 80));
            bins.Add(new SampleGenomicBin("chr10", 2001, 3000, 0, 79));
            bins.Add(new SampleGenomicBin("chr10", 3001, 4000, 0, 78));
            bins.Add(new SampleGenomicBin("chr10", 4001, 5000, 0, 77));
            bins.Add(new SampleGenomicBin("chr10", 5001, 6000, 0, 2));
            bins.Add(new SampleGenomicBin("chr10", 6001, 7000, 0, 2));

            // Two genomic intervals, each starting and ending in the middle of a bin.
            var bedEntries = new List<BedEntry>();
            bedEntries.Add(new BedEntry("chr10\t1500\t3500"));
            bedEntries.Add(new BedEntry("chr10\t4500\t6500"));

            var remapped = CanvasSegment.RemapGenomicToBinCoordinates(bedEntries, bins);
            var firstInterval = remapped.First();
            var lastInterval = remapped.Last();

            Assert.Equal(0, firstInterval.Start);
            Assert.Equal(2, firstInterval.End);
            Assert.Equal(3, lastInterval.Start);
            Assert.Equal(5, lastInterval.End);
        }
Esempio n. 2
0
        /// <summary>
        /// Sets the major chromosome count (MCC) and its score on <paramref name="canvasSegment"/>.
        /// </summary>
        /// <remarks>
        /// Above the diploid copy number (2), MCC is the larger of the two allele copy numbers in
        /// <paramref name="gtStates"/> and the score is computed by <c>GetGtLogLikelihoodScore</c>.
        /// At exactly 2 copies MCC is cleared; below 2 it is set to <paramref name="copyNumber"/>.
        /// In both of those cases the score is cleared.
        /// </remarks>
        /// <param name="canvasSegment">Segment whose MCC fields are written.</param>
        /// <param name="copyNumberModel">Model forwarded to <c>GetGtLogLikelihoodScore</c>.</param>
        /// <param name="gtStates">Genotype whose allele copy numbers determine MCC for gains.</param>
        /// <param name="copyNumber">Copy number of the segment; also indexes <c>_genotypes</c>.</param>
        private void AssignMcc(CanvasSegment canvasSegment, ICopyNumberModel copyNumberModel,
                               PhasedGenotype gtStates, int copyNumber)
        {
            const int diploidCopyNumber = 2;

            if (copyNumber <= diploidCopyNumber)
            {
                // variant caller does not attempt to call LOH, for DELs CN=MCC
                if (copyNumber != diploidCopyNumber)
                {
                    canvasSegment.MajorChromosomeCount = copyNumber;
                }
                else
                {
                    canvasSegment.MajorChromosomeCount = null;
                }
                canvasSegment.MajorChromosomeCountScore = null;
                return;
            }

            // Gain: MCC is the higher-copy allele of the chosen genotype.
            canvasSegment.MajorChromosomeCount = Math.Max(gtStates.CopyNumberA, gtStates.CopyNumberB);

            var candidateGenotypes = _genotypes[copyNumber];
            int? selectedGtState = candidateGenotypes.IndexOf(gtStates);
            canvasSegment.MajorChromosomeCountScore = GetGtLogLikelihoodScore(
                canvasSegment.Balleles, candidateGenotypes, ref selectedGtState, copyNumberModel);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that <c>CanvasSegment.CreateSegmentsFromCommonCnvs</c> produces one segment per input
        /// interval and that each segment carries the expected number of B-alleles and counts.
        /// </summary>
        public void TestCreateSegmentsFromCommonCnvs()
        {
            var sampleGenomicBins = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr10", 1001, 2000, 0, 80),
                new SampleGenomicBin("chr10", 2001, 3000, 0, 79),
                new SampleGenomicBin("chr10", 3001, 4000, 0, 78),
                new SampleGenomicBin("chr10", 4001, 5000, 0, 77),
                new SampleGenomicBin("chr10", 5001, 6000, 0, 2),
                new SampleGenomicBin("chr10", 6001, 7000, 0, 2)
            };

            // (0, 3) and (3, 5) are expected to yield 3 and 2 counts respectively (asserted below).
            var intervals = new List<BedInterval>
            {
                new BedInterval(0, 3),
                new BedInterval(3, 5)
            };

            // One Balleles entry per interval: none for the first, a single allele for the second.
            var balleles = new List<Balleles>
            {
                new Balleles(new List<Ballele>()),
                new Balleles(new List<Ballele> {
                    new Ballele(5501, 30, 30)
                })
            };

            var canvasSegments = CanvasSegment.CreateSegmentsFromCommonCnvs(sampleGenomicBins, intervals, balleles);

            // Expected value goes first so a failure message reports the right sides.
            Assert.Equal(intervals.Count, canvasSegments.Count);
            Assert.Equal(0, canvasSegments.First().Balleles.Size());
            Assert.Equal(1, canvasSegments.Last().Balleles.Size());
            Assert.Equal(3, canvasSegments.First().Counts.Count);
            Assert.Equal(2, canvasSegments.Last().Counts.Count);
        }
Esempio n. 4
0
        /// <summary>
        /// Merges two adjacent chr1 segments and checks which confidence intervals survive:
        /// the end interval of the merged-in segment and the start interval of the receiving segment.
        /// </summary>
        public void TestCipos()
        {
            var bins = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 90),
                new SampleGenomicBin("chr1", 1, 2, 110),
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 95),
                new SampleGenomicBin("chr1", 1, 2, 105)
            };

            var left = new CanvasSegment("chr1", 1245, 678910, bins);
            left.StartConfidenceInterval = Tuple.Create(-100, 100);
            left.EndConfidenceInterval = Tuple.Create(-80, 80);

            var right = new CanvasSegment("chr1", 678910, 8787888, bins);
            right.StartConfidenceInterval = Tuple.Create(-50, 50);
            right.EndConfidenceInterval = Tuple.Create(-30, 30);

            left.MergeIn(right);

            // The merged segment ends where the right one ended and takes its end interval;
            // its start interval is still the left one's.
            Assert.Equal(8787888, left.End);
            Assert.Equal(-30, left.EndConfidenceInterval.Item1);
            Assert.Equal(100, left.StartConfidenceInterval.Item2);
        }
Esempio n. 5
0
        /// <summary>
        /// Exercises the <c>CanvasSegment</c> constructor and basic properties, then merges a second
        /// segment in and checks the combined count list and end position.
        /// </summary>
        public void TestSegment()
        {
            var counts = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 100),
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 90),
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 110),
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 100),
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 95),
                new SampleGenomicBin("chr17", 100000000, 110000000, 0, 105)
            };
            var seg1 = new CanvasSegment("chr17", 100000000, 110000000, counts);

            // Silly constructor tests (expected value first, as Assert.Equal requires):
            Assert.Equal(100000000, seg1.Begin);
            Assert.Equal(110000000, seg1.End);
            Assert.Equal(counts.Count, seg1.BinCount);
            Assert.Equal("chr17", seg1.Chr);
            // Property test:
            Assert.Equal(100, seg1.MeanCount, 2);

            // Build a second segment, and merge them, and test results:
            var seg2 = new CanvasSegment("chr17", 110000000, 120000000, counts);

            seg1.MergeIn(seg2);
            Assert.Equal(12, seg1.Counts.Count);
            // After the merge seg1 must end where seg2 ends.
            Assert.Equal(seg2.End, seg1.End);
        }
Esempio n. 6
0
        /// <summary>
        /// Merges the segments of every sample, driving the merge with copy numbers and q-scores
        /// gathered across all samples.
        /// </summary>
        /// <remarks>
        /// For each segment index, the copy numbers of all samples are collected into one list and the
        /// q-scores of all samples are averaged. Both lists are built here with exactly one entry per
        /// segment index, so they are always non-null and of matching length when handed to
        /// <c>CanvasSegment.MergeSegments</c>.
        /// </remarks>
        /// <param name="segments">Per-sample segment lists; every list is read at the same segment indices,
        /// taken from the first sample's list length.</param>
        /// <param name="minimumCallSize">Forwarded to <c>CanvasSegment.MergeSegments</c>.</param>
        /// <param name="qScoreThreshold">Forwarded to <c>CanvasSegment.MergeSegments</c>.</param>
        /// <returns>A new sample map holding the merged segment list of each sample.</returns>
        private static ISampleMap<List<CanvasSegment>> MergeSegments(ISampleMap<List<CanvasSegment>> segments, int minimumCallSize, int qScoreThreshold)
        {
            int nSegments = segments.First().Value.Count;
            var copyNumbers = new List<List<int>>(nSegments);
            var qscores = new List<double>(nSegments);

            foreach (int segmentIndex in Enumerable.Range(0, nSegments))
            {
                copyNumbers.Add(segments.Select(s => s.Value[segmentIndex].CopyNumber).ToList());
                qscores.Add(segments.Select(s => s.Value[segmentIndex].QScore).Average());
            }

            var mergedSegments = new SampleMap<List<CanvasSegment>>();

            foreach (var sampleSegments in segments)
            {
                // NOTE(review): 10000 is the third argument of CanvasSegment.MergeSegments; its meaning
                // is not visible here (presumably a maximum merge distance) - confirm against the callee.
                var mergedSegmentsThisSample = CanvasSegment.MergeSegments(sampleSegments.Value.ToList(),
                                                                           minimumCallSize, 10000, copyNumbers, qscores, qScoreThreshold);
                mergedSegments.Add(sampleSegments.Key, mergedSegmentsThisSample);
            }
            return mergedSegments;
        }
Esempio n. 7
0
        /// <summary>
        /// Builds the overlapping-segment regions of one sample, processing chromosomes in parallel.
        /// </summary>
        /// <remarks>
        /// For a chromosome that has an entry in <paramref name="commonRegions"/>, segments are created from
        /// the common CNV intervals and merged with the sample's own segments through
        /// <c>CanvasSegment.MergeCommonCnvSegments</c>. For any other chromosome each sample segment is
        /// wrapped in its own region. The per-chromosome results are concatenated in order of chromosome name.
        /// </remarks>
        /// <param name="defaultAlleleCountThreshold">Forwarded to <c>CanvasSegment.MergeCommonCnvSegments</c>.</param>
        /// <param name="commonRegions">Common CNV bed entries keyed by chromosome; only the keys are used here.</param>
        /// <param name="genomicBinsByChromosome">Genomic bins keyed by chromosome.</param>
        /// <param name="segmentIntervalsByChromosome">Common CNV intervals keyed by chromosome.</param>
        /// <param name="allelesByChromosomeCommonSegs">B-alleles of the common CNV segments keyed by chromosome.</param>
        /// <param name="segments">Segments of the sample being processed.</param>
        /// <returns>All regions of the sample, ordered by chromosome name.</returns>
        private static List<OverlappingSegmentsRegion> GetSegmentSets(int defaultAlleleCountThreshold, Dictionary<string, List<BedEntry>> commonRegions,
                                                                      Dictionary<string, IReadOnlyList<SampleGenomicBin>> genomicBinsByChromosome, Dictionary<string, List<BedInterval>> segmentIntervalsByChromosome,
                                                                      Dictionary<string, List<Balleles>> allelesByChromosomeCommonSegs, Segments segments)
        {
            // Concurrent dictionary because the parallel loop below writes one entry per chromosome.
            var segmentsSetByChromosome = new ConcurrentDictionary<string, List<OverlappingSegmentsRegion>>();

            Parallel.ForEach(
                segments.GetChromosomes(),
                chr =>
            {
                var segmentsByChromosome = segments.GetSegmentsForChromosome(chr).ToList();

                // Hash lookup instead of scanning every key.
                if (commonRegions.ContainsKey(chr))
                {
                    var commonCnvCanvasSegments = CanvasSegment.CreateSegmentsFromCommonCnvs(genomicBinsByChromosome[chr],
                                                                                             segmentIntervalsByChromosome[chr], allelesByChromosomeCommonSegs[chr]);

                    segmentsSetByChromosome[chr] = CanvasSegment.MergeCommonCnvSegments(segmentsByChromosome,
                                                                                        commonCnvCanvasSegments, defaultAlleleCountThreshold);
                }
                else
                {
                    // No common CNVs on this chromosome: every segment forms a region of its own.
                    segmentsSetByChromosome[chr] = segmentsByChromosome.Select(
                        segment => new OverlappingSegmentsRegion(new List<CanvasSegment> {
                        segment
                    }, null)).ToList();
                }
            });
            return segmentsSetByChromosome.OrderBy(i => i.Key).Select(x => x.Value).SelectMany(x => x).ToList();
        }
        /// <summary>
        /// Builds the bedgraph entry of one segment: the interval runs from the smallest bin start to the
        /// largest bin stop, and the value is the median bin count multiplied by
        /// <paramref name="normalizationFactor"/>.
        /// </summary>
        private static BedGraphEntry GetBedGraphEntry(CanvasSegment segment, double normalizationFactor)
        {
            var bins = segment.GenomicBins;

            var scaledMedian = (decimal)(bins.Select(bin => bin.Count).Median() * normalizationFactor);
            var span = new BedInterval(bins.Min(bin => bin.Start), bins.Max(bin => bin.Stop));

            return new BedGraphEntry(segment.Chr, span, scaledMedian);
        }
Esempio n. 9
0
 /// <summary>
 /// Returns the known copy number of a segment as reported by the copy-number oracle, or -1 when
 /// no oracle is available.  Per the original note, the oracle looks for a known-CN interval that
 /// covers (at least half of) the segment.
 /// </summary>
 protected int GetKnownCopyNumberForSegment(CanvasSegment segment)
 {
     if (_cnOracle != null)
     {
         return _cnOracle.GetKnownCNForSegment(segment);
     }

     // No truth data loaded: the copy number is unknown.
     return -1;
 }
Esempio n. 10
0
 /// <summary>
 /// Returns the known copy number of a segment as reported by <c>CNOracle</c>, or -1 when
 /// <c>CNOracle</c> is null.  Per the original note, the oracle looks for a known-CN interval that
 /// covers (at least half of) the segment.
 /// </summary>
 protected int GetKnownCNForSegment(CanvasSegment segment)
 {
     if (CNOracle != null)
     {
         return CNOracle.GetKnownCNForSegment(segment);
     }

     // No truth data loaded: the copy number is unknown.
     return -1;
 }
Esempio n. 11
0
 /// <summary>
 /// Reads the bin input through <c>CanvasSegment.ReadBedInput</c> into <c>CoverageInfo</c> and,
 /// when <c>InputVafPath</c> is set, also loads the VAF input.
 /// </summary>
 /// <param name="referenceFolder">Forwarded to <c>LoadVAFInput</c>.</param>
 private void ReadInputFiles(string referenceFolder)
 {
     CoverageInfo = CanvasSegment.ReadBedInput(InputBinPath, ForbiddenIntervalBedPath);

     if (InputVafPath == null)
     {
         // No VAF file given: coverage is all there is to load.
         return;
     }

     LoadVAFInput(referenceFolder);
 }
        /// <summary>
        /// Returns the reference copy number for a genomic range.
        /// </summary>
        /// <param name="referencePloidy">Ploidy information; may be null.</param>
        /// <param name="chrom">Chromosome of the range.</param>
        /// <param name="start">Start of the range.</param>
        /// <param name="end">End of the range.</param>
        /// <param name="defaultPloidy">Value returned when <paramref name="referencePloidy"/> is null.</param>
        /// <returns><paramref name="defaultPloidy"/> when no ploidy information is given; otherwise the value of
        /// <c>GetReferenceCopyNumber</c> for a segment spanning the range.</returns>
        public static int GetPloidy(PloidyInfo referencePloidy, string chrom, int start, int end, int defaultPloidy = 2)
        {
            if (referencePloidy != null)
            {
                // The segment only carries the coordinates being queried; it has no counts.
                var query = new CanvasSegment(chrom, start, end, new List<float>());
                return referencePloidy.GetReferenceCopyNumber(query);
            }

            return defaultPloidy;
        }
Esempio n. 13
0
        /// <summary>
        /// Builds segments on three chromosomes, runs <c>CanvasSegment.MergeSegments</c> and checks that
        /// exactly the expected merges happened (chr1: 5 to 3, chr2: none, chr3: 3 to 1).
        /// </summary>
        public void TestMergeSegments()
        {
            var counts = new List<SampleGenomicBin>();
            var allSegments = new List<CanvasSegment>
            {
                // Chr1 gets five segments and we should merge to three:
                new CanvasSegment("chr1", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 2000000, 2000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 2000100, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 3000000, 3100000, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 3100000, 4000000, counts) { CopyNumber = 2 },

                // Chr2 gets segments with a large gap between, so can't merge:
                new CanvasSegment("chr2", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr2", 3000000, 3000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr2", 4000000, 5000000, counts) { CopyNumber = 2 },

                // Chr3 has three segments that all merge to 1 big one:
                new CanvasSegment("chr3", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 2000000, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 3000000, 4000000, counts) { CopyNumber = 2 }
            };

            var merged = CanvasSegment.MergeSegments(allSegments, 50000, 10000);
            var byChromosome = CanvasSegment.GetSegmentsByChromosome(merged);

            Assert.Equal(3, byChromosome["chr1"].Count);
            Assert.Equal(3, byChromosome["chr2"].Count);
            Assert.Single(byChromosome["chr3"]);
        }
Esempio n. 14
0
        /// <summary>
        /// Builds segments on three chromosomes, runs the in-place <c>CanvasSegment.MergeSegments</c> and
        /// checks that exactly the expected merges happened (chr1: 5 to 3, chr2: none, chr3: 3 to 1).
        /// </summary>
        public void TestMergeSegments()
        {
            List<float> counts = new List<float>();
            List<CanvasSegment> allSegments = new List<CanvasSegment>
            {
                // Chr1 gets five segments and we should merge to three:
                new CanvasSegment("chr1", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 2000000, 2000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 2000100, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 3000000, 3100000, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 3100000, 4000000, counts) { CopyNumber = 2 },

                // Chr2 gets segments with a large gap between, so can't merge:
                new CanvasSegment("chr2", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr2", 3000000, 3000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr2", 4000000, 5000000, counts) { CopyNumber = 2 },

                // Chr3 has three segments that all merge to 1 big one:
                new CanvasSegment("chr3", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 2000000, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 3000000, 4000000, counts) { CopyNumber = 2 }
            };

            CanvasSegment.MergeSegments(ref allSegments, 50000, 10000);
            Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(allSegments);

            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages correct.
            Assert.AreEqual(3, segmentsByChromosome["chr1"].Count);
            Assert.AreEqual(3, segmentsByChromosome["chr2"].Count);
            Assert.AreEqual(1, segmentsByChromosome["chr3"].Count);
        }
Esempio n. 15
0
        /// <summary>
        /// Merging a segment that lies before the receiving segment must leave the receiving segment's
        /// genomic bins in positional order (earlier bins first).
        /// </summary>
        public void MergeIn_PreviousSegment_KeepsBinsOrdered()
        {
            var earlierBins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 1, 2, 100) };
            var earlierSegment = new CanvasSegment("chr1", 1, 2, earlierBins);
            var laterBins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 2, 3, 100) };
            var laterSegment = new CanvasSegment("chr1", 2, 3, laterBins);

            laterSegment.MergeIn(earlierSegment);

            var expectedBins = earlierBins.Concat(laterBins);
            Assert.Equal(expectedBins, laterSegment.GenomicBins);
        }
Esempio n. 16
0
        /// <summary>
        /// Merging a segment that lies before the receiving segment must leave the receiving segment's
        /// B-alleles in positional order (earlier alleles first).
        /// </summary>
        public void MergeIn_PreviousSegment_KeepsBAllelesOrdered()
        {
            var noBins = new List<SampleGenomicBin>();

            var earlierAlleles = new Balleles(new List<Ballele> { new Ballele(1, 1, 1) });
            var earlierSegment = new CanvasSegment("chr1", 1, 2, noBins, earlierAlleles);

            var laterAlleles = new Balleles(new List<Ballele> { new Ballele(2, 1, 1) });
            var laterSegment = new CanvasSegment("chr1", 2, 3, noBins, laterAlleles);

            laterSegment.MergeIn(earlierSegment);

            var expectedRange = earlierAlleles.Range.Concat(laterAlleles.Range);
            Assert.Equal(expectedRange, laterSegment.Balleles.Range);
        }
Esempio n. 17
0
        /// <summary>
        /// Checks that <c>CanvasSegment.ExpectedCount</c> returns 78 (within 0.01) for ten segments that
        /// all share the counts 80, 79, 78, 77 and 2.
        /// </summary>
        public void TestSegmentStats()
        {
            List<float> counts = new List<float>()
            {
                80, 79, 78, 77, 2
            };
            List<CanvasSegment> segments = new List<CanvasSegment>();

            // Ten back-to-back 1 Mb segments on chr10.
            for (int index = 0; index < 10; index++)
            {
                CanvasSegment seg = new CanvasSegment("chr10", 1000000 * index, 1000000 * (index + 1), counts);
                segments.Add(seg);
            }
            double expectedCount = CanvasSegment.ExpectedCount(segments);

            // Assert.AreEqual takes (expected, actual, delta).
            Assert.AreEqual(78, expectedCount, 0.01);
        }
Esempio n. 18
0
        /// <summary>
        /// Create CanvasSegments from common CNVs bed file and overlap with CanvasPartition
        /// segments to create SegmentHaplotypes
        /// </summary>
        /// <param name="variantFrequencyFiles">Per-sample path of the variant frequency file.</param>
        /// <param name="defaultAlleleCountThreshold">Forwarded to <c>GetSegmentSets</c>.</param>
        /// <param name="commonCNVsbedPath">Path of the common CNVs bed file. When null, every segment of every
        /// sample simply becomes its own region.</param>
        /// <param name="sampleSegments">Per-sample segments. Chromosomes and genomic bins are taken from the first sample.</param>
        /// <returns>The result of <c>GetOverlappingSegmentsRegionSampleLists</c> for the per-sample regions.</returns>
        /// <exception cref="ArgumentException">Thrown when <c>IsIdenticalChromosomeNames</c> returns true for the
        /// bed file's chromosomes and the sample chromosomes.</exception>
        private IEnumerable<ISampleMap<OverlappingSegmentsRegion>> CreateSegmentSetsFromCommonCnvs(ISampleMap<string> variantFrequencyFiles,
                                                                                                   int defaultAlleleCountThreshold, string commonCNVsbedPath, ISampleMap<Segments> sampleSegments)
        {
            if (commonCNVsbedPath == null)
            {
                var defaultSampleRegions = sampleSegments
                                           .SelectValues(segments => segments.AllSegments.Select(segment => new OverlappingSegmentsRegion(segment)).ToList());
                return GetOverlappingSegmentsRegionSampleLists(defaultSampleRegions);
            }

            var commonRegions = ReadCommonRegions(commonCNVsbedPath);
            var chromosomes = sampleSegments.Values.First().GetChromosomes();

            // NOTE(review): this throws when the helper returns TRUE, which reads inverted relative to the
            // helper's name and the error message - confirm the helper's actual semantics.
            if (IsIdenticalChromosomeNames(commonRegions, chromosomes))
            {
                throw new ArgumentException(
                          $"Chromosome names in a common CNVs bed file {commonCNVsbedPath} does not match the genome reference");
            }

            var segmentIntervalsByChromosome = new Dictionary<string, List<BedInterval>>();
            var genomicBinsByChromosome = new Dictionary<string, IReadOnlyList<SampleGenomicBin>>();

            // Dictionary<,> does not support concurrent writers, so the per-chromosome work runs in
            // parallel while the inserts into the two result dictionaries are serialized under a lock.
            var resultsLock = new object();

            Parallel.ForEach(
                chromosomes,
                chr =>
            {
                IReadOnlyList<SampleGenomicBin> chromosomeBins = sampleSegments.Values.First().GetGenomicBinsForChromosome(chr);
                List<BedInterval> chromosomeIntervals =
                    CanvasSegment.RemapGenomicToBinCoordinates(commonRegions[chr], chromosomeBins);

                lock (resultsLock)
                {
                    genomicBinsByChromosome[chr] = chromosomeBins;
                    segmentIntervalsByChromosome[chr] = chromosomeIntervals;
                }
            });

            var sampleRegions = new SampleMap<List<OverlappingSegmentsRegion>>();

            foreach (var sampleId in sampleSegments.SampleIds)
            {
                // B-allele frequencies are read per sample, restricted to the common CNV intervals.
                var commonIntervals = commonRegions.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Select(bedEntry => bedEntry.Interval).ToList());
                var allelesByChromosomeCommonSegs = CanvasIO.ReadFrequenciesWrapper(_logger,
                                                                                    new FileLocation(variantFrequencyFiles[sampleId]), commonIntervals);
                var segmentsSets = GetSegmentSets(defaultAlleleCountThreshold, commonRegions,
                                                  genomicBinsByChromosome, segmentIntervalsByChromosome, allelesByChromosomeCommonSegs, sampleSegments[sampleId]);
                sampleRegions.Add(sampleId, segmentsSets);
            }

            return GetOverlappingSegmentsRegionSampleLists(sampleRegions);
        }
        /// <summary>
        /// Tells whether a segment is a passing variant: its filter passes and it either differs from the
        /// reference copy number or is a copy-number-2 segment with a major chromosome count of 2 (LOH).
        /// </summary>
        /// <param name="segment">Segment to classify.</param>
        /// <param name="ploidyInfo">Source of the reference copy number; when null the reference is 2.</param>
        private bool IsPassVariant(CanvasSegment segment, PloidyInfo ploidyInfo)
        {
            if (!segment.Filter.IsPass)
            {
                return false;
            }

            var referenceCopyNumber = ploidyInfo?.GetReferenceCopyNumber(segment) ?? 2;

            // Either a copy number change, or LOH (two copies, both from the major chromosome).
            return segment.CopyNumber != referenceCopyNumber
                   || (segment.CopyNumber == 2 && segment.MajorChromosomeCount == 2);
        }
Esempio n. 20
0
        /// <summary>
        /// A segment without bins yields no bedgraph entries when a normalization factor is supplied, and
        /// makes <c>Calculate</c> throw an <c>AggregateException</c> when the factor has to be computed.
        /// </summary>
        public void NormalizedBinsCoverageCalculator_SegmentWithNoBins_ReturnsNoBedGraphEntries()
        {
            var binlessSegment = new CanvasSegment("chr1", 100, 120, new List<SampleGenomicBin>());
            var segments = new List<CanvasSegment> { binlessSegment };
            var calculator = new NormalizedBinsCoverageCalculator();

            // With a precomputed normalization factor there is simply nothing to emit.
            var entries = calculator.Calculate(segments, 1);

            Assert.Empty(entries);

            // Without a factor the calculator has nothing to derive one from and throws.
            // If this turns out to be a realistic scenario, a friendlier exception than the unhandled
            // AggregateException would be preferable; for now it is a purely theoretical edge case.
            Assert.Throws<AggregateException>(() => calculator.Calculate(segments));
        }
Esempio n. 21
0
        /// <summary>
        /// Merges two adjacent chr1 segments and confirms the merged segment keeps the correct confidence
        /// intervals: the end interval of the merged-in segment and its own start interval.
        /// </summary>
        public void TestCIPOS()
        {
            List<float> counts = new List<float>()
            {
                100, 90, 110, 100, 95, 105
            };
            CanvasSegment segment = new CanvasSegment("chr1", 1245, 678910, counts);

            segment.StartConfidenceInterval = new Tuple<int, int>(-100, 100);
            segment.EndConfidenceInterval = new Tuple<int, int>(-80, 80);
            CanvasSegment segment2 = new CanvasSegment("chr1", 678910, 8787888, counts);

            segment2.StartConfidenceInterval = new Tuple<int, int>(-50, 50);
            segment2.EndConfidenceInterval = new Tuple<int, int>(-30, 30);
            segment.MergeIn(segment2);

            // Assert.AreEqual takes (expected, actual).
            Assert.AreEqual(8787888, segment.End);
            Assert.AreEqual(-30, segment.EndConfidenceInterval.Item1);
            Assert.AreEqual(100, segment.StartConfidenceInterval.Item2);
        }
Esempio n. 22
0
        /// <summary>
        /// Checks that <c>CanvasSegment.ExpectedCount</c> returns 78 (to two decimal places) for ten
        /// segments that all share bins with counts 80, 79, 78, 77 and 2.
        /// </summary>
        public void TestSegmentStats()
        {
            const int segmentTotal = 10;

            var sharedBins = new List<SampleGenomicBin>();
            sharedBins.Add(new SampleGenomicBin("chr10", 1000000, 1000001, 0, 80));
            sharedBins.Add(new SampleGenomicBin("chr10", 1000000, 1000001, 0, 79));
            sharedBins.Add(new SampleGenomicBin("chr10", 1000000, 1000001, 0, 78));
            sharedBins.Add(new SampleGenomicBin("chr10", 1000000, 1000001, 0, 77));
            sharedBins.Add(new SampleGenomicBin("chr10", 1000000, 1000001, 0, 2));

            // Back-to-back 1 Mb segments on chr10, all backed by the same bin list.
            var segments = new List<CanvasSegment>();
            int position = 0;
            while (position < segmentTotal)
            {
                segments.Add(new CanvasSegment("chr10", 1000000 * position, 1000000 * (position + 1), sharedBins));
                position++;
            }

            double expectedCount = CanvasSegment.ExpectedCount(segments);

            Assert.Equal(78, expectedCount, 2);
        }
Esempio n. 23
0
        /// <summary>
        /// Generates 100 segments that cycle through usable / too short / too few variants / both, and
        /// checks that <c>GetUsableSegmentsForModeling</c> returns 50 of them.
        /// </summary>
        public void TestUsableSegments()
        {
            List<CanvasSegment> segments = new List<CanvasSegment>();
            // NOTE(review): currentPosition is never advanced, so every segment starts at 1000 -
            // confirm whether overlapping segments are intended here.
            int currentPosition = 1000;
            // Generate some segments.  Alternate between:
            // - Usable
            // - Too short
            // - Too few variants
            // - Too short + too few variants
            // Fixed seed so the generated counts and frequencies are identical on every run.
            Random rng = new Random(0);

            for (int index = 0; index < 100; index++)
            {
                // Odd indices are short segments.
                int length = index % 2 == 1 ? 2000 : 100000;
                // Indices 2 and 3 of every group of four get few variants.
                int variantCount = index % 4 > 1 ? 25 : 999;

                List<float> counts = new List<float>();
                for (int countIndex = 0; countIndex < length / 100; countIndex++)
                {
                    counts.Add(rng.Next(1000));
                }
                CanvasSegment segment = new CanvasSegment("chr1", currentPosition, currentPosition + length, counts);
                for (int varIndex = 0; varIndex < variantCount; varIndex++)
                {
                    segment.Alleles.Frequencies.Add(rng.Next());
                }
                segments.Add(segment);
            }
            var usable = CanvasSomaticCaller.SomaticCaller.GetUsableSegmentsForModeling(segments, false, 50);

            Assert.AreEqual(50, usable.Count);
        }
Esempio n. 24
0
        /// <summary>
        /// Exercises the <c>CanvasSegment</c> constructor and basic properties, then merges a second
        /// segment in and checks the combined count list and end position.
        /// </summary>
        public void TestSegment()
        {
            List<float> counts = new List<float>()
            {
                100, 90, 110, 100, 95, 105
            };
            CanvasSegment seg1 = new CanvasSegment("chr17", 100000000, 110000000, counts);

            // Silly constructor tests (Assert.AreEqual takes expected first, actual second):
            Assert.AreEqual(100000000, seg1.Begin);
            Assert.AreEqual(110000000, seg1.End);
            Assert.AreEqual(counts.Count, seg1.BinCount);
            Assert.AreEqual("chr17", seg1.Chr);
            // Property test:
            Assert.AreEqual(100, seg1.MeanCount, 0.01);

            // Build a second segment, and merge them, and test results:
            CanvasSegment seg2 = new CanvasSegment("chr17", 110000000, 120000000, counts);

            seg1.MergeIn(seg2);
            Assert.AreEqual(12, seg1.Counts.Count);
            // After the merge seg1 must end where seg2 ends.
            Assert.AreEqual(seg2.End, seg1.End);
        }
Esempio n. 25
0
        /// <summary>
        /// Splits one Canvas segment that spans two common CNV segments and checks the sizes of the
        /// two resulting sets (one segment in SetA, three in SetB).
        /// </summary>
        public void TestSplitSeveralCommonCNVOverlapsCanvasCNV()
        {
            // scenario: Canvas segment spans more than one common segment
            // canvasSegment:   ------------------------------------------------
            // commonSegment:            ------------     -------------------
            var bins = new List<SampleGenomicBin>();
            bins.Add(new SampleGenomicBin("chr1", 100000, 100001, 100));
            bins.Add(new SampleGenomicBin("chr1", 150000, 150001, 90));
            bins.Add(new SampleGenomicBin("chr1", 200000, 200001, 110));
            bins.Add(new SampleGenomicBin("chr1", 250000, 250001, 100));
            bins.Add(new SampleGenomicBin("chr1", 300000, 300001, 95));
            bins.Add(new SampleGenomicBin("chr1", 350000, 350001, 105));
            bins.Add(new SampleGenomicBin("chr1", 400000, 400001, 105));
            bins.Add(new SampleGenomicBin("chr1", 450000, 450001, 105));
            bins.Add(new SampleGenomicBin("chr1", 500000, 500001, 105));

            var canvasSegments = new List<CanvasSegment>();
            canvasSegments.Add(new CanvasSegment("chr1", 100000, 500002, bins));

            var commonSegments = new List<CanvasSegment>();
            commonSegments.Add(new CanvasSegment("chr1", 200000, 250001, bins.Skip(2).Take(2).ToList()));
            commonSegments.Add(new CanvasSegment("chr1", 400000, 450001, bins.Skip(4).Take(2).ToList()));

            // Both cursors are passed by reference and start at the first segment of each list.
            int canvasCursor = 0;
            int commonCursor = 0;
            const int readCountsThreshold = 4;
            var split = CanvasSegment.SplitCanvasSegments(canvasSegments, commonSegments, readCountsThreshold, ref canvasCursor, ref commonCursor);

            // transform into "haplotype" segments
            // canvasSegment:   ------------------------------------------------
            // commonSegment:   ---------  ------------     -------------------
            Assert.Single(split.SetA);
            Assert.Equal(3, split.SetB.Count);
        }
Esempio n. 26
0
        /// <summary>
        /// Splits two Canvas segments against one common CNV segment that partially overlaps both and
        /// checks the sizes of the two resulting sets (two segments each).
        /// </summary>
        public void TestSplitCommonCNVPartOverlapsCanvasCNV()
        {
            // scenario: Canvas segment part overlaps common segment and comes first
            // canvasSegment:   --------------
            // commonSegment:            ------------
            var bins = new List<SampleGenomicBin>();
            bins.Add(new SampleGenomicBin("chr1", 100000, 100001, 100));
            bins.Add(new SampleGenomicBin("chr1", 150000, 150001, 90));
            bins.Add(new SampleGenomicBin("chr1", 200000, 200001, 110));
            bins.Add(new SampleGenomicBin("chr1", 250000, 250001, 100));
            bins.Add(new SampleGenomicBin("chr1", 300000, 300001, 95));
            bins.Add(new SampleGenomicBin("chr1", 350000, 350001, 105));
            bins.Add(new SampleGenomicBin("chr1", 400000, 400001, 105));
            bins.Add(new SampleGenomicBin("chr1", 450000, 450001, 105));
            bins.Add(new SampleGenomicBin("chr1", 500000, 500001, 105));

            var canvasSegments = new List<CanvasSegment>();
            canvasSegments.Add(new CanvasSegment("chr1", 100000, 250001, bins.Take(4).ToList()));
            canvasSegments.Add(new CanvasSegment("chr1", 300000, 500001, bins.Skip(4).Take(5).ToList()));

            var commonSegments = new List<CanvasSegment>();
            commonSegments.Add(new CanvasSegment("chr1", 200000, 350001, bins.Skip(2).Take(4).ToList()));

            // Both cursors are passed by reference and start at the first segment of each list.
            int canvasCursor = 0;
            int commonCursor = 0;
            const int readCountsThreshold = 4;
            var split = CanvasSegment.SplitCanvasSegments(canvasSegments, commonSegments, readCountsThreshold, ref canvasCursor, ref commonCursor);

            // transform into "haplotype" segments
            // canvasSegment:   --------------  --------
            // commonSegment:   ---------   ------------
            Assert.Equal(2, split.SetA.Count);
            Assert.Equal(2, split.SetB.Count);
        }
Esempio n. 27
0
        /// <summary>
        /// Single-sample calling workflow: reads segments from <paramref name="inFile"/>, loads variant
        /// frequencies, assigns ploidy calls, merges and scores the segments, then writes the coverage
        /// plot data and the segment output.
        /// </summary>
        /// <param name="variantFrequencyFile">Input handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
        /// <param name="inFile">Segment file to read; its directory is stored in <c>TempFolder</c>.</param>
        /// <param name="outFile">Output path handed to <c>CanvasSegment.WriteSegments</c>.</param>
        /// <param name="ploidyBedPath">Optional ploidy BED file; skipped when null or empty.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
        /// <param name="sampleName">Sample name forwarded to <c>CanvasSegment.WriteSegments</c>.</param>
        /// <param name="truthDataPath">Optional known-copy-number file; when given, a report against it is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            // The oracle is only created when a truth file was supplied.
            bool hasTruthData = !string.IsNullOrEmpty(truthDataPath);
            if (hasTruthData)
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments   = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);

            // Nothing to call: still write an output file (without headers or ploidy) and stop.
            if (this.Segments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
                return 0;
            }

            PloidyInfo ploidy = string.IsNullOrEmpty(ploidyBedPath)
                ? null
                : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);

            // load MAF
            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

            // Create new models for different copy number states
            this.InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage         = CanvasCommon.Utilities.Mean(diploidCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

            // new coverage model
            this.Model = new CoverageModel { DiploidCoverage = this.DiploidCoverage };

            // The weighting scheme depends only on the total number of segments.
            bool weightByLength = this.Segments.Count > 100;
            List<SegmentInfo> segmentInfos = new List<SegmentInfo>();

            foreach (CanvasSegment segment in this.Segments)
            {
                // Frequencies above 0.5 are replaced by (1 - frequency) before taking the median.
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.VariantFrequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                SegmentInfo info = new SegmentInfo
                {
                    Segment  = segment,
                    // -1 marks a segment without any variant frequencies.
                    MAF      = foldedFrequencies.Count > 0 ? CanvasCommon.Utilities.Median(foldedFrequencies) : -1,
                    Coverage = CanvasCommon.Utilities.Median(segment.Counts)
                };
                info.Weight = weightByLength ? segment.End - segment.Begin : segment.BinCount;

                segmentInfos.Add(info);
            }

            // Assign copy number and major chromosome count for each segment
            bool useGaussianMixtureModel = false; // For now, this is set false, since we saw weird performance on chrY (CANV-115):

            if (!useGaussianMixtureModel)
            {
                AssignPloidyCallsDistance(Model, segmentInfos, medianVariantCoverage);
            }
            else
            {
                // optimize model covariance (the returned likelihood is not read afterwards)
                double fitLikelihood = FitGaussians(Model, segmentInfos);
                AssignPloidyCallsGaussianMixture();
            }

            // Merge neighboring segments that got the same copy number call.
            CanvasSegment.MergeSegments(ref this.Segments);
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

            string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            // The ploidy header line, when present, is the only extra header written.
            List<string> extraHeaders = new List<string>();
            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
            return 0;
        }
Esempio n. 28
0
 /// <summary>
 /// Asks <paramref name="copyNumberModel"/> for the log likelihood of <paramref name="gtStates"/>
 /// given the B-alleles of <paramref name="canvasSegment"/>.
 /// </summary>
 private static double GetCurrentGtLogLikelihood(ICopyNumberModel copyNumberModel, CanvasSegment canvasSegment, PhasedGenotype gtStates)
 {
     var segmentBalleles = canvasSegment.Balleles;
     double logLikelihood = copyNumberModel.GetGenotypeLogLikelihood(segmentBalleles, gtStates);
     return logLikelihood;
 }
Esempio n. 29
0
        /// <summary>
        /// Single-sample calling workflow: reads segments from <paramref name="inFile"/>, attaches alleles
        /// read from <paramref name="variantFrequencyFile"/>, assigns ploidy calls, scores, merges and
        /// re-scores the segments, applies the segment filters, and writes the coverage plot data and
        /// the segment output.
        /// </summary>
        /// <param name="variantFrequencyFile">Variant frequency input handed to <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
        /// <param name="inFile">Segment file to read; its directory is stored in <c>TempFolder</c>.</param>
        /// <param name="outFile">Output path handed to <c>CanvasSegmentWriter.WriteSegments</c>.</param>
        /// <param name="ploidyVcfPath">Optional ploidy VCF; skipped when null or empty.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
        /// <param name="sampleName">Sample name forwarded to <c>CanvasSegmentWriter.WriteSegments</c>.</param>
        /// <param name="truthDataPath">Optional known-copy-number file; when given, a report against it is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                _cnOracle = new CopyNumberOracle();
                _cnOracle.LoadKnownCN(truthDataPath);
            }

            _segments    = Segments.ReadSegments(_logger, new FileLocation(inFile));
            _allSegments = _segments.AllSegments.ToList();
            TempFolder   = Path.GetDirectoryName(inFile);
            if (_allSegments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                // _model is only assigned further down in this method, so it may still be null here;
                // the null-conditional access then passes a null diploid coverage to the writer.
                CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, false, null, null);
                return(0);
            }
            PloidyInfo ploidy = null;

            if (!string.IsNullOrEmpty(ploidyVcfPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);
            }

            // load MAF
            var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);

            _segments.AddAlleles(allelesByChromosome);
            MeanCoverage = allelesByChromosome.SelectMany(x => x.Value).SelectMany(y => y.TotalCoverage).Average();
            AggregateVariantCoverage(ref _allSegments);

            // Create new models for different copy number states
            InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref _allSegments);
            _diploidCoverage         = Utilities.Mean(diploidCounts);
            _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;
            // new coverage model
            _model = new CoverageModel {
                DiploidCoverage = _diploidCoverage
            };

            AssignPloidyCallsDistance(_model);

            CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

            // Merge neighboring segments that got the same copy number call.
            // merging segments requires quality scores so we do it after quality scores have been assigned
            var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

            // recalculating qscores after merging segments improves performance!

            CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
            CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

            List <string> extraHeaders       = new List <string>();
            var           coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);

            CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (_cnOracle != null)
            {
                GenerateReportVersusKnownCopyNumber();
            }

            // The ploidy header line, when present, is the only extra header written.
            if (!string.IsNullOrEmpty(ploidy?.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
            return(0);
        }
Esempio n. 30
0
        /// <summary>
        /// Multi-sample calling workflow. For each sample it reads segments and alleles and builds sample
        /// metrics and a copy number model; it then derives segment sets from common CNVs, calls variants
        /// on them in parallel, merges and filters the called segments, and writes the per-sample and
        /// multi-sample outputs.
        /// </summary>
        /// <param name="variantFrequencyFiles">Variant frequency files, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <param name="segmentFiles">Segment files, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <param name="outVcfFile">Multi-sample VCF location; its directory receives the per-sample outputs.</param>
        /// <param name="ploidyBedPath">Ploidy path forwarded to <c>SampleMetrics.GetSampleInfo</c>.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
        /// <param name="sampleNames">Sample names; one <c>SampleId</c> is created per entry.</param>
        /// <param name="commonCnvsBedPath">Common CNV BED path forwarded to <c>CreateSegmentSetsFromCommonCnvs</c>.</param>
        /// <param name="sampleTypes">Sample types, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <returns>Always 0.</returns>
        internal int CallVariants(List <string> variantFrequencyFiles, List <string> segmentFiles,
                                  IFileLocation outVcfFile, string ploidyBedPath, string referenceFolder, List <string> sampleNames, string commonCnvsBedPath, List <SampleType> sampleTypes)
        {
            // Per-sample lookup tables filled while loading the inputs.
            var samplesInfo      = new SampleMap<SampleMetrics>();
            var sampleSegments   = new SampleMap<Segments>();
            var copyNumberModels = new SampleMap<ICopyNumberModel>();
            var frequencyFileBySample = new SampleMap<string>();
            var kinships = new SampleMap<SampleType>();

            // The input lists are parallel: position i in each one belongs to sampleNames[i].
            for (int sampleIndex = 0; sampleIndex < sampleNames.Count; sampleIndex++)
            {
                var sampleId        = new SampleId(sampleNames[sampleIndex]);
                var loadedSegments  = Segments.ReadSegments(_logger, new FileLocation(segmentFiles[sampleIndex]));
                var alleles         = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFiles[sampleIndex]), loadedSegments.IntervalsByChromosome);
                loadedSegments.AddAlleles(alleles);
                sampleSegments.Add(sampleId, loadedSegments);

                var metrics = SampleMetrics.GetSampleInfo(loadedSegments.AllSegments, ploidyBedPath, _callerParameters.NumberOfTrimmedBins, sampleId);
                var model   = _copyNumberModelFactory.CreateModel(_callerParameters.MaximumCopyNumber, metrics.MaxCoverage, metrics.MeanCoverage, metrics.MeanMafCoverage);
                samplesInfo.Add(sampleId, metrics);
                copyNumberModels.Add(sampleId, model);
                frequencyFileBySample.Add(sampleId, variantFrequencyFiles[sampleIndex]);
                kinships.Add(sampleId, sampleTypes[sampleIndex]);
            }

            var segmentSetsFromCommonCnvs = CreateSegmentSetsFromCommonCnvs(frequencyFileBySample,
                                                                            _callerParameters.MinAlleleCountsThreshold, commonCnvsBedPath, sampleSegments);

            var segmentsForVariantCalling = GetHighestLikelihoodSegments(segmentSetsFromCommonCnvs, samplesInfo, copyNumberModels).ToList();
            PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(kinships, _callerParameters);

            // Call every segment set in parallel, capped by both the machine and the configured core limit.
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
            };
            Parallel.ForEach(
                segmentsForVariantCalling,
                parallelOptions,
                segmentSet => _variantCaller.CallVariant(segmentSet, samplesInfo, copyNumberModels, pedigreeInfo));

            // Regroup the called segments by sample.
            var variantCalledSegments = new SampleMap<List<CanvasSegment>>();
            foreach (var id in samplesInfo.SampleIds)
            {
                List<CanvasSegment> calledForSample = segmentsForVariantCalling.Select(segmentSet => segmentSet[id]).ToList();
                variantCalledSegments.Add(id, calledForSample);
            }

            var mergedVariantCalledSegments = MergeSegments(variantCalledSegments, _callerParameters.MinimumCallSize, _qualityFilterThreshold);

            FilterExcessivelyShortSegments(mergedVariantCalledSegments);

            var outputFolder = outVcfFile.Directory;

            // Coverage plot data is written for every sample before any VCF output.
            foreach (var sampleId in samplesInfo.SampleIds)
            {
                var plotDataPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputFolder,
                                                                                            sampleId.ToString());
                var metrics = samplesInfo[sampleId];
                CanvasSegment.WriteCoveragePlotData(mergedVariantCalledSegments[sampleId], metrics.MeanCoverage,
                                                    metrics.Ploidy, plotDataPath, referenceFolder);
            }

            // The de novo threshold is only passed on when a full pedigree is available.
            bool isPedigreeInfoSupplied = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
            int? denovoQualityThreshold = null;
            if (isPedigreeInfoSupplied)
            {
                denovoQualityThreshold = (int?)_deNovoQualityFilterThreshold;
            }

            var ploidies        = samplesInfo.Select(entry => entry.Value.Ploidy).ToList();
            var diploidCoverage = samplesInfo.Select(entry => entry.Value.MeanCoverage).ToList();
            var names           = samplesInfo.SampleIds.Select(sample => sample.ToString()).ToList();

            CanvasSegmentWriter.WriteMultiSampleSegments(outVcfFile.FullName, mergedVariantCalledSegments, diploidCoverage, referenceFolder, names,
                                                         null, ploidies, _qualityFilterThreshold, denovoQualityThreshold, CanvasFilter.SegmentSizeCutoff, isPedigreeInfoSupplied);

            const string partitionBedgraphHeader = "track type=bedGraph visibility=full autoScale=on graphType=points";

            // Per-sample outputs: VCF, coverage bigWig, copy number bedGraph and partition bedGraph.
            foreach (var sampleId in samplesInfo.SampleIds)
            {
                string sampleLabel    = sampleId.ToString();
                var    sampleVcf      = SingleSampleCallset.GetVcfOutput(outputFolder, sampleLabel);
                var    metrics        = samplesInfo[sampleId];
                var    mergedSegments = mergedVariantCalledSegments[sampleId];
                CanvasSegmentWriter.WriteSegments(sampleVcf.FullName, mergedSegments,
                                                  metrics.MeanCoverage, referenceFolder, sampleLabel, null,
                                                  metrics.Ploidy, _qualityFilterThreshold, isPedigreeInfoSupplied, denovoQualityThreshold, null);

                var visualizationTemp   = outputFolder.CreateSubdirectory($"VisualizationTemp{sampleId}");
                var normalizationFactor = NormalizationCalculator.ComputeNormalizationFactor(mergedSegments);

                // The bigWig writer may return null; the file is only moved when one was produced.
                var bigWig = _coverageBigWigWriter.Write(mergedSegments, visualizationTemp, normalizationFactor);
                bigWig?.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputFolder, sampleLabel));

                var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputFolder, sampleLabel);
                _copyNumberBedGraphWriter.Write(mergedSegments, metrics.Ploidy, copyNumberBedGraph);

                // The partition track uses the segments as originally read, not the merged calls.
                var unmergedSegments = sampleSegments[sampleId];
                var partitionBedGraph = SingleSampleCallset.GetPartitionBedGraph(outputFolder, sampleLabel);
                _partitionCoverageBedGraphWriter.Write(unmergedSegments.AllSegments, partitionBedGraph, normalizationFactor, partitionBedgraphHeader);
            }

            return 0;
        }