Esempio n. 1
0
        /// <summary>
        /// Checks that <c>CanvasSegment.RemapGenomicToBinCoordinates</c> converts two BED intervals on
        /// chr10 into the expected start/end values relative to a list of six genomic bins.
        /// </summary>
        public void TestRemapGenomicToBinCoordinates()
        {
            const string chromosome = "chr10";

            // Six back-to-back bins covering chr10:1001-7000.
            var bins = new List<SampleGenomicBin>();
            bins.Add(new SampleGenomicBin(chromosome, 1001, 2000, 0, 80));
            bins.Add(new SampleGenomicBin(chromosome, 2001, 3000, 0, 79));
            bins.Add(new SampleGenomicBin(chromosome, 3001, 4000, 0, 78));
            bins.Add(new SampleGenomicBin(chromosome, 4001, 5000, 0, 77));
            bins.Add(new SampleGenomicBin(chromosome, 5001, 6000, 0, 2));
            bins.Add(new SampleGenomicBin(chromosome, 6001, 7000, 0, 2));

            // Two genomic intervals, each starting and ending in the middle of a bin.
            var bedEntries = new List<BedEntry>();
            bedEntries.Add(new BedEntry("chr10\t1500\t3500"));
            bedEntries.Add(new BedEntry("chr10\t4500\t6500"));

            var remapped = CanvasSegment.RemapGenomicToBinCoordinates(bedEntries, bins);

            // First interval is expected to map to 0..2.
            var firstInterval = remapped.First();
            Assert.Equal(0, firstInterval.Start);
            Assert.Equal(2, firstInterval.End);

            // Last interval is expected to map to 3..5.
            var lastInterval = remapped.Last();
            Assert.Equal(3, lastInterval.Start);
            Assert.Equal(5, lastInterval.End);
        }
Esempio n. 2
0
        /// <summary>
        /// Creates CanvasSegments from a common CNVs bed file and overlaps them with CanvasPartition
        /// segments to create SegmentHaplotypes.
        /// </summary>
        /// <param name="variantFrequencyFiles">Per-sample variant frequency file paths; each is read via <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
        /// <param name="defaultAlleleCountThreshold">Threshold forwarded unchanged to <c>GetSegmentSets</c>.</param>
        /// <param name="commonCNVsbedPath">
        /// Path to the common CNVs bed file. When <c>null</c>, every segment of every sample is wrapped
        /// in its own <c>OverlappingSegmentsRegion</c> and no bed file is read.
        /// </param>
        /// <param name="sampleSegments">Per-sample segments. The first sample supplies the chromosome list and genomic bins.</param>
        /// <returns>The result of <c>GetOverlappingSegmentsRegionSampleLists</c> applied to the per-sample region lists.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <c>IsIdenticalChromosomeNames</c> returns <c>true</c> for the bed-file regions and the chromosome list.
        /// </exception>
        private IEnumerable<ISampleMap<OverlappingSegmentsRegion>> CreateSegmentSetsFromCommonCnvs(ISampleMap<string> variantFrequencyFiles,
            int defaultAlleleCountThreshold, string commonCNVsbedPath, ISampleMap<Segments> sampleSegments)
        {
            if (commonCNVsbedPath == null)
            {
                var defaultSampleRegions = sampleSegments
                    .SelectValues(segments => segments.AllSegments.Select(segment => new OverlappingSegmentsRegion(segment)).ToList());
                return GetOverlappingSegmentsRegionSampleLists(defaultSampleRegions);
            }

            var commonRegions = ReadCommonRegions(commonCNVsbedPath);

            // The first sample acts as the reference for chromosome names and genomic bins.
            var referenceSegments = sampleSegments.Values.First();
            var chromosomes = referenceSegments.GetChromosomes();

            // NOTE(review): the helper's name suggests this condition is inverted relative to the error
            // message; the helper is not visible here, so the original logic is kept - confirm its semantics.
            if (IsIdenticalChromosomeNames(commonRegions, chromosomes))
            {
                throw new ArgumentException(
                          $"Chromosome names in a common CNVs bed file {commonCNVsbedPath} does not match the genome reference");
            }

            var segmentIntervalsByChromosome = new Dictionary<string, List<BedInterval>>();
            var genomicBinsByChromosome = new Dictionary<string, IReadOnlyList<SampleGenomicBin>>();

            // Dictionary<TKey, TValue> does not support concurrent writers, so the per-chromosome work runs
            // in parallel but the results are published to the shared dictionaries under a lock.
            var dictionaryGate = new object();

            Parallel.ForEach(
                chromosomes,
                chr =>
                {
                    IReadOnlyList<SampleGenomicBin> genomicBins = referenceSegments.GetGenomicBinsForChromosome(chr);
                    List<BedInterval> segmentIntervals =
                        CanvasSegment.RemapGenomicToBinCoordinates(commonRegions[chr], genomicBins);

                    lock (dictionaryGate)
                    {
                        genomicBinsByChromosome[chr] = genomicBins;
                        segmentIntervalsByChromosome[chr] = segmentIntervals;
                    }
                });

            var sampleRegions = new SampleMap<List<OverlappingSegmentsRegion>>();

            foreach (var sampleId in sampleSegments.SampleIds)
            {
                // Rebuilt for every sample so each ReadFrequenciesWrapper call receives its own copy.
                var commonIntervals = commonRegions.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Select(bedEntry => bedEntry.Interval).ToList());
                var allelesByChromosomeCommonSegs = CanvasIO.ReadFrequenciesWrapper(_logger,
                    new FileLocation(variantFrequencyFiles[sampleId]), commonIntervals);
                var segmentsSets = GetSegmentSets(defaultAlleleCountThreshold, commonRegions,
                    genomicBinsByChromosome, segmentIntervalsByChromosome, allelesByChromosomeCommonSegs, sampleSegments[sampleId]);
                sampleRegions.Add(sampleId, segmentsSets);
            }

            return GetOverlappingSegmentsRegionSampleLists(sampleRegions);
        }