/// <summary>
/// Summarizes the coverage of a sample's segments and bundles the result, together with the
/// sample's ploidy information, into a <see cref="SampleMetrics"/> instance.
/// </summary>
/// <param name="segments">Segments whose bin counts and B-allele total coverage are summarized.</param>
/// <param name="ploidyBedPath">
/// Optional path to a ploidy file. It is only read when it is non-empty and exists on disk;
/// otherwise a default-constructed <see cref="PloidyInfo"/> is used. Note that, despite the
/// parameter name, the file is read with <c>PloidyInfo.LoadPloidyFromVcfFile</c>.
/// </param>
/// <param name="numberOfTrimmedBins">Forwarded to <c>TruncatedMedianCount</c> of every segment.</param>
/// <param name="id">Sample identifier; its string form is passed to the ploidy loader.</param>
/// <returns>The metrics computed for the sample.</returns>
public static SampleMetrics GetSampleInfo(IReadOnlyList<CanvasSegment> segments, string ploidyBedPath, int numberOfTrimmedBins, SampleId id)
{
    // NOTE: the "mean" coverage values below are medians; the names are kept because they
    // line up with the SampleMetrics constructor arguments.
    double meanMafCoverage = new SortedList<int>(segments.SelectMany(x => x.Balleles.TotalCoverage)).Median();

    // Computed once and reused for both the variance and the maximum coverage.
    var truncatedMedianCounts = segments.Select(x => x.TruncatedMedianCount(numberOfTrimmedBins)).ToList();
    double variance = Utilities.Variance(truncatedMedianCounts);

    // Segments without any B-allele coverage are skipped so that Average() is never called on an empty sequence.
    double mafVariance = Utilities.Variance(segments.Where(x => x.Balleles.TotalCoverage.Count > 0)
        .Select(x => x.Balleles.TotalCoverage.Average()).ToList());

    double meanCoverage = new SortedList<float>(segments.SelectMany(x => x.Counts)).Median();

    // Convert.ToInt32 rounds exactly like Convert.ToInt16 but does not throw an
    // OverflowException once the coverage exceeds short.MaxValue (32767).
    int maxCoverage = Convert.ToInt32(truncatedMedianCounts.Max()) + 10;

    var ploidy = new PloidyInfo();
    if (!ploidyBedPath.IsNullOrEmpty() && File.Exists(ploidyBedPath))
    {
        ploidy = PloidyInfo.LoadPloidyFromVcfFile(ploidyBedPath, id.ToString());
    }

    return new SampleMetrics(meanCoverage, meanMafCoverage, variance, mafVariance, maxCoverage, ploidy);
}
/// <summary>
/// Runs the multi-sample calling workflow: reads segments and variant frequencies for every
/// sample, builds per-sample metrics and copy number models, calls variants on the selected
/// segment sets in parallel, merges the called segments and writes the per-sample and
/// multi-sample output files next to <paramref name="outVcfFile"/>.
/// </summary>
/// <param name="variantFrequencyFiles">Variant frequency file path per sample, indexed like <paramref name="sampleNames"/>.</param>
/// <param name="segmentFiles">Segment file path per sample, indexed like <paramref name="sampleNames"/>.</param>
/// <param name="outVcfFile">Location of the multi-sample VCF; its directory receives all other outputs.</param>
/// <param name="ploidyBedPath">Optional ploidy file path, forwarded to <c>SampleMetrics.GetSampleInfo</c>.</param>
/// <param name="referenceFolder">Reference folder forwarded to the output writers.</param>
/// <param name="sampleNames">Sample names; one <c>SampleId</c> is created per entry.</param>
/// <param name="commonCnvsBedPath">Common CNVs path forwarded to <c>CreateSegmentSetsFromCommonCnvs</c>.</param>
/// <param name="sampleTypes">Sample type per sample, indexed like <paramref name="sampleNames"/>.</param>
/// <returns>Always 0.</returns>
internal int CallVariants(List<string> variantFrequencyFiles, List<string> segmentFiles, IFileLocation outVcfFile, string ploidyBedPath, string referenceFolder, List<string> sampleNames, string commonCnvsBedPath, List<SampleType> sampleTypes)
{
    const string partitionBedgraphHeader = "track type=bedGraph visibility=full autoScale=on graphType=points";

    // Per-sample state, keyed by sample id.
    var metricsBySample = new SampleMap<SampleMetrics>();
    var inputSegmentsBySample = new SampleMap<Segments>();
    var modelsBySample = new SampleMap<ICopyNumberModel>();
    var frequencyFileBySample = new SampleMap<string>();
    var sampleTypeBySample = new SampleMap<SampleType>();

    // Load the inputs; all per-sample lists are indexed in parallel with sampleNames.
    for (var index = 0; index < sampleNames.Count; index++)
    {
        var currentId = new SampleId(sampleNames[index]);

        var segmentLocation = new FileLocation(segmentFiles[index]);
        var loadedSegments = Segments.ReadSegments(_logger, segmentLocation);

        var frequencyLocation = new FileLocation(variantFrequencyFiles[index]);
        var alleles = CanvasIO.ReadFrequenciesWrapper(_logger, frequencyLocation, loadedSegments.IntervalsByChromosome);
        loadedSegments.AddAlleles(alleles);
        inputSegmentsBySample.Add(currentId, loadedSegments);

        var metrics = SampleMetrics.GetSampleInfo(
            loadedSegments.AllSegments,
            ploidyBedPath,
            _callerParameters.NumberOfTrimmedBins,
            currentId);
        var model = _copyNumberModelFactory.CreateModel(
            _callerParameters.MaximumCopyNumber,
            metrics.MaxCoverage,
            metrics.MeanCoverage,
            metrics.MeanMafCoverage);

        metricsBySample.Add(currentId, metrics);
        modelsBySample.Add(currentId, model);
        frequencyFileBySample.Add(currentId, variantFrequencyFiles[index]);
        sampleTypeBySample.Add(currentId, sampleTypes[index]);
    }

    // Select the segment sets to call and call them in parallel.
    var commonCnvSegmentSets = CreateSegmentSetsFromCommonCnvs(
        frequencyFileBySample,
        _callerParameters.MinAlleleCountsThreshold,
        commonCnvsBedPath,
        inputSegmentsBySample);
    var segmentsToCall = GetHighestLikelihoodSegments(commonCnvSegmentSets, metricsBySample, modelsBySample).ToList();
    PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(sampleTypeBySample, _callerParameters);

    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
    };
    Parallel.ForEach(
        segmentsToCall,
        parallelOptions,
        segmentSet => _variantCaller.CallVariant(segmentSet, metricsBySample, modelsBySample, pedigreeInfo));

    // Regroup the called segments per sample, then merge and filter them.
    var calledSegmentsBySample = new SampleMap<List<CanvasSegment>>();
    foreach (var id in metricsBySample.SampleIds)
    {
        var calledForSample = segmentsToCall.Select(segmentSet => segmentSet[id]).ToList();
        calledSegmentsBySample.Add(id, calledForSample);
    }

    var mergedSegmentsBySample = MergeSegments(calledSegmentsBySample, _callerParameters.MinimumCallSize, _qualityFilterThreshold);
    FilterExcessivelyShortSegments(mergedSegmentsBySample);

    // Coverage plot data for every sample.
    var outputFolder = outVcfFile.Directory;
    foreach (var id in metricsBySample.SampleIds)
    {
        var coveragePlotPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputFolder, id.ToString());
        var mergedForSample = mergedSegmentsBySample[id];
        var metrics = metricsBySample[id];
        CanvasSegment.WriteCoveragePlotData(mergedForSample, metrics.MeanCoverage, metrics.Ploidy, coveragePlotPath, referenceFolder);
    }

    // The de novo quality threshold is only passed on when a full pedigree is available.
    bool isPedigreeInfoSupplied = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
    int? denovoQualityThreshold = null;
    if (isPedigreeInfoSupplied)
    {
        denovoQualityThreshold = (int?)_deNovoQualityFilterThreshold;
    }

    // Multi-sample VCF.
    var ploidies = metricsBySample.Select(entry => entry.Value.Ploidy).ToList();
    var diploidCoverage = metricsBySample.Select(entry => entry.Value.MeanCoverage).ToList();
    var names = metricsBySample.SampleIds.Select(sample => sample.ToString()).ToList();
    CanvasSegmentWriter.WriteMultiSampleSegments(
        outVcfFile.FullName,
        mergedSegmentsBySample,
        diploidCoverage,
        referenceFolder,
        names,
        null,
        ploidies,
        _qualityFilterThreshold,
        denovoQualityThreshold,
        CanvasFilter.SegmentSizeCutoff,
        isPedigreeInfoSupplied);

    // Single-sample VCF and visualization tracks for every sample.
    foreach (var id in metricsBySample.SampleIds)
    {
        var singleSampleVcf = SingleSampleCallset.GetVcfOutput(outputFolder, id.ToString());
        var metrics = metricsBySample[id];
        var mergedForSample = mergedSegmentsBySample[id];
        CanvasSegmentWriter.WriteSegments(
            singleSampleVcf.FullName,
            mergedForSample,
            metrics.MeanCoverage,
            referenceFolder,
            id.ToString(),
            null,
            metrics.Ploidy,
            _qualityFilterThreshold,
            isPedigreeInfoSupplied,
            denovoQualityThreshold,
            null);

        var visualizationTemp = outputFolder.CreateSubdirectory($"VisualizationTemp{id}");
        var normalizationFactor = NormalizationCalculator.ComputeNormalizationFactor(mergedForSample);

        // The writer's result may be null, in which case nothing is moved.
        var bigWig = _coverageBigWigWriter.Write(mergedForSample, visualizationTemp, normalizationFactor);
        bigWig?.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputFolder, id.ToString()));

        var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputFolder, id.ToString());
        _copyNumberBedGraphWriter.Write(mergedForSample, metrics.Ploidy, copyNumberBedGraph);

        // The partition track is written from the segments as originally loaded, not the merged ones.
        var unmergedSegments = inputSegmentsBySample[id].AllSegments;
        var partitionBedGraph = SingleSampleCallset.GetPartitionBedGraph(outputFolder, id.ToString());
        _partitionCoverageBedGraphWriter.Write(unmergedSegments, partitionBedGraph, normalizationFactor, partitionBedgraphHeader);
    }

    return 0;
}