Esempio n. 1
0
        private void MoveIntermediateOutput(SampleInfo info, IntermediateOutput output, IFileMover fileMover)
        {
            var stub = GetSingleSampleOutputStub(info);

            // Output:
            fileMover.Move(output.CnvVcf.VcfFile, SingleSampleCallset.GetVcfOutput(stub));

            // Files for visualization:
            fileMover.Move(output.CoverageBigwig, SingleSampleCallset.GetCoverageBigWig(stub));
            var targetBAlleleBedgraph = SingleSampleCallset.GetBAlleleBedGraph(stub);

            fileMover.Move(output.BAlleleBedgraph.FileLocation, targetBAlleleBedgraph.FileLocation);
            fileMover.Move(output.BAlleleBedgraph.TabixIndex, targetBAlleleBedgraph.TabixIndex);
            var targetCopyNumbedBedgraph = SingleSampleCallset.GetCopyNumberBedGraph(stub);

            fileMover.Move(output.CopyNumberBedgraph.FileLocation, targetCopyNumbedBedgraph.FileLocation);
            fileMover.Move(output.CopyNumberBedgraph.TabixIndex, targetCopyNumbedBedgraph.TabixIndex);

            // Deprecated files:
#pragma warning disable CS0618                                                                                                    // Type or member is obsolete
            fileMover.Move(output.CoverageAndVariantFrequencies, SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(stub)); // Used for (non-dynamic) plotting
            fileMover.Move(output.Partitioned, SingleSampleCallset.GetPartitionedPath(stub));                                     // used by BSVI
            fileMover.Move(output.VariantFrequencies, SingleSampleCallset.GetVfSummaryPath(stub));                                // used by BSVI
            fileMover.Move(output.VariantFrequenciesBaf, SingleSampleCallset.GetVfSummaryBafPath(stub));                          // used by BSVI
#pragma warning restore CS0618                                                                                                    // Type or member is obsolete
        }
        private CanvasSmallPedigreeOutput GetCanvasOutput(SampleSet <CanvasPedigreeSample> pedigreeSamples, IDirectoryLocation sampleSandbox)
        {
            var intermediateResults = pedigreeSamples.SelectSamples(sampleInfo =>
            {
                var sampleId                      = sampleInfo.Id;
                var variantFrequencies            = SingleSampleCallset.GetVfSummaryPath(sampleSandbox, sampleId);
                var variantFrequenciesBaf         = SingleSampleCallset.GetVfSummaryBafPath(sampleSandbox, sampleId);
                var partitioned                   = SingleSampleCallset.GetPartitionedPath(sampleSandbox, sampleId);
                var coverageAndVariantFrequencies = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(sampleSandbox, sampleId);
                var singleSampleVcf               = SingleSampleCallset.GetVcfOutput(sampleSandbox, sampleId);
                var coverageBigwig                = SingleSampleCallset.GetCoverageBigWig(sampleSandbox, sampleId);
                var bAlleleBedgraph               = SingleSampleCallset.GetBAlleleBedGraph(sampleSandbox, sampleId);
                var copyNumberBedgraph            = SingleSampleCallset.GetCopyNumberBedGraph(sampleSandbox, sampleId);
                return(new IntermediateOutput(new Vcf(singleSampleVcf), coverageAndVariantFrequencies, variantFrequencies, variantFrequenciesBaf,
                                              partitioned, coverageBigwig, bAlleleBedgraph, copyNumberBedgraph));
            });
            var cnvVcf = new Vcf(sampleSandbox.GetFileLocation("CNV.vcf.gz"));

            return(new CanvasSmallPedigreeOutput(cnvVcf, intermediateResults));
        }
Esempio n. 3
0
        private CanvasSmallPedigreeOutput Load(CanvasSmallPedigreeInput input)
        {
            var intermediateOutputs = input.Samples.SelectData((info, sample) =>
            {
                var stub = GetSingleSampleOutputStub(info);
                var coverageAndVariantFrequency = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(stub);
                var singleSampleVcf             = new Vcf(SingleSampleCallset.GetVcfOutput(stub));

                var partitioned           = SingleSampleCallset.GetPartitionedPath(stub);
                var variantFrequencies    = SingleSampleCallset.GetVfSummaryPath(stub);
                var variantFrequenciesBaf = SingleSampleCallset.GetVfSummaryBafPath(stub);

                var coverageBigwig     = SingleSampleCallset.GetCoverageBigWig(stub);
                var bAlleleBedgraph    = SingleSampleCallset.GetBAlleleBedGraph(stub);
                var copyNumberBedgraph = SingleSampleCallset.GetCopyNumberBedGraph(stub);


                return(new IntermediateOutput(singleSampleVcf, coverageAndVariantFrequency, variantFrequencies, variantFrequenciesBaf, partitioned,
                                              coverageBigwig, bAlleleBedgraph, copyNumberBedgraph));
            });

            return(new CanvasSmallPedigreeOutput(new Vcf(GetPedigreeVcf()), intermediateOutputs));
        }
Esempio n. 4
0
        internal int CallVariants(List <string> variantFrequencyFiles, List <string> segmentFiles,
                                  IFileLocation outVcfFile, string ploidyBedPath, string referenceFolder, List <string> sampleNames, string commonCnvsBedPath, List <SampleType> sampleTypes)
        {
            // load files
            // initialize data structures and classes
            var fileCounter      = 0;
            var samplesInfo      = new SampleMap <SampleMetrics>();
            var sampleSegments   = new SampleMap <Segments>();
            var copyNumberModels = new SampleMap <ICopyNumberModel>();
            var variantFrequencyFilesSampleList = new SampleMap <string>();
            var kinships = new SampleMap <SampleType>();

            foreach (string sampleName in sampleNames)
            {
                var sampleId = new SampleId(sampleName);
                var segment  = Segments.ReadSegments(_logger, new FileLocation(segmentFiles[fileCounter]));
                segment.AddAlleles(CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFiles[fileCounter]), segment.IntervalsByChromosome));
                sampleSegments.Add(sampleId, segment);
                var sampleInfo      = SampleMetrics.GetSampleInfo(segment.AllSegments, ploidyBedPath, _callerParameters.NumberOfTrimmedBins, sampleId);
                var copyNumberModel = _copyNumberModelFactory.CreateModel(_callerParameters.MaximumCopyNumber, sampleInfo.MaxCoverage, sampleInfo.MeanCoverage, sampleInfo.MeanMafCoverage);
                samplesInfo.Add(sampleId, sampleInfo);
                copyNumberModels.Add(sampleId, copyNumberModel);
                variantFrequencyFilesSampleList.Add(sampleId, variantFrequencyFiles[fileCounter]);
                kinships.Add(sampleId, sampleTypes[fileCounter]);
                fileCounter++;
            }
            var segmentSetsFromCommonCnvs = CreateSegmentSetsFromCommonCnvs(variantFrequencyFilesSampleList,
                                                                            _callerParameters.MinAlleleCountsThreshold, commonCnvsBedPath, sampleSegments);

            var          segmentsForVariantCalling = GetHighestLikelihoodSegments(segmentSetsFromCommonCnvs, samplesInfo, copyNumberModels).ToList();
            PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(kinships, _callerParameters);

            Parallel.ForEach(
                segmentsForVariantCalling,
                new ParallelOptions
            {
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
            },
                segments => _variantCaller.CallVariant(segments, samplesInfo, copyNumberModels, pedigreeInfo)
                );
            var variantCalledSegments = new SampleMap <List <CanvasSegment> >();

            foreach (var key in samplesInfo.SampleIds)
            {
                variantCalledSegments.Add(key, segmentsForVariantCalling.Select(segment => segment[key]).ToList());
            }

            var mergedVariantCalledSegments = MergeSegments(variantCalledSegments, _callerParameters.MinimumCallSize, _qualityFilterThreshold);

            FilterExcessivelyShortSegments(mergedVariantCalledSegments);

            var outputFolder = outVcfFile.Directory;

            foreach (var sampleId in samplesInfo.SampleIds)
            {
                var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputFolder,
                                                                                                  sampleId.ToString());
                CanvasSegment.WriteCoveragePlotData(mergedVariantCalledSegments[sampleId], samplesInfo[sampleId].MeanCoverage,
                                                    samplesInfo[sampleId].Ploidy, coverageOutputPath, referenceFolder);
            }
            bool isPedigreeInfoSupplied = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
            var  denovoQualityThreshold = isPedigreeInfoSupplied ? (int?)_deNovoQualityFilterThreshold : null;
            var  ploidies        = samplesInfo.Select(info => info.Value.Ploidy).ToList();
            var  diploidCoverage = samplesInfo.Select(info => info.Value.MeanCoverage).ToList();
            var  names           = samplesInfo.SampleIds.Select(id => id.ToString()).ToList();

            CanvasSegmentWriter.WriteMultiSampleSegments(outVcfFile.FullName, mergedVariantCalledSegments, diploidCoverage, referenceFolder, names,
                                                         null, ploidies, _qualityFilterThreshold, denovoQualityThreshold, CanvasFilter.SegmentSizeCutoff, isPedigreeInfoSupplied);

            foreach (var sampleId in samplesInfo.SampleIds)
            {
                var outputVcfPath = SingleSampleCallset.GetVcfOutput(outputFolder, sampleId.ToString());
                var sampleMetrics = samplesInfo[sampleId];
                var segments      = mergedVariantCalledSegments[sampleId];
                CanvasSegmentWriter.WriteSegments(outputVcfPath.FullName, segments,
                                                  sampleMetrics.MeanCoverage, referenceFolder, sampleId.ToString(), null,
                                                  sampleMetrics.Ploidy, _qualityFilterThreshold, isPedigreeInfoSupplied, denovoQualityThreshold, null);

                var visualizationTemp   = outputFolder.CreateSubdirectory($"VisualizationTemp{sampleId}");
                var normalizationFactor = NormalizationCalculator.ComputeNormalizationFactor(segments);
                var bigWig = _coverageBigWigWriter.Write(segments, visualizationTemp, normalizationFactor);
                bigWig?.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputFolder, sampleId.ToString()));
                var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputFolder, sampleId.ToString());
                _copyNumberBedGraphWriter.Write(segments, sampleMetrics.Ploidy, copyNumberBedGraph);

                var partitionBedgraphHeader = "track type=bedGraph visibility=full autoScale=on graphType=points";
                var originalSegments        = sampleSegments[sampleId];
                _partitionCoverageBedGraphWriter.Write(originalSegments.AllSegments, SingleSampleCallset.GetPartitionBedGraph(outputFolder, sampleId.ToString()), normalizationFactor, partitionBedgraphHeader);
            }
            return(0);
        }