Пример #1
0
        /// <summary>
        /// Writes one VCF record per segment, visiting chromosomes in the order listed in <paramref name="genome"/>
        /// and emitting the segments whose chromosome name matches (case-insensitively).
        /// </summary>
        /// <param name="segments">Per-sample segment lists; the segment at a given index is read from every sample for the same record.</param>
        /// <param name="ploidy">Source of reference copy numbers; when null, a reference copy number of 2 is used.</param>
        /// <param name="genome">Genome metadata whose sequences define the output order.</param>
        /// <param name="writer">Destination writer.</param>
        /// <param name="isPedigreeInfoSupplied">When false and more than one sample is present, the first sample's filter is set to PASS if any sample's filter is PASS.</param>
        /// <param name="denovoQualityThreshold">Optional de novo quality threshold passed on to the INFO writer.</param>
        private static void WriteVariants(IReadOnlyCollection <List <CanvasSegment> > segments, PloidyInfo ploidy, GenomeMetadata genome,
                                          BgzipOrStreamWriter writer, bool isPedigreeInfoSupplied = true, int?denovoQualityThreshold = null)
        {
            var nSamples = segments.Count;
            if (nSamples == 0)
            {
                // Nothing to write; this also avoids calling First() on an empty collection.
                return;
            }

            bool isMultisample = nSamples > 1;
            var  firstSample   = segments.First();

            foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
            {
                for (int segmentIndex = 0; segmentIndex < firstSample.Count; segmentIndex++)
                {
                    var firstSampleSegment = firstSample[segmentIndex];
                    // The first sample's segment carries the filter written for the record. Any(predicate) is
                    // required here: Select(predicate).Any() would only test that the sequence is non-empty.
                    if (!isPedigreeInfoSupplied && isMultisample &&
                        segments.Any(sample => sample[segmentIndex].Filter == "PASS"))
                    {
                        firstSampleSegment.Filter = "PASS";
                    }
                    if (!firstSampleSegment.Chr.Equals(chromosome.Name, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }
                    var referenceCopyNumbers = segments.Select(segment => ploidy?.GetReferenceCopyNumber(segment[segmentIndex]) ?? 2).ToList();
                    var currentSegments      = segments.Select(x => x[segmentIndex]).ToList();
                    var cnvTypes             = new List <CnvType>(nSamples);
                    for (int sampleIndex = 0; sampleIndex < nSamples; sampleIndex++)
                    {
                        cnvTypes.Add(currentSegments[sampleIndex].GetCnvType(referenceCopyNumbers[sampleIndex]));
                    }

                    // Collapse the per-sample types into one record-level type: a single non-reference type
                    // (possibly mixed with reference) is kept, any other mixture is reported as complex.
                    CnvType cnvType;
                    if (cnvTypes.TrueForAll(x => x == CnvType.Reference))
                    {
                        cnvType = CnvType.Reference;
                    }
                    else if (cnvTypes.TrueForAll(x => x == CnvType.Reference || x == CnvType.Loss))
                    {
                        cnvType = CnvType.Loss;
                    }
                    else if (cnvTypes.TrueForAll(x => x == CnvType.Reference || x == CnvType.Gain))
                    {
                        cnvType = CnvType.Gain;
                    }
                    else if (cnvTypes.TrueForAll(x => x == CnvType.Reference || x == CnvType.LossOfHeterozygosity))
                    {
                        cnvType = CnvType.LossOfHeterozygosity;
                    }
                    else
                    {
                        cnvType = CnvType.ComplexCnv;
                    }

                    WriteInfoField(writer, firstSampleSegment, cnvType, denovoQualityThreshold, isMultisample: isMultisample);
                    //  FORMAT field
                    if (isMultisample)
                    {
                        WriteFormatField(writer, currentSegments);
                    }
                    else
                    {
                        WriteSingleSampleInfo(writer, firstSampleSegment);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Writes the FORMAT column followed by one column per sample, all tab-separated and preceded by a tab,
        /// then terminates the line.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="segments">One segment per sample, in sample-column order.</param>
        /// <param name="genotypes">Genotype strings indexed in parallel with <paramref name="segments"/>.</param>
        /// <param name="reportDQ">When true, a DQ sub-field is appended to the FORMAT key and to every sample column.</param>
        private static void WriteFormatAndSampleFields(BgzipOrStreamWriter writer, CanvasSegment[] segments, string[] genotypes, bool reportDQ)
        {
            const string missing = ".";

            string formatKey = "GT:RC:BC:CN:MCC:MCCQ:QS:FT";
            if (reportDQ)
            {
                formatKey = formatKey + ":DQ";
            }

            var columns = new List <string>();
            columns.Add(formatKey);

            for (int sampleIndex = 0; sampleIndex < segments.Length; sampleIndex++)
            {
                var current = segments[sampleIndex];

                string majorCount = missing;
                if (current.MajorChromosomeCount.HasValue)
                {
                    majorCount = current.MajorChromosomeCount.ToString();
                }

                string majorCountQuality = missing;
                if (current.MajorChromosomeCountScore.HasValue)
                {
                    majorCountQuality = $"{current.MajorChromosomeCountScore.Value:F2}";
                }

                string column = $"{genotypes[sampleIndex]}:{current.MedianCount:F2}:{current.BinCount}:{current.CopyNumber}:{majorCount}:{majorCountQuality}:{current.QScore:F2}:{current.Filter.ToVcfString()}";
                if (reportDQ)
                {
                    // Missing de novo scores are written as "." like the other optional sub-fields.
                    column += ":" + (current.DqScore.HasValue ? $"{current.DqScore.Value:F2}" : missing);
                }
                columns.Add(column);
            }

            writer.WriteLine("\t" + string.Join("\t", columns));
        }
Пример #3
0
 /// <summary>
 /// Creates the VCF file at <paramref name="outVcfPath"/>, writes the header (using the first sample's segments
 /// and the average of <paramref name="diploidCoverage"/>) and then the variant records for all samples.
 /// </summary>
 /// <remarks><paramref name="isPedigreeInfoSupplied"/> is accepted but not read by this overload.</remarks>
 public static void WriteMultiSampleSegments(string outVcfPath, ISampleMap <List <CanvasSegment> > segments, List <double> diploidCoverage,
                                             string wholeGenomeFastaDirectory, List <string> sampleNames, List <string> extraHeaders, List <PloidyInfo> ploidies, int qualityThreshold, int?denovoQualityThreshold, int?sizeThreshold, bool isPedigreeInfoSupplied = true)
 {
     using (var vcfWriter = new BgzipOrStreamWriter(outVcfPath))
     {
         var firstSampleSegments = segments.Values.First();
         var meanDiploidCoverage = diploidCoverage.Average();
         var genome = WriteVcfHeader(firstSampleSegments, meanDiploidCoverage, wholeGenomeFastaDirectory, sampleNames,
                                     extraHeaders, vcfWriter, qualityThreshold, denovoQualityThreshold, sizeThreshold);

         var zippedSegments = segments.Zip();
         WriteVariants(zippedSegments, ploidies, genome, vcfWriter, denovoQualityThreshold);
     }
 }
Пример #4
0
 /// <summary>
 /// Creates the VCF file at <paramref name="outVcfPath"/>, writes the header (using the first sample's segments
 /// and the mean of <paramref name="diploidCoverage"/>) and then the variant records for all samples.
 /// </summary>
 public static void WriteMultiSampleSegments(string outVcfPath, List <List <CanvasSegment> > segments, List <double?> diploidCoverage,
                                             string wholeGenomeFastaDirectory, List <string> sampleNames, List <string> extraHeaders, PloidyInfo ploidy, int qualityThreshold,
                                             bool isPedigreeInfoSupplied = true, int?denovoQualityThreshold = null)
 {
     using (var vcfWriter = new BgzipOrStreamWriter(outVcfPath))
     {
         var firstSampleSegments = segments.First();
         var meanDiploidCoverage = GetMean(diploidCoverage);
         var genome = WriteVcfHeader(firstSampleSegments, meanDiploidCoverage, wholeGenomeFastaDirectory, sampleNames,
                                     extraHeaders, qualityThreshold, vcfWriter, denovoQualityThreshold);

         WriteVariants(segments, ploidy, genome, vcfWriter, isPedigreeInfoSupplied, denovoQualityThreshold);
     }
 }
Пример #5
0
 /// <summary>
 /// Writes one <c>##ALT=&lt;ID=CNn,...&gt;</c> header line for every copy number from 0 to
 /// <paramref name="maxCopyNum"/> inclusive, except copy number 1.
 /// </summary>
 /// <param name="writer">Destination writer.</param>
 /// <param name="maxCopyNum">Highest copy number to declare.</param>
 public static void WriteHeaderAllAltCnTags(BgzipOrStreamWriter writer, int maxCopyNum = 5)
 {
     var declaredCopyNumbers = Enumerable.Range(0, maxCopyNum + 1).Where(candidate => candidate != 1);

     foreach (int copyNum in declaredCopyNumbers)
     {
         writer.WriteLine($"##ALT=<ID=CN{copyNum},Description=\"Copy number allele: {copyNum} copies\">");
     }
 }
Пример #6
0
 /// <summary>
 /// Writes the given bedGraph entries to <paramref name="location"/>, preceded by
 /// <paramref name="header"/> when one is supplied.
 /// </summary>
 /// <param name="bedGraphEntries">Entries to write.</param>
 /// <param name="location">Output file location; its full name is used as the path.</param>
 /// <param name="header">Optional header line written before the entries.</param>
 public void Write(IEnumerable <BedGraphEntry> bedGraphEntries, IFileLocation location, string header = null)
 {
     using (var output = new BgzipOrStreamWriter(location.FullName))
     {
         bool hasHeader = header != null;
         if (hasHeader)
         {
             output.WriteLine(header);
         }

         var entryWriter = new BedGraphWriter(output);
         entryWriter.WriteLines(bedGraphEntries);
     }
 }
Пример #7
0
        /// <summary>
        /// Writes the VCF meta-information lines and the column header line, and returns the genome metadata
        /// loaded from GenomeSize.xml in <paramref name="wholeGenomeFastaDirectory"/>.
        /// </summary>
        /// <param name="segments">Segments used for the ploidy/coverage headers and the chromosome-name sanity check.</param>
        /// <param name="diploidCoverage">Optional diploid coverage passed to the ploidy/coverage header writer.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory containing genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleNames">Sample column names for the header line.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality FILTER.</param>
        /// <param name="writer">Destination writer.</param>
        /// <param name="denovoQualityThreshold">When set, the de novo INFO flag and the DQ FORMAT key are declared.</param>
        /// <returns>The deserialized genome metadata.</returns>
        private static GenomeMetadata WriteVcfHeader(List <CanvasSegment> segments, double?diploidCoverage,
                                                     string wholeGenomeFastaDirectory, List <string> sampleNames, List <string> extraHeaders, int qualityThreshold,
                                                     BgzipOrStreamWriter writer, int?denovoQualityThreshold = null)
        {
            // Write the VCF header:
            writer.WriteLine("##fileformat=VCFv4.1");
            writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
            writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");
            AddPloidyAndCoverageHeaders(writer, segments, diploidCoverage);
            if (extraHeaders != null)
            {
                foreach (string header in extraHeaders)
                {
                    writer.WriteLine(header);
                }
            }

            GenomeMetadata genome = new GenomeMetadata();

            genome.Deserialize(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml"));
            foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
            {
                writer.WriteLine($"##contig=<ID={chromosome.Name},length={chromosome.Length}>");
            }
            string qualityFilter = $"q{qualityThreshold}";

            writer.WriteLine("##ALT=<ID=CNV,Description=\"Copy number variable region\">");
            writer.WriteLine($"##FILTER=<ID={qualityFilter},Description=\"Quality below {qualityThreshold}\">");
            writer.WriteLine("##FILTER=<ID=L10kb,Description=\"Length shorter than 10kb\">");
            writer.WriteLine("##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">");
            writer.WriteLine("##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">");
            writer.WriteLine("##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">");
            writer.WriteLine("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
            writer.WriteLine("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
            writer.WriteLine("##INFO=<ID=SUBCLONAL,Number=0,Type=Flag,Description=\"Subclonal variant\">");
            if (denovoQualityThreshold.HasValue)
            {
                // INFO meta-lines require Number and Type; the records emit this key as a bare flag.
                string denovoQualityFilter = $"dq{denovoQualityThreshold}";
                writer.WriteLine($"##INFO=<ID={denovoQualityFilter},Number=0,Type=Flag,Description=\"De novo quality score above {denovoQualityThreshold.Value}\">");
            }
            writer.WriteLine("##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">");
            writer.WriteLine("##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">");
            writer.WriteLine("##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">");
            writer.WriteLine("##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">");
            if (denovoQualityThreshold.HasValue)
            {
                writer.WriteLine("##FORMAT=<ID=DQ,Number=1,Type=Float,Description=\"De novo variants Phred-scaled quality score\">");
            }
            // QS does not depend on the de novo threshold, so it is declared unconditionally.
            writer.WriteLine("##FORMAT=<ID=QS,Number=1,Type=Float,Description=\"Phred-scaled quality score\">");

            string names = string.Join("\t", sampleNames);

            writer.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + names);
            SanityCheckChromosomeNames(genome, segments);
            return(genome);
        }
Пример #8
0
        /// <summary>
        /// Writes the non-sample VCF columns (CHROM through INFO) for a single segment. No line terminator is written.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="segment">Segment supplying coordinates, quality, filter and INFO values.</param>
        /// <param name="cnvType">CNV type used for the ID, ALT and SVTYPE values.</param>
        /// <param name="denovoQualityThreshold">When set and the segment's DQ score reaches it, a <c>dq&lt;threshold&gt;</c> entry is added to INFO.</param>
        /// <param name="isMultisample">When true, QUAL is written as "." and the DQ INFO entry is omitted.</param>
        private static void WriteInfoField(BgzipOrStreamWriter writer, CanvasSegment segment, CnvType cnvType, int?denovoQualityThreshold, bool isMultisample)
        {
            // From vcf 4.1 spec:
            //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String “<ID>”) then the padding base is required and POS denotes the
            //     coordinate of the base preceding the polymorphism.
            string altAllele        = cnvType.ToAltId();
            bool   isSymbolicAllele = altAllele.StartsWith("<") && altAllele.EndsWith(">");
            int    pos              = segment.Begin;
            if (!isSymbolicAllele)
            {
                pos = segment.Begin + 1;
            }

            writer.Write($"{segment.Chr}\t{pos}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

            string qual;
            if (isMultisample)
            {
                qual = ".";
            }
            else
            {
                qual = $"{segment.QScore:F2}";
            }
            writer.Write($"N\t{altAllele}\t{qual}\t{segment.Filter}\t");

            // Collect the INFO entries in output order and emit them joined by ';'.
            bool isVariant   = cnvType != CnvType.Reference;
            var  infoEntries = new List <string>();

            if (isVariant)
            {
                infoEntries.Add($"SVTYPE={cnvType.ToSvType()}");
            }
            if (segment.IsHeterogeneous)
            {
                infoEntries.Add("SUBCLONAL");
            }
            if (segment.DQScore.HasValue && !isMultisample)
            {
                infoEntries.Add($"DQ={segment.DQScore.Value}");
            }
            if (denovoQualityThreshold.HasValue && segment.DQScore.HasValue && segment.DQScore >= denovoQualityThreshold)
            {
                infoEntries.Add($"dq{denovoQualityThreshold}");
            }

            infoEntries.Add($"END={segment.End}");

            if (isVariant)
            {
                infoEntries.Add($"CNVLEN={segment.End - segment.Begin}");
            }
            if (segment.StartConfidenceInterval != null)
            {
                infoEntries.Add($"CIPOS={segment.StartConfidenceInterval.Item1},{segment.StartConfidenceInterval.Item2}");
            }
            if (segment.EndConfidenceInterval != null)
            {
                infoEntries.Add($"CIEND={segment.EndConfidenceInterval.Item1},{segment.EndConfidenceInterval.Item2}");
            }

            writer.Write(string.Join(";", infoEntries));
        }
Пример #9
0
        /// <summary>
        /// Writes the fixed FORMAT key <c>RC:BC:CN:MCC:QS:DQ</c> followed by one tab-prefixed sample column per
        /// segment, then terminates the line. Missing MCC and DQ values are written as ".".
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="segments">One segment per sample, in sample-column order.</param>
        private static void WriteFormatField(BgzipOrStreamWriter writer, List <CanvasSegment> segments)
        {
            const string missing = ".";

            writer.Write("\tRC:BC:CN:MCC:QS:DQ");

            foreach (var sampleSegment in segments)
            {
                string majorCount = missing;
                if (sampleSegment.MajorChromosomeCount.HasValue)
                {
                    majorCount = sampleSegment.MajorChromosomeCount.ToString();
                }

                string denovoQuality = missing;
                if (sampleSegment.DQScore.HasValue)
                {
                    denovoQuality = $"{sampleSegment.DQScore.Value:F2}";
                }

                var roundedMeanCount = Math.Round(sampleSegment.MeanCount, 0, MidpointRounding.AwayFromZero);
                writer.Write($"\t{roundedMeanCount}:{sampleSegment.BinCount}:{sampleSegment.CopyNumber}:{majorCount}:{sampleSegment.QScore}:{denovoQuality}");
            }

            writer.WriteLine();
        }
Пример #10
0
 /// <summary>
 /// Creates a single-sample VCF at <paramref name="outVcfPath"/>: writes the header and then the variant
 /// records for a copy of <paramref name="segments"/>.
 /// </summary>
 public static void WriteSegments(string outVcfPath, List <CanvasSegment> segments, double?diploidCoverage,
                                  string wholeGenomeFastaDirectory, string sampleName,
                                  List <string> extraHeaders, PloidyInfo ploidy, int qualityThreshold, bool isPedigreeInfoSupplied, int?denovoQualityThreshold = null)
 {
     using (var vcfWriter = new BgzipOrStreamWriter(outVcfPath))
     {
         var singleSampleName = new List <string> { sampleName };
         var genome = WriteVcfHeader(segments, diploidCoverage, wholeGenomeFastaDirectory, singleSampleName,
                                     extraHeaders, qualityThreshold, vcfWriter, denovoQualityThreshold);

         // Wrap the single sample so the multi-sample writer can be reused.
         var segmentsBySample = new List <List <CanvasSegment> > { segments.ToList() };
         WriteVariants(segmentsBySample, ploidy, genome, vcfWriter, isPedigreeInfoSupplied, denovoQualityThreshold);
     }
 }
Пример #11
0
 /// <summary>
 /// Writes the FORMAT key and the sample column for a single-sample record, then terminates the line.
 /// The key is <c>RC:BC:CN</c>, with <c>:MCC</c> appended only when the segment has a major chromosome count.
 /// </summary>
 /// <param name="writer">Destination writer.</param>
 /// <param name="segment">Segment whose rounded mean count, bin count and copy number are written.</param>
 private static void WriteSingleSampleInfo(BgzipOrStreamWriter writer, CanvasSegment segment)
 {
     bool hasMajorChromosomeCount = segment.MajorChromosomeCount.HasValue;
     var  roundedMeanCount        = Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero);

     // Interpolation replaces the former composite-format calls, which passed an unused
     // segment.End argument and therefore had to start numbering placeholders at {1}.
     writer.Write(hasMajorChromosomeCount ? "\tRC:BC:CN:MCC" : "\tRC:BC:CN");
     writer.Write($"\t{roundedMeanCount}:{segment.BinCount}:{segment.CopyNumber}");
     if (hasMajorChromosomeCount)
     {
         writer.Write($":{segment.MajorChromosomeCount}");
     }
     writer.WriteLine();
 }
Пример #12
0
        /// <summary>
        /// Writes the non-sample VCF columns (CHROM through INFO) for one record, taking coordinates and flags
        /// from the first sample's segment. No line terminator is written.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="firstSampleSegment">Segment supplying chromosome, coordinates, quality and INFO flags.</param>
        /// <param name="sampleSetCnvType">CNV type used for the record ID and SVTYPE.</param>
        /// <param name="alternateAllele">Text written in the ALT column; also decides how POS is computed.</param>
        /// <param name="recordLevelFilter">Text written in the FILTER column.</param>
        /// <param name="isMultisample">When true, QUAL is written as ".".</param>
        private static void WriteColumnsUntilInfoField(BgzipOrStreamWriter writer, CanvasSegment firstSampleSegment, CnvType sampleSetCnvType, string alternateAllele, string recordLevelFilter, bool isMultisample)
        {
            // From vcf 4.1 spec:
            //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String “<ID>”) then the padding base is required and POS denotes the
            //     coordinate of the base preceding the polymorphism.
            bool isSymbolicAllele = alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">");
            int  pos              = firstSampleSegment.Begin;
            if (!isSymbolicAllele)
            {
                pos = firstSampleSegment.Begin + 1;
            }

            writer.Write($"{firstSampleSegment.Chr}\t{pos}\tCanvas:{sampleSetCnvType.ToVcfId()}:{firstSampleSegment.Chr}:{firstSampleSegment.Begin + 1}-{firstSampleSegment.End}\t");

            string qual;
            if (isMultisample)
            {
                qual = ".";
            }
            else
            {
                qual = $"{firstSampleSegment.QScore:F2}";
            }
            writer.Write($"N\t{alternateAllele}\t{qual}\t{recordLevelFilter}\t");

            // Collect the INFO entries in output order and emit them joined by ';'.
            bool isVariant   = sampleSetCnvType != CnvType.Reference;
            var  infoEntries = new List <string>();

            if (isVariant)
            {
                infoEntries.Add($"SVTYPE={sampleSetCnvType.ToSvType()}");
            }
            if (firstSampleSegment.IsHeterogeneous)
            {
                infoEntries.Add("SUBCLONAL");
            }
            if (firstSampleSegment.IsCommonCnv)
            {
                infoEntries.Add("COMMONCNV");
            }

            infoEntries.Add($"END={firstSampleSegment.End}");

            if (isVariant)
            {
                infoEntries.Add($"CNVLEN={firstSampleSegment.Length}");
            }
            if (firstSampleSegment.StartConfidenceInterval != null)
            {
                infoEntries.Add($"CIPOS={firstSampleSegment.StartConfidenceInterval.Item1},{firstSampleSegment.StartConfidenceInterval.Item2}");
            }
            if (firstSampleSegment.EndConfidenceInterval != null)
            {
                infoEntries.Add($"CIEND={firstSampleSegment.EndConfidenceInterval.Item1},{firstSampleSegment.EndConfidenceInterval.Item2}");
            }

            writer.Write(string.Join(";", infoEntries));
        }
Пример #13
0
 /// <summary>
 /// Creates a single-sample VCF at <paramref name="outVcfPath"/>: writes the header, then wraps every segment
 /// in a one-entry sample map keyed by <paramref name="sampleName"/> and writes the variant records.
 /// </summary>
 /// <remarks><paramref name="isPedigreeInfoSupplied"/> is accepted but not read by this overload.</remarks>
 public static void WriteSegments(string outVcfPath, List <CanvasSegment> segments, double?diploidCoverage,
                                  string wholeGenomeFastaDirectory, string sampleName,
                                  List <string> extraHeaders, PloidyInfo ploidy, int qualityThreshold, bool isPedigreeInfoSupplied, int?denovoQualityThreshold, int?sizeThreshold)
 {
     using (var vcfWriter = new BgzipOrStreamWriter(outVcfPath))
     {
         var singleSampleName = new List <string> { sampleName };
         var genome = WriteVcfHeader(segments, diploidCoverage, wholeGenomeFastaDirectory, singleSampleName,
                                     extraHeaders, vcfWriter, qualityThreshold, denovoQualityThreshold, sizeThreshold);

         var id = new SampleId(sampleName);
         var perSegmentMaps = segments.Select(segment => new SampleMap <CanvasSegment> {
             { id, segment }
         });
         var singlePloidy = new List <PloidyInfo> { ploidy };

         WriteVariants(perSegmentMaps, singlePloidy, genome, vcfWriter, denovoQualityThreshold);
     }
 }
Пример #14
0
        /// <summary>
        /// Writes the <c>##OverallPloidy</c> header (length-weighted mean copy number over segments whose filter
        /// passes) and, when <paramref name="diploidCoverage"/> is set, the <c>##DiploidCoverage</c> header.
        /// Nothing is written when the passing segments have no total length.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="segments">Segments to average over; only those with a passing filter contribute.</param>
        /// <param name="diploidCoverage">Optional diploid coverage value.</param>
        public static void AddPloidyAndCoverageHeaders(BgzipOrStreamWriter writer, List <CanvasSegment> segments, double?diploidCoverage)
        {
            double totalPloidy = 0;
            double totalWeight = 0;

            foreach (CanvasSegment segment in segments.Where(candidate => candidate.Filter.IsPass))
            {
                totalWeight += segment.Length;
                // Multiply in double precision so the product cannot overflow if CopyNumber and Length
                // are both 32-bit integers (NOTE(review): their types are not visible here — confirm).
                totalPloidy += (double)segment.CopyNumber * segment.Length;
            }
            if (totalWeight > 0)
            {
                writer.WriteLine($"##OverallPloidy={totalPloidy / totalWeight:F2}");
                if (diploidCoverage != null)
                {
                    writer.WriteLine($"##DiploidCoverage={diploidCoverage:F2}");
                }
            }
        }
Пример #15
0
        /// <summary>
        /// Writes one VCF record per sample map, visiting chromosomes in the order returned by
        /// <c>genome.Contigs()</c> and emitting the maps whose first segment lies on each chromosome.
        /// </summary>
        /// <param name="segmentsOfAllSamples">One sample map per record; each map holds that record's segment for every sample.</param>
        /// <param name="ploidies">Per-sample ploidy information, paired positionally with each map's segments; a null entry means reference copy number 2.</param>
        /// <param name="genome">Genome metadata supplying the chromosome order.</param>
        /// <param name="writer">Destination writer.</param>
        /// <param name="denovoQualityThreshold">When set, the DQ sub-field is reported in the sample columns.</param>
        private static void WriteVariants(IEnumerable <ISampleMap <CanvasSegment> > segmentsOfAllSamples, List <PloidyInfo> ploidies, GenomeMetadata genome,
                                          BgzipOrStreamWriter writer, int?denovoQualityThreshold = null)
        {
            var segmentsOfAllSamplesArray = segmentsOfAllSamples.ToArray(); // TODO: not necessary when chrom match logic has been updated
            if (segmentsOfAllSamplesArray.Length == 0)
            {
                // No segments means no records; this also avoids calling First() on an empty array.
                return;
            }
            int nSamples = segmentsOfAllSamplesArray.First().Values.Count();

            foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Contigs()) //TODO: this is extremely inefficient. Segments should be sorted by chromosome
            {
                foreach (var sampleMap in segmentsOfAllSamplesArray)
                {
                    var currentSegments    = sampleMap.Values.ToArray();
                    var firstSampleSegment = currentSegments.First();
                    if (!firstSampleSegment.Chr.Equals(chromosome.Name, StringComparison.OrdinalIgnoreCase)
                        ) //TODO: this is extremely inefficient. Segments should be sorted by chromosome
                    {
                        continue;
                    }
                    var recordLevelFilter = CanvasFilter.GetRecordLevelFilterFromSampleFiltersOnly(
                        sampleMap
                        .Select(x => x.Value.Filter)
                        .ToReadOnlyList())
                                            .ToVcfString();
                    var referenceCopyNumbers = currentSegments.Zip(ploidies,
                                                                   (segment, ploidy) => ploidy?.GetReferenceCopyNumber(segment) ?? 2).ToList();
                    var cnvTypes = new CnvType[nSamples];
                    var sampleSetAlleleCopyNumbers = new int[nSamples][];
                    for (int sampleIndex = 0; sampleIndex < nSamples; sampleIndex++)
                    {
                        (cnvTypes[sampleIndex], sampleSetAlleleCopyNumbers[sampleIndex]) = currentSegments[sampleIndex]
                                                                                           .GetCnvTypeAndAlleleCopyNumbers(referenceCopyNumbers[sampleIndex]);
                    }
                    var sampleSetCnvType = AssignCnvType(cnvTypes);
                    var (alternateAllele, genotypes) = GetAltAllelesAndGenotypes(sampleSetAlleleCopyNumbers);
                    WriteColumnsUntilInfoField(writer, firstSampleSegment, sampleSetCnvType, alternateAllele,
                                               recordLevelFilter, nSamples > 1);
                    WriteFormatAndSampleFields(writer, currentSegments, genotypes,
                                               denovoQualityThreshold.HasValue);
                }
            }
        }
Пример #16
0
		/// <summary>
		/// Writes this record as one VCF line: the eight fixed columns, the FORMAT column, and
		/// <paramref name="nSamples"/> sample columns. Every field except the last sample is written
		/// with the tab-appending helper.
		/// </summary>
		/// <param name="w">Destination writer.</param>
		/// <param name="nSamples">Number of sample columns to write.</param>
		public void Write(BgzipOrStreamWriter w, int nSamples)
		{
			SanityCheck(nSamples);

			// Fixed columns: CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, then FORMAT.
			WriteStringFieldWithTab(w, ChromName);
			w.WriteTab(Pos.ToString());
			WriteStringFieldWithTab(w, Id);
			WriteStringFieldWithTab(w, Ref);
			WriteStringFieldWithTab(w, GetAltString());
			WriteStringFieldWithTab(w, _qual);
			WriteStringFieldWithTab(w, GetFilterString());
			WriteStringFieldWithTab(w, GetInfoString());
			WriteStringFieldWithTab(w, GetFormatString());

			for (int column = 0; column < nSamples; column++)
			{
				bool isLastColumn = column == nSamples - 1;
				string sampleText = GetSampleString(column);
				if (!isLastColumn)
				{
					WriteStringFieldWithTab(w, sampleText);
				}
				else
				{
					WriteStringFieldNoTab(w, sampleText);
				}
			}
			w.WriteLine();
		}
Пример #17
0
		/// <summary>
		/// Creates a streamer for a single sample, using "SAMPLE" as both the sample name and the sample id.
		/// </summary>
		/// <param name="writer">Writer passed on to the name/id constructor.</param>
		public VCFOutStreamer(BgzipOrStreamWriter writer)
			: this(writer, "SAMPLE", "SAMPLE")
		{
		}
Пример #18
0
		/// <summary>
		/// Writes <paramref name="s"/>, after passing it through <c>StringCheck</c>, without adding anything after it.
		/// </summary>
		/// <param name="w">Destination writer.</param>
		/// <param name="s">Field text to write.</param>
		private void WriteStringFieldNoTab(BgzipOrStreamWriter w, string s)
		{
			var checkedText = StringCheck(s);
			w.Write(checkedText);
		}
Пример #19
0
		/// <summary>
		/// Creates a streamer that stores the given writer, sample names and sample ids, and starts with an
		/// empty header list.
		/// </summary>
		/// <param name="writer">Writer stored for later output.</param>
		/// <param name="sampleNames">Sample names, stored as given.</param>
		/// <param name="sampleIds">Sample ids, stored as given.</param>
		public VCFOutStreamer(BgzipOrStreamWriter writer, string[] sampleNames, string[] sampleIds)
		{
			_writer = writer;
			_headerList = new List<KeyValuePair<string, string>>();
			_sampleIDs = sampleIds;
			_sampleNames = sampleNames;
		}
Пример #20
0
		/// <summary>
		/// Creates a streamer for a single sample by wrapping the name and id in one-element arrays and
		/// delegating to the array-based constructor.
		/// </summary>
		/// <param name="writer">Writer passed on to the array-based constructor.</param>
		/// <param name="sampleName">Name of the single sample.</param>
		/// <param name="sampleId">Id of the single sample.</param>
		public VCFOutStreamer(BgzipOrStreamWriter writer, string sampleName, string sampleId)
			: this(writer, new[] { sampleName }, new[] { sampleId })
		{
		}
Пример #21
0
        /// <summary>
        /// Writes the VCF meta-information lines and the column header line, and returns the genome metadata
        /// loaded from GenomeSize.xml in <paramref name="wholeGenomeFastaDirectory"/>.
        /// </summary>
        /// <param name="segments">Segments used for the ploidy/coverage headers and the chromosome-name sanity check.</param>
        /// <param name="diploidCoverage">Optional diploid coverage passed to the ploidy/coverage header writer.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory containing genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleNames">Sample column names; ploidy/coverage headers are written only when there is exactly one.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="writer">Destination writer.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality FILTER.</param>
        /// <param name="denovoQualityThreshold">When set, the DQ FORMAT key is declared.</param>
        /// <param name="sizeThreshold">When set, the size FILTER is declared.</param>
        /// <returns>The deserialized genome metadata.</returns>
        private static GenomeMetadata WriteVcfHeader(List <CanvasSegment> segments, double?diploidCoverage,
                                                     string wholeGenomeFastaDirectory, List <string> sampleNames, List <string> extraHeaders, BgzipOrStreamWriter writer, int qualityThreshold,
                                                     int?denovoQualityThreshold, int?sizeThreshold)
        {
            string[] infoLines =
            {
                "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">",
                "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">",
                "##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">",
                "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">",
                "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">",
                "##INFO=<ID=SUBCLONAL,Number=0,Type=Flag,Description=\"Subclonal variant\">",
                "##INFO=<ID=COMMONCNV,Number=0,Type=Flag,Description=\"Common CNV variant identified from pre-specified bed intervals\">"
            };
            string[] formatLines =
            {
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">",
                "##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">",
                "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">",
                "##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">",
                "##FORMAT=<ID=MCCQ,Number=1,Type=Float,Description=\"Major chromosome count quality score\">",
                "##FORMAT=<ID=QS,Number=1,Type=Float,Description=\"Phred-scaled quality score. If CN is reference then this is -10log10(prob(variant)) otherwise this is -10log10(prob(no variant).\">"
            };
            string[] fixedColumns = { "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT" };

            // File-level lines.
            writer.WriteLine("##fileformat=VCFv4.1");
            writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
            writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");

            // ##OverallPloidy and ##DiploidCoverage are written only for a single-sample file.
            bool isSingleSample = sampleNames.Count == 1;
            if (isSingleSample)
            {
                AddPloidyAndCoverageHeaders(writer, segments, diploidCoverage);
            }

            if (extraHeaders != null)
            {
                foreach (string extraHeader in extraHeaders)
                {
                    writer.WriteLine(extraHeader);
                }
            }

            // Contig lines come from the genome size file.
            var genome = new GenomeMetadata();
            genome.Deserialize(new FileLocation(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml")));
            foreach (GenomeMetadata.SequenceMetadata contig in genome.Contigs())
            {
                writer.WriteLine($"##contig=<ID={contig.Name},length={contig.Length}>");
            }

            // ALT and FILTER declarations.
            writer.WriteLine("##ALT=<ID=DUP,Description=\"Region of elevated copy number relative to the reference\">");
            WriteHeaderAllAltCnTags(writer);
            writer.WriteLine($"##FILTER=<ID=q{qualityThreshold},Description=\"Quality below {qualityThreshold}\">");
            if (sizeThreshold.HasValue)
            {
                string sizeFilterName = CanvasFilter.GetCnvSizeFilter(sizeThreshold.Value, out var sizeFilterThreshold);
                writer.WriteLine($"##FILTER=<ID={sizeFilterName},Description=\"Length shorter than {sizeFilterThreshold.Number} {sizeFilterThreshold.Units}\">");
            }
            writer.WriteLine("##FILTER=<ID=FailedFT,Description=\"Sample-level filter failed in all the samples\">");

            foreach (string infoLine in infoLines)
            {
                writer.WriteLine(infoLine);
            }
            foreach (string formatLine in formatLines)
            {
                writer.WriteLine(formatLine);
            }
            if (denovoQualityThreshold.HasValue)
            {
                writer.WriteLine($"##FORMAT=<ID=DQ,Number=1,Type=Float,Description=\"De novo quality. Threshold for passing de novo call: {denovoQualityThreshold}\">");
            }
            writer.WriteLine("##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Sample filter, 'PASS' indicates that all filters have passed for this sample\">");

            // Column header: the fixed columns followed by one column per sample.
            writer.WriteLine(string.Join("\t", fixedColumns.Concat(sampleNames)));

            SanityCheckChromosomeNames(genome, segments);
            return genome;
        }
Пример #22
0
        /// <summary>
        /// Writes a single-sample VCF to <paramref name="outVcfPath"/>: the header, then one record per segment,
        /// grouped by chromosome in the order listed in GenomeSize.xml.
        /// </summary>
        /// <param name="outVcfPath">Path of the VCF file to create.</param>
        /// <param name="segments">Segments to report.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory containing genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleName">Name of the sample column.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="ploidy">Source of reference copy numbers; when null, a reference copy number of 2 is used.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality FILTER.</param>
        public static void WriteSegments(string outVcfPath, List <CanvasSegment> segments, string wholeGenomeFastaDirectory, string sampleName,
                                         List <string> extraHeaders, PloidyInfo ploidy, int qualityThreshold = 10)
        {
            using (BgzipOrStreamWriter writer = new BgzipOrStreamWriter(outVcfPath))
            {
                // Write the VCF header:
                writer.WriteLine("##fileformat=VCFv4.1");
                writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
                writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");

                if (extraHeaders != null)
                {
                    foreach (string header in extraHeaders)
                    {
                        writer.WriteLine(header);
                    }
                }
                GenomeMetadata genome = new GenomeMetadata();
                genome.Deserialize(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml"));
                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    writer.WriteLine($"##contig=<ID={chromosome.Name},length={chromosome.Length}>");
                }
                string qualityFilter = $"q{qualityThreshold}";
                writer.WriteLine("##ALT=<ID=CNV,Description=\"Copy number variable region\">");
                writer.WriteLine($"##FILTER=<ID={qualityFilter},Description=\"Quality below {qualityThreshold}\">");
                writer.WriteLine("##FILTER=<ID=L10kb,Description=\"Length shorter than 10kb\">");
                writer.WriteLine("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
                writer.WriteLine("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
                writer.WriteLine("##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">");
                writer.WriteLine("##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">");
                writer.WriteLine("##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">");
                writer.WriteLine("##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">");
                writer.WriteLine("##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">");
                writer.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sampleName);

                SanityCheckChromosomeNames(genome, segments);

                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    foreach (CanvasSegment segment in segments)
                    {
                        if (!segment.Chr.Equals(chromosome.Name, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        int     referenceCopyNumber = ploidy?.GetReferenceCopyNumber(segment) ?? 2;
                        CnvType cnvType             = segment.GetCnvType(referenceCopyNumber);
                        bool    isVariant           = cnvType != CnvType.Reference;

                        // From vcf 4.1 spec:
                        //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String “<ID>”) then the padding base is required and POS denotes the
                        //     coordinate of the base preceding the polymorphism.
                        string alternateAllele = cnvType.ToAltId();
                        int    position        = (alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">")) ? segment.Begin : segment.Begin + 1;
                        writer.Write($"{segment.Chr}\t{position}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

                        // The text is already fully interpolated; it must not be handed on as a composite
                        // format string, because any '{' or '}' in a value would then raise FormatException.
                        writer.Write($"N\t{alternateAllele}\t{segment.QScore}\t{segment.Filter}\t");

                        if (isVariant)
                        {
                            writer.Write($"SVTYPE={cnvType.ToSvType()};");
                        }
                        writer.Write($"END={segment.End}");
                        if (isVariant)
                        {
                            writer.Write($";CNVLEN={segment.End - segment.Begin}");
                        }

                        //  FORMAT field, with MCC appended only when the segment has a major chromosome count
                        bool hasMajorChromosomeCount = segment.MajorChromosomeCount.HasValue;
                        var  roundedMeanCount        = Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero);

                        writer.Write(hasMajorChromosomeCount ? "\tRC:BC:CN:MCC" : "\tRC:BC:CN");
                        writer.Write($"\t{roundedMeanCount}:{segment.BinCount}:{segment.CopyNumber}");
                        if (hasMajorChromosomeCount)
                        {
                            writer.Write($":{segment.MajorChromosomeCount}");
                        }
                        writer.WriteLine();
                    }
                }
            }
        }