Ejemplo n.º 1
0
        /// <summary>
        /// Writes the FORMAT column followed by one sample column per segment as a single
        /// tab-separated line (with a leading tab) to <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">Destination for the line.</param>
        /// <param name="segments">One segment per sample column.</param>
        /// <param name="genotypes">Genotype strings, indexed in parallel with <paramref name="segments"/>.</param>
        /// <param name="reportDQ">When true, a trailing DQ field is added to the FORMAT and to every sample column.</param>
        private static void WriteFormatAndSampleFields(BgzipOrStreamWriter writer, CanvasSegment[] segments, string[] genotypes, bool reportDQ)
        {
            const string nullValue = ".";
            // Numbers are always written with '.' as the decimal separator, regardless of the
            // thread's current culture, so the output does not change with the host locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            string formatColumn = "GT:RC:BC:CN:MCC:MCCQ:QS:FT";
            if (reportDQ)
            {
                formatColumn += ":DQ";
            }
            var outputFields = new List<string> { formatColumn };

            for (int i = 0; i < segments.Length; i++)
            {
                var segment = segments[i];
                // Missing optional values are reported as "."
                string mcc = segment.MajorChromosomeCount.HasValue
                    ? segment.MajorChromosomeCount.ToString()
                    : nullValue;
                string mccq = segment.MajorChromosomeCountScore.HasValue
                    ? string.Format(invariant, "{0:F2}", segment.MajorChromosomeCountScore.Value)
                    : nullValue;
                string sampleColumn = string.Format(invariant, "{0}:{1:F2}:{2}:{3}:{4}:{5}:{6:F2}:{7}",
                    genotypes[i], segment.MedianCount, segment.BinCount, segment.CopyNumber,
                    mcc, mccq, segment.QScore, segment.Filter.ToVcfString());
                if (reportDQ)
                {
                    string dqscore = segment.DqScore.HasValue
                        ? string.Format(invariant, "{0:F2}", segment.DqScore.Value)
                        : nullValue;
                    sampleColumn += ":" + dqscore;
                }
                outputFields.Add(sampleColumn);
            }
            writer.WriteLine("\t" + string.Join("\t", outputFields));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Writes the ##OverallPloidy header (length-weighted mean copy number over segments whose
        /// filter passes) and, when <paramref name="diploidCoverage"/> is supplied, the ##DiploidCoverage header.
        /// Nothing is written when the passing segments have a total length of zero.
        /// </summary>
        /// <param name="writer">Destination for the header lines.</param>
        /// <param name="segments">Segments to aggregate; only those with a passing filter contribute.</param>
        /// <param name="diploidCoverage">Optional diploid coverage value to report.</param>
        public static void AddPloidyAndCoverageHeaders(BgzipOrStreamWriter writer, List<CanvasSegment> segments, double? diploidCoverage)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            double totalPloidy = 0;
            double totalWeight = 0;

            foreach (CanvasSegment segment in segments.Where(candidate => candidate.Filter.IsPass))
            {
                totalWeight += segment.Length;
                // Multiply in double: CopyNumber and Length appear to be integer-typed (declared
                // elsewhere), and an integer product could silently overflow for long, high-copy segments.
                totalPloidy += (double)segment.CopyNumber * segment.Length;
            }
            if (totalWeight > 0)
            {
                // Invariant culture keeps '.' as the decimal separator regardless of the host locale.
                writer.WriteLine(string.Format(invariant, "##OverallPloidy={0:F2}", totalPloidy / totalWeight));
                if (diploidCoverage.HasValue)
                {
                    writer.WriteLine(string.Format(invariant, "##DiploidCoverage={0:F2}", diploidCoverage.Value));
                }
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Writes one ##ALT header line for each copy-number allele from CN0 up to and including
 /// CN<paramref name="maxCopyNum"/>, skipping CN1.
 /// </summary>
 /// <param name="writer">Destination for the header lines.</param>
 /// <param name="maxCopyNum">Highest copy number to declare (inclusive).</param>
 public static void WriteHeaderAllAltCnTags(BgzipOrStreamWriter writer, int maxCopyNum = 5)
 {
     // Every value in [0, maxCopyNum] except 1 gets an ALT declaration.
     var declaredCopyNumbers = Enumerable.Range(0, maxCopyNum + 1).Where(n => n != 1);

     foreach (int n in declaredCopyNumbers)
     {
         writer.WriteLine($"##ALT=<ID=CN{n},Description=\"Copy number allele: {n} copies\">");
     }
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Writes the given bedgraph entries to the file at <paramref name="location"/>,
 /// preceded by <paramref name="header"/> when one is supplied.
 /// </summary>
 /// <param name="bedGraphEntries">Entries to write.</param>
 /// <param name="location">Output file location; its full name is passed to the writer.</param>
 /// <param name="header">Optional header line written before any entry.</param>
 public void Write(IEnumerable<BedGraphEntry> bedGraphEntries, IFileLocation location, string header = null)
 {
     using (var output = new BgzipOrStreamWriter(location.FullName))
     {
         bool hasHeader = header != null;
         if (hasHeader)
         {
             output.WriteLine(header);
         }

         // The entries go through a BedGraphWriter layered on the same underlying writer.
         new BedGraphWriter(output).WriteLines(bedGraphEntries);
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Writes the FORMAT column ("RC:BC:CN:MCC:QS:DQ") followed by one sample column per segment,
        /// each preceded by a tab, and then terminates the line.
        /// </summary>
        /// <param name="writer">Destination for the fields.</param>
        /// <param name="segments">One segment per sample column.</param>
        private static void WriteFormatField(BgzipOrStreamWriter writer, List<CanvasSegment> segments)
        {
            const string nullValue = ".";
            // Numbers are always written with '.' as the decimal separator, regardless of the
            // thread's current culture, so the output does not change with the host locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            writer.Write("\tRC:BC:CN:MCC:QS:DQ");

            foreach (var segment in segments)
            {
                // Missing optional values are reported as "."
                var mcc = segment.MajorChromosomeCount.HasValue ? segment.MajorChromosomeCount.ToString() : nullValue;
                var dqscore = segment.DQScore.HasValue
                    ? string.Format(invariant, "{0:F2}", segment.DQScore.Value)
                    : nullValue;
                // RC is reported as the mean count rounded to a whole number (midpoints away from zero).
                var rc = Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero);
                writer.Write(string.Format(invariant, "\t{0}:{1}:{2}:{3}:{4}:{5}",
                    rc, segment.BinCount, segment.CopyNumber, mcc, segment.QScore, dqscore));
            }
            writer.WriteLine();
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Writes the FORMAT column and the single sample column for <paramref name="segment"/>, each
 /// preceded by a tab, and terminates the line. The MCC field is included in both columns only
 /// when the segment has a major chromosome count.
 /// </summary>
 /// <param name="writer">Destination for the fields.</param>
 /// <param name="segment">Segment whose values are reported.</param>
 private static void WriteSingleSampleInfo(BgzipOrStreamWriter writer, CanvasSegment segment)
 {
     bool hasMajorChromosomeCount = segment.MajorChromosomeCount.HasValue;

     // FORMAT column
     writer.Write("\tRC:BC:CN");
     if (hasMajorChromosomeCount)
     {
         writer.Write(":MCC");
     }

     // Sample column; RC is the mean count rounded to a whole number (midpoints away from zero).
     writer.Write($"\t{Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero)}:{segment.BinCount}:{segment.CopyNumber}");
     if (hasMajorChromosomeCount)
     {
         writer.Write($":{segment.MajorChromosomeCount}");
     }
     writer.WriteLine();
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes the VCF meta-information lines and the column title line to <paramref name="writer"/>,
        /// then checks the segments' chromosome names against the genome metadata.
        /// </summary>
        /// <param name="segments">Segments used for the ploidy header and the chromosome-name check.</param>
        /// <param name="diploidCoverage">Optional diploid coverage, reported for single-sample files.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory expected to contain genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleNames">Sample names appended to the title line.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="writer">Destination for the header.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality filter.</param>
        /// <param name="denovoQualityThreshold">When set, the DQ FORMAT header is written.</param>
        /// <param name="sizeThreshold">When set, the CNV size filter header is written.</param>
        /// <returns>The genome metadata deserialized from GenomeSize.xml.</returns>
        private static GenomeMetadata WriteVcfHeader(List<CanvasSegment> segments, double? diploidCoverage,
                                                     string wholeGenomeFastaDirectory, List<string> sampleNames, List<string> extraHeaders, BgzipOrStreamWriter writer, int qualityThreshold,
                                                     int? denovoQualityThreshold, int? sizeThreshold)
        {
            // File-level meta-information.
            writer.WriteLine("##fileformat=VCFv4.1");
            writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
            writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");

            // ##OverallPloidy and ##DiploidCoverage are only written for a single-sample file.
            bool isSingleSample = sampleNames.Count == 1;
            if (isSingleSample)
            {
                AddPloidyAndCoverageHeaders(writer, segments, diploidCoverage);
            }

            if (extraHeaders != null)
            {
                foreach (string extraHeader in extraHeaders)
                {
                    writer.WriteLine(extraHeader);
                }
            }

            // One ##contig line per sequence listed in GenomeSize.xml.
            var genome = new GenomeMetadata();
            genome.Deserialize(new FileLocation(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml")));
            foreach (GenomeMetadata.SequenceMetadata contig in genome.Contigs())
            {
                writer.WriteLine($"##contig=<ID={contig.Name},length={contig.Length}>");
            }

            // ALT and FILTER declarations.
            writer.WriteLine("##ALT=<ID=DUP,Description=\"Region of elevated copy number relative to the reference\">");
            WriteHeaderAllAltCnTags(writer);
            writer.WriteLine($"##FILTER=<ID=q{qualityThreshold},Description=\"Quality below {qualityThreshold}\">");
            if (sizeThreshold.HasValue)
            {
                string sizeFilterName = CanvasFilter.GetCnvSizeFilter(sizeThreshold.Value, out var sizeFilterThreshold);
                writer.WriteLine($"##FILTER=<ID={sizeFilterName},Description=\"Length shorter than {sizeFilterThreshold.Number} {sizeFilterThreshold.Units}\">");
            }

            // Header lines whose text does not depend on any argument, in output order.
            string[] fixedLines =
            {
                "##FILTER=<ID=FailedFT,Description=\"Sample-level filter failed in all the samples\">",
                "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">",
                "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">",
                "##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">",
                "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">",
                "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">",
                "##INFO=<ID=SUBCLONAL,Number=0,Type=Flag,Description=\"Subclonal variant\">",
                "##INFO=<ID=COMMONCNV,Number=0,Type=Flag,Description=\"Common CNV variant identified from pre-specified bed intervals\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">",
                "##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">",
                "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">",
                "##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">",
                "##FORMAT=<ID=MCCQ,Number=1,Type=Float,Description=\"Major chromosome count quality score\">",
                "##FORMAT=<ID=QS,Number=1,Type=Float,Description=\"Phred-scaled quality score. If CN is reference then this is -10log10(prob(variant)) otherwise this is -10log10(prob(no variant).\">"
            };
            foreach (string fixedLine in fixedLines)
            {
                writer.WriteLine(fixedLine);
            }

            if (denovoQualityThreshold.HasValue)
            {
                writer.WriteLine($"##FORMAT=<ID=DQ,Number=1,Type=Float,Description=\"De novo quality. Threshold for passing de novo call: {denovoQualityThreshold}\">");
            }
            writer.WriteLine("##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Sample filter, 'PASS' indicates that all filters have passed for this sample\">");

            // Title line: the nine fixed VCF columns followed by the sample names.
            var titleColumns = new List<string>();
            titleColumns.AddRange(new[] { "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT" });
            titleColumns.AddRange(sampleNames);
            writer.WriteLine(string.Join("\t", titleColumns));

            SanityCheckChromosomeNames(genome, segments);
            return genome;
        }
Ejemplo n.º 8
0
		/// <summary>
		/// Writes this record as one line: the fixed columns (chromosome, position, id, ref, alt,
		/// qual, filter, info, format) followed by <paramref name="nSamples"/> sample columns.
		/// Only the last sample column is written without a trailing tab.
		/// </summary>
		/// <param name="w">Destination writer.</param>
		/// <param name="nSamples">Number of sample columns to write; validated by SanityCheck first.</param>
		public void Write(BgzipOrStreamWriter w, int nSamples)
		{
			SanityCheck(nSamples);

			// Fixed columns, each followed by a tab.
			WriteStringFieldWithTab(w, ChromName);
			w.WriteTab(Pos.ToString());
			WriteStringFieldWithTab(w, Id);
			WriteStringFieldWithTab(w, Ref);
			WriteStringFieldWithTab(w, GetAltString());
			WriteStringFieldWithTab(w, _qual);
			WriteStringFieldWithTab(w, GetFilterString());
			WriteStringFieldWithTab(w, GetInfoString());
			WriteStringFieldWithTab(w, GetFormatString());

			// All samples except the last are tab-terminated.
			int current = 0;
			while (current + 1 < nSamples)
			{
				WriteStringFieldWithTab(w, GetSampleString(current));
				++current;
			}

			// The last sample (when there is one) ends the line, so it gets no trailing tab.
			if (nSamples > 0)
			{
				WriteStringFieldNoTab(w, GetSampleString(nSamples - 1));
			}

			w.WriteLine();
		}
Ejemplo n.º 9
0
        /// <summary>
        /// Writes the VCF meta-information lines and the column title line to <paramref name="writer"/>,
        /// then checks the segments' chromosome names against the genome metadata.
        /// </summary>
        /// <param name="segments">Segments used for the ploidy header and the chromosome-name check.</param>
        /// <param name="diploidCoverage">Optional diploid coverage passed to the ploidy/coverage headers.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory expected to contain genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleNames">Sample names appended to the title line.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality filter.</param>
        /// <param name="writer">Destination for the header.</param>
        /// <param name="denovoQualityThreshold">When set, the de novo quality header lines are written.</param>
        /// <returns>The genome metadata deserialized from GenomeSize.xml.</returns>
        private static GenomeMetadata WriteVcfHeader(List<CanvasSegment> segments, double? diploidCoverage,
                                                     string wholeGenomeFastaDirectory, List<string> sampleNames, List<string> extraHeaders, int qualityThreshold,
                                                     BgzipOrStreamWriter writer, int? denovoQualityThreshold = null)
        {
            bool reportDenovoQuality = denovoQualityThreshold.HasValue;

            // File-level meta-information.
            writer.WriteLine("##fileformat=VCFv4.1");
            writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
            writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");
            AddPloidyAndCoverageHeaders(writer, segments, diploidCoverage);

            if (extraHeaders != null)
            {
                foreach (string extraHeader in extraHeaders)
                {
                    writer.WriteLine(extraHeader);
                }
            }

            // One ##contig line per sequence listed in GenomeSize.xml.
            var genome = new GenomeMetadata();
            genome.Deserialize(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml"));
            foreach (GenomeMetadata.SequenceMetadata sequence in genome.Sequences)
            {
                writer.WriteLine($"##contig=<ID={sequence.Name},length={sequence.Length}>");
            }

            // ALT and FILTER declarations.
            writer.WriteLine("##ALT=<ID=CNV,Description=\"Copy number variable region\">");
            writer.WriteLine($"##FILTER=<ID=q{qualityThreshold},Description=\"Quality below {qualityThreshold}\">");
            writer.WriteLine("##FILTER=<ID=L10kb,Description=\"Length shorter than 10kb\">");

            // INFO declarations.
            string[] infoLines =
            {
                "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">",
                "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">",
                "##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">",
                "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">",
                "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">",
                "##INFO=<ID=SUBCLONAL,Number=0,Type=Flag,Description=\"Subclonal variant\">"
            };
            foreach (string infoLine in infoLines)
            {
                writer.WriteLine(infoLine);
            }
            if (reportDenovoQuality)
            {
                // NOTE(review): this INFO line carries no Number/Type attributes — confirm whether that is intended.
                writer.WriteLine($"##INFO=<ID=dq{denovoQualityThreshold},Description=\"De novo quality score above {denovoQualityThreshold.Value}\">");
            }

            // FORMAT declarations.
            string[] formatLines =
            {
                "##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">",
                "##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">",
                "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">",
                "##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">"
            };
            foreach (string formatLine in formatLines)
            {
                writer.WriteLine(formatLine);
            }
            if (reportDenovoQuality)
            {
                // NOTE(review): the QS header is only emitted together with DQ — confirm this matches the fields the record writer emits.
                writer.WriteLine("##FORMAT=<ID=DQ,Number=1,Type=Float,Description=\"De novo variants Phred-scaled quality score\">");
                writer.WriteLine("##FORMAT=<ID=QS,Number=1,Type=Float,Description=\"Phred-scaled quality score\">");
            }

            // Title line: the nine fixed VCF columns followed by the tab-joined sample names.
            string joinedSampleNames = string.Join("\t", sampleNames.ToArray());
            writer.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + joinedSampleNames);

            SanityCheckChromosomeNames(genome, segments);
            return genome;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Writes the copy number calls for a single sample to a VCF file: the header first, then one
        /// record per segment, grouped by chromosome in the order listed in GenomeSize.xml.
        /// </summary>
        /// <param name="outVcfPath">Path of the VCF file to create.</param>
        /// <param name="segments">Segments to report.</param>
        /// <param name="wholeGenomeFastaDirectory">Directory expected to contain genome.fa and GenomeSize.xml.</param>
        /// <param name="sampleName">Name of the single sample column.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="ploidy">Source of the per-segment reference copy number; when null, 2 is assumed.</param>
        /// <param name="qualityThreshold">Threshold used to name and describe the quality filter.</param>
        public static void WriteSegments(string outVcfPath, List<CanvasSegment> segments, string wholeGenomeFastaDirectory, string sampleName,
                                         List<string> extraHeaders, PloidyInfo ploidy, int qualityThreshold = 10)
        {
            // Numbers are always written with '.' as the decimal separator, regardless of the
            // thread's current culture, so the output does not change with the host locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (BgzipOrStreamWriter writer = new BgzipOrStreamWriter(outVcfPath))
            {
                // Write the VCF header:
                writer.WriteLine("##fileformat=VCFv4.1");
                writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
                writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");

                foreach (string header in extraHeaders ?? new List<string>())
                {
                    writer.WriteLine(header);
                }
                GenomeMetadata genome = new GenomeMetadata();
                genome.Deserialize(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml"));
                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    writer.WriteLine($"##contig=<ID={chromosome.Name},length={chromosome.Length}>");
                }
                string qualityFilter = $"q{qualityThreshold}";
                writer.WriteLine("##ALT=<ID=CNV,Description=\"Copy number variable region\">");
                writer.WriteLine($"##FILTER=<ID={qualityFilter},Description=\"Quality below {qualityThreshold}\">");
                writer.WriteLine("##FILTER=<ID=L10kb,Description=\"Length shorter than 10kb\">");
                writer.WriteLine("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
                writer.WriteLine("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
                writer.WriteLine("##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">");
                writer.WriteLine("##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">");
                writer.WriteLine("##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">");
                writer.WriteLine("##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">");
                writer.WriteLine("##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">");
                writer.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sampleName);

                SanityCheckChromosomeNames(genome, segments);

                // Records are emitted chromosome by chromosome, in genome order; within a
                // chromosome the segments keep their order in the input list.
                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    foreach (CanvasSegment segment in segments)
                    {
                        if (!segment.Chr.Equals(chromosome.Name, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        int     referenceCopyNumber = ploidy?.GetReferenceCopyNumber(segment) ?? 2;
                        CnvType cnvType             = segment.GetCnvType(referenceCopyNumber);

                        // From vcf 4.1 spec:
                        //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String "<ID>") then the padding base is required and POS denotes the
                        //     coordinate of the base preceding the polymorphism.
                        string alternateAllele = cnvType.ToAltId();
                        bool   isSymbolicAllele = alternateAllele.StartsWith("<", StringComparison.Ordinal)
                                                  && alternateAllele.EndsWith(">", StringComparison.Ordinal);
                        int    position         = isSymbolicAllele ? segment.Begin : segment.Begin + 1;
                        writer.Write($"{segment.Chr}\t{position}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

                        // REF, ALT, QUAL, FILTER. The values are formatted exactly once; the rendered text is
                        // not passed through a second format pass, so braces in a value cannot cause a FormatException.
                        writer.Write(string.Format(invariant, "N\t{0}\t{1}\t{2}\t", alternateAllele, segment.QScore, segment.Filter));

                        // INFO field: SVTYPE and CNVLEN are only reported for non-reference calls.
                        if (cnvType != CnvType.Reference)
                        {
                            writer.Write($"SVTYPE={cnvType.ToSvType()};");
                        }
                        writer.Write($"END={segment.End}");
                        if (cnvType != CnvType.Reference)
                        {
                            writer.Write($";CNVLEN={segment.End - segment.Begin}");
                        }

                        //  FORMAT field; MCC is included only when the segment has a major chromosome count.
                        bool hasMajorChromosomeCount = segment.MajorChromosomeCount.HasValue;
                        writer.Write("\tRC:BC:CN");
                        if (hasMajorChromosomeCount)
                        {
                            writer.Write(":MCC");
                        }

                        //  Sample field; RC is the mean count rounded to a whole number (midpoints away from zero).
                        writer.Write(string.Format(invariant, "\t{0}:{1}:{2}",
                            Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero), segment.BinCount, segment.CopyNumber));
                        if (hasMajorChromosomeCount)
                        {
                            writer.Write(string.Format(invariant, ":{0}", segment.MajorChromosomeCount));
                        }
                        writer.WriteLine();
                    }
                }
            }
        }