예제 #1
0
        /// <summary>
        /// Translates a CNV type into the value used for the SVTYPE field.
        /// </summary>
        /// <param name="cnvType">CNV type to translate.</param>
        /// <returns>"CNV" for Gain and Loss, "LOH" for LossOfHeterozygosity.</returns>
        /// <exception cref="ApplicationException">Thrown for every other CNV type.</exception>
        public static string ToSvType(this CnvType cnvType)
        {
            // Gain and loss share a single SVTYPE value
            if (cnvType == CnvType.Gain || cnvType == CnvType.Loss)
            {
                return "CNV";
            }

            if (cnvType == CnvType.LossOfHeterozygosity)
            {
                return "LOH";
            }

            throw new ApplicationException($"SVTYPE field is unsupported for CNV type: {cnvType}");
        }
예제 #2
0
파일: CnvType.cs 프로젝트: zhang919/canvas
        /// <summary>
        /// Translates a CNV type into the value used for the SVTYPE field.
        /// </summary>
        /// <param name="cnvType">CNV type to translate.</param>
        /// <returns>
        /// The value of <c>CnvTag</c> for Gain, Loss and ComplexCnv; "LOH" for LossOfHeterozygosity.
        /// </returns>
        /// <exception cref="Illumina.Common.IlluminaException">Thrown for every other CNV type.</exception>
        public static string ToSvType(this CnvType cnvType)
        {
            if (cnvType == CnvType.LossOfHeterozygosity)
            {
                return "LOH";
            }

            // All remaining supported types share the common CNV tag
            bool usesCnvTag = cnvType == CnvType.Gain
                              || cnvType == CnvType.Loss
                              || cnvType == CnvType.ComplexCnv;
            if (!usesCnvTag)
            {
                throw new Illumina.Common.IlluminaException($"SVTYPE field is unsupported for CNV type: {cnvType}");
            }

            return CnvTag;
        }
예제 #3
0
        /// <summary>
        /// Translates a CNV type into the text used for the ALT column.
        /// </summary>
        /// <param name="cnvType">CNV type to translate.</param>
        /// <returns>"." for Reference; "&lt;CNV&gt;" for Gain, Loss and LossOfHeterozygosity.</returns>
        /// <exception cref="ApplicationException">Thrown for every other CNV type.</exception>
        public static string ToAltId(this CnvType cnvType)
        {
            if (cnvType == CnvType.Reference)
            {
                return ".";
            }

            // Every supported non-reference type is reported with the same symbolic allele
            bool isSupportedVariant = cnvType == CnvType.Gain
                                      || cnvType == CnvType.Loss
                                      || cnvType == CnvType.LossOfHeterozygosity;
            if (isSupportedVariant)
            {
                return "<CNV>";
            }

            throw new ApplicationException($"ALT is unsupported for CNV type: {cnvType}");
        }
예제 #4
0
        /// <summary>
        /// Translates a CNV type into its upper-case identifier token.
        /// </summary>
        /// <param name="cnvType">CNV type to translate.</param>
        /// <returns>"GAIN", "LOSS", "REF" or "LOH".</returns>
        /// <exception cref="ApplicationException">Thrown for every other CNV type.</exception>
        public static string ToVcfId(this CnvType cnvType)
        {
            string vcfId;
            switch (cnvType)
            {
                case CnvType.Gain:
                    vcfId = "GAIN";
                    break;

                case CnvType.Loss:
                    vcfId = "LOSS";
                    break;

                case CnvType.Reference:
                    vcfId = "REF";
                    break;

                case CnvType.LossOfHeterozygosity:
                    vcfId = "LOH";
                    break;

                default:
                    throw new ApplicationException($"Unsupported CNV type: {cnvType}");
            }

            return vcfId;
        }
예제 #5
0
        /// <summary>
        /// Writes one record per segment set, visiting the sets in the order of the contigs of <paramref name="genome"/>.
        /// </summary>
        /// <param name="segmentsOfAllSamples">
        /// Segment sets to write. The values of each map are indexed per sample, and the chromosome of the
        /// first value decides under which contig the set is written. An empty sequence writes nothing.
        /// </param>
        /// <param name="ploidies">
        /// Ploidy information zipped position-by-position with the segments of a set; a null entry falls back
        /// to a reference copy number of 2.
        /// </param>
        /// <param name="genome">Supplies the contigs that define the output order.</param>
        /// <param name="writer">Destination of the records.</param>
        /// <param name="denovoQualityThreshold">
        /// Only its presence (<c>HasValue</c>) is forwarded to <c>WriteFormatAndSampleFields</c>.
        /// </param>
        private static void WriteVariants(IEnumerable <ISampleMap <CanvasSegment> > segmentsOfAllSamples, List <PloidyInfo> ploidies, GenomeMetadata genome,
                                          BgzipOrStreamWriter writer, int?denovoQualityThreshold = null)
        {
            var segmentsOfAllSamplesArray = segmentsOfAllSamples.ToArray();
            if (segmentsOfAllSamplesArray.Length == 0)
            {
                // Nothing to write; avoids the InvalidOperationException First() would raise on empty input
                return;
            }
            int nSamples = segmentsOfAllSamplesArray[0].Values.Count();

            // Materialize every segment set once and index the sets by chromosome (case-insensitively), so each
            // contig only visits its own sets instead of re-scanning and re-copying all of them.
            // ToLookup keeps the original relative order of the sets within a chromosome.
            var segmentSetsByChromosome = segmentsOfAllSamplesArray
                                          .Select(map => (SampleMap: map, Segments: map.Values.ToArray()))
                                          .ToLookup(segmentSet => segmentSet.Segments.First().Chr, StringComparer.OrdinalIgnoreCase);

            foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Contigs())
            {
                foreach (var segmentSet in segmentSetsByChromosome[chromosome.Name])
                {
                    var sampleMap          = segmentSet.SampleMap;
                    var currentSegments    = segmentSet.Segments;
                    var firstSampleSegment = currentSegments[0];

                    var recordLevelFilter = CanvasFilter.GetRecordLevelFilterFromSampleFiltersOnly(
                        sampleMap
                        .Select(x => x.Value.Filter)
                        .ToReadOnlyList())
                                            .ToVcfString();
                    var referenceCopyNumbers = currentSegments.Zip(ploidies,
                                                                   (segment, ploidy) => ploidy?.GetReferenceCopyNumber(segment) ?? 2).ToList();
                    var cnvTypes = new CnvType[nSamples];
                    var sampleSetAlleleCopyNumbers = new int[nSamples][];
                    for (int sampleIndex = 0; sampleIndex < nSamples; sampleIndex++)
                    {
                        (cnvTypes[sampleIndex], sampleSetAlleleCopyNumbers[sampleIndex]) = currentSegments[sampleIndex]
                                                                                           .GetCnvTypeAndAlleleCopyNumbers(referenceCopyNumbers[sampleIndex]);
                    }
                    var sampleSetCnvType = AssignCnvType(cnvTypes);
                    var(alternateAllele, genotypes) = GetAltAllelesAndGenotypes(sampleSetAlleleCopyNumbers);
                    // QUAL is only reported for single-sample output, hence the nSamples > 1 flag
                    WriteColumnsUntilInfoField(writer, firstSampleSegment, sampleSetCnvType, alternateAllele,
                                               recordLevelFilter, nSamples > 1);
                    WriteFormatAndSampleFields(writer, currentSegments, genotypes,
                                               denovoQualityThreshold.HasValue);
                }
            }
        }
예제 #6
0
파일: CnvType.cs 프로젝트: zhang919/canvas
        /// <summary>
        /// Translates a CNV type into its upper-case identifier token.
        /// </summary>
        /// <param name="cnvType">CNV type to translate.</param>
        /// <returns>"GAIN", "LOSS", "REF", "LOH" or "COMPLEXCNV".</returns>
        /// <exception cref="Illumina.Common.IlluminaException">Thrown for every other CNV type.</exception>
        public static string ToVcfId(this CnvType cnvType)
        {
            if (cnvType == CnvType.Gain)
            {
                return "GAIN";
            }
            if (cnvType == CnvType.Loss)
            {
                return "LOSS";
            }
            if (cnvType == CnvType.Reference)
            {
                return "REF";
            }
            if (cnvType == CnvType.LossOfHeterozygosity)
            {
                return "LOH";
            }
            if (cnvType == CnvType.ComplexCnv)
            {
                return "COMPLEXCNV";
            }

            throw new Illumina.Common.IlluminaException($"Unsupported CNV type: {cnvType}");
        }
예제 #7
0
파일: CnvType.cs 프로젝트: zhang919/canvas
 /// <summary>
 /// Tells whether the given CNV type is <see cref="CnvType.Reference"/>.
 /// </summary>
 /// <param name="cnvType">CNV type to inspect.</param>
 /// <returns>True only when <paramref name="cnvType"/> equals <see cref="CnvType.Reference"/>.</returns>
 public static bool IsReference(CnvType cnvType)
 {
     return cnvType == CnvType.Reference;
 }
예제 #8
0
        /// <summary>
        /// Writes the leading, non-sample columns of one record: chromosome, position, ID, the "N" reference
        /// base, alternate allele, quality, filter and the INFO key/value list.
        /// </summary>
        /// <param name="writer">Destination of the record text.</param>
        /// <param name="firstSampleSegment">Segment supplying coordinates, quality score, flags and confidence intervals.</param>
        /// <param name="sampleSetCnvType">CNV type used for the ID and SVTYPE; Reference suppresses SVTYPE and CNVLEN.</param>
        /// <param name="alternateAllele">Text for the ALT column; a value wrapped in angle brackets shifts the position to the padding base.</param>
        /// <param name="recordLevelFilter">Text for the FILTER column.</param>
        /// <param name="isMultisample">When true the quality column is written as "." instead of the segment's QScore.</param>
        private static void WriteColumnsUntilInfoField(BgzipOrStreamWriter writer, CanvasSegment firstSampleSegment, CnvType sampleSetCnvType, string alternateAllele, string recordLevelFilter, bool isMultisample)
        {
            // VCF 4.1: a symbolic ALT allele (angle-bracketed ID) requires a padding base, and POS then
            // denotes the coordinate of the base preceding the polymorphism.
            bool hasSymbolicAllele = alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">");
            int position = firstSampleSegment.Begin;
            if (!hasSymbolicAllele)
            {
                position += 1;
            }

            bool isVariant = sampleSetCnvType != CnvType.Reference;

            writer.Write($"{firstSampleSegment.Chr}\t{position}\tCanvas:{sampleSetCnvType.ToVcfId()}:{firstSampleSegment.Chr}:{firstSampleSegment.Begin + 1}-{firstSampleSegment.End}\t");

            string qScore;
            if (isMultisample)
            {
                qScore = ".";
            }
            else
            {
                qScore = $"{firstSampleSegment.QScore:F2}";
            }
            writer.Write($"N\t{alternateAllele}\t{qScore}\t{recordLevelFilter}\t");

            // INFO entries that precede END each carry their own trailing ';'
            if (isVariant)
            {
                writer.Write($"SVTYPE={sampleSetCnvType.ToSvType()};");
            }
            if (firstSampleSegment.IsHeterogeneous)
            {
                writer.Write("SUBCLONAL;");
            }
            if (firstSampleSegment.IsCommonCnv)
            {
                writer.Write("COMMONCNV;");
            }

            writer.Write($"END={firstSampleSegment.End}");

            // INFO entries that follow END each carry a leading ';'
            if (isVariant)
            {
                writer.Write($";CNVLEN={firstSampleSegment.Length}");
            }

            var startInterval = firstSampleSegment.StartConfidenceInterval;
            if (startInterval != null)
            {
                writer.Write($";CIPOS={startInterval.Item1},{startInterval.Item2}");
            }

            var endInterval = firstSampleSegment.EndConfidenceInterval;
            if (endInterval != null)
            {
                writer.Write($";CIEND={endInterval.Item1},{endInterval.Item2}");
            }
        }
예제 #9
0
        /// <summary>
        /// Writes the non-sample columns of one record for a single segment: chromosome, position, ID,
        /// the "N" reference base, alternate allele, quality, filter and the INFO key/value list.
        /// </summary>
        /// <param name="writer">Destination of the record text.</param>
        /// <param name="segment">Segment supplying coordinates, scores, filter, flags and confidence intervals.</param>
        /// <param name="cnvType">CNV type used for ALT, ID and SVTYPE; Reference suppresses SVTYPE and CNVLEN.</param>
        /// <param name="denovoQualityThreshold">
        /// When set and the segment's DQScore is at least this value, a "dq{threshold};" entry is written.
        /// </param>
        /// <param name="isMultisample">
        /// When true the quality column is written as "." and the "DQ=" entry is omitted.
        /// </param>
        private static void WriteInfoField(BgzipOrStreamWriter writer, CanvasSegment segment, CnvType cnvType, int?denovoQualityThreshold, bool isMultisample)
        {
            // From vcf 4.1 spec:
            //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String "<ID>") then the padding base is required and POS denotes the
            //     coordinate of the base preceding the polymorphism.
            string alternateAllele = cnvType.ToAltId();
            int    position        = (alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">"))
                ? segment.Begin
                : segment.Begin + 1;

            writer.Write($"{segment.Chr}\t{position}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

            string qScore = isMultisample ? "." : $"{segment.QScore:F2}";
            writer.Write($"N\t{alternateAllele}\t{qScore}\t{segment.Filter}\t");

            if (cnvType != CnvType.Reference)
            {
                writer.Write($"SVTYPE={cnvType.ToSvType()};");
            }

            if (segment.IsHeterogeneous)
            {
                writer.Write("SUBCLONAL;");
            }

            if (segment.DQScore.HasValue && !isMultisample)
            {
                writer.Write($"DQ={segment.DQScore.Value};");
            }

            // Short-circuit && replaces the former non-short-circuit &; the result is the same because the
            // operands have no side effects and the lifted >= is false when either side is null.
            if (denovoQualityThreshold.HasValue && segment.DQScore.HasValue && segment.DQScore >= denovoQualityThreshold)
            {
                writer.Write($"dq{denovoQualityThreshold};");
            }
            writer.Write($"END={segment.End}");

            if (cnvType != CnvType.Reference)
            {
                writer.Write($";CNVLEN={segment.End - segment.Begin}");
            }

            if (segment.StartConfidenceInterval != null)
            {
                writer.Write($";CIPOS={segment.StartConfidenceInterval.Item1},{segment.StartConfidenceInterval.Item2}");
            }
            if (segment.EndConfidenceInterval != null)
            {
                writer.Write($";CIEND={segment.EndConfidenceInterval.Item1},{segment.EndConfidenceInterval.Item2}");
            }
        }
예제 #10
0
        /// <summary>
        /// Writes a single-sample VCF (v4.1 header plus one record per segment) with the copy number calls.
        /// </summary>
        /// <param name="outVcfPath">Path handed to the <c>BgzipOrStreamWriter</c> that receives the output.</param>
        /// <param name="segments">Segments to write; records are emitted in the order of the genome's sequences.</param>
        /// <param name="wholeGenomeFastaDirectory">
        /// Directory from which "GenomeSize.xml" is deserialized; also used to build the ##reference header path.
        /// </param>
        /// <param name="sampleName">Name of the sample column in the #CHROM header line.</param>
        /// <param name="extraHeaders">Additional header lines written verbatim; may be null.</param>
        /// <param name="ploidy">Source of the reference copy number per segment; when null, 2 is used.</param>
        /// <param name="qualityThreshold">Threshold named in the quality ##FILTER header line.</param>
        public static void WriteSegments(string outVcfPath, List <CanvasSegment> segments, string wholeGenomeFastaDirectory, string sampleName,
                                         List <string> extraHeaders, PloidyInfo ploidy, int qualityThreshold = 10)
        {
            using (BgzipOrStreamWriter writer = new BgzipOrStreamWriter(outVcfPath))
            {
                // Write the VCF header:
                writer.WriteLine("##fileformat=VCFv4.1");
                writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
                writer.WriteLine($"##reference={Path.Combine(wholeGenomeFastaDirectory, "genome.fa")}");

                if (extraHeaders != null)
                {
                    foreach (string header in extraHeaders)
                    {
                        writer.WriteLine(header);
                    }
                }
                GenomeMetadata genome = new GenomeMetadata();
                genome.Deserialize(Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml"));
                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    writer.WriteLine($"##contig=<ID={chromosome.Name},length={chromosome.Length}>");
                }
                string qualityFilter = $"q{qualityThreshold}";
                writer.WriteLine("##ALT=<ID=CNV,Description=\"Copy number variable region\">");
                writer.WriteLine($"##FILTER=<ID={qualityFilter},Description=\"Quality below {qualityThreshold}\">");
                writer.WriteLine("##FILTER=<ID=L10kb,Description=\"Length shorter than 10kb\">");
                writer.WriteLine("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
                writer.WriteLine("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
                writer.WriteLine("##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">");
                writer.WriteLine("##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">");
                writer.WriteLine("##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">");
                writer.WriteLine("##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">");
                writer.WriteLine("##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">");
                writer.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sampleName);

                SanityCheckChromosomeNames(genome, segments);

                foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Sequences)
                {
                    foreach (CanvasSegment segment in segments)
                    {
                        if (!segment.Chr.Equals(chromosome.Name, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        int     referenceCopyNumber = ploidy?.GetReferenceCopyNumber(segment) ?? 2;
                        CnvType cnvType             = segment.GetCnvType(referenceCopyNumber);

                        // From vcf 4.1 spec:
                        //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String "<ID>") then the padding base is required and POS denotes the
                        //     coordinate of the base preceding the polymorphism.
                        string alternateAllele = cnvType.ToAltId();
                        // Ordinal comparison: the angle brackets are syntax, not linguistic text
                        bool isSymbolicAllele = alternateAllele.StartsWith("<", StringComparison.Ordinal)
                                                && alternateAllele.EndsWith(">", StringComparison.Ordinal);
                        int position = isSymbolicAllele ? segment.Begin : segment.Begin + 1;
                        writer.Write($"{segment.Chr}\t{position}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

                        // The string is already interpolated; it must not be passed on as a composite format
                        // string, otherwise a '{' or '}' in the data would raise a FormatException.
                        writer.Write($"N\t{alternateAllele}\t{segment.QScore}\t{segment.Filter}\t");

                        if (cnvType != CnvType.Reference)
                        {
                            writer.Write($"SVTYPE={cnvType.ToSvType()};");
                        }
                        writer.Write($"END={segment.End}");
                        if (cnvType != CnvType.Reference)
                        {
                            writer.Write($";CNVLEN={segment.End - segment.Begin}");
                        }

                        //  FORMAT field
                        writer.Write("\tRC:BC:CN");
                        if (segment.MajorChromosomeCount.HasValue)
                        {
                            writer.Write(":MCC");
                        }
                        // Sample values, in the same order as the FORMAT keys above
                        writer.Write("\t{0}:{1}:{2}", Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero), segment.BinCount, segment.CopyNumber);
                        if (segment.MajorChromosomeCount.HasValue)
                        {
                            writer.Write(":{0}", segment.MajorChromosomeCount);
                        }
                        writer.WriteLine();
                    }
                }
            }
        }