/// <summary>
/// Builds a new filter from this filter's failed tags followed by <paramref name="newTag"/>.
/// </summary>
/// <param name="newTag">Tag appended after the existing failed filter tags.</param>
/// <returns>The filter produced by <c>CanvasFilter.Create</c> for the extended tag list.</returns>
public CanvasFilter AddFilter(string newTag)
{
    // Copy the current tags so the extra tag is added to a fresh list, not to this instance's tags.
    var extendedTags = new List<string>(this.FailedFilterTags) { newTag };
    return CanvasFilter.Create(extendedTags);
}
/// <summary>
/// Outputs the copy number calls to a text file.
/// Records are written contig by contig, following the order of <c>genome.Contigs()</c>;
/// within a contig, records keep the order in which they appear in <paramref name="segmentsOfAllSamples"/>.
/// </summary>
/// <param name="segmentsOfAllSamples">One entry per output record; each entry holds that record's segment for every sample.</param>
/// <param name="ploidies">Per-sample ploidy information, paired positionally with the samples' segments. A null entry means a reference copy number of 2.</param>
/// <param name="genome">Genome metadata whose contigs determine the order of the output.</param>
/// <param name="writer">Destination for the variant records.</param>
/// <param name="denovoQualityThreshold">Only its presence is used here; it is forwarded as a flag to the format/sample field writer.</param>
private static void WriteVariants(IEnumerable<ISampleMap<CanvasSegment>> segmentsOfAllSamples, List<PloidyInfo> ploidies, GenomeMetadata genome,
    BgzipOrStreamWriter writer, int? denovoQualityThreshold = null)
{
    // Materialize every record's per-sample segments exactly once instead of once per contig.
    var records = segmentsOfAllSamples
        .Select(sampleMap => (SampleMap: sampleMap, Segments: sampleMap.Values.ToArray()))
        .ToArray();
    if (records.Length == 0)
    {
        // No segments means no variant records to write.
        return;
    }

    int nSamples = records[0].Segments.Length;

    // Bucket records by the chromosome of their first sample's segment. ToLookup keeps the input
    // order inside each bucket, so the output is identical to scanning all records for every contig.
    var recordsByChromosome = records.ToLookup(
        record => record.Segments.First().Chr,
        StringComparer.OrdinalIgnoreCase);

    foreach (GenomeMetadata.SequenceMetadata chromosome in genome.Contigs())
    {
        // A contig without any record yields an empty sequence here.
        foreach (var (sampleMap, currentSegments) in recordsByChromosome[chromosome.Name])
        {
            var firstSampleSegment = currentSegments[0];
            var recordLevelFilter = CanvasFilter.GetRecordLevelFilterFromSampleFiltersOnly(
                    sampleMap
                        .Select(x => x.Value.Filter)
                        .ToReadOnlyList())
                .ToVcfString();

            // Reference copy number per sample; samples without ploidy information default to 2.
            var referenceCopyNumbers = currentSegments
                .Zip(ploidies, (segment, ploidy) => ploidy?.GetReferenceCopyNumber(segment) ?? 2)
                .ToList();

            var cnvTypes = new CnvType[nSamples];
            var sampleSetAlleleCopyNumbers = new int[nSamples][];
            for (int sampleIndex = 0; sampleIndex < nSamples; sampleIndex++)
            {
                (cnvTypes[sampleIndex], sampleSetAlleleCopyNumbers[sampleIndex]) = currentSegments[sampleIndex]
                    .GetCnvTypeAndAlleleCopyNumbers(referenceCopyNumbers[sampleIndex]);
            }

            var sampleSetCnvType = AssignCnvType(cnvTypes);
            var (alternateAllele, genotypes) = GetAltAllelesAndGenotypes(sampleSetAlleleCopyNumbers);
            WriteColumnsUntilInfoField(writer, firstSampleSegment, sampleSetCnvType, alternateAllele, recordLevelFilter, nSamples > 1);
            WriteFormatAndSampleFields(writer, currentSegments, genotypes, denovoQualityThreshold.HasValue);
        }
    }
}
/// <summary>
/// Writes the VCF header (meta-information lines followed by the column title line) to
/// <paramref name="writer"/> and returns the genome metadata loaded along the way.
/// </summary>
/// <param name="segments">Segments handed to the ploidy/coverage header helper and to the chromosome-name sanity check.</param>
/// <param name="diploidCoverage">Forwarded to the ploidy/coverage header helper when there is exactly one sample.</param>
/// <param name="wholeGenomeFastaDirectory">Directory combined with "genome.fa" for the ##reference line and with "GenomeSize.xml" to load the genome metadata.</param>
/// <param name="sampleNames">Sample names appended as columns after FORMAT in the title line.</param>
/// <param name="extraHeaders">Additional header lines written as given; may be null.</param>
/// <param name="writer">Destination for the header lines.</param>
/// <param name="qualityThreshold">Value used in the ID and description of the quality ##FILTER line.</param>
/// <param name="denovoQualityThreshold">When set, a DQ ##FORMAT line quoting this value is written.</param>
/// <param name="sizeThreshold">When set, a size ##FILTER line is written.</param>
/// <returns>The genome metadata deserialized from GenomeSize.xml.</returns>
private static GenomeMetadata WriteVcfHeader(List<CanvasSegment> segments, double? diploidCoverage,
    string wholeGenomeFastaDirectory, List<string> sampleNames, List<string> extraHeaders, BgzipOrStreamWriter writer,
    int qualityThreshold, int? denovoQualityThreshold, int? sizeThreshold)
{
    // Fixed preamble: file format, producing tool and reference FASTA.
    writer.WriteLine("##fileformat=VCFv4.1");
    writer.WriteLine($"##source={CanvasVersionInfo.NameString} {CanvasVersionInfo.VersionString}");
    string referencePath = Path.Combine(wholeGenomeFastaDirectory, "genome.fa");
    writer.WriteLine($"##reference={referencePath}");

    // ##OverallPloidy and ##DiploidCoverage only make sense for a single-sample file.
    bool isSingleSample = sampleNames.Count == 1;
    if (isSingleSample)
    {
        AddPloidyAndCoverageHeaders(writer, segments, diploidCoverage);
    }

    if (extraHeaders != null)
    {
        foreach (string extraHeader in extraHeaders)
        {
            writer.WriteLine(extraHeader);
        }
    }

    // One ##contig line per sequence listed in GenomeSize.xml.
    string genomeSizePath = Path.Combine(wholeGenomeFastaDirectory, "GenomeSize.xml");
    var genome = new GenomeMetadata();
    genome.Deserialize(new FileLocation(genomeSizePath));
    foreach (GenomeMetadata.SequenceMetadata contig in genome.Contigs())
    {
        writer.WriteLine($"##contig=<ID={contig.Name},length={contig.Length}>");
    }

    // ALT and FILTER definitions.
    writer.WriteLine("##ALT=<ID=DUP,Description=\"Region of elevated copy number relative to the reference\">");
    WriteHeaderAllAltCnTags(writer);
    writer.WriteLine($"##FILTER=<ID=q{qualityThreshold},Description=\"Quality below {qualityThreshold}\">");
    if (sizeThreshold.HasValue)
    {
        string sizeFilterId = CanvasFilter.GetCnvSizeFilter(sizeThreshold.Value, out var sizeLimit);
        writer.WriteLine($"##FILTER=<ID={sizeFilterId},Description=\"Length shorter than {sizeLimit.Number} {sizeLimit.Units}\">");
    }

    // Fixed-text lines, kept in the exact order they must appear in the output.
    string[] fixedFilterInfoAndFormatLines =
    {
        "##FILTER=<ID=FailedFT,Description=\"Sample-level filter failed in all the samples\">",
        "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">",
        "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">",
        "##INFO=<ID=CNVLEN,Number=1,Type=Integer,Description=\"Number of reference positions spanned by this CNV\">",
        "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">",
        "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">",
        "##INFO=<ID=SUBCLONAL,Number=0,Type=Flag,Description=\"Subclonal variant\">",
        "##INFO=<ID=COMMONCNV,Number=0,Type=Flag,Description=\"Common CNV variant identified from pre-specified bed intervals\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=RC,Number=1,Type=Float,Description=\"Mean counts per bin in the region\">",
        "##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Number of bins in the region\">",
        "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Copy number genotype for imprecise events\">",
        "##FORMAT=<ID=MCC,Number=1,Type=Integer,Description=\"Major chromosome count (equal to copy number for LOH regions)\">",
        "##FORMAT=<ID=MCCQ,Number=1,Type=Float,Description=\"Major chromosome count quality score\">",
        "##FORMAT=<ID=QS,Number=1,Type=Float,Description=\"Phred-scaled quality score. If CN is reference then this is -10log10(prob(variant)) otherwise this is -10log10(prob(no variant).\">"
    };
    foreach (string headerLine in fixedFilterInfoAndFormatLines)
    {
        writer.WriteLine(headerLine);
    }

    // The DQ field is only declared when a de novo quality threshold was supplied.
    if (denovoQualityThreshold.HasValue)
    {
        writer.WriteLine($"##FORMAT=<ID=DQ,Number=1,Type=Float,Description=\"De novo quality. Threshold for passing de novo call: {denovoQualityThreshold}\">");
    }

    writer.WriteLine("##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Sample filter, 'PASS' indicates that all filters have passed for this sample\">");

    // Title line: the fixed VCF columns followed by one column per sample.
    string[] fixedColumns = { "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT" };
    writer.WriteLine(string.Join("\t", fixedColumns.Concat(sampleNames)));

    SanityCheckChromosomeNames(genome, segments);
    return genome;
}
/// <summary>
/// Returns <paramref name="recordLevelFilter"/> unchanged when at least one sample filter passes;
/// otherwise returns a new filter whose tags are the record-level failed tags followed by
/// <c>AllSampleFailedTag</c>.
/// </summary>
/// <param name="recordLevelFilter">The current record-level filter.</param>
/// <param name="sampleFilters">The per-sample filters for the same record.</param>
/// <returns>The original record-level filter, or one extended with the all-samples-failed tag.</returns>
public static CanvasFilter UpdateRecordLevelFilter(CanvasFilter recordLevelFilter, IReadOnlyList<CanvasFilter> sampleFilters)
{
    bool anySamplePasses = sampleFilters.Any(sampleFilter => sampleFilter.IsPass);
    if (anySamplePasses)
    {
        return recordLevelFilter;
    }

    // No sample passed (this includes an empty sample list): add the all-samples-failed tag.
    var extendedTags = recordLevelFilter.FailedFilterTags.Concat(AllSampleFailedTag);
    return Create(extendedTags);
}