/// <summary>
/// Builds an <see cref="IGeneralBnd"/> from a VCF record whose single ALT is a breakend string.
/// </summary>
/// <param name="variant">The record to convert; its single ALT is handed to <c>SimpleBreakEnd.Parse</c>.</param>
/// <returns>
/// A <c>GeneralBnd</c> wrapping the record, the interval [Position - 1, Position), the 3' flag
/// and the parsed mate.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when the field selected by the 3' flag does not carry the first base of REF
/// (first field must start with it, otherwise last field must end with it).
/// </exception>
public static IGeneralBnd CreateFromVariant([NotNull] IVcfVariant variant)
{
    var altBnd = variant.GetSingleAlt();
    var thisRef = variant.Ref[0];
    var mate = SimpleBreakEnd.Parse(altBnd, out var firstField, out var lastField);

    // The breakend is treated as 3' whenever the parser reports a non-blank first field.
    var is3Prime = !string.IsNullOrWhiteSpace(firstField);

    // Only the field selected by is3Prime is inspected, exactly one of the two calls runs.
    var altCarriesRefBase = is3Prime
        ? firstField.StartsWith(thisRef)
        : lastField.EndsWith(thisRef);
    if (!altCarriesRefBase)
    {
        throw new InvalidDataException(
            $"Invalid breakend because the alt neither starts nor ends with ref's first base: {variant}");
    }

    var interval = BedInterval.Create(variant.Position - 1, variant.Position);
    return new GeneralBnd(variant, interval, is3Prime, mate);
}
/// <summary>
/// Works out which <see cref="WittyerType"/> a VCF record represents, using its SVTYPE info
/// field together with the CN and GT fields of the chosen sample.
/// </summary>
/// <param name="variant">The record to classify.</param>
/// <param name="isCrossTypeOn">
/// When true, copy-number based calls resolve to Deletion/Duplication; when false they resolve
/// to CopyNumberLoss/CopyNumberGain.
/// </param>
/// <param name="sampleName">Sample to read CN/GT from; when null the first sample is used.</param>
/// <param name="svType">The resolved type; null whenever a failure reason is returned.</param>
/// <returns>Null when a type was resolved, otherwise the reason the record could not be typed.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the record has no SVTYPE info key, or when SVTYPE is neither parsable nor CNV.
/// </exception>
public static FailedReason? ParseFromVariant([NotNull] IVcfVariant variant, bool isCrossTypeOn,
    [CanBeNull] string sampleName, [CanBeNull] out WittyerType svType)
{
    // Reference sites short-circuit: the sample is not inspected at all for them.
    if (variant.IsRefSite())
    {
        svType = CopyNumberReference;
        return null;
    }

    if (IsRefCall(out var samplePloidy, out var copyNumber, out var hasCopyNumber))
    {
        svType = CopyNumberReference;
        return null;
    }

    if (!variant.Info.TryGetValue(VcfConstants.SvTypeKey, out var rawSvType))
    {
        // todo: maybe we can allow small variants, which would not have SVTYPE
        throw new InvalidDataException(
            $"Following variant does not have {VcfConstants.SvTypeKey} info key:\n{variant}");
    }

    svType = null;

    if (rawSvType == SvTypeStrings.TranslocationBreakend)
    {
        // SVTYPE alone cannot distinguish intra-chromosome breakends from translocations,
        // so the mate's contig decides.
        var mate = variant is IBreakEnd breakEnd
            ? breakEnd.Mate
            : SimpleBreakEnd.TryParse(variant.GetSingleAlt()).GetOrThrow();

        if (variant.Contig.Equals(mate.Contig))
        {
            svType = IntraChromosomeBreakend;
        }
        else
        {
            svType = TranslocationBreakend;
        }

        return null;
    }

    if (TryParseSvType(rawSvType, out svType))
    {
        // Types without base-level stats (INV, INS, ...) never need the CN lookup below.
        if (!svType.HasBaseLevelStats)
        {
            return null;
        }
    }
    else if (rawSvType != SvTypeStrings.Cnv)
    {
        // BND was handled above, so an unparsable non-CNV value is something unknown.
        throw new InvalidDataException($"Cannot recognize SVTYPE of {rawSvType}");
    }

    if (!hasCopyNumber)
    {
        // A parsed type here means DEL or DUP without CN, which is accepted as-is;
        // no parsed type means CNV without CN, which cannot be resolved.
        if (svType == null)
        {
            return FailedReason.CnvWithoutCn;
        }

        return null;
    }

    // From here on: CNV with CN, or DEL/DUP with CN (also treated as CNV).
    if (!copyNumber.HasValue)
    {
        // CN is present but is not an integer; drop any SVTYPE=DEL/DUP result.
        svType = null;
        return FailedReason.UndeterminedCn;
    }

    svType = ResolveFromCopyNumber(copyNumber.Value);
    return null;

    // Below ploidy is a loss, anything else is a gain; cross-type mode picks the DEL/DUP flavours.
    WittyerType ResolveFromCopyNumber(int cnValue)
    {
        if (cnValue < samplePloidy)
        {
            return isCrossTypeOn ? Deletion : CopyNumberLoss;
        }

        return isCrossTypeOn ? Duplication : CopyNumberGain;
    }

    // Reads CN and GT from the chosen sample and reports whether the call is copy-number reference.
    bool IsRefCall(out int ploidyP, out int? cnP, out bool hasCnP)
    {
        ploidyP = 2;
        cnP = null;
        hasCnP = false;

        // Not a ref site and no sample column: cannot be a ref call.
        if (variant.Samples.Count == 0)
        {
            return false;
        }

        var chosenSample = sampleName == null ? variant.Samples[0] : variant.Samples[sampleName];
        var fields = chosenSample.SampleDictionary;
        hasCnP = fields.TryGetValue(VcfConstants.CnSampleFieldKey, out var cnText);
        var hasGenotype = fields.TryGetValue(VcfConstants.GenotypeKey, out var genotype);

        if (hasCnP && int.TryParse(cnText, out var parsedCn))
        {
            cnP = parsedCn;
        }

        if (!hasGenotype)
        {
            // Without GT the ploidy stays at the default of 2 and CN must literally be "2".
            return hasCnP && cnText == "2";
        }

        //todo: refining how to deal with ploidy. Also here we don't deal with LOH. assuming CN = ploidy is ref
        var alleles = genotype.Split(VcfConstants.GtPhasedValueDelimiter[0],
            VcfConstants.GtUnphasedValueDelimiter[0]);
        ploidyP = alleles.Length;

        if (cnP == null)
        {
            // No usable CN: fall back to a genotype made only of reference alleles.
            return alleles.All(allele => allele == "0");
        }

        return cnP.Value == ploidyP;
    }
}
/// <summary>
/// Converts a VCF record into the matching Wittyer variant object, or into an "unsupported"
/// variant carrying the reason it was rejected.
/// </summary>
/// <param name="vcfVariant">The record to convert.</param>
/// <param name="sample">The sample associated with the record; may be null.</param>
/// <param name="isTruth">
/// True for truth records: default include/exclude filters are used and the sample FT check is skipped.
/// </param>
/// <param name="sampleName">Sample name forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
/// <param name="inputSpecDict">Per-type settings; a type missing from it is reported as skipped.</param>
/// <param name="bndSet">
/// Breakends still waiting for their mate. A matching entry is removed and paired; otherwise the
/// current breakend is added.
/// </param>
/// <param name="errorList">
/// Receives exception details from variant creation while its count is at most
/// <c>MaxNonSupportedVariantToPrint</c>.
/// </param>
/// <param name="isCrossTypeOn">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
/// <returns>
/// A paired breakend/insertion, an unpaired <c>GeneralBnd</c>, a regular variant, or an unsupported variant.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Type parsing returned <c>FailedReason.Unset</c>.</exception>
/// <exception cref="InvalidDataException">Type parsing returned neither a type nor a failure reason.</exception>
/// <exception cref="InvalidOperationException">A found breakend mate could not be removed from <paramref name="bndSet"/>.</exception>
internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample,
    bool isTruth, [CanBeNull] string sampleName, IReadOnlyDictionary<WittyerType, InputSpec> inputSpecDict,
    IDictionary<IGeneralBnd, IVcfVariant> bndSet, List<string> errorList, bool isCrossTypeOn)
{
    var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);
    if (failedReason != null)
    {
        // Unset is the "no failure" marker, so receiving it as a failure is a bug in the parser.
        // The two-argument constructor is required: the single-string overload would treat the
        // whole text as a parameter name instead of the message.
        return CreateUnsupportedVariant(vcfVariant, sample,
            failedReason.Value == FailedReason.Unset
                ? throw new ArgumentOutOfRangeException(nameof(vcfVariant),
                    $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.ParseFromVariant)}")
                : failedReason.Value,
            isTruth);
    }

    if (svType == null)
    {
        throw new InvalidDataException("svType should not be null with no failed reason");
    }

    // The user did not list this SVTYPE in the input spec, which is taken to mean it should be
    // excluded from comparison entirely.
    if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
    {
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth);
    }

    var unsupportedReason = IsSupportedVariant();
    if (unsupportedReason != FailedReason.Unset)
    {
        return CreateUnsupportedVariant(vcfVariant, sample, unsupportedReason, isTruth);
    }

    var bpd = inputSpec.BasepairDistance;
    var pd = inputSpec.PercentDistance;
    var bins = inputSpec.BinSizes;

    if (svType == WittyerType.Insertion)
    {
        // An insertion uses the same record as both entries of the breakend pair.
        return WittyerBndInternal.Create(vcfVariant, sample, inputSpec.VariantType,
            bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, vcfVariant);
    }

    if (svType == WittyerType.CopyNumberReference
        && vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString)
        && !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
    {
        // Any non-DEL/DUP/CNV that is determined to be reference copy number is not supported.
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.UnsupportedRefCall, isTruth);
    }

    if (svType == WittyerType.TranslocationBreakend || svType == WittyerType.IntraChromosomeBreakend)
    {
        var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

        // Note: a hit means the paired BND is already a key in the dictionary.
        // Check the dictionary's comparer for details of how mates match.
        if (bndSet.TryGetValue(currentBnd, out var secondVariant))
        {
            if (!bndSet.Remove(currentBnd))
            {
                throw new InvalidOperationException(
                    $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
            }

            return WittyerBndInternal.Create(vcfVariant, sample, svType,
                bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, secondVariant);
        }

        // Mate not seen yet: park this breakend and hand it back unpaired.
        bndSet.Add(currentBnd, vcfVariant);
        return currentBnd;
    }

    try
    {
        return WittyerVariantInternal.Create(vcfVariant, sample, svType,
            bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), pd, bpd);
    }
    catch (Exception e)
    {
        // Deliberate best effort: record the failure (up to the print cap) and report the
        // record as unsupported instead of aborting.
        if (errorList.Count <= MaxNonSupportedVariantToPrint)
        {
            errorList.Add(
                new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                    .StringJoin(Environment.NewLine));
        }

        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth);
    }

    // Returns FailedReason.Unset when the record passes every check, otherwise the first failing reason.
    FailedReason IsSupportedVariant()
    {
        // Check filters.
        IReadOnlyCollection<string> includedFilters, excludedFilters;
        if (isTruth)
        {
            includedFilters = WittyerConstants.DefaultIncludeFilters;
            excludedFilters = WittyerConstants.DefaultExcludeFilters;
        }
        else
        {
            includedFilters = inputSpec.IncludedFilters;
            excludedFilters = inputSpec.ExcludedFilters;
        }

        if (vcfVariant.Filters.Any(excludedFilters.Contains)
            || includedFilters.Count > 0
            && (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
        {
            return FailedReason.FilteredBySettings;
        }

        // Types that need overlapping windows are ignored when END equals POS or SVLEN is 0
        // (a length is needed for overlapping windows).
        if (svType.HasOverlappingWindows
            && (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString)
                && vcfVariant.Position.ToString() == endString
                || vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString)
                && svLenString == "0"))
        {
            return FailedReason.InvalidSvLen;
        }

        // BND whose POS equals its mate's position (temporary accommodation for SVLEN=0 INV
        // represented as BND). Note that this branch returns before the checks below.
        if (svType == WittyerType.IntraChromosomeBreakend)
        {
            var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
            return vcfVariant.Position == mate.Position ? FailedReason.InvalidSvLen : FailedReason.Unset;
        }

        // todo: truth does not care about Sample FT tag, is that ok?
        var sampleFilterOk = isTruth
                             || !includedFilters.Contains(VcfConstants.PassFilter)
                             || !vcfVariant.IsPassFilter()
                             || IsSampleFtPassFilter();
        if (!sampleFilterOk)
        {
            return FailedReason.FailedSampleFilter;
        }

        // An include bed is in use, the variant lies within a single contig, and the bed does
        // not contain that contig.
        if (inputSpec.IncludedRegions != null
            && svType != WittyerType.TranslocationBreakend
            && !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
        {
            return FailedReason.OutsideBedRegion;
        }

        return FailedReason.Unset;

        // A missing FT entry counts as passing; a null sample never passes.
        bool IsSampleFtPassFilter()
            => sample != null
               && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft)
                   || ft.Equals(VcfConstants.PassFilter));
    }
}