/// <summary>
/// Builds genotype information from the genotype field of the given sample.
/// </summary>
/// <param name="sample">The sample whose <c>VcfConstants.GenotypeKey</c> field is read.</param>
/// <returns>
/// The result of <c>Create</c> for the raw genotype string, a flag telling whether that string contains the
/// phased delimiter, and the list of allele tokens in their original order.
/// </returns>
/// <exception cref="InvalidDataException">
/// The sample has no genotype field, or one of the allele tokens is neither parseable as an unsigned integer
/// nor equal to ".".
/// </exception>
public static IGenotypeInfo CreateFromSample([NotNull] IVcfSample sample)
{
    // One dictionary lookup instead of ContainsKey followed by the indexer.
    if (!sample.SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var gtString))
    {
        throw new InvalidDataException($"{sample} has not GT field for this variant {sample}");
    }

    var isPhased = gtString.Contains(VcfConstants.GtPhasedValueDelimiter);
    var gtIndices = ImmutableList<string>.Empty.ToBuilder();

    // Tokens are separated by either delimiter, so split on both at once.
    foreach (var gt in gtString.Split(VcfConstants.GtPhasedValueDelimiter[0],
        VcfConstants.GtUnphasedValueDelimiter[0]))
    {
        if (!uint.TryParse(gt, out _) && gt != ".")
        {
            throw new InvalidDataException($"{gtString} is not a valid {VcfConstants.GenotypeKey}");
        }

        gtIndices.Add(gt);
    }

    return Create(gtString, isPhased, gtIndices.ToImmutable());
}
/// <summary>
/// Stores the wrapped sample together with the given decision, match list and failure-reason list.
/// </summary>
/// <param name="baseSample">The underlying sample being wrapped.</param>
/// <param name="wit">Value assigned to <c>Wit</c>.</param>
/// <param name="what">Value assigned to <c>What</c>.</param>
/// <param name="why">Value assigned to <c>Why</c>.</param>
private WittyerSampleInternal(IVcfSample baseSample, WitDecision wit,
    [NotNull] IImmutableList<MatchEnum> what, [NotNull] IImmutableList<FailedReason> why)
    => (_baseSample, Wit, What, Why) = (baseSample, wit, what, why);
/// <summary>
/// Creates the sample wrapper for a variant; which wrapper is returned depends on whether the sample carries
/// a genotype field and/or a copy-number field.
/// </summary>
/// <param name="baseVariant">The variant the sample belongs to; used for reference samples and in error text.</param>
/// <param name="sample">The sample to wrap, or <c>null</c> when there is none.</param>
/// <param name="isReference">When <c>true</c>, the work is delegated to <c>CreateReferenceSample</c>.</param>
/// <returns>
/// A plain, genotyped, copy-number, or genotyped copy-number sample wrapper, according to the fields present.
/// </returns>
/// <exception cref="InvalidDataException">
/// The copy-number field is present but is neither the missing-value string nor an unsigned integer.
/// </exception>
public static IWittyerSample CreateFromVariant(IVcfVariant baseVariant, [CanBeNull] IVcfSample sample,
    bool isReference)
{
    if (isReference)
    {
        return CreateReferenceSample(baseVariant, sample);
    }

    if (sample == null)
    {
        return WittyerSampleInternal.Create(null);
    }

    var plainSample = WittyerSampleInternal.Create(sample);
    var isGenotyped = sample.SampleDictionary.ContainsKey(VcfConstants.GenotypeKey);
    var hasCopyNumber = sample.SampleDictionary.TryGetValue(VcfConstants.CnSampleFieldKey, out var rawCn);

    // No copy-number field: only the genotype decides the wrapper type.
    if (!hasCopyNumber)
    {
        return isGenotyped
            ? WittyerGenotypedSample.Create(plainSample, GenotypeInfo.CreateFromSample(sample)) as IWittyerSample
            : plainSample;
    }

    // A missing-value copy number stays null; anything else has to parse as an unsigned integer.
    uint? copyNumber = null;
    if (rawCn != VcfConstants.MissingValueString)
    {
        if (!uint.TryParse(rawCn, out var parsedCn))
        {
            throw new InvalidDataException(
                $"{VcfConstants.CnSampleFieldKey} does not have a valid value in {baseVariant}");
        }

        copyNumber = parsedCn;
    }

    var cnSample = WittyerCopyNumberSample.Create(plainSample, copyNumber);
    if (isGenotyped)
    {
        return WittyerGenotypedCopyNumberSample.Create(cnSample, GenotypeInfo.CreateFromSample(sample));
    }

    return cnSample;
}
/// <summary>
/// Builds a wittyer variant from a VCF record: computes its bed interval, the CIPOS/CIEND intervals, the
/// distance-based position/end intervals, the winner and the sample wrapper.
/// </summary>
/// <param name="baseVariant">The VCF record to wrap.</param>
/// <param name="sample">The sample of interest, if any.</param>
/// <param name="svType">The already-determined type of the variant.</param>
/// <param name="bins">Bin sizes handed to <c>Winner.Create</c>.</param>
/// <param name="percentageDistance">Percentage distance handed to <c>WittyerUtils.GetPosAndEndInterval</c>.</param>
/// <param name="basepairDistance">Basepair distance handed to <c>WittyerUtils.GetPosAndEndInterval</c>.</param>
/// <returns>The newly constructed variant.</returns>
/// <exception cref="InvalidOperationException">
/// <c>ToBedInterval</c> returned <c>null</c> instead of throwing.
/// </exception>
internal static IWittyerVariant Create([NotNull] IVcfVariant baseVariant, [CanBeNull] IVcfSample sample,
    [NotNull] WittyerType svType, [NotNull] IReadOnlyList<uint> bins, [CanBeNull] double? percentageDistance,
    uint basepairDistance)
{
    // The untrimmed end is kept only for the CI computation below; reflen and binning use the returned interval.
    var trimmedInterval = baseVariant.ToBedInterval(true, out var untrimmedEnd, out var firstBaseIsShared);
    if (trimmedInterval == null)
    {
        throw new InvalidOperationException(
            $"Expected failure of {nameof(WittyerUtils.ToBedInterval)} to throw, but didn't...");
    }

    // CI intervals are always anchored on the original POS/END.
    var ciAnchor = baseVariant.Position;
    if (firstBaseIsShared)
    {
        ciAnchor++;
    }

    var ciPosInterval = ciAnchor.ConvertPositionToCiInterval(baseVariant, WittyerConstants.Cipos);
    var ciEndInterval = untrimmedEnd.ConvertPositionToCiInterval(baseVariant, WittyerConstants.Ciend);

    // Records that do not share their first base (ref sites, complex types, etc.) need a +1 adjustment.
    var distanceStart = !firstBaseIsShared ? trimmedInterval.Start + 1 : trimmedInterval.Start;

    // The PD/BPD intervals are based on the trimmed variant's coordinates.
    var (posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(baseVariant.Contig, percentageDistance,
        basepairDistance, ciPosInterval, distanceStart, ciEndInterval, trimmedInterval.Stop);

    var winner = Winner.Create(svType, trimmedInterval, bins);
    var wittyerSample =
        WittyerSample.CreateFromVariant(baseVariant, sample, svType == WittyerType.CopyNumberReference);

    return new WittyerVariantInternal(svType, baseVariant, trimmedInterval, winner, posInterval,
        ciPosInterval, endInterval, ciEndInterval, wittyerSample);
}
/// <summary>
/// Tells whether the sample passes its sample-level filter field (<c>WittyerConstants.Ft</c>).
/// </summary>
/// <param name="sample">The sample to inspect.</param>
/// <returns>
/// <c>true</c> when the sample has no such field or the field equals <c>VcfConstants.PassFilter</c>;
/// otherwise <c>false</c>.
/// </returns>
private static bool IsSampleFtPassFilter([NotNull] this IVcfSample sample)
    // One dictionary lookup instead of ContainsKey followed by the indexer.
    => !sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft)
       || ft.Equals(VcfConstants.PassFilter);
/// <summary>
/// Builds a breakend pair entry from two VCF records, or from one record passed as both ends.
/// </summary>
/// <param name="first">One record of the pair.</param>
/// <param name="originalSample">The sample of interest, if any.</param>
/// <param name="svType">The already-determined type of the entry.</param>
/// <param name="bins">Bin sizes handed to <c>Winner.Create</c>.</param>
/// <param name="bpd">Basepair distance used to widen the position intervals.</param>
/// <param name="percentageDistance">Percentage distance; only forwarded for intra-chromosome breakends.</param>
/// <param name="second">The other record of the pair, or the same instance as <paramref name="first"/>.</param>
/// <returns>The newly constructed breakend entry.</returns>
internal static IWittyerBnd Create([NotNull] IVcfVariant first, [CanBeNull] IVcfSample originalSample,
    [NotNull] WittyerType svType, [NotNull] IReadOnlyList<uint> bins, uint bpd, double? percentageDistance,
    [NotNull] IVcfVariant second)
{
    // Swapping two distinct records cannot make them the same instance, so this flag stays valid afterwards.
    var isSingleRecord = ReferenceEquals(first, second);

    // Put the two records into comparer order.
    if (!isSingleRecord && ContigAndPositionComparer.Default.Compare(first, second) > 0)
    {
        (first, second) = (second, first);
    }

    var ciPosInterval = first.Position.ConvertPositionToCiInterval(first, WittyerConstants.Cipos);
    var ciEndInterval = isSingleRecord
        ? ciPosInterval // same variant means same intervals.
        : second.Position.ConvertPositionToCiInterval(second, WittyerConstants.Cipos);

    IContigAndInterval posInterval, endInterval;
    if (isSingleRecord)
    {
        // Insertions are trimmed of common bases before the distance window is applied.
        var trimmed = first.TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0).GetOrThrow();
        var margins = (bpd, bpd);
        var (windowStart, windowStop) = trimmed.Position.ConvertPositionToCiInterval(margins);
        WittyerUtils.GetBetterInterval(ciPosInterval, ref windowStart, ref windowStop);
        endInterval = ContigAndInterval.Create(first.Contig, windowStart, windowStop);
        posInterval = endInterval;
    }
    else
    {
        var effectivePd = svType == WittyerType.IntraChromosomeBreakend ? percentageDistance : null;
        (posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(first.Contig, effectivePd, bpd,
            ciPosInterval, first.Position, ciEndInterval, second.Position, second.Contig);
    }

    Winner winner;
    if (svType == WittyerType.TranslocationBreakend)
    {
        winner = Winner.Create(svType);
    }
    else
    {
        IInterval<uint> bedInterval;
        if (svType == WittyerType.Insertion)
        {
            bedInterval = GetInsertionInterval(first);
        }
        else
        {
            // Step the start back by one unless it is already zero.
            var start = first.Position;
            if (start > 0)
            {
                start--;
            }

            bedInterval = BedInterval.Create(start, second.Position);
        }

        winner = Winner.Create(svType, bedInterval, bins);
    }

    var sample = WittyerSample.CreateFromVariant(first, originalSample, false);
    return new WittyerBndInternal(svType, first, posInterval, ciPosInterval, second, endInterval,
        ciEndInterval, winner, sample);
}
/// <summary>
/// Creates a breakend entry for an insertion by passing the same record as both ends of the pair.
/// </summary>
/// <param name="first">The insertion record.</param>
/// <param name="originalSample">The sample of interest, if any.</param>
/// <param name="svType">The already-determined type of the entry.</param>
/// <param name="bins">Bin sizes forwarded to <c>Create</c>.</param>
/// <param name="bpd">Basepair distance forwarded to <c>Create</c>.</param>
/// <param name="pd">Percentage distance forwarded to <c>Create</c>.</param>
/// <returns>Whatever <c>Create</c> returns for the single-record case.</returns>
internal static IWittyerBnd CreateInsertion([NotNull] IVcfVariant first, [CanBeNull] IVcfSample originalSample,
    [NotNull] WittyerType svType, [NotNull] IReadOnlyList<uint> bins, uint bpd, double? pd)
{
    return Create(first, originalSample, svType, bins, bpd, pd, first);
}
/// <summary>
/// Returns a copy of the record whose samples are replaced by a single sample holding the original sample
/// fields plus a "not assessed" decision and the given failure reason.
/// </summary>
/// <param name="baseVariant">The record to copy.</param>
/// <param name="sample">The sample whose fields are carried over; <c>null</c> means no fields are carried over.</param>
/// <param name="why">The reason written into the Why field.</param>
/// <param name="isTruth">Selects the default truth sample name when <c>true</c>, the default query sample name otherwise.</param>
/// <returns>The rebuilt record.</returns>
/// <exception cref="InvalidDataException">The selected default sample name is <c>null</c>.</exception>
internal static IVcfVariant CreateUnsupportedVariant([NotNull] IVcfVariant baseVariant,
    [CanBeNull] IVcfSample sample, FailedReason why, bool isTruth)
{
    var outputName = isTruth ? SamplePair.Default.TruthSampleName : SamplePair.Default.QuerySampleName;
    if (outputName == null)
    {
        throw new InvalidDataException(
            $"Somehow, {nameof(SamplePair)}.{nameof(SamplePair.Default)} was null!!");
    }

    var builder = SampleDictionaries.CreateBuilder()
        .AddSample(outputName).MoveOnToDictionaries();

    // Original fields first, then the two annotation fields.
    var originalFields = sample?.SampleDictionary ?? ImmutableDictionary<string, string>.Empty.AsEnumerable();
    var allFields = originalFields
        .Select(entry => (entry.Key, entry.Value))
        .FollowedWith(
            (WittyerConstants.WittyerMetaInfoLineKeys.Wit, NotAssessed),
            (WittyerConstants.WittyerMetaInfoLineKeys.Why, why.ToString()));

    foreach (var field in allFields)
    {
        builder.SetSampleField(outputName, field);
    }

    return baseVariant.ToBuilder().SetSamples(builder.Build()).Build();
}
/// <summary>
/// Turns one VCF record into the object used for comparison, or into an "unsupported" copy of the record
/// annotated with the reason it was rejected.
/// </summary>
/// <param name="vcfVariant">The record to convert.</param>
/// <param name="sample">The sample of interest on that record, if any.</param>
/// <param name="isTruth">
/// Selects the default include/exclude filter sets instead of the ones from the input spec, disables the
/// sample filter check, and is forwarded to <c>CreateUnsupportedVariant</c>.
/// </param>
/// <param name="sampleName">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
/// <param name="inputSpecDict">Per-type settings; a type that is missing from it is reported as skipped.</param>
/// <param name="bndSet">
/// Breakends still waiting for their mate. An entry is added when the mate has not been seen yet and removed
/// when the pair is completed.
/// </param>
/// <param name="errorList">Receives the text of exceptions caught while building a regular variant, up to a limit.</param>
/// <param name="isCrossTypeOn">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
/// <returns>
/// An unsupported copy of the record, a breakend pair entry, the unpaired breakend key that was just stored in
/// <paramref name="bndSet"/>, or a regular wittyer variant.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Parsing reported <c>FailedReason.Unset</c> as a failure.</exception>
/// <exception cref="InvalidDataException">Parsing reported no failure but produced no type.</exception>
/// <exception cref="InvalidOperationException">
/// A mate was found in <paramref name="bndSet"/> but could not be removed from it.
/// </exception>
internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample,
    bool isTruth, [CanBeNull] string sampleName, IReadOnlyDictionary<WittyerType, InputSpec> inputSpecDict,
    IDictionary<IGeneralBnd, IVcfVariant> bndSet, List<string> errorList, bool isCrossTypeOn)
{
    var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);
    if (failedReason != null)
    {
        if (failedReason.Value == FailedReason.Unset)
        {
            // Use the (paramName, message) overload: the single-string overload would treat the text as
            // the parameter name rather than as the message.
            throw new ArgumentOutOfRangeException(nameof(vcfVariant),
                $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.TryParse)}");
        }

        return CreateUnsupportedVariant(vcfVariant, sample, failedReason.Value, isTruth);
    }

    if (svType == null)
    {
        throw new InvalidDataException("svType should not be null with no failed reason");
    }

    // The user did not list this SVTYPE in the input spec; treat that as a request to exclude the type
    // from comparison entirely.
    if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
    {
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth);
    }

    var supportCheck = IsSupportedVariant();
    if (!supportCheck.Equals(FailedReason.Unset))
    {
        return CreateUnsupportedVariant(vcfVariant, sample, supportCheck, isTruth);
    }

    var bpd = inputSpec.BasepairDistance;
    var pd = inputSpec.PercentDistance;
    var bins = inputSpec.BinSizes;

    if (svType == WittyerType.Insertion)
    {
        // An insertion uses the same record as both entries of the breakend pair.
        return WittyerBndInternal.Create(vcfVariant, sample, inputSpec.VariantType, GetBinSizes(), bpd, pd,
            vcfVariant);
    }

    if (svType == WittyerType.CopyNumberReference
        && vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString)
        && !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
    {
        // Any non-DEL/DUP/CNV that is determined to be reference copy number is not supported.
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.UnsupportedRefCall, isTruth);
    }

    if (svType == WittyerType.TranslocationBreakend || svType == WittyerType.IntraChromosomeBreakend)
    {
        var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

        // A hit means the paired BND is already stored as a key in the dictionary; see the dictionary's
        // comparer for how mates are matched.
        if (bndSet.TryGetValue(currentBnd, out var secondVariant))
        {
            if (!bndSet.Remove(currentBnd))
            {
                throw new InvalidOperationException(
                    $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
            }

            return WittyerBndInternal.Create(vcfVariant, sample, svType, GetBinSizes(), bpd, pd, secondVariant);
        }

        // First end of the pair: remember it and hand back the placeholder key.
        bndSet.Add(currentBnd, vcfVariant);
        return currentBnd;
    }

    try
    {
        return WittyerVariantInternal.Create(vcfVariant, sample, svType, GetBinSizes(), pd, bpd);
    }
    catch (Exception e)
    {
        // Record the failure (while under the print limit) and report the record as unsupported
        // instead of propagating the exception.
        if (errorList.Count <= MaxNonSupportedVariantToPrint)
        {
            errorList.Add(
                new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                    .StringJoin(Environment.NewLine));
        }

        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth);
    }

    // Projects the configured bins down to their sizes.
    IReadOnlyList<uint> GetBinSizes() => bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList();

    // Returns FailedReason.Unset when the record passes every check, otherwise the first failing reason.
    FailedReason IsSupportedVariant()
    {
        // Check filters.
        IReadOnlyCollection<string> includedFilters, excludedFilters;
        if (isTruth)
        {
            includedFilters = WittyerConstants.DefaultIncludeFilters;
            excludedFilters = WittyerConstants.DefaultExcludeFilters;
        }
        else
        {
            includedFilters = inputSpec.IncludedFilters;
            excludedFilters = inputSpec.ExcludedFilters;
        }

        if (vcfVariant.Filters.Any(excludedFilters.Contains)
            || includedFilters.Count > 0
            && (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
        {
            return FailedReason.FilteredBySettings;
        }

        // Records of types with overlapping windows are ignored when END equals POS or SVLEN is 0
        // (a length is needed for overlapping windows).
        if (svType.HasOverlappingWindows
            && (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString)
                && vcfVariant.Position.ToString() == endString
                || vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString)
                && svLenString == "0"))
        {
            return FailedReason.InvalidSvLen;
        }

        // A BND whose POS and ALT position are the same (temporarily accommodates SVLEN=0 INV
        // represented as BND).
        // NOTE(review): this return also skips the sample filter and include-bed checks below for
        // intra-chromosome breakends - confirm that is intended.
        if (svType == WittyerType.IntraChromosomeBreakend)
        {
            var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
            return vcfVariant.Position == mate.Position ? FailedReason.InvalidSvLen : FailedReason.Unset;
        }

        // todo: truth does not care about Sample FT tag, is that ok?
        var sampleFilterOk = isTruth
                             || !includedFilters.Contains(VcfConstants.PassFilter)
                             || !vcfVariant.IsPassFilter()
                             || IsSampleFtPassFilter();
        if (!sampleFilterOk)
        {
            return FailedReason.FailedSampleFilter;
        }

        // An include bed is in use, the variant lies within a single contig, and the bed does not
        // contain that contig.
        if (inputSpec.IncludedRegions != null
            && svType != WittyerType.TranslocationBreakend
            && !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
        {
            return FailedReason.OutsideBedRegion;
        }

        return FailedReason.Unset;

        // A missing sample never passes; a sample without the FT field, or with FT equal to PASS, does.
        bool IsSampleFtPassFilter()
            => sample != null
               && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft)
                   || ft.Equals(VcfConstants.PassFilter));
    }
}
/// <summary>
/// Creates a genotyped copy-number sample for a reference call: the copy number equals the ploidy and the
/// genotype is the reference genotype for that ploidy.
/// </summary>
/// <param name="baseVariant">Not used by this method.</param>
/// <param name="sample">The sample to wrap, or <c>null</c> when there is none.</param>
/// <returns>
/// A sample whose ploidy is the number of alleles in the sample's genotype field, or 2 when there is no
/// sample or no genotype field; phasing follows the presence of the phased delimiter in that field.
/// </returns>
internal static IWittyerGenotypedCopyNumberSample CreateReferenceSample([NotNull] IVcfVariant baseVariant,
    [CanBeNull] IVcfSample sample)
{
    var ploidy = 2;
    if (sample == null)
    {
        return WittyerGenotypedCopyNumberSample.Create(
            WittyerCopyNumberSample.Create(WittyerSampleInternal.Create(null), (uint)ploidy),
            GenotypeInfo.CreateRef(ploidy, false));
    }

    var isPhased = false;
    if (sample.SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var originalGt))
    {
        isPhased = originalGt.Contains(VcfConstants.GtPhasedValueDelimiter);

        // Split on both delimiters so a genotype that mixes phased and unphased separators is still
        // counted allele by allele; splitting on just one of them would undercount such a genotype.
        ploidy = originalGt
            .Split(VcfConstants.GtPhasedValueDelimiter[0], VcfConstants.GtUnphasedValueDelimiter[0])
            .Length;
    }

    var cnSample = WittyerCopyNumberSample.Create(WittyerSampleInternal.Create(sample), (uint)ploidy);
    return WittyerGenotypedCopyNumberSample.Create(cnSample, GenotypeInfo.CreateRef(ploidy, isPhased));
}
/// <summary>
/// Creates a sample wrapper carrying the given decision, match list and failure-reason list.
/// </summary>
/// <param name="baseSample">The underlying sample to wrap.</param>
/// <param name="wit">The decision to store.</param>
/// <param name="what">The match values to store.</param>
/// <param name="why">The failure reasons to store.</param>
/// <returns>The wrapper produced by <c>WittyerSampleInternal.Create</c>.</returns>
public static IWittyerSample Create([NotNull] IVcfSample baseSample, WitDecision wit,
    [NotNull] IImmutableList<MatchEnum> what, [NotNull] IImmutableList<FailedReason> why)
{
    return WittyerSampleInternal.Create(baseSample, wit, what, why);
}
/// <summary>
/// Creates a sample wrapper from explicit decision, match and failure-reason values.
/// </summary>
/// <param name="baseSample">The underlying sample to wrap.</param>
/// <param name="wit">The decision to store.</param>
/// <param name="what">The match values to store.</param>
/// <param name="why">The failure reasons to store.</param>
/// <returns>A new instance holding exactly the given values.</returns>
internal static WittyerSampleInternal Create([NotNull] IVcfSample baseSample, WitDecision wit,
    [NotNull] IImmutableList<MatchEnum> what, [NotNull] IImmutableList<FailedReason> why)
{
    return new WittyerSampleInternal(baseSample, wit, what, why);
}
/// <summary>
/// Creates a sample wrapper in its initial state: decision <c>WitDecision.NotAssessed</c>, with empty match
/// and failure-reason lists.
/// </summary>
/// <param name="baseSample">The underlying sample to wrap; may be <c>null</c>.</param>
/// <returns>A new, not-yet-assessed instance.</returns>
internal static WittyerSampleInternal Create([CanBeNull] IVcfSample baseSample)
{
    return new WittyerSampleInternal(
        baseSample,
        WitDecision.NotAssessed,
        ImmutableList<MatchEnum>.Empty,
        ImmutableList<FailedReason>.Empty);
}