示例#1
0
        /// <summary>
        /// Builds an <see cref="IGenotypeInfo"/> from the GT entry of <paramref name="sample"/>.
        /// </summary>
        /// <param name="sample">Sample whose <c>SampleDictionary</c> is searched for <c>VcfConstants.GenotypeKey</c>.</param>
        /// <returns>The result of <c>Create</c> for the raw GT string, its phasing flag and its allele tokens.</returns>
        /// <exception cref="InvalidDataException">
        /// The sample has no GT entry, or one of the GT tokens is neither an unsigned integer nor ".".
        /// </exception>
        public static IGenotypeInfo CreateFromSample([NotNull] IVcfSample sample)
        {
            // One dictionary lookup instead of ContainsKey followed by the indexer.
            if (!sample.SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var gtString))
            {
                throw new InvalidDataException($"{sample} has not GT field for this variant {sample}");
            }

            // A single phased delimiter anywhere in the string marks the genotype as phased.
            var isPhased = gtString.Contains(VcfConstants.GtPhasedValueDelimiter);

            var gtIndices = ImmutableList <string> .Empty.ToBuilder();

            // Split on both delimiters so every allele token is validated individually.
            foreach (var gt in gtString.Split(VcfConstants.GtPhasedValueDelimiter[0],
                                              VcfConstants.GtUnphasedValueDelimiter[0]))
            {
                // Only unsigned integers and the literal "." are accepted as tokens.
                if (!uint.TryParse(gt, out _) && gt != ".")
                {
                    throw new InvalidDataException($"{gtString} is not a valid {VcfConstants.GenotypeKey}");
                }

                gtIndices.Add(gt);
            }

            return Create(gtString, isPhased, gtIndices.ToImmutable());
        }
示例#2
0
 /// <summary>
 /// Stores the wrapped sample together with its wit decision, match list and failure reasons.
 /// </summary>
 /// <param name="baseSample">The underlying VCF sample, kept in <c>_baseSample</c>.</param>
 /// <param name="wit">Value assigned to <c>Wit</c>.</param>
 /// <param name="what">Value assigned to <c>What</c>.</param>
 /// <param name="why">Value assigned to <c>Why</c>.</param>
 private WittyerSampleInternal(IVcfSample baseSample, WitDecision wit,
                               [NotNull] IImmutableList <MatchEnum> what, [NotNull] IImmutableList <FailedReason> why)
 => (_baseSample, Wit, What, Why) = (baseSample, wit, what, why);
示例#3
0
        /// <summary>
        /// Wraps <paramref name="sample"/> in the most specific wittyer sample type that its fields allow.
        /// </summary>
        /// <param name="baseVariant">Variant the sample belongs to; used for reference samples and in error text.</param>
        /// <param name="sample">The VCF sample, or <c>null</c> when the variant has none.</param>
        /// <param name="isReference">When <c>true</c>, the work is delegated to <c>CreateReferenceSample</c>.</param>
        /// <returns>
        /// A plain, genotyped, copy-number, or genotyped copy-number sample, depending on whether the
        /// GT and CN keys are present in the sample dictionary.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The CN value is neither the missing-value string nor an unsigned integer.
        /// </exception>
        public static IWittyerSample CreateFromVariant(IVcfVariant baseVariant, [CanBeNull] IVcfSample sample, bool isReference)
        {
            if (isReference)
            {
                return CreateReferenceSample(baseVariant, sample);
            }

            var plainSample = WittyerSampleInternal.Create(sample);

            if (sample == null)
            {
                return plainSample;
            }

            var fields    = sample.SampleDictionary;
            var genotyped = fields.ContainsKey(VcfConstants.GenotypeKey);

            if (fields.TryGetValue(VcfConstants.CnSampleFieldKey, out var rawCopyNumber))
            {
                // The missing-value string maps to null; anything else must parse as an unsigned integer.
                uint?copyNumber = null;

                if (rawCopyNumber != VcfConstants.MissingValueString)
                {
                    if (!uint.TryParse(rawCopyNumber, out var parsedCopyNumber))
                    {
                        throw new InvalidDataException($"{VcfConstants.CnSampleFieldKey} does not have a valid value in {baseVariant}");
                    }

                    copyNumber = parsedCopyNumber;
                }

                var copyNumberSample = WittyerCopyNumberSample.Create(plainSample, copyNumber);

                if (genotyped)
                {
                    // The genotype is parsed only after the copy number has been validated.
                    var genotype = GenotypeInfo.CreateFromSample(sample);
                    return WittyerGenotypedCopyNumberSample.Create(copyNumberSample, genotype);
                }

                return copyNumberSample;
            }

            // No copy number field: the sample is either genotyped or plain.
            if (genotyped)
            {
                return WittyerGenotypedSample.Create(plainSample, GenotypeInfo.CreateFromSample(sample))
                       as IWittyerSample;
            }

            return plainSample;
        }
示例#4
0
        /// <summary>
        /// Builds a wittyer variant from a VCF record by computing its bed interval, its CIPOS/CIEND
        /// intervals, its distance-based POS/END intervals, its winner and its sample.
        /// </summary>
        /// <param name="baseVariant">The VCF record to convert.</param>
        /// <param name="sample">The sample to attach, or <c>null</c>.</param>
        /// <param name="svType">The variant's type; also decides whether the sample is built as a reference sample.</param>
        /// <param name="bins">Bins forwarded to <c>Winner.Create</c>.</param>
        /// <param name="percentageDistance">Percentage distance forwarded to <c>WittyerUtils.GetPosAndEndInterval</c>.</param>
        /// <param name="basepairDistance">Basepair distance forwarded to <c>WittyerUtils.GetPosAndEndInterval</c>.</param>
        /// <exception cref="InvalidOperationException"><c>ToBedInterval</c> returned <c>null</c> instead of throwing.</exception>
        internal static IWittyerVariant Create([NotNull] IVcfVariant baseVariant,
                                               [CanBeNull] IVcfSample sample, [NotNull] WittyerType svType,
                                               [NotNull] IReadOnlyList <uint> bins, [CanBeNull] double?percentageDistance,
                                               uint basepairDistance)
        {
            // originalEnd feeds the CIEND interval below; the returned interval feeds the winner and the pd/bpd intervals.
            var bedInterval = baseVariant.ToBedInterval(true, out var originalEnd, out var sharedFirstBase)
                              ?? throw new InvalidOperationException(
                                  $"Expected failure of {nameof(WittyerUtils.ToBedInterval)} to throw, but didn't...");

            // CI intervals are always anchored on the original POS/END.
            var ciAnchor = baseVariant.Position;

            if (sharedFirstBase)
            {
                ciAnchor++;
            }

            var ciPosInterval = ciAnchor.ConvertPositionToCiInterval(baseVariant, WittyerConstants.Cipos);
            var ciEndInterval = originalEnd.ConvertPositionToCiInterval(baseVariant, WittyerConstants.Ciend);

            // Not sharing the first base (ref site, complex types, etc.) needs a one-base adjustment.
            var trimmedStart = !sharedFirstBase
                ? bedInterval.Start + 1
                : bedInterval.Start;

            // The pd/bpd intervals are based on the trimmed variant's coordinates.
            var(posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(
                baseVariant.Contig, percentageDistance, basepairDistance,
                ciPosInterval, trimmedStart, ciEndInterval, bedInterval.Stop);

            var winner        = Winner.Create(svType, bedInterval, bins);
            var wittyerSample = WittyerSample.CreateFromVariant(baseVariant, sample,
                                                                svType == WittyerType.CopyNumberReference);

            return new WittyerVariantInternal(svType, baseVariant, bedInterval, winner,
                                              posInterval, ciPosInterval, endInterval, ciEndInterval,
                                              wittyerSample);
        }
示例#5
0
 /// <summary>
 /// Tells whether the sample-level FT field allows the sample through.
 /// </summary>
 /// <param name="sample">Sample whose <c>SampleDictionary</c> is checked for <c>WittyerConstants.Ft</c>.</param>
 /// <returns>
 /// <c>true</c> when the sample has no FT entry or its FT value equals <c>VcfConstants.PassFilter</c>.
 /// </returns>
 private static bool IsSampleFtPassFilter([NotNull] this IVcfSample sample)
 // TryGetValue performs one lookup instead of ContainsKey followed by the indexer.
 => !sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft) ||
 ft.Equals(VcfConstants.PassFilter);
        /// <summary>
        /// Builds a breakend pair from two VCF records, or from one record passed as both
        /// <paramref name="first"/> and <paramref name="second"/>.
        /// </summary>
        /// <param name="first">One record of the pair.</param>
        /// <param name="originalSample">The sample to attach, or <c>null</c>.</param>
        /// <param name="svType">The variant type; selects how the winner and the intervals are computed.</param>
        /// <param name="bins">Bins forwarded to <c>Winner.Create</c> for non-translocation types.</param>
        /// <param name="bpd">Basepair distance used for the POS/END intervals.</param>
        /// <param name="percentageDistance">Percentage distance; only forwarded for intra-chromosome breakends.</param>
        /// <param name="second">The other record of the pair, or the same instance as <paramref name="first"/>.</param>
        internal static IWittyerBnd Create([NotNull] IVcfVariant first, [CanBeNull] IVcfSample originalSample,
                                           [NotNull] WittyerType svType, [NotNull] IReadOnlyList <uint> bins, uint bpd, double?percentageDistance,
                                           [NotNull] IVcfVariant second)
        {
            // Reference identity is not affected by the reordering below, so it is evaluated once.
            var isSingleRecord = ReferenceEquals(first, second);

            // Order two distinct records so that the lower contig/position comes first.
            if (!isSingleRecord && ContigAndPositionComparer.Default.Compare(first, second) > 0)
            {
                (first, second) = (second, first);
            }

            var ciPosInterval = first.Position.ConvertPositionToCiInterval(first, WittyerConstants.Cipos);
            var ciEndInterval = !isSingleRecord
                ? second.Position.ConvertPositionToCiInterval(second, WittyerConstants.Cipos)
                : ciPosInterval; // same variant means same intervals.

            IContigAndInterval posInterval, endInterval;

            if (!isSingleRecord)
            {
                (posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(first.Contig,
                                                                               svType == WittyerType.IntraChromosomeBreakend ? percentageDistance : null, bpd,
                                                                               ciPosInterval, first.Position, ciEndInterval, second.Position, second.Contig);
            }
            else
            {
                // A single record (insertion) is trimmed first, then widened by bpd on both sides.
                var normalized = first.TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0).GetOrThrow();
                var distances  = (bpd, bpd);
                var(intervalStart, intervalStop) = normalized.Position.ConvertPositionToCiInterval(distances);
                WittyerUtils.GetBetterInterval(ciPosInterval, ref intervalStart, ref intervalStop);
                endInterval = ContigAndInterval.Create(first.Contig, intervalStart, intervalStop);
                posInterval = endInterval;
            }

            Winner winner;

            if (svType == WittyerType.TranslocationBreakend)
            {
                winner = Winner.Create(svType);
            }
            else
            {
                IInterval <uint> span;

                if (svType == WittyerType.Insertion)
                {
                    span = GetInsertionInterval(first);
                }
                else
                {
                    // Step one base back from the first position, unless it is already 0.
                    var spanStart = first.Position;

                    if (spanStart > 0)
                    {
                        spanStart--;
                    }

                    span = BedInterval.Create(spanStart, second.Position);
                }

                winner = Winner.Create(svType, span, bins);
            }

            var wittyerSample = WittyerSample.CreateFromVariant(first, originalSample, false);

            return new WittyerBndInternal(svType, first, posInterval, ciPosInterval,
                                          second, endInterval, ciEndInterval, winner, wittyerSample);
        }
 /// <summary>
 /// Creates a breakend pair for an insertion by passing <paramref name="first"/> to <c>Create</c>
 /// as both records of the pair.
 /// </summary>
 internal static IWittyerBnd CreateInsertion([NotNull] IVcfVariant first, [CanBeNull] IVcfSample originalSample,
                                             [NotNull] WittyerType svType, [NotNull] IReadOnlyList <uint> bins, uint bpd, double?pd)
 {
     return Create(first, originalSample, svType, bins, bpd, pd, first);
 }
示例#8
0
        /// <summary>
        /// Rebuilds <paramref name="baseVariant"/> with a single sample that carries the original sample
        /// fields plus a not-assessed WIT value and the given WHY reason.
        /// </summary>
        /// <param name="baseVariant">Variant to copy.</param>
        /// <param name="sample">Original sample whose fields are carried over, or <c>null</c> for none.</param>
        /// <param name="why">Reason written to the WHY field.</param>
        /// <param name="isTruth">Selects the truth sample name when <c>true</c>, otherwise the query sample name.</param>
        /// <exception cref="InvalidDataException">The selected default sample name is <c>null</c>.</exception>
        internal static IVcfVariant CreateUnsupportedVariant([NotNull] IVcfVariant baseVariant, [CanBeNull] IVcfSample sample,
                                                             FailedReason why, bool isTruth)
        {
            var defaultName = isTruth
                ? SamplePair.Default.TruthSampleName
                : SamplePair.Default.QuerySampleName;
            var sampleName = defaultName
                             ?? throw new InvalidDataException(
                                 $"Somehow, {nameof(SamplePair)}.{nameof(SamplePair.Default)} was null!!");

            var builder = SampleDictionaries.CreateBuilder()
                          .AddSample(sampleName)
                          .MoveOnToDictionaries();

            // Existing sample fields come first, followed by the WIT and WHY annotations.
            var existingFields = sample?.SampleDictionary ?? ImmutableDictionary <string, string> .Empty.AsEnumerable();
            var allFields      = existingFields
                                 .Select(entry => (entry.Key, entry.Value))
                                 .FollowedWith(
                (WittyerConstants.WittyerMetaInfoLineKeys.Wit, NotAssessed),
                (WittyerConstants.WittyerMetaInfoLineKeys.Why, why.ToString()));

            foreach (var field in allFields)
            {
                builder.SetSampleField(sampleName, field);
            }

            var samples = builder.Build();
            return baseVariant.ToBuilder().SetSamples(samples).Build();
        }
示例#9
0
        /// <summary>
        /// Classifies one VCF record and converts it into the matching object: an unsupported variant,
        /// a breakend pair, an unpaired breakend waiting for its mate, or a regular wittyer variant.
        /// </summary>
        /// <param name="vcfVariant">The VCF record to convert.</param>
        /// <param name="sample">The sample to attach, or <c>null</c>.</param>
        /// <param name="isTruth">Whether the record comes from the truth input; selects filters and the sample name.</param>
        /// <param name="sampleName">Sample name forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
        /// <param name="inputSpecDict">Per-type settings; a type missing from it is reported as skipped.</param>
        /// <param name="bndSet">Breakends still waiting for their mate; entries are added and removed here.</param>
        /// <param name="errorList">Receives the exception text when building a regular variant fails.</param>
        /// <param name="isCrossTypeOn">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">Parsing reported <c>FailedReason.Unset</c> as a failure.</exception>
        /// <exception cref="InvalidDataException">Parsing reported no failure but also no type.</exception>
        /// <exception cref="InvalidOperationException">A paired breakend could not be removed from <paramref name="bndSet"/>.</exception>
        internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample, bool isTruth,
                                                      [CanBeNull] string sampleName, IReadOnlyDictionary <WittyerType, InputSpec> inputSpecDict,
                                                      IDictionary <IGeneralBnd, IVcfVariant> bndSet, List <string> errorList, bool isCrossTypeOn)
        {
            var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);

            if (failedReason != null)
            {
                if (failedReason.Value == FailedReason.Unset)
                {
                    // The single-string constructor treats its argument as the parameter name,
                    // so the (paramName, message) overload is used to surface the text as the message.
                    throw new ArgumentOutOfRangeException(nameof(failedReason),
                                                          $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.TryParse)}");
                }

                return(CreateUnsupportedVariant(vcfVariant, sample, failedReason.Value, isTruth));
            }

            if (svType == null)
            {
                throw new InvalidDataException("svType should not be null with no failed reason");
            }

            // The user did not specify this SVTYPE in the input spec, so treat it as excluded from comparison entirely.
            if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
            {
                return(CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth));
            }

            var isSupportedVariant = IsSupportedVariant();

            if (!isSupportedVariant.Equals(FailedReason.Unset))
            {
                return(CreateUnsupportedVariant(vcfVariant, sample, isSupportedVariant, isTruth));
            }

            var bpd = inputSpec.BasepairDistance;
            var pd  = inputSpec.PercentDistance;

            var bins = inputSpec.BinSizes;

            if (svType == WittyerType.Insertion)
            {
                // An insertion uses the same record as both entries of the breakend pair.
                return(WittyerBndInternal.Create(vcfVariant,
                                                 sample, inputSpec.VariantType, GetBinSizes(), bpd, pd, vcfVariant));
            }

            if (svType == WittyerType.CopyNumberReference &&
                vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString) &&
                !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
            {
                // any non-DEL/DUP/CNV that is determined to be reference copy number is not supported.
                return(CreateUnsupportedVariant(vcfVariant, sample,
                                                FailedReason.UnsupportedRefCall, isTruth));
            }

            if (svType == WittyerType.TranslocationBreakend ||
                svType == WittyerType.IntraChromosomeBreakend)
            {
                var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

                // Note: a hit means the paired BND is already a key in the dictionary. Check the comparer for details.
                if (bndSet.TryGetValue(currentBnd, out var secondVariant))
                {
                    if (!bndSet.Remove(currentBnd))
                    {
                        throw new InvalidOperationException(
                                  $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
                    }
                    return(WittyerBndInternal.Create(vcfVariant, sample, svType, GetBinSizes(), bpd, pd, secondVariant));
                }

                // No mate yet: remember this breakend and hand it back unpaired.
                bndSet.Add(currentBnd, vcfVariant);
                return(currentBnd);
            }

            try
            {
                // The bin projection stays inside the try so its failures are handled like any other.
                return(WittyerVariantInternal.Create(vcfVariant, sample, svType, GetBinSizes(), pd, bpd));
            }
            catch (Exception e)
            {
                // NOTE(review): "<=" lets the list reach MaxNonSupportedVariantToPrint + 1 entries - confirm this is intended.
                if (errorList.Count <= MaxNonSupportedVariantToPrint)
                {
                    errorList.Add(
                        new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                        .StringJoin(Environment.NewLine));
                }
                return(CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth));
            }

            // Projects the configured (size, skip) bins down to their sizes.
            IReadOnlyList <uint> GetBinSizes()
            => bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList();

            // Returns FailedReason.Unset when the variant passes every check, otherwise the reason it is rejected.
            FailedReason IsSupportedVariant()
            {
                // Check filters.
                IReadOnlyCollection <string> includedFilters, excludedFilters;

                if (isTruth)
                {
                    includedFilters = WittyerConstants.DefaultIncludeFilters;
                    excludedFilters = WittyerConstants.DefaultExcludeFilters;
                }
                else
                {
                    includedFilters = inputSpec.IncludedFilters;
                    excludedFilters = inputSpec.ExcludedFilters;
                }

                // Rejected when any excluded filter is present, or when included filters are
                // configured and the variant carries none of them.
                if (vcfVariant.Filters.Any(excludedFilters.Contains) ||
                    includedFilters.Count > 0 &&
                    (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
                {
                    return(FailedReason.FilteredBySettings);
                }

                // SVLEN = 0 when they are supposed to have overlaps (svlen is needed for overlapping windows) are ignored
                if (svType.HasOverlappingWindows &&
                    (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString) &&
                     vcfVariant.Position.ToString() == endString ||
                     vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString) &&
                     svLenString == "0"))
                {
                    return(FailedReason.InvalidSvLen);
                }

                // Bnd with pos and alt at the same position (temporarily accommodates SVLEN=0 INV represented as bnd)
                if (svType == WittyerType.IntraChromosomeBreakend)
                {
                    var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
                    return(vcfVariant.Position == mate.Position ? FailedReason.InvalidSvLen : FailedReason.Unset);
                }

                // todo: truth does not care about Sample FT tag, is that ok?
                var sampleFilterOk = isTruth || !includedFilters.Contains(VcfConstants.PassFilter) || !vcfVariant.IsPassFilter() || IsSampleFtPassFilter();

                if (!sampleFilterOk)
                {
                    return(FailedReason.FailedSampleFilter);
                }

                // An include bed is in use, the variant lies within a single contig, and the bed does not include that contig.
                if (inputSpec.IncludedRegions != null && svType != WittyerType.TranslocationBreakend &&
                    !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
                {
                    return(FailedReason.OutsideBedRegion);
                }

                return(FailedReason.Unset);

                // True when a sample exists and either has no FT entry or its FT value equals the pass filter.
                bool IsSampleFtPassFilter()
                => sample != null && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft) ||
                                      ft.Equals(VcfConstants.PassFilter));
            }
        }
示例#10
0
        /// <summary>
        /// Creates a genotyped copy-number sample for a reference call, using the ploidy as the copy
        /// number and a reference genotype of the same ploidy.
        /// </summary>
        /// <param name="baseVariant">The variant the sample belongs to; not read by this method.</param>
        /// <param name="sample">
        /// The original sample, or <c>null</c>. Without a sample, or without a GT entry, the ploidy
        /// defaults to 2 and the genotype is unphased.
        /// </param>
        /// <returns>The genotyped copy-number sample wrapping <paramref name="sample"/>.</returns>
        internal static IWittyerGenotypedCopyNumberSample CreateReferenceSample([NotNull] IVcfVariant baseVariant, [CanBeNull] IVcfSample sample)
        {
            var ploidy   = 2;
            var isPhased = false;

            if (sample != null &&
                sample.SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var originalGt))
            {
                isPhased = originalGt.Contains(VcfConstants.GtPhasedValueDelimiter);

                // Count alleles by splitting on BOTH delimiters, as CreateFromSample does, so a GT that
                // mixes phased and unphased separators is not under-counted.
                // Assumes each delimiter is a single character - TODO confirm against VcfConstants.
                ploidy = originalGt
                         .Split(VcfConstants.GtPhasedValueDelimiter[0],
                                VcfConstants.GtUnphasedValueDelimiter[0]).Length;
            }

            var cnSample = WittyerCopyNumberSample.Create(WittyerSampleInternal.Create(sample), (uint)ploidy);

            return(WittyerGenotypedCopyNumberSample.Create(cnSample, GenotypeInfo.CreateRef(ploidy, isPhased)));
        }
示例#11
0
 /// <summary>
 /// Creates a wittyer sample with an explicit decision, match list and failure reasons by
 /// forwarding every argument to <c>WittyerSampleInternal.Create</c>.
 /// </summary>
 public static IWittyerSample Create([NotNull] IVcfSample baseSample, WitDecision wit,
                                     [NotNull] IImmutableList <MatchEnum> what, [NotNull] IImmutableList <FailedReason> why)
 {
     return WittyerSampleInternal.Create(baseSample, wit, what, why);
 }
示例#12
0
 /// <summary>
 /// Factory wrapper around the private constructor; passes all four values through unchanged.
 /// </summary>
 internal static WittyerSampleInternal Create([NotNull] IVcfSample baseSample, WitDecision wit,
                                              [NotNull] IImmutableList <MatchEnum> what, [NotNull] IImmutableList <FailedReason> why)
 {
     return new WittyerSampleInternal(baseSample, wit, what, why);
 }
示例#13
0
 /// <summary>
 /// Creates a sample with the <c>WitDecision.NotAssessed</c> decision and empty match and
 /// failure-reason lists.
 /// </summary>
 /// <param name="baseSample">The underlying VCF sample; may be <c>null</c>.</param>
 internal static WittyerSampleInternal Create([CanBeNull] IVcfSample baseSample)
 {
     var noMatches = ImmutableList <MatchEnum> .Empty;
     var noReasons = ImmutableList <FailedReason> .Empty;
     return new WittyerSampleInternal(baseSample, WitDecision.NotAssessed, noMatches, noReasons);
 }