Example #1
0
        /// <summary>
        /// Builds a <see cref="GeneralBnd"/> from a breakend record after checking that the
        /// ALT string carries the first REF base on the expected side.
        /// </summary>
        /// <param name="variant">The breakend variant; its single ALT is parsed by <c>SimpleBreakEnd.Parse</c>.</param>
        /// <returns>
        /// A <see cref="GeneralBnd"/> built from the variant, an interval created from
        /// (Position - 1, Position), the 3' flag and the parsed mate.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when the field expected to carry the REF base does not start (3' case) or
        /// end (other case) with the first base of REF.
        /// </exception>
        public static IGeneralBnd CreateFromVariant([NotNull] IVcfVariant variant)
        {
            var altBnd = variant.GetSingleAlt();
            var thisRef = variant.Ref[0];
            var mate = SimpleBreakEnd.Parse(altBnd, out var firstField, out var lastField);

            // A non-blank first field means the sequence precedes the mate coordinates (treated as 3').
            var is3Prime = !string.IsNullOrWhiteSpace(firstField);

            // Only the field on the relevant side is inspected: the first field must start with
            // the REF base in the 3' case, otherwise the last field must end with it.
            var carriesRefBase = is3Prime
                ? firstField.StartsWith(thisRef)
                : lastField.EndsWith(thisRef);

            if (!carriesRefBase)
            {
                throw new InvalidDataException(
                    $"Invalid breakend because the alt neither starts nor ends with ref's first base: {variant}");
            }

            var interval = BedInterval.Create(variant.Position - 1, variant.Position);

            return new GeneralBnd(variant, interval, is3Prime, mate);
        }
Example #2
0
        /// <summary>
        /// Works out the <see cref="WittyerType"/> of a variant from its SVTYPE info field and,
        /// where relevant, the GT and CN fields of the selected sample.
        /// </summary>
        /// <param name="variant">The variant to classify.</param>
        /// <param name="isCrossTypeOn">
        /// When true, copy-number calls are reported as Deletion/Duplication; otherwise as
        /// CopyNumberLoss/CopyNumberGain.
        /// </param>
        /// <param name="sampleName">Sample to inspect; when null the first sample is used.</param>
        /// <param name="svType">
        /// The resolved type. Null when a <see cref="FailedReason"/> is returned.
        /// </param>
        /// <returns>
        /// Null when a type was resolved; <c>FailedReason.CnvWithoutCn</c> for a CNV without a CN
        /// field; <c>FailedReason.UndeterminedCn</c> when the CN field exists but is not an integer.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when the SVTYPE key is missing or its value is not recognized.
        /// </exception>
        public static FailedReason? ParseFromVariant([NotNull] IVcfVariant variant, bool isCrossTypeOn,
                                                     [CanBeNull] string sampleName, [CanBeNull] out WittyerType svType)
        {
            // Reference sites and reference calls are both reported as copy-number reference.
            // The sample is only inspected when the variant is not already a ref site.
            if (variant.IsRefSite() || IsRefCall(out var ploidy, out var copyNumber, out var hasCopyNumber))
            {
                svType = CopyNumberReference;
                return null;
            }

            if (!variant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeStr))
            {
                // todo: maybe we can allow small variants, which would not have SVTYPE
                throw new InvalidDataException(
                    $"Following variant does not have {VcfConstants.SvTypeKey} info key:\n{variant}");
            }

            svType = null;
            if (svTypeStr == SvTypeStrings.TranslocationBreakend)
            {
                // SVTYPE alone cannot separate intra-chromosome breakends from translocations,
                // so compare this record's contig with the mate's contig.
                var mate = variant is IBreakEnd breakEnd
                    ? breakEnd.Mate
                    : SimpleBreakEnd.TryParse(variant.GetSingleAlt()).GetOrThrow();

                if (variant.Contig.Equals(mate.Contig))
                {
                    svType = IntraChromosomeBreakend;
                }
                else
                {
                    svType = TranslocationBreakend;
                }

                return null;
            }

            if (TryParseSvType(svTypeStr, out svType))
            {
                if (!svType.HasBaseLevelStats)
                {
                    // Types without base-level stats (e.g. INV, INS) never need the CN field.
                    return null;
                }
            }
            else if (svTypeStr != SvTypeStrings.Cnv)
            {
                // Breakends were handled above; anything unparsable that is not CNV is unknown.
                throw new InvalidDataException($"Cannot recognize SVTYPE of {svTypeStr}");
            }

            if (!hasCopyNumber)
            {
                if (svType == null)
                {
                    return FailedReason.CnvWithoutCn;
                }

                // DEL or DUP without CN: keep the type parsed from SVTYPE.
                return null;
            }

            // From here on it is CNV with CN, or DEL/DUP with CN, which is also treated as CNV.
            if (!copyNumber.HasValue)
            {
                // The CN field exists but could not be parsed; discard any SVTYPE=DEL/DUP result.
                svType = null;
                return FailedReason.UndeterminedCn;
            }

            var isLoss = copyNumber.Value < ploidy;
            if (isCrossTypeOn)
            {
                svType = isLoss ? Deletion : Duplication;
            }
            else
            {
                svType = isLoss ? CopyNumberLoss : CopyNumberGain;
            }

            return null;

            // Reads GT and CN from the chosen sample and reports whether they describe a reference call.
            // Ploidy defaults to 2 and is replaced by the number of GT alleles when GT is present.
            bool IsRefCall(out int ploidyP, out int? cnP, out bool hasCnP)
            {
                ploidyP = 2;
                cnP = null;
                hasCnP = false;

                // Not a ref site and no sample columns: cannot be a ref call.
                if (variant.Samples.Count == 0)
                {
                    return false;
                }

                var sample = sampleName == null ? variant.Samples[0] : variant.Samples[sampleName];
                var sampleFields = sample.SampleDictionary;

                hasCnP = sampleFields.TryGetValue(VcfConstants.CnSampleFieldKey, out var cnString);
                var hasGenotype = sampleFields.TryGetValue(VcfConstants.GenotypeKey, out var gt);

                if (hasCnP && int.TryParse(cnString, out var parsedCn))
                {
                    cnP = parsedCn;
                }

                if (!hasGenotype)
                {
                    // Without GT, only a literal CN of "2" counts as reference.
                    return hasCnP && cnString == "2";
                }

                //todo: refining how to deal with ploidy. Also here we don't deal with LOH. assuming CN = ploidy is ref
                var alleles = gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
                                       VcfConstants.GtUnphasedValueDelimiter[0]);
                ploidyP = alleles.Length;

                if (cnP.HasValue)
                {
                    return cnP.Value == ploidyP;
                }

                // No usable CN: reference only when every allele index is "0".
                return alleles.All(alleleIndex => alleleIndex == "0");
            }
        }
Example #3
0
        /// <summary>
        /// Classifies a VCF record and wraps it in the matching object: an unsupported-variant
        /// wrapper, a paired breakend, an unpaired breakend awaiting its mate, or a regular variant.
        /// </summary>
        /// <param name="vcfVariant">The record to classify and wrap.</param>
        /// <param name="sample">Sample passed to the created object; its FT field is checked for query variants.</param>
        /// <param name="isTruth">
        /// When true, the default include/exclude filters are used instead of the input spec's,
        /// and the sample FT check is skipped.
        /// </param>
        /// <param name="sampleName">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
        /// <param name="inputSpecDict">Settings per variant type; a type missing here is reported as skipped.</param>
        /// <param name="bndSet">
        /// Breakends still waiting for their mate. A matching entry is removed when the pair is
        /// found; otherwise the current breakend is added.
        /// </param>
        /// <param name="errorList">
        /// Receives the exception text when creating a regular variant fails, as long as the list
        /// holds no more than <c>MaxNonSupportedVariantToPrint</c> entries.
        /// </param>
        /// <param name="isCrossTypeOn">Forwarded to <c>WittyerType.ParseFromVariant</c>.</param>
        /// <returns>The wrapper created for the record.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when type parsing reports <c>FailedReason.Unset</c> as a failure.
        /// </exception>
        /// <exception cref="InvalidDataException">Thrown when no failure is reported but no type was resolved.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a paired breakend is found in <paramref name="bndSet"/> but cannot be removed.
        /// </exception>
        internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample, bool isTruth,
                                                      [CanBeNull] string sampleName, IReadOnlyDictionary<WittyerType, InputSpec> inputSpecDict,
                                                      IDictionary<IGeneralBnd, IVcfVariant> bndSet, List<string> errorList, bool isCrossTypeOn)
        {
            var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);

            if (failedReason != null)
            {
                if (failedReason.Value == FailedReason.Unset)
                {
                    // Use the (paramName, message) overload: the single-string constructor would
                    // treat the whole text as a parameter name.
                    throw new ArgumentOutOfRangeException(
                        nameof(failedReason),
                        $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.ParseFromVariant)}");
                }

                return CreateUnsupportedVariant(vcfVariant, sample, failedReason.Value, isTruth);
            }

            if (svType == null)
            {
                throw new InvalidDataException("svType should not be null with no failed reason");
            }

            // The user did not list this SVTYPE in the input spec, so treat it as excluded from comparison entirely.
            if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
            {
                return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth);
            }

            var unsupportedReason = IsSupportedVariant();

            if (!unsupportedReason.Equals(FailedReason.Unset))
            {
                return CreateUnsupportedVariant(vcfVariant, sample, unsupportedReason, isTruth);
            }

            var bpd = inputSpec.BasepairDistance;
            var pd = inputSpec.PercentDistance;
            var bins = inputSpec.BinSizes;

            if (svType == WittyerType.Insertion)
            {
                // An insertion uses the same record for both entries of the breakend pair.
                return WittyerBndInternal.Create(
                    vcfVariant, sample, inputSpec.VariantType,
                    bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(),
                    bpd, pd, vcfVariant);
            }

            if (svType == WittyerType.CopyNumberReference &&
                vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString) &&
                !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
            {
                // Any non-DEL/DUP/CNV record determined to be reference copy number is not supported.
                return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.UnsupportedRefCall, isTruth);
            }

            if (svType == WittyerType.TranslocationBreakend ||
                svType == WittyerType.IntraChromosomeBreakend)
            {
                var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

                // Note: a hit means the paired BND is already a key in the dictionary. See the comparer for details.
                if (bndSet.TryGetValue(currentBnd, out var secondVariant))
                {
                    if (!bndSet.Remove(currentBnd))
                    {
                        throw new InvalidOperationException(
                            $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
                    }

                    return WittyerBndInternal.Create(
                        vcfVariant, sample, svType,
                        bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(),
                        bpd, pd, secondVariant);
                }

                // No mate seen yet: remember this breakend and hand it back unpaired.
                bndSet.Add(currentBnd, vcfVariant);
                return currentBnd;
            }

            try
            {
                return WittyerVariantInternal.Create(
                    vcfVariant, sample, svType,
                    bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(),
                    pd, bpd);
            }
            catch (Exception e)
            {
                // Best effort: record the failure (up to the print limit) and report the variant as unsupported.
                // NOTE(review): "<=" lets the list reach the limit plus one entry - confirm this is intended.
                if (errorList.Count <= MaxNonSupportedVariantToPrint)
                {
                    errorList.Add(
                        new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                        .StringJoin(Environment.NewLine));
                }

                return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth);
            }

            // Returns FailedReason.Unset when the variant passes every check, otherwise the first failing reason.
            FailedReason IsSupportedVariant()
            {
                // Check filters.
                IReadOnlyCollection<string> includedFilters, excludedFilters;

                if (isTruth)
                {
                    includedFilters = WittyerConstants.DefaultIncludeFilters;
                    excludedFilters = WittyerConstants.DefaultExcludeFilters;
                }
                else
                {
                    includedFilters = inputSpec.IncludedFilters;
                    excludedFilters = inputSpec.ExcludedFilters;
                }

                // Rejected when any filter is excluded, or when include filters exist and none of them is present.
                if (vcfVariant.Filters.Any(excludedFilters.Contains) ||
                    includedFilters.Count > 0 &&
                    (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
                {
                    return FailedReason.FilteredBySettings;
                }

                // Types with overlapping windows need a length, so END == POS or SVLEN=0 records are ignored.
                if (svType.HasOverlappingWindows &&
                    (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString) &&
                     vcfVariant.Position.ToString() == endString ||
                     vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString) &&
                     svLenString == "0"))
                {
                    return FailedReason.InvalidSvLen;
                }

                // A breakend whose mate has the same position (temporarily, to accommodate SVLEN=0 INV represented as BND).
                // Intra-chromosome breakends return here, so the checks below are not applied to them.
                if (svType == WittyerType.IntraChromosomeBreakend)
                {
                    var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
                    return vcfVariant.Position == mate.Position ? FailedReason.InvalidSvLen : FailedReason.Unset;
                }

                // todo: truth does not care about Sample FT tag, is that ok?
                var sampleFilterOk = isTruth
                                     || !includedFilters.Contains(VcfConstants.PassFilter)
                                     || !vcfVariant.IsPassFilter()
                                     || IsSampleFtPassFilter();

                if (!sampleFilterOk)
                {
                    return FailedReason.FailedSampleFilter;
                }

                // An include bed is in use, the variant lies within a single contig, and the bed does not include that contig.
                if (inputSpec.IncludedRegions != null && svType != WittyerType.TranslocationBreakend &&
                    !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
                {
                    return FailedReason.OutsideBedRegion;
                }

                return FailedReason.Unset;

                // True when a sample exists and its FT field is either absent or equal to the pass filter.
                bool IsSampleFtPassFilter()
                    => sample != null && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft) ||
                                          ft.Equals(VcfConstants.PassFilter));
            }
        }