Esempio n. 1
0
        /// <summary>
        /// Parses <paramref name="variant"/> as a VCF record with samples "normal" and "tumor", builds a
        /// wittyer variant from the "tumor" sample and checks its main interval and both confidence intervals.
        /// </summary>
        /// <param name="variant">The VCF line to parse.</param>
        /// <param name="start">Expected start of the variant's interval.</param>
        /// <param name="end">Expected end of the variant's interval.</param>
        /// <param name="posStart">Expected start of <c>CiPosInterval</c>.</param>
        /// <param name="posEnd">Expected end of <c>CiPosInterval</c>.</param>
        /// <param name="endStart">Expected start of <c>CiEndInterval</c>.</param>
        /// <param name="endEnd">Expected end of <c>CiEndInterval</c>.</param>
        /// <exception cref="NotSupportedException">No wittyer type could be determined for the record.</exception>
        public static void WittyerVariantIntervalCorrect([NotNull] string variant, uint start, uint end,
                                                         uint posStart, uint posEnd, uint endStart, uint endEnd)
        {
            const string sampleName = "tumor";

            var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create("normal", sampleName), GenomeAssembly.Hg38);
            var parsedRecord   = VcfVariant.TryParse(variant, parserSettings).GetOrThrowDebug();

            // Only the out parameter matters here; the returned failure reason is intentionally ignored.
            WittyerType.ParseFromVariant(parsedRecord, false, sampleName, out var type);
            if (type == null)
            {
                throw new NotSupportedException("This test does not handle svType null");
            }

            var actual = WittyerVariantInternal.Create(
                parsedRecord, parsedRecord.Samples[sampleName], type, Bins, PercentDistance, BasepairDistance);

            // Build every expectation before asserting anything.
            var wantedInterval = ContigAndInterval.Create(parsedRecord.Contig, start, end);
            var wantedCiPos    = BedInterval.Create(posStart, posEnd);
            var wantedCiEnd    = BedInterval.Create(endStart, endEnd);

            MultiAssert.Equal(wantedInterval, actual);
            MultiAssert.Equal(wantedCiPos, actual.CiPosInterval);
            MultiAssert.Equal(wantedCiEnd, actual.CiEndInterval);
            MultiAssert.AssertAll();
        }
Esempio n. 2
0
        /// <summary>
        /// Builds one truth and one query variant from the given VCF lines, runs the overlap step with
        /// cross-type matching switched on, finalizes both, and checks the resulting decisions.
        /// </summary>
        /// <param name="truthVar">VCF line for the truth variant.</param>
        /// <param name="queryVar">VCF line for the query variant.</param>
        /// <param name="type">Text form of the wittyer type used to generate the input specs.</param>
        /// <param name="isTp">Whether both variants are expected to end up as true positives.</param>
        public void OverlapWorks_DupDel([NotNull] string truthVar, [NotNull] string queryVar, string type, bool isTp)
        {
            const string sampleName    = "blah";
            const bool   isCrossTypeOn = true;

            var parserSettings = VcfVariantParserSettings.Create(new List <string> { sampleName });

            var truthRecord = VcfVariant.TryParse(truthVar, parserSettings).GetOrThrow();
            var parsedType  = WittyerType.Parse(type);
            var specsByType = InputSpec
                              .GenerateCustomInputSpecs(!isCrossTypeOn, new[] { parsedType }, percentDistance: PercentDistance)
                              .ToDictionary(spec => spec.VariantType);

            var pendingBreakends = new Dictionary <IGeneralBnd, IVcfVariant>();
            var errors           = new List <string>();

            var truthV = (IMutableWittyerSimpleVariant)WittyerVcfReader.CreateVariant(
                truthRecord, truthRecord.Samples.First().Value, true, sampleName,
                specsByType, pendingBreakends, errors, isCrossTypeOn);

            // The query record is parsed only after the truth variant has been built.
            var queryRecord = VcfVariant.TryParse(queryVar, parserSettings).GetOrThrow();
            var queryV      = (IMutableWittyerVariant)WittyerVcfReader.CreateVariant(
                queryRecord, queryRecord.Samples.First().Value, false, sampleName,
                specsByType, pendingBreakends, errors, isCrossTypeOn);

            var forest = TruthForest.Create(sampleName, VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build());
            forest.AddTarget(truthV);

            OverlappingUtils.DoOverlapping(forest.VariantTrees, queryV, OverlappingUtils.IsVariantAlleleMatch, isCrossTypeOn, true);
            queryV.Finalize(WitDecision.FalsePositive, EvaluationMode.CrossTypeAndSimpleCounting, null);
            truthV.Finalize(WitDecision.FalseNegative, EvaluationMode.CrossTypeAndSimpleCounting, null);

            WitDecision wantedQueryDecision, wantedTruthDecision;
            if (isTp)
            {
                wantedQueryDecision = WitDecision.TruePositive;
                wantedTruthDecision = WitDecision.TruePositive;
            }
            else
            {
                wantedQueryDecision = WitDecision.FalsePositive;
                wantedTruthDecision = WitDecision.FalseNegative;
            }

            Assert.Equal(wantedQueryDecision, queryV.Sample.Wit);
            Assert.Equal(wantedTruthDecision, truthV.Sample.Wit);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that <paramref name="inputVariant"/>, parsed with samples "NA12878" and "haha", is
        /// classified as <c>WittyerType.CopyNumberReference</c> for sample "NA12878".
        /// </summary>
        /// <param name="inputVariant">The VCF line to parse.</param>
        public static void ParseReferenceVariantWorks([NotNull] string inputVariant)
        {
            var sampleNames    = ImmutableList.Create("NA12878", "haha");
            var parserSettings = VcfVariantParserSettings.Create(sampleNames, GenomeAssembly.Hg19);
            var parsedRecord   = VcfVariant.TryParse(inputVariant, parserSettings).GetOrThrowDebug();

            // Cross-type matching is off; only the resolved type (out parameter) is inspected.
            WittyerType.ParseFromVariant(parsedRecord, false, "NA12878", out var resolvedType);

            Assert.Equal(WittyerType.CopyNumberReference, resolvedType);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a wittyer variant from a single VCF line, using one sample named "normal" and the
        /// Grch37 assembly.
        /// </summary>
        /// <param name="vcfline">The VCF line to parse.</param>
        /// <returns>The variant created from the record and its "normal" sample.</returns>
        /// <exception cref="NotSupportedException">No wittyer type could be determined for the record.</exception>
        private static IWittyerSimpleVariant CreateWittyerVariant([NotNull] string vcfline)
        {
            const string onlySample = "normal";

            var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create(onlySample), GenomeAssembly.Grch37);
            var parsedRecord   = VcfVariant.TryParse(vcfline, parserSettings).GetOrThrowDebug();

            // Cross-type matching is on and no sample name is supplied to the type parser.
            WittyerType.ParseFromVariant(parsedRecord, true, null, out var resolvedType);
            if (resolvedType == null)
            {
                throw new NotSupportedException("This method does not handle svType null");
            }

            var sample = parsedRecord.Samples[onlySample];
            return WittyerVariantInternal.Create(parsedRecord, sample, resolvedType, Bins, PercentDistance, BasepairDistance);
        }
Esempio n. 5
0
        /// <summary>
        /// Creates the <see cref="Winner"/> for <paramref name="svType"/> by locating the bin that
        /// <paramref name="variantInterval"/> falls into.
        /// </summary>
        /// <param name="svType">The variant type the winner belongs to.</param>
        /// <param name="variantInterval">The variant's interval; when null the last bin is used.</param>
        /// <param name="bins">The bin boundaries; when null or empty the single-argument overload is used.</param>
        /// <returns>
        /// A winner whose start is the selected bin and whose end is the following bin, or null when the
        /// selected bin is the last one.
        /// </returns>
        internal static Winner Create([NotNull] WittyerType svType, [CanBeNull] IInterval <uint> variantInterval,
                                      [CanBeNull] IReadOnlyList <uint> bins)
        {
            // No bins configured: defer to the overload that takes only the type.
            if (bins == null || bins.Count == 0)
            {
                return Create(svType);
            }

            var lastIndex = bins.Count - 1;

            // No interval: take the last bin, which has no upper bound.
            if (variantInterval == null)
            {
                return new Winner(svType, bins[lastIndex], null);
            }

            var binIndex = GetBinIndex(variantInterval, bins);

            // A negative index selects the range from the starting bin up to the first configured bin.
            if (binIndex < 0)
            {
                return Create(svType, WittyerConstants.StartingBin, bins[0]);
            }

            uint? upperBound = null;
            if (binIndex < lastIndex)
            {
                upperBound = bins[binIndex + 1];
            }

            return Create(svType, bins[binIndex], upperBound);
        }
Esempio n. 6
0
        /// <summary>
        /// Parses <paramref name="vcfString"/> with samples "s1" and "s2", determines the wittyer type for
        /// sample "s1" and compares the outcome with <paramref name="expected"/>.
        /// </summary>
        /// <param name="vcfString">The VCF line to parse.</param>
        /// <param name="isCrossTypeOn">Passed through to the type parser.</param>
        /// <param name="expected">
        /// Text form of the expected type, or null when parsing is expected to fail with
        /// <c>FailedReason.UndeterminedCn</c>.
        /// </param>
        public static void ParseWittyerVariantType_AssignCorrectType([NotNull] string vcfString, bool isCrossTypeOn,
                                                                     [CanBeNull] string expected)
        {
            const string sampleName = "s1";

            var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create(sampleName, "s2"), GenomeAssembly.Hg38);
            var parsedRecord   = VcfVariant.TryParse(vcfString, parserSettings).GetOrThrowDebug();

            var failure = WittyerType.ParseFromVariant(parsedRecord, isCrossTypeOn, sampleName, out var resolvedType);

            if (expected != null)
            {
                Assert.Equal(WittyerType.Parse(expected), resolvedType);
                return;
            }

            // With no expected type, the parser must report an undetermined copy number.
            Assert.Equal(FailedReason.UndeterminedCn, failure);
        }
Esempio n. 7
0
        /// <inheritdoc />
        /// <remarks>
        /// Loads the JSON object, validates its field names and variant type, then deserializes it into an
        /// <see cref="InputSpec"/>. Validation failures are reported as <see cref="JsonSerializationException"/>.
        /// As a side effect, each accepted variant type is recorded in <c>_typeSet</c> so that a second
        /// setting for the same type is rejected.
        /// </remarks>
        /// <exception cref="JsonSerializationException">
        /// The setting has duplicate, missing or unrecognized field names, or an unknown or repeated variant type.
        /// </exception>
        public override InputSpec ReadJson(JsonReader reader, Type objectType, InputSpec existingValue,
                                           bool hasExistingValue, JsonSerializer serializer)
        {
            var jo = JObject.Load(reader, JsonLoadSettings);

            // Error handling
            VerifySettings();

            return(jo.ToObject <InputSpec>());

            void VerifySettings()
            {
                var fieldNamesList = jo.Properties().Select(x => x.Name).ToList();

                // Case-insensitive on purpose: names that differ only by case count as duplicates below.
                var fieldNames     = Enumerable.ToHashSet(fieldNamesList, StringComparer.OrdinalIgnoreCase);

                // Check all field names are unique.
                if (fieldNames.Count < fieldNamesList.Count)
                {
                    throw new JsonSerializationException("Setting contains duplicate field names in config file.");
                }

                // Check that 'VariantType' field exists.
                if (!fieldNames.Contains(WittyerSettings.VariantTypeName))
                {
                    throw new JsonSerializationException($"Setting missing {WittyerSettings.VariantTypeName} field.");
                }

                // The lookup above ignores case but this one does not, so a key such as "varianttype"
                // passes the presence check and still yields no token here. Report that as a
                // configuration error instead of letting Value<string>() fail on a null token.
                var variantTypeToken = jo.GetValue(WittyerSettings.VariantTypeName, StringComparison.Ordinal);

                if (variantTypeToken == null)
                {
                    throw new JsonSerializationException(
                              $"Setting missing {WittyerSettings.VariantTypeName} field; field names must match exactly, including case.");
                }

                var variantType = variantTypeToken.Value <string>();

                // Make sure that variant type is recognized.
                if (!WittyerType.TryParse(variantType, out var variantTypeEnum))
                {
                    throw new JsonSerializationException($"Unknown variant type '{variantType}' in the config file.");
                }

                // Check that each field name is recognized.
                var unexpectedFields = fieldNames.Except(AllFieldNames, StringComparer.Ordinal).StringJoin(", ");

                if (unexpectedFields.Length > 0)
                {
                    throw new JsonSerializationException($"Unrecognized field names in config file: {unexpectedFields}.");
                }

                // Make sure the variant type is unique.
                if (!_typeSet.Add(variantTypeEnum))
                {
                    throw new JsonSerializationException($"Duplicate variant type '{variantType}' in the config file.");
                }

                // The required field set depends on whether the type has bins and lengths.
                var expectedFieldNames = variantTypeEnum.HasBins
                    ? variantTypeEnum.HasLengths ? AllFieldNames : InsFieldNames
                    : TraFieldNames;

                var missingFields = expectedFieldNames.Except(fieldNames);

                if (missingFields.Count > 0)
                {
                    throw new JsonSerializationException(
                              $"Setting for variant type '{variantType}' did not contain required fields: {string.Join(", ", missingFields)}.");
                }

                // Fields that are recognized in general but not used by this type only produce a warning.
                unexpectedFields = fieldNames.Except(expectedFieldNames, StringComparer.Ordinal).StringJoin(", ");
                if (unexpectedFields.Length > 0)
                {
                    // Print a warning if unexpectedFields
                    Console.WriteLine($"Warning: {variantTypeEnum} type shouldn't " +
                                      "contain the following fields in the config file: "
                                      + unexpectedFields);
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Converts one VCF record (and its sample) into the matching wittyer object: a regular variant, a
        /// breakend pair, a not-yet-paired breakend, or an "unsupported" variant carrying the reason it was
        /// rejected.
        /// </summary>
        /// <param name="vcfVariant">The VCF record to convert.</param>
        /// <param name="sample">The sample of the record to use; may be null.</param>
        /// <param name="isTruth">
        /// True for truth records. Truth records use the default include/exclude filters and skip the
        /// sample FT check; query records use the filters from the matching input spec.
        /// </param>
        /// <param name="sampleName">Sample name passed to the type parser; may be null.</param>
        /// <param name="inputSpecDict">Input specs keyed by type; a type without an entry is skipped.</param>
        /// <param name="bndSet">
        /// Breakends still waiting for their mate. This method adds unpaired breakends to it and removes an
        /// entry once its mate shows up.
        /// </param>
        /// <param name="errorList">Receives a description of exceptions thrown while creating a variant.</param>
        /// <param name="isCrossTypeOn">Passed through to the type parser.</param>
        /// <returns>The created object; for an unpaired breakend this is the breakend key itself.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The type parser reported <c>FailedReason.Unset</c>.</exception>
        /// <exception cref="InvalidDataException">The type parser returned neither a failure reason nor a type.</exception>
        /// <exception cref="InvalidOperationException">A paired breakend could not be removed from <paramref name="bndSet"/>.</exception>
        internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample, bool isTruth,
                                                      [CanBeNull] string sampleName, IReadOnlyDictionary <WittyerType, InputSpec> inputSpecDict,
                                                      IDictionary <IGeneralBnd, IVcfVariant> bndSet, List <string> errorList, bool isCrossTypeOn)
        {
            var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);

            // A non-null reason means the type could not be determined; Unset is never a valid reason here.
            // NOTE(review): the message names WittyerType.TryParse although ParseFromVariant is what was called - confirm.
            if (failedReason != null)
            {
                return(CreateUnsupportedVariant(vcfVariant, sample,
                                                failedReason.Value == FailedReason.Unset
                        ? throw new ArgumentOutOfRangeException(
                                                    $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.TryParse)}")
                        : failedReason.Value,
                                                isTruth));
            }

            if (svType == null)
            {
                throw new InvalidDataException("svType should not be null with no failed reason");
            }

            // The user did not specify this SVTYPE in the input spec, so assume they want to exclude this
            // SVTYPE from the comparison entirely.
            if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
            {
                return(CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth));
            }

            // FailedReason.Unset is the "no problem found" value of the local check below.
            var isSupportedVariant = IsSupportedVariant();

            if (!isSupportedVariant.Equals(FailedReason.Unset))
            {
                return(CreateUnsupportedVariant(vcfVariant, sample, isSupportedVariant, isTruth));
            }

            var bpd = inputSpec.BasepairDistance;
            var pd  = inputSpec.PercentDistance;

            var bins = inputSpec.BinSizes;

            if (svType == WittyerType.Insertion)
            {
                // An insertion uses the same record for both entries of the breakend pair.
                // NOTE(review): the breakend factory receives (bpd, pd) while the variant factory at the end
                // of this method receives (pd, bpd) - confirm against both signatures.
                return(WittyerBndInternal.Create(vcfVariant,
                                                 sample, inputSpec.VariantType, bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, vcfVariant));
            }

            if (svType == WittyerType.CopyNumberReference &&
                vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString) &&
                !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
            {
                // any non-DEL/DUP/CNV that is determined to be reference copy number is not supported.
                return(CreateUnsupportedVariant(vcfVariant, sample,
                                                FailedReason.UnsupportedRefCall, isTruth));
            }

            if (svType == WittyerType.TranslocationBreakend ||
                svType == WittyerType.IntraChromosomeBreakend)
            {
                var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

                // Note: a hit means the paired BND is already a key in the dictionary. See the dictionary's
                // comparer for how mates are matched.
                if (bndSet.TryGetValue(currentBnd, out var secondVariant))
                {
                    if (!bndSet.Remove(currentBnd))
                    {
                        throw new InvalidOperationException(
                                  $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
                    }
                    return(WittyerBndInternal.Create(vcfVariant, sample, svType, bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, secondVariant));
                }

                // No mate seen yet: remember this breakend and hand back the key itself.
                bndSet.Add(currentBnd, vcfVariant);
                return(currentBnd);
            }

            try
            {
                return(WittyerVariantInternal.Create(vcfVariant, sample, svType, bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), pd, bpd));
            }
            catch (Exception e)
            {
                // Any failure while creating the variant turns it into an unsupported variant; the details
                // are recorded only while the error list is within the print limit.
                // NOTE(review): "<=" lets the list grow to MaxNonSupportedVariantToPrint + 1 entries - confirm intended.
                if (errorList.Count <= MaxNonSupportedVariantToPrint)
                {
                    errorList.Add(
                        new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                        .StringJoin(Environment.NewLine));
                }
                return(CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth));
            }

            // Returns the reason this record must be skipped, or FailedReason.Unset when it can be used.
            FailedReason IsSupportedVariant()
            {
                // Check filters.
                IReadOnlyCollection <string> includedFilters, excludedFilters;

                if (isTruth)
                {
                    includedFilters = WittyerConstants.DefaultIncludeFilters;
                    excludedFilters = WittyerConstants.DefaultExcludeFilters;
                }
                else
                {
                    includedFilters = inputSpec.IncludedFilters;
                    excludedFilters = inputSpec.ExcludedFilters;
                }

                // Rejected when any excluded filter is present, or when include filters are configured and
                // the record has no filters at all or none of the included ones.
                if (vcfVariant.Filters.Any(excludedFilters.Contains) ||
                    includedFilters.Count > 0 &&
                    (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
                {
                    return(FailedReason.FilteredBySettings);
                }

                // Types that use overlapping windows need a non-zero length: records whose END equals their
                // position, or whose SVLEN is "0", are ignored.
                if (svType.HasOverlappingWindows &&
                    (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString) &&
                     vcfVariant.Position.ToString() == endString ||
                     vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString) &&
                     svLenString == "0"))
                {
                    return(FailedReason.InvalidSvLen);
                }

                // A BND whose position and mate position are the same is rejected (temporary accommodation
                // for SVLEN=0 INV represented as a BND).
                // Note that intra-chromosome breakends return here, before the sample-filter and bed-region
                // checks below.
                if (svType == WittyerType.IntraChromosomeBreakend)
                {
                    var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
                    return(vcfVariant.Position == mate.Position ? FailedReason.InvalidSvLen : FailedReason.Unset);
                }

                // todo: truth does not care about Sample FT tag, is that ok?
                // The sample FT tag is only consulted for query records when PASS is an included filter and
                // the record itself passes its filters.
                var sampleFilterOk = isTruth || !includedFilters.Contains(VcfConstants.PassFilter) || !vcfVariant.IsPassFilter() || IsSampleFtPassFilter();

                if (!sampleFilterOk)
                {
                    return(FailedReason.FailedSampleFilter);
                }

                // An include bed is in use, the variant is not a translocation breakend, and the bed has no
                // entry for the variant's contig.
                if (inputSpec.IncludedRegions != null && svType != WittyerType.TranslocationBreakend &&
                    !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
                {
                    return(FailedReason.OutsideBedRegion);
                }

                return(FailedReason.Unset);

                // True when a sample exists and its FT entry is either absent or equal to the PASS filter.
                bool IsSampleFtPassFilter()
                => sample != null && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft) ||
                                      ft.Equals(VcfConstants.PassFilter));
            }
        }
Esempio n. 9
0
 /// <summary>
 /// Creates a <see cref="Winner"/> from explicit values.
 /// </summary>
 /// <param name="svType">The variant type of the winner.</param>
 /// <param name="start">The start value.</param>
 /// <param name="end">The end value, or null when there is none.</param>
 /// <returns>A new instance holding the three given values.</returns>
 public static Winner Create([NotNull] WittyerType svType, uint start, uint? end)
 {
     return new Winner(svType, start, end);
 }
Esempio n. 10
0
 /// <summary>
 /// Creates a <see cref="Winner"/> for <paramref name="svType"/> that starts at
 /// <c>WittyerConstants.StartingBin</c> and has no end.
 /// </summary>
 /// <param name="svType">The variant type of the winner.</param>
 /// <returns>The new instance.</returns>
 internal static Winner Create([NotNull] WittyerType svType)
 {
     uint? noEnd = null;
     return Create(svType, WittyerConstants.StartingBin, noEnd);
 }
Esempio n. 11
0
 /// <summary>
 /// Initializes a winner with the given type, start and end. Private, so instances are only created
 /// from within this type.
 /// </summary>
 /// <param name="svType">Stored in <c>SvType</c>.</param>
 /// <param name="start">Stored in <c>Start</c>.</param>
 /// <param name="end">Stored in <c>End</c>; may be null.</param>
 private Winner([NotNull] WittyerType svType, uint start, uint? end)
 {
     this.SvType = svType;
     this.Start  = start;
     this.End    = end;
 }
Esempio n. 12
0
 /// <summary>
 /// Builds the text label for a bin.
 /// </summary>
 /// <param name="currentBin">The bin's own value.</param>
 /// <param name="nextBin">Text of the following bin, or the infinite-bin marker.</param>
 /// <param name="variantType">The variant type the bin belongs to.</param>
 /// <returns>
 /// "NA" for translocation breakends; <paramref name="currentBin"/> directly followed by
 /// <paramref name="nextBin"/> when the latter is the infinite-bin marker; otherwise
 /// "[currentBin, nextBin)".
 /// </returns>
 private static string GenerateBinString(uint currentBin, string nextBin, WittyerType variantType)
 {
     if (variantType == WittyerType.TranslocationBreakend)
     {
         return "NA";
     }

     if (nextBin.Equals(WittyerConstants.Json.InfiniteBin))
     {
         return currentBin + nextBin;
     }

     return $"[{currentBin}, {nextBin})";
 }
Esempio n. 13
0
        /// <summary>
        /// Converts the statistics of one bin into their JSON form.
        /// </summary>
        /// <param name="binnedStats">Per-bin statistics; must contain an entry for <c>StatsType.Event</c>.</param>
        /// <param name="nextBin">Text of the following bin, used to build the bin label.</param>
        /// <param name="variantType">The variant type the bin belongs to, used to build the bin label.</param>
        /// <returns>
        /// The bin label together with the event statistics and, when present, the base statistics.
        /// </returns>
        public static BinJsonStats Create([NotNull] IPerBinStats binnedStats, string nextBin, WittyerType variantType)
        {
            var collected = new List <BasicJsonStats>();

            // Event-level statistics are always emitted.
            var perEvent = binnedStats.Stats[StatsType.Event];
            collected.Add(BasicJsonStats.Create(StatsType.Event,
                                                perEvent.TruthStats.TrueCount, perEvent.TruthStats.FalseCount,
                                                perEvent.QueryStats.TrueCount, perEvent.QueryStats.FalseCount));

            // Base-level statistics are appended only when the bin has them.
            if (binnedStats.Stats.TryGetValue(StatsType.Base, out var perBase))
            {
                collected.Add(BasicJsonStats.Create(StatsType.Base,
                                                    perBase.TruthStats.TrueCount, perBase.TruthStats.FalseCount,
                                                    perBase.QueryStats.TrueCount, perBase.QueryStats.FalseCount));
            }

            var binLabel = GenerateBinString(binnedStats.Bin, nextBin, variantType);
            return new BinJsonStats(binLabel, collected);
        }