/// <summary>
/// Parses <paramref name="variant"/> with the samples "normal" and "tumor", builds a
/// <see cref="WittyerVariantInternal"/> from the "tumor" sample, and verifies that the variant
/// itself, its <c>CiPosInterval</c> and its <c>CiEndInterval</c> match the expected coordinates.
/// </summary>
/// <param name="variant">The VCF line to parse.</param>
/// <param name="start">Expected start of the variant interval.</param>
/// <param name="end">Expected end of the variant interval.</param>
/// <param name="posStart">Expected start of <c>CiPosInterval</c>.</param>
/// <param name="posEnd">Expected end of <c>CiPosInterval</c>.</param>
/// <param name="endStart">Expected start of <c>CiEndInterval</c>.</param>
/// <param name="endEnd">Expected end of <c>CiEndInterval</c>.</param>
/// <exception cref="NotSupportedException">No variant type could be determined for the line.</exception>
public static void WittyerVariantIntervalCorrect([NotNull] string variant, uint start, uint end,
    uint posStart, uint posEnd, uint endStart, uint endEnd)
{
    const string sampleName = "tumor";

    var parserSettings = VcfVariantParserSettings.Create(
        ImmutableList.Create("normal", sampleName), GenomeAssembly.Hg38);
    var parsedVariant = VcfVariant.TryParse(variant, parserSettings).GetOrThrowDebug();

    // Only the resolved type matters here; the returned failure reason is deliberately ignored.
    WittyerType.ParseFromVariant(parsedVariant, false, sampleName, out var type);
    if (type == null)
    {
        throw new NotSupportedException("This test does not handle svType null");
    }

    var actual = WittyerVariantInternal.Create(
        parsedVariant, parsedVariant.Samples[sampleName], type, Bins, PercentDistance, BasepairDistance);

    var expectedInterval = ContigAndInterval.Create(parsedVariant.Contig, start, end);
    var expectedCiPos = BedInterval.Create(posStart, posEnd);
    var expectedCiEnd = BedInterval.Create(endStart, endEnd);

    // All three comparisons are queued before AssertAll evaluates them together.
    MultiAssert.Equal(expectedInterval, actual);
    MultiAssert.Equal(expectedCiPos, actual.CiPosInterval);
    MultiAssert.Equal(expectedCiEnd, actual.CiEndInterval);
    MultiAssert.AssertAll();
}
/// <summary>
/// Builds one truth and one query variant from VCF lines, overlaps the query against a
/// <see cref="TruthForest"/> holding the truth variant, finalizes both, and checks the resulting
/// WIT decisions.
/// </summary>
/// <param name="truthVar">VCF line of the truth variant.</param>
/// <param name="queryVar">VCF line of the query variant.</param>
/// <param name="type">Name of the variant type for which input specs are generated.</param>
/// <param name="isTp">
/// When true both variants are expected to be true positives; otherwise the query is expected to be
/// a false positive and the truth a false negative.
/// </param>
public void OverlapWorks_DupDel([NotNull] string truthVar, [NotNull] string queryVar, string type, bool isTp)
{
    const string sampleName = "blah";
    const bool isCrossTypeOn = true;

    var parserSettings = VcfVariantParserSettings.Create(new List<string> { sampleName });
    var truthVcf = VcfVariant.TryParse(truthVar, parserSettings).GetOrThrow();

    var parsedType = WittyerType.Parse(type);
    var specsByType = InputSpec
        .GenerateCustomInputSpecs(!isCrossTypeOn, new[] { parsedType }, percentDistance: PercentDistance)
        .ToDictionary(spec => spec.VariantType, spec => spec);

    // Both CreateVariant calls share the same breakend dictionary and error list.
    var breakends = new Dictionary<IGeneralBnd, IVcfVariant>();
    var errors = new List<string>();

    var truthV = (IMutableWittyerSimpleVariant)WittyerVcfReader.CreateVariant(
        truthVcf, truthVcf.Samples.First().Value, true, sampleName,
        specsByType, breakends, errors, isCrossTypeOn);

    var queryVcf = VcfVariant.TryParse(queryVar, parserSettings).GetOrThrow();
    var queryV = (IMutableWittyerVariant)WittyerVcfReader.CreateVariant(
        queryVcf, queryVcf.Samples.First().Value, false, sampleName,
        specsByType, breakends, errors, isCrossTypeOn);

    var forest = TruthForest.Create(sampleName, VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build());
    forest.AddTarget(truthV);

    OverlappingUtils.DoOverlapping(
        forest.VariantTrees, queryV, OverlappingUtils.IsVariantAlleleMatch, isCrossTypeOn, true);

    // The query is finalized before the truth, as in the original sequence.
    queryV.Finalize(WitDecision.FalsePositive, EvaluationMode.CrossTypeAndSimpleCounting, null);
    truthV.Finalize(WitDecision.FalseNegative, EvaluationMode.CrossTypeAndSimpleCounting, null);

    var expectedQueryWit = isTp ? WitDecision.TruePositive : WitDecision.FalsePositive;
    var expectedTruthWit = isTp ? WitDecision.TruePositive : WitDecision.FalseNegative;
    Assert.Equal(expectedQueryWit, queryV.Sample.Wit);
    Assert.Equal(expectedTruthWit, truthV.Sample.Wit);
}
/// <summary>
/// Verifies that <paramref name="inputVariant"/>, parsed with the samples "NA12878" and "haha",
/// is classified as <see cref="WittyerType.CopyNumberReference"/> for sample "NA12878".
/// </summary>
/// <param name="inputVariant">The VCF line to parse.</param>
public static void ParseReferenceVariantWorks([NotNull] string inputVariant)
{
    const string sampleName = "NA12878";

    var parserSettings = VcfVariantParserSettings.Create(
        ImmutableList.Create(sampleName, "haha"), GenomeAssembly.Hg19);
    var parsedVariant = VcfVariant.TryParse(inputVariant, parserSettings).GetOrThrowDebug();

    // The failure reason returned by ParseFromVariant is not inspected by this test.
    WittyerType.ParseFromVariant(parsedVariant, false, sampleName, out var actualType);

    Assert.Equal(WittyerType.CopyNumberReference, actualType);
}
/// <summary>
/// Test helper that parses <paramref name="vcfline"/> with the single sample "normal" and wraps it
/// in a <see cref="WittyerVariantInternal"/> built with the shared <c>Bins</c>,
/// <c>PercentDistance</c> and <c>BasepairDistance</c> values.
/// </summary>
/// <param name="vcfline">The VCF line to parse.</param>
/// <returns>The variant created from the parsed line and its "normal" sample.</returns>
/// <exception cref="NotSupportedException">No variant type could be determined for the line.</exception>
private static IWittyerSimpleVariant CreateWittyerVariant([NotNull] string vcfline)
{
    const string sampleName = "normal";

    var parserSettings = VcfVariantParserSettings.Create(
        ImmutableList.Create(sampleName), GenomeAssembly.Grch37);
    var parsedVariant = VcfVariant.TryParse(vcfline, parserSettings).GetOrThrowDebug();

    // The type is resolved with cross-type matching on and without a sample name.
    WittyerType.ParseFromVariant(parsedVariant, true, null, out var svType);
    if (svType == null)
    {
        throw new NotSupportedException("This method does not handle svType null");
    }

    var created = WittyerVariantInternal.Create(
        parsedVariant, parsedVariant.Samples[sampleName], svType, Bins, PercentDistance, BasepairDistance);
    return created;
}
/// <summary>
/// Creates the <see cref="Winner"/> describing the bin that <paramref name="variantInterval"/>
/// falls into.
/// </summary>
/// <param name="svType">The variant type of the winner.</param>
/// <param name="variantInterval">The interval to bin; when null the last bin is used.</param>
/// <param name="bins">The bin boundaries; when null or empty an unbinned winner is created.</param>
/// <returns>
/// A winner whose start is the selected bin and whose end is the following bin, or null when the
/// selected bin is the last one.
/// </returns>
internal static Winner Create([NotNull] WittyerType svType, [CanBeNull] IInterval<uint> variantInterval,
    [CanBeNull] IReadOnlyList<uint> bins)
{
    // No bins configured: fall back to the single-argument overload.
    if (bins == null || bins.Count == 0)
    {
        return Create(svType);
    }

    // No interval: take the last bin, which has no upper bound.
    if (variantInterval == null)
    {
        return new Winner(svType, bins[bins.Count - 1], null);
    }

    var binIndex = GetBinIndex(variantInterval, bins);

    // A negative index selects the range from the starting bin up to the first configured bin.
    if (binIndex < 0)
    {
        return Create(svType, WittyerConstants.StartingBin, bins[0]);
    }

    uint? nextBin;
    if (binIndex < bins.Count - 1)
    {
        nextBin = bins[binIndex + 1];
    }
    else
    {
        nextBin = default(uint?);
    }

    return Create(svType, bins[binIndex], nextBin);
}
/// <summary>
/// Parses <paramref name="vcfString"/> with the samples "s1" and "s2" and checks the outcome of
/// <see cref="WittyerType.ParseFromVariant"/> for sample "s1".
/// </summary>
/// <param name="vcfString">The VCF line to parse.</param>
/// <param name="isCrossTypeOn">Forwarded to <see cref="WittyerType.ParseFromVariant"/>.</param>
/// <param name="expected">
/// Name of the expected type; when null the failure reason is expected to be
/// <see cref="FailedReason.UndeterminedCn"/> instead.
/// </param>
public static void ParseWittyerVariantType_AssignCorrectType([NotNull] string vcfString, bool isCrossTypeOn,
    [CanBeNull] string expected)
{
    const string sampleName = "s1";

    var parserSettings = VcfVariantParserSettings.Create(
        ImmutableList.Create(sampleName, "s2"), GenomeAssembly.Hg38);
    var variant = VcfVariant.TryParse(vcfString, parserSettings).GetOrThrowDebug();

    var reason = WittyerType.ParseFromVariant(variant, isCrossTypeOn, sampleName, out var assignedType);

    if (expected != null)
    {
        Assert.Equal(WittyerType.Parse(expected), assignedType);
        return;
    }

    // No expected type: only the failure reason is checked.
    Assert.Equal(FailedReason.UndeterminedCn, reason);
}
/// <inheritdoc />
/// <remarks>
/// Loads the setting as a <see cref="JObject"/>, validates it, and then deserializes it with
/// <c>ToObject</c>. Validation rejects duplicate field names (compared case-insensitively),
/// a missing or unknown variant type, unrecognized field names, a variant type that was already
/// seen by this converter instance, and missing required fields. Fields that are not expected for
/// the variant type only produce a console warning.
/// </remarks>
/// <exception cref="JsonSerializationException">The setting failed one of the validation checks.</exception>
public override InputSpec ReadJson(JsonReader reader, Type objectType, InputSpec existingValue,
    bool hasExistingValue, JsonSerializer serializer)
{
    var jo = JObject.Load(reader, JsonLoadSettings);

    // Error handling
    VerifySettings();

    return jo.ToObject<InputSpec>();

    void VerifySettings()
    {
        var fieldNamesList = jo.Properties().Select(x => x.Name).ToList();
        var fieldNames = Enumerable.ToHashSet(fieldNamesList, StringComparer.OrdinalIgnoreCase);

        // Check all field names are unique.
        if (fieldNames.Count < fieldNamesList.Count)
        {
            throw new JsonSerializationException("Setting contains duplicate field names in config file.");
        }

        // Check that 'VariantType' field exists.
        if (!fieldNames.Contains(WittyerSettings.VariantTypeName))
        {
            throw new JsonSerializationException($"Setting missing {WittyerSettings.VariantTypeName} field.");
        }

        // The existence check above is case-insensitive, so the lookup has to be as well.
        // An ordinal lookup returned null for a differently-cased key and the subsequent
        // Value<string>() call then failed with an ArgumentNullException instead of a
        // JsonSerializationException. Duplicates were rejected above, so the case-insensitive
        // lookup cannot be ambiguous.
        var variantTypeToken =
            jo.GetValue(WittyerSettings.VariantTypeName, StringComparison.OrdinalIgnoreCase);
        var variantType = variantTypeToken?.Value<string>();

        // Make sure that variant type is recognized (a JSON null value is treated as unknown).
        if (variantType == null || !WittyerType.TryParse(variantType, out var variantTypeEnum))
        {
            throw new JsonSerializationException($"Unknown variant type '{variantType}' in the config file.");
        }

        // Check that each field name is recognized. This comparison is case-sensitive, so a
        // field name written with the wrong casing is reported here.
        var unexpectedFields = fieldNames.Except(AllFieldNames, StringComparer.Ordinal).StringJoin(", ");
        if (unexpectedFields.Length > 0)
        {
            throw new JsonSerializationException($"Unrecognized field names in config file: {unexpectedFields}.");
        }

        // Make sure the variant type is unique.
        if (!_typeSet.Add(variantTypeEnum))
        {
            throw new JsonSerializationException($"Duplicate variant type '{variantType}' in the config file.");
        }

        // Types with bins and lengths need every field, types with bins only need the insertion
        // fields, and types without bins need the translocation fields.
        var expectedFieldNames = variantTypeEnum.HasBins
            ? variantTypeEnum.HasLengths
                ? AllFieldNames
                : InsFieldNames
            : TraFieldNames;

        var missingFields = expectedFieldNames.Except(fieldNames);
        if (missingFields.Count > 0)
        {
            throw new JsonSerializationException(
                $"Setting for variant type '{variantType}' did not contain required fields: {string.Join(", ", missingFields)}.");
        }

        unexpectedFields = fieldNames.Except(expectedFieldNames, StringComparer.Ordinal).StringJoin(", ");
        if (unexpectedFields.Length > 0)
        {
            // Print a warning if unexpectedFields
            Console.WriteLine($"Warning: {variantTypeEnum} type shouldn't " +
                              "contain the following fields in the config file: " + unexpectedFields);
        }
    }
}
/// <summary>
/// Converts a parsed VCF record into the object used for evaluation, or into an unsupported
/// variant that carries the <see cref="FailedReason"/> explaining why it is excluded.
/// </summary>
/// <param name="vcfVariant">The VCF record to convert.</param>
/// <param name="sample">The sample of the record to attach to the created variant; may be null.</param>
/// <param name="isTruth">
/// Whether the record comes from the truth set. Truth records are filtered with the default
/// include/exclude filters from <see cref="WittyerConstants"/> instead of the ones in the input
/// spec, and are exempt from the sample FT check.
/// </param>
/// <param name="sampleName">Sample name forwarded to <see cref="WittyerType.ParseFromVariant"/>.</param>
/// <param name="inputSpecDict">
/// Settings per variant type; a record whose type has no entry is returned as unsupported with
/// <see cref="FailedReason.VariantTypeSkipped"/>.
/// </param>
/// <param name="bndSet">
/// Breakends whose mate has not been seen yet. An unpaired breakend is added; when the mate is
/// found the stored entry is removed and used as the second record of the pair.
/// </param>
/// <param name="errorList">
/// Receives a description of exceptions thrown while creating a non-breakend variant, until it
/// holds more than <c>MaxNonSupportedVariantToPrint</c> entries.
/// </param>
/// <param name="isCrossTypeOn">Forwarded to <see cref="WittyerType.ParseFromVariant"/>.</param>
/// <returns>
/// An unsupported variant, a breakend pair (also used for insertions, paired with themselves),
/// the unpaired breakend that was stored in <paramref name="bndSet"/>, or a regular variant.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Type parsing reported <see cref="FailedReason.Unset"/> as the failure reason.
/// </exception>
/// <exception cref="InvalidDataException">Type parsing returned neither a type nor a reason.</exception>
/// <exception cref="InvalidOperationException">
/// A paired breakend was found in <paramref name="bndSet"/> but could not be removed.
/// </exception>
internal static IContigProvider CreateVariant([NotNull] IVcfVariant vcfVariant, [CanBeNull] IVcfSample sample,
    bool isTruth, [CanBeNull] string sampleName, IReadOnlyDictionary<WittyerType, InputSpec> inputSpecDict,
    IDictionary<IGeneralBnd, IVcfVariant> bndSet, List<string> errorList, bool isCrossTypeOn)
{
    var failedReason = WittyerType.ParseFromVariant(vcfVariant, isCrossTypeOn, sampleName, out var svType);
    if (failedReason != null)
    {
        // The single-string constructor of ArgumentOutOfRangeException takes the parameter name,
        // not the message, so the (paramName, message) overload is used to keep the text readable.
        return CreateUnsupportedVariant(vcfVariant, sample,
            failedReason.Value == FailedReason.Unset
                ? throw new ArgumentOutOfRangeException(nameof(vcfVariant),
                    $"Got {nameof(FailedReason)}.{FailedReason.Unset} which means bug in {nameof(WittyerType.TryParse)}")
                : failedReason.Value,
            isTruth);
    }

    if (svType == null)
    {
        throw new InvalidDataException("svType should not be null with no failed reason");
    }

    // User does not specify this SVTYPE in input spec, consider user wants to exclude this
    // particular SVTYPE comparison entirely.
    if (!inputSpecDict.TryGetValue(svType, out var inputSpec))
    {
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.VariantTypeSkipped, isTruth);
    }

    var isSupportedVariant = IsSupportedVariant();
    if (!isSupportedVariant.Equals(FailedReason.Unset))
    {
        return CreateUnsupportedVariant(vcfVariant, sample, isSupportedVariant, isTruth);
    }

    var bpd = inputSpec.BasepairDistance;
    var pd = inputSpec.PercentDistance;
    var bins = inputSpec.BinSizes;

    if (svType == WittyerType.Insertion)
    {
        // Insertion is basically using one same record as both entries of the breakend pair.
        return WittyerBndInternal.Create(vcfVariant, sample, inputSpec.VariantType,
            bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, vcfVariant);
    }

    if (svType == WittyerType.CopyNumberReference
        && vcfVariant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeString)
        && !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svTypeString))
    {
        // Any non-DEL/DUP/CNV that is determined to be reference copy number is not supported.
        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.UnsupportedRefCall, isTruth);
    }

    if (svType == WittyerType.TranslocationBreakend || svType == WittyerType.IntraChromosomeBreakend)
    {
        var currentBnd = GeneralBnd.CreateFromVariant(vcfVariant);

        // Note: this means the paired BND is found as a key in dictionary. Check out the comparer for details.
        if (bndSet.TryGetValue(currentBnd, out var secondVariant))
        {
            if (!bndSet.Remove(currentBnd))
            {
                throw new InvalidOperationException(
                    $"Cannot remove {secondVariant} from breakend dictionary when pair is found: {vcfVariant}! Find a developer to debug!");
            }

            return WittyerBndInternal.Create(vcfVariant, sample, svType,
                bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), bpd, pd, secondVariant);
        }

        // Mate not seen yet: remember this breakend and hand it back unpaired.
        bndSet.Add(currentBnd, vcfVariant);
        return currentBnd;
    }

    try
    {
        return WittyerVariantInternal.Create(vcfVariant, sample, svType,
            bins.Select(sizeSkipTuple => sizeSkipTuple.size).ToReadOnlyList(), pd, bpd);
    }
    catch (Exception e)
    {
        // Creation failures are recorded (up to the print limit) and the record is reported as
        // unsupported instead of aborting the whole read.
        if (errorList.Count <= MaxNonSupportedVariantToPrint)
        {
            errorList.Add(
                new[] { "Exception caught:", e.ToString(), vcfVariant.ToString() }
                    .StringJoin(Environment.NewLine));
        }

        return CreateUnsupportedVariant(vcfVariant, sample, FailedReason.Other, isTruth);
    }

    // Returns FailedReason.Unset when the record passes every check, otherwise the reason it failed.
    FailedReason IsSupportedVariant()
    {
        // Check filters.
        IReadOnlyCollection<string> includedFilters, excludedFilters;
        if (isTruth)
        {
            includedFilters = WittyerConstants.DefaultIncludeFilters;
            excludedFilters = WittyerConstants.DefaultExcludeFilters;
        }
        else
        {
            includedFilters = inputSpec.IncludedFilters;
            excludedFilters = inputSpec.ExcludedFilters;
        }

        if (vcfVariant.Filters.Any(excludedFilters.Contains)
            || includedFilters.Count > 0
            && (vcfVariant.Filters.Count == 0 || !vcfVariant.Filters.Any(includedFilters.Contains)))
        {
            return FailedReason.FilteredBySettings;
        }

        // SVLEN = 0 when they are supposed to have overlaps (svlen is needed for overlapping windows) are ignored
        if (svType.HasOverlappingWindows
            && (vcfVariant.Info.TryGetValue(VcfConstants.EndTagKey, out var endString)
                && vcfVariant.Position.ToString() == endString
                || vcfVariant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLenString)
                && svLenString == "0"))
        {
            return FailedReason.InvalidSvLen;
        }

        // Bnd with pos and alt to be same position (temporarily to accommodate the situation of
        // SVLEN=0 INV representing as bnd).
        if (svType == WittyerType.IntraChromosomeBreakend)
        {
            var mate = SimpleBreakEnd.TryParse(vcfVariant.GetSingleAlt()).GetOrThrow();
            if (vcfVariant.Position == mate.Position)
            {
                return FailedReason.InvalidSvLen;
            }

            // Fall through on purpose: previously this branch returned Unset right away, which
            // made the sample filter and include-bed checks below unreachable for
            // intra-chromosome breakends even though only translocations are exempted there.
        }

        // todo: truth does not care about Sample FT tag, is that ok?
        var sampleFilterOk = isTruth
                             || !includedFilters.Contains(VcfConstants.PassFilter)
                             || !vcfVariant.IsPassFilter()
                             || IsSampleFtPassFilter();
        if (!sampleFilterOk)
        {
            return FailedReason.FailedSampleFilter;
        }

        // Used include bed and variant is completely within a single contig and the bed doesn't include the contig.
        if (inputSpec.IncludedRegions != null
            && svType != WittyerType.TranslocationBreakend
            && !inputSpec.IncludedRegions.IntervalTree.ContainsKey(vcfVariant.Contig))
        {
            return FailedReason.OutsideBedRegion;
        }

        return FailedReason.Unset;

        // A sample passes when it exists and either has no FT entry or its FT entry equals the pass filter.
        bool IsSampleFtPassFilter()
            => sample != null
               && (!sample.SampleDictionary.TryGetValue(WittyerConstants.Ft, out var ft)
                   || ft.Equals(VcfConstants.PassFilter));
    }
}
/// <summary>
/// Creates a <see cref="Winner"/> from a variant type and explicit bin boundaries.
/// </summary>
/// <param name="svType">The variant type of the winner.</param>
/// <param name="start">The start value of the winner.</param>
/// <param name="end">The end value of the winner, or null when there is none.</param>
/// <returns>The new <see cref="Winner"/>.</returns>
public static Winner Create([NotNull] WittyerType svType, uint start, uint? end)
{
    return new Winner(svType, start, end);
}
/// <summary>
/// Creates a <see cref="Winner"/> that starts at <see cref="WittyerConstants.StartingBin"/> and
/// has no end.
/// </summary>
/// <param name="svType">The variant type of the winner.</param>
/// <returns>The new <see cref="Winner"/>.</returns>
internal static Winner Create([NotNull] WittyerType svType)
{
    return Create(svType, WittyerConstants.StartingBin, null);
}
/// <summary>
/// Initializes a <see cref="Winner"/> by storing the given values in <c>SvType</c>,
/// <c>Start</c> and <c>End</c>.
/// </summary>
/// <param name="svType">Value stored in <c>SvType</c>.</param>
/// <param name="start">Value stored in <c>Start</c>.</param>
/// <param name="end">Value stored in <c>End</c>; may be null.</param>
private Winner([NotNull] WittyerType svType, uint start, uint? end)
{
    this.SvType = svType;
    this.Start = start;
    this.End = end;
}
/// <summary>
/// Builds the label of a bin.
/// </summary>
/// <param name="currentBin">The lower boundary of the bin.</param>
/// <param name="nextBin">
/// The upper boundary of the bin as text, or <c>WittyerConstants.Json.InfiniteBin</c> for the
/// open-ended bin.
/// </param>
/// <param name="variantType">The variant type the bin belongs to.</param>
/// <returns>
/// "NA" for translocation breakends; the lower boundary directly followed by
/// <paramref name="nextBin"/> for the open-ended bin; otherwise "[lower, upper)".
/// </returns>
private static string GenerateBinString(uint currentBin, string nextBin, WittyerType variantType)
{
    if (variantType == WittyerType.TranslocationBreakend)
    {
        return "NA";
    }

    if (nextBin.Equals(WittyerConstants.Json.InfiniteBin))
    {
        return currentBin + nextBin;
    }

    return $"[{currentBin}, {nextBin})";
}
/// <summary>
/// Creates the JSON statistics of one bin from its per-bin stats.
/// </summary>
/// <param name="binnedStats">
/// The stats of the bin. Event stats are read unconditionally; base stats are included only when present.
/// </param>
/// <param name="nextBin">The upper boundary of the bin as text, used for the bin label.</param>
/// <param name="variantType">The variant type the bin belongs to, used for the bin label.</param>
/// <returns>
/// A <see cref="BinJsonStats"/> holding the bin label and the event stats, followed by the base
/// stats when available.
/// </returns>
public static BinJsonStats Create([NotNull] IPerBinStats binnedStats, string nextBin, WittyerType variantType)
{
    var perEvent = binnedStats.Stats[StatsType.Event];
    var collected = new List<BasicJsonStats>
    {
        BasicJsonStats.Create(
            StatsType.Event,
            perEvent.TruthStats.TrueCount,
            perEvent.TruthStats.FalseCount,
            perEvent.QueryStats.TrueCount,
            perEvent.QueryStats.FalseCount),
    };

    // Base-level stats are optional; append them after the event stats when the bin has them.
    if (binnedStats.Stats.TryGetValue(StatsType.Base, out var perBase))
    {
        collected.Add(BasicJsonStats.Create(
            StatsType.Base,
            perBase.TruthStats.TrueCount,
            perBase.TruthStats.FalseCount,
            perBase.QueryStats.TrueCount,
            perBase.QueryStats.FalseCount));
    }

    var binLabel = GenerateBinString(binnedStats.Bin, nextBin, variantType);
    return new BinJsonStats(binLabel, collected);
}