/// <summary>
/// Builds a <see cref="WittyerVcfReader"/> by eagerly reading every item of <paramref name="vcfReader"/>.
/// Variants that carry at least one sample are rebuilt so that they keep exactly one sample:
/// the one named <paramref name="sampleName"/>, or the first sample when <paramref name="sampleName"/> is null.
/// Variants without samples are kept untouched.
/// </summary>
/// <param name="vcfReader">Source of the VCF items; all of its items are consumed here.</param>
/// <param name="inputSpec">Per-type input specs; their included/excluded filters are validated up front.</param>
/// <param name="mode">Evaluation mode handed through to the created reader.</param>
/// <param name="sampleName">Name of the sample to keep, or null to keep the first sample.</param>
/// <returns>A reader wrapping the successfully parsed variants.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when any filter is null, empty or whitespace, or when at least one item failed to parse.
/// </exception>
public static WittyerVcfReader Create([NotNull] IVcfReader vcfReader,
    [NotNull] IReadOnlyDictionary<WittyerType, InputSpec> inputSpec, EvaluationMode mode,
    [CanBeNull] string sampleName)
{
    var allFilters = inputSpec.SelectMany(
        spec => spec.Value.ExcludedFilters.Concat(spec.Value.IncludedFilters));
    if (allFilters.Any(filter => string.IsNullOrWhiteSpace(filter)))
    {
        throw new InvalidDataException("Passed in empty or whitespace as a filter!");
    }

    var parsed = new List<IVcfVariant>();
    var failures = new List<Exception>();
    foreach (var attempt in vcfReader.GetAllItems())
    {
        attempt.DoOnSuccess(v =>
        {
            // Nothing to trim when the record has no sample columns.
            if (v.Samples.Count <= 0)
            {
                parsed.Add(v);
                return;
            }

            // Keep only the requested sample (or the first one) and copy its fields verbatim.
            var chosen = sampleName == null ? v.Samples[0] : v.Samples[sampleName];
            var fields = SampleDictionaries.CreateBuilder()
                .AddSample(chosen.SampleName)
                .MoveOnToDictionaries();
            chosen.SampleDictionary.ForEach(kvp =>
                fields.SetSampleField(chosen.SampleName, (kvp.Key, kvp.Value)));
            parsed.Add(v.ToBuilder().SetSamples(fields.Build()).Build());
        }).DoOnFailure(e => failures.Add(e));
    }

    if (failures.Count != 0)
    {
        // Only the first few messages are reported to keep the error readable.
        var msg = failures.Take(5).Select(x => x.Message).StringJoin("\n");
        throw new InvalidDataException(
            $"Found {failures.Count} variants cannot be parsed in {vcfReader.FileSource.FullName}: first 5 or less:\n {msg}");
    }

    return new WittyerVcfReader(vcfReader, inputSpec, parsed.AsReadOnly(), mode);
}
/// <summary>
/// Returns a copy of <paramref name="baseVariant"/> whose samples are replaced by a single sample
/// named after the default truth or query sample name. The sample carries the fields of
/// <paramref name="sample"/> (if any), followed by a WIT field set to <c>NotAssessed</c> and a
/// WHY field set to the text of <paramref name="why"/>.
/// </summary>
/// <param name="baseVariant">Variant to copy everything except the samples from.</param>
/// <param name="sample">Original sample whose fields are carried over; may be null.</param>
/// <param name="why">Reason the variant was not assessed.</param>
/// <param name="isTruth">True to use the default truth sample name, false for the default query sample name.</param>
/// <exception cref="InvalidDataException">Thrown when the selected default sample name is null.</exception>
internal static IVcfVariant CreateUnsupportedVariant([NotNull] IVcfVariant baseVariant,
    [CanBeNull] IVcfSample sample, FailedReason why, bool isTruth)
{
    var defaultName = isTruth
        ? SamplePair.Default.TruthSampleName
        : SamplePair.Default.QuerySampleName;
    if (defaultName == null)
    {
        throw new InvalidDataException(
            $"Somehow, {nameof(SamplePair)}.{nameof(SamplePair.Default)} was null!!");
    }

    var sampleBuilder = SampleDictionaries.CreateBuilder()
        .AddSample(defaultName)
        .MoveOnToDictionaries();

    // A missing sample (or a missing dictionary) contributes no fields.
    var originalFields = sample?.SampleDictionary
                         ?? ImmutableDictionary<string, string>.Empty.AsEnumerable();

    // The WIT/WHY entries come after the original fields.
    var allFields = originalFields
        .Select(kvp => (kvp.Key, kvp.Value))
        .FollowedWith(
            (WittyerConstants.WittyerMetaInfoLineKeys.Wit, NotAssessed),
            (WittyerConstants.WittyerMetaInfoLineKeys.Why, why.ToString()));

    foreach (var field in allFields)
    {
        sampleBuilder.SetSampleField(defaultName, field);
    }

    var rebuiltSamples = sampleBuilder.Build();
    return baseVariant.ToBuilder().SetSamples(rebuiltSamples).Build();
}
/// <summary>
/// Lazily converts the contents of <paramref name="result"/> back into VCF variants, in this order:
/// every entry of <c>result.Variants</c>, every entry of <c>result.BreakendPairsAndInsertions</c>,
/// and finally <c>result.NotAssessedVariants</c>.
/// </summary>
/// <param name="result">The evaluated result to serialize.</param>
/// <param name="isTruth">
/// When null, each assessed variant gets a single sample named after its original sample
/// ("SAMPLE" if there is none) and not-assessed variants are returned unchanged.
/// Otherwise samples are rebuilt under the default sample names, and the sample fields are written
/// at sample index 1 when the value is false and at index 0 when it is true.
/// </param>
/// <returns>A deferred sequence of the converted variants.</returns>
internal static IEnumerable<IVcfVariant> ProcessVariants([NotNull] IWittyerResult result, bool? isTruth)
{
    // Only an explicit "false" targets sample index 1; null and true both use index 0.
    var sampleIndex = isTruth.GetValueOrDefault(true) ? 0 : 1;

    foreach (var bucket in result.Variants.Values)
    {
        foreach (var converted in bucket.SelectMany(v => ConvertToVcfVariant(v)))
        {
            yield return converted;
        }
    }

    foreach (var bucket in result.BreakendPairsAndInsertions.Values)
    {
        foreach (var converted in bucket.SelectMany(v => ConvertToVcfVariant(v)))
        {
            yield return converted;
        }
    }

    foreach (var notAssessed in result.NotAssessedVariants)
    {
        yield return ConvertToUnsupportedVcfVariant(notAssessed);
    }

    // Re-homes the first sample's fields under the default truth/query sample names,
    // unless no truth/query role was given, in which case the variant passes through as is.
    IVcfVariant ConvertToUnsupportedVcfVariant(IVcfVariant originalVariant)
    {
        if (isTruth == null)
        {
            return originalVariant;
        }

        var cleared = GetClearedSampleBuilder(originalVariant.Samples[0].SampleDictionary,
            DefaultTruthSampleName, DefaultQuerySampleName);
        return originalVariant.ToBuilder().SetSamples(cleared.Build()).Build();
    }

    // Emits the annotated original record, plus the annotated end record for breakends
    // whose end record is a different object from the original record.
    IEnumerable<IVcfVariant> ConvertToVcfVariant(IWittyerSimpleVariant originalVariant)
    {
        // INFO values
        var winValue = originalVariant.Win.ToString();

        var overlaps = originalVariant.OverlapInfo;
        if (overlaps.Count > WittyerConstants.MaxNumberOfAnnotations)
        {
            // Cap the number of reported annotations.
            overlaps = overlaps.Take(WittyerConstants.MaxNumberOfAnnotations).ToList();
        }

        var whereValue = overlaps.Count != 0
            ? overlaps.Select(x => x.Where.ToString()).StringJoin(WittyerConstants.InfoValueDel)
            : MissingValueString;
        var whoValue = overlaps.Count != 0
            ? overlaps.Select(x => x.Who).StringJoin(WittyerConstants.InfoValueDel)
            : MissingValueString;
        // WOW is only reported for types with overlapping windows that actually have annotations.
        var wowValue = originalVariant.VariantType.HasOverlappingWindows && overlaps.Count != 0
            ? overlaps.Select(x => ToWowString(x.Wow)).StringJoin(WittyerConstants.InfoValueDel)
            : MissingValueString;

        var infoUpdates = new Dictionary<string, string>
        {
            { Win, winValue },
            { Where, whereValue },
            { Who, whoValue },
            { Wow, wowValue }
        };

        var primaryFields = originalVariant.Sample.GetOriginalSample()?.SampleDictionary;
        var primaryNames = isTruth == null
            ? new[] { originalVariant.Sample.GetOriginalSample()?.SampleName ?? "SAMPLE" }
            : DefaultSampleNamesPair;
        var primarySamples = AddWitTags(primaryFields, primaryNames);

        var primaryInfo = originalVariant.OriginalVariant.Info.ToImmutableDictionary().SetItems(infoUpdates);
        var primaryBuilder = originalVariant.OriginalVariant.ToBuilder()
            .SetInfo(primaryInfo)
            .SetSamples(primarySamples);
        yield return primaryBuilder.Build();

        // Insertions are represented as the same breakend repeated, so the end record is
        // only emitted when it is not the very same object as the original record.
        if (originalVariant is IWittyerBnd bnd
            && !ReferenceEquals(bnd.OriginalVariant, bnd.EndOriginalVariant))
        {
            var endSample = bnd.EndOriginalVariant.Samples.Values.FirstOrDefault();
            var endNames = isTruth == null
                ? new[] { endSample?.SampleName ?? "SAMPLE" }
                : DefaultSampleNamesPair;
            var endSamples = AddWitTags(endSample?.SampleDictionary, endNames);

            yield return bnd.EndOriginalVariant.ToBuilder()
                .SetInfo(bnd.EndOriginalVariant.Info.ToImmutableDictionary().SetItems(infoUpdates))
                .SetSamples(endSamples)
                .Build();
        }

        // Formats an interval as "start-stop", or the missing value when there is none.
        string ToWowString(IInterval<uint> interval)
        {
            if (interval == null)
            {
                return MissingValueString;
            }

            return $"{interval.Start}-{interval.Stop}";
        }

        // Copies the given sample fields and then sets WIT, WHY and WHAT (in that order)
        // on the sample at sampleIndex.
        SampleDictionaries AddWitTags(IReadOnlyDictionary<string, string> sampleDict, string[] sampleNames)
        {
            var cleared = GetClearedSampleBuilder(sampleDict, sampleNames);
            var withWit = cleared.SetSampleField(sampleIndex,
                (Wit, originalVariant.Sample.Wit.ToStringDescription()));

            var whyText = originalVariant.Sample.Why.Count == 0
                ? NoOverlapString
                : originalVariant.Sample.Why.Select(x => x.ToStringDescription())
                    .StringJoin(WittyerConstants.SampleValueDel);
            var withWhy = withWit.SetSampleField(sampleIndex, (Why, whyText));

            var whatText = originalVariant.Sample.What.Count == 0
                ? MissingValueString
                : originalVariant.Sample.What.Select(x => x.ToStringDescription())
                    .StringJoin(WittyerConstants.SampleValueDel);
            return withWhy.SetSampleField(sampleIndex, (What, whatText)).Build();
        }
    }

    // Creates a builder holding the given sample names and, when a dictionary is supplied,
    // copies every one of its fields onto the sample at sampleIndex.
    SampleDictionaryBuilder GetClearedSampleBuilder(IReadOnlyDictionary<string, string> sampleDict,
        params string[] sampleNames)
    {
        var nameBuilder = SampleDictionaries.CreateBuilder();
        foreach (var name in sampleNames)
        {
            nameBuilder.AddSample(name);
        }

        var fieldBuilder = nameBuilder.MoveOnToDictionaries();
        if (sampleDict != null)
        {
            foreach (var field in sampleDict)
            {
                fieldBuilder.SetSampleField(sampleIndex, (field.Key, field.Value));
            }
        }

        return fieldBuilder;
    }
}