Ejemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="WittyerVcfReader"/> by eagerly reading every item of <paramref name="vcfReader"/>.
        /// </summary>
        /// <remarks>
        /// Every successfully parsed variant that has at least one sample is rebuilt so that it carries exactly
        /// one sample: the one named <paramref name="sampleName"/>, or the first sample when
        /// <paramref name="sampleName"/> is <c>null</c>. Variants without samples are kept as they are.
        /// </remarks>
        /// <param name="vcfReader">Reader supplying the VCF items.</param>
        /// <param name="inputSpec">
        /// Input specs per type; none of their included or excluded filters may be null, empty or whitespace.
        /// </param>
        /// <param name="mode">Evaluation mode handed to the created reader.</param>
        /// <param name="sampleName">Name of the sample to keep, or <c>null</c> to keep the first sample.</param>
        /// <returns>A reader wrapping the successfully parsed variants.</returns>
        /// <exception cref="InvalidDataException">
        /// A filter is null, empty or whitespace, or at least one item of the reader failed to parse.
        /// </exception>
        public static WittyerVcfReader Create([NotNull] IVcfReader vcfReader,
                                              [NotNull] IReadOnlyDictionary<WittyerType, InputSpec> inputSpec, EvaluationMode mode,
                                              [CanBeNull] string sampleName)
        {
            var allFilters = inputSpec.SelectMany(spec => spec.Value.ExcludedFilters.Concat(spec.Value.IncludedFilters));
            if (allFilters.Any(string.IsNullOrWhiteSpace))
            {
                throw new InvalidDataException("Passed in empty or whitespace as a filter!");
            }

            var parsedVariants = new List<IVcfVariant>();
            var parseFailures = new List<Exception>();

            foreach (var item in vcfReader.GetAllItems())
            {
                item.DoOnSuccess(parsed =>
                {
                    // Nothing to narrow down when the variant has no sample columns.
                    if (parsed.Samples.Count <= 0)
                    {
                        parsedVariants.Add(parsed);
                        return;
                    }

                    var keptSample = sampleName != null ? parsed.Samples[sampleName] : parsed.Samples[0];

                    // Copy the fields of the kept sample into a fresh single-sample dictionary.
                    var singleSampleFields = SampleDictionaries.CreateBuilder()
                        .AddSample(keptSample.SampleName)
                        .MoveOnToDictionaries();
                    keptSample.SampleDictionary.ForEach(
                        field => singleSampleFields.SetSampleField(keptSample.SampleName, (field.Key, field.Value)));

                    parsedVariants.Add(parsed.ToBuilder().SetSamples(singleSampleFields.Build()).Build());
                }).DoOnFailure(failure => parseFailures.Add(failure));
            }

            if (parseFailures.Count != 0)
            {
                // Only the first few messages are reported to keep the error readable.
                var firstMessages = parseFailures.Take(5).Select(failure => failure.Message).StringJoin("\n");

                throw new InvalidDataException(
                    $"Found {parseFailures.Count} variants cannot be parsed in {vcfReader.FileSource.FullName}: first 5 or less:\n {firstMessages}");
            }

            return new WittyerVcfReader(vcfReader, inputSpec, parsedVariants.AsReadOnly(), mode);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a copy of <paramref name="baseVariant"/> whose samples are replaced by a single sample,
        /// named after the default truth or query sample, that is marked as not assessed.
        /// </summary>
        /// <param name="baseVariant">Variant to copy.</param>
        /// <param name="sample">
        /// Sample whose fields are carried over; when <c>null</c>, only the WIT and WHY fields are written.
        /// </param>
        /// <param name="why">Reason stored (via <c>ToString()</c>) in the WHY field.</param>
        /// <param name="isTruth">
        /// <c>true</c> to use the default truth sample name, <c>false</c> to use the default query sample name.
        /// </param>
        /// <returns>The rebuilt variant.</returns>
        /// <exception cref="InvalidDataException">The selected default sample name is <c>null</c>.</exception>
        internal static IVcfVariant CreateUnsupportedVariant([NotNull] IVcfVariant baseVariant, [CanBeNull] IVcfSample sample,
                                                             FailedReason why, bool isTruth)
        {
            var targetSampleName = isTruth
                ? SamplePair.Default.TruthSampleName
                : SamplePair.Default.QuerySampleName;
            if (targetSampleName == null)
            {
                throw new InvalidDataException(
                    $"Somehow, {nameof(SamplePair)}.{nameof(SamplePair.Default)} was null!!");
            }

            var fieldBuilder = SampleDictionaries.CreateBuilder()
                .AddSample(targetSampleName)
                .MoveOnToDictionaries();

            // Existing sample fields first, then the WIT/WHY tags so they are written last.
            var carriedOverFields = sample?.SampleDictionary ?? ImmutableDictionary<string, string>.Empty.AsEnumerable();
            var fieldsToWrite = carriedOverFields
                .Select(field => (field.Key, field.Value))
                .FollowedWith(
                    (WittyerConstants.WittyerMetaInfoLineKeys.Wit, NotAssessed),
                    (WittyerConstants.WittyerMetaInfoLineKeys.Why, why.ToString()));

            foreach (var field in fieldsToWrite)
            {
                fieldBuilder.SetSampleField(targetSampleName, field);
            }

            var variantBuilder = baseVariant.ToBuilder();
            return variantBuilder.SetSamples(fieldBuilder.Build()).Build();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily converts the contents of <paramref name="result"/> into VCF variants: first the entries of
        /// <c>Variants</c>, then those of <c>BreakendPairsAndInsertions</c>, and finally <c>NotAssessedVariants</c>.
        /// </summary>
        /// <remarks>
        /// Assessed variants get the WIN/WHERE/WHO/WOW info entries and the WIT/WHY/WHAT sample fields.
        /// A breakend whose end variant is a different object than its original variant yields a second
        /// variant built from that end variant.
        /// </remarks>
        /// <param name="result">Result whose variants are converted.</param>
        /// <param name="isTruth">
        /// <c>null</c>: a single sample column is written under the original sample name (or "SAMPLE" when there
        /// is none) and not-assessed variants are returned untouched. Otherwise the default sample names are
        /// used and the fields are written to sample index 0, or to index 1 when the value is <c>false</c>.
        /// </param>
        /// <returns>A deferred sequence of the converted variants.</returns>
        internal static IEnumerable<IVcfVariant> ProcessVariants([NotNull] IWittyerResult result, bool? isTruth)
        {
            var keepOriginalNames = isTruth == null;
            var targetColumn = isTruth == false ? 1 : 0;

            foreach (var bucket in result.Variants.Values)
            {
                foreach (var entry in bucket)
                {
                    foreach (var converted in ConvertToVcfVariant(entry))
                    {
                        yield return converted;
                    }
                }
            }

            foreach (var bucket in result.BreakendPairsAndInsertions.Values)
            {
                foreach (var entry in bucket)
                {
                    foreach (var converted in ConvertToVcfVariant(entry))
                    {
                        yield return converted;
                    }
                }
            }

            foreach (var notAssessed in result.NotAssessedVariants)
            {
                yield return ConvertToUnsupportedVcfVariant(notAssessed);
            }

            // Not-assessed variants only need their sample columns renamed (and only when isTruth is set).
            IVcfVariant ConvertToUnsupportedVcfVariant(IVcfVariant unsupported)
            {
                if (keepOriginalNames)
                {
                    return unsupported;
                }

                var variantBuilder = unsupported.ToBuilder();
                var renamedSamples = GetClearedSampleBuilder(unsupported.Samples[0].SampleDictionary,
                                                             DefaultTruthSampleName, DefaultQuerySampleName).Build();
                return variantBuilder.SetSamples(renamedSamples).Build();
            }

            // Produces one VCF variant per assessed variant, plus one for the mate of a two-record breakend.
            IEnumerable<IVcfVariant> ConvertToVcfVariant(IWittyerSimpleVariant wittyerVariant)
            {
                // Info tags
                var winValue = wittyerVariant.Win.ToString();
                var overlaps = wittyerVariant.OverlapInfo;

                // Cap the number of reported annotations.
                if (overlaps.Count > WittyerConstants.MaxNumberOfAnnotations)
                {
                    overlaps = overlaps.Take(WittyerConstants.MaxNumberOfAnnotations).ToList();
                }

                var noOverlaps = overlaps.Count == 0;

                var whereValue = noOverlaps
                    ? MissingValueString
                    : overlaps.Select(x => x.Where.ToString()).StringJoin(WittyerConstants.InfoValueDel);
                var whoValue = noOverlaps
                    ? MissingValueString
                    : overlaps.Select(x => x.Who).StringJoin(WittyerConstants.InfoValueDel);
                var wowValue = wittyerVariant.VariantType.HasOverlappingWindows && !noOverlaps
                    ? overlaps.Select(x => ToWowString(x.Wow)).StringJoin(WittyerConstants.InfoValueDel)
                    : MissingValueString;

                var witInfo = new Dictionary<string, string>();
                witInfo.Add(Win, winValue);
                witInfo.Add(Where, whereValue);
                witInfo.Add(Who, whoValue);
                witInfo.Add(Wow, wowValue);

                var primarySample = wittyerVariant.Sample.GetOriginalSample();
                var primarySamples = AddWitTags(primarySample?.SampleDictionary,
                                                SampleNamesFor(primarySample?.SampleName));

                var mergedInfo = wittyerVariant.OriginalVariant.Info.ToImmutableDictionary().SetItems(witInfo);

                yield return wittyerVariant.OriginalVariant.ToBuilder()
                    .SetInfo(mergedInfo)
                    .SetSamples(primarySamples)
                    .Build();

                // insertions are secretly two breakends repeated.
                if (wittyerVariant is IWittyerBnd bnd && !ReferenceEquals(bnd.OriginalVariant, bnd.EndOriginalVariant))
                {
                    var mateSample = bnd.EndOriginalVariant.Samples.Values.FirstOrDefault();
                    var mateSamples = AddWitTags(mateSample?.SampleDictionary,
                                                 SampleNamesFor(mateSample?.SampleName));

                    var mateBuilder = bnd.EndOriginalVariant.ToBuilder();
                    var mateInfo = bnd.EndOriginalVariant.Info.ToImmutableDictionary().SetItems(witInfo);

                    yield return mateBuilder.SetInfo(mateInfo).SetSamples(mateSamples).Build();
                }

                // Original sample name (or "SAMPLE") when isTruth is null; otherwise the default names,
                // presumably the truth/query pair - confirm against the declaration of DefaultSampleNamesPair.
                string[] SampleNamesFor(string originalName)
                {
                    if (keepOriginalNames)
                    {
                        return new[] { originalName ?? "SAMPLE" };
                    }

                    return DefaultSampleNamesPair;
                }

                string ToWowString(IInterval<uint> interval)
                {
                    if (interval == null)
                    {
                        return MissingValueString;
                    }

                    return $"{interval.Start}-{interval.Stop}";
                }

                // WHY field: joined reasons, or the no-overlap marker when there are none.
                string DescribeWhy()
                {
                    var reasons = wittyerVariant.Sample.Why;
                    if (reasons.Count == 0)
                    {
                        return NoOverlapString;
                    }

                    return reasons.Select(x => x.ToStringDescription()).StringJoin(WittyerConstants.SampleValueDel);
                }

                // WHAT field: joined match descriptions, or the missing-value marker when there are none.
                string DescribeWhat()
                {
                    var matches = wittyerVariant.Sample.What;
                    if (matches.Count == 0)
                    {
                        return MissingValueString;
                    }

                    return matches.Select(x => x.ToStringDescription()).StringJoin(WittyerConstants.SampleValueDel);
                }

                // Copies the given sample fields and then writes WIT, WHY and WHAT (in that order).
                SampleDictionaries AddWitTags(IReadOnlyDictionary<string, string> sampleDict, string[] sampleNames)
                {
                    return GetClearedSampleBuilder(sampleDict, sampleNames)
                        .SetSampleField(targetColumn, (Wit, wittyerVariant.Sample.Wit.ToStringDescription()))
                        .SetSampleField(targetColumn, (Why, DescribeWhy()))
                        .SetSampleField(targetColumn, (What, DescribeWhat()))
                        .Build();
                }
            }

            // Starts a builder with the given sample names and copies sampleDict (if any) into the target column.
            SampleDictionaryBuilder GetClearedSampleBuilder(IReadOnlyDictionary<string, string> sampleDict, params string[] sampleNames)
            {
                var nameStage = SampleDictionaries.CreateBuilder();
                foreach (var name in sampleNames)
                {
                    nameStage.AddSample(name);
                }

                var fieldStage = nameStage.MoveOnToDictionaries();
                if (sampleDict != null)
                {
                    foreach (var field in sampleDict)
                    {
                        fieldStage.SetSampleField(targetColumn, (field.Key, field.Value));
                    }
                }

                return fieldStage;
            }
        }