Exemplo n.º 1
0
        // Converts a position plus the confidence-interval INFO tag named by ciInfoTag into an interval.
        //   * Tag absent: returns BedInterval.Create(position - 1, position), or (0, 1) when position is 0.
        //   * Tag present: it must split into exactly two values, otherwise a VcfVariantFormatException
        //     is thrown. Both values are parsed as integers, their signs are dropped, and the pair is
        //     handed to the (position, tuple) overload to compute the final start/stop.
        // Throws InvalidOperationException when either value is not a parsable integer.
        internal static IInterval<uint> ConvertPositionToCiInterval(
            this uint position, [NotNull] IVcfVariant variant, [NotNull] string ciInfoTag)
        {
            if (!variant.Info.TryGetValue(ciInfoTag, out var rawCi))
            {
                // No CI tag: fall back to the single base at the position, never going below zero.
                var fallbackStart = position > 0 ? position - 1 : 0;
                var fallbackStop = position == 0 ? 1 : position;
                return BedInterval.Create(fallbackStart, fallbackStop);
            }

            var fields = rawCi.Split(WittyerConstants.InfoValueDel);
            if (fields.Length != 2)
            {
                throw VcfVariantFormatException.Create(
                    variant.ToString(),
                    ImmutableHashSet.Create(VcfColumn.Info),
                    $"Invalid {ciInfoTag} found: {rawCi}",
                    variant.ToStrings().ToList().AsReadOnly());
            }

            // Parse the start first, then the stop, failing on the first one that is not an integer.
            var startOffset = ParseMagnitude(fields[0])
                              ?? throw new InvalidOperationException($"Failed to parse {ciInfoTag}={rawCi}!");
            var stopOffset = ParseMagnitude(fields[1])
                             ?? throw new InvalidOperationException($"Failed to parse {ciInfoTag}={rawCi}!");

            var (ciStart, ciStop) = ConvertPositionToCiInterval(position, (startOffset, stopOffset));
            return BedInterval.Create(ciStart, ciStop);

            // Returns the magnitude of the integer in text, or null when text is not an integer.
            uint? ParseMagnitude(string text)
            {
                if (!int.TryParse(text, out var number))
                {
                    return null;
                }

                return (uint)(number < 0 ? -number : number);
            }
        }
Exemplo n.º 2
0
            // Builds an IWittyerVariant from a VCF variant.
            // The base interval runs from the variant position to position + SV length. Border intervals
            // around the position and around the last base (end - 1) are computed from the CIPOS / CIEND
            // values together with the percentage and base-pair distances.
            // Throws InvalidDataException when svType is WittyerVariantType.Invalid.
            internal static IWittyerVariant Create([NotNull] IVcfVariant baseVariant,
                                                   string sample, double percentageDistance, uint basepairDistance,
                                                   IReadOnlyList<uint> bins, WittyerVariantType svType)
            {
                if (svType == WittyerVariantType.Invalid)
                {
                    throw new InvalidDataException(
                        $"Invalid {VcfConstants.SvTypeKey} in variant: \n{baseVariant}\nNot sure why you got here though. Check with a witty.er developer!");
                }

                var position = baseVariant.Position;
                var end = position + baseVariant.GetSvLength();
                var baseInterval = BedInterval.Create(position, end);

                var ciPos = baseVariant.ParseCi(WittyerConstants.Cipos);
                var posBorder = position.CalculateBorderInterval(
                    baseInterval, ciPos, percentageDistance, basepairDistance);

                // Every wittyerVariant has an end border; how it is found (END key in INFO, SVLEN, or
                // other means) can be refined in FindEndBorder() later.
                var ciEnd = baseVariant.ParseCi(WittyerConstants.Ciend);
                var endBorder = (end - 1).CalculateBorderInterval(
                    baseInterval, ciEnd, percentageDistance, basepairDistance);

                var contig = baseVariant.Contig;
                var posContigAndInterval = ContigAndInterval.Create(contig, posBorder.Start, posBorder.Stop + 1);
                var endContigAndInterval = ContigAndInterval.Create(contig, endBorder.Start, endBorder.Stop + 1);

                var winner = Winner.Create(svType, baseInterval, bins);
                var overlaps = new List<OverlapAnnotation>();
                var isReference = svType == WittyerVariantType.CopyNumberReference;
                var overallSample = WittyerSample.CreateOverall(baseVariant, sample, isReference);

                return Create(baseVariant, baseInterval, svType, posContigAndInterval,
                              winner, overlaps, overallSample, endContigAndInterval);
            }
Exemplo n.º 3
0
        // Converts a position plus the confidence-interval INFO tag named by ciInfoTag into an interval.
        //   * Tag absent: returns BedInterval.Create(position - 1, position), or (0, 1) when position is 0.
        //   * Tag present: it must split into exactly two values, otherwise a VcfVariantFormatException
        //     is thrown. Both values are parsed with int.Parse (which throws on malformed input), their
        //     signs are dropped, and the pair is handed to the (position, tuple) overload.
        internal static IInterval<uint> ConvertPositionToCiInterval(
            this uint position, [NotNull] IVcfVariant variant, [NotNull] string ciInfoTag)
        {
            if (!variant.Info.TryGetValue(ciInfoTag, out var rawCi))
            {
                // No CI tag: fall back to the single base at the position, never going below zero.
                var fallbackStart = position > 0 ? position - 1 : 0;
                var fallbackStop = position == 0 ? 1 : position;
                return BedInterval.Create(fallbackStart, fallbackStop);
            }

            var fields = rawCi.Split(WittyerConstants.InfoValueDel);
            if (fields.Length != 2)
            {
                throw VcfVariantFormatException.Create(
                    variant.ToString(),
                    ImmutableHashSet.Create(VcfColumn.Info),
                    $"Invalid {ciInfoTag} found: {rawCi}",
                    variant.ToStrings().ToList().AsReadOnly());
            }

            // Tuple elements are evaluated left to right, so the start value is parsed before the stop.
            var ciOffsets = (ParseMagnitude(fields[0]), ParseMagnitude(fields[1]));
            var (ciStart, ciStop) = ConvertPositionToCiInterval(position, ciOffsets);
            return BedInterval.Create(ciStart, ciStop);

            // Parses text as an integer and returns its magnitude.
            uint ParseMagnitude(string text)
            {
                var number = int.Parse(text);
                if (number < 0)
                {
                    return (uint)(-number);
                }

                return (uint)number;
            }
        }
Exemplo n.º 4
0
        public static void WittyerVariantIntervalCorrect([NotNull] string variant, uint start, uint end,
                                                         uint posStart, uint posEnd, uint endStart, uint endEnd)
        {
            // Parses the VCF line for the "tumor" sample, builds a WittyerVariantInternal from it and
            // checks the variant's own interval plus its CIPOS / CIEND intervals against the expected
            // coordinates passed in by the test case.
            const string tumorSample = "tumor";
            var sampleNames = ImmutableList.Create("normal", tumorSample);
            var parserSettings = VcfVariantParserSettings.Create(sampleNames, GenomeAssembly.Hg38);
            var parsedVariant = VcfVariant.TryParse(variant, parserSettings).GetOrThrowDebug();

            // Only the out parameter is of interest here; the return value is discarded.
            _ = WittyerType.ParseFromVariant(parsedVariant, false, tumorSample, out var wittyerType);
            if (wittyerType == null)
            {
                throw new NotSupportedException("This test does not handle svType null");
            }

            var actual = WittyerVariantInternal.Create(
                parsedVariant, parsedVariant.Samples[tumorSample], wittyerType,
                Bins, PercentDistance, BasepairDistance);

            var expectedVariantInterval = ContigAndInterval.Create(parsedVariant.Contig, start, end);
            var expectedCiPos = BedInterval.Create(posStart, posEnd);
            var expectedCiEnd = BedInterval.Create(endStart, endEnd);

            MultiAssert.Equal(expectedVariantInterval, actual);
            MultiAssert.Equal(expectedCiPos, actual.CiPosInterval);
            MultiAssert.Equal(expectedCiEnd, actual.CiEndInterval);
            MultiAssert.AssertAll();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Converts one comma-delimited B-allele-frequency line into a <see cref="BedGraphEntry"/>.
        /// </summary>
        /// <param name="bafLine">
        /// Line whose first three fields are chromosome, one-based position and allele frequency.
        /// </param>
        /// <returns>
        /// An entry covering the single base at the given position, i.e. the interval
        /// (position - 1, position), carrying the allele frequency as its value.
        /// </returns>
        /// <exception cref="System.FormatException">The position or frequency field is not a number.</exception>
        private BedGraphEntry GetAlleleFrequencyEntry(string bafLine)
        {
            // The file is machine-generated, so numbers must be parsed with the invariant culture;
            // the current culture may use ',' as the decimal separator and misread values like "0.5".
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var bafFields        = CSVReader.ParseCommaDelimitedLine(bafLine);
            var chromosome       = bafFields[0];
            var oneBasedPosition = int.Parse(bafFields[1], invariant);
            var alleleFrequency  = (decimal)double.Parse(bafFields[2], invariant);

            // Convert the one-based position to a zero-based start with an exclusive-style end.
            var bedPosition = new BedInterval(oneBasedPosition - 1, oneBasedPosition);

            return new BedGraphEntry(chromosome, bedPosition, alleleFrequency);
        }
Exemplo n.º 6
0
            // Builds an IWittyerBnd from two breakend entries.
            // The entries are first ordered via MiscUtils.FindBndEntriesOrder; each entry's border
            // interval is then computed from its own CIPOS value and the two distance settings.
            // The variant type, winner and overall sample are derived from the `variant` argument.
            internal static IWittyerBnd Create([NotNull] IVcfVariant variant,
                                               IVcfVariant secondVariant, [CanBeNull] string sampleName,
                                               double percentageDistance, uint basepairDistance, IReadOnlyList<uint> bins)
            {
                var (first, second) = MiscUtils.FindBndEntriesOrder(variant, secondVariant);

                var firstCi = first.ParseCi(WittyerConstants.Cipos);
                var posInterval = first.CalculateBndBorderInterval(
                    second, firstCi, percentageDistance, basepairDistance);

                var secondCi = second.ParseCi(WittyerConstants.Cipos);
                var endInterval = second.CalculateBndBorderInterval(
                    first, secondCi, percentageDistance, basepairDistance);

                var svType = variant.ParseWittyerVariantType(sampleName);

                return Create(first, posInterval, ResolveWinner(), endInterval,
                              new List<OverlapAnnotation>(),
                              WittyerSample.CreateOverall(variant, sampleName, false),
                              second, svType);

                // Picks the Winner for the detected variant type.
                Winner ResolveWinner()
                {
                    if (svType == WittyerVariantType.TranslocationBreakend)
                    {
                        return Winner.Create(svType);
                    }

                    if (svType != WittyerVariantType.Insertion)
                    {
                        var spanned = BedInterval.Create(first.Position, second.Position + 1);
                        return Winner.Create(svType, spanned, bins);
                    }

                    // Insertion: prefer the length from the sequences, then fall back to the SV length
                    // (a null result from TryGetSvLength is treated as success); otherwise the length
                    // stays unknown.
                    uint? insertedLength = null;
                    if (variant.IsSimpleSequence(out var length) || variant.TryGetSvLength(out length) == null)
                    {
                        insertedLength = length;
                    }

                    return Winner.Create(
                        svType,
                        insertedLength.HasValue
                            ? BedInterval.Create(variant.Position, variant.Position + insertedLength.Value)
                            : null,
                        bins);
                }
            }
Exemplo n.º 7
0
        // Creates an IGeneralBnd from a breakend VCF variant.
        // The single ALT is parsed into its mate breakend plus the sequence found before (firstField)
        // and after (lastField) the mate notation. A non-blank firstField marks the breakend as
        // 3-prime. The relevant field must start (3-prime) or end (otherwise) with the first base of
        // REF, else InvalidDataException is thrown.
        // The resulting interval covers (Position - 1, Position), or (0, 1) when Position is 0.
        public static IGeneralBnd CreateFromVariant([NotNull] IVcfVariant variant)
        {
            var altBnd = variant.GetSingleAlt();

            var thisRef = variant.Ref[0];

            var mate = SimpleBreakEnd.Parse(altBnd, out var firstField, out var lastField);

            var is3Prime = !string.IsNullOrWhiteSpace(firstField);

            if ((is3Prime && !firstField.StartsWith(thisRef)) ||
                (!is3Prime && !lastField.EndsWith(thisRef)))
            {
                throw new InvalidDataException(
                          $"Invalid breakend because neither the alt didn't start or end with ref's first base: {variant}");
            }

            // Position is unsigned, so "Position - 1" would wrap around to uint.MaxValue for a
            // breakend at position 0 (VCF uses POS 0 for telomeres). Clamp the same way the
            // CI-interval conversion does: (0, 1) for position 0, otherwise (Position - 1, Position).
            var position = variant.Position;
            var start    = position > 0 ? position - 1 : 0;
            var stop     = position == 0 ? 1 : position;
            var interval = BedInterval.Create(start, stop);

            return new GeneralBnd(variant, interval, is3Prime, mate);
        }
Exemplo n.º 8
0
        // Converts a VCF variant into an interval, also reporting the original end position.
        //   endVal          - starts at the variant position; advanced by the reference length for simple
        //                     sequence variants, and overwritten by the end position reported by
        //                     GetSvLength when one is available.
        //   sharedFirstBase - as reported by GetSvLength (the value from IsSimpleSequence is overwritten).
        // Returns null when GetSvLength yields no length (insertion of unknown length).
        internal static IInterval<uint> ToBedInterval([NotNull] this IVcfVariant baseVariant,
                                                      bool throwException, out uint endVal, out bool sharedFirstBase)
        {
            endVal = baseVariant.Position;

            var isSimpleSequence = IsSimpleSequence(
                baseVariant, out var simpleRefLength, out sharedFirstBase, out var sharedLastBase, false);
            if (isSimpleSequence)
            {
                // Record the original end before normalization trims the variant.
                endVal += simpleRefLength;

                // Refsites don't have Alts in VariantUtils, so only normalize when there is an ALT.
                if (baseVariant.Alts.Count > 0)
                {
                    baseVariant = baseVariant.TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0).GetOrThrow();
                }
            }

            var svLength = baseVariant.GetSvLength(
                throwException, out sharedFirstBase, out sharedLastBase, out var reportedEnd);
            if (svLength == null)
            {
                // Insertion of unknown length.
                return null;
            }

            if (reportedEnd != null)
            {
                endVal = reportedEnd.Value;
            }

            // Step back one base unless the first base is shared or we are already at position 0.
            var start = baseVariant.Position;
            if (!sharedFirstBase && start != 0)
            {
                start--;
            }

            var end = start + svLength.Value;
            if (sharedLastBase)
            {
                // Rare case: the last base is shared as well.
                end--;
            }

            return BedInterval.Create(start, end);
        }
Exemplo n.º 9
0
        public void SubtractMultipleIntervalsWorks()
        {
            // Subtracting an unordered mix of overlapping and non-overlapping intervals from (10, 20)
            // must leave three pieces whose lengths are 2, 1 and 1, in that order.
            var minuend = BedInterval.Create(10, 20);
            var removed = new[]
            {
                BedInterval.Create(16, 18),
                BedInterval.Create(19, 25),
                BedInterval.Create(27, 30),
                BedInterval.Create(2, 4),
                BedInterval.Create(8, 11),
                BedInterval.Create(13, 15)
            };

            // Remaining pieces: [11,13), [15,16), [18,19)
            uint[] expectedLengths = { 2, 1, 1 };

            var remaining = minuend.Subtract(removed).ToList();
            var actualLengths = remaining.Select(piece => piece.GetLength());

            Assert.True(expectedLengths.SequenceEqual(actualLengths));
        }
Exemplo n.º 10
0
 /// <summary>
 /// Creates a bin from a chromosome name, its interval and a GC value.
 /// </summary>
 /// <param name="chromosome">Value stored in <c>Chromosome</c>.</param>
 /// <param name="interval">Value stored in <c>Interval</c>.</param>
 /// <param name="gc">Value stored in <c>GC</c>.</param>
 public GenomicBin(string chromosome, BedInterval interval, int gc)
 {
     this.Chromosome = chromosome;
     this.Interval = interval;
     this.GC = gc;
 }
Exemplo n.º 11
0
 /// <summary>
 /// Creates a bin from a chromosome name and its interval; no other member is assigned here.
 /// </summary>
 /// <param name="chromosome">Value stored in <c>Chromosome</c>.</param>
 /// <param name="interval">Value stored in <c>Interval</c>.</param>
 public GenomicBin(string chromosome, BedInterval interval)
 {
     this.Chromosome = chromosome;
     this.Interval = interval;
 }
Exemplo n.º 12
0
        // Builds an IWittyerBnd from two VCF entries.
        //   first / second     - the two entries; passing the same object for both selects the
        //                        "same variant" path below (the original comments tie it to insertions).
        //   originalSample     - sample handed to WittyerSample.CreateFromVariant; may be null.
        //   svType             - variant type; decides how the Winner is created and whether
        //                        percentageDistance is forwarded.
        //   bins               - forwarded to Winner.Create.
        //   bpd                - base-pair distance used when building the pos/end intervals.
        //   percentageDistance - only forwarded for WittyerType.IntraChromosomeBreakend.
        internal static IWittyerBnd Create([NotNull] IVcfVariant first, [CanBeNull] IVcfSample originalSample,
                                           [NotNull] WittyerType svType, [NotNull] IReadOnlyList <uint> bins, uint bpd, double?percentageDistance,
                                           [NotNull] IVcfVariant second)
        {
            // Two distinct entries are reordered so that "first" is the one that does not compare
            // greater under ContigAndPositionComparer.Default.
            if (!ReferenceEquals(first, second))
            {
                (first, second) = FindBndEntriesOrder(in first, in second);
            }

            // Both confidence intervals are read from the CIPOS tag of their own entry.
            var ciPosInterval = first.Position.ConvertPositionToCiInterval(first, WittyerConstants.Cipos);
            var ciEndInterval = ReferenceEquals(first, second)
                ? ciPosInterval // same variant means same intervals.
                : second.Position.ConvertPositionToCiInterval(second, WittyerConstants.Cipos);

            IContigAndInterval posInterval, endInterval;

            if (ReferenceEquals(first, second)) // insertions need trimming and stuff.
            {
                // Trim common bases, widen the trimmed position by bpd on both sides, then let
                // GetBetterInterval adjust the bounds (passed by ref) using the CIPOS interval.
                var trimmed = first.TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0).GetOrThrow();
                var tuple   = (bpd, bpd);
                var(posStart, posStop) = trimmed.Position.ConvertPositionToCiInterval(tuple);
                WittyerUtils.GetBetterInterval(ciPosInterval, ref posStart, ref posStop);
                // The pos and end intervals are the very same object in this case.
                posInterval = endInterval = ContigAndInterval.Create(first.Contig, posStart, posStop);
            }
            else
            {
                // The percentage distance is only passed on for intra-chromosome breakends;
                // every other type gets null there.
                (posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(first.Contig,
                                                                               svType == WittyerType.IntraChromosomeBreakend ? percentageDistance : null, bpd,
                                                                               ciPosInterval, first.Position, ciEndInterval, second.Position, second.Contig);
            }

            var winner = GetWinner();

            var sample = WittyerSample.CreateFromVariant(first, originalSample, false);

            return(new WittyerBndInternal(svType, first, posInterval, ciPosInterval,
                                          second, endInterval, ciEndInterval, winner, sample));

            // Returns the pair ordered so that the entry comparing greater comes second.
            (IVcfVariant first, IVcfVariant second) FindBndEntriesOrder(in IVcfVariant variantA,
                                                                        in IVcfVariant variantB)
            => ContigAndPositionComparer.Default.Compare(variantA, variantB) > 0
                    ? (variantB, variantA)
                    : (variantA, variantB);

            // Picks the Winner for svType. It reads first/second as they are at call time,
            // i.e. after the reordering above.
            Winner GetWinner()
            {
                // Translocation breakends get a Winner built from the type alone (no interval, no bins).
                if (svType == WittyerType.TranslocationBreakend)
                {
                    return(Winner.Create(svType));
                }


                IInterval <uint> bedInterval;

                if (svType == WittyerType.Insertion)
                {
                    bedInterval = GetInsertionInterval(first);
                }
                else
                {
                    // Interval from one before first.Position (not going below 0) to second.Position.
                    var start = first.Position;
                    if (start > 0)
                    {
                        start--;
                    }
                    bedInterval = BedInterval.Create(start, second.Position);
                }

                return(Winner.Create(svType, bedInterval, bins));
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Converts a segment into a <see cref="BedGraphEntry"/> spanning the segment's Begin..End
        /// on its chromosome, with the segment's copy number as the entry value.
        /// </summary>
        /// <param name="segment">Segment supplying Chr, Begin, End and CopyNumber.</param>
        private BedGraphEntry GetCopyNumberEntry(CanvasSegment segment)
            => new BedGraphEntry(
                segment.Chr,
                new BedInterval(segment.Begin, segment.End),
                segment.CopyNumber);