/// <summary>
/// Converts <paramref name="position"/> into an interval using the two-valued confidence
/// interval entry stored under <paramref name="ciInfoTag"/> in the variant's INFO field.
/// </summary>
/// <param name="position">The position the interval is built around.</param>
/// <param name="variant">The variant whose INFO dictionary is searched for the tag.</param>
/// <param name="ciInfoTag">The INFO key that holds the confidence interval values.</param>
/// <returns>
/// When the tag is absent: the one-base interval ending at <paramref name="position"/>
/// (or [0, 1) when the position is 0). Otherwise the interval produced by the tuple
/// overload of <c>ConvertPositionToCiInterval</c> from the absolute values of both entries.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Either of the two entries cannot be parsed as a 32-bit integer.
/// </exception>
/// <remarks>
/// When the tag value does not split into exactly two entries, the exception built by
/// <c>VcfVariantFormatException.Create</c> is thrown.
/// </remarks>
internal static IInterval<uint> ConvertPositionToCiInterval(
    this uint position, [NotNull] IVcfVariant variant, [NotNull] string ciInfoTag)
{
    if (!variant.Info.TryGetValue(ciInfoTag, out var posString))
    {
        // No confidence interval given: fall back to the single base at the position,
        // guarding the unsigned subtraction for position 0.
        return BedInterval.Create(position > 0 ? position - 1 : 0, position == 0 ? 1 : position);
    }

    var split = posString.Split(WittyerConstants.InfoValueDel);
    if (split.Length != 2)
    {
        throw VcfVariantFormatException.Create(variant.ToString(),
            ImmutableHashSet.Create(VcfColumn.Info), $"Invalid {ciInfoTag} found: {posString}",
            variant.ToStrings().ToList().AsReadOnly());
    }

    var parsedStart = GetParsedAbsValue(split[0]);
    var parsedStop = GetParsedAbsValue(split[1]);
    if (parsedStart == null || parsedStop == null)
    {
        throw new InvalidOperationException($"Failed to parse {ciInfoTag}={posString}!");
    }

    var (start, stop) = ConvertPositionToCiInterval(position, (parsedStart.Value, parsedStop.Value));
    return BedInterval.Create(start, stop);

    // Parses an integer and returns its magnitude, or null when the text is not an integer.
    // INFO values are machine-readable, so parsing must not depend on the current culture.
    // Negating as long keeps int.MinValue from overflowing in checked builds.
    uint? GetParsedAbsValue(string val)
        => int.TryParse(val, System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out var parsed)
            ? (parsed < 0 ? (uint)(-(long)parsed) : (uint)parsed)
            : (uint?)null;
}
/// <summary>
/// Builds an <see cref="IWittyerVariant"/> from a VCF variant by deriving its base interval
/// and the border intervals around its start and end.
/// </summary>
/// <param name="baseVariant">The source VCF variant.</param>
/// <param name="sample">The sample name passed on to <c>WittyerSample.CreateOverall</c>.</param>
/// <param name="percentageDistance">Percentage distance forwarded to <c>CalculateBorderInterval</c>.</param>
/// <param name="basepairDistance">Basepair distance forwarded to <c>CalculateBorderInterval</c>.</param>
/// <param name="bins">Bins forwarded to <c>Winner.Create</c>.</param>
/// <param name="svType">The variant type; must not be <c>WittyerVariantType.Invalid</c>.</param>
/// <exception cref="InvalidDataException"><paramref name="svType"/> is <c>Invalid</c>.</exception>
internal static IWittyerVariant Create([NotNull] IVcfVariant baseVariant, string sample,
    double percentageDistance, uint basepairDistance, IReadOnlyList<uint> bins,
    WittyerVariantType svType)
{
    if (svType == WittyerVariantType.Invalid)
    {
        throw new InvalidDataException(
            $"Invalid {VcfConstants.SvTypeKey} in variant: \n{baseVariant}\nNot sure why you got here though. Check with a witty.er developer!");
    }

    var position = baseVariant.Position;
    var end = position + baseVariant.GetSvLength();
    var baseInterval = BedInterval.Create(position, end);

    var startBorder = position.CalculateBorderInterval(
        baseInterval,
        baseVariant.ParseCi(WittyerConstants.Cipos),
        percentageDistance,
        basepairDistance);

    // Every wittyerVariant is expected to have an end border; how it is found
    // (END key in INFO, SVLEN, or other means) may be refined in FindEndBorder() later.
    var endBorder = (end - 1).CalculateBorderInterval(
        baseInterval,
        baseVariant.ParseCi(WittyerConstants.Ciend),
        percentageDistance,
        basepairDistance);

    var posContigAndInterval =
        ContigAndInterval.Create(baseVariant.Contig, startBorder.Start, startBorder.Stop + 1);
    var endContigAndInterval =
        ContigAndInterval.Create(baseVariant.Contig, endBorder.Start, endBorder.Stop + 1);

    var winner = Winner.Create(svType, baseInterval, bins);
    var overlapInfo = new List<OverlapAnnotation>();
    var wittyerSample = WittyerSample.CreateOverall(
        baseVariant, sample, svType == WittyerVariantType.CopyNumberReference);

    return Create(baseVariant, baseInterval, svType, posContigAndInterval, winner, overlapInfo,
        wittyerSample, endContigAndInterval);
}
/// <summary>
/// Converts <paramref name="position"/> into an interval using the two-valued confidence
/// interval entry stored under <paramref name="ciInfoTag"/> in the variant's INFO field.
/// </summary>
/// <param name="position">The position the interval is built around.</param>
/// <param name="variant">The variant whose INFO dictionary is searched for the tag.</param>
/// <param name="ciInfoTag">The INFO key that holds the confidence interval values.</param>
/// <returns>
/// When the tag is absent: the one-base interval ending at <paramref name="position"/>
/// (or [0, 1) when the position is 0). Otherwise the interval produced by the tuple
/// overload of <c>ConvertPositionToCiInterval</c> from the absolute values of both entries.
/// </returns>
/// <exception cref="System.FormatException">An entry is not a valid integer.</exception>
/// <exception cref="System.OverflowException">An entry does not fit in a 32-bit integer.</exception>
/// <remarks>
/// When the tag value does not split into exactly two entries, the exception built by
/// <c>VcfVariantFormatException.Create</c> is thrown.
/// </remarks>
internal static IInterval<uint> ConvertPositionToCiInterval(
    this uint position, [NotNull] IVcfVariant variant, [NotNull] string ciInfoTag)
{
    if (!variant.Info.TryGetValue(ciInfoTag, out var posString))
    {
        // No confidence interval given: fall back to the single base at the position,
        // guarding the unsigned subtraction for position 0.
        return BedInterval.Create(position > 0 ? position - 1 : 0, position == 0 ? 1 : position);
    }

    var split = posString.Split(WittyerConstants.InfoValueDel);
    if (split.Length != 2)
    {
        throw VcfVariantFormatException.Create(variant.ToString(),
            ImmutableHashSet.Create(VcfColumn.Info), $"Invalid {ciInfoTag} found: {posString}",
            variant.ToStrings().ToList().AsReadOnly());
    }

    var (start, stop) = ConvertPositionToCiInterval(position,
        (GetParsedAbsValue(split[0]), GetParsedAbsValue(split[1])));
    return BedInterval.Create(start, stop);

    // Parses an integer and returns its magnitude.
    // INFO values are machine-readable, so parsing must not depend on the current culture.
    // Parse failures keep their original exception type but gain the offending tag and value.
    uint GetParsedAbsValue(string val)
    {
        int parsed;
        try
        {
            parsed = int.Parse(val, System.Globalization.CultureInfo.InvariantCulture);
        }
        catch (System.FormatException e)
        {
            throw new System.FormatException($"Failed to parse {ciInfoTag}={posString}!", e);
        }
        catch (System.OverflowException e)
        {
            throw new System.OverflowException($"Failed to parse {ciInfoTag}={posString}!", e);
        }

        // Negating as long keeps int.MinValue from overflowing in checked builds.
        return parsed < 0 ? (uint)(-(long)parsed) : (uint)parsed;
    }
}
/// <summary>
/// Parses <paramref name="variant"/>, builds a wittyer variant for the "tumor" sample and
/// checks its main interval and its CIPOS / CIEND intervals against the expected bounds.
/// </summary>
/// <param name="variant">The VCF line to parse.</param>
/// <param name="start">Expected start of the variant interval.</param>
/// <param name="end">Expected end of the variant interval.</param>
/// <param name="posStart">Expected start of the CIPOS interval.</param>
/// <param name="posEnd">Expected end of the CIPOS interval.</param>
/// <param name="endStart">Expected start of the CIEND interval.</param>
/// <param name="endEnd">Expected end of the CIEND interval.</param>
public static void WittyerVariantIntervalCorrect([NotNull] string variant, uint start, uint end,
    uint posStart, uint posEnd, uint endStart, uint endEnd)
{
    const string sampleName = "tumor";

    var parserSettings = VcfVariantParserSettings.Create(
        ImmutableList.Create("normal", sampleName), GenomeAssembly.Hg38);
    var parsedVariant = VcfVariant.TryParse(variant, parserSettings).GetOrThrowDebug();

    // Only the out parameter is of interest here; the return value is discarded.
    _ = WittyerType.ParseFromVariant(parsedVariant, false, sampleName, out var type);
    if (type == null)
    {
        throw new NotSupportedException("This test does not handle svType null");
    }

    var actual = WittyerVariantInternal.Create(
        parsedVariant,
        parsedVariant.Samples[sampleName],
        type,
        Bins,
        PercentDistance,
        BasepairDistance);

    var expectedVariantInterval = ContigAndInterval.Create(parsedVariant.Contig, start, end);
    var expectedCiPos = BedInterval.Create(posStart, posEnd);
    var expectedCiEnd = BedInterval.Create(endStart, endEnd);

    MultiAssert.Equal(expectedVariantInterval, actual);
    MultiAssert.Equal(expectedCiPos, actual.CiPosInterval);
    MultiAssert.Equal(expectedCiEnd, actual.CiEndInterval);
    MultiAssert.AssertAll();
}
/// <summary>
/// Converts one comma-delimited B-allele frequency line into a <see cref="BedGraphEntry"/>.
/// </summary>
/// <param name="bafLine">
/// A line whose first three fields are read as chromosome, one-based position and
/// allele frequency, in that order.
/// </param>
/// <returns>
/// An entry covering the single base [position - 1, position) with the parsed frequency.
/// </returns>
private BedGraphEntry GetAlleleFrequencyEntry(string bafLine)
{
    // The file is machine-readable data: parse numbers with the invariant culture so that
    // "0.5" is not misread on machines whose culture uses ',' as the decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var bafFields = CSVReader.ParseCommaDelimitedLine(bafLine);
    var chromosome = bafFields[0];
    var oneBasedPosition = int.Parse(bafFields[1], invariant);
    var alleleFrequency = (decimal)double.Parse(bafFields[2], invariant);

    // One-based position -> zero-based, half-open single-base interval.
    var bedPosition = new BedInterval(oneBasedPosition - 1, oneBasedPosition);
    return new BedGraphEntry(chromosome, bedPosition, alleleFrequency);
}
/// <summary>
/// Builds an <see cref="IWittyerBnd"/> from a pair of breakend variants.
/// </summary>
/// <param name="variant">The variant used for type parsing, sample creation and insertion length.</param>
/// <param name="secondVariant">The other entry of the breakend pair.</param>
/// <param name="sampleName">Sample name used for type parsing and sample creation.</param>
/// <param name="percentageDistance">Percentage distance forwarded to <c>CalculateBndBorderInterval</c>.</param>
/// <param name="basepairDistance">Basepair distance forwarded to <c>CalculateBndBorderInterval</c>.</param>
/// <param name="bins">Bins forwarded to <c>Winner.Create</c>.</param>
internal static IWittyerBnd Create([NotNull] IVcfVariant variant, IVcfVariant secondVariant,
    [CanBeNull] string sampleName, double percentageDistance, uint basepairDistance,
    IReadOnlyList<uint> bins)
{
    var (firstEntry, secondEntry) = MiscUtils.FindBndEntriesOrder(variant, secondVariant);

    var firstCi = firstEntry.ParseCi(WittyerConstants.Cipos);
    var posInterval = firstEntry.CalculateBndBorderInterval(
        secondEntry, firstCi, percentageDistance, basepairDistance);

    var secondCi = secondEntry.ParseCi(WittyerConstants.Cipos);
    var endInterval = secondEntry.CalculateBndBorderInterval(
        firstEntry, secondCi, percentageDistance, basepairDistance);

    var svType = variant.ParseWittyerVariantType(sampleName);
    var winner = GetWinner();
    var overlapInfo = new List<OverlapAnnotation>();
    var sample = WittyerSample.CreateOverall(variant, sampleName, false);

    return Create(firstEntry, posInterval, winner, endInterval, overlapInfo, sample, secondEntry,
        svType);

    // Picks the Winner.Create overload / interval that matches the variant type.
    Winner GetWinner()
    {
        if (svType == WittyerVariantType.TranslocationBreakend)
        {
            return Winner.Create(svType);
        }

        if (svType == WittyerVariantType.Insertion)
        {
            // Insertion: try the sequences first, then the SV length;
            // if neither works, the length is treated as unknown.
            uint? insertionLength = null;
            if (variant.IsSimpleSequence(out var length)
                || variant.TryGetSvLength(out length) == null)
            {
                insertionLength = length;
            }

            return Winner.Create(
                svType,
                insertionLength == null
                    ? null
                    : BedInterval.Create(variant.Position, variant.Position + insertionLength.Value),
                bins);
        }

        return Winner.Create(
            svType,
            BedInterval.Create(firstEntry.Position, secondEntry.Position + 1),
            bins);
    }
}
/// <summary>
/// Builds an <see cref="IGeneralBnd"/> from a VCF variant that carries a single breakend ALT.
/// </summary>
/// <param name="variant">The breakend variant.</param>
/// <returns>
/// A breakend whose interval is the single base ending at the variant's position
/// (or [0, 1) when the position is 0).
/// </returns>
/// <exception cref="InvalidDataException">
/// The ALT's leading field does not start with, or its trailing field does not end with,
/// the first base of REF (whichever applies to the breakend's orientation).
/// </exception>
public static IGeneralBnd CreateFromVariant([NotNull] IVcfVariant variant)
{
    var altBnd = variant.GetSingleAlt();
    var thisRef = variant.Ref[0];
    var mate = SimpleBreakEnd.Parse(altBnd, out var firstField, out var lastField);

    // A non-blank leading field is what marks the breakend as 3'.
    var is3Prime = !string.IsNullOrWhiteSpace(firstField);
    if (is3Prime && !firstField.StartsWith(thisRef)
        || !is3Prime && !lastField.EndsWith(thisRef))
    {
        throw new InvalidDataException(
            $"Invalid breakend because neither the alt didn't start or end with ref's first base: {variant}");
    }

    // Position is unsigned and may be 0, so "Position - 1" would wrap around to
    // uint.MaxValue. Clamp the same way ConvertPositionToCiInterval does for position 0.
    var position = variant.Position;
    var interval = BedInterval.Create(
        position > 0 ? position - 1 : 0,
        position == 0 ? 1 : position);

    return new GeneralBnd(variant, interval, is3Prime, mate);
}
/// <summary>
/// Derives the bed interval of <paramref name="baseVariant"/> from its SV length.
/// </summary>
/// <param name="baseVariant">The variant to convert.</param>
/// <param name="throwException">Forwarded to <c>GetSvLength</c>.</param>
/// <param name="endVal">
/// Starts as the variant position, gains the reference length when the variant is a simple
/// sequence, and is replaced by the end position reported by <c>GetSvLength</c> if there is one.
/// </param>
/// <param name="sharedFirstBase">The value reported by <c>GetSvLength</c>.</param>
/// <returns>The interval, or <c>null</c> when <c>GetSvLength</c> reports no length.</returns>
internal static IInterval<uint> ToBedInterval([NotNull] this IVcfVariant baseVariant,
    bool throwException, out uint endVal, out bool sharedFirstBase)
{
    endVal = baseVariant.Position;

    // The original end has to be captured before the variant gets normalized.
    var isSimple = IsSimpleSequence(baseVariant, out var originalRefLength, out sharedFirstBase,
        out var sharedLastBase, false);
    if (isSimple)
    {
        endVal += originalRefLength;

        // refsites don't have Alts in VariantUtils
        if (baseVariant.Alts.Count > 0)
        {
            baseVariant = baseVariant
                .TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0)
                .GetOrThrow();
        }
    }

    var svLength = baseVariant.GetSvLength(throwException, out sharedFirstBase,
        out sharedLastBase, out var reportedEnd);

    // No length means an insertion of unknown length.
    if (svLength == null)
    {
        return null;
    }

    if (reportedEnd.HasValue)
    {
        endVal = reportedEnd.Value;
    }

    // Step back one base unless the first base is shared or we are already at 0.
    var start = baseVariant.Position;
    if (!sharedFirstBase && start != 0)
    {
        start--;
    }

    // Sharing the last base (rare case) shortens the interval by one.
    var end = sharedLastBase
        ? start + svLength.Value - 1
        : start + svLength.Value;

    return BedInterval.Create(start, end);
}
/// <summary>
/// Subtracting several intervals (unsorted, some overlapping the edges, some disjoint)
/// from [10, 20) must leave pieces of lengths 2, 1 and 1.
/// </summary>
public void SubtractMultipleIntervalsWorks()
{
    var minuend = BedInterval.Create(10, 20);
    var subtrahends = new[]
    {
        BedInterval.Create(16, 18),
        BedInterval.Create(19, 25),
        BedInterval.Create(27, 30),
        BedInterval.Create(2, 4),
        BedInterval.Create(8, 11),
        BedInterval.Create(13, 15)
    };

    // Expected remainders: [11,13) -> 2, [15,16) -> 1, [18,19) -> 1
    var expectedLengths = new List<uint> { 2, 1, 1 };

    var remainders = minuend.Subtract(subtrahends).ToList();
    var actualLengths = remainders.Select(piece => piece.GetLength());

    Assert.True(expectedLengths.SequenceEqual(actualLengths));
}
/// <summary>
/// Creates a bin on <paramref name="chromosome"/> spanning <paramref name="interval"/>
/// and stores <paramref name="gc"/> in <c>GC</c>.
/// </summary>
/// <param name="chromosome">Value assigned to <c>Chromosome</c>.</param>
/// <param name="interval">Value assigned to <c>Interval</c>.</param>
/// <param name="gc">Value assigned to <c>GC</c>.</param>
public GenomicBin(string chromosome, BedInterval interval, int gc)
{
    this.Chromosome = chromosome;
    this.Interval = interval;
    this.GC = gc;
}
/// <summary>
/// Creates a bin on <paramref name="chromosome"/> spanning <paramref name="interval"/>.
/// No other member is assigned by this constructor.
/// </summary>
/// <param name="chromosome">Value assigned to <c>Chromosome</c>.</param>
/// <param name="interval">Value assigned to <c>Interval</c>.</param>
public GenomicBin(string chromosome, BedInterval interval)
{
    this.Chromosome = chromosome;
    this.Interval = interval;
}
/// <summary>
/// Builds an <see cref="IWittyerBnd"/> from two breakend entries, or from a single variant
/// passed as both <paramref name="first"/> and <paramref name="second"/>.
/// </summary>
/// <param name="first">One entry of the pair; the two are reordered by contig and position.</param>
/// <param name="originalSample">Sample forwarded to <c>WittyerSample.CreateFromVariant</c>.</param>
/// <param name="svType">The wittyer type of the variant.</param>
/// <param name="bins">Bins forwarded to <c>Winner.Create</c>.</param>
/// <param name="bpd">Basepair distance.</param>
/// <param name="percentageDistance">
/// Percentage distance; only passed on when <paramref name="svType"/> is
/// <c>WittyerType.IntraChromosomeBreakend</c>.
/// </param>
/// <param name="second">The other entry, or the same instance as <paramref name="first"/>.</param>
internal static IWittyerBnd Create([NotNull] IVcfVariant first,
    [CanBeNull] IVcfSample originalSample, [NotNull] WittyerType svType,
    [NotNull] IReadOnlyList<uint> bins, uint bpd, double? percentageDistance,
    [NotNull] IVcfVariant second)
{
    // Reordering two distinct instances never makes them identical, so this stays valid below.
    var isSameVariant = ReferenceEquals(first, second);
    if (!isSameVariant)
    {
        (first, second) = FindBndEntriesOrder(in first, in second);
    }

    var ciPosInterval = first.Position.ConvertPositionToCiInterval(first, WittyerConstants.Cipos);
    var ciEndInterval = isSameVariant
        ? ciPosInterval // same variant means same intervals.
        : second.Position.ConvertPositionToCiInterval(second, WittyerConstants.Cipos);

    IContigAndInterval posInterval, endInterval;
    if (isSameVariant)
    {
        // insertions need trimming and stuff.
        var trimmed = first.TryNormalizeVariant(VariantNormalizer.TrimCommonBases, 0).GetOrThrow();
        var distances = (bpd, bpd);
        var (posStart, posStop) = trimmed.Position.ConvertPositionToCiInterval(distances);
        WittyerUtils.GetBetterInterval(ciPosInterval, ref posStart, ref posStop);

        var sharedInterval = ContigAndInterval.Create(first.Contig, posStart, posStop);
        endInterval = sharedInterval;
        posInterval = sharedInterval;
    }
    else
    {
        (posInterval, endInterval) = WittyerUtils.GetPosAndEndInterval(
            first.Contig,
            svType == WittyerType.IntraChromosomeBreakend ? percentageDistance : null,
            bpd,
            ciPosInterval,
            first.Position,
            ciEndInterval,
            second.Position,
            second.Contig);
    }

    var winner = GetWinner();
    var sample = WittyerSample.CreateFromVariant(first, originalSample, false);
    return new WittyerBndInternal(svType, first, posInterval, ciPosInterval, second, endInterval,
        ciEndInterval, winner, sample);

    // Orders the pair so that the entry comparing lower by contig and position comes first.
    (IVcfVariant first, IVcfVariant second) FindBndEntriesOrder(in IVcfVariant variantA,
        in IVcfVariant variantB)
    {
        if (ContigAndPositionComparer.Default.Compare(variantA, variantB) > 0)
        {
            return (variantB, variantA);
        }

        return (variantA, variantB);
    }

    // Picks the Winner.Create overload / interval that matches the variant type.
    Winner GetWinner()
    {
        if (svType == WittyerType.TranslocationBreakend)
        {
            return Winner.Create(svType);
        }

        IInterval<uint> bedInterval;
        if (svType != WittyerType.Insertion)
        {
            // Step the start back by one, but never below 0.
            var start = first.Position > 0 ? first.Position - 1 : first.Position;
            bedInterval = BedInterval.Create(start, second.Position);
        }
        else
        {
            bedInterval = GetInsertionInterval(first);
        }

        return Winner.Create(svType, bedInterval, bins);
    }
}
/// <summary>
/// Wraps a segment's chromosome, [Begin, End) span and copy number in a
/// <see cref="BedGraphEntry"/>.
/// </summary>
/// <param name="segment">The segment to convert.</param>
/// <returns>The entry built from the segment's <c>Chr</c>, <c>Begin</c>/<c>End</c> and <c>CopyNumber</c>.</returns>
private BedGraphEntry GetCopyNumberEntry(CanvasSegment segment)
{
    return new BedGraphEntry(
        segment.Chr,
        new BedInterval(segment.Begin, segment.End),
        segment.CopyNumber);
}