/// <summary>
/// Replaces the current buffer with a new one that holds only <paramref name="simplePosition"/>
/// and sets <c>CurrentChromosome</c> to that position's chromosome.
/// </summary>
/// <param name="simplePosition">The position that seeds the new buffer.</param>
/// <param name="recomposable">
/// Recorded as the position's recomposable flag; when true, the value returned by
/// <c>CodonInfoProvider.GetLongestFunctionBlockDistance</c> seeds the function block ranges.
/// </param>
private void ResetBuffer(ISimplePosition simplePosition, bool recomposable)
{
    // Only a recomposable position contributes a function block range.
    var initialRanges = new List<int>();
    if (recomposable)
    {
        initialRanges.Add(CodonInfoProvider.GetLongestFunctionBlockDistance(simplePosition));
    }

    var positions = new List<ISimplePosition> { simplePosition };
    var recomposableFlags = new List<bool> { recomposable };

    BufferedPositions = new BufferedPositions(positions, recomposableFlags, initialRanges);
    CurrentChromosome = simplePosition.Chromosome;
}
/// <summary>
/// Adds a position to the buffer, or flushes the buffer when the position does not belong to it.
/// </summary>
/// <param name="simplePosition">The incoming position.</param>
/// <returns>
/// The result of <c>BufferedPositions.CreatEmptyBufferedPositions()</c> when the position was
/// appended to the current buffer; otherwise the previous buffer, after a new buffer has been
/// started with <paramref name="simplePosition"/> as its only entry.
/// </returns>
public BufferedPositions AddPosition(ISimplePosition simplePosition)
{
    bool canRecompose = IsRecomposable(simplePosition);
    bool belongsToBuffer = !simplePosition.Chromosome.IsEmpty() && PositionWithinRange(simplePosition);

    if (!belongsToBuffer)
    {
        // Hand the accumulated buffer to the caller and start over with this position.
        var flushedBuffer = BufferedPositions;
        ResetBuffer(simplePosition, canRecompose);
        return flushedBuffer;
    }

    BufferedPositions.SimplePositions.Add(simplePosition);
    BufferedPositions.Recomposable.Add(canRecompose);

    if (canRecompose)
    {
        UpdateFunctionBlockRanges(simplePosition);
    }

    return BufferedPositions.CreatEmptyBufferedPositions();
}
/// <summary>
/// Builds an <see cref="IPosition"/> from the VCF fields carried by a simple position.
/// </summary>
/// <param name="simplePosition">The source position; a null value yields a null result.</param>
/// <param name="refMinorProvider">
/// Optional provider queried for the global major allele when the ALT column is a reference allele.
/// </param>
/// <param name="sequenceProvider">Asked to load the position's chromosome before any parsing.</param>
/// <param name="mitoHeteroplasmyProvider">Forwarded to the sample parsing step.</param>
/// <param name="variantFactory">Supplies the format indices and creates the variants.</param>
/// <param name="enableDq">Forwarded to the sample parsing step.</param>
/// <returns>
/// Null for a null input, the result of <c>GetReferencePosition</c> for a reference position
/// without a global major allele, and otherwise a fully populated <see cref="Position"/>.
/// </returns>
public static IPosition ToPosition(ISimplePosition simplePosition, IRefMinorProvider refMinorProvider,
    ISequenceProvider sequenceProvider, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
    VariantFactory variantFactory, bool enableDq = false)
{
    if (simplePosition == null)
    {
        return null;
    }

    sequenceProvider.LoadChromosome(simplePosition.Chromosome);

    string[] vcfFields = simplePosition.VcfFields;
    string[] altAlleles = vcfFields[VcfCommon.AltIndex].OptimizedSplit(',');

    bool isReference = altAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(altAlleles[0]);

    // The global major allele is only looked up for reference positions.
    string globalMajorAllele = null;
    if (isReference)
    {
        globalMajorAllele = refMinorProvider?.GetGlobalMajorAllele(simplePosition.Chromosome, simplePosition.Start);
    }

    bool isRefMinor = isReference && globalMajorAllele != null;

    // A reference position that is not a ref-minor site takes the short path.
    if (isReference && !isRefMinor)
    {
        return GetReferencePosition(simplePosition);
    }

    var infoData = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex]);
    int end = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);

    double? quality = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);
    string[] filters = vcfFields[VcfCommon.FilterIndex].OptimizedSplit(';');

    ISample[] samples = vcfFields.ToSamples(variantFactory.FormatIndices, simplePosition,
        mitoHeteroplasmyProvider, enableDq);

    IVariant[] variants = variantFactory.CreateVariants(
        simplePosition.Chromosome,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        infoData,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed,
        simplePosition.LinkedVids,
        globalMajorAllele);

    return new Position(
        simplePosition.Chromosome,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        quality,
        filters,
        variants,
        samples,
        infoData,
        vcfFields,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);
}
/// <summary>
/// Builds a <see cref="Position"/> from the VCF fields carried by a simple position.
/// </summary>
/// <param name="simplePosition">The source position; a null value yields a null result.</param>
/// <param name="variantFactory">Creates the variants for the position.</param>
/// <returns>The populated position, or null when <paramref name="simplePosition"/> is null.</returns>
public static Position CreatFromSimplePosition(ISimplePosition simplePosition, VariantFactory variantFactory)
{
    if (simplePosition == null)
    {
        return null;
    }

    var vcfFields = simplePosition.VcfFields;
    var infoData = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex]);
    var id = vcfFields[VcfCommon.IdIndex];

    // re-calculate the end by checking INFO field
    int end = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);

    // string.Split already returns a freshly allocated array, so no extra copy is needed.
    string[] altAlleles = vcfFields[VcfCommon.AltIndex].Split(',');
    double? quality = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);
    string[] filters = vcfFields[VcfCommon.FilterIndex].Split(';');

    var samples = new SampleFieldExtractor(vcfFields, infoData.Depth).ExtractSamples();

    var variants = variantFactory.CreateVariants(
        simplePosition.Chromosome,
        id,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        infoData,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);

    return new Position(
        simplePosition.Chromosome,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        quality,
        filters,
        variants,
        samples,
        infoData,
        vcfFields,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);
}
/// <summary>
/// Reports whether <c>GeneIntervalForest</c> finds any overlap for the position's
/// chromosome index and its start/end coordinates.
/// </summary>
/// <param name="simplePosition">The position to test.</param>
/// <returns>The result of the <c>OverlapsAny</c> query.</returns>
public bool InGeneRegion(ISimplePosition simplePosition)
{
    var chromosomeIndex = simplePosition.Chromosome.Index;
    return GeneIntervalForest.OverlapsAny(chromosomeIndex, simplePosition.Start, simplePosition.End);
}
/// <summary>
/// Appends the value returned by <c>CodonInfoProvider.GetLongestFunctionBlockDistance</c>
/// for the given position to the buffer's function block ranges.
/// </summary>
/// <param name="simplePosition">The position whose function block distance is recorded.</param>
public void UpdateFunctionBlockRanges(ISimplePosition simplePosition)
{
    var longestDistance = CodonInfoProvider.GetLongestFunctionBlockDistance(simplePosition);
    BufferedPositions.FunctionBlockRanges.Add(longestDistance);
}
/// <summary>
/// Decides whether a position can be appended to the current buffer.
/// </summary>
/// <param name="simplePosition">The candidate position.</param>
/// <returns>
/// True only when the position is on the current chromosome, at least one function block
/// range has been recorded, the position's start does not exceed the most recently recorded
/// range, and <c>InGeneRegion</c> accepts the position.
/// </returns>
public bool PositionWithinRange(ISimplePosition simplePosition)
{
    int rangeCount = BufferedPositions.FunctionBlockRanges.Count;

    if (CurrentChromosome.Index != simplePosition.Chromosome.Index)
    {
        return false;
    }

    // Without a recorded range there is nothing to compare the start against.
    if (rangeCount == 0)
    {
        return false;
    }

    if (simplePosition.Start > BufferedPositions.FunctionBlockRanges[rangeCount - 1])
    {
        return false;
    }

    return InGeneRegion(simplePosition);
}
/// <summary>
/// Determines whether a position qualifies for recomposition based on its ALT and FORMAT columns.
/// </summary>
/// <param name="simplePosition">The position whose VCF fields are inspected.</param>
/// <returns>
/// True when the ALT column is not contained in <c>VcfCommon.ReferenceAltAllele</c> and the
/// FORMAT column is exactly "GT" or starts with "GT:".
/// </returns>
internal static bool IsRecomposable(ISimplePosition simplePosition)
{
    string formatCol = simplePosition.VcfFields[VcfCommon.FormatIndex];

    if (VcfCommon.ReferenceAltAllele.Contains(simplePosition.VcfFields[VcfCommon.AltIndex]))
    {
        return false;
    }

    // FORMAT keys are machine-readable tokens, so compare ordinally rather than with
    // the current culture's linguistic rules.
    return formatCol.StartsWith("GT:", System.StringComparison.Ordinal) || formatCol.Equals("GT");
}
/// <summary>
/// Parses a single colon-delimited sample column into an <see cref="ISample"/>.
/// </summary>
/// <param name="sampleColumn">The raw sample column text.</param>
/// <param name="formatIndices">Maps each FORMAT key to its index within the split sample column.</param>
/// <param name="simplePosition">Supplies the chromosome, start and alt alleles used for derived values.</param>
/// <param name="mitoHeteroplasmyProvider">Optional provider of variant-frequency percentiles.</param>
/// <param name="legacyExtractor">When non-null, parsing is delegated to this extractor.</param>
/// <param name="enableDq">When false, the de novo quality is left null without reading the DQ field.</param>
/// <returns>
/// <c>Sample.EmptySample</c> for a null, empty or lone "." column; the legacy extractor's
/// result when one is supplied; otherwise a newly built <see cref="Sample"/>.
/// </returns>
internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices,
    ISimplePosition simplePosition, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
    LegacySampleFieldExtractor legacyExtractor = null, bool enableDq = false)
{
    // sanity check: a missing sample column yields the empty sample
    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

    // A column consisting of a single "." is treated as empty as well.
    bool isLonePlaceholder = fields.Length == 1 && fields[0] == ".";
    if (isLonePlaceholder)
    {
        return Sample.EmptySample;
    }

    fields.NormalizeNulls();

    // The legacy extractor works from the raw column text, not the split fields.
    if (legacyExtractor != null)
    {
        return legacyExtractor.ExtractSample(sampleColumn);
    }

    int[] alleleDepths = fields.GetString(formatIndices.AD).GetIntegers();
    float? artifactAdjustedQualityScore = fields.GetString(formatIndices.AQ).GetFloat();
    int? copyNumber = fields.GetString(formatIndices.CN).GetInteger();
    string[] diseaseAffectedStatuses = fields.GetString(formatIndices.DST).GetStrings();
    bool failedFilter = fields.GetString(formatIndices.FT).GetFailedFilter();
    string genotype = fields.GetString(formatIndices.GT);
    int? genotypeQuality = fields.GetString(formatIndices.GQ).GetInteger();
    bool isDeNovo = fields.GetString(formatIndices.DN).IsDeNovo();

    double? deNovoQuality = null;
    if (enableDq)
    {
        deNovoQuality = fields.GetString(formatIndices.DQ).GetDouble();
    }

    float? likelihoodRatioQualityScore = fields.GetString(formatIndices.LQ).GetFloat();
    int[] pairedEndReadCounts = fields.GetString(formatIndices.PR).GetIntegers();
    int[] repeatUnitCounts = fields.GetString(formatIndices.REPCN).GetIntegers('/');
    int[] splitReadCounts = fields.GetString(formatIndices.SR).GetIntegers();
    int? totalDepth = fields.GetString(formatIndices.DP).GetInteger();
    double? variantFrequency = fields.GetString(formatIndices.VF).GetDouble();
    int? minorHaplotypeCopyNumber = fields.GetString(formatIndices.MCN).GetInteger();
    double? somaticQuality = fields.GetString(formatIndices.SQ).GetDouble();
    int? binCount = fields.GetString(formatIndices.BC).GetInteger();

    double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths,
        simplePosition.AltAlleles.Length);

    // Percentiles stay null when no provider is given or the provider returns null.
    var rawPercentiles = mitoHeteroplasmyProvider?.GetVrfPercentiles(simplePosition.Chromosome,
        simplePosition.Start, simplePosition.AltAlleles, variantFrequencies);
    string[] mitoHeteroplasmyPercentiles = rawPercentiles?
        .Select(percentile => percentile?.ToString("0.##") ?? "null")
        .ToArray();

    var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);

    return new Sample(
        alleleDepths,
        artifactAdjustedQualityScore,
        copyNumber,
        diseaseAffectedStatuses,
        failedFilter,
        genotype,
        genotypeQuality,
        isDeNovo,
        deNovoQuality,
        likelihoodRatioQualityScore,
        pairedEndReadCounts,
        repeatUnitCounts,
        splitReadCounts,
        totalDepth,
        variantFrequencies,
        minorHaplotypeCopyNumber,
        somaticQuality,
        isLoh,
        mitoHeteroplasmyPercentiles,
        binCount);
}
/// <summary>
/// Parses every sample column of a VCF row into an <see cref="ISample"/>.
/// </summary>
/// <param name="vcfColumns">All columns of the VCF row.</param>
/// <param name="formatIndices">Set from the row's FORMAT column before the samples are parsed.</param>
/// <param name="simplePosition">Forwarded to <c>ExtractSample</c>.</param>
/// <param name="mitoHeteroplasmyProvider">Forwarded to <c>ExtractSample</c>.</param>
/// <param name="enableDq">Forwarded to <c>ExtractSample</c>.</param>
/// <returns>
/// Null when the row has fewer than <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns;
/// otherwise one entry per column starting at <c>VcfCommon.GenotypeIndex</c>.
/// </returns>
internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices,
    ISimplePosition simplePosition, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
{
    int columnCount = vcfColumns.Length;
    if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
    {
        return null;
    }

    var samples = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

    formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

    // A legacy extractor is only created when the format indices identify a legacy caller.
    LegacySampleFieldExtractor legacySampleExtractor = null;
    if (IsLegacyVariantCaller(formatIndices))
    {
        legacySampleExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
    }

    int sampleIndex = 0;
    for (int column = VcfCommon.GenotypeIndex; column < columnCount; column++, sampleIndex++)
    {
        samples[sampleIndex] = ExtractSample(vcfColumns[column], formatIndices, simplePosition,
            mitoHeteroplasmyProvider, legacySampleExtractor, enableDq);
    }

    return samples;
}
/// <summary>
/// Adds the position to the position buffer and generates output from whatever the buffer returns.
/// </summary>
/// <param name="simplePosition">The position to process.</param>
/// <returns>The positions produced by <c>GenerateOutput</c>.</returns>
public IEnumerable<ISimplePosition> Process(ISimplePosition simplePosition)
{
    var returnedBuffer = _positionBuffer.AddPosition(simplePosition);
    return GenerateOutput(returnedBuffer);
}
/// <summary>
/// Routes a position to the position processor.
/// </summary>
/// <param name="simplePosition">
/// The position to process; a null value makes the processor handle its buffered positions instead.
/// </param>
/// <returns>The positions returned by the position processor.</returns>
public IEnumerable<ISimplePosition> ProcessSimplePosition(ISimplePosition simplePosition)
{
    if (simplePosition == null)
    {
        return _positionProcessor.ProcessBufferedPositions();
    }

    return _positionProcessor.Process(simplePosition);
}
/// <summary>
/// Creates a <see cref="Position"/> for a reference site directly from the simple position.
/// The start coordinate is used for both start and end, and five of the constructor
/// arguments are passed as null.
/// </summary>
/// <param name="simplePosition">The source position.</param>
/// <returns>The newly created position.</returns>
private static IPosition GetReferencePosition(ISimplePosition simplePosition)
{
    return new Position(
        simplePosition.Chromosome,
        simplePosition.Start,
        simplePosition.Start,
        simplePosition.RefAllele,
        simplePosition.AltAlleles,
        null,
        null,
        null,
        null,
        null,
        simplePosition.VcfFields,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);
}
/// <summary>
/// Returns the given position unchanged, wrapped in a single-element array.
/// </summary>
/// <param name="simplePosition">The position to pass through.</param>
/// <returns>A one-element array containing <paramref name="simplePosition"/>.</returns>
public IEnumerable<ISimplePosition> ProcessSimplePosition(ISimplePosition simplePosition)
{
    var passThrough = new[] { simplePosition };
    return passThrough;
}
/// <summary>
/// Passes the position through untouched as a single-element array.
/// </summary>
/// <param name="simplePosition">The position to pass through.</param>
/// <returns>A one-element array containing <paramref name="simplePosition"/>.</returns>
public IEnumerable<ISimplePosition> ProcessSimplePosition(ISimplePosition simplePosition) =>
    new[] { simplePosition };