// A null sample column combined with freshly constructed format indices
// is expected to produce a sample flagged as empty.
public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
{
    var indices = new FormatIndices();
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(null, indices, position, null);

    Assert.True(observed.IsEmpty);
}
// A null sample column is expected to produce a sample flagged as empty
// (variant of the test that calls the three-argument ExtractSample).
public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
{
    var indices = new FormatIndices();

    var observed = SampleFieldExtractor.ExtractSample(null, indices, 1);

    Assert.True(observed.IsEmpty);
}
// A sample column that only contains "." is expected to produce a sample
// flagged as empty.
public void ExtractSample_DotInSampleColumn_ReturnEmptySample()
{
    const string sampleColumn = ".";
    var indices = new FormatIndices();

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, 1, false);

    Assert.True(observed.IsEmpty);
}
// Checks that FormatIndices.Extract records the position of every recognised
// key in a 16-key FORMAT column, returns null for a null column, and leaves
// keys that are absent from the column unset.
public void FormatIndicesTest()
{
    const string fullFormat = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR";

    var indices = FormatIndices.Extract(fullFormat);

    // expected positions, listed in column order (BOB at position 12 is not checked)
    Assert.Equal(0, indices.AU);
    Assert.Equal(1, indices.GU);
    Assert.Equal(2, indices.TAR);
    Assert.Equal(3, indices.FT);
    Assert.Equal(4, indices.GQ);
    Assert.Equal(5, indices.DP);
    Assert.Equal(6, indices.VF);
    Assert.Equal(7, indices.CU);
    Assert.Equal(8, indices.TU);
    Assert.Equal(9, indices.TIR);
    Assert.Equal(10, indices.GT);
    Assert.Equal(11, indices.GQX);
    Assert.Equal(13, indices.DPI);
    Assert.Equal(14, indices.NV);
    Assert.Equal(15, indices.NR);

    // a missing FORMAT column yields no indices object at all
    Assert.Null(FormatIndices.Extract(null));

    // a short column only populates the keys it actually contains
    indices = FormatIndices.Extract("TEMP:DPI:BOB");

    Assert.Equal(1, indices.DPI);
    Assert.Null(indices.AU);
}
// Checks that FormatIndices.Set records key positions for a 27-key FORMAT
// column, that Set(null) leaves the checked indices null, and that a later
// Set call only populates the keys present in the new column.
public void FormatIndicesTest()
{
    const string fullFormat = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR:CHC:DST:PCH:DCS:DID:PLG:PCN:MAD:SCH:AQ:LQ";

    var indices = new FormatIndices();
    indices.Set(fullFormat);

    // expected positions, listed in column order
    Assert.Equal(0, indices.AU);
    Assert.Equal(1, indices.GU);
    Assert.Equal(2, indices.TAR);
    Assert.Equal(3, indices.FT);
    Assert.Equal(4, indices.GQ);
    Assert.Equal(5, indices.DP);
    Assert.Equal(6, indices.VF);
    Assert.Equal(7, indices.CU);
    Assert.Equal(8, indices.TU);
    Assert.Equal(9, indices.TIR);
    Assert.Equal(10, indices.GT);
    Assert.Equal(11, indices.GQX);
    Assert.Equal(13, indices.DPI);
    Assert.Equal(17, indices.DST);
    Assert.Equal(25, indices.AQ);
    Assert.Equal(26, indices.LQ);

    // after a null FORMAT column the previously set indices are null
    indices.Set(null);

    Assert.Null(indices.TIR);
    Assert.Null(indices.AU);

    // re-using the same instance with a shorter column
    indices.Set("TEMP:DPI:BOB");

    Assert.Equal(1, indices.DPI);
    Assert.Null(indices.AU);
}
// For the supplied FORMAT / sample column pair, the extracted sample is
// expected to be flagged as loss of heterozygosity.
public void ExtractSample_DragenCNV_MCN_LOH(string formatField, string sampleField)
{
    var indices = new FormatIndices();
    indices.Set(formatField);
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(sampleField, indices, position, null);

    Assert.True(observed.IsLossOfHeterozygosity);
}
// Extracts a sample from an ExpansionHunter-style FORMAT/sample pair and
// checks the genotype and the repeat unit counts.
public void ExtractSample_ExpansionHunter()
{
    const string formatColumn = "GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC";
    const string sampleColumn = "1/1:SPANNING/SPANNING:15/15:15-15/15-15:22/22:23/23:0/0:38.270270";

    var indices = new FormatIndices();
    indices.Set(formatColumn);
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

    Assert.Equal("1/1", observed.Genotype);
    Assert.Equal(new[] { 15, 15 }, observed.RepeatUnitCounts);
}
// Extracts a sample from a GT:CN:MCN FORMAT/sample pair and checks the
// genotype, the copy number and the minor haplotype copy number.
public void ExtractSample_DragenCNV_AsExpected()
{
    const string formatColumn = "GT:CN:MCN";
    const string sampleColumn = "0|1:3:1";

    var indices = new FormatIndices();
    indices.Set(formatColumn);
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

    Assert.Equal("0|1", observed.Genotype);
    Assert.Equal(3, observed.CopyNumber);
    Assert.Equal(1, observed.MinorHaplotypeCopyNumber);
}
// Extracts a sample from a somatic-style FORMAT/sample pair and checks the
// genotype, somatic quality, allele depths, total depth and variant frequency.
public void ExtractSample_DragenSomatic_AsExpected()
{
    const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
    const string sampleColumn = "0|1:3.96:33,8:0.195:13,6:20,2:41:17,16,4,4:13,20,4,4:534234";

    var indices = new FormatIndices();
    indices.Set(formatColumn);
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

    Assert.Equal("0|1", observed.Genotype);
    Assert.Equal(3.96, observed.SomaticQuality);
    Assert.Equal(new[] { 33, 8 }, observed.AlleleDepths);
    Assert.Equal(41, observed.TotalDepth);
    // expected frequency is the alt depth (8) over the total depth (41)
    Assert.Equal(new[] { 8 / 41.0 }, observed.VariantFrequencies);
}
// Builds IntermediateSampleFields from a single-sample VCF line carrying
// AU/CU/GU/TU values and checks the A, C, G and T counts.
public void IntermediateSampleFields_AlleleCounts()
{
    const string vcfLine = "chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:AU:CU:GU:TU:DP:DPF:AD\t1/1:10,11:20,21:30,31:40,41:70:3:0,70";

    var vcfColumns = vcfLine.Split('\t');
    var indices = FormatIndices.Extract(vcfColumns[VcfCommon.FormatIndex]);
    var sampleValues = vcfColumns[VcfCommon.GenotypeIndex].Split(':');

    var observed = new IntermediateSampleFields(vcfColumns, indices, sampleValues);

    Assert.Equal(10, observed.ACount);
    Assert.Equal(20, observed.CCount);
    Assert.Equal(30, observed.GCount);
    Assert.Equal(40, observed.TCount);
}
// Converts a VCF row with four sample columns into samples and checks the
// genotype, allele depths, disease affected status, copy number and
// failed-filter flag of the individual samples.
public void ToSamples_SMN1_CNV()
{
    // the merged FORMAT column used below combines these two layouts:
    //
    // GT:AD:DST:RPL:LC
    // 0/1:30,20:-:35.8981:45.810811
    //
    // GT:SM:CN:BC:QS:FT:DN
    // ./1:1.24763:3:4:5:cnvLength:.
    // ./.:1.17879:2:4:8:cnvLength:.
    // ./1:1.26335:3:4:6:cnvLength:Inherited
    var indices = new FormatIndices();

    string[] vcfColumns =
    {
        "chr1",
        "125068769",
        "DRAGEN:GAIN:125068770-125075279",
        "N",
        "<DUP>",
        ".",
        "SampleFT",
        "SVTYPE=CNV;END=125075279;REFLEN=6510",
        "GT:AD:DST:RPL:LC:SM:CN:BC:QS:FT:DN",
        "0/1:30,20:-:35.8981:45.810811",
        "./1:.:.:.:.:1.24763:3:4:5:cnvLength:.",
        "./.:.:.:.:.:1.17879:2:4:8:cnvLength:.",
        "./1:.:.:.:.:1.26335:3:4:6:cnvLength:Inherited"
    };

    var position = GetSimplePositionUsingAlleleNum(1);
    ISample[] observed = vcfColumns.ToSamples(indices, position, null);

    Assert.Equal(4, observed.Length);

    // first sample: only the leading GT:AD:DST:RPL:LC values are present
    Assert.Equal("0/1", observed[0].Genotype);
    Assert.Equal(new[] { 30, 20 }, observed[0].AlleleDepths);
    Assert.Equal(new[] { "-" }, observed[0].DiseaseAffectedStatuses);

    // second sample
    Assert.Equal("./1", observed[1].Genotype);
    Assert.Equal(3, observed[1].CopyNumber);
    Assert.True(observed[1].FailedFilter);

    // third sample
    Assert.Equal("./.", observed[2].Genotype);
    Assert.Equal(2, observed[2].CopyNumber);
    Assert.True(observed[2].FailedFilter);

    // fourth sample
    Assert.Equal("./1", observed[3].Genotype);
    Assert.Equal(3, observed[3].CopyNumber);
    Assert.True(observed[3].FailedFilter);
}
// Extracts a sample from a FORMAT/sample pair that ends in AQ and LQ and
// checks the genotype, genotype quality, depths, variant frequency and the
// two quality scores read from AQ and LQ.
public void ExtractSample_PEPE()
{
    const string formatColumn = "GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ";
    const string sampleColumn = "0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.000";

    var indices = new FormatIndices();
    indices.Set(formatColumn);
    var position = GetSimplePositionUsingAlleleNum(1);

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

    Assert.Equal("0/1", observed.Genotype);
    Assert.Equal(5, observed.GenotypeQuality);
    Assert.Equal(new[] { 338, 1 }, observed.AlleleDepths);
    Assert.Equal(339, observed.TotalDepth);
    Assert.Equal(new[] { 0.00295 }, observed.VariantFrequencies);
    Assert.Equal(3.366f, observed.ArtifactAdjustedQualityScore);
    Assert.Equal(0.000f, observed.LikelihoodRatioQualityScore);
}
// Extracts a sample at a chrM position with a heteroplasmy provider attached
// and checks the variant frequencies and the heteroplasmy percentiles.
public void ExtractSample_MitoHeteroplasmy_AsExpected()
{
    const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
    const string sampleColumn = "1|2:3.96:0,15,85:0.195:13,6:20,2:100:17,16,4,4:13,20,4,4:534234";

    // the provider has entries for "C" and "G" at position 1, but none for "T"
    var heteroplasmyProvider = new MitoHeteroplasmyProvider();
    heteroplasmyProvider.Add(1, "C", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 2, 4 });
    heteroplasmyProvider.Add(1, "G", new[] { 0.101, 0.201 }, new[] { 1, 2 });

    var mitoPosition = new SimplePosition(ChromosomeUtilities.ChrM, 1, "A", new[] { "C", "T" });

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var observed = SampleFieldExtractor.ExtractSample(sampleColumn, indices, mitoPosition, heteroplasmyProvider);

    Assert.Equal(new[] { 15 / 100.0, 85 / 100.0 }, observed.VariantFrequencies);
    Assert.Equal(new[] { "14.29", "null" }, observed.HeteroplasmyPercentile);
}
// A VCF row that stops after the INFO column (no FORMAT or sample columns)
// is expected to yield null instead of a sample array.
public void ToSamples_TooFewVcfColumns_ReturnNull()
{
    var indices = new FormatIndices();

    string[] vcfColumns =
    {
        "chr1",
        "125068769",
        "DRAGEN:GAIN:125068770-125075279",
        "N",
        "<DUP>",
        ".",
        "SampleFT",
        "SVTYPE=CNV;END=125075279;REFLEN=6510"
    };

    var position = GetSimplePositionUsingAlleleNum(1);
    ISample[] observed = vcfColumns.ToSamples(indices, position, null);

    Assert.Null(observed);
}
// ReSharper restore InconsistentNaming

// ReSharper disable once SuggestBaseTypeForParameter
/// <summary>
/// Captures the REF/ALT alleles, the format indices and the sample columns, then
/// looks up each needed FORMAT key in the sample columns and stores the parsed value.
/// </summary>
/// <param name="vcfColumns">the VCF columns; the REF and ALT columns are read from here</param>
/// <param name="formatIndices">the position of each FORMAT key within the sample columns</param>
/// <param name="sampleCols">the values of one sample column</param>
public IntermediateSampleFields(string[] vcfColumns, FormatIndices formatIndices, string[] sampleCols)
{
    VcfRefAllele = vcfColumns[VcfCommon.RefIndex];

    var altColumn = vcfColumns[VcfCommon.AltIndex];
    AltAlleles    = altColumn.OptimizedSplit(',');

    FormatIndices = formatIndices;
    SampleColumns = sampleCols;

    // TAR and TIR are resolved together from the first value of each field
    var firstTar = GetFirstValue(GetString(formatIndices.TAR, sampleCols));
    var firstTir = GetFirstValue(GetString(formatIndices.TIR, sampleCols));
    (TAR, TIR)   = GetLinkedIntegers(firstTar, firstTir);

    MajorChromosomeCount = GetInteger(GetString(formatIndices.MCC, sampleCols));
    DST                  = GetStrings(GetString(formatIndices.DST, sampleCols));
    AQ                   = GetFloat(GetString(formatIndices.AQ, sampleCols));
    LQ                   = GetFloat(GetString(formatIndices.LQ, sampleCols));
    VF                   = GetDouble(GetString(formatIndices.VF, sampleCols));

    // the copy number parser is told whether the ALT column mentions "STR"
    var copyNumberText = GetString(formatIndices.CN, sampleCols);
    var altContainsStr = altColumn.Contains("STR");
    CopyNumber         = GetCopyNumber(copyNumberText, altContainsStr);

    // per-base allele counts come from the AU, CU, GU and TU fields
    var auText = GetString(formatIndices.AU, sampleCols);
    var cuText = GetString(formatIndices.CU, sampleCols);
    var guText = GetString(formatIndices.GU, sampleCols);
    var tuText = GetString(formatIndices.TU, sampleCols);
    (ACount, CCount, GCount, TCount, TotalAlleleCount) = GetAlleleCounts(auText, cuText, guText, tuText);
}
// Checks that FormatIndices.Set records the position of each recognised key,
// that Set(null) leaves every checked index without a value, and that the
// instance can be reused with a different FORMAT column afterwards.
public void FormatIndicesTest()
{
    const string fullFormat = "AD:AQ:BOB:CN:DN:DP:DST:FT:GQ:GT:LQ:PR:SR:VF";

    var indices = new FormatIndices();
    indices.Set(fullFormat);

    // expected positions (BOB at position 2 is not checked)
    Assert.Equal(0, indices.AD);
    Assert.Equal(1, indices.AQ);
    Assert.Equal(3, indices.CN);
    Assert.Equal(4, indices.DN);
    Assert.Equal(5, indices.DP);
    Assert.Equal(6, indices.DST);
    Assert.Equal(7, indices.FT);
    Assert.Equal(8, indices.GQ);
    Assert.Equal(9, indices.GT);
    Assert.Equal(10, indices.LQ);
    Assert.Equal(11, indices.PR);
    Assert.Equal(12, indices.SR);
    Assert.Equal(13, indices.VF);

    // a null FORMAT column leaves none of the indices populated
    indices.Set(null);

    Assert.False(indices.AD.HasValue);
    Assert.False(indices.AQ.HasValue);
    Assert.False(indices.CN.HasValue);
    Assert.False(indices.DN.HasValue);
    Assert.False(indices.DP.HasValue);
    Assert.False(indices.DST.HasValue);
    Assert.False(indices.FT.HasValue);
    Assert.False(indices.GQ.HasValue);
    Assert.False(indices.GT.HasValue);
    Assert.False(indices.LQ.HasValue);
    Assert.False(indices.PR.HasValue);
    Assert.False(indices.SR.HasValue);
    Assert.False(indices.VF.HasValue);

    // re-using the same instance with a shorter column
    indices.Set("TEMP:DP:BOB");

    Assert.Equal(1, indices.DP);
}
// Checks that FormatIndices.Extract records the position of every recognised
// key in a 27-key FORMAT column, returns null for a null column, and leaves
// keys that are absent from the column unset.
public void FormatIndicesTest()
{
    const string fullFormat = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR:CHC:DST:PCH:DCS:DID:PLG:PCN:MAD:SCH:AQ:LQ";

    var indices = FormatIndices.Extract(fullFormat);

    // expected positions, listed in column order (BOB at position 12 is not checked)
    Assert.Equal(0, indices.AU);
    Assert.Equal(1, indices.GU);
    Assert.Equal(2, indices.TAR);
    Assert.Equal(3, indices.FT);
    Assert.Equal(4, indices.GQ);
    Assert.Equal(5, indices.DP);
    Assert.Equal(6, indices.VF);
    Assert.Equal(7, indices.CU);
    Assert.Equal(8, indices.TU);
    Assert.Equal(9, indices.TIR);
    Assert.Equal(10, indices.GT);
    Assert.Equal(11, indices.GQX);
    Assert.Equal(13, indices.DPI);
    Assert.Equal(14, indices.NV);
    Assert.Equal(15, indices.NR);
    Assert.Equal(16, indices.CHC);
    Assert.Equal(17, indices.DST);
    Assert.Equal(18, indices.PCH);
    Assert.Equal(19, indices.DCS);
    Assert.Equal(20, indices.DID);
    Assert.Equal(21, indices.PLG);
    Assert.Equal(22, indices.PCN);
    Assert.Equal(23, indices.MAD);
    Assert.Equal(24, indices.SCH);
    Assert.Equal(25, indices.AQ);
    Assert.Equal(26, indices.LQ);

    // a missing FORMAT column yields no indices object at all
    Assert.Null(FormatIndices.Extract(null));

    // a short column only populates the keys it actually contains
    indices = FormatIndices.Extract("TEMP:DPI:BOB");

    Assert.Equal(1, indices.DPI);
    Assert.Null(indices.AU);
}
/// <summary>
/// Records the position of every recognised key in a colon-delimited FORMAT column.
/// Keys that are not recognised are skipped; when a key occurs more than once, the
/// position of its last occurrence is kept.
/// </summary>
/// <param name="formatColumn">the colon-delimited FORMAT column, or null when there is none</param>
/// <returns>the populated indices, or null when <paramref name="formatColumn"/> is null</returns>
internal static FormatIndices Extract(string formatColumn)
{
    // nothing to index without a format column
    if (formatColumn == null)
    {
        return null;
    }

    var result = new FormatIndices();
    var keys = formatColumn.Split(':');
    var position = 0;

    foreach (var key in keys)
    {
        // ReSharper disable once SwitchStatementMissingSomeCases
        switch (key)
        {
            case "AU":  result.AU  = position; break;
            case "CU":  result.CU  = position; break;
            case "GU":  result.GU  = position; break;
            case "TU":  result.TU  = position; break;
            case "TAR": result.TAR = position; break;
            case "TIR": result.TIR = position; break;
            case "FT":  result.FT  = position; break;
            case "GT":  result.GT  = position; break;
            case "GQ":  result.GQ  = position; break;
            case "GQX": result.GQX = position; break;
            case "DP":  result.DP  = position; break;
            case "DPI": result.DPI = position; break;
            case "AD":  result.AD  = position; break;
            case "VF":  result.VF  = position; break;
            case "MCC": result.MCC = position; break;
            case "CN":  result.CN  = position; break;
            case "CI":  result.CI  = position; break;
            case "NR":  result.NR  = position; break;
            case "NV":  result.NV  = position; break;
            case "DQ":  result.DQ  = position; break;
            case "PR":  result.PR  = position; break;
            case "SR":  result.SR  = position; break;
            case "MAD": result.MAD = position; break;
            case "SCH": result.SCH = position; break;
            case "PLG": result.PLG = position; break;
            case "PCN": result.PCN = position; break;
            case "DCS": result.DCS = position; break;
            case "DID": result.DID = position; break;
            case "DST": result.DST = position; break;
            case "PCH": result.PCH = position; break;
            case "CHC": result.CHC = position; break;
            case "AQ":  result.AQ  = position; break;
            case "LQ":  result.LQ  = position; break;
        }

        position++;
    }

    return result;
}
/// <summary>
/// Stores the VCF columns and format indices, and keeps the result of passing the
/// INFO column to GetInfoDepth.
/// </summary>
/// <param name="vcfColumns">the VCF columns; the INFO column is read from here</param>
/// <param name="formatIndices">the position of each FORMAT key</param>
internal LegacySampleFieldExtractor(string[] vcfColumns, FormatIndices formatIndices)
{
    var infoColumn = vcfColumns[VcfCommon.InfoIndex];

    _vcfColumns    = vcfColumns;
    _infoDepth     = GetInfoDepth(infoColumn);
    _formatIndices = formatIndices;
}
// ReSharper restore InconsistentNaming

#endregion

/// <summary>
/// Records the position of every recognised key in a colon-delimited FORMAT column.
/// Keys that are not recognised are skipped; when a key occurs more than once, the
/// position of its last occurrence is kept.
/// </summary>
/// <param name="formatColumn">the colon-delimited FORMAT column, or null when there is none</param>
/// <returns>the populated indices, or null when <paramref name="formatColumn"/> is null</returns>
internal static FormatIndices Extract(string formatColumn)
{
    // nothing to index without a format column
    if (formatColumn == null)
    {
        return null;
    }

    var result = new FormatIndices();
    var keys = formatColumn.Split(':');
    var position = 0;

    foreach (var key in keys)
    {
        switch (key)
        {
            case "AU":  result.AU  = position; break;
            case "CU":  result.CU  = position; break;
            case "GU":  result.GU  = position; break;
            case "TU":  result.TU  = position; break;
            case "TAR": result.TAR = position; break;
            case "TIR": result.TIR = position; break;
            case "FT":  result.FT  = position; break;
            case "GT":  result.GT  = position; break;
            case "GQ":  result.GQ  = position; break;
            case "GQX": result.GQX = position; break;
            case "DP":  result.DP  = position; break;
            case "DPI": result.DPI = position; break;
            case "AD":  result.AD  = position; break;
            case "VF":  result.VF  = position; break;
            case "MCC": result.MCC = position; break;
            case "CN":  result.CN  = position; break;
            case "NR":  result.NR  = position; break;
            case "NV":  result.NV  = position; break;
            case "DQ":  result.DQ  = position; break;
            case "PR":  result.PR  = position; break;
            case "SR":  result.SR  = position; break;
        }

        position++;
    }

    return result;
}