Ejemplo n.º 1
0
        // A null sample column should produce a sample whose IsEmpty flag is set.
        public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
        {
            var indices   = new FormatIndices();
            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(null, indices, position, null);

            Assert.True(extracted.IsEmpty);
        }
        // A null sample column should produce a sample whose IsEmpty flag is set.
        public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
        {
            var indices = new FormatIndices();

            var isEmpty = SampleFieldExtractor.ExtractSample(null, indices, 1).IsEmpty;

            Assert.True(isEmpty);
        }
        // A sample column consisting of a single "." should produce a sample whose IsEmpty flag is set.
        public void ExtractSample_DotInSampleColumn_ReturnEmptySample()
        {
            var indices = new FormatIndices();

            var isEmpty = SampleFieldExtractor.ExtractSample(".", indices, 1, false).IsEmpty;

            Assert.True(isEmpty);
        }
Ejemplo n.º 4
0
        // Checks that FormatIndices.Extract records the position of each known key,
        // returns null for a null column, and leaves absent keys unset.
        public void FormatIndicesTest()
        {
            const string format  = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR";
            var          indices = FormatIndices.Extract(format);

            // listed in the order the keys appear in the column (BOB at position 12 is not checked)
            Assert.Equal(0, indices.AU);
            Assert.Equal(1, indices.GU);
            Assert.Equal(2, indices.TAR);
            Assert.Equal(3, indices.FT);
            Assert.Equal(4, indices.GQ);
            Assert.Equal(5, indices.DP);
            Assert.Equal(6, indices.VF);
            Assert.Equal(7, indices.CU);
            Assert.Equal(8, indices.TU);
            Assert.Equal(9, indices.TIR);
            Assert.Equal(10, indices.GT);
            Assert.Equal(11, indices.GQX);
            Assert.Equal(13, indices.DPI);
            Assert.Equal(14, indices.NV);
            Assert.Equal(15, indices.NR);

            // a missing FORMAT column yields no indices object at all
            Assert.Null(FormatIndices.Extract(null));

            // only DPI is checked as present here; AU is absent and stays null
            indices = FormatIndices.Extract("TEMP:DPI:BOB");
            Assert.Equal(1, indices.DPI);
            Assert.Null(indices.AU);
        }
        // Checks that FormatIndices.Set records key positions, that Set(null) leaves
        // TIR and AU null, and that a later Set only reports keys present in the new column.
        public void FormatIndicesTest()
        {
            const string format  = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR:CHC:DST:PCH:DCS:DID:PLG:PCN:MAD:SCH:AQ:LQ";
            var          indices = new FormatIndices();

            indices.Set(format);

            // listed in the order the keys appear in the column
            Assert.Equal(0, indices.AU);
            Assert.Equal(1, indices.GU);
            Assert.Equal(2, indices.TAR);
            Assert.Equal(3, indices.FT);
            Assert.Equal(4, indices.GQ);
            Assert.Equal(5, indices.DP);
            Assert.Equal(6, indices.VF);
            Assert.Equal(7, indices.CU);
            Assert.Equal(8, indices.TU);
            Assert.Equal(9, indices.TIR);
            Assert.Equal(10, indices.GT);
            Assert.Equal(11, indices.GQX);
            Assert.Equal(13, indices.DPI);
            Assert.Equal(17, indices.DST);
            Assert.Equal(25, indices.AQ);
            Assert.Equal(26, indices.LQ);

            // after Set(null), previously populated entries are expected to be null
            indices.Set(null);
            Assert.Null(indices.TIR);
            Assert.Null(indices.AU);

            // reusing the same instance with a shorter column
            indices.Set("TEMP:DPI:BOB");
            Assert.Equal(1, indices.DPI);
            Assert.Null(indices.AU);
        }
Ejemplo n.º 6
0
        // For each supplied FORMAT/sample pair, the extracted sample is expected to be
        // flagged as loss of heterozygosity.
        public void ExtractSample_DragenCNV_MCN_LOH(string formatField, string sampleField)
        {
            var indices = new FormatIndices();
            indices.Set(formatField);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleField, indices, position, null);

            Assert.True(extracted.IsLossOfHeterozygosity);
        }
Ejemplo n.º 7
0
        // Extracts a sample carrying REPCN "15/15" and expects the genotype and
        // repeat unit counts { 15, 15 } to be reported.
        public void ExtractSample_ExpansionHunter()
        {
            const string format     = "GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC";
            const string sampleText = "1/1:SPANNING/SPANNING:15/15:15-15/15-15:22/22:23/23:0/0:38.270270";

            var indices = new FormatIndices();
            indices.Set(format);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleText, indices, position, null);

            Assert.Equal("1/1", extracted.Genotype);
            Assert.Equal(new[] { 15, 15 }, extracted.RepeatUnitCounts);
        }
Ejemplo n.º 8
0
        // Extracts a GT:CN:MCN sample and expects the genotype, copy number and
        // minor haplotype copy number to match the sample column values.
        public void ExtractSample_DragenCNV_AsExpected()
        {
            const string format     = "GT:CN:MCN";
            const string sampleText = "0|1:3:1";

            var indices = new FormatIndices();
            indices.Set(format);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleText, indices, position, null);

            Assert.Equal("0|1", extracted.Genotype);
            Assert.Equal(3, extracted.CopyNumber);
            Assert.Equal(1, extracted.MinorHaplotypeCopyNumber);
        }
Ejemplo n.º 9
0
        // Extracts a sample with SQ, AD and DP fields and checks the genotype, somatic
        // quality, allele depths, total depth and the expected variant frequency (8 / 41).
        public void ExtractSample_DragenSomatic_AsExpected()
        {
            const string format     = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
            const string sampleText = "0|1:3.96:33,8:0.195:13,6:20,2:41:17,16,4,4:13,20,4,4:534234";

            var indices = new FormatIndices();
            indices.Set(format);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleText, indices, position, null);

            Assert.Equal("0|1", extracted.Genotype);
            Assert.Equal(3.96, extracted.SomaticQuality);
            Assert.Equal(new[] { 33, 8 }, extracted.AlleleDepths);
            Assert.Equal(41, extracted.TotalDepth);
            Assert.Equal(new[] { 8 / 41.0 }, extracted.VariantFrequencies);
        }
        // Builds IntermediateSampleFields from a single VCF line and expects the A/C/G/T
        // counts to equal the first value of the AU/CU/GU/TU sample fields.
        public void IntermediateSampleFields_AlleleCounts()
        {
            const string line    = "chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:AU:CU:GU:TU:DP:DPF:AD\t1/1:10,11:20,21:30,31:40,41:70:3:0,70";
            var          columns = line.Split('\t');

            var indices      = FormatIndices.Extract(columns[VcfCommon.FormatIndex]);
            var sampleValues = columns[VcfCommon.GenotypeIndex].Split(':');
            var fields       = new IntermediateSampleFields(columns, indices, sampleValues);

            Assert.Equal(10, fields.ACount);
            Assert.Equal(20, fields.CCount);
            Assert.Equal(30, fields.GCount);
            Assert.Equal(40, fields.TCount);
        }
Ejemplo n.º 11
0
        // Converts a VCF row with four sample columns into samples and checks the
        // genotype, depth/status or copy number, and filter flag of each one.
        public void ToSamples_SMN1_CNV()
        {
            // The row below merges two FORMAT layouts into one column:
            //
            // GT:AD:DST:RPL:LC
            // 0/1:30,20:-:35.8981:45.810811
            //
            // GT:SM:CN:BC:QS:FT:DN
            // ./1:1.24763:3:4:5:cnvLength:.
            // ./.:1.17879:2:4:8:cnvLength:.
            // ./1:1.26335:3:4:6:cnvLength:Inherited

            var indices = new FormatIndices();

            string[] vcfColumns =
            {
                "chr1",
                "125068769",
                "DRAGEN:GAIN:125068770-125075279",
                "N",
                "<DUP>",
                ".",
                "SampleFT",
                "SVTYPE=CNV;END=125075279;REFLEN=6510",
                "GT:AD:DST:RPL:LC:SM:CN:BC:QS:FT:DN",
                "0/1:30,20:-:35.8981:45.810811",
                "./1:.:.:.:.:1.24763:3:4:5:cnvLength:.",
                "./.:.:.:.:.:1.17879:2:4:8:cnvLength:.",
                "./1:.:.:.:.:1.26335:3:4:6:cnvLength:Inherited"
            };

            ISample[] results = vcfColumns.ToSamples(indices, GetSimplePositionUsingAlleleNum(1), null);

            Assert.Equal(4, results.Length);

            // first sample: only the leading five fields are populated
            var first = results[0];
            Assert.Equal("0/1", first.Genotype);
            Assert.Equal(new[] { 30, 20 }, first.AlleleDepths);
            Assert.Equal(new[] { "-" }, first.DiseaseAffectedStatuses);

            // remaining samples: copy number is checked and FailedFilter is expected to be set
            var second = results[1];
            Assert.Equal("./1", second.Genotype);
            Assert.Equal(3, second.CopyNumber);
            Assert.True(second.FailedFilter);

            var third = results[2];
            Assert.Equal("./.", third.Genotype);
            Assert.Equal(2, third.CopyNumber);
            Assert.True(third.FailedFilter);

            var fourth = results[3];
            Assert.Equal("./1", fourth.Genotype);
            Assert.Equal(3, fourth.CopyNumber);
            Assert.True(fourth.FailedFilter);
        }
Ejemplo n.º 12
0
        // Extracts a sample that carries AQ and LQ fields and checks the genotype, genotype
        // quality, depths, variant frequency and the two quality scores read from AQ/LQ.
        public void ExtractSample_PEPE()
        {
            const string format     = "GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ";
            const string sampleText = "0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.000";

            var indices = new FormatIndices();
            indices.Set(format);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleText, indices, position, null);

            Assert.Equal("0/1", extracted.Genotype);
            Assert.Equal(5, extracted.GenotypeQuality);
            Assert.Equal(new[] { 338, 1 }, extracted.AlleleDepths);
            Assert.Equal(339, extracted.TotalDepth);
            Assert.Equal(new[] { 0.00295 }, extracted.VariantFrequencies);
            Assert.Equal(3.366f, extracted.ArtifactAdjustedQualityScore);
            Assert.Equal(0.000f, extracted.LikelihoodRatioQualityScore);
        }
Ejemplo n.º 13
0
        // Extracts a two-alt (C, T) sample on chrM with a heteroplasmy provider that only has
        // entries for C and G at position 1. Expects frequencies 15/100 and 85/100 and
        // percentiles "14.29" (for C) and "null" (for T).
        public void ExtractSample_MitoHeteroplasmy_AsExpected()
        {
            var heteroplasmy = new MitoHeteroplasmyProvider();
            heteroplasmy.Add(1, "C", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 2, 4 });
            heteroplasmy.Add(1, "G", new[] { 0.101, 0.201 }, new[] { 1, 2 });

            var position = new SimplePosition(ChromosomeUtilities.ChrM, 1, "A", new[] { "C", "T" });

            const string format     = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
            const string sampleText = "1|2:3.96:0,15,85:0.195:13,6:20,2:100:17,16,4,4:13,20,4,4:534234";

            var indices = new FormatIndices();
            indices.Set(format);

            var extracted = SampleFieldExtractor.ExtractSample(sampleText, indices, position, heteroplasmy);

            Assert.Equal(new[] { 15 / 100.0, 85 / 100.0 }, extracted.VariantFrequencies);
            Assert.Equal(new[] { "14.29", "null" }, extracted.HeteroplasmyPercentile);
        }
Ejemplo n.º 14
0
        // A VCF row that stops after the INFO column (no FORMAT or sample columns)
        // is expected to yield null instead of a sample array.
        public void ToSamples_TooFewVcfColumns_ReturnNull()
        {
            var indices = new FormatIndices();

            string[] vcfColumns =
            {
                "chr1",
                "125068769",
                "DRAGEN:GAIN:125068770-125075279",
                "N",
                "<DUP>",
                ".",
                "SampleFT",
                "SVTYPE=CNV;END=125075279;REFLEN=6510"
            };

            ISample[] results = vcfColumns.ToSamples(indices, GetSimplePositionUsingAlleleNum(1), null);

            Assert.Null(results);
        }
Ejemplo n.º 15
0
        // ReSharper restore InconsistentNaming

        // ReSharper disable once SuggestBaseTypeForParameter
        /// <summary>
        /// Captures the reference/alternate alleles of a VCF row and pre-parses a number of
        /// per-sample fields (TAR/TIR, MCC, DST, AQ, LQ, VF, CN and the AU/CU/GU/TU counts).
        /// </summary>
        /// <param name="vcfColumns">the columns of the VCF row; the REF and ALT columns are read</param>
        /// <param name="formatIndices">positions of the FORMAT keys within <paramref name="sampleCols"/></param>
        /// <param name="sampleCols">the values of one sample column</param>
        public IntermediateSampleFields(string[] vcfColumns, FormatIndices formatIndices, string[] sampleCols)
        {
            VcfRefAllele = vcfColumns[VcfCommon.RefIndex];

            var altColumn = vcfColumns[VcfCommon.AltIndex];
            AltAlleles    = altColumn.OptimizedSplit(',');

            FormatIndices = formatIndices;
            SampleColumns = sampleCols;

            // TAR and TIR are parsed together from the first value of each field
            var firstTar = GetFirstValue(GetString(formatIndices.TAR, sampleCols));
            var firstTir = GetFirstValue(GetString(formatIndices.TIR, sampleCols));
            (TAR, TIR)   = GetLinkedIntegers(firstTar, firstTir);

            MajorChromosomeCount = GetInteger(GetString(formatIndices.MCC, sampleCols));
            DST                  = GetStrings(GetString(formatIndices.DST, sampleCols));
            AQ                   = GetFloat(GetString(formatIndices.AQ, sampleCols));
            LQ                   = GetFloat(GetString(formatIndices.LQ, sampleCols));
            VF                   = GetDouble(GetString(formatIndices.VF, sampleCols));

            // the copy number parser is told whether the ALT column mentions "STR"
            var copyNumberText = GetString(formatIndices.CN, sampleCols);
            CopyNumber         = GetCopyNumber(copyNumberText, altColumn.Contains("STR"));

            var aText = GetString(formatIndices.AU, sampleCols);
            var cText = GetString(formatIndices.CU, sampleCols);
            var gText = GetString(formatIndices.GU, sampleCols);
            var tText = GetString(formatIndices.TU, sampleCols);

            (ACount, CCount, GCount, TCount, TotalAlleleCount) = GetAlleleCounts(aText, cText, gText, tText);
        }
Ejemplo n.º 16
0
        // Checks that FormatIndices.Set records key positions, that Set(null) clears
        // every checked entry, and that the instance can then be reused with a new column.
        public void FormatIndicesTest()
        {
            const string format  = "AD:AQ:BOB:CN:DN:DP:DST:FT:GQ:GT:LQ:PR:SR:VF";
            var          indices = new FormatIndices();

            indices.Set(format);

            // BOB at position 2 is not checked
            Assert.Equal(0, indices.AD);
            Assert.Equal(1, indices.AQ);
            Assert.Equal(3, indices.CN);
            Assert.Equal(4, indices.DN);
            Assert.Equal(5, indices.DP);
            Assert.Equal(6, indices.DST);
            Assert.Equal(7, indices.FT);
            Assert.Equal(8, indices.GQ);
            Assert.Equal(9, indices.GT);
            Assert.Equal(10, indices.LQ);
            Assert.Equal(11, indices.PR);
            Assert.Equal(12, indices.SR);
            Assert.Equal(13, indices.VF);

            // a null column is expected to reset every previously set index
            indices.Set(null);
            Assert.False(indices.AD.HasValue);
            Assert.False(indices.AQ.HasValue);
            Assert.False(indices.CN.HasValue);
            Assert.False(indices.DN.HasValue);
            Assert.False(indices.DP.HasValue);
            Assert.False(indices.DST.HasValue);
            Assert.False(indices.FT.HasValue);
            Assert.False(indices.GQ.HasValue);
            Assert.False(indices.GT.HasValue);
            Assert.False(indices.LQ.HasValue);
            Assert.False(indices.PR.HasValue);
            Assert.False(indices.SR.HasValue);
            Assert.False(indices.VF.HasValue);

            indices.Set("TEMP:DP:BOB");
            Assert.Equal(1, indices.DP);
        }
Ejemplo n.º 17
0
        // Checks that FormatIndices.Extract records the position of each known key,
        // returns null for a null column, and leaves absent keys unset.
        public void FormatIndicesTest()
        {
            const string format  = "AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR:CHC:DST:PCH:DCS:DID:PLG:PCN:MAD:SCH:AQ:LQ";
            var          indices = FormatIndices.Extract(format);

            // listed in the order the keys appear in the column (BOB at position 12 is not checked)
            Assert.Equal(0, indices.AU);
            Assert.Equal(1, indices.GU);
            Assert.Equal(2, indices.TAR);
            Assert.Equal(3, indices.FT);
            Assert.Equal(4, indices.GQ);
            Assert.Equal(5, indices.DP);
            Assert.Equal(6, indices.VF);
            Assert.Equal(7, indices.CU);
            Assert.Equal(8, indices.TU);
            Assert.Equal(9, indices.TIR);
            Assert.Equal(10, indices.GT);
            Assert.Equal(11, indices.GQX);
            Assert.Equal(13, indices.DPI);
            Assert.Equal(14, indices.NV);
            Assert.Equal(15, indices.NR);
            Assert.Equal(16, indices.CHC);
            Assert.Equal(17, indices.DST);
            Assert.Equal(18, indices.PCH);
            Assert.Equal(19, indices.DCS);
            Assert.Equal(20, indices.DID);
            Assert.Equal(21, indices.PLG);
            Assert.Equal(22, indices.PCN);
            Assert.Equal(23, indices.MAD);
            Assert.Equal(24, indices.SCH);
            Assert.Equal(25, indices.AQ);
            Assert.Equal(26, indices.LQ);

            // a missing FORMAT column yields no indices object at all
            Assert.Null(FormatIndices.Extract(null));

            // only DPI is checked as present here; AU is absent and stays null
            indices = FormatIndices.Extract("TEMP:DPI:BOB");
            Assert.Equal(1, indices.DPI);
            Assert.Null(indices.AU);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Builds a <see cref="FormatIndices"/> whose members hold the zero-based position of
        /// each recognized key within a colon-delimited FORMAT column.
        /// </summary>
        /// <param name="formatColumn">the colon-delimited FORMAT column; may be null</param>
        /// <returns>
        /// null when <paramref name="formatColumn"/> is null; otherwise an instance in which only
        /// the recognized keys present in the column have been assigned
        /// </returns>
        internal static FormatIndices Extract(string formatColumn)
        {
            // without a FORMAT column there is nothing to index
            if (formatColumn == null)
            {
                return null;
            }

            var result   = new FormatIndices();
            var position = 0;

            foreach (var key in formatColumn.Split(':'))
            {
                // unrecognized keys are skipped; a repeated key keeps its last position
                // ReSharper disable once SwitchStatementMissingSomeCases
                switch (key)
                {
                    case "AU":  result.AU  = position; break;
                    case "CU":  result.CU  = position; break;
                    case "GU":  result.GU  = position; break;
                    case "TU":  result.TU  = position; break;
                    case "TAR": result.TAR = position; break;
                    case "TIR": result.TIR = position; break;
                    case "FT":  result.FT  = position; break;
                    case "GT":  result.GT  = position; break;
                    case "GQ":  result.GQ  = position; break;
                    case "GQX": result.GQX = position; break;
                    case "DP":  result.DP  = position; break;
                    case "DPI": result.DPI = position; break;
                    case "AD":  result.AD  = position; break;
                    case "VF":  result.VF  = position; break;
                    case "MCC": result.MCC = position; break;
                    case "CN":  result.CN  = position; break;
                    case "CI":  result.CI  = position; break;
                    case "NR":  result.NR  = position; break;
                    case "NV":  result.NV  = position; break;
                    case "DQ":  result.DQ  = position; break;
                    case "PR":  result.PR  = position; break;
                    case "SR":  result.SR  = position; break;
                    case "MAD": result.MAD = position; break;
                    case "SCH": result.SCH = position; break;
                    case "PLG": result.PLG = position; break;
                    case "PCN": result.PCN = position; break;
                    case "DCS": result.DCS = position; break;
                    case "DID": result.DID = position; break;
                    case "DST": result.DST = position; break;
                    case "PCH": result.PCH = position; break;
                    case "CHC": result.CHC = position; break;
                    case "AQ":  result.AQ  = position; break;
                    case "LQ":  result.LQ  = position; break;
                }

                position++;
            }

            return result;
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Stores the VCF columns and format indices, and derives the info depth from the INFO column.
 /// </summary>
 /// <param name="vcfColumns">the columns of the VCF row; the INFO column is read immediately</param>
 /// <param name="formatIndices">the FORMAT key positions kept for later use</param>
 internal LegacySampleFieldExtractor(string[] vcfColumns, FormatIndices formatIndices)
 {
     _vcfColumns = vcfColumns;

     var infoColumn = vcfColumns[VcfCommon.InfoIndex];
     _infoDepth     = GetInfoDepth(infoColumn);

     _formatIndices = formatIndices;
 }
Ejemplo n.º 20
0
        // ReSharper restore InconsistentNaming

        #endregion

        /// <summary>
        /// Builds a <see cref="FormatIndices"/> whose members hold the zero-based position of
        /// each recognized key within a colon-delimited FORMAT column.
        /// </summary>
        /// <param name="formatColumn">the colon-delimited FORMAT column; may be null</param>
        /// <returns>
        /// null when <paramref name="formatColumn"/> is null; otherwise an instance in which only
        /// the recognized keys present in the column have been assigned
        /// </returns>
        internal static FormatIndices Extract(string formatColumn)
        {
            // without a FORMAT column there is nothing to index
            if (formatColumn == null)
            {
                return null;
            }

            var result = new FormatIndices();
            var keys   = formatColumn.Split(':');

            // unrecognized keys are skipped; a repeated key keeps its last position
            for (var i = 0; i < keys.Length; i++)
            {
                switch (keys[i])
                {
                    case "AU":  result.AU  = i; break;
                    case "CU":  result.CU  = i; break;
                    case "GU":  result.GU  = i; break;
                    case "TU":  result.TU  = i; break;
                    case "TAR": result.TAR = i; break;
                    case "TIR": result.TIR = i; break;
                    case "FT":  result.FT  = i; break;
                    case "GT":  result.GT  = i; break;
                    case "GQ":  result.GQ  = i; break;
                    case "GQX": result.GQX = i; break;
                    case "DP":  result.DP  = i; break;
                    case "DPI": result.DPI = i; break;
                    case "AD":  result.AD  = i; break;
                    case "VF":  result.VF  = i; break;
                    case "MCC": result.MCC = i; break;
                    case "CN":  result.CN  = i; break;
                    case "NR":  result.NR  = i; break;
                    case "NV":  result.NV  = i; break;
                    case "DQ":  result.DQ  = i; break;
                    case "PR":  result.PR  = i; break;
                    case "SR":  result.SR  = i; break;
                }
            }

            return result;
        }