Exemplo n.º 1
0
        /// <summary>
        ///     Constructs a <c>StrandBiasStats</c> from the given support and coverage, then fills in
        ///     its probability fields by delegating to <c>PopulateStats</c>.
        /// </summary>
        /// <param name="support">Support value handed to the <c>StrandBiasStats</c> constructor.</param>
        /// <param name="coverage">Coverage value handed to the <c>StrandBiasStats</c> constructor.</param>
        /// <param name="noiseFreq">Noise frequency forwarded to <c>PopulateStats</c>.</param>
        /// <param name="minDetectableSNP">Minimum detectable SNP frequency forwarded to <c>PopulateStats</c>.</param>
        /// <param name="strandBiasModel">Strand bias model forwarded to <c>PopulateStats</c>.</param>
        /// <returns>The newly created and populated stats object.</returns>
        public static StrandBiasStats CreateStats(double support, double coverage, double noiseFreq, double minDetectableSNP,
                                                  StrandBiasModel strandBiasModel)
        {
            StrandBiasStats populated = new StrandBiasStats(support, coverage);
            PopulateStats(populated, noiseFreq, minDetectableSNP, strandBiasModel);
            return populated;
        }
Exemplo n.º 2
0
        /// <summary>
        ///     Fills in <c>ChanceVarFreqGreaterThanZero</c>, <c>ChanceFalsePos</c> and <c>ChanceFalseNeg</c>
        ///     on <paramref name="stats"/>, based on its <c>Support</c> and <c>Coverage</c>.
        /// </summary>
        /// <param name="stats">Stats object updated in place.</param>
        /// <param name="noiseFreq">Noise frequency; scaled by coverage before being passed to <c>Poisson.Cdf</c>.</param>
        /// <param name="minDetectableSNP">Minimum detectable SNP frequency.</param>
        /// <param name="strandBiasModel">Selects how the probabilities are derived.</param>
        /// <remarks>
        ///     <c>ChanceFalsePos</c> is treated as the type 1 error (the p-value): we have noise, but called a SNP.
        ///     <c>ChanceFalseNeg</c> is treated as the type 2 error: we have a variant, but did not call it.
        /// </remarks>
        public static void PopulateStats(StrandBiasStats stats, double noiseFreq, double minDetectableSNP,
                                         StrandBiasModel strandBiasModel)
        {
            if (stats.Support != 0)
            {
                // The diploid model has its own dedicated calculation when support was observed.
                if (strandBiasModel == StrandBiasModel.Diploid)
                {
                    PopulateDiploidStats(stats, noiseFreq, minDetectableSNP);
                    return;
                }

                // Chance of these observations or fewer, given the noise distribution.
                //
                // Every value is clamped at zero: with the forced-genotype feature, strand bias is also
                // computed for variants whose chance of existing (given the observations) is zero, or even
                // subtly negative because of the numerical limits of the CDF algorithm. The strand bias
                // calculation compares the chance that a variant is on both strands against the chance that
                // it is on exactly one strand; for a very low frequency forced allele both are essentially
                // zero and the algorithm falls apart without the clamp.
                stats.ChanceVarFreqGreaterThanZero = Math.Max(0, Poisson.Cdf(stats.Support - 1, stats.Coverage * noiseFreq)); // used in SB metric
                stats.ChanceFalsePos = Math.Max(0, 1 - stats.ChanceVarFreqGreaterThanZero);                                   // used in SB metric
                stats.ChanceFalseNeg = Math.Max(0, Poisson.Cdf(stats.Support, stats.Coverage * minDetectableSNP));
                return;
            }

            // No support at all: the outcome depends only on the selected model.
            switch (strandBiasModel)
            {
                case StrandBiasModel.Poisson:
                    stats.ChanceFalsePos = 1;
                    stats.ChanceVarFreqGreaterThanZero = 0;
                    stats.ChanceFalseNeg = 0;
                    break;

                case StrandBiasModel.Extended:
                case StrandBiasModel.Diploid:
                    // One observation shows the SNP with probability minDetectableSNP and misses it with
                    // probability (1 - minDetectableSNP); missing it on every one of the observations is
                    // that value raised to the coverage.
                    stats.ChanceVarFreqGreaterThanZero = Math.Pow(1 - minDetectableSNP, stats.Coverage); // used in SB metric

                    // Likelihood that the variant really does not exist
                    // = 1 - chance that it does exist but was never seen.
                    stats.ChanceFalsePos = 1 - stats.ChanceVarFreqGreaterThanZero; // used in SB metric

                    // Chance that a low frequency variant is at work and simply went unobserved.
                    stats.ChanceFalseNeg = stats.ChanceVarFreqGreaterThanZero;
                    break;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        ///     Creates strand bias statistics for the given support and coverage and immediately derives
        ///     <c>ChanceVarFreqGreaterThanZero</c>, <c>ChanceFalsePos</c> and <c>ChanceFalseNeg</c>.
        /// </summary>
        /// <param name="support">Support count; also the numerator of <c>Frequency</c>.</param>
        /// <param name="coverage">Coverage; also the denominator of <c>Frequency</c>.</param>
        /// <param name="noiseFreq">Noise frequency; scaled by coverage before being passed to <c>Poisson.Cdf</c>.</param>
        /// <param name="minDetectableSNP">Minimum detectable SNP frequency.</param>
        /// <param name="strandBiasModel">Model used when <paramref name="support"/> is zero.</param>
        /// <remarks>
        ///     <c>ChanceFalsePos</c> is treated as the type 1 error (the p-value): we have noise, but called a SNP.
        ///     <c>ChanceFalseNeg</c> is treated as the type 2 error: we have a variant, but did not call it.
        /// </remarks>
        public StrandBiasStats(double support, double coverage, double noiseFreq, double minDetectableSNP,
                               StrandBiasModel strandBiasModel)
        {
            Frequency = support / coverage;
            Support   = support;
            Coverage  = coverage;

            if (support != 0)
            {
                // Chance of these observations or fewer, given the noise distribution.
                ChanceVarFreqGreaterThanZero = Poisson.Cdf(support - 1, coverage * noiseFreq); // used in SB metric
                ChanceFalsePos = 1 - ChanceVarFreqGreaterThanZero;                             // used in SB metric
                ChanceFalseNeg = Poisson.Cdf(support, coverage * minDetectableSNP);
                return;
            }

            // No support observed: the result depends only on the selected model.
            switch (strandBiasModel)
            {
                case StrandBiasModel.Poisson:
                    ChanceFalsePos = 1;
                    ChanceVarFreqGreaterThanZero = 0;
                    ChanceFalseNeg = 0;
                    break;

                case StrandBiasModel.Extended:
                    // One observation shows the SNP with probability minDetectableSNP and misses it with
                    // probability (1 - minDetectableSNP); missing it on every one of the observations is
                    // that value raised to the coverage.
                    ChanceVarFreqGreaterThanZero = Math.Pow(1 - minDetectableSNP, coverage); // used in SB metric

                    // Likelihood that the variant really does not exist
                    // = 1 - chance that it does exist but was never seen.
                    ChanceFalsePos = 1 - ChanceVarFreqGreaterThanZero; // used in SB metric

                    // Chance that a low frequency variant is at work and simply went unobserved.
                    ChanceFalseNeg = ChanceVarFreqGreaterThanZero;
                    break;
            }
        }
        /// <summary>
        ///     Test helper: builds a SNV <c>CalledVariant</c> from per-direction tuples, runs
        ///     <c>StrandBiasCalculator.Compute</c> on it, and asserts that the forward and reverse support
        ///     each equal the direction's own support plus half of the stitched support.
        /// </summary>
        /// <param name="forwardStats">Forward direction: Item1 is multiplied by Item2 to get the support; Item2 is the coverage.</param>
        /// <param name="reverseStats">Reverse direction, same layout as <paramref name="forwardStats"/>.</param>
        /// <param name="stitchedStats">Stitched direction, same layout as <paramref name="forwardStats"/>.</param>
        /// <param name="estimatedBaseCallQuality">Passed to <c>Compute</c> as its noise quality argument.</param>
        /// <param name="threshold">Passed to <c>Compute</c> as its acceptance criteria argument.</param>
        /// <param name="model">Strand bias model passed to <c>Compute</c>.</param>
        /// <returns>The <c>StrandBiasResults</c> stored on the variant by <c>Compute</c>.</returns>
        private StrandBiasResults ExecuteTest(Tuple <double, int> forwardStats, Tuple <double, int> reverseStats, Tuple <double, int> stitchedStats,
                                              int estimatedBaseCallQuality = 20, float threshold = 0.5f, StrandBiasModel model = StrandBiasModel.Poisson)
        {
            // Support per direction is Item1 * Item2, truncated to a whole number.
            int forwardCount  = (int)(forwardStats.Item1 * forwardStats.Item2);
            int reverseCount  = (int)(reverseStats.Item1 * reverseStats.Item2);
            int stitchedCount = (int)(stitchedStats.Item1 * stitchedStats.Item2);

            int[] supportCounts  = { forwardCount, reverseCount, stitchedCount };
            int[] coverageCounts = { forwardStats.Item2, reverseStats.Item2, stitchedStats.Item2 };

            var variant = new CalledVariant(AlleleCategory.Snv);
            variant.TotalCoverageByDirection = coverageCounts;

            StrandBiasCalculator.Compute(variant, supportCounts, estimatedBaseCallQuality, threshold, model);

            // Half of the stitched support is expected on each strand (fractional halves are kept).
            float halfStitched = (float)stitchedCount / 2;
            var computed       = variant.StrandBiasResults;

            Assert.Equal(forwardCount + halfStitched, computed.ForwardStats.Support);
            Assert.Equal(reverseCount + halfStitched, computed.ReverseStats.Support);

            return computed;
        }
Exemplo n.º 5
0
 /// <summary>
 ///     Computes strand bias results from the variant's <c>TotalCoverageByDirection</c> and the supplied
 ///     per-direction support, and stores them in <c>variant.StrandBiasResults</c>.
 /// </summary>
 /// <param name="variant">Allele whose coverage is read and whose results are overwritten.</param>
 /// <param name="supportByDirection">Per-direction support counts.</param>
 /// <param name="qNoise">Noise quality forwarded to <c>CalculateStrandBiasResults</c>.</param>
 /// <param name="acceptanceCriteria">Acceptance criteria forwarded to <c>CalculateStrandBiasResults</c>.</param>
 /// <param name="strandBiasModel">Strand bias model forwarded to <c>CalculateStrandBiasResults</c>.</param>
 public static void Compute(BaseCalledAllele variant, int[] supportByDirection, int qNoise, double acceptanceCriteria,
                            StrandBiasModel strandBiasModel)
 {
     var coverageByDirection = variant.TotalCoverageByDirection;
     var computedResults     = CalculateStrandBiasResults(coverageByDirection, supportByDirection, qNoise,
                                                          acceptanceCriteria, strandBiasModel);

     variant.StrandBiasResults = computedResults;
 }
Exemplo n.º 6
0
        /// <summary>
        ///     Assign a strandbias-score to a SNP.
        ///     (using forward, reverse and stitched SNP counts; stitched counts are split evenly
        ///     between the forward and reverse strands.)
        /// </summary>
        /// <param name="coverageByStrandDirection">Coverage, indexed by <c>DirectionType</c>.</param>
        /// <param name="supportByStrandDirection">Support, indexed by <c>DirectionType</c>.</param>
        /// <param name="qNoise">Noise quality score; converted to an error rate as 10^(-qNoise/10).</param>
        /// <param name="acceptanceCriteria">
        ///     Threshold the bias score must stay below to be acceptable; also passed to
        ///     <c>MathOperations.GetTValue</c> and <c>ValueAcceptable</c>.
        /// </param>
        /// <param name="strandBiasModel">Model used when building each <c>StrandBiasStats</c>.</param>
        /// <returns>The populated <c>StrandBiasResults</c>.</returns>
        private static StrandBiasResults CalculateStrandBiasResults(int[] coverageByStrandDirection,
                                                                    int[] supportByStrandDirection,
                                                                    int qNoise, double acceptanceCriteria, StrandBiasModel strandBiasModel)
        {
            var forwardSupport   = supportByStrandDirection[(int)DirectionType.Forward];
            var forwardCoverage  = coverageByStrandDirection[(int)DirectionType.Forward];
            var reverseSupport   = supportByStrandDirection[(int)DirectionType.Reverse];
            var reverseCoverage  = coverageByStrandDirection[(int)DirectionType.Reverse];
            var stitchedSupport  = supportByStrandDirection[(int)DirectionType.Stitched];
            var stitchedCoverage = coverageByStrandDirection[(int)DirectionType.Stitched];

            var errorRate = Math.Pow(10, -1 * qNoise / 10f);

            // Half of the stitched counts go to each strand. The halving must be done in floating point:
            // integer division would silently drop one read whenever the stitched count is odd, so the
            // forward and reverse totals would no longer add up to the overall total.
            var halfStitchedSupport  = stitchedSupport / 2.0;
            var halfStitchedCoverage = stitchedCoverage / 2.0;

            var overallStats = new StrandBiasStats(forwardSupport + reverseSupport + stitchedSupport,
                                                   forwardCoverage + reverseCoverage + stitchedCoverage, errorRate, errorRate, strandBiasModel);
            var forwardStats = new StrandBiasStats(forwardSupport + halfStitchedSupport,
                                                   forwardCoverage + halfStitchedCoverage,
                                                   errorRate, errorRate, strandBiasModel);
            var reverseStats = new StrandBiasStats(reverseSupport + halfStitchedSupport,
                                                   reverseCoverage + halfStitchedCoverage,
                                                   errorRate, errorRate, strandBiasModel);

            var results = new StrandBiasResults
            {
                ForwardStats = forwardStats,
                ReverseStats = reverseStats,
                OverallStats = overallStats
            };

            results.StitchedStats = new StrandBiasStats(stitchedSupport, stitchedCoverage, errorRate, errorRate,
                                                        strandBiasModel);

            // AssignBiasScore returns the bias score at index 0 and the GATK bias score at index 1.
            var biasResults = AssignBiasScore(overallStats, forwardStats, reverseStats);

            results.BiasScore               = biasResults[0];
            results.GATKBiasScore           = biasResults[1];
            results.CovPresentOnBothStrands = ((forwardStats.Coverage > 0) && (reverseStats.Coverage > 0));
            results.VarPresentOnBothStrands = ((forwardStats.Support > 0) && (reverseStats.Support > 0));

            //not really fair to call it biased if coverage is in one direction..
            //its ambiguous if variant is found in only one direction.
            if (!results.CovPresentOnBothStrands)
            {
                results.BiasScore     = 0;
                results.GATKBiasScore = double.NegativeInfinity;
            }

            var testResults = MathOperations.GetTValue(forwardStats.Frequency, reverseStats.Frequency,
                                                       forwardStats.Coverage,
                                                       reverseStats.Coverage, acceptanceCriteria);

            results.TestScore      = testResults[0];
            results.TestAcceptable = ValueAcceptable(acceptanceCriteria, testResults[0], testResults[1]);
            results.BiasAcceptable = (results.BiasScore < acceptanceCriteria);

            return(results);
        }
Exemplo n.º 7
0
        /// <summary>
        ///     Parses command-line arguments, given as alternating option name / option value entries,
        ///     and stores the parsed values on this options object.
        /// </summary>
        /// <remarks>
        ///     Option names are matched case-insensitively (they are lower-cased before the lookup), so
        ///     every case label below must be written in lower case. Null or empty entries are skipped.
        ///     Options not recognised here are offered to <c>base.UpdateOptions</c>.
        /// </remarks>
        /// <param name="arguments">Raw command-line arguments.</param>
        /// <returns>
        ///     This instance once all arguments are parsed, or <c>null</c> when a version option
        ///     (<c>-v</c> / <c>-ver</c>) was encountered and the version was printed.
        /// </returns>
        /// <exception cref="Exception">
        ///     Any failure while parsing (unknown option, obsolete option, missing or malformed value) is
        ///     rethrown as an <see cref="Exception"/> naming the option that was being processed.
        /// </exception>
        public ApplicationOptions UpdateOptions(string[] arguments)
        {
            string lastArgumentField = string.Empty;

            try
            {
                int argumentIndex = 0;
                while (argumentIndex < arguments.Length)
                {
                    if (string.IsNullOrEmpty(arguments[argumentIndex]))
                    {
                        argumentIndex++;
                        continue;
                    }

                    // The value is the entry following the option name; it stays null for a trailing option.
                    string value = null;
                    if (argumentIndex < arguments.Length - 1)
                    {
                        value = arguments[argumentIndex + 1].Trim();
                    }

                    lastArgumentField = arguments[argumentIndex].ToLower();

                    switch (lastArgumentField)
                    {
                    case "-v":
                    case "-ver":
                        PrintVersionToConsole();
                        return(null);

                    //case "-a": depracated
                    case "-minvq":
                    case "-minvariantqscore":
                        MinimumVariantQScore = int.Parse(value);
                        break;

                    //case "-b": depracated
                    case "-minbq":
                    case "-minbasecallquality":
                        MinimumBaseCallQuality = int.Parse(value);
                        break;

                    case "-b":
                    case "-bam":
                        BAMPaths = value.Split(_delimiter);
                        break;

                    case "-c":
                    case "-mindp":
                    case "-mindepth":
                    case "-mincoverage":     //last release this is available. trying to be nice for backwards compatibility with Isas.
                        MinimumDepth = int.Parse(value);
                        break;

                    case "-d":
                    case "-debug":
                        DebugMode = bool.Parse(value);
                        break;

                    case "-minvf":      //used to be "f"
                    case "-minimumvariantfrequency":
                    case "-minimumfrequency":
                        MinimumFrequency = float.Parse(value);
                        break;

                    case "-vqfilter":     //used to be "F"
                    case "-variantqualityfilter":
                        FilteredVariantQScore = int.Parse(value);
                        break;

                    case "-vffilter":     //used to be "v"
                    case "-minvariantfrequencyfilter":
                        FilteredVariantFrequency = float.Parse(value);
                        break;

                    case "-gqfilter":
                    case "-genotypequalityfilter":
                        LowGenotypeQualityFilter = int.Parse(value);
                        break;

                    case "-repeatfilter":
                        IndelRepeatFilter = int.Parse(value);
                        break;

                    case "-mindpfilter":
                    case "-mindepthfilter":
                        LowDepthFilter = int.Parse(value);
                        break;

                    case "-ssfilter":     //used to be "fo"
                    case "-enablesinglestrandfilter":
                        FilterOutVariantsPresentOnlyOneStrand = bool.Parse(value);
                        break;

                    case "-g":
                    case "-genomepaths":
                        GenomePaths = value.Split(_delimiter);
                        break;

                    case "-nl":
                    case "-noiselevelforqmodel":
                        AppliedNoiseLevel = int.Parse(value);
                        break;

                    case "-gvcf":
                        OutputgVCFFiles = bool.Parse(value);
                        break;

                    case "-callmnvs":
                        //case "-phasesnps": obsolete
                        CallMNVs = bool.Parse(value);
                        break;

                    case "-maxmnvlength":
                        //case "-MaxPhaseSNPLength": obsolete
                        //case "-MaxPhasedSNPLength": obsolete
                        MaxSizeMNV = int.Parse(value);
                        break;

                    case "-maxgapbetweenmnv":
                    case "-maxrefgapinmnv":
                        //case "-MaxGapPhasedSNP":: obsolete
                        MaxGapBetweenMNV = int.Parse(value);
                        break;

                    case "-i":
                    case "-intervalpaths":
                        IntervalPaths = value.Split(_delimiter);
                        break;

                    case "-minmq":     //used to be "m"
                    case "-minmapquality":
                        MinimumMapQuality = int.Parse(value);
                        break;

                    case "-ploidy":
                        if (value.ToLower().Contains("somatic"))
                        {
                            PloidyModel = PloidyModel.Somatic;
                        }
                        else if (value.ToLower().Contains("diploid"))
                        {
                            PloidyModel = PloidyModel.Diploid;
                        }
                        else
                        {
                            throw new ArgumentException(string.Format("Unknown ploidy model '{0}'", value));
                        }
                        break;

                    case "-diploidgenotypeparameters":
                        var parameters = ParseStringToFloat(value.Split(_delimiter));
                        if (parameters.Length != 3)
                        {
                            throw new ArgumentException(string.Format("DiploidGenotypeParamteers argument requires exactly three values."));
                        }
                        DiploidThresholdingParameters = new DiploidThresholdingParameters(parameters);
                        break;

                    case "-crushvcf":
                        bool crushedallelestyle = bool.Parse(value);
                        AllowMultipleVcfLinesPerLoci = !(crushedallelestyle);
                        break;

                    case "-sbmodel":
                        if (value.ToLower().Contains("poisson"))
                        {
                            StrandBiasModel = StrandBiasModel.Poisson;
                        }
                        else if (value.ToLower().Contains("extended"))
                        {
                            StrandBiasModel = StrandBiasModel.Extended;
                        }
                        else
                        {
                            throw new ArgumentException(string.Format("Unknown strand bias model '{0}'", value));
                        }
                        break;

                    case "-outputsbfiles":
                        OutputBiasFiles = bool.Parse(value);
                        break;

                    case "-pp":
                    case "-onlyuseproperpairs":
                        OnlyUseProperPairs = bool.Parse(value);
                        break;

                    case "-maxvq":
                    case "-maxvariantqscore":
                        MaximumVariantQScore = int.Parse(value);
                        break;

                    case "-maxgq":
                    case "-maxgenotypeqscore":
                        MaximumGenotypeQScore = int.Parse(value);
                        break;

                    case "-mingq":
                    case "-mingenotypeqscore":
                    case "-minqenotypeqscore":     //misspelled long form, kept so existing command lines keep working
                        MinimumGenotypeQScore = int.Parse(value);
                        break;

                    case "-sbfilter":
                    case "-maxacceptablestrandbiasfilter":
                        StrandBiasAcceptanceCriteria = float.Parse(value);
                        break;

                    case "-stitchpairedreads":
                        throw new ArgumentException("StitchPairedReads option is obsolete.");

                    case "-t":
                        MaxNumThreads = int.Parse(value);
                        break;

                    case "-threadbychr":
                        ThreadByChr = bool.Parse(value);
                        break;

                    case "-reportnocalls":
                        ReportNoCalls = bool.Parse(value);
                        break;

                    case "-xcstitcher":
                        throw new ArgumentException("XCStitcher option is obsolete.");

                    case "-collapse":
                        Collapse = bool.Parse(value);
                        break;

                    case "-collapsefreqthreshold":
                        CollapseFreqThreshold = float.Parse(value);
                        break;

                    case "-collapsefreqratiothreshold":
                        CollapseFreqRatioThreshold = float.Parse(value);
                        break;

                    case "-priorspath":
                        PriorsPath = value;
                        break;

                    case "-trimmnvpriors":
                        TrimMnvPriors = bool.Parse(value);
                        break;

                    case "-nifydisagreements":
                        throw new ArgumentException("NifyDisagreements option is no longer valid: stitching within Pisces is obsolete.");

                    // The label must be all lower case: the option name was lower-cased above, so a
                    // mixed-case label such as "-coverageMethod" could never match.
                    case "-coveragemethod":
                        if (value.ToLower() == "approximate")
                        {
                            CoverageMethod = CoverageMethod.Approximate;
                        }
                        else if (value.ToLower() == "exact")
                        {
                            CoverageMethod = CoverageMethod.Exact;
                        }
                        else
                        {
                            throw new ArgumentException(string.Format("Unknown coverage method '{0}'", value));
                        }
                        break;

                    case "-reportrccounts":
                        ReportRcCounts = bool.Parse(value);
                        break;

                    case "-mono":
                        MonoPath = value;
                        break;

                    case "-rmxnfilter":
                        // Accepts either a boolean (true keeps the defaults, false switches the filter off)
                        // or a delimited list of two or three numeric thresholds.
                        bool turnOn = true;
                        bool worked = (bool.TryParse(value, out turnOn));
                        if (worked)
                        {
                            if (turnOn)
                            {
                                // stick with defaults
                            }
                            else
                            {
                                //turn off
                                RMxNFilterMaxLengthRepeat = null;
                                RMxNFilterMinRepetitions  = null;
                            }
                            break;
                        }
                        //else, it wasnt a bool...
                        var rmxnThresholds = ParseStringToFloat(value.Split(_delimiter));
                        if ((rmxnThresholds.Length < 2) || (rmxnThresholds.Length > 3))
                        {
                            throw new ArgumentException(string.Format("RMxNFilter argument requires two or three values."));
                        }
                        RMxNFilterMaxLengthRepeat = (int)rmxnThresholds[0];
                        RMxNFilterMinRepetitions  = (int)rmxnThresholds[1];

                        if (rmxnThresholds.Length > 2)
                        {
                            RMxNFilterFrequencyLimit = (float)rmxnThresholds[2];
                        }
                        break;

                    case "-noisemodel":
                        // Anything other than "window" selects the flat noise model.
                        NoiseModel = value.ToLower() == "window" ? NoiseModel.Window : NoiseModel.Flat;
                        break;

                    case "-skipnonintervalalignments":
                        throw new Exception(string.Format("'SkipNonIntervalAlignments' option has been depracated until further notice. ", arguments[argumentIndex]));

                    //(it has bugs, speed issues, and no plan to fix it)
                    default:
                        if (!base.UpdateOptions(lastArgumentField, value))
                        {
                            throw new Exception(string.Format("Unknown argument '{0}'", arguments[argumentIndex]));
                        }
                        break;
                    }

                    // Every option consumes two entries: its name and its value.
                    argumentIndex += 2;
                }

                CommandLineArguments = arguments;

                return(this);
            }
            catch (Exception ex)
            {
                throw new Exception(string.Format("Unable to parse argument {0}: {1}", lastArgumentField, ex.Message));
            }
        }
Exemplo n.º 8
0
 /// <summary>
 ///     Computes strand bias results from the variant's <c>EstimatedCoverageByDirection</c> and the
 ///     supplied per-direction support, and stores them in <c>variant.StrandBiasResults</c>.
 /// </summary>
 /// <param name="variant">Allele whose coverage is read and whose results are overwritten.</param>
 /// <param name="supportByDirection">Per-direction support counts.</param>
 /// <param name="qNoise">Noise quality forwarded to <c>CalculateStrandBiasResults</c>.</param>
 /// <param name="minVariantFrequency">Minimum variant frequency forwarded to <c>CalculateStrandBiasResults</c>.</param>
 /// <param name="acceptanceCriteria">Acceptance criteria forwarded to <c>CalculateStrandBiasResults</c>.</param>
 /// <param name="strandBiasModel">Strand bias model forwarded to <c>CalculateStrandBiasResults</c>.</param>
 public static void Compute(CalledAllele variant, int[] supportByDirection, int qNoise, double minVariantFrequency, double acceptanceCriteria,
                            StrandBiasModel strandBiasModel)
 {
     var coverageByDirection = variant.EstimatedCoverageByDirection;
     var computedResults     = CalculateStrandBiasResults(coverageByDirection, supportByDirection, qNoise,
                                                          minVariantFrequency, acceptanceCriteria, strandBiasModel);

     variant.StrandBiasResults = computedResults;
 }
Exemplo n.º 9
0
        /// <summary>
        ///     Parses command-line arguments, given as alternating option name / option value entries,
        ///     and stores the parsed values on this options object.
        /// </summary>
        /// <remarks>
        ///     Option names are matched exactly (case-sensitive). Null or empty entries are skipped.
        /// </remarks>
        /// <param name="arguments">Raw command-line arguments.</param>
        /// <returns>This instance, once all arguments are parsed.</returns>
        /// <exception cref="Exception">
        ///     Any failure while parsing (unknown option, missing or malformed value) is rethrown as an
        ///     <see cref="Exception"/>; the message names the option being processed when one is known.
        /// </exception>
        public ApplicationOptions UpdateOptions(string[] arguments)
        {
            string currentFlag = string.Empty;

            try
            {
                int position = 0;
                while (position < arguments.Length)
                {
                    if (string.IsNullOrEmpty(arguments[position]))
                    {
                        position++;
                        continue;
                    }

                    // The value is the entry after the option name; null when the option is the last entry.
                    bool hasFollowingEntry = position < arguments.Length - 1;
                    string flagValue       = hasFollowingEntry ? arguments[position + 1] : null;

                    currentFlag = arguments[position];

                    switch (currentFlag)
                    {
                    case "-a":
                        MinimumVariantQScore = int.Parse(flagValue);
                        break;

                    case "-b":
                        MinimumBaseCallQuality = int.Parse(flagValue);
                        break;

                    case "-B":
                        BAMPaths = flagValue.Split(_delimiter);
                        break;

                    case "-BAMFolder":
                        BAMFolder = flagValue;
                        break;

                    case "-c":
                        MinimumCoverage = int.Parse(flagValue);
                        break;

                    case "-d":
                    case "-debug":
                        DebugMode = bool.Parse(flagValue);
                        break;

                    case "-f":
                        MinimumFrequency = float.Parse(flagValue);
                        break;

                    case "-F":
                        FilteredVariantQScore = int.Parse(flagValue);
                        break;

                    case "-fo":
                        FilterOutVariantsPresentOnlyOneStrand = bool.Parse(flagValue);
                        break;

                    case "-g":
                        GenomePaths = flagValue.Split(_delimiter);
                        break;

                    case "-NL":
                        AppliedNoiseLevel = int.Parse(flagValue);
                        break;

                    case "-gVCF":
                        OutputgVCFFiles = bool.Parse(flagValue);
                        break;

                    case "-CallMNVs":
                    case "-PhaseSNPs":
                        CallMNVs = bool.Parse(flagValue);
                        break;

                    case "-MaxMNVLength":
                    case "-MaxPhaseSNPLength":
                        MaxSizeMNV = int.Parse(flagValue);
                        break;

                    case "-MaxGapBetweenMNV":
                    case "-MaxGapPhasedSNP":
                        MaxGapBetweenMNV = int.Parse(flagValue);
                        break;

                    case "-i":
                        IntervalPaths = flagValue.Split(_delimiter);
                        break;

                    case "-m":
                        MinimumMapQuality = int.Parse(flagValue);
                        break;

                    case "-GT":
                    {
                        // The model is chosen by substring match on the lower-cased value.
                        string requestedGenotypeModel = flagValue.ToLower();
                        if (requestedGenotypeModel.Contains("none"))
                        {
                            GTModel = GenotypeModel.None;
                        }
                        else if (requestedGenotypeModel.Contains("threshold"))
                        {
                            GTModel = GenotypeModel.Thresholding;
                        }
                        else if (requestedGenotypeModel.Contains("symmetric"))
                        {
                            GTModel = GenotypeModel.Symmetrical;
                        }
                        else
                        {
                            throw new ArgumentException(string.Format("Unknown genotype model '{0}'", flagValue));
                        }
                        break;
                    }

                    case "-SBModel":
                    {
                        // The model is chosen by substring match on the lower-cased value.
                        string requestedBiasModel = flagValue.ToLower();
                        if (requestedBiasModel.Contains("poisson"))
                        {
                            StrandBiasModel = StrandBiasModel.Poisson;
                        }
                        else if (requestedBiasModel.Contains("extended"))
                        {
                            StrandBiasModel = StrandBiasModel.Extended;
                        }
                        else
                        {
                            throw new ArgumentException(string.Format("Unknown strand bias model '{0}'", flagValue));
                        }
                        break;
                    }

                    case "-o":
                        OutputBiasFiles = bool.Parse(flagValue);
                        break;

                    case "-p":
                        OnlyUseProperPairs = bool.Parse(flagValue);
                        break;

                    case "-q":
                        MaximumVariantQScore = int.Parse(flagValue);
                        break;

                    case "-s":
                        StrandBiasAcceptanceCriteria = float.Parse(flagValue);
                        break;

                    case "-StitchPairedReads":
                        StitchReads = bool.Parse(flagValue);
                        break;

                    case "-t":
                        MaxNumThreads = int.Parse(flagValue);
                        break;

                    case "-ThreadByChr":
                        ThreadByChr = bool.Parse(flagValue);
                        break;

                    case "-ReportNoCalls":
                        ReportNoCalls = bool.Parse(flagValue);
                        break;

                    case "-requireXC":
                        RequireXCTagToStitch = bool.Parse(flagValue);
                        break;

                    case "-xcStitcher":
                        UseXCStitcher = bool.Parse(flagValue);
                        break;

                    case "-OutFolder":
                        OutputFolder = flagValue;
                        break;

                    default:
                        throw new Exception(string.Format("Unknown argument '{0}'", currentFlag));
                    }

                    // Every option consumes two entries: its name and its value.
                    position += 2;
                }

                CommandLineArguments = string.Join(" ", arguments);

                return this;
            }
            catch (Exception ex)
            {
                // Without a current option name (failure before any option was read) report generically.
                string message = string.IsNullOrEmpty(currentFlag)
                    ? "Unable to parse arguments: " + ex.Message
                    : string.Format("Unable to parse argument {0}: {1}", currentFlag, ex.Message);

                throw new Exception(message);
            }
        }