Beispiel #1
0
        /// <summary>
        /// Command-line entry point: parses the options, validates the input files, configures a
        /// <c>SomaticCaller</c> and runs variant calling.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// 0 after showing help (help requested, unrecognized argument, or a missing required option);
        /// 1 when a required input or configuration file does not exist; otherwise the value returned
        /// by <c>CallVariants</c>.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when the quality filter threshold is negative.</exception>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);

            // Numeric options are machine-readable, so they are parsed culture-independently;
            // otherwise "-u 0.5" would be mis-parsed (or rejected) under comma-decimal locales.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            string inFile = null;
            string outFile = null;
            string variantFrequencyFile = null;
            string referenceFolder = null;
            string name = "SAMPLE";
            string truthDataPath = null;
            string somaticVCFPath = null;
            bool needHelp = false;
            string bedPath = null;
            string ploidyVcfPath = null;
            string localSdMetricFile = null;
            string evennessMetricFile = null;
            bool isEnrichment = false;
            bool isDbsnpVcf = false;
            // Parsed from -M for command-line compatibility; not consumed anywhere in this method.
            double minimumCallSize;
            int qualityFilterThreshold = 10; // Default quality filter threshold = 10, overridable via -q command-line argument
            // Parameters, for parameter-sweep, somatic model training:
            bool isTrainMode = false;
            float? userPurity = null;
            float? userPloidy = null;

            CanvasCommon.CanvasSomaticClusteringMode somaticClusteringMode =
                CanvasCommon.CanvasSomaticClusteringMode.MeanShift;

            // Both configuration files default to JSON files located next to the executable.
            string assemblyFolder = Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program));
            string parameterconfigPath = Path.Combine(assemblyFolder, "SomaticCallerParameters.json");
            string qualityScoreConfigPath = Path.Combine(assemblyFolder, "QualityScoreParameters.json");

            OptionSet p = new OptionSet()
            {
                {
                    "i|infile=",
                    "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)",
                    v => inFile = v
                },
                {
                    "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)",
                    v => variantFrequencyFile = v
                },
                { "o|outfile=", "file name prefix to ouput copy number calls to outfile.vcf", v => outFile = v },
                { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
                { "n|name=", "sample name for output VCF header (optional)", v => name = v },
                { "t|truth=", "path to vcf/bed with CNV truth data (optional)", v => truthDataPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "e|enrichment", "flag indicating this is enrichment data", v => isEnrichment = v != null },
                { "s|somaticvcf=", "somatic vcf file - optionally used for purity estimation", v => somaticVCFPath = v },
                { "b|bedfile=", "bed file containing regions to exclude from calling", v => bedPath = v },
                {
                    "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)",
                    v => ploidyVcfPath = v
                },
                {
                    $"{CommandLineOptions.LocalSdMetricFile}=", "text file with local SD metric (calculate within CanvasClean) (optional)",
                    v => localSdMetricFile = v
                },
                {
                    $"{CommandLineOptions.EvennessMetricFile}=", "text file with evenness metric (calculated within CanvasPartition) (optional)",
                    v => evennessMetricFile = v
                },
                {
                    "d|dbsnpvcf", "flag indicating a dbSNP VCF file is used to generate the variant frequency file",
                    v => isDbsnpVcf = v != null
                },
                { "M|minimumcall=", "INTERNAL: minimum call size", v => minimumCallSize = int.Parse(v, invariant) },
                {
                    "q|qualitythreshold=", $"quality filter threshold (default {qualityFilterThreshold})",
                    v => qualityFilterThreshold = int.Parse(v, invariant)
                },
                {
                    "c|parameterconfig=", $"parameter configuration path (default {parameterconfigPath})",
                    v => parameterconfigPath = v
                },
                {
                    "g|qscoreconfig=", $"parameter configuration path (default {qualityScoreConfigPath})",
                    v => qualityScoreConfigPath = v
                },
                { "u|definedpurity=", "INTERNAL: user pre-defined purity", v => userPurity = float.Parse(v, invariant) },
                { "l|definedploidy=", "INTERNAL: user pre-defined ploidy", v => userPloidy = float.Parse(v, invariant) },
                // The description used to be a copy of the ploidy option's text.
                { "a|trainmodel=", "INTERNAL: enable training mode (parameter sweep / somatic model training)", v => isTrainMode = v != null }
            };

            List<string> extraArgs = p.Parse(args);

            if (extraArgs.Count > 0)
            {
                Console.WriteLine("Error: Argument '{0}' not understood", extraArgs[0]);
                needHelp = true;
            }

            // NOTE(review): help is shown with exit code 0 even for unrecognized or missing
            // arguments; kept as-is so existing wrappers see the same exit codes.
            if (needHelp || inFile == null || outFile == null || referenceFolder == null)
            {
                ShowHelp(p);
                return 0;
            }

            if (!File.Exists(inFile))
            {
                Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", inFile);
                return 1;
            }

            // File.Exists(null) is false, so an omitted -v option is also reported here.
            if (!File.Exists(variantFrequencyFile))
            {
                Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", variantFrequencyFile);
                return 1;
            }

            string genomeSizePath = Path.Combine(referenceFolder, "GenomeSize.xml");
            if (!File.Exists(genomeSizePath))
            {
                Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", genomeSizePath);
                return 1;
            }

            if (qualityFilterThreshold < 0)
            {
                throw new ArgumentException(
                          $"Quality filter threshold must be greater than or equal to zero. Value was {qualityFilterThreshold}");
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", parameterconfigPath);
                return 1;
            }

            // Validate the q-score configuration the same way as the caller parameter file,
            // so a bad -g path yields the same error message and exit code.
            if (!File.Exists(qualityScoreConfigPath))
            {
                Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", qualityScoreConfigPath);
                return 1;
            }

            FileLocation parameterconfigFile = new FileLocation(parameterconfigPath);
            SomaticCallerParameters somaticCallerParametersJSON = Deserialize<SomaticCallerParameters>(parameterconfigFile);

            FileLocation qscoreConfigFile = new FileLocation(qualityScoreConfigPath);
            CanvasCommon.QualityScoreParameters qscoreParametersJSON = Deserialize<CanvasCommon.QualityScoreParameters>(qscoreConfigFile);

            var logger = new Logger(new[] { Console.Out }, new[] { Console.Error });
            SomaticCaller caller = new SomaticCaller(logger);

            // Set parameters:
            caller.somaticCallerParameters = somaticCallerParametersJSON;
            caller.somaticCallerQscoreParameters = qscoreParametersJSON;
            caller.TruthDataPath = truthDataPath;
            caller.SomaticVcfPath = somaticVCFPath;
            caller.IsEnrichment = isEnrichment;
            caller.IsDbsnpVcf = isDbsnpVcf;
            caller.userPurity = userPurity;
            caller.userPloidy = userPloidy;
            // Training mode is controlled solely by the -a option. A preceding assignment of
            // (truthDataPath != null || isTrainMode) was always overwritten by this one and has
            // been removed as dead code; the effective value is unchanged.
            caller.IsTrainingMode = isTrainMode;
            caller.QualityFilterThreshold = qualityFilterThreshold;

            if (!string.IsNullOrEmpty(ploidyVcfPath))
            {
                caller.LoadReferencePloidy(ploidyVcfPath);
            }

            // Optional metrics; they stay null when the corresponding file was not supplied.
            double? localSDmetric = null;
            double? evennessMetric = null;

            if (!string.IsNullOrEmpty(localSdMetricFile))
            {
                localSDmetric = CanvasCommon.CanvasIO.ReadLocalSdMetricFromTextFile(localSdMetricFile);
            }

            if (!string.IsNullOrEmpty(evennessMetricFile))
            {
                evennessMetric = CanvasCommon.CanvasIO.ReadEvennessMetricFromTextFile(evennessMetricFile);
            }

            caller.LoadBedFile(bedPath);
            return caller.CallVariants(inFile, variantFrequencyFile, outFile, referenceFolder, name, localSDmetric, evennessMetric, somaticClusteringMode);
        }
Beispiel #2
0
        /// <summary>
        /// Computes an integer quality score for this segment using the requested scoring method.
        /// </summary>
        /// <param name="qscoreMethod">Scoring model to apply.</param>
        /// <param name="qscoreParameters">Model coefficients (intercepts and per-predictor weights).</param>
        /// <returns>
        /// LogisticGermline: a q-score clamped to [2, 40]. Logistic: a q-score clamped to [2, 60].
        /// BinCountLinearFit: 61 when BinCount is at least 100, otherwise the rounded fit value.
        /// GeneralizedLinearFit: a score clamped to [2, 61].
        /// </returns>
        /// <exception cref="Exception">Thrown when <paramref name="qscoreMethod"/> is not handled.</exception>
        public int ComputeQScore(QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
        {
            // The two logistic models differ only in coefficients and upper bound; they share
            // the logit -> probability -> q-score transform that follows the switch.
            double logit;
            int    maxLogisticQScore;

            switch (qscoreMethod)
            {
            case QScoreMethod.LogisticGermline:
                // Logistic model using a new selection of features.  Gives ROC curve area 0.921
                logit  = qscoreParameters.LogisticGermlineIntercept;
                logit += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticGermlineLogBinCount;
                logit += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticGermlineModelDistance;
                logit += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticGermlineDistanceRatio;
                maxLogisticQScore = 40;
                break;

            case QScoreMethod.Logistic:
                // Logistic model using a new selection of features.  Gives ROC curve area 0.8289
                logit  = qscoreParameters.LogisticIntercept;
                logit += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticLogBinCount;
                logit += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticModelDistance;
                logit += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticDistanceRatio;
                logit += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
                maxLogisticQScore = 60;
                break;

            case QScoreMethod.BinCountLinearFit:
                if (this.BinCount >= 100)
                {
                    return(61);
                }
                else
                {
                    return((int)Math.Round(-10 * Math.Log10(1 - 1 / (1 + Math.Exp(0.5532 - this.BinCount * 0.147))), 0, MidpointRounding.AwayFromZero));
                }

            case QScoreMethod.GeneralizedLinearFit:     // Generalized linear fit with linear transformation to QScore
                double linearFit = qscoreParameters.GeneralizedLinearFitIntercept;
                linearFit += qscoreParameters.GeneralizedLinearFitLogBinCount *
                             GetQScorePredictor(QScorePredictor.LogBinCount);
                linearFit += qscoreParameters.GeneralizedLinearFitModelDistance *
                             GetQScorePredictor(QScorePredictor.ModelDistance);
                linearFit += qscoreParameters.GeneralizedLinearFitMajorChromosomeCount *
                             GetQScorePredictor(QScorePredictor.MajorChromosomeCount);
                linearFit += qscoreParameters.GeneralizedLinearFitMafMean *
                             GetQScorePredictor(QScorePredictor.MafMean);
                linearFit += qscoreParameters.GeneralizedLinearFitLogMafCv * GetQScorePredictor(QScorePredictor.LogMafCv);
                linearFit += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
                double scaledFit = -11.9 - 11.4 * linearFit; // Scaling to achieve 2 <= qscore <= 61
                scaledFit = Math.Max(2, scaledFit);
                scaledFit = Math.Min(61, scaledFit);
                return((int)Math.Round(scaledFit, 0, MidpointRounding.AwayFromZero));

            default:
                throw new Exception("Unhandled qscore method");
            }

            // Logistic transform: odds = e^logit, probability = odds / (odds + 1).
            // When Exp overflows to +Infinity the quotient would be NaN, so pin the probability to 1.
            double odds        = Math.Exp(logit);
            double probability = double.IsPositiveInfinity(odds) ? 1.0 : odds / (odds + 1);

            // Transform probability into a q-score. For a probability that rounds to 1 this is
            // +Infinity, so the clamp is applied while the value is still a double: converting
            // Infinity or NaN to int is unspecified in C#, and on runtimes where it yields
            // int.MinValue the most confident calls were clamped to the minimum score.
            double phred = Math.Round(-10 * Math.Log10(1 - probability));
            if (double.IsNaN(phred))
            {
                // An undefined logit (NaN predictor or coefficient) gets the lowest score.
                return(2);
            }

            phred = Math.Min(maxLogisticQScore, phred);
            phred = Math.Max(2, phred);
            return((int)phred);
        }
Beispiel #3
0
        /// <summary>
        /// Computes an integer quality score for this segment using the requested scoring method.
        /// </summary>
        /// <param name="qscoreMethod">Scoring model to apply.</param>
        /// <param name="qscoreParameters">Model coefficients (intercepts and per-predictor weights).</param>
        /// <returns>
        /// LogisticGermline: a q-score clamped to [2, 40]. Logistic: a q-score clamped to [2, 60].
        /// BinCountLinearFit: 61 when BinCount is at least 100, otherwise the rounded fit value.
        /// GeneralizedLinearFit: a score clamped to [2, 61].
        /// </returns>
        /// <exception cref="Exception">Thrown when <paramref name="qscoreMethod"/> is not handled.</exception>
        public int ComputeQScore(QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
        {
            double score;

            switch (qscoreMethod)
            {
            case QScoreMethod.LogisticGermline:
                // Logistic model using a new selection of features.  Gives ROC curve area 0.921
                score  = qscoreParameters.LogisticGermlineIntercept;
                score += GetQScorePredictor(QScorePredictor.LogBinCount) *
                         qscoreParameters.LogisticGermlineLogBinCount;
                score += GetQScorePredictor(QScorePredictor.ModelDistance) *
                         qscoreParameters.LogisticGermlineModelDistance;
                score += GetQScorePredictor(QScorePredictor.DistanceRatio) *
                         qscoreParameters.LogisticGermlineDistanceRatio;
                return(LogitToQScore(score, 40));

            case QScoreMethod.Logistic:
                // Logistic model using a new selection of features.  Gives ROC curve area 0.8289
                score  = qscoreParameters.LogisticIntercept;
                score += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticLogBinCount;
                score += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticModelDistance;
                score += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticDistanceRatio;
                score += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
                return(LogitToQScore(score, 60));

            case QScoreMethod.BinCountLinearFit:
                if (this.BinCount >= 100)
                {
                    return(61);
                }
                else
                {
                    return
                        ((int)
                         Math.Round(-10 * Math.Log10(1 - 1 / (1 + Math.Exp(0.5532 - this.BinCount * 0.147))), 0,
                                    MidpointRounding.AwayFromZero));
                }

            case QScoreMethod.GeneralizedLinearFit:     // Generalized linear fit with linear transformation to QScore
                double linearFit = qscoreParameters.GeneralizedLinearFitIntercept;
                linearFit += qscoreParameters.GeneralizedLinearFitLogBinCount *
                             GetQScorePredictor(QScorePredictor.LogBinCount);
                linearFit += qscoreParameters.GeneralizedLinearFitModelDistance *
                             GetQScorePredictor(QScorePredictor.ModelDistance);
                linearFit += qscoreParameters.GeneralizedLinearFitMajorChromosomeCount *
                             GetQScorePredictor(QScorePredictor.MajorChromosomeCount);
                linearFit += qscoreParameters.GeneralizedLinearFitMafMean *
                             GetQScorePredictor(QScorePredictor.MafMean);
                linearFit += qscoreParameters.GeneralizedLinearFitLogMafCv *
                             GetQScorePredictor(QScorePredictor.LogMafCv);
                linearFit += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
                score      = -11.9 - 11.4 * linearFit; // Scaling to achieve 2 <= qscore <= 61
                score      = Math.Max(2, score);
                score      = Math.Min(61, score);
                return((int)Math.Round(score, 0, MidpointRounding.AwayFromZero));

            default:
                throw new Exception("Unhandled qscore method");
            }
        }

        /// <summary>
        /// Converts a logistic-model logit into a q-score clamped to [2, <paramref name="maxQScore"/>].
        /// </summary>
        /// <param name="logit">Linear predictor of the logistic model (before the sigmoid).</param>
        /// <param name="maxQScore">Upper bound of the returned q-score.</param>
        /// <returns>The rounded, clamped q-score; 2 when the logit is NaN.</returns>
        private static int LogitToQScore(double logit, int maxQScore)
        {
            // odds = e^logit, probability = odds / (odds + 1). When Exp overflows to +Infinity
            // the quotient would be NaN, so pin the probability to 1.
            double odds        = Math.Exp(logit);
            double probability = double.IsPositiveInfinity(odds) ? 1.0 : odds / (odds + 1);

            // Transform probability into a q-score. For a probability that rounds to 1 this is
            // +Infinity, so the clamp happens while the value is still a double: converting
            // Infinity or NaN to int is unspecified in C#, and on runtimes where it yields
            // int.MinValue the most confident calls were clamped to the minimum score.
            double phred = Math.Round(-10 * Math.Log10(1 - probability));
            if (double.IsNaN(phred))
            {
                // An undefined logit (NaN predictor or coefficient) gets the lowest score.
                return(2);
            }

            phred = Math.Min(maxQScore, phred);
            phred = Math.Max(2, phred);
            return((int)phred);
        }
Beispiel #4
0
 /// <summary>
 /// Computes a quality score for every segment in the list and stores it in that segment's QScore.
 /// </summary>
 /// <param name="segments">Segments to score; each element is updated in place.</param>
 /// <param name="qscoreMethod">Scoring method passed through to each segment's ComputeQScore.</param>
 /// <param name="qscoreParameters">Model coefficients passed through to each segment's ComputeQScore.</param>
 public static void AssignQualityScores(List <CanvasSegment> segments, QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
 {
     for (int index = 0; index < segments.Count; index++)
     {
         CanvasSegment current = segments[index];
         current.QScore = current.ComputeQScore(qscoreMethod, qscoreParameters);
     }
 }