/// <summary>
/// Command-line entry point: parses the options, validates the input files, configures a
/// <c>SomaticCaller</c> from the parsed values and runs variant calling.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 when help was explicitly requested; 1 when an argument is not understood, a required argument
/// is missing, or an input/configuration file does not exist; otherwise the value returned by
/// <c>caller.CallVariants</c>.
/// </returns>
/// <exception cref="ArgumentException">The quality filter threshold is negative.</exception>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    string inFile = null;
    string outFile = null;
    string variantFrequencyFile = null;
    string referenceFolder = null;
    string name = "SAMPLE";
    string truthDataPath = null;
    string somaticVCFPath = null;
    bool needHelp = false;
    string bedPath = null;
    string ploidyVcfPath = null;
    string localSdMetricFile = null;
    string evennessMetricFile = null;
    bool isEnrichment = false;
    bool isDbsnpVcf = false;
    // Parsed from -M so existing command lines keep working; the value is not forwarded to the caller here.
    double minimumCallSize;
    int qualityFilterThreshold = 10; // Default quality filter threshold = 10, overridable via -q command-line argument

    // Parameters, for parameter-sweep, somatic model training:
    bool isTrainMode = false;
    float? userPurity = null;
    float? userPloidy = null;
    CanvasCommon.CanvasSomaticClusteringMode somaticClusteringMode = CanvasCommon.CanvasSomaticClusteringMode.MeanShift;

    string assemblyFolder = Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program));
    string parameterconfigPath = Path.Combine(assemblyFolder, "SomaticCallerParameters.json");
    string qualityScoreConfigPath = Path.Combine(assemblyFolder, "QualityScoreParameters.json");

    // Command-line numbers are machine-readable data, so parse them independently of the user's locale.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => inFile = v },
        { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFile = v },
        { "o|outfile=", "file name prefix to output copy number calls to outfile.vcf", v => outFile = v },
        { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
        { "n|name=", "sample name for output VCF header (optional)", v => name = v },
        { "t|truth=", "path to vcf/bed with CNV truth data (optional)", v => truthDataPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "e|enrichment", "flag indicating this is enrichment data", v => isEnrichment = v != null },
        { "s|somaticvcf=", "somatic vcf file - optionally used for purity estimation", v => somaticVCFPath = v },
        { "b|bedfile=", "bed file containing regions to exclude from calling", v => bedPath = v },
        { "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyVcfPath = v },
        { $"{CommandLineOptions.LocalSdMetricFile}=", "text file with local SD metric (calculate within CanvasClean) (optional)", v => localSdMetricFile = v },
        { $"{CommandLineOptions.EvennessMetricFile}=", "text file with evenness metric (calculated within CanvasPartition) (optional)", v => evennessMetricFile = v },
        { "d|dbsnpvcf", "flag indicating a dbSNP VCF file is used to generate the variant frequency file", v => isDbsnpVcf = v != null },
        { "M|minimumcall=", "INTERNAL: minimum call size", v => minimumCallSize = int.Parse(v, invariant) },
        { "q|qualitythreshold=", $"quality filter threshold (default {qualityFilterThreshold})", v => qualityFilterThreshold = int.Parse(v, invariant) },
        { "c|parameterconfig=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v },
        { "g|qscoreconfig=", $"quality score parameter configuration path (default {qualityScoreConfigPath})", v => qualityScoreConfigPath = v },
        { "u|definedpurity=", "INTERNAL: user pre-defined purity", v => userPurity = float.Parse(v, invariant) },
        { "l|definedploidy=", "INTERNAL: user pre-defined ploidy", v => userPloidy = float.Parse(v, invariant) },
        { "a|trainmodel=", "INTERNAL: enable training mode", v => isTrainMode = v != null }
    };

    List<string> extraArgs = p.Parse(args);

    // An argument that could not be parsed is a usage error: report it and exit non-zero.
    if (extraArgs.Count > 0)
    {
        Console.WriteLine("Error: Argument '{0}' not understood", extraArgs[0]);
        ShowHelp(p);
        return 1;
    }

    // Help that was explicitly asked for is a successful run.
    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // Missing mandatory arguments are a usage error as well.
    if (inFile == null || outFile == null || referenceFolder == null || variantFrequencyFile == null)
    {
        ShowHelp(p);
        return 1;
    }

    if (!File.Exists(inFile))
    {
        Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", inFile);
        return 1;
    }

    if (!File.Exists(variantFrequencyFile))
    {
        Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", variantFrequencyFile);
        return 1;
    }

    string genomeSizePath = Path.Combine(referenceFolder, "GenomeSize.xml");
    if (!File.Exists(genomeSizePath))
    {
        Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", genomeSizePath);
        return 1;
    }

    if (qualityFilterThreshold < 0)
    {
        throw new ArgumentException(
            $"Quality filter threshold must be greater than or equal to zero. Value was {qualityFilterThreshold}");
    }

    if (!File.Exists(parameterconfigPath))
    {
        Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", parameterconfigPath);
        return 1;
    }

    // Check the quality-score configuration the same way as the caller parameters,
    // so a bad -g path produces the same clear message instead of a deserialization failure.
    if (!File.Exists(qualityScoreConfigPath))
    {
        Console.WriteLine("Canvas error: File {0} does not exist! Exiting.", qualityScoreConfigPath);
        return 1;
    }

    FileLocation parameterconfigFile = new FileLocation(parameterconfigPath);
    SomaticCallerParameters somaticCallerParametersJSON = Deserialize<SomaticCallerParameters>(parameterconfigFile);

    FileLocation qscoreConfigFile = new FileLocation(qualityScoreConfigPath);
    CanvasCommon.QualityScoreParameters qscoreParametersJSON = Deserialize<CanvasCommon.QualityScoreParameters>(qscoreConfigFile);

    var logger = new Logger(new[] { Console.Out }, new[] { Console.Error });
    SomaticCaller caller = new SomaticCaller(logger);

    // Set parameters:
    caller.somaticCallerParameters = somaticCallerParametersJSON;
    caller.somaticCallerQscoreParameters = qscoreParametersJSON;
    caller.TruthDataPath = truthDataPath;
    caller.SomaticVcfPath = somaticVCFPath;
    caller.IsEnrichment = isEnrichment;
    caller.IsDbsnpVcf = isDbsnpVcf;
    caller.userPurity = userPurity;
    caller.userPloidy = userPloidy;
    // Training mode follows the -a switch only. An earlier assignment that also enabled it when truth
    // data was supplied was immediately overwritten by this one, so it never had any effect.
    caller.IsTrainingMode = isTrainMode;
    caller.QualityFilterThreshold = qualityFilterThreshold;

    if (!string.IsNullOrEmpty(ploidyVcfPath))
    {
        caller.LoadReferencePloidy(ploidyVcfPath);
    }

    double? localSDmetric = null;
    double? evennessMetric = null;

    if (!string.IsNullOrEmpty(localSdMetricFile))
    {
        localSDmetric = CanvasCommon.CanvasIO.ReadLocalSdMetricFromTextFile(localSdMetricFile);
    }

    if (!string.IsNullOrEmpty(evennessMetricFile))
    {
        evennessMetric = CanvasCommon.CanvasIO.ReadEvennessMetricFromTextFile(evennessMetricFile);
    }

    caller.LoadBedFile(bedPath);
    return caller.CallVariants(inFile, variantFrequencyFile, outFile, referenceFolder, name, localSDmetric, evennessMetric, somaticClusteringMode);
}
/// <summary>
/// Computes the quality score of this segment with the requested scoring method.
/// </summary>
/// <param name="qscoreMethod">Scoring model to apply.</param>
/// <param name="qscoreParameters">Model coefficients read by the selected method.</param>
/// <returns>
/// LogisticGermline: a value in [2, 40]. Logistic: a value in [2, 60]. GeneralizedLinearFit: a value in
/// [2, 61]. BinCountLinearFit: 61 when <c>BinCount</c> is at least 100, otherwise the rounded fit.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="qscoreMethod"/> is not handled.</exception>
public int ComputeQScore(QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
{
    double score;
    switch (qscoreMethod)
    {
        case QScoreMethod.LogisticGermline:
            // Logistic model using a new selection of features. Gives ROC curve area 0.921
            score = qscoreParameters.LogisticGermlineIntercept;
            score += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticGermlineLogBinCount;
            score += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticGermlineModelDistance;
            score += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticGermlineDistanceRatio;
            score = Math.Exp(score);
            // Math.Exp overflows to +Infinity for very large inputs and Infinity / (Infinity + 1) is NaN,
            // so treat the overflow as probability 1.
            score = double.IsPositiveInfinity(score) ? 1.0 : score / (score + 1);
            // Transform probability into a q-score. Clamp while the value is still a double: a probability
            // that saturates to 1.0 gives +Infinity here, and casting Infinity/NaN to int is unspecified,
            // which previously could turn the most confident calls into the minimum q-score.
            score = Math.Round(-10 * Math.Log10(1 - score));
            if (!(score >= 2))
            {
                return 2; // below the floor, or NaN
            }
            if (score > 40)
            {
                return 40; // above the cap, including +Infinity
            }
            return (int)score;

        case QScoreMethod.Logistic:
            // Logistic model using a new selection of features. Gives ROC curve area 0.8289
            score = qscoreParameters.LogisticIntercept;
            score += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticLogBinCount;
            score += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticModelDistance;
            score += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticDistanceRatio;
            score += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
            score = Math.Exp(score);
            // Same overflow and clamp-before-cast handling as the germline model, with a cap of 60.
            score = double.IsPositiveInfinity(score) ? 1.0 : score / (score + 1);
            // Transform probability into a q-score:
            score = Math.Round(-10 * Math.Log10(1 - score));
            if (!(score >= 2))
            {
                return 2; // below the floor, or NaN
            }
            if (score > 60)
            {
                return 60; // above the cap, including +Infinity
            }
            return (int)score;

        case QScoreMethod.BinCountLinearFit:
            if (this.BinCount >= 100)
            {
                return 61;
            }
            else
            {
                return (int)Math.Round(-10 * Math.Log10(1 - 1 / (1 + Math.Exp(0.5532 - this.BinCount * 0.147))), 0, MidpointRounding.AwayFromZero);
            }

        case QScoreMethod.GeneralizedLinearFit:
            // Generalized linear fit with linear transformation to QScore
            double linearFit = qscoreParameters.GeneralizedLinearFitIntercept;
            linearFit += qscoreParameters.GeneralizedLinearFitLogBinCount * GetQScorePredictor(QScorePredictor.LogBinCount);
            linearFit += qscoreParameters.GeneralizedLinearFitModelDistance * GetQScorePredictor(QScorePredictor.ModelDistance);
            linearFit += qscoreParameters.GeneralizedLinearFitMajorChromosomeCount * GetQScorePredictor(QScorePredictor.MajorChromosomeCount);
            linearFit += qscoreParameters.GeneralizedLinearFitMafMean * GetQScorePredictor(QScorePredictor.MafMean);
            linearFit += qscoreParameters.GeneralizedLinearFitLogMafCv * GetQScorePredictor(QScorePredictor.LogMafCv);
            linearFit += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
            score = -11.9 - 11.4 * linearFit;
            // Scaling to achieve 2 <= qscore <= 61
            score = Math.Max(2, score);
            score = Math.Min(61, score);
            return (int)Math.Round(score, 0, MidpointRounding.AwayFromZero);

        default:
            throw new ArgumentOutOfRangeException(nameof(qscoreMethod), qscoreMethod, "Unhandled qscore method");
    }
}
/// <summary>
/// Computes the quality score of this segment with the requested scoring method.
/// </summary>
/// <param name="qscoreMethod">Scoring model to apply.</param>
/// <param name="qscoreParameters">Model coefficients read by the selected method.</param>
/// <returns>
/// LogisticGermline: a value in [2, 40]. Logistic: a value in [2, 60]. GeneralizedLinearFit: a value in
/// [2, 61]. BinCountLinearFit: 61 when <c>BinCount</c> is at least 100, otherwise the rounded fit.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="qscoreMethod"/> is not handled.</exception>
public int ComputeQScore(QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
{
    double score;
    switch (qscoreMethod)
    {
        case QScoreMethod.LogisticGermline:
            // Logistic model using a new selection of features. Gives ROC curve area 0.921
            score = qscoreParameters.LogisticGermlineIntercept;
            score += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticGermlineLogBinCount;
            score += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticGermlineModelDistance;
            score += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticGermlineDistanceRatio;
            score = Math.Exp(score);
            // Math.Exp overflows to +Infinity for very large inputs and Infinity / (Infinity + 1) is NaN,
            // so treat the overflow as probability 1.
            score = double.IsPositiveInfinity(score) ? 1.0 : score / (score + 1);
            // Transform probability into a q-score. Clamp while the value is still a double: a probability
            // that saturates to 1.0 gives +Infinity here, and casting Infinity/NaN to int is unspecified,
            // which previously could turn the most confident calls into the minimum q-score.
            score = Math.Round(-10 * Math.Log10(1 - score));
            if (!(score >= 2))
            {
                return 2; // below the floor, or NaN
            }
            if (score > 40)
            {
                return 40; // above the cap, including +Infinity
            }
            return (int)score;

        case QScoreMethod.Logistic:
            // Logistic model using a new selection of features. Gives ROC curve area 0.8289
            score = qscoreParameters.LogisticIntercept;
            score += GetQScorePredictor(QScorePredictor.LogBinCount) * qscoreParameters.LogisticLogBinCount;
            score += GetQScorePredictor(QScorePredictor.ModelDistance) * qscoreParameters.LogisticModelDistance;
            score += GetQScorePredictor(QScorePredictor.DistanceRatio) * qscoreParameters.LogisticDistanceRatio;
            score += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
            score = Math.Exp(score);
            // Same overflow and clamp-before-cast handling as the germline model, with a cap of 60.
            score = double.IsPositiveInfinity(score) ? 1.0 : score / (score + 1);
            // Transform probability into a q-score:
            score = Math.Round(-10 * Math.Log10(1 - score));
            if (!(score >= 2))
            {
                return 2; // below the floor, or NaN
            }
            if (score > 60)
            {
                return 60; // above the cap, including +Infinity
            }
            return (int)score;

        case QScoreMethod.BinCountLinearFit:
            if (this.BinCount >= 100)
            {
                return 61;
            }
            else
            {
                return (int)Math.Round(-10 * Math.Log10(1 - 1 / (1 + Math.Exp(0.5532 - this.BinCount * 0.147))), 0, MidpointRounding.AwayFromZero);
            }

        case QScoreMethod.GeneralizedLinearFit:
            // Generalized linear fit with linear transformation to QScore
            double linearFit = qscoreParameters.GeneralizedLinearFitIntercept;
            linearFit += qscoreParameters.GeneralizedLinearFitLogBinCount * GetQScorePredictor(QScorePredictor.LogBinCount);
            linearFit += qscoreParameters.GeneralizedLinearFitModelDistance * GetQScorePredictor(QScorePredictor.ModelDistance);
            linearFit += qscoreParameters.GeneralizedLinearFitMajorChromosomeCount * GetQScorePredictor(QScorePredictor.MajorChromosomeCount);
            linearFit += qscoreParameters.GeneralizedLinearFitMafMean * GetQScorePredictor(QScorePredictor.MafMean);
            linearFit += qscoreParameters.GeneralizedLinearFitLogMafCv * GetQScorePredictor(QScorePredictor.LogMafCv);
            linearFit += GetQScorePredictor(QScorePredictor.BinCountAmpDistance);
            score = -11.9 - 11.4 * linearFit;
            // Scaling to achieve 2 <= qscore <= 61
            score = Math.Max(2, score);
            score = Math.Min(61, score);
            return (int)Math.Round(score, 0, MidpointRounding.AwayFromZero);

        default:
            throw new ArgumentOutOfRangeException(nameof(qscoreMethod), qscoreMethod, "Unhandled qscore method");
    }
}
/// <summary>
/// Computes a quality score for each segment in the list and stores it on that segment.
/// </summary>
/// <param name="segments">Segments to score; each one has its <c>QScore</c> overwritten.</param>
/// <param name="qscoreMethod">Scoring method passed through to <c>ComputeQScore</c>.</param>
/// <param name="qscoreParameters">Scoring parameters passed through to <c>ComputeQScore</c>.</param>
public static void AssignQualityScores(List<CanvasSegment> segments, QScoreMethod qscoreMethod, QualityScoreParameters qscoreParameters)
{
    foreach (var current in segments)
    {
        var computedScore = current.ComputeQScore(qscoreMethod, qscoreParameters);
        current.QScore = computedScore;
    }
}