/// <summary>
/// Command-line entry point: builds the comparison configuration and the region of
/// interest from <paramref name="args"/> and the program settings, echoes them to the
/// console, and runs <c>VcfComparer.BaselineVcfs</c>.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the baseline VCF path and <c>args[1]</c> the test VCF path. Further
/// entries are switches (-Exact, -GT, -DP, -VF, -SB, -Filter, -Qual, -AllVars, -Refs, -AD,
/// -AllCheck, -HideShared) and the named values Out, Summary, Chrom, Start and End, which
/// are looked up through <c>GetParameterValue</c>.
/// </param>
static void Main(string[] args)
{
    // Both VCF paths are positional and mandatory; report that clearly instead of
    // failing with an IndexOutOfRangeException on args[0] / args[1].
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <baselineVcfPath> <testVcfPath> [options]");
        Environment.ExitCode = 1;
        return;
    }

    var baselineVcfPath = args[0];
    var testVcfPath = args[1];

    var comparisonConfig = new VcfComparisonConfig();
    var configurationManager = new ProgramConfigSettings();
    var region = new VcfRegion();

    // Switches and output files are only read when something follows the two paths.
    if (args.Length > 2)
    {
        comparisonConfig.Exact = args.Contains("-Exact");
        comparisonConfig.CheckGT = args.Contains("-GT");
        comparisonConfig.CheckDP = args.Contains("-DP");
        comparisonConfig.CheckVF = args.Contains("-VF");
        comparisonConfig.CheckSB = args.Contains("-SB");
        comparisonConfig.CheckFilter = args.Contains("-Filter");
        comparisonConfig.CheckQual = args.Contains("-Qual");
        // Only passing variants are compared unless -AllVars is given.
        comparisonConfig.PassingOnly = !args.Contains("-AllVars");
        comparisonConfig.ConsiderRefs = args.Contains("-Refs");
        comparisonConfig.CheckAD = args.Contains("-AD");
        // Called after the individual switches, so it sees (and may override) them.
        comparisonConfig.AllCheck(args.Contains("-AllCheck"));
        comparisonConfig.OutputFile = GetParameterValue(args, "Out");
        comparisonConfig.SummaryFile = GetParameterValue(args, "Summary");
        comparisonConfig.HideSharedDiffs = args.Contains("-HideShared");
    }

    // Region restriction: a missing Start defaults to 0 and a missing End to int.MaxValue.
    region.Chromosome = GetParameterValue(args, "Chrom");
    region.Start = int.Parse(GetParameterValue(args, "Start") ?? "0");
    region.End = int.Parse(GetParameterValue(args, "End") ?? int.MaxValue.ToString());

    // NOTE(review): the margin is only copied when it is negative. If a negative value is
    // the "not configured" sentinel of ProgramConfigSettings, this test is inverted - confirm.
    var configMargin = configurationManager.MarginOfError;
    if (configMargin < 0)
    {
        comparisonConfig.MarginOfError = configMargin;
    }

    var variantTypes = configurationManager.VariantTypes.Split(',');
    var verbose = configurationManager.Verbose;
    comparisonConfig.BlockSize = configurationManager.BlockSize;

    // Variant classes to compare come from the comma-separated VariantTypes setting.
    comparisonConfig.CheckDeletions = variantTypes.Contains("Del");
    comparisonConfig.CheckInsertions = variantTypes.Contains("Ins");
    comparisonConfig.CheckSnv = variantTypes.Contains("Snv");
    comparisonConfig.CheckMnv = variantTypes.Contains("Mnv");

    // Echo the invocation so the report is self-describing.
    Console.WriteLine();
    Console.WriteLine(string.Join(" ", args));
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Variant Types: " + string.Join(",", variantTypes));

    VcfComparer.BaselineVcfs(baselineVcfPath, testVcfPath, comparisonConfig, region, verbose);
}
/// <summary>
/// Command-line entry point: builds the comparison configuration from
/// <paramref name="args"/> and the application settings, echoes it to the console, and
/// runs <c>BaselineVcfs</c>.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the baseline VCF path and <c>args[1]</c> the test VCF path. Further
/// entries are switches: -Exact, -GT, -DP, -VF, -SB, -Filter, -Qual, -AllVars, -Refs, -AD
/// and -AllComparisons.
/// </param>
static void Main(string[] args)
{
    // Both VCF paths are positional and mandatory; report that clearly instead of
    // failing with an IndexOutOfRangeException on args[0] / args[1].
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <baselineVcfPath> <testVcfPath> [options]");
        Environment.ExitCode = 1;
        return;
    }

    var baselineVcfPath = args[0];
    var testVcfPath = args[1];
    var config = new VcfComparisonConfig();

    // Field checks are only read when something follows the two paths.
    if (args.Length > 2)
    {
        config.Exact = args.Contains("-Exact");
        config.CheckGT = args.Contains("-GT");
        config.CheckDP = args.Contains("-DP");
        config.CheckVF = args.Contains("-VF");
        config.CheckSB = args.Contains("-SB");
        config.CheckFilter = args.Contains("-Filter");
        config.CheckQual = args.Contains("-Qual");
        // Only passing variants are compared unless -AllVars is given.
        config.PassingOnly = !args.Contains("-AllVars");
        config.ConsiderRefs = args.Contains("-Refs");
        config.CheckAD = args.Contains("-AD");
    }

    // "N/A" means no margin is configured. A missing or empty setting is treated the
    // same way instead of being handed to float.Parse.
    var configMargin = ConfigurationManager.AppSettings["MarginOfError"];
    if (!string.IsNullOrEmpty(configMargin) && configMargin != "N/A")
    {
        config.MarginOfError = float.Parse(configMargin);
    }

    // Variant classes to compare come from the comma-separated VariantTypes setting.
    var variantTypes = ConfigurationManager.AppSettings["VariantTypes"].Split(',');
    config.CheckDeletions = variantTypes.Contains("Del");
    config.CheckInsertions = variantTypes.Contains("Ins");
    config.CheckSnv = variantTypes.Contains("Snv");
    config.CheckMnv = variantTypes.Contains("Mnv");

    // Echo the invocation so the report is self-describing.
    Console.WriteLine();
    Console.WriteLine(string.Join(" ", args));
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Variant Types: " + string.Join(",", variantTypes));

    BaselineVcfs(baselineVcfPath, testVcfPath, config, args.Contains("-AllComparisons"));
}
/// <summary>
/// Selects, from <paramref name="testAlleles"/>, the calls whose variant type is enabled
/// in <paramref name="config"/>, and groups the selected calls by reference name.
/// </summary>
/// <param name="config">Supplies ConsiderRefs and the CheckSnv / CheckMnv / CheckDeletions / CheckInsertions switches.</param>
/// <param name="testAlleles">Calls to filter, in input order.</param>
/// <param name="testVariantsDict">Receives the selected calls keyed by their ReferenceName.</param>
/// <returns>The selected calls, in input order.</returns>
private static List<VcfVariant> ProcessVariants(VcfComparisonConfig config, List<VcfVariant> testAlleles, out Dictionary<string, List<VcfVariant>> testVariantsDict)
{
    var selected = new List<VcfVariant>();
    var selectedByReference = new Dictionary<string, List<VcfVariant>>();
    testVariantsDict = selectedByReference;

    // Unless ConsiderRefs is set, calls whose first alternate allele is "." are dropped.
    List<VcfVariant> candidates = testAlleles;
    if (!config.ConsiderRefs)
    {
        candidates = testAlleles.Where(allele => allele.VariantAlleles[0] != ".").ToList();
    }

    foreach (var candidate in candidates)
    {
        var kind = GetVariantType(candidate);

        bool typeEnabled =
            (config.CheckSnv && kind == Program.VariantType.Snv) ||
            (config.CheckMnv && kind == Program.VariantType.Mnv) ||
            (config.CheckDeletions && kind == Program.VariantType.Deletion) ||
            (config.CheckInsertions && kind == Program.VariantType.Insertion);

        if (!typeEnabled)
        {
            continue;
        }

        selected.Add(candidate);

        // Create the per-reference list on first use, then append.
        List<VcfVariant> sameReference;
        if (!selectedByReference.TryGetValue(candidate.ReferenceName, out sameReference))
        {
            sameReference = new List<VcfVariant>();
            selectedByReference[candidate.ReferenceName] = sameReference;
        }

        sameReference.Add(candidate);
    }

    return selected;
}
/// <summary>
/// Compares the variants of a baseline VCF with those of a test VCF and writes a report to
/// the console: variants present in only one file, followed by field-level differences
/// between variants found in both.
/// </summary>
/// <param name="baselineVcfPath">Path of the baseline VCF.</param>
/// <param name="testVcfPath">Path of the test VCF.</param>
/// <param name="config">Selects the variant types to load and the fields to compare.</param>
/// <param name="showAllComparisons">
/// When true, the differences are printed as one comma-separated table after all variants
/// have been processed; when false, each differing variant is printed as a tab-separated line.
/// </param>
public static void BaselineVcfs(string baselineVcfPath, string testVcfPath, VcfComparisonConfig config, bool showAllComparisons)
{
    Console.WriteLine("Configuration Options");
    Console.WriteLine("------------------------------");
    Console.WriteLine(config);
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Comparing {0} to {1}", baselineVcfPath, testVcfPath);

    var baselineVariantsMissingInTest = new List<VcfVariant>();
    var testVariantsMissingInBaseline = new List<VcfVariant>();
    var sharedVariants = new List<Tuple<VcfVariant, VcfVariant>>();

    var baselineVariantsDict = new Dictionary<string, List<VcfVariant>>();
    var baselineVariants = LoadVariantsForComparison(baselineVcfPath, config, baselineVariantsDict);

    var testVariantsDict = new Dictionary<string, List<VcfVariant>>();
    var testVariants = LoadVariantsForComparison(testVcfPath, config, testVariantsDict);

    // The totals honour PassingOnly; they are the denominators of the percentages below.
    var numBaselines = baselineVariants.Count(v => !config.PassingOnly || v.Filters == "PASS");
    var numTestVars = testVariants.Count(v => !config.PassingOnly || v.Filters == "PASS");
    Console.WriteLine("Baseline Variants : " + numBaselines);
    Console.WriteLine("Test Variants : " + numTestVars);

    foreach (var testVariant in testVariants)
    {
        var variantInOtherVcf = FindVariantInOtherVcf(testVariant, baselineVariants, baselineVariantsDict, config.PassingOnly);
        if (variantInOtherVcf == null)
        {
            testVariantsMissingInBaseline.Add(testVariant);
        }
    }

    foreach (var baselineVariant in baselineVariants)
    {
        var variantInOtherVcf = FindVariantInOtherVcf(baselineVariant, testVariants, testVariantsDict, config.PassingOnly);
        if (variantInOtherVcf == null)
        {
            baselineVariantsMissingInTest.Add(baselineVariant);
        }
        else if (variantInOtherVcf.ReferenceName != null)
        {
            // NOTE(review): a non-null result without a ReferenceName is counted as neither
            // missing nor shared - presumably a placeholder returned by FindVariantInOtherVcf; confirm.
            sharedVariants.Add(new Tuple<VcfVariant, VcfVariant>(baselineVariant, variantInOtherVcf));
        }
    }

    // With no counted variants the percentage is reported as 0 rather than NaN/Infinity.
    Console.WriteLine(Environment.NewLine + "------------------------------" + Environment.NewLine);
    Console.WriteLine("Baseline Missing In Test : {0} ({1}%)",
        baselineVariantsMissingInTest.Count,
        numBaselines == 0 ? 0.0 : Math.Round(100 * (float)baselineVariantsMissingInTest.Count / numBaselines, 2));
    Console.WriteLine("------------------------------");
    PrintVariants(baselineVariantsMissingInTest);

    Console.WriteLine(Environment.NewLine + "------------------------------" + Environment.NewLine);
    Console.WriteLine("Test Missing In Baseline : {0} ({1}%)",
        testVariantsMissingInBaseline.Count,
        numTestVars == 0 ? 0.0 : Math.Round(100 * (float)testVariantsMissingInBaseline.Count / numTestVars, 2));
    Console.WriteLine("------------------------------");
    PrintVariants(testVariantsMissingInBaseline);

    Console.WriteLine(Environment.NewLine + "------------------------------" + Environment.NewLine);
    Console.WriteLine("Comparison of Shared Variants ({0})", sharedVariants.Count);
    Console.WriteLine("------------------------------");

    var allDiffs = new List<Dictionary<string, string>>();
    foreach (var sharedVariant in sharedVariants)
    {
        var baselineText = sharedVariant.Item1.ToString();
        var testText = sharedVariant.Item2.ToString();
        if (baselineText == testText)
        {
            // These are exactly the same.
            continue;
        }

        var matchDict = new Dictionary<string, string>();   // column name -> "OK", "False" or a test/baseline ratio
        var okResult = "OK";
        var matchStrings = new List<string>();              // human-readable line for this variant
        bool allOk = true;

        var variantString = string.Format("{0} {1} {2} {3}", sharedVariant.Item1.ReferenceName, sharedVariant.Item1.ReferencePosition, sharedVariant.Item1.ReferenceAllele, sharedVariant.Item1.VariantAlleles.First());
        matchStrings.Add(variantString);
        matchDict["Variant"] = variantString;

        if (config.CheckVF)
        {
            var result = okResult;
            var vf1 = GetVF(sharedVariant.Item1);
            var vf2 = GetVF(sharedVariant.Item2);
            var vfsEqual = ApproximatelyEqual(vf1, vf2, config.MarginOfError);
            if (!vfsEqual)
            {
                matchStrings.Add(string.Format("VF: ({0} vs {1})", vf1, vf2));
                allOk = false;
                result = ((float)(vf2) / vf1).ToString();
            }
            matchDict["VF"] = result;
        }

        if (config.CheckDP)
        {
            var result = okResult;
            var dp1 = GetDP(sharedVariant.Item1);
            var dp2 = GetDP(sharedVariant.Item2);
            var dpsEqual = ApproximatelyEqual(dp1, dp2, config.MarginOfError);
            if (!dpsEqual)
            {
                matchStrings.Add(string.Format("DP: ({0} vs {1})", dp1, dp2));
                allOk = false;
                result = ((float)(dp2) / dp1).ToString();
            }
            matchDict["DP"] = result;
        }

        if (config.CheckGT)
        {
            var result = okResult;
            var gt1 = GetGT(sharedVariant.Item1);
            var gt2 = GetGT(sharedVariant.Item2);
            var gtsEqual = gt1 == gt2;
            if (!gtsEqual)
            {
                matchStrings.Add(string.Format("GT: ({0} vs {1})", gt1, gt2));
                allOk = false;
                // Record the mismatch in the table as well, as the Filters and AD checks do;
                // previously the GT column stayed "OK" for differing genotypes.
                result = "False";
            }
            matchDict["GT"] = result;
        }

        if (config.CheckFilter)
        {
            var result = okResult;
            var filtersEqual = sharedVariant.Item1.Filters == sharedVariant.Item2.Filters;
            if (!filtersEqual)
            {
                matchStrings.Add(string.Format("Filters: ({0} vs {1})", sharedVariant.Item1.Filters, sharedVariant.Item2.Filters));
                allOk = false;
                result = "False";
            }
            matchDict["Filters"] = result;
        }

        if (config.CheckSB)
        {
            var result = okResult;
            var sb1 = GetSB(sharedVariant.Item1);
            var sb2 = GetSB(sharedVariant.Item2);
            var sbsEqual = ApproximatelyEqual(sb1, sb2, config.MarginOfError);
            if (!sbsEqual)
            {
                matchStrings.Add(string.Format("SB: ({0} vs {1})", sb1, sb2));
                allOk = false;
                result = ((float)(sb2) / sb1).ToString();
            }
            matchDict["SB"] = result;
        }

        if (config.CheckAD)
        {
            var result = okResult;
            var ad1 = GetAD(sharedVariant.Item1);
            var ad2 = GetAD(sharedVariant.Item2);
            var adsEqual = ad1 == ad2;
            if (!adsEqual)
            {
                matchStrings.Add(string.Format("AD: ({0} vs {1})", ad1, ad2));
                allOk = false;
                result = "False";
            }
            matchDict["AD"] = result;

            // AD is split on ','; the first two components feed the ref/alt depth ratios.
            var ad1split = ad1.Split(',');
            var ad2split = ad2.Split(',');
            matchDict["RefDepthRatio"] = ((float)(int.Parse(ad2split[0])) / int.Parse(ad1split[0])).ToString();
            // A value with a single component has no second depth; report "NA" instead of
            // throwing IndexOutOfRangeException.
            matchDict["AltDepthRatio"] = (ad1split.Length > 1 && ad2split.Length > 1)
                ? ((float)(int.Parse(ad2split[1])) / int.Parse(ad1split[1])).ToString()
                : "NA";
        }

        if (config.CheckQual)
        {
            var result = okResult;
            var qualsEqual = ApproximatelyEqual(sharedVariant.Item1.Quality, sharedVariant.Item2.Quality);
            if (!qualsEqual)
            {
                matchStrings.Add(string.Format("Qual: ({0} vs {1})", sharedVariant.Item1.Quality, sharedVariant.Item2.Quality));
                allOk = false;
                result = ((float)sharedVariant.Item2.Quality / sharedVariant.Item1.Quality).ToString();
            }
            matchDict["Qual"] = result;
        }

        // The two record texts are known to differ here (identical pairs were skipped above),
        // so entering this branch always appends both records and flags the variant.
        // NOTE(review): with "||" every non-identical pair ends up flagged, even when all
        // enabled field checks passed and Exact is off - confirm that "&&" was not intended.
        if (allOk || config.Exact)
        {
            matchStrings.Add(
                string.Format(
                    Environment.NewLine + "{0}" + Environment.NewLine + "vs" + Environment.NewLine + "{1}" + Environment.NewLine,
                    baselineText,
                    testText));
            allOk = false;
        }

        if (!allOk && !showAllComparisons)
        {
            Console.WriteLine(string.Join("\t", matchStrings));
        }

        if (!allOk)
        {
            allDiffs.Add(matchDict);
        }
    }

    if (showAllComparisons && allDiffs.Any())
    {
        // Every row holds the same keys (they depend only on config), so the first row
        // provides the header.
        var allDiffKeys = allDiffs.First().Keys;
        Console.WriteLine(string.Join(",", allDiffKeys));
        foreach (var allDiff in allDiffs)
        {
            var resultsList = new List<string>();
            foreach (var allDiffKey in allDiffKeys)
            {
                resultsList.Add(allDiff[allDiffKey]);
            }
            Console.WriteLine(string.Join(",", resultsList));
        }
    }
}

/// <summary>
/// Reads every variant from <paramref name="vcfPath"/>, keeps the calls whose type is enabled
/// in <paramref name="config"/>, and adds the kept calls to <paramref name="variantsByReference"/>
/// under their reference name.
/// </summary>
/// <returns>The kept calls, in file order.</returns>
private static List<VcfVariant> LoadVariantsForComparison(string vcfPath, VcfComparisonConfig config, Dictionary<string, List<VcfVariant>> variantsByReference)
{
    var kept = new List<VcfVariant>();

    using (var reader = new VcfReader(vcfPath))
    {
        var alleles = reader.GetVariants().ToList();

        // Unless ConsiderRefs is set, calls whose first alternate allele is "." are dropped.
        var variantCalls = config.ConsiderRefs
            ? alleles
            : alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

        foreach (var variantCall in variantCalls)
        {
            var type = GetVariantType(variantCall);
            bool typeEnabled =
                (config.CheckSnv && type == VariantType.Snv) ||
                (config.CheckMnv && type == VariantType.Mnv) ||
                (config.CheckDeletions && type == VariantType.Deletion) ||
                (config.CheckInsertions && type == VariantType.Insertion);

            if (!typeEnabled)
            {
                continue;
            }

            kept.Add(variantCall);

            List<VcfVariant> sameReference;
            if (!variantsByReference.TryGetValue(variantCall.ReferenceName, out sameReference))
            {
                sameReference = new List<VcfVariant>();
                variantsByReference[variantCall.ReferenceName] = sameReference;
            }

            sameReference.Add(variantCall);
        }
    }

    return kept;
}
/// <summary>
/// Compares a baseline VCF with a test VCF, optionally restricted to a region that is
/// processed in blocks of <c>config.BlockSize</c>, printing the report to the console and
/// appending to the output and summary files when they are configured.
/// </summary>
/// <param name="baselineVcfPath">Path of the baseline VCF.</param>
/// <param name="testVcfPath">Path of the test VCF.</param>
/// <param name="config">Comparison options, block size and optional OutputFile / SummaryFile paths.</param>
/// <param name="region">
/// Region to compare. When null, or when its Chromosome is null, the files are read in a
/// single pass with a default <c>VcfRegion</c>.
/// </param>
/// <param name="verbose">Passed to the <c>Logger</c> handed to the VCF parser.</param>
public static void BaselineVcfs(string baselineVcfPath, string testVcfPath, VcfComparisonConfig config, VcfRegion region = null, bool verbose = false)
{
    // The parameter defaults to null but is dereferenced below; fall back to a default
    // region, which is also what each block uses when no chromosome is requested.
    if (region == null)
    {
        region = new VcfRegion();
    }

    var logger = new Logger(verbose);

    Console.WriteLine("Configuration Options");
    Console.WriteLine("------------------------------");
    Console.WriteLine(config);
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Comparing {0} to {1}", baselineVcfPath, testVcfPath);

    // Without a chromosome everything is handled as one block.
    var numblocks = region.Chromosome != null ? 1 + ((region.End - region.Start) / config.BlockSize) : 1;
    var comparisonKeys = config.GetKeys();

    // The output file is (re)created on every run with a header row: three columns
    // (<key>_1, <key>_2, <key>_OK) per comparison key.
    if (config.OutputFile != null)
    {
        using (StreamWriter sw = File.CreateText(config.OutputFile))
        {
            // NOTE(review): the header is tab-separated except for the comma after "InTest" -
            // looks like a typo, but left unchanged because the row format is not visible here.
            sw.WriteLine("Variant\tInBaseline\tInTest," + string.Join("\t", comparisonKeys.Select(x => string.Join("\t", new List<string>() { x + "_1", x + "_2", x + "_OK" }))));
        }
    }

    // The summary file only gets its header when it does not exist yet.
    if (config.SummaryFile != null && !File.Exists(config.SummaryFile))
    {
        using (StreamWriter sw = File.CreateText(config.SummaryFile))
        {
            sw.WriteLine("BaselineVcf,TestVcf,SharedDiffs,BaselineOnly,TestOnly");
        }
    }

    for (int i = 0; i < numblocks; i++)
    {
        bool hitEndOfChromosome;
        var subRegion = new VcfRegion();

        if (region.Chromosome != null)
        {
            var blockStart = region.Start + i * config.BlockSize;
            subRegion.Chromosome = region.Chromosome;
            subRegion.Start = blockStart;

            if (region.End > 0)
            {
                // Same value as Math.Min(region.Start + (i + 1) * BlockSize, region.End), but
                // computed without forming a number beyond region.End, which overflowed on the
                // last block when End is int.MaxValue.
                var remaining = region.End - blockStart;
                subRegion.End = remaining < config.BlockSize ? region.End : blockStart + config.BlockSize;
            }

            Console.WriteLine("==============================");
            Console.WriteLine("PROCESSING FOR REGION: " + subRegion.ToString());
        }

        // Get baseline variants
        Dictionary<string, List<VcfVariant>> baselineVariantsDict;
        var alleles = VcfParser.GetAlleles(baselineVcfPath, subRegion, logger, out hitEndOfChromosome);
        var baselineVariants = ProcessVariants(config, alleles, out baselineVariantsDict);

        // Get test variants (this call's end-of-chromosome flag replaces the baseline one)
        var testAlleles = VcfParser.GetAlleles(testVcfPath, subRegion, logger, out hitEndOfChromosome);
        Dictionary<string, List<VcfVariant>> testVariantsDict;
        var testVariants = ProcessVariants(config, testAlleles, out testVariantsDict);

        // Compare baseline vs test
        var allDiffs = Compare(config, baselineVariants, testVariants, baselineVariantsDict, testVariantsDict);

        if (config.SummaryFile != null)
        {
            FlushSummary(config.SummaryFile, allDiffs, baselineVcfPath, testVcfPath);
        }

        if (allDiffs.Any() && config.OutputFile != null)
        {
            foreach (var diff in allDiffs.OrderBy(d => d.Variant.ReferencePosition))
            {
                FlushDiff(config.OutputFile, diff.GetEntry(comparisonKeys));
            }
        }

        // Stop once the test file reports the end of the chromosome.
        if (hitEndOfChromosome)
        {
            break;
        }
    }
}
/// <summary>
/// Compares the fields enabled in <paramref name="config"/> for each baseline/test pair and
/// returns one <c>Comparison</c> per pair that has at least one difference. Each differing
/// pair is also printed to the console as a tab-separated line.
/// </summary>
/// <param name="config">Selects the checks (VF, DP, GT, Filters, SB, AD, Qual, Exact) and the margin of error.</param>
/// <param name="sharedVariants">Pairs of (baseline variant, matching test variant).</param>
/// <returns>The comparisons that found a difference, in input order.</returns>
private static List<Comparison> CompareSharedVariants(VcfComparisonConfig config, List<Tuple<VcfVariant, VcfVariant>> sharedVariants)
{
    var allComparisons = new List<Comparison>();

    foreach (var sharedVariant in sharedVariants)
    {
        var baselineText = sharedVariant.Item1.ToString();
        var testText = sharedVariant.Item2.ToString();
        if (baselineText == testText)
        {
            // These are exactly the same.
            continue;
        }

        var okResult = "OK";
        bool allOk = true;
        var comparison = new Comparison(sharedVariant.Item1, true, true);

        // Numeric checks report the test/baseline ratio when the values differ.
        if (config.CheckVF)
        {
            var result = okResult;
            var vf1 = GetVF(sharedVariant.Item1);
            var vf2 = GetVF(sharedVariant.Item2);
            var vfsEqual = ApproximatelyEqual(vf1, vf2, config.MarginOfError);
            if (!vfsEqual)
            {
                allOk = false;
                result = ((float)(vf2) / vf1).ToString();
            }
            comparison.AddResult(new ComparisonResult("VF", vf1, vf2, vfsEqual, result));
        }

        if (config.CheckDP)
        {
            var result = okResult;
            var dp1 = GetDP(sharedVariant.Item1);
            var dp2 = GetDP(sharedVariant.Item2);
            var dpsEqual = ApproximatelyEqual(dp1, dp2, config.MarginOfError);
            if (!dpsEqual)
            {
                allOk = false;
                result = ((float)(dp2) / dp1).ToString();
            }
            comparison.AddResult(new ComparisonResult("DP", dp1, dp2, dpsEqual, result));
        }

        if (config.CheckGT)
        {
            var gt1 = GetGT(sharedVariant.Item1);
            var gt2 = GetGT(sharedVariant.Item2);
            var gtsEqual = gt1 == gt2;
            if (!gtsEqual)
            {
                allOk = false;
            }
            // NOTE(review): unlike the other checks, GT is recorded without a result string.
            // The original computed "False" here but never passed it on; behaviour is kept.
            // Confirm whether the five-argument form (as used for Filters) was intended.
            comparison.AddResult(new ComparisonResult("GT", gt1, gt2, gtsEqual));
        }

        if (config.CheckFilter)
        {
            var result = okResult;
            var filtersEqual = sharedVariant.Item1.Filters == sharedVariant.Item2.Filters;
            if (!filtersEqual)
            {
                allOk = false;
                result = "False";
            }
            comparison.AddResult(new ComparisonResult("Filters", sharedVariant.Item1.Filters, sharedVariant.Item2.Filters, filtersEqual, result));
        }

        if (config.CheckSB)
        {
            var result = okResult;
            var sb1 = GetSB(sharedVariant.Item1);
            var sb2 = GetSB(sharedVariant.Item2);
            var sbsEqual = ApproximatelyEqual(sb1, sb2, config.MarginOfError);
            if (!sbsEqual)
            {
                allOk = false;
                result = ((float)(sb2) / sb1).ToString();
            }
            comparison.AddResult(new ComparisonResult("SB", sb1, sb2, sbsEqual, result));
        }

        if (config.CheckAD)
        {
            var result = okResult;
            var ad1 = GetAD(sharedVariant.Item1);
            var ad2 = GetAD(sharedVariant.Item2);
            var adsEqual = ad1 == ad2;
            if (!adsEqual)
            {
                // AD is split on ','; the first two components feed the ref/alt depth ratios.
                // The alt ratio is "NA" when either side has a single component.
                var ad1split = ad1.Split(',');
                var ad2split = ad2.Split(',');
                allOk = false;
                result = "RefDepthRatio:" + ((float)(int.Parse(ad2split[0])) / int.Parse(ad1split[0])).ToString()
                    + ";AltDepthRatio:" + (ad1split.Length > 1 && ad2split.Length > 1
                        ? ((float)(int.Parse(ad2split[1])) / int.Parse(ad1split[1])).ToString()
                        : "NA");
            }
            comparison.AddResult(new ComparisonResult("AD", ad1, ad2, adsEqual, result));
        }

        if (config.CheckQual)
        {
            var result = okResult;
            var qualsEqual = ApproximatelyEqual(sharedVariant.Item1.Quality, sharedVariant.Item2.Quality);
            if (!qualsEqual)
            {
                allOk = false;
                result = ((float)sharedVariant.Item2.Quality / sharedVariant.Item1.Quality).ToString();
            }
            comparison.AddResult(new ComparisonResult("Qual", sharedVariant.Item1.Quality, sharedVariant.Item2.Quality, qualsEqual, result));
        }

        // In Exact mode a pair that passed every enabled check is still a difference: the two
        // record texts are known to differ here, because identical pairs were skipped above.
        if (allOk && config.Exact)
        {
            comparison.AddResult(new ComparisonResult("Exact", baselineText, testText, false));
            allOk = false;
        }

        if (!allOk)
        {
            Console.WriteLine(comparison.Variant + "\t" + string.Join("\t", comparison.GetDiffs()));
            allComparisons.Add(comparison);
        }
    }

    return allComparisons;
}
/// <summary>
/// Compares one set of baseline variants with one set of test variants: prints the counts,
/// the variants found in only one of the sets and, unless <c>config.HideSharedDiffs</c> is
/// set, the field-level differences of the variants found in both.
/// </summary>
/// <param name="config">Comparison options (PassingOnly, HideSharedDiffs and the field checks).</param>
/// <param name="baselineVariants">Baseline variants to compare.</param>
/// <param name="testVariants">Test variants to compare.</param>
/// <param name="baselineVariantsDict">Baseline variants keyed by reference name, used for lookups.</param>
/// <param name="testVariantsDict">Test variants keyed by reference name, used for lookups.</param>
/// <returns>
/// Differences of shared variants (when not hidden), followed by baseline-only variants and
/// then test-only variants.
/// </returns>
private static List<Comparison> Compare(VcfComparisonConfig config, List<VcfVariant> baselineVariants, List<VcfVariant> testVariants, Dictionary<string, List<VcfVariant>> baselineVariantsDict, Dictionary<string, List<VcfVariant>> testVariantsDict)
{
    // The totals honour PassingOnly; they are the denominators of the percentages below.
    var numBaselines = baselineVariants.Count(v => !config.PassingOnly || v.Filters == "PASS");
    var numTestVars = testVariants.Count(v => !config.PassingOnly || v.Filters == "PASS");
    Console.WriteLine("Baseline Variants : " + numBaselines);
    Console.WriteLine("Test Variants : " + numTestVars);

    // Check for variants missing from either baseline or test
    var baselineVariantsMissingInTest = new List<VcfVariant>();
    var testVariantsMissingInBaseline = new List<VcfVariant>();
    var sharedVariants = new List<Tuple<VcfVariant, VcfVariant>>();

    foreach (var testVariant in testVariants)
    {
        var variantInOtherVcf = FindVariantInOtherVcf(testVariant, baselineVariantsDict, config.PassingOnly);
        if (variantInOtherVcf == null)
        {
            testVariantsMissingInBaseline.Add(testVariant);
        }
    }

    foreach (var baselineVariant in baselineVariants)
    {
        var variantInOtherVcf = FindVariantInOtherVcf(baselineVariant, testVariantsDict, config.PassingOnly);
        if (variantInOtherVcf == null)
        {
            baselineVariantsMissingInTest.Add(baselineVariant);
        }
        else if (variantInOtherVcf.ReferenceName != null)
        {
            // NOTE(review): a non-null result without a ReferenceName is counted as neither
            // missing nor shared - presumably a placeholder returned by FindVariantInOtherVcf; confirm.
            sharedVariants.Add(new Tuple<VcfVariant, VcfVariant>(baselineVariant, variantInOtherVcf));
        }
    }

    // A (sub)region may contain no counted variants; report 0% then instead of the
    // NaN/Infinity that the floating-point division by zero would print.
    Console.WriteLine(Environment.NewLine + "------------------------------");
    Console.WriteLine("Baseline Missing In Test : {0} ({1}%)",
        baselineVariantsMissingInTest.Count,
        numBaselines == 0 ? 0.0 : Math.Round(100 * (float)baselineVariantsMissingInTest.Count / numBaselines, 2));
    Console.WriteLine("------------------------------");
    if (baselineVariantsMissingInTest.Any())
    {
        PrintVariants(baselineVariantsMissingInTest);
        Console.WriteLine("------------------------------" + Environment.NewLine);
    }

    Console.WriteLine("Test Missing In Baseline : {0} ({1}%)",
        testVariantsMissingInBaseline.Count,
        numTestVars == 0 ? 0.0 : Math.Round(100 * (float)testVariantsMissingInBaseline.Count / numTestVars, 2));
    Console.WriteLine("------------------------------");
    if (testVariantsMissingInBaseline.Any())
    {
        PrintVariants(testVariantsMissingInBaseline);
        Console.WriteLine("------------------------------" + Environment.NewLine);
    }

    var allDiffs = new List<Comparison>();
    if (!config.HideSharedDiffs)
    {
        Console.WriteLine("Comparison of Shared Variants ({0})", sharedVariants.Count);
        Console.WriteLine("------------------------------");

        // Compare shared variants
        allDiffs = CompareSharedVariants(config, sharedVariants);
        Console.WriteLine("------------------------------");
    }

    // Variants present on one side only are reported after the shared differences.
    allDiffs.AddRange(baselineVariantsMissingInTest.Select(variant => new Comparison(variant, true, false)));
    allDiffs.AddRange(testVariantsMissingInBaseline.Select(variant => new Comparison(variant, false, true)));

    return allDiffs;
}