/// <summary>
/// Command-line entry point. Reads the baseline and test VCF paths from the first two
/// arguments, builds the comparison configuration from the remaining switches and from
/// <c>ProgramConfigSettings</c>, echoes the invocation, and runs the comparison.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the baseline VCF path and <c>args[1]</c> is the test VCF path.
/// Further arguments are optional switches (for example <c>-Exact</c>, <c>-GT</c>,
/// <c>-AllVars</c>) and named values looked up through <c>GetParameterValue</c>
/// (<c>Out</c>, <c>Summary</c>, <c>Chrom</c>, <c>Start</c>, <c>End</c>).
/// </param>
static void Main(string[] args)
{
    // Both positional paths are mandatory; fail with a readable message instead of an
    // IndexOutOfRangeException when they are missing.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <baselineVcfPath> <testVcfPath> [options]");
        Environment.ExitCode = 1;
        return;
    }

    var baselineVcfPath = args[0];
    var testVcfPath = args[1];

    var comparisonConfig = new VcfComparisonConfig();
    var configurationManager = new ProgramConfigSettings();
    var region = new VcfRegion();

    if (args.Length > 2)
    {
        comparisonConfig.Exact = args.Contains("-Exact");
        comparisonConfig.CheckGT = args.Contains("-GT");
        comparisonConfig.CheckDP = args.Contains("-DP");
        comparisonConfig.CheckVF = args.Contains("-VF");
        comparisonConfig.CheckSB = args.Contains("-SB");
        comparisonConfig.CheckFilter = args.Contains("-Filter");
        comparisonConfig.CheckQual = args.Contains("-Qual");
        // Only passing variants are considered unless -AllVars is given.
        comparisonConfig.PassingOnly = !args.Contains("-AllVars");
        comparisonConfig.ConsiderRefs = args.Contains("-Refs");
        comparisonConfig.CheckAD = args.Contains("-AD");
        // Kept after the individual switches, as in the original ordering.
        comparisonConfig.AllCheck(args.Contains("-AllCheck"));
        comparisonConfig.OutputFile = GetParameterValue(args, "Out");
        comparisonConfig.SummaryFile = GetParameterValue(args, "Summary");
        comparisonConfig.HideSharedDiffs = args.Contains("-HideShared");
    }

    region.Chromosome = GetParameterValue(args, "Chrom");

    // Region bounds default to [0, int.MaxValue] when not supplied. Invalid numbers are
    // reported explicitly rather than surfacing as an unhandled FormatException.
    var startText = GetParameterValue(args, "Start");
    var endText = GetParameterValue(args, "End");
    int regionStart = 0;
    int regionEnd = int.MaxValue;

    if (startText != null &&
        !int.TryParse(startText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out regionStart))
    {
        Console.Error.WriteLine("Invalid value for Start: '" + startText + "' (expected an integer).");
        Environment.ExitCode = 1;
        return;
    }

    if (endText != null &&
        !int.TryParse(endText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out regionEnd))
    {
        Console.Error.WriteLine("Invalid value for End: '" + endText + "' (expected an integer).");
        Environment.ExitCode = 1;
        return;
    }

    region.Start = regionStart;
    region.End = regionEnd;

    // NOTE(review): the configured margin is applied only when it is negative, which looks
    // inverted for a "margin of error" - confirm the intended condition before changing it.
    var configMargin = configurationManager.MarginOfError;
    if (configMargin < 0)
    {
        comparisonConfig.MarginOfError = configMargin;
    }

    var variantTypes = configurationManager.VariantTypes.Split(',');
    var verbose = configurationManager.Verbose;

    comparisonConfig.BlockSize = configurationManager.BlockSize;
    comparisonConfig.CheckDeletions = variantTypes.Contains("Del");
    comparisonConfig.CheckInsertions = variantTypes.Contains("Ins");
    comparisonConfig.CheckSnv = variantTypes.Contains("Snv");
    comparisonConfig.CheckMnv = variantTypes.Contains("Mnv");

    // Echo the invocation so the console output records how the run was configured.
    Console.WriteLine();
    Console.WriteLine(string.Join(" ", args));
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Variant Types: " + string.Join(",", variantTypes));

    VcfComparer.BaselineVcfs(baselineVcfPath, testVcfPath, comparisonConfig, region, verbose);
}
/// <summary>
/// Reads variants from the VCF at <paramref name="path"/>. When the region names a
/// chromosome, only variants on that chromosome whose position lies between the region's
/// start and end (both inclusive; an end of zero or less means no upper bound) are
/// returned. Otherwise every variant the reader yields is returned.
/// </summary>
/// <param name="path">Path of the VCF file to read.</param>
/// <param name="region">Region filter; a null <c>Chromosome</c> disables filtering.</param>
/// <param name="logger">Receives progress messages.</param>
/// <param name="hitChromEnd">
/// Set to true when reading stopped because the file ended or the requested chromosome was
/// passed; false when reading stopped at the region end or no chromosome filter was applied.
/// </param>
/// <returns>The variants collected, in the order they were read.</returns>
public static List<VcfVariant> GetAlleles(string path, VcfRegion region, Logger logger, out bool hitChromEnd)
{
    hitChromEnd = false;

    using (var reader = new VcfReader(path))
    {
        var timer = Stopwatch.StartNew();
        logger.Log("Reading " + path);

        // No chromosome requested: take everything the reader has.
        if (region.Chromosome == null)
        {
            return reader.GetVariants().ToList();
        }

        var collected = new List<VcfVariant>();
        logger.Log("Looking for chromosome " + region.Chromosome + " " + region.Start + " - " + region.End);

        // readCount counts read attempts, including the final one that finds no variant.
        for (var readCount = 1; ; readCount++)
        {
            var current = new VcfVariant();
            var gotVariant = reader.GetNextVariant(current);

            // Periodic progress report, emitted before the end-of-file check.
            if (readCount % 50000 == 0)
            {
                logger.Log(string.Format("At chromosome '{0}' position {1} in {2}: {3} variants gathered", current.ReferenceName, current.ReferencePosition, timer.Elapsed, collected.Count));
            }

            if (!gotVariant)
            {
                hitChromEnd = true;
                logger.Log("Reached end of VCF");
                break;
            }

            if (PassedChromosome(current.ReferenceName, region.Chromosome))
            {
                hitChromEnd = true;
                logger.Log("Passed chromosome " + region.Chromosome);
                break;
            }

            // Skip records on other chromosomes or before the region start.
            if (current.ReferenceName != region.Chromosome || current.ReferencePosition < region.Start)
            {
                continue;
            }

            var beyondRegionEnd = region.End > 0 && current.ReferencePosition > region.End;
            if (beyondRegionEnd)
            {
                logger.Log("Passed region end");
                break;
            }

            collected.Add(current);
        }

        logger.Log("Finished processing VCF " + path);
        return collected;
    }
}
/// <summary>
/// Compares a test VCF against a baseline VCF and writes the differences to the files named
/// in <paramref name="config"/>. When the region names a chromosome, the region is processed
/// in consecutive blocks of <c>config.BlockSize</c> positions; otherwise both files are
/// compared in a single pass.
/// </summary>
/// <param name="baselineVcfPath">Path of the baseline VCF.</param>
/// <param name="testVcfPath">Path of the VCF being tested against the baseline.</param>
/// <param name="config">Comparison settings, including optional output and summary file paths.</param>
/// <param name="region">
/// Optional region restriction. Null is treated like a default-constructed region, the same
/// object each block receives when no chromosome is requested.
/// </param>
/// <param name="verbose">Passed to the <c>Logger</c> used while reading the VCFs.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A chromosome was requested but <c>config.BlockSize</c> is not positive.
/// </exception>
public static void BaselineVcfs(string baselineVcfPath, string testVcfPath, VcfComparisonConfig config, VcfRegion region = null, bool verbose = false)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    // The parameter defaults to null, so it must not be dereferenced unguarded.
    if (region == null)
    {
        region = new VcfRegion();
    }

    var logger = new Logger(verbose);

    Console.WriteLine("Configuration Options");
    Console.WriteLine("------------------------------");
    Console.WriteLine(config);
    Console.WriteLine("==============================" + Environment.NewLine);
    Console.WriteLine("Comparing {0} to {1}", baselineVcfPath, testVcfPath);

    var restrictToChromosome = region.Chromosome != null;
    if (restrictToChromosome && config.BlockSize <= 0)
    {
        // Avoids a divide-by-zero (or an empty loop) when computing the block count.
        throw new ArgumentOutOfRangeException("config", "BlockSize must be positive when a chromosome region is specified.");
    }

    var numblocks = restrictToChromosome ? 1 + ((region.End - region.Start) / config.BlockSize) : 1;
    var comparisonKeys = config.GetKeys();

    if (config.OutputFile != null)
    {
        // Header: three fixed columns followed by <key>_1, <key>_2, <key>_OK per comparison key.
        // Every column is tab-separated (the original had a stray comma after "InTest").
        var columns = new List<string> { "Variant", "InBaseline", "InTest" };
        foreach (var key in comparisonKeys)
        {
            columns.Add(key + "_1");
            columns.Add(key + "_2");
            columns.Add(key + "_OK");
        }

        using (StreamWriter sw = File.CreateText(config.OutputFile))
        {
            sw.WriteLine(string.Join("\t", columns));
        }
    }

    // The summary file is created with its header only if it does not exist yet.
    if (config.SummaryFile != null && !File.Exists(config.SummaryFile))
    {
        using (StreamWriter sw = File.CreateText(config.SummaryFile))
        {
            sw.WriteLine("BaselineVcf,TestVcf,SharedDiffs,BaselineOnly,TestOnly");
        }
    }

    for (int i = 0; i < numblocks; i++)
    {
        var subRegion = new VcfRegion();
        if (restrictToChromosome)
        {
            subRegion.Chromosome = region.Chromosome;
            subRegion.Start = region.Start + i * config.BlockSize;

            if (region.End > 0)
            {
                // GetAlleles treats both bounds as inclusive, so the block ends one position
                // before the next block starts; otherwise boundary variants are read twice.
                // Computed in 64-bit because End may be int.MaxValue.
                long blockEnd = (long)region.Start + ((long)(i + 1) * config.BlockSize) - 1;

                // GetAlleles treats End <= 0 as "no upper bound", so keep a bounded block at >= 1.
                subRegion.End = (int)Math.Max(1L, Math.Min(blockEnd, (long)region.End));
            }

            Console.WriteLine("==============================");
            Console.WriteLine("PROCESSING FOR REGION: " + subRegion.ToString());
        }

        // Get baseline variants
        bool baselineHitEnd;
        Dictionary<string, List<VcfVariant>> baselineVariantsDict;
        var baselineAlleles = VcfParser.GetAlleles(baselineVcfPath, subRegion, logger, out baselineHitEnd);
        var baselineVariants = ProcessVariants(config, baselineAlleles, out baselineVariantsDict);

        // Get test variants
        bool testHitEnd;
        Dictionary<string, List<VcfVariant>> testVariantsDict;
        var testAlleles = VcfParser.GetAlleles(testVcfPath, subRegion, logger, out testHitEnd);
        var testVariants = ProcessVariants(config, testAlleles, out testVariantsDict);

        // Compare baseline vs test
        var allDiffs = Compare(config, baselineVariants, testVariants, baselineVariantsDict, testVariantsDict);

        if (config.SummaryFile != null)
        {
            FlushSummary(config.SummaryFile, allDiffs, baselineVcfPath, testVcfPath);
        }

        if (allDiffs.Any() && config.OutputFile != null)
        {
            foreach (var diff in allDiffs.OrderBy(d => d.Variant.ReferencePosition))
            {
                FlushDiff(config.OutputFile, diff.GetEntry(comparisonKeys));
            }
        }

        // Stop early only once BOTH files are exhausted for this chromosome. If only one
        // has ended, later blocks can still hold variants unique to the other file.
        if (baselineHitEnd && testHitEnd)
        {
            break;
        }
    }
}