Ejemplo n.º 1
0
        /// <summary>
        /// Command-line entry point: builds the comparison configuration from the arguments and
        /// the program settings, echoes the invocation, and runs the baseline-vs-test VCF comparison.
        /// </summary>
        /// <param name="args">
        /// args[0] is the baseline VCF path and args[1] is the test VCF path. Any further arguments
        /// are optional switches (e.g. -Exact, -GT, -AllVars) and named values looked up through
        /// GetParameterValue (Out, Summary, Chrom, Start, End).
        /// </param>
        static void Main(string[] args)
        {
            // Both VCF paths are mandatory; report usage instead of failing with an index error.
            if (args == null || args.Length < 2)
            {
                Console.Error.WriteLine("Expected at least two arguments: <baselineVcfPath> <testVcfPath> [options]");
                Environment.ExitCode = 1;
                return;
            }

            var baselineVcfPath = args[0];
            var testVcfPath     = args[1];

            var comparisonConfig     = new VcfComparisonConfig();
            var configurationManager = new ProgramConfigSettings();
            var region = new VcfRegion();

            // Comparison switches are only examined when something follows the two paths.
            if (args.Length > 2)
            {
                comparisonConfig.Exact        = args.Contains("-Exact");
                comparisonConfig.CheckGT      = args.Contains("-GT");
                comparisonConfig.CheckDP      = args.Contains("-DP");
                comparisonConfig.CheckVF      = args.Contains("-VF");
                comparisonConfig.CheckSB      = args.Contains("-SB");
                comparisonConfig.CheckFilter  = args.Contains("-Filter");
                comparisonConfig.CheckQual    = args.Contains("-Qual");
                comparisonConfig.PassingOnly  = !args.Contains("-AllVars");
                comparisonConfig.ConsiderRefs = args.Contains("-Refs");
                comparisonConfig.CheckAD      = args.Contains("-AD");
                // Kept after the individual switches, matching the original evaluation order.
                comparisonConfig.AllCheck(args.Contains("-AllCheck"));
                comparisonConfig.OutputFile      = GetParameterValue(args, "Out");
                comparisonConfig.SummaryFile     = GetParameterValue(args, "Summary");
                comparisonConfig.HideSharedDiffs = args.Contains("-HideShared");
            }

            // Region restriction: a missing Start means 0 and a missing End means "no upper bound".
            // Coordinates are machine-readable, so they are parsed with the invariant culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            region.Chromosome = GetParameterValue(args, "Chrom");

            var startText = GetParameterValue(args, "Start");
            region.Start  = startText == null ? 0 : int.Parse(startText, invariant);

            var endText = GetParameterValue(args, "End");
            region.End  = endText == null ? int.MaxValue : int.Parse(endText, invariant);

            // Only a positive configured margin overrides the comparison default; a negative margin
            // of error is not meaningful and must never be copied into the configuration.
            // NOTE(review): confirm whether an explicit 0 in the settings should also override.
            var configMargin = configurationManager.MarginOfError;
            if (configMargin > 0)
            {
                comparisonConfig.MarginOfError = configMargin;
            }

            var variantTypes = configurationManager.VariantTypes.Split(',');
            var verbose      = configurationManager.Verbose;

            comparisonConfig.BlockSize = configurationManager.BlockSize;

            // Variant classes to compare come from the comma-separated settings value.
            comparisonConfig.CheckDeletions  = variantTypes.Contains("Del");
            comparisonConfig.CheckInsertions = variantTypes.Contains("Ins");
            comparisonConfig.CheckSnv        = variantTypes.Contains("Snv");
            comparisonConfig.CheckMnv        = variantTypes.Contains("Mnv");

            // Echo the invocation so the console log is self-describing.
            Console.WriteLine();
            Console.WriteLine(string.Join(" ", args));
            Console.WriteLine("==============================" + Environment.NewLine);
            Console.WriteLine("Variant Types: " + string.Join(",", variantTypes));

            VcfComparer.BaselineVcfs(baselineVcfPath, testVcfPath, comparisonConfig, region, verbose);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads variants from the VCF at <paramref name="path"/>. When the region names no
        /// chromosome, every variant the reader yields is returned. Otherwise the file is scanned
        /// sequentially and only variants on that chromosome at or after region.Start (and, when
        /// region.End is positive, at or before region.End) are collected.
        /// </summary>
        /// <param name="path">Path of the VCF file to read.</param>
        /// <param name="region">Chromosome and inclusive position window to restrict the read to.</param>
        /// <param name="logger">Receives progress messages.</param>
        /// <param name="hitChromEnd">
        /// Set to true when the scan stops because the reader returned no further variant or because
        /// PassedChromosome reported the requested chromosome has been passed; left false when the
        /// scan stops at region.End or when no chromosome was requested.
        /// </param>
        /// <returns>The collected variants, in file order.</returns>
        public static List <VcfVariant> GetAlleles(string path, VcfRegion region, Logger logger, out bool hitChromEnd)
        {
            hitChromEnd = false;

            using (var vcf = new VcfReader(path))
            {
                var timer = Stopwatch.StartNew();
                logger.Log("Reading " + path);

                // No chromosome requested: take everything the reader provides.
                if (region.Chromosome == null)
                {
                    return vcf.GetVariants().ToList();
                }

                var collected = new List<VcfVariant>();
                logger.Log("Looking for chromosome " + region.Chromosome + " " + region.Start + " - " + region.End);

                // readCount counts read attempts, including the final unsuccessful one.
                for (var readCount = 1; ; readCount++)
                {
                    var current  = new VcfVariant();
                    var gotEntry = vcf.GetNextVariant(current);

                    // Periodic progress report every 50000 read attempts.
                    if (readCount % 50000 == 0)
                    {
                        logger.Log(string.Format("At chromosome '{0}' position {1} in {2}: {3} variants gathered", current.ReferenceName, current.ReferencePosition, timer.Elapsed, collected.Count));
                    }

                    if (!gotEntry)
                    {
                        hitChromEnd = true;
                        logger.Log("Reached end of VCF");
                        break;
                    }

                    if (PassedChromosome(current.ReferenceName, region.Chromosome))
                    {
                        hitChromEnd = true;
                        logger.Log("Passed chromosome " + region.Chromosome);
                        break;
                    }

                    // Skip records on another chromosome or before the window start.
                    var beforeWindow = current.ReferenceName != region.Chromosome
                                       || current.ReferencePosition < region.Start;
                    if (beforeWindow)
                    {
                        continue;
                    }

                    // A non-positive End means the window has no upper bound.
                    var pastWindow = region.End > 0 && current.ReferencePosition > region.End;
                    if (pastWindow)
                    {
                        logger.Log("Passed region end");
                        break;
                    }

                    collected.Add(current);
                }

                logger.Log(string.Format("Finished processing VCF {0}", path));
                return collected;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Compares the variants of a baseline VCF against a test VCF, optionally restricted to a
        /// region, and writes per-variant differences and a summary to the files named in
        /// <paramref name="config"/> when those are set.
        /// </summary>
        /// <param name="baselineVcfPath">Path of the baseline VCF.</param>
        /// <param name="testVcfPath">Path of the VCF compared against the baseline.</param>
        /// <param name="config">Comparison options, block size and output/summary file paths.</param>
        /// <param name="region">
        /// Optional region. When it is null or names no chromosome, both files are compared in a
        /// single pass; otherwise the region is processed in consecutive blocks of config.BlockSize positions.
        /// </param>
        /// <param name="verbose">Passed to the Logger used while reading the VCFs.</param>
        public static void BaselineVcfs(string baselineVcfPath, string testVcfPath, VcfComparisonConfig config, VcfRegion region = null, bool verbose = false)
        {
            var logger = new Logger(verbose);

            Console.WriteLine("Configuration Options");
            Console.WriteLine("------------------------------");
            Console.WriteLine(config);
            Console.WriteLine("==============================" + Environment.NewLine);
            Console.WriteLine("Comparing {0} to {1}", baselineVcfPath, testVcfPath);

            // region is optional (defaults to null); only split into blocks when a chromosome is given.
            var hasChromosome = region != null && region.Chromosome != null;

            var numblocks = hasChromosome ? 1 + ((region.End - region.Start) / config.BlockSize) : 1;

            var comparisonKeys = config.GetKeys();

            if (config.OutputFile != null)
            {
                // The diff file is recreated on every run; its header is tab-separated throughout.
                using (StreamWriter sw = File.CreateText(config.OutputFile))
                {
                    var keyColumns = comparisonKeys.SelectMany(x => new[] { x + "_1", x + "_2", x + "_OK" });
                    sw.WriteLine("Variant\tInBaseline\tInTest\t" + string.Join("\t", keyColumns));
                }
            }
            if (config.SummaryFile != null && !File.Exists(config.SummaryFile))
            {
                // The summary file is only given a header when it does not exist yet.
                using (StreamWriter sw = File.CreateText(config.SummaryFile))
                {
                    sw.WriteLine("BaselineVcf,TestVcf,SharedDiffs,BaselineOnly,TestOnly");
                }
            }

            for (int i = 0; i < numblocks; i++)
            {
                bool baselineHitEnd;
                bool testHitEnd;

                var subRegion = new VcfRegion();
                if (hasChromosome)
                {
                    subRegion.Chromosome = region.Chromosome;
                    subRegion.Start      = region.Start + i * config.BlockSize;
                    if (region.End > 0)
                    {
                        // Region bounds are inclusive when variants are read, so a block ends one
                        // position before the next block starts; otherwise a variant sitting on the
                        // boundary would be compared in two blocks. Computed in 64-bit so an open
                        // upper bound (int.MaxValue) cannot overflow.
                        long blockEnd = (long)region.Start + ((long)i + 1) * config.BlockSize - 1;
                        subRegion.End = (int)Math.Min(blockEnd, (long)region.End);
                    }

                    Console.WriteLine("==============================");
                    Console.WriteLine("PROCESSING FOR REGION: " + subRegion.ToString());
                }

                // Get baseline variants
                Dictionary <string, List <VcfVariant> > baselineVariantsDict;
                var alleles          = VcfParser.GetAlleles(baselineVcfPath, subRegion, logger, out baselineHitEnd);
                var baselineVariants = ProcessVariants(config, alleles, out baselineVariantsDict);

                // Get test variants
                var testAlleles = VcfParser.GetAlleles(testVcfPath, subRegion, logger, out testHitEnd);
                Dictionary <string, List <VcfVariant> > testVariantsDict;
                var testVariants = ProcessVariants(config, testAlleles, out testVariantsDict);

                // Compare baseline vs test
                var allDiffs = Compare(config, baselineVariants, testVariants, baselineVariantsDict, testVariantsDict);

                if (config.SummaryFile != null)
                {
                    FlushSummary(config.SummaryFile, allDiffs, baselineVcfPath, testVcfPath);
                }

                if (allDiffs.Any() && config.OutputFile != null)
                {
                    foreach (var diff in allDiffs.OrderBy(d => d.Variant.ReferencePosition))
                    {
                        FlushDiff(config.OutputFile, diff.GetEntry(comparisonKeys));
                    }
                }

                // Stop early only once BOTH files have run out of variants for this chromosome;
                // if just one is exhausted, the other may still hold variants in later blocks.
                if (baselineHitEnd && testHitEnd)
                {
                    break;
                }
            }
        }