예제 #1
0
        /// <summary>
        /// Runs the genome processor twice over the same small BAM against a mocked chr19
        /// reference: once with <c>MinimumDepth = 1000</c> and once without setting it,
        /// reading the genome VCF back after each run.
        /// </summary>
        /// <remarks>
        /// The count assertions are disabled, so this currently only checks that both runs
        /// and both VCF reads complete without throwing.
        /// </remarks>
        public void Pisces_LowDepthTest()
        {
            var mockReferences = new List <ChrReference>
            {
                new ChrReference
                {
                    Name     = "chr19",
                    Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
                }
            };

            // First pass: minimum depth raised to 1000.
            var highDepthOptions = new ApplicationOptions
            {
                BAMPaths           = new[] { _bamSmallS1 },
                GenomePaths        = new[] { _genomeChr19 },
                DebugMode          = true,
                CallMNVs           = true,
                UseMNVReallocation = false,
                MaxSizeMNV         = 100,
                OutputgVCFFiles    = true,
                MinimumDepth       = 1000,
                OutputFolder       = UnitTestPaths.TestDataDirectory
            };

            // Both reads below use this path: the first BAM path with its extension swapped for "genome.vcf".
            var genomeVcfPath = Path.ChangeExtension(highDepthOptions.BAMPaths[0], "genome.vcf");

            var     highDepthFactory = new Factory(highDepthOptions);
            IGenome genomeRef        = new MockGenome(mockReferences, _genomeChr19);

            new GenomeProcessor(highDepthFactory, genomeRef).Execute(1);
            var highDepthResults = VcfReader.GetAllVariantsInFile(genomeVcfPath);

            // Second pass: same inputs, MinimumDepth and MaxSizeMNV left unset.
            var defaultDepthOptions = new ApplicationOptions
            {
                BAMPaths           = new[] { _bamSmallS1 },
                GenomePaths        = new[] { _genomeChr19 },
                DebugMode          = true,
                CallMNVs           = true,
                UseMNVReallocation = false,
                OutputgVCFFiles    = true,
                OutputFolder       = UnitTestPaths.TestDataDirectory
            };
            var defaultDepthFactory = new Factory(defaultDepthOptions);

            new GenomeProcessor(defaultDepthFactory, genomeRef).Execute(1);
            var defaultDepthResults = VcfReader.GetAllVariantsInFile(genomeVcfPath);

            // Disabled expectations, kept for reference:
            //   - the two result counts differ
            //   - 84 records for the MinimumDepth = 1000 run
            //   - 100 records for the second run
        }
예제 #2
0
        /// <summary>
        /// Tests two BAMs staged in different folders.
        /// </summary>
        /// <remarks>
        /// Expectations:
        /// - if no output folder is specified, logs are in the directory of the first BAM
        /// - if an output folder is specified, logs are in the output folder
        /// - VCF files have a header and both chromosomes; output is where normally expected
        /// </remarks>
        /// <param name="numberOfThreads">Thread count handed to <c>GenomeProcessor.Execute</c>.</param>
        /// <param name="outputFolder">Optional output directory; when null or empty no "-OutFolder" argument is added.</param>
        private void ExecuteTest(int numberOfThreads, string outputFolder = null)
        {
            var sourcePath         = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var otherTestDirectory = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");
            var bamFilePath1       = Stage(sourcePath, "In1", otherTestDirectory + "1");
            var bamFilePath2       = Stage(sourcePath, "In2", otherTestDirectory + "2");

            var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

            var options = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath1, bamFilePath2 },
                GenomePaths          = new[] { genomePath },
                OutputDirectory      = outputFolder,
                CommandLineArguments = string.Format("-B {0},{1} -g {2}{3} -gVCF false", bamFilePath1, bamFilePath2, genomePath, string.IsNullOrEmpty(outputFolder) ? string.Empty : " -OutFolder " + outputFolder).Split(' '),
                VcfWritingParameters = new VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            options.SetIODirectories("Pisces");
            var factory = new Factory(options);

            // Remove output left over from a previous run so the checks below see fresh files.
            foreach (var workRequest in factory.WorkRequests)
            {
                if (File.Exists(workRequest.OutputFilePath))
                {
                    File.Delete(workRequest.OutputFilePath);
                }
            }

            Logger.OpenLog(options.LogFolder, options.LogFileName, true);

            try
            {
                var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(options.GenomePaths[0]), false, true);

                processor.Execute(numberOfThreads);
            }
            finally
            {
                // Close the log even when processing throws, so a failure here does not
                // leave the log open for whatever runs next.
                Logger.CloseLog();
            }

            foreach (var workRequest in factory.WorkRequests)
            {
                using (var reader = new VcfReader(workRequest.OutputFilePath))
                {
                    Assert.True(reader.HeaderLines.Any());
                    var variants = reader.GetVariants().ToList();

                    Assert.Equal(251, variants.Count);
                    Assert.Equal("chr17", variants.First().ReferenceName);
                    Assert.Equal("chr19", variants.Last().ReferenceName);
                }
            }

            Assert.True(Directory.GetFiles(options.LogFolder, options.LogFileNameBase).Any());
        }
예제 #3
0
 /// <summary>
 /// Builds a <c>GenomeProcessor</c> from the given factory and genome and executes it with a
 /// thread count of 1.
 /// </summary>
 /// <param name="threadByChr">
 /// Has no effect. The previous if/else on this flag ran the same two statements in both
 /// branches, so the conditional was dead; the parameter is kept only for caller compatibility.
 /// </param>
 /// <param name="factory">Factory handed to the processor.</param>
 /// <param name="genome">Genome handed to the processor.</param>
 private static void CreateAndExecuteProcessor(bool threadByChr, MockFactoryWithDefaults factory, Genome genome)
 {
     var processor = new GenomeProcessor(factory, genome);
     processor.Execute(1);
 }
예제 #4
0
        /// <summary>
        /// Executes a processor built from <c>GetMockFactory(2)</c> and verifies how often the
        /// mocked writer and chromosome realigner were invoked.
        /// </summary>
        public void FlowWithMultipleBams()
        {
            const int expectedLifecycleCalls = 2;
            const int expectedPerChrCalls    = 6;

            var mockFactory = GetMockFactory(2);

            new GenomeProcessor(mockFactory, GetGenome().Object).Execute(1);

            var writer = mockFactory.MockWriter;
            writer.Verify(w => w.Initialize(), Times.Exactly(expectedLifecycleCalls));
            // Buffered records are flushed in between each chromosome.
            writer.Verify(w => w.FlushAllBufferedRecords(), Times.Exactly(expectedPerChrCalls));
            writer.Verify(w => w.FinishAll(), Times.Exactly(expectedLifecycleCalls));

            mockFactory.MockChrRealigner.Verify(r => r.Execute(), Times.Exactly(expectedPerChrCalls));
        }
예제 #5
0
        /// <summary>
        /// Executes a processor constructed with the "chr2" filter argument and verifies how
        /// often the mocked writer, realigner and alignment extractor were invoked.
        /// </summary>
        public void FlowWithChrFilter()
        {
            var mockFactory = GetMockFactory();

            // The extractor reports "no alignment" on every call.
            mockFactory.MockAlignmentExtractor
                       .Setup(extractor => extractor.GetNextAlignment(It.IsAny <Read>()))
                       .Returns(false);

            new GenomeProcessor(mockFactory, GetGenome().Object, "chr2").Execute(1);

            var writer = mockFactory.MockWriter;
            writer.Verify(w => w.Initialize(), Times.Exactly(1));
            // Buffered records are flushed in between each chromosome.
            writer.Verify(w => w.FlushAllBufferedRecords(), Times.Exactly(3));
            writer.Verify(w => w.FinishAll(), Times.Exactly(1));

            mockFactory.MockChrRealigner.Verify(realigner => realigner.Execute(), Times.Exactly(1));
            mockFactory.MockAlignmentExtractor.Verify(extractor => extractor.GetNextAlignment(It.IsAny <Read>()), Times.Exactly(2));
        }
예제 #6
0
        /// <summary>
        /// Runs one <c>GenomeProcessor</c> per distinct genome path in the options, passing the
        /// configured chromosome filter and thread limit. After each genome,
        /// <c>ConcatenateLogs</c> is called unless the options report running inside a sub-process.
        /// </summary>
        protected override void ProgramExecution()
        {
            var factory = new Factory(_options);

            foreach (var genomePath in _options.GenomePaths.Distinct())
            {
                var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), _options.ChromosomeFilter);
                processor.Execute(_options.MaxNumThreads);

                if (_options.InsideSubProcess)
                {
                    continue;
                }

                ConcatenateLogs();
            }
        }
예제 #7
0
        /// <summary>
        /// Runs the genome processor over the existing BAM folder twice, once with a "chr19"
        /// filter and once with a "chr18" filter, and checks that each run produces an output
        /// BAM whose size differs from the input BAM.
        /// </summary>
        public void ReadGenome()
        {
            Directory.CreateDirectory(_outputFolder);
            var options = new HygeaOptions()
            {
                BAMPaths        = BamProcessorParsingUtils.UpdateBamPathsWithBamsFromFolder(_existingBamFolder),
                GenomePaths     = new[] { _existingGenome },
                OutputDirectory = _outputFolder
            };

            var factory = new Factory(options);

            // xUnit takes (expected, actual); the known-good value goes first so a failure
            // message labels the two sides correctly.
            Assert.Equal(_outputFilePath, factory.GetOutputFile(_existingBamPath));

            // Run the genome processor using the filter for chr19, it will run through the IndelRealigner path as usual.
            var genome = new Genome(_existingGenome, new List <string>()
            {
                "chr19"
            });
            var gp1 = new GenomeProcessor(factory, genome, "chr19");

            gp1.Execute(1);

            var outputFilePath = Path.Combine(_outputFolder, Path.GetFileName(_existingBamPath));

            Assert.True(File.Exists(outputFilePath));
            Assert.NotEqual(new FileInfo(_existingBamPath).Length, new FileInfo(outputFilePath).Length);

            // Delete the first result so the existence check after the second run is meaningful.
            File.Delete(outputFilePath);

            // Run the genome processor using the filter for chr18 to follow the path in GenomeProcessor.Process
            // for chromosomes outside the filter.
            var gp2 = new GenomeProcessor(factory, genome, "chr18");

            gp2.Execute(1);

            Assert.True(File.Exists(outputFilePath));
            Assert.NotEqual(new FileInfo(_existingBamPath).Length, new FileInfo(outputFilePath).Length);
        }
예제 #8
0
        /// <summary>
        /// Runs processing according to the <c>ThreadByChr</c> option: when set, a single
        /// <c>BamProcessor</c> is run for the genome of the first work request; otherwise one
        /// <c>GenomeProcessor</c> is run per distinct genome path.
        /// </summary>
        public void Execute()
        {
            var factory = new Factory(_options);

            if (_options.ThreadByChr)
            {
                // Only the first work request's genome directory is used on this path.
                var firstRequest = factory.WorkRequests.First();
                var bamProcessor = new BamProcessor(factory, factory.GetReferenceGenome(firstRequest.GenomeDirectory));

                bamProcessor.Execute(_options.MaxNumThreads);
                return;
            }

            foreach (var genomeDirectory in _options.GenomePaths.Distinct())
            {
                var genomeProcessor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomeDirectory));
                genomeProcessor.Execute(_options.MaxNumThreads);
            }
        }
        /// <summary>
        /// Runs a <c>GenomeProcessor</c> over Chr17Chr19.bam with a mocked variant caller that
        /// sleeps 500 ms per call, then checks that only the combined VCF exists in
        /// <paramref name="outDir"/> and no "_chr17" / "_chr19" files exist beside it.
        /// </summary>
        /// <param name="numberOfThreads">Thread count handed to <c>GenomeProcessor.Execute</c>.</param>
        /// <param name="expectedNumberOfThreads">
        /// Currently unused. The check that counted "Start processing chr" log lines before the
        /// first "Completed processing chr" line is disabled, so no thread count is asserted.
        /// </param>
        /// <param name="outDir">Output directory; also where the expected VCF is looked up.</param>
        private void ExecuteChromosomeThreadingTest(int numberOfThreads, int expectedNumberOfThreads, string outDir)
        {
            var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var vcfFilePath = Path.Combine(outDir, "Chr17Chr19.vcf");
            var genomePath  = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

            var options = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                GenomePaths          = new[] { genomePath },
                VcfWritingParameters = new VcfWritingParameters()
                {
                    OutputGvcfFile = false
                },
                OutputDirectory = outDir
            };

            options.SetIODirectories("Pisces");
            var logFile = Path.Combine(options.LogFolder, options.LogFileName);

            // Start from a clean log file.
            if (File.Exists(logFile))
            {
                File.Delete(logFile);
            }

            Logger.OpenLog(options.LogFolder, options.LogFileName);

            try
            {
                var factory = new MockFactoryWithDefaults(options);

                factory.MockSomaticVariantCaller = new Mock <ISmallVariantCaller>();
                factory.MockSomaticVariantCaller.Setup(s => s.Execute()).Callback(() =>
                {
                    Thread.Sleep(500);
                });
                var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), false);

                processor.Execute(numberOfThreads);

                Assert.False(File.Exists(vcfFilePath + "_chr17"));
                Assert.False(File.Exists(vcfFilePath + "_chr19"));
                Assert.True(File.Exists(vcfFilePath));
            }
            finally
            {
                // Close the log even when processing or an assertion throws, so a failing
                // test does not leave the log open for whatever runs next.
                Logger.CloseLog();
            }
        }
        /// <summary>
        /// Checks VCF output when interval files are empty: first with one valid and one empty
        /// interval file across two BAMs, then with only the empty interval file.
        /// </summary>
        /// <param name="throttle">Passed through as the third <c>GenomeProcessor</c> constructor argument.</param>
        private void ExecuteEmptyIntervalsTest(bool throttle)
        {
            var firstBam          = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var secondBam         = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19_removedSQlines.bam");
            var genomePath        = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
            var chr17Intervals    = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17only.picard");
            var emptyIntervals    = Path.Combine(TestPaths.LocalTestDataDirectory, "empty.picard");
            var mixedOutputFolder = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_Mixed");

            // Scenario 1: one BAM has intervals, the other has an empty interval file.
            var options = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { firstBam, secondBam },
                IntervalPaths        = new [] { chr17Intervals, emptyIntervals },
                GenomePaths          = new[] { genomePath },
                OutputDirectory      = mixedOutputFolder,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            var mixedFactory = new Factory(options);

            new GenomeProcessor(mixedFactory, mixedFactory.GetReferenceGenome(genomePath), throttle).Execute(2);

            // The first VCF should have been processed regularly.
            using (var firstReader = new AlleleReader(mixedFactory.WorkRequests.First().OutputFilePath))
            {
                Assert.Equal(11, firstReader.GetVariants().Count());
            }

            // The second VCF should contain no variants.
            using (var secondReader = new AlleleReader(mixedFactory.WorkRequests.Last().OutputFilePath))
            {
                Assert.Equal(0, secondReader.GetVariants().Count());
            }

            // Scenario 2: same options object, now with only the empty interval file.
            options.IntervalPaths   = new[] { emptyIntervals };
            options.OutputDirectory = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_All");

            var emptyFactory = new Factory(options);

            new GenomeProcessor(emptyFactory, emptyFactory.GetReferenceGenome(genomePath), throttle).Execute(2);

            // Every VCF should contain no variants.
            foreach (var request in emptyFactory.WorkRequests)
            {
                using (var reader = new AlleleReader(request.OutputFilePath))
                {
                    Assert.Equal(0, reader.GetVariants().Count());
                }
            }
        }
        /// <summary>
        /// Runs a processor over one BAM and checks the resulting VCF against expectations.
        /// </summary>
        /// <param name="bamFilePath">Input BAM; used only when default options are built.</param>
        /// <param name="vcfFilePath">VCF to read back; its extension becomes "genome.vcf" when <paramref name="doCheckReferences"/> is set.</param>
        /// <param name="intervalPath">Optional interval file; null or empty means no intervals in the default options.</param>
        /// <param name="expectedVariants">Variants passed to <c>CheckVariants</c> when a full comparison is requested.</param>
        /// <param name="fakeReferences">When non-null, a <c>MockGenome</c> built from these replaces the factory's reference genome.</param>
        /// <param name="doCheckVariants">Whether variant calls are checked at all.</param>
        /// <param name="doCheckReferences">Whether reference calls are checked; also turns on gVCF output in the default options.</param>
        /// <param name="expectedNumCoveredPositions">Not used by this method.</param>
        /// <param name="threadByChr">Selects <c>BamProcessor</c> instead of <c>GenomeProcessor</c>.</param>
        /// <param name="doCountsOnly">When greater than zero, only the number of variant calls is compared against this value.</param>
        /// <param name="doLog">Sets <c>DebugMode</c> in the default options.</param>
        /// <param name="callMnvs">Sets <c>CallMNVs</c> in the default options.</param>
        /// <param name="applicationOptions">Caller-supplied options; when null, defaults are built from the arguments above.</param>
        public void Execute(
            string bamFilePath,
            string vcfFilePath,
            string intervalPath,
            List <BaseCalledAllele> expectedVariants,
            List <ChrReference> fakeReferences = null,
            bool doCheckVariants            = true,
            bool doCheckReferences          = false,
            int expectedNumCoveredPositions = 0,
            bool threadByChr = false,
            int doCountsOnly = 0,
            bool doLog       = false,
            bool callMnvs    = true,
            ApplicationOptions applicationOptions = null)
        {
            if (doCheckReferences)
            {
                vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
            }

            if (applicationOptions == null)
            {
                applicationOptions = new ApplicationOptions
                {
                    BAMPaths               = new[] { bamFilePath },
                    IntervalPaths          = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
                    GenomePaths            = new[] { GenomeDirectory },
                    OutputgVCFFiles        = doCheckReferences,
                    OutputBiasFiles        = true,
                    DebugMode              = doLog,
                    MinimumBaseCallQuality = 20,
                    CallMNVs               = callMnvs
                };
            }

            Logger.TryOpenLog(applicationOptions.LogFolder, ApplicationOptions.LogFileName);

            try
            {
                var factory = GetFactory(applicationOptions);

                IGenome genome;

                if (fakeReferences == null)
                {
                    genome = factory.GetReferenceGenome(GenomeDirectory);
                }
                else
                {
                    genome = new MockGenome(fakeReferences, GenomeDirectory);
                }

                if (threadByChr)
                {
                    var processor = new BamProcessor(factory, genome);

                    processor.Execute(1);
                }
                else
                {
                    var processor = new GenomeProcessor(factory, genome);

                    processor.Execute(1);
                }
            }
            finally
            {
                // Close the log even when setup or processing throws, so a failing run
                // does not leave the log open for whatever runs next.
                Logger.TryCloseLog();
            }

            using (var reader = new VcfReader(vcfFilePath))
            {
                var alleles = reader.GetVariants().ToList();

                // A "." alternate allele marks a reference call; everything else is a variant call.
                var variantCalls = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

                if (doCheckVariants)
                {
                    if (doCountsOnly > 0)
                    {
                        // xUnit takes (expected, actual).
                        Assert.Equal(doCountsOnly, variantCalls.Count);
                    }
                    else
                    {
                        CheckVariants(variantCalls, expectedVariants);
                    }
                }

                if (doCheckReferences)
                {
                    var referenceAlleles = alleles.Where(a => a.VariantAlleles[0] == ".").ToList();

                    // Build the variant positions once instead of once per allele.
                    var variantPositions = variantCalls.Select(v => v.ReferencePosition).ToList();

                    // make sure no reference calls at variant positions
                    Assert.Equal(referenceAlleles.Count, alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
                }
            }
        }
        /// <summary>
        /// Runs a <c>GenomeProcessor</c> over one BAM and checks the resulting VCF against expectations.
        /// </summary>
        /// <param name="bamFilePath">Input BAM; used only when default options are built.</param>
        /// <param name="vcfFilePath">VCF to read back; its extension becomes "genome.vcf" when <paramref name="doCheckReferences"/> is set.</param>
        /// <param name="intervalPath">Optional interval file; null or empty means no intervals in the default options.</param>
        /// <param name="expectedVariants">Variants passed to <c>CheckVariants</c> when a full comparison is requested.</param>
        /// <param name="fakeReferences">When non-null, a <c>MockGenome</c> built from these replaces the factory's reference genome.</param>
        /// <param name="doCheckVariants">Whether variant calls are checked at all.</param>
        /// <param name="doCheckReferences">Whether reference calls are checked; also turns on gVCF output in the default options.</param>
        /// <param name="expectedNumCoveredPositions">Not used by this method.</param>
        /// <param name="threadByChr">When set, the processor is constructed with an extra <c>false</c> third argument.</param>
        /// <param name="doCountsOnly">When greater than zero, only the number of variant calls is compared against this value.</param>
        /// <param name="doLog">Sets <c>DebugMode</c> in the default options.</param>
        /// <param name="callMnvs">Sets <c>CallMNVs</c> in the default options.</param>
        /// <param name="applicationOptions">Caller-supplied options; when null, defaults are built. Its <c>OutputDirectory</c> is always overwritten.</param>
        /// <param name="collapse">Sets <c>Collapse</c> in the default options.</param>
        public void Execute(
            string bamFilePath,
            string vcfFilePath,
            string intervalPath,
            List <CalledAllele> expectedVariants,
            List <ChrReference> fakeReferences = null,
            bool doCheckVariants            = true,
            bool doCheckReferences          = false,
            int expectedNumCoveredPositions = 0,
            bool threadByChr = false,
            int doCountsOnly = 0,
            bool doLog       = false,
            bool callMnvs    = true,
            PiscesApplicationOptions applicationOptions = null,
            bool collapse = true)
        {
            if (doCheckReferences)
            {
                vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
            }

            if (applicationOptions == null)
            {
                applicationOptions = new PiscesApplicationOptions
                {
                    BAMPaths            = new[] { bamFilePath },
                    IntervalPaths       = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
                    GenomePaths         = new[] { GenomeDirectory },
                    OutputBiasFiles     = true,
                    DebugMode           = doLog,
                    CallMNVs            = callMnvs,
                    MaxGapBetweenMNV    = 10,
                    MaxSizeMNV          = 15,
                    Collapse            = collapse,
                    BamFilterParameters = new BamFilterParameters()
                    {
                        MinimumBaseCallQuality = 20
                    },
                    VariantCallingParameters = new VariantCallingParameters(),
                    VcfWritingParameters     = new VcfWritingParameters()
                    {
                        OutputGvcfFile = doCheckReferences,
                    },
                    CommandLineArguments = new string[] { "some", "cmds" }
                };
            }

            // Applied to caller-supplied options as well as the defaults built above.
            applicationOptions.OutputDirectory = OutputDirectory;

            var factory = GetFactory(applicationOptions);

            IGenome genome;

            if (fakeReferences == null)
            {
                genome = factory.GetReferenceGenome(GenomeDirectory);
            }
            else
            {
                genome = new MockGenome(fakeReferences, GenomeDirectory);
            }

            if (threadByChr)
            {
                var processor = new GenomeProcessor(factory, genome, false);

                processor.Execute(1);
            }
            else
            {
                var processor = new GenomeProcessor(factory, genome);

                processor.Execute(1);
            }

            var alleles      = AlleleReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

            if (doCheckVariants)
            {
                if (doCountsOnly > 0)
                {
                    // xUnit takes (expected, actual).
                    Assert.Equal(doCountsOnly, variantCalls.Count);
                }
                else
                {
                    CheckVariants(variantCalls, expectedVariants);
                }
            }

            if (doCheckReferences)
            {
                var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

                // Build the variant positions once instead of once per allele.
                var variantPositions = variantCalls.Select(v => v.ReferencePosition).ToList();

                // make sure no reference calls at variant positions
                Assert.Equal(referenceAlleles.Count,
                             alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
            }
        }
        /// <summary>
        /// Runs the genome processor twice over the same small BAM against a mocked chr19
        /// reference: once with <c>MinimumCoverage = 1000</c> and gVCF output on, then with
        /// neither filter nor calling parameters set and gVCF output off. The genome VCF path
        /// is read after each run.
        /// </summary>
        /// <remarks>
        /// Nothing is asserted on the results; the test only checks that both runs and both
        /// reads complete without throwing.
        /// NOTE(review): the second run disables gVCF output yet the same "genome.vcf" path is
        /// read afterwards, so that read may be seeing the file left by the first run; confirm.
        /// </remarks>
        public void Pisces_LowDepthTest()
        {
            var mockReferences = new List <ChrReference>
            {
                new ChrReference
                {
                    Name     = "chr19",
                    Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
                }
            };

            // First pass: minimum coverage raised to 1000, gVCF output enabled.
            var highCoverageOptions = new PiscesApplicationOptions
            {
                BAMPaths            = new[] { _bamSmallS1 },
                GenomePaths         = new[] { _genomeChr19 },
                DebugMode           = true,
                CallMNVs            = true,
                UseMNVReallocation  = false,
                MaxSizeMNV          = 100,
                OutputDirectory     = TestPaths.LocalTestDataDirectory,
                BamFilterParameters = new Domain.Options.BamFilterParameters
                {
                    MinimumBaseCallQuality = 20
                },
                VariantCallingParameters = new Domain.Options.VariantCallingParameters
                {
                    MinimumVariantQScore = 20,
                    MinimumCoverage      = 1000,
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters
                {
                    OutputGvcfFile = true,
                }
            };

            // Both reads below use this path: the first BAM path with its extension swapped for "genome.vcf".
            var genomeVcfPath = Path.ChangeExtension(highCoverageOptions.BAMPaths[0], "genome.vcf");

            var     highCoverageFactory = new Factory(highCoverageOptions);
            IGenome genomeRef           = new MockGenome(mockReferences, _genomeChr19);

            new GenomeProcessor(highCoverageFactory, genomeRef).Execute(1);
            var highCoverageResults = AlleleReader.GetAllVariantsInFile(genomeVcfPath);

            // Second pass: same inputs, no filter or calling parameters, gVCF output disabled.
            var defaultOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { _bamSmallS1 },
                GenomePaths          = new[] { _genomeChr19 },
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                OutputDirectory      = TestPaths.LocalTestDataDirectory,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters
                {
                    OutputGvcfFile = false,
                }
            };
            var defaultFactory = new Factory(defaultOptions);

            new GenomeProcessor(defaultFactory, genomeRef).Execute(1);
            var defaultResults = AlleleReader.GetAllVariantsInFile(genomeVcfPath);
        }
        /// <summary>
        /// Processes Chr17Chr19.bam with a chr17-only interval file and compares the filters,
        /// contigs and variants of the produced VCF with those of the expected VCF.
        /// </summary>
        /// <remarks>
        /// Both VCF readers are held in <c>using</c> blocks so they are released even when an
        /// assertion fails. The observed VCF is deleted only when every check passes.
        /// </remarks>
        public void IntervalTestingWithVcf()
        {
            var bamFile1Path         = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");          //has data from chr17,7572952 and chr19,3118883
            var interval1Path        = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");         //chr 17 only
            var outDir               = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
            var vcfObservedFile1Path = Path.Combine(outDir, "Chr17Chr19.vcf");                                    //only results from chr17
            var vcfExpectedFile1     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.vcf"); //only results from chr17

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

            var factory = MakeVcfFactory(new List <string> {
                bamFile1Path
            },
                                         new List <string> {
                interval1Path
            }, outDir);

            var genome1 = factory.GetReferenceGenome(genomeDirectory);

            var processor = new GenomeProcessor(factory, genome1);
            var chrs      = genome1.ChromosomesToProcess;

            Assert.Equal("chr17", chrs[0]);

            processor.InternalExecute(10);
            Assert.Equal(1, genome1.ChromosomesToProcess.Count);
            Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

            using (var reader1 = new AlleleReader(vcfObservedFile1Path))
            using (var readerExp1 = new AlleleReader(vcfExpectedFile1))
            {
                var observedFilters1Results = GetFilters(reader1);
                var observedContigs1Results = GetContigs(reader1);
                var observedVcf1Results     = reader1.GetVariants().ToList();

                //the expected results:
                var filters1Expected = GetFilters(readerExp1);
                var contigs1Expected = GetContigs(readerExp1);
                var vcf1Expected     = readerExp1.GetVariants().ToList();

                /*
                 ##FILTER=<ID=q30,Description="Quality score less than 30">
                 ##FILTER=<ID=SB,Description="Variant strand bias too high">
                 ##FILTER=<ID=R5x9,Description="Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9">
                 ##FILTER=<ID=NC,Description="No-call rate is above 0.6">
                 * */

                Assert.Equal(4, observedFilters1Results.Count);

                //##contig=<ID=chr17,length=7573100>
                Assert.Equal(1, observedContigs1Results.Count);
                Assert.Equal(1, observedVcf1Results.Count);

                //check variants and contigs all come out the same
                for (int i = 0; i < contigs1Expected.Count; i++)
                {
                    Assert.Equal(contigs1Expected[i], observedContigs1Results[i]);
                }

                for (int i = 0; i < filters1Expected.Count; i++)
                {
                    Assert.Equal(filters1Expected[i].ToString(), observedFilters1Results[i].ToString());
                }

                for (int i = 0; i < vcf1Expected.Count; i++)
                {
                    Assert.Equal(vcf1Expected[i].ToString(), observedVcf1Results[i].ToString());
                }
            }

            // The readers are disposed at this point, so the observed file can be removed.
            File.Delete(vcfObservedFile1Path);
        }
예제 #15
0
        /// <summary>
        /// Runs a <c>GenomeProcessor</c> over Chr17Chr19.bam with a mocked variant caller that
        /// sleeps 500 ms per call, checks that only the combined VCF exists, and then counts
        /// from the log how many chromosomes started before the first one completed.
        /// </summary>
        /// <param name="numberOfThreads">Thread count handed to <c>GenomeProcessor.Execute</c>.</param>
        /// <param name="expectedNumberOfThreads">
        /// Expected number of "Start processing chr" log lines seen before the first
        /// "Completed processing chr" line.
        /// </param>
        private void ExecuteChromosomeThreadingTest(int numberOfThreads, int expectedNumberOfThreads)
        {
            var bamFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.bam");
            var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.vcf");
            var genomePath  = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr17chr19");

            var options = new ApplicationOptions
            {
                BAMPaths    = new[] { bamFilePath },
                GenomePaths = new[] { genomePath },
            };

            var logFile = Path.Combine(options.LogFolder, options.LogFileName);

            // Start from a clean log so only this run's lines are counted below.
            if (File.Exists(logFile))
            {
                File.Delete(logFile);
            }

            Logger.TryOpenLog(options.LogFolder, options.LogFileName);

            try
            {
                var factory = new MockFactoryWithDefaults(options);

                factory.MockSomaticVariantCaller = new Mock <ISomaticVariantCaller>();
                factory.MockSomaticVariantCaller.Setup(s => s.Execute()).Callback(() =>
                {
                    Thread.Sleep(500);
                });
                var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), false);

                processor.Execute(numberOfThreads);

                Assert.False(File.Exists(vcfFilePath + "_chr17"));
                Assert.False(File.Exists(vcfFilePath + "_chr19"));
                Assert.True(File.Exists(vcfFilePath));
            }
            finally
            {
                // Close the log even when processing or an assertion throws, so a failing
                // test does not leave the log open for whatever runs next.
                Logger.TryCloseLog();
            }

            var threadsSpawnedBeforeFirstCompleted = 0;

            using (var reader = new StreamReader(logFile))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    // Stop at the first completion; only starts before it are counted.
                    if (line.Contains("Completed processing chr"))
                    {
                        break;
                    }

                    if (line.Contains("Start processing chr"))
                    {
                        threadsSpawnedBeforeFirstCompleted++;
                    }
                }
            }

            Assert.Equal(expectedNumberOfThreads, threadsSpawnedBeforeFirstCompleted);
        }
예제 #16
0
        /// <summary>
        /// Runs a GenomeProcessor over two BAM files with a mocked variant caller, checks that both
        /// output VCF files exist, and then inspects the log file line by line.
        /// For each of chr17 and chr19, the number of "Start processing chr" lines seen since that
        /// chromosome's counter was last reset must equal <paramref name="expectedNumberOfThreads"/>
        /// when its "Completed processing chr" line is reached. The log must also never mention
        /// "Processing chromosome 'chr17'" once "Processing chromosome 'chr19'" has been seen.
        /// </summary>
        /// <param name="numberOfThreads">Value passed straight through to <c>processor.Execute</c>.</param>
        /// <param name="expectedNumberOfThreads">Expected count of start lines per chromosome at its completion line.</param>
        private void ExecuteTest(int numberOfThreads, int expectedNumberOfThreads)
        {
            var bamFilePath  = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35.bam");
            var bamFilePath2 = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35_removedSQlines.bam");
            var vcfFilePath  = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35.vcf");
            var vcfFilePath2 = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35_removedSQlines.vcf");
            var genomePath   = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr17chr19");

            var options = new ApplicationOptions
            {
                BAMPaths    = new[] { bamFilePath, bamFilePath2 },
                GenomePaths = new[] { genomePath },
            };

            var logFile = Path.Combine(options.LogFolder, ApplicationOptions.LogFileName);

            // Remove any log left by an earlier run so only this run's entries are inspected below.
            if (File.Exists(logFile))
            {
                File.Delete(logFile);
            }

            Logger.TryOpenLog(options.LogFolder, ApplicationOptions.LogFileName);

            try
            {
                var factory = new MockFactoryWithDefaults(options);

                // Each mocked caller execution just sleeps for half a second
                // (presumably so start/completion log entries from parallel work can interleave -- confirm).
                factory.MockSomaticVariantCaller = new Mock<ISomaticVariantCaller>();
                factory.MockSomaticVariantCaller.Setup(s => s.Execute()).Callback(() => Thread.Sleep(500));

                var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath));

                processor.Execute(numberOfThreads);

                Assert.True(File.Exists(vcfFilePath));
                Assert.True(File.Exists(vcfFilePath2));
            }
            finally
            {
                // Close the log even when execution or an assertion above fails; otherwise the log
                // stays open and a later test's File.Delete(logFile) may fail.
                Logger.TryCloseLog();
            }

            // Per chromosome: Item1 = start lines counted so far, Item2 = whether a start line has been seen
            // since the last reset.
            var trackedChromosomes = new[] { "chr17", "chr19" };
            var chrCheck = new Dictionary<string, Tuple<int, bool>>();

            foreach (var chr in trackedChromosomes)
            {
                chrCheck[chr] = Tuple.Create(0, false);
            }

            var startedChr19 = false;

            using (var reader = new StreamReader(logFile))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    foreach (var chr in trackedChromosomes)
                    {
                        if (line.Contains("Start processing chr " + chr))
                        {
                            chrCheck[chr] = Tuple.Create(chrCheck[chr].Item1 + 1, true);
                        }

                        // A completion line only counts once at least one start line was seen for this chromosome.
                        if (line.Contains("Completed processing chr " + chr) && chrCheck[chr].Item2)
                        {
                            Assert.Equal(expectedNumberOfThreads, chrCheck[chr].Item1);

                            // Reset so later completion lines for the same chromosome are ignored
                            // until another start line appears.
                            chrCheck[chr] = Tuple.Create(0, false);
                        }
                    }

                    // make sure chr 17 fully completes before 19 starts
                    if (line.Contains("Processing chromosome 'chr19'"))
                    {
                        startedChr19 = true;
                    }

                    Assert.False(line.Contains("Processing chromosome 'chr17'") && startedChr19);
                }
            }
        }
        // Test that we get the same results, in the same order, when using multiple samples and intervals.
        // First test running two samples together, then test running the two samples individually, then test it the "thread by chr" way.
        // Nothing strange should happen.
        // Regression test based on a real bug where a gvcf came out out of order; it only happened for
        // multiple-BAM runs with different interval files.
        public void IntervalTestingWithMultipleSamples()
        {
            var bamFile1Path     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");                 //has data from chr17,7572952 and chr19,3118883
            var bamFile2Path     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.bam");
            var interval1Path    = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");                //chr 17 only
            var interval2Path    = Path.Combine(TestPaths.LocalTestDataDirectory, "poorlyOrdered.picard");           //disordered, chr 19 first.
            var outDir           = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
            var vcfFile1Path     = Path.Combine(outDir, "Chr17Chr19.genome.vcf");                                    //only results from chr17
            var vcfFile2Path     = Path.Combine(outDir, "Chr17again.genome.vcf");                                    //show results from chr17 and 19
            var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.genome.vcf"); //only results from chr17
            var vcfExpectedFile2 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.expected.genome.vcf"); //show results from chr17 and 19

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

            // One factory for the combined run and one per sample for the individual runs.
            var twoSampleFactory = MakeFactory(
                new List<string> { bamFile1Path, bamFile2Path },
                new List<string> { interval1Path, interval2Path },
                outDir);

            var firstSampleFactory = MakeFactory(
                new List<string> { bamFile1Path },
                new List<string> { interval1Path },
                outDir);

            var secondSampleFactory = MakeFactory(
                new List<string> { bamFile2Path },
                new List<string> { interval2Path },
                outDir);

            //regular two-sample run mode.

            var genome  = twoSampleFactory.GetReferenceGenome(genomeDirectory);
            var genome1 = firstSampleFactory.GetReferenceGenome(genomeDirectory);
            var genome2 = secondSampleFactory.GetReferenceGenome(genomeDirectory);

            var processor = new GenomeProcessor(twoSampleFactory, genome);

            var chrs = genome.ChromosomesToProcess;

            Assert.Equal("chr7", chrs[0]);
            Assert.Equal("chr8", chrs[1]);
            Assert.Equal("chr17", chrs[2]);
            Assert.Equal("chr19", chrs[3]);

            processor.InternalExecute(10);

            // The chromosome list must be the same after execution as it was before.
            chrs = genome.ChromosomesToProcess;
            Assert.Equal("chr7", chrs[0]);
            Assert.Equal("chr8", chrs[1]);
            Assert.Equal("chr17", chrs[2]);
            Assert.Equal("chr19", chrs[3]);

            //just be aware, when we process the samples individually, we use different genome lists.
            Assert.Equal(4, genome.ChromosomesToProcess.Count);
            Assert.Equal(1, genome1.ChromosomesToProcess.Count);
            Assert.Equal(4, genome2.ChromosomesToProcess.Count);
            Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);
            Assert.Equal("chr7", genome2.ChromosomesToProcess[0]);
            Assert.Equal("chr19", genome2.ChromosomesToProcess[3]);

            var reader1 = new AlleleReader(vcfFile1Path);
            var reader2 = new AlleleReader(vcfFile2Path);

            var contigs1Results = GetContigs(reader1);
            var contigs2Results = GetContigs(reader2);
            var vcf1Results     = reader1.GetVariants().ToList();
            var vcf2Results     = reader2.GetVariants().ToList();

            //the expected results:
            var readerExp1 = new AlleleReader(vcfExpectedFile1);
            var readerExp2 = new AlleleReader(vcfExpectedFile2);

            try
            {
                var contigs1Expected = GetContigs(readerExp1);
                var contigs2Expected = GetContigs(readerExp2);
                var vcf1Expected     = readerExp1.GetVariants().ToList();
                var vcf2Expected     = readerExp2.GetVariants().ToList();

                Assert.Equal(4, contigs1Results.Count);
                Assert.Equal(4, contigs2Results.Count);
                Assert.Equal(11, vcf1Results.Count);
                Assert.Equal(71, vcf2Results.Count);

                //check variants and contigs all come out the same
                CheckForOrdering(contigs1Results, contigs2Results, contigs1Expected, contigs2Expected, vcf1Expected, vcf2Expected);

                reader1.Dispose();
                reader2.Dispose();
                File.Delete(vcfFile1Path);
                File.Delete(vcfFile2Path);

                //now check again, processing them separately
                processor = new GenomeProcessor(firstSampleFactory, genome1);
                processor.InternalExecute(10);
                processor = new GenomeProcessor(secondSampleFactory, genome2);
                processor.InternalExecute(10);

                reader1 = new AlleleReader(vcfFile1Path);
                reader2 = new AlleleReader(vcfFile2Path);

                contigs1Results = GetContigs(reader1);
                contigs2Results = GetContigs(reader2);

                // The variants are read back here, but these lists are not passed to CheckForOrdering.
                vcf1Results = reader1.GetVariants().ToList();
                vcf2Results = reader2.GetVariants().ToList();

                //check variants all come out the same (the contigs will be different as shown)
                CheckForOrdering(contigs1Results, contigs2Results,
                                 new List<string>() { "chr17" },
                                 contigs2Expected, vcf1Expected, vcf2Expected);

                reader1.Dispose();
                reader2.Dispose();
                File.Delete(vcfFile1Path);

                // Also remove the second sample's output so the next phase cannot read a stale file
                // left over from this one.
                File.Delete(vcfFile2Path);

                //now check again, processing them "thread by chr" way
                processor = new GenomeProcessor(twoSampleFactory, genome, false);
                processor.InternalExecute(10);

                reader1 = new AlleleReader(vcfFile1Path);
                reader2 = new AlleleReader(vcfFile2Path);

                contigs1Results = GetContigs(reader1);
                contigs2Results = GetContigs(reader2);
                vcf1Results     = reader1.GetVariants().ToList();
                vcf2Results     = reader2.GetVariants().ToList();

                //check variants all come out the same (the contigs will be back to normal)
                CheckForOrdering(contigs1Results, contigs2Results,
                                 contigs2Expected, contigs2Expected, vcf1Expected, vcf2Expected);

                reader1.Dispose();
                reader2.Dispose();
                File.Delete(vcfFile1Path);
                File.Delete(vcfFile2Path);
            }
            finally
            {
                // The expected-result readers are needed across all three phases; release them
                // once at the end, whether or not an assertion failed.
                readerExp1.Dispose();
                readerExp2.Dispose();
            }
        }
예제 #18
0
        /// <summary>
        /// Runs a GenomeProcessor on a single BAM with a chr17-only interval file and compares the
        /// filters, contigs and variants read from the produced VCF with those read from the expected VCF.
        /// </summary>
        public void IntervalTestingWithVcf()
        {
            var bamFile1Path     = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.bam");          //has data from chr17,7572952 and chr19,3118883
            var interval1Path    = Path.Combine(UnitTestPaths.TestDataDirectory, "chr17int.picard");         //chr 17 only
            var outDir           = Path.Combine(UnitTestPaths.WorkingDirectory, "IntervalTests");
            var vcfFile1Path     = Path.Combine(outDir, "Chr17Chr19.vcf");                                   //only results from chr17
            var vcfExpectedFile1 = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.expected.vcf"); //only results from chr17

            var genomeDirectory = Path.Combine(UnitTestPaths.TestGenomesDirectory, "fourChrs");

            var factory = MakeVcfFactory(
                new List<string> { bamFile1Path },
                new List<string> { interval1Path },
                outDir);

            var genome1 = factory.GetReferenceGenome(genomeDirectory);

            var processor = new GenomeProcessor(factory, genome1);
            var chrs      = genome1.ChromosomesToProcess;

            Assert.Equal("chr17", chrs[0]);

            processor.InternalExecute(10);

            // Only chr17 should be listed for processing after execution.
            Assert.Equal(1, genome1.ChromosomesToProcess.Count);
            Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

            var reader1 = new VcfReader(vcfFile1Path);

            try
            {
                var filters1Results = GetFilters(reader1);
                var contigs1Results = GetContigs(reader1);
                var vcf1Results     = reader1.GetVariants().ToList();

                //the expected results:
                var readerExp1 = new VcfReader(vcfExpectedFile1);

                try
                {
                    var filters1Expected = GetFilters(readerExp1);
                    var contigs1Expected = GetContigs(readerExp1);
                    var vcf1Expected     = readerExp1.GetVariants().ToList();

                    Assert.Equal(3, filters1Results.Count);
                    Assert.Equal(1, contigs1Results.Count);
                    Assert.Equal(1, vcf1Results.Count);

                    //check variants and contigs all come out the same
                    for (int i = 0; i < contigs1Expected.Count; i++)
                    {
                        Assert.Equal(contigs1Expected[i], contigs1Results[i]);
                    }

                    // Filters and variants are compared through their string representations.
                    for (int i = 0; i < filters1Expected.Count; i++)
                    {
                        Assert.Equal(filters1Expected[i].ToString(), filters1Results[i].ToString());
                    }

                    for (int i = 0; i < vcf1Expected.Count; i++)
                    {
                        Assert.Equal(vcf1Expected[i].ToString(), vcf1Results[i].ToString());
                    }
                }
                finally
                {
                    // Release the expected-result reader whether or not an assertion failed.
                    readerExp1.Dispose();
                }
            }
            finally
            {
                // Release the output reader even when an assertion above fails.
                reader1.Dispose();
            }

            // Reached only when every assertion passed.
            File.Delete(vcfFile1Path);
        }