示例#1
0
 /// <summary>
 /// Captures the reference-ID mapping and option objects, and derives the degree of
 /// parallelism as the smaller of <c>stitcherOptions.NumThreads</c> and
 /// <see cref="Environment.ProcessorCount"/>.
 /// </summary>
 /// <param name="refIdMapping">Integer-to-string mapping stored as given.</param>
 /// <param name="stitcherOptions">Stitcher options; its <c>NumThreads</c> value is read here.</param>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 public ClassifierTransformerBlockFactory(
     Dictionary<int, string> refIdMapping,
     StitcherOptions stitcherOptions,
     GeminiOptions geminiOptions)
 {
     this._refIdMapping = refIdMapping;
     this._stitcherOptions = stitcherOptions;
     this._geminiOptions = geminiOptions;

     // Never ask for more workers than there are logical processors.
     var requestedThreads = stitcherOptions.NumThreads;
     this._maxDegreeOfParallelism = Math.Min(requestedThreads, Environment.ProcessorCount);
 }
示例#2
0
        /// <summary>
        /// Builds a <see cref="DataflowReadEvaluator"/> on top of mocked data sources, data outputs
        /// and block factories, runs <c>ProcessBam</c> over <paramref name="reads"/>, and then
        /// delegates the call-count checks to <c>VerifyCallNumbers</c>.
        /// </summary>
        /// <param name="reads">Read pairs served by the mocked data source.</param>
        /// <param name="geminiOptions">Options handed to the evaluator.</param>
        /// <param name="expectedRegions">Expected value forwarded to <c>VerifyCallNumbers</c>.</param>
        private static void VerifyFlow(List<ReadPair> reads, GeminiOptions geminiOptions, int expectedRegions)
        {
            // Block-factory test doubles wired into a single block factory source.
            var batchFactory = BatchBlockFactory();
            var classificationProvider = new Mock<IClassificationBlockProvider>();
            var classifierFactory = ClassifierBlockFactory();
            var blockFactorySource = BlockFactory(batchFactory, classifierFactory, classificationProvider);

            // Output side: the list is handed to the mocked output factory.
            var collectedAlignments = new List<BamAlignment>();
            var outputFactory = DataflowMocks.MockDataOutputFactory(collectedAlignments);

            // Input side: the mocked reader and read-pair source are bundled into one source factory.
            var reader = DataflowMocks.MockReader();
            var readPairSource = DataflowMocks.MockDataSource(reads);
            var sourceFactory = DataflowMocks.MockDataSourceFactory(reader, readPairSource);

            var sampleOptions = new GeminiSampleOptions
            {
                OutputFolder = "outfoldersample"
            };

            var evaluator = new DataflowReadEvaluator(
                geminiOptions,
                sourceFactory.Object,
                sampleOptions,
                outputFactory.Object,
                blockFactorySource.Object);

            evaluator.ProcessBam();

            VerifyCallNumbers(
                batchFactory,
                classifierFactory,
                classificationProvider,
                sourceFactory,
                blockFactorySource,
                expectedRegions);
        }
示例#3
0
        /// <summary>
        /// Runs the workflow (via <c>Execute</c>) over four ordinary read pairs plus four
        /// single-mate "loner" entries, then checks how many output alignments carry each read name.
        /// </summary>
        public void ProcessBam()
        {
            var stitchOpts = new StitcherOptions();
            var gemOpts = new GeminiOptions { RegionSize = 1000 };

            // Ordinary pairs at increasing positions.
            var pairedReads = new List<ReadPair>
            {
                TestHelpers.GetPair("5M1I5M", "5M1I5M", name: "Pair1"),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1001, name: "Pair2"),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1201, name: "Pair3"),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 10000, name: "Pair4")
            };

            // "LonerPair1": two mates close together, each wrapped in its own ReadPair.
            var nearMate1 = TestHelpers.CreateBamAlignment("AAAAAAAATA", 999, 1001, 30, true,
                cigar: new CigarAlignment("10M"), name: "LonerPair1");
            nearMate1.SetIsProperPair(true);
            var nearLoner1 = new ReadPair(nearMate1, "LonerPair1");

            var nearMate2 = TestHelpers.CreateBamAlignment("AAAAATAAAA", 1002, 999, 30, true,
                cigar: new CigarAlignment("10M"), name: "LonerPair1", isFirstMate: false);
            nearMate2.SetIsProperPair(true);
            var nearLoner2 = new ReadPair(nearMate2, "LonerPair1", readNumber: ReadNumber.Read2);

            // "LonerPairFarApart": two mates positioned far from one another.
            var farMate1 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 999, 5001, 30, true,
                cigar: new CigarAlignment("10M"), name: "LonerPairFarApart");
            farMate1.SetIsProperPair(true);

            var farMate2 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 5001, 999, 30, true,
                cigar: new CigarAlignment("10M"), name: "LonerPairFarApart", isFirstMate: false);
            farMate2.SetIsProperPair(true);

            var farLoner1 = new ReadPair(farMate1, name: "LonerPairFarApart");
            var farLoner2 = new ReadPair(farMate2, name: "LonerPairFarApart", readNumber: ReadNumber.Read2);

            var loners = new List<ReadPair> { nearLoner1, nearLoner2, farLoner1, farLoner2 };
            var output = new List<BamAlignment>();

            Execute(output, pairedReads, gemOpts, stitchOpts, loners);

            // Each of these names is expected exactly once in the output.
            var namesSeenOnce = new[] { "Pair1", "Pair2", "Pair3", "Pair4", "LonerPair1" };
            foreach (var expectedName in namesSeenOnce)
            {
                Assert.Equal(1, output.Count(a => a.Name == expectedName));
            }

            // The far-apart mates are expected as two separate output alignments.
            Assert.Equal(2, output.Count(a => a.Name == "LonerPairFarApart"));
            Assert.Equal(7, output.Count);

            output.Clear();
        }
 /// <summary>
 /// Stores the option objects and output directory for later use by this factory.
 /// </summary>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="realignmentAssessmentOptions">Realignment assessment options stored as given.</param>
 /// <param name="stitcherOptions">Stitcher options stored as given.</param>
 /// <param name="realignmentOptions">Realignment options stored as given.</param>
 /// <param name="outputDir">Output directory string stored as given.</param>
 public BamRealignmentFactory(
     GeminiOptions geminiOptions,
     RealignmentAssessmentOptions realignmentAssessmentOptions,
     StitcherOptions stitcherOptions,
     RealignmentOptions realignmentOptions,
     string outputDir)
 {
     this._geminiOptions = geminiOptions;
     this._realignmentAssessmentOptions = realignmentAssessmentOptions;
     this._stitcherOptions = stitcherOptions;
     this._realignmentOptions = realignmentOptions;
     this._outputDir = outputDir;
 }
示例#5
0
 /// <summary>
 /// Stores the options and factory collaborators that this evaluator is constructed with.
 /// </summary>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="dataSourceFactory">Data source factory stored as given.</param>
 /// <param name="geminiSampleOptions">Sample options stored as given.</param>
 /// <param name="dataOutputFactory">Data output factory stored as given.</param>
 /// <param name="blockFactorySource">Block factory source stored as given.</param>
 public DataflowReadEvaluator(
     GeminiOptions geminiOptions,
     IGeminiDataSourceFactory dataSourceFactory,
     GeminiSampleOptions geminiSampleOptions,
     IGeminiDataOutputFactory dataOutputFactory,
     IBlockFactorySource blockFactorySource)
 {
     this._geminiOptions = geminiOptions;
     this._dataSourceFactory = dataSourceFactory;
     this._geminiSampleOptions = geminiSampleOptions;
     this._dataOutputFactory = dataOutputFactory;
     this._blockFactorySource = blockFactorySource;
 }
示例#6
0
 /// <summary>
 /// Initializes every nested options object to a fresh default-constructed instance.
 /// </summary>
 public GeminiApplicationOptions()
 {
     this.StitcherOptions = new StitcherOptions();
     this.GeminiSampleOptions = new GeminiSampleOptions();
     this.IndelFilteringOptions = new IndelFilteringOptions();
     this.RealignmentAssessmentOptions = new RealignmentAssessmentOptions();
     this.RealignmentOptions = new RealignmentOptions();
     this.GeminiOptions = new GeminiOptions();
 }
示例#7
0
 /// <summary>
 /// Stores the supplied options and collaborators, and derives the degree of parallelism as
 /// the smaller of <c>stitcherOptions.NumThreads</c> and <see cref="Environment.ProcessorCount"/>.
 /// </summary>
 /// <param name="stitcherOptions">Stitcher options; its <c>NumThreads</c> value is read here.</param>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="refIdMapping">Integer-to-string mapping stored as given.</param>
 /// <param name="bamRealignmentFactory">Realignment factory stored as given.</param>
 /// <param name="dataSourceFactory">Data source factory stored as given.</param>
 /// <param name="geminiSampleOptions">Sample options stored as given.</param>
 /// <param name="realignmentOptions">Realignment options stored as given.</param>
 /// <param name="geminiFactory">Gemini factory stored as given.</param>
 public BlockFactorySource(
     StitcherOptions stitcherOptions,
     GeminiOptions geminiOptions,
     Dictionary<int, string> refIdMapping,
     BamRealignmentFactory bamRealignmentFactory,
     IGeminiDataSourceFactory dataSourceFactory,
     GeminiSampleOptions geminiSampleOptions,
     RealignmentOptions realignmentOptions,
     IGeminiFactory geminiFactory)
 {
     this._stitcherOptions = stitcherOptions;
     this._geminiOptions = geminiOptions;
     this._refIdMapping = refIdMapping;
     this._bamRealignmentFactory = bamRealignmentFactory;
     this._dataSourceFactory = dataSourceFactory;
     this._geminiSampleOptions = geminiSampleOptions;
     this._realignmentOptions = realignmentOptions;
     this._geminiFactory = geminiFactory;

     // Never ask for more workers than there are logical processors.
     var requestedThreads = this._stitcherOptions.NumThreads;
     this._maxDegreeOfParallelism = Math.Min(requestedThreads, Environment.ProcessorCount);
 }
 /// <summary>
 /// Stores every supplied option, lookup and collaborator in the corresponding private field.
 /// No other work is done here.
 /// </summary>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="chrom">Chromosome name stored as given.</param>
 /// <param name="progressTracker">String-keyed concurrent counter dictionary stored as given.</param>
 /// <param name="categoryLookup">Per-classification concurrent counter dictionary stored as given.</param>
 /// <param name="actionBlockFactoryProvider">Action block factory provider stored as given.</param>
 /// <param name="aggregateRegionProcessor">Aggregate region processor stored as given.</param>
 /// <param name="lightDebug">Flag stored as given.</param>
 /// <param name="batchBlockFactory">Batch block factory stored as given.</param>
 /// <param name="binEvidenceFactory">Bin evidence factory stored as given.</param>
 /// <param name="categoriesForRealignment">List of classifications stored as given.</param>
 /// <param name="maxDegreeOfParallelism">Parallelism value stored as given.</param>
 public ClassificationBlockProvider(
     GeminiOptions geminiOptions,
     string chrom,
     ConcurrentDictionary<string, int> progressTracker,
     ConcurrentDictionary<PairClassification, int> categoryLookup,
     PairResultActionBlockFactoryProvider actionBlockFactoryProvider,
     IAggregateRegionProcessor aggregateRegionProcessor,
     bool lightDebug,
     PairResultBatchBlockFactory batchBlockFactory,
     IBinEvidenceFactory binEvidenceFactory,
     List<PairClassification> categoriesForRealignment,
     int maxDegreeOfParallelism)
 {
     this._geminiOptions = geminiOptions;
     this._chrom = chrom;
     this._progressTracker = progressTracker;
     this._categoryLookup = categoryLookup;
     this._actionBlockFactoryProvider = actionBlockFactoryProvider;
     this._aggregateRegionProcessor = aggregateRegionProcessor;
     this._lightDebug = lightDebug;
     this._batchBlockFactory = batchBlockFactory;
     this._binEvidenceFactory = binEvidenceFactory;
     this._categoriesForRealignment = categoriesForRealignment;
     this._maxDegreeOfParallelism = maxDegreeOfParallelism;
 }
        /// <summary>
        /// Smoke test: builds a <see cref="ClassificationBlockProvider"/> from real collaborators
        /// (with mocked writer and data sources) and calls
        /// <c>GetAndLinkAllClassificationBlocksWithEcFinalization</c> against a mocked source block.
        /// The test contains no explicit assertions; it passes when the call completes without throwing.
        /// </summary>
        public void GetAndLinkAllClassificationBlocksWithEcFinalization()
        {
            var geminiOptions       = new GeminiOptions();
            var chrom               = "chr1";
            var tracker             = new ConcurrentDictionary <string, int>();
            var categoryLookup      = new ConcurrentDictionary <PairClassification, int>();
            var mockWriterSource    = new Mock <IWriterSource>();
            var actionBlockProvider = new PairResultActionBlockFactoryProvider(mockWriterSource.Object, false, false,
                                                                               "chr1", 1, 1, false, 500, tracker, categoryLookup);
            var chrReference   = new ChrReference();
            var realignFactory = new BamRealignmentFactory(geminiOptions, new RealignmentAssessmentOptions(),
                                                           new StitcherOptions(), new RealignmentOptions(), "outdir");
            var gemFactory        = new GeminiFactory(geminiOptions, new IndelFilteringOptions());
            var dataSourceFactory = new Mock <IGeminiDataSourceFactory>();

            // Shared lookups handed to the aggregate region processor; all start empty.
            var masterIndelLookup    = new ConcurrentDictionary <string, IndelEvidence>();
            var masterOutcomesLookup = new ConcurrentDictionary <HashableIndel, int[]>();
            var masterFinalIndels    = new ConcurrentDictionary <HashableIndel, int>();
            var binEvidenceFactory   = new BinEvidenceFactory(geminiOptions, new GeminiSampleOptions());
            var catsForRealign       = new List <PairClassification>();

            var aggRegionProcessor = new AggregateRegionProcessor(chrReference,
                                                                  new Dictionary <int, string>()
            {
                { 1, "chr1" }
            },
                                                                  realignFactory, geminiOptions, gemFactory, chrom, dataSourceFactory.Object, new RealignmentOptions(),
                                                                  masterIndelLookup, masterOutcomesLookup, masterFinalIndels,
                                                                  catsForRealign, tracker);

            var provider = new ClassificationBlockProvider(geminiOptions, chrom, tracker, categoryLookup,
                                                           actionBlockProvider, aggRegionProcessor, false, new PairResultBatchBlockFactory(10), binEvidenceFactory,
                                                           catsForRealign, 1);


            var  sourceBlock = new Mock <ISourceBlock <PairResult> >();
            // Only needed to satisfy the 'out' parameter in the Moq setup expression below.
            bool consumed;

            sourceBlock.Setup(x => x.ConsumeMessage(It.IsAny <DataflowMessageHeader>(),
                                                    It.IsAny <ITargetBlock <PairResult> >(), out consumed)).Returns(new PairResult());
            var edgeStates   = new ConcurrentDictionary <int, EdgeState>();
            var edgeToWaitOn = new ConcurrentDictionary <int, Task>();

            provider.GetAndLinkAllClassificationBlocksWithEcFinalization(sourceBlock.Object, 1000, 2000, edgeStates,
                                                                         edgeToWaitOn, 0, false);
        }
示例#10
0
        /// <summary>
        /// Stores the supplied factories and options, and builds the <see cref="GeminiFactory"/>
        /// and <see cref="BamRealignmentFactory"/> used by the workflow.
        /// </summary>
        /// <param name="dataSourceFactory">Data source factory stored as given.</param>
        /// <param name="dataOutputFactory">Data output factory stored as given.</param>
        /// <param name="geminiOptions">Gemini options; stored and passed to both factories built here.</param>
        /// <param name="geminiSampleOptions">Sample options stored as given.</param>
        /// <param name="realignmentOptions">Realignment options; stored and passed to the realignment factory.</param>
        /// <param name="stitcherOptions">Stitcher options; when null, a default <see cref="StitcherOptions"/> is used instead.</param>
        /// <param name="outputDirectory">Output directory passed to the realignment factory.</param>
        /// <param name="realignmentAssessmentOptions">Assessment options passed to the realignment factory.</param>
        /// <param name="indelFilteringOptions">Indel filtering options passed to the Gemini factory.</param>
        /// <param name="samtoolsWrapper">Samtools wrapper stored as given.</param>
        public GeminiWorkflow(IGeminiDataSourceFactory dataSourceFactory,
                              IGeminiDataOutputFactory dataOutputFactory, GeminiOptions geminiOptions,
                              GeminiSampleOptions geminiSampleOptions, RealignmentOptions realignmentOptions, StitcherOptions stitcherOptions, string outputDirectory, RealignmentAssessmentOptions realignmentAssessmentOptions, IndelFilteringOptions indelFilteringOptions, ISamtoolsWrapper samtoolsWrapper)
        {
            _dataSourceFactory   = dataSourceFactory;
            _dataOutputFactory   = dataOutputFactory;
            _geminiOptions       = geminiOptions;
            _geminiSampleOptions = geminiSampleOptions;
            _realignmentOptions  = realignmentOptions;
            _samtoolsWrapper     = samtoolsWrapper;
            _stitcherOptions     = stitcherOptions ?? new StitcherOptions();

            _geminiFactory = new GeminiFactory(geminiOptions, indelFilteringOptions);

            // Pass the null-coalesced field, not the raw parameter, so the realignment factory
            // sees the same (never-null) stitcher options as the rest of the workflow.
            var bamRealignmentFactory = new BamRealignmentFactory(geminiOptions,
                                                                  realignmentAssessmentOptions, _stitcherOptions, realignmentOptions,
                                                                  outputDirectory);

            _bamRealignmentFactory = bamRealignmentFactory;
        }
示例#11
0
 /// <summary>
 /// Creates a new <see cref="GeminiOptions"/> and copies each of the properties listed below
 /// from <paramref name="options"/> by plain assignment.
 /// </summary>
 /// <remarks>
 /// NOTE(review): values are copied by assignment only; whether that is a true deep copy for
 /// any reference-typed property cannot be determined from this method alone — confirm
 /// against the <see cref="GeminiOptions"/> definition.
 /// </remarks>
 /// <param name="options">The instance to copy from.</param>
 /// <returns>A new instance carrying the same property values.</returns>
 public static GeminiOptions DeepCopy(this GeminiOptions options)
 {
     var copy = new GeminiOptions();

     copy.GenomeContextSize = options.GenomeContextSize;
     copy.IndelsCsvName = options.IndelsCsvName;
     copy.Debug = options.Debug;
     copy.StitchOnly = options.StitchOnly;
     copy.GenomePath = options.GenomePath;
     copy.SamtoolsPath = options.SamtoolsPath;
     copy.TrustSoftclips = options.TrustSoftclips;
     copy.SkipStitching = options.SkipStitching;
     copy.KeepBothSideSoftclips = options.KeepBothSideSoftclips;
     copy.SkipAndRemoveDups = options.SkipAndRemoveDups;
     copy.KeepProbeSoftclip = options.KeepProbeSoftclip;
     copy.KeepUnmergedBams = options.KeepUnmergedBams;
     copy.IsWeirdSamtools = options.IsWeirdSamtools;
     copy.UseHygeaComparer = options.UseHygeaComparer;
     copy.AllowRescoringOrigZero = options.AllowRescoringOrigZero;
     copy.IndexPerChrom = options.IndexPerChrom;
     copy.SoftclipUnknownIndels = options.SoftclipUnknownIndels;
     copy.RemaskMessySoftclips = options.RemaskMessySoftclips;
     copy.SkipEvidenceCollection = options.SkipEvidenceCollection;
     copy.FinalIndelsOverride = options.FinalIndelsOverride;
     copy.ReadCacheSize = options.ReadCacheSize;
     copy.MessySiteThreshold = options.MessySiteThreshold;
     copy.MessySiteWidth = options.MessySiteWidth;
     copy.CollectDepth = options.CollectDepth;
     copy.ImperfectFreqThreshold = options.ImperfectFreqThreshold;
     copy.IndelRegionFreqThreshold = options.IndelRegionFreqThreshold;
     copy.RegionDepthThreshold = options.RegionDepthThreshold;
     copy.NumConcurrentRegions = options.NumConcurrentRegions;
     copy.LogRegionsAndRealignments = options.LogRegionsAndRealignments;
     copy.LightDebug = options.LightDebug;
     copy.AvoidLikelySnvs = options.AvoidLikelySnvs;
     copy.RecalculateUsableSitesAfterSnowball = options.RecalculateUsableSitesAfterSnowball;
     copy.SortPerChrom = options.SortPerChrom;
     copy.ForceHighLikelihoodRealigners = options.ForceHighLikelihoodRealigners;
     copy.RequirePositiveOutcomeForSnowball = options.RequirePositiveOutcomeForSnowball;
     copy.RegionSize = options.RegionSize;

     return copy;
 }
 /// <summary>
 /// Stores every supplied reference, option object, factory and shared lookup in the
 /// corresponding private field. No validation or other work is done here.
 /// </summary>
 /// <param name="chrReference">Chromosome reference stored as given.</param>
 /// <param name="refIdMapping">Integer-to-string mapping stored as given.</param>
 /// <param name="bamRealignmentFactory">Realignment factory stored as given.</param>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="geminiFactory">Gemini factory stored as given.</param>
 /// <param name="chrom">Chromosome name stored as given.</param>
 /// <param name="dataSourceFactory">Data source factory stored as given.</param>
 /// <param name="realignmentOptions">Realignment options stored as given.</param>
 /// <param name="masterIndelLookup">String-keyed concurrent indel evidence dictionary stored as given.</param>
 /// <param name="masterOutcomesLookup">Per-indel concurrent dictionary of int arrays stored as given.</param>
 /// <param name="masterFinalIndels">Per-indel concurrent dictionary of ints stored as given.</param>
 /// <param name="categoriesForRealignment">List of classifications stored as given.</param>
 /// <param name="progressTracker">String-keyed concurrent counter dictionary stored as given.</param>
 public AggregateRegionProcessor(
     ChrReference chrReference,
     Dictionary<int, string> refIdMapping,
     BamRealignmentFactory bamRealignmentFactory,
     GeminiOptions geminiOptions,
     IGeminiFactory geminiFactory,
     string chrom,
     IGeminiDataSourceFactory dataSourceFactory,
     RealignmentOptions realignmentOptions,
     ConcurrentDictionary<string, IndelEvidence> masterIndelLookup,
     ConcurrentDictionary<HashableIndel, int[]> masterOutcomesLookup,
     ConcurrentDictionary<HashableIndel, int> masterFinalIndels,
     List<PairClassification> categoriesForRealignment,
     ConcurrentDictionary<string, int> progressTracker)
 {
     this._chrReference = chrReference;
     this._refIdMapping = refIdMapping;
     this._bamRealignmentFactory = bamRealignmentFactory;
     this._geminiOptions = geminiOptions;
     this._geminiFactory = geminiFactory;
     this._chrom = chrom;
     this._dataSourceFactory = dataSourceFactory;
     this._realignmentOptions = realignmentOptions;
     this._masterIndelLookup = masterIndelLookup;
     this._masterOutcomesLookup = masterOutcomesLookup;
     this._masterFinalIndels = masterFinalIndels;
     this._categoriesForRealignment = categoriesForRealignment;
     this._progressTracker = progressTracker;
 }
示例#13
0
        /// <summary>
        /// Runs <c>VerifyFlow</c> over the same four read pairs with three different
        /// <c>RegionSize</c> settings, each with its own expected region count.
        /// </summary>
        public void Flow()
        {
            var pairs = new List<ReadPair>
            {
                TestHelpers.GetPair("5M1I5M", "5M1I5M"),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1001),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1201),
                TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 10000)
            };

            var options = new GeminiOptions();
            options.RegionSize = 1000;

            // Initial region (0-1000), 1000-2000, 10000-11000, final region
            VerifyFlow(pairs, options, 4);

            // The same options instance is reused; only the region size changes between runs.
            options.RegionSize = 100;
            VerifyFlow(pairs, options, 5);

            options.RegionSize = 100000;
            VerifyFlow(pairs, options, 2);
        }
示例#14
0
        /// <summary>
        /// Builds a <see cref="GeminiWorkflow"/> on top of mocked data sources, outputs and
        /// samtools wrapper, then executes it.
        /// </summary>
        /// <param name="alignments">List handed to the mocked data output factory.</param>
        /// <param name="reads">Read pairs served by the mocked data source.</param>
        /// <param name="geminiOptions">Gemini options passed to the workflow.</param>
        /// <param name="stitcherOptions">Stitcher options passed to the workflow.</param>
        /// <param name="lonerpairs">Optional additional read pairs passed to the mocked data source.</param>
        private static void Execute(List <BamAlignment> alignments, List <ReadPair> reads, GeminiOptions geminiOptions, StitcherOptions stitcherOptions, List <ReadPair> lonerpairs = null)
        {
            var mockDataOutputFactory = DataflowMocks.MockDataOutputFactory(alignments);
            var mockTextWriter        = new Mock <ITextWriter>();

            // Any text-writer request, regardless of the string argument, gets the same mocked writer.
            mockDataOutputFactory.Setup(x => x.GetTextWriter(It.IsAny <string>()))
            .Returns(mockTextWriter.Object);
            var mockReader            = DataflowMocks.MockReader();
            var mockReadPairSource    = DataflowMocks.MockDataSource(reads, lonerpairs);
            var mockDataSourceFactory = DataflowMocks.MockDataSourceFactory(mockReader, mockReadPairSource);
            var mockSamtoolsWrapper   = new Mock <ISamtoolsWrapper>();
            var geminiSampleOptions   = new GeminiSampleOptions()
            {
                RefId = 1, OutputFolder = "OutFolder"
            };

            var geminiWorkflow = new GeminiWorkflow(mockDataSourceFactory.Object, mockDataOutputFactory.Object,
                                                    geminiOptions, geminiSampleOptions, new RealignmentOptions(), stitcherOptions, "outdir",
                                                    new RealignmentAssessmentOptions(), new IndelFilteringOptions(), mockSamtoolsWrapper.Object);

            geminiWorkflow.Execute();
        }
 /// <summary>
 /// Stores the Gemini options and sample options for later use by this factory.
 /// </summary>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="geminiSampleOptions">Sample options stored as given.</param>
 public BinEvidenceFactory(
     GeminiOptions geminiOptions,
     GeminiSampleOptions geminiSampleOptions)
 {
     this._geminiOptions = geminiOptions;
     this._geminiSampleOptions = geminiSampleOptions;
 }
示例#16
0
 /// <summary>
 /// Stores the Gemini options and indel filtering options for later use by this factory.
 /// </summary>
 /// <param name="geminiOptions">Gemini options stored as given.</param>
 /// <param name="indelFilteringOptions">Indel filtering options stored as given.</param>
 public GeminiFactory(
     GeminiOptions geminiOptions,
     IndelFilteringOptions indelFilteringOptions)
 {
     this._geminiOptions = geminiOptions;
     this._indelFilteringOptions = indelFilteringOptions;
 }
        /// <summary>
        /// Exercises <c>AggregateRegionProcessor.GetAggregateRegionResults</c> for the region
        /// 10000-20000 and checks the name and minimum position of the resulting edge state, how
        /// many alignments are ready to flush, and how many pair results per classification are
        /// carried over as edge alignments.
        /// </summary>
        public void GetAggregateRegionResults()
        {
            // --- Collaborators -------------------------------------------------------------
            var options = new GeminiOptions();
            ChrReference noReference = null;
            var refIds = new Dictionary<int, string>
            {
                { 1, "chr1" }
            };
            var realignmentFactory = new BamRealignmentFactory(
                new GeminiOptions(),
                new RealignmentAssessmentOptions(),
                new StitcherOptions(),
                new RealignmentOptions(),
                "out");
            var factory = new GeminiFactory(options, new IndelFilteringOptions());

            var sourceFactoryMock = new Mock<IGeminiDataSourceFactory>();
            var indelSourceMock = new Mock<IChromosomeIndelSource>();

            // The indel source always reports an empty list of relevant indels.
            indelSourceMock
                .Setup(s => s.GetRelevantIndels(
                    It.IsAny<int>(),
                    It.IsAny<List<PreIndel>>(),
                    It.IsAny<List<HashableIndel>>(),
                    It.IsAny<List<PreIndel>>(),
                    It.IsAny<List<PreIndel>>()))
                .Returns(new List<KeyValuePair<HashableIndel, GenomeSnippet>>());
            sourceFactoryMock
                .Setup(f => f.GetChromosomeIndelSource(
                    It.IsAny<List<HashableIndel>>(),
                    It.IsAny<IGenomeSnippetSource>()))
                .Returns(indelSourceMock.Object);
            var sourceFactory = sourceFactoryMock.Object;

            var sharedIndels = new ConcurrentDictionary<string, IndelEvidence>();
            var sharedOutcomes = new ConcurrentDictionary<HashableIndel, int[]>();
            var sharedFinalIndels = new ConcurrentDictionary<HashableIndel, int>();
            var realignCategories = new List<PairClassification>();
            var progress = new ConcurrentDictionary<string, int>();

            var realignOptions = new RealignmentOptions
            {
                CategoriesForSnowballing = new List<PairClassification> { PairClassification.Disagree }
            };

            var processor = new AggregateRegionProcessor(
                noReference, refIds, realignmentFactory, options, factory, "chr1", sourceFactory,
                realignOptions, sharedIndels, sharedOutcomes, sharedFinalIndels, realignCategories, progress);

            // --- Region input --------------------------------------------------------------
            var regionIndels = new ConcurrentDictionary<string, IndelEvidence>();
            var regionEvidence = new BinEvidence(1, true, 20, false, 500, 1000);
            var previousEdgeEvidence = new BinEvidence(1, true, 20, false, 500, 1000);
            var previousEdge = new EdgeState
            {
                Name = "0-1000",
                EdgeAlignments = new Dictionary<PairClassification, List<PairResult>>(),
                BinEvidence = previousEdgeEvidence,
                EdgeIndels = new List<HashableIndel>(),
                EffectiveMinPosition = 0
            };

            var resultsByClass = new ConcurrentDictionary<PairClassification, List<PairResult>>();

            resultsByClass[PairClassification.Disagree] = new List<PairResult>
            {
                TestHelpers.GetPairResult(10000),
                TestHelpers.GetPairResult(10001),
                TestHelpers.GetPairResult(10002),
                TestHelpers.GetPairResult(19995)
            };
            resultsByClass[PairClassification.SingleMismatchStitched] = new List<PairResult>
            {
                TestHelpers.GetPairResult(19995),
                TestHelpers.GetPairResult(19995)
            };

            // Borderline case: the max position in the pair is >= EffectiveMaxPosition - 5000, even if one of the reads in the pair is not
            var maxPosition = 19999;
            var mateOffset = 1;
            var read2Borderline = maxPosition - 5000 + 1;
            var read1Borderline = read2Borderline - mateOffset;

            resultsByClass[PairClassification.UnstitchForwardMessy] = new List<PairResult>
            {
                TestHelpers.GetPairResult(read1Borderline, mateOffset), // One is just over border
                TestHelpers.GetPairResult(read1Borderline, 0)           // Both are within safe range
            };

            var regionInput = new RegionDataForAggregation
            {
                BinEvidence = regionEvidence,
                EdgeState = previousEdge,
                EffectiveMaxPosition = maxPosition,
                EffectiveMinPosition = 10000,
                PairResultLookup = resultsByClass
            };

            // --- Act -----------------------------------------------------------------------
            var outcome = processor.GetAggregateRegionResults(regionIndels, 10000, 20000, false, regionInput);

            // --- Assert --------------------------------------------------------------------
            // New edge state should have the correct items carrying over
            var newEdge = outcome.EdgeState;
            Assert.Equal("10000-20000", newEdge.Name);
            Assert.Equal(14999, newEdge.EffectiveMinPosition);

            // The four that are solidly in-bounds should be flushable immediately
            Assert.Equal(4, outcome.AlignmentsReadyToBeFlushed.Count);

            var carriedOver = newEdge.EdgeAlignments;
            Assert.Equal(1, carriedOver[PairClassification.Disagree].Count);
            Assert.Equal(2, carriedOver[PairClassification.SingleMismatchStitched].Count);
            Assert.Equal(1, carriedOver[PairClassification.UnstitchForwardMessy].Count);
        }