/// <summary>
/// Builds the factory from the reference-id mapping and the stitcher/Gemini option sets,
/// and derives the parallelism limit stored in <c>_maxDegreeOfParallelism</c>.
/// </summary>
/// <param name="refIdMapping">Dictionary keyed by integer reference id; the reference is kept, not copied.</param>
/// <param name="stitcherOptions">Stitcher options; <c>NumThreads</c> is read here to cap parallelism.</param>
/// <param name="geminiOptions">Gemini options; the reference is kept, not copied.</param>
public ClassifierTransformerBlockFactory(Dictionary<int, string> refIdMapping, StitcherOptions stitcherOptions,
    GeminiOptions geminiOptions)
{
    _geminiOptions = geminiOptions;
    _stitcherOptions = stitcherOptions;
    _refIdMapping = refIdMapping;

    // The limit is the smaller of the configured thread count and the processor count
    // reported by the runtime.
    var configuredThreads = stitcherOptions.NumThreads;
    var processorCount = Environment.ProcessorCount;
    _maxDegreeOfParallelism = Math.Min(configuredThreads, processorCount);
}
/// <summary>
/// Runs <c>DataflowReadEvaluator.ProcessBam</c> over <paramref name="reads"/> using mocked
/// inputs/outputs, then delegates to <c>VerifyCallNumbers</c> to check the factories against
/// <paramref name="expectedRegions"/>.
/// </summary>
/// <param name="reads">Read pairs served by the mocked data source.</param>
/// <param name="geminiOptions">Options handed to the evaluator under test.</param>
/// <param name="expectedRegions">Region count forwarded to <c>VerifyCallNumbers</c>.</param>
private static void VerifyFlow(List<ReadPair> reads, GeminiOptions geminiOptions, int expectedRegions)
{
    // Block factories whose usage is verified at the end.
    var batchFactory = BatchBlockFactory();
    var classificationProvider = new Mock<IClassificationBlockProvider>();
    var classifierFactory = ClassifierBlockFactory();
    var blockFactorySource = BlockFactory(batchFactory, classifierFactory, classificationProvider);

    // Input and output plumbing.
    var writtenAlignments = new List<BamAlignment>();
    var outputFactory = DataflowMocks.MockDataOutputFactory(writtenAlignments);
    var reader = DataflowMocks.MockReader();
    var readPairSource = DataflowMocks.MockDataSource(reads);
    var sourceFactory = DataflowMocks.MockDataSourceFactory(reader, readPairSource);

    var sampleOptions = new GeminiSampleOptions { OutputFolder = "outfoldersample" };
    var evaluator = new DataflowReadEvaluator(
        geminiOptions,
        sourceFactory.Object,
        sampleOptions,
        outputFactory.Object,
        blockFactorySource.Object);

    evaluator.ProcessBam();

    VerifyCallNumbers(batchFactory, classifierFactory, classificationProvider, sourceFactory,
        blockFactorySource, expectedRegions);
}
/// <summary>
/// End-to-end check of the workflow driven through <c>Execute</c>: four complete pairs plus
/// two loner pairs (mates supplied as separate <c>ReadPair</c> objects) go in, and the number
/// of output alignments per read name is asserted.
/// </summary>
public void ProcessBam()
{
    var stitcherOptions = new StitcherOptions();
    var geminiOptions = new GeminiOptions { RegionSize = 1000 };

    // Complete pairs, all with the same CIGARs, at increasing read-1 positions.
    var reads = new List<ReadPair>
    {
        TestHelpers.GetPair("5M1I5M", "5M1I5M", name: "Pair1"),
        TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1001, name: "Pair2"),
        TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1201, name: "Pair3"),
        TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 10000, name: "Pair4")
    };

    // "LonerPair1": two mates delivered separately.
    // NOTE(review): the numeric arguments look like position / mate position — confirm against TestHelpers.
    var nearMate1 = TestHelpers.CreateBamAlignment("AAAAAAAATA", 999, 1001, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPair1");
    nearMate1.SetIsProperPair(true);
    var lonerPair1Mate1 = new ReadPair(nearMate1, "LonerPair1");

    var nearMate2 = TestHelpers.CreateBamAlignment("AAAAATAAAA", 1002, 999, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPair1", isFirstMate: false);
    nearMate2.SetIsProperPair(true);
    var lonerPair1Mate2 = new ReadPair(nearMate2, "LonerPair1", readNumber: ReadNumber.Read2);

    // "LonerPairFarApart": same idea, but the two mates use 999 and 5001.
    var farMate1 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 999, 5001, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPairFarApart");
    farMate1.SetIsProperPair(true);
    var farMate2 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 5001, 999, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPairFarApart", isFirstMate: false);
    farMate2.SetIsProperPair(true);
    var lonerPair2Mate1 = new ReadPair(farMate1, name: "LonerPairFarApart");
    var lonerPair2Mate2 = new ReadPair(farMate2, name: "LonerPairFarApart", readNumber: ReadNumber.Read2);

    var lonerReads = new List<ReadPair>
    {
        lonerPair1Mate1,
        lonerPair1Mate2,
        lonerPair2Mate1,
        lonerPair2Mate2
    };

    var alignments = new List<BamAlignment>();
    Execute(alignments, reads, geminiOptions, stitcherOptions, lonerReads);

    // Each of these names is expected exactly once in the output.
    foreach (var nameExpectedOnce in new[] { "Pair1", "Pair2", "Pair3", "Pair4", "LonerPair1" })
    {
        Assert.Equal(1, alignments.Count(x => x.Name == nameExpectedOnce));
    }

    // The far-apart mates are expected as two separate alignments.
    Assert.Equal(2, alignments.Count(x => x.Name == "LonerPairFarApart"));
    Assert.Equal(7, alignments.Count);

    alignments.Clear();
}
/// <summary>
/// Creates the factory; every argument is stored unchanged in the field of the same name.
/// No validation or copying is performed here.
/// </summary>
/// <param name="geminiOptions">Gemini options to keep.</param>
/// <param name="realignmentAssessmentOptions">Realignment-assessment options to keep.</param>
/// <param name="stitcherOptions">Stitcher options to keep.</param>
/// <param name="realignmentOptions">Realignment options to keep.</param>
/// <param name="outputDir">Output directory string to keep.</param>
public BamRealignmentFactory(
    GeminiOptions geminiOptions,
    RealignmentAssessmentOptions realignmentAssessmentOptions,
    StitcherOptions stitcherOptions,
    RealignmentOptions realignmentOptions,
    string outputDir)
{
    _outputDir = outputDir;

    _geminiOptions = geminiOptions;
    _stitcherOptions = stitcherOptions;
    _realignmentOptions = realignmentOptions;
    _realignmentAssessmentOptions = realignmentAssessmentOptions;
}
/// <summary>
/// Creates the evaluator; every argument is stored unchanged in the field of the same name.
/// </summary>
/// <param name="geminiOptions">Gemini options to keep.</param>
/// <param name="dataSourceFactory">Factory for data sources.</param>
/// <param name="geminiSampleOptions">Per-sample options to keep.</param>
/// <param name="dataOutputFactory">Factory for data outputs.</param>
/// <param name="blockFactorySource">Source of block factories.</param>
public DataflowReadEvaluator(
    GeminiOptions geminiOptions,
    IGeminiDataSourceFactory dataSourceFactory,
    GeminiSampleOptions geminiSampleOptions,
    IGeminiDataOutputFactory dataOutputFactory,
    IBlockFactorySource blockFactorySource)
{
    // Options
    _geminiOptions = geminiOptions;
    _geminiSampleOptions = geminiSampleOptions;

    // Collaborators
    _dataSourceFactory = dataSourceFactory;
    _dataOutputFactory = dataOutputFactory;
    _blockFactorySource = blockFactorySource;
}
/// <summary>
/// Initializes every option-group property with a freshly default-constructed instance,
/// so none of them is null after construction.
/// </summary>
public GeminiApplicationOptions()
{
    GeminiOptions = new GeminiOptions();
    GeminiSampleOptions = new GeminiSampleOptions();
    StitcherOptions = new StitcherOptions();
    IndelFilteringOptions = new IndelFilteringOptions();
    RealignmentOptions = new RealignmentOptions();
    RealignmentAssessmentOptions = new RealignmentAssessmentOptions();
}
/// <summary>
/// Creates the block-factory source. Every argument is stored unchanged in the field of the
/// same name, and a parallelism limit is derived from the stitcher options.
/// </summary>
/// <param name="stitcherOptions">Stitcher options; <c>NumThreads</c> is read here to cap parallelism.</param>
/// <param name="geminiOptions">Gemini options to keep.</param>
/// <param name="refIdMapping">Dictionary keyed by integer reference id.</param>
/// <param name="bamRealignmentFactory">Realignment factory to keep.</param>
/// <param name="dataSourceFactory">Factory for data sources.</param>
/// <param name="geminiSampleOptions">Per-sample options to keep.</param>
/// <param name="realignmentOptions">Realignment options to keep.</param>
/// <param name="geminiFactory">Gemini factory to keep.</param>
public BlockFactorySource(
    StitcherOptions stitcherOptions,
    GeminiOptions geminiOptions,
    Dictionary<int, string> refIdMapping,
    BamRealignmentFactory bamRealignmentFactory,
    IGeminiDataSourceFactory dataSourceFactory,
    GeminiSampleOptions geminiSampleOptions,
    RealignmentOptions realignmentOptions,
    IGeminiFactory geminiFactory)
{
    // Option objects
    _stitcherOptions = stitcherOptions;
    _geminiOptions = geminiOptions;
    _geminiSampleOptions = geminiSampleOptions;
    _realignmentOptions = realignmentOptions;

    // Collaborators and lookups
    _refIdMapping = refIdMapping;
    _bamRealignmentFactory = bamRealignmentFactory;
    _dataSourceFactory = dataSourceFactory;
    _geminiFactory = geminiFactory;

    // The limit is the smaller of the configured thread count and the processor count
    // reported by the runtime.
    var configuredThreads = _stitcherOptions.NumThreads;
    _maxDegreeOfParallelism = Math.Min(configuredThreads, Environment.ProcessorCount);
}
/// <summary>
/// Creates the provider; every argument is stored unchanged in the field of the same name.
/// No validation, copying or other work happens in the constructor.
/// </summary>
/// <param name="geminiOptions">Gemini options to keep.</param>
/// <param name="chrom">Chromosome name string.</param>
/// <param name="progressTracker">Shared string-to-int concurrent dictionary.</param>
/// <param name="categoryLookup">Shared per-classification concurrent dictionary.</param>
/// <param name="actionBlockFactoryProvider">Provider of action-block factories.</param>
/// <param name="aggregateRegionProcessor">Aggregate region processor to keep.</param>
/// <param name="lightDebug">Flag stored in <c>_lightDebug</c>.</param>
/// <param name="batchBlockFactory">Batch-block factory to keep.</param>
/// <param name="binEvidenceFactory">Bin-evidence factory to keep.</param>
/// <param name="categoriesForRealignment">List of pair classifications to keep.</param>
/// <param name="maxDegreeOfParallelism">Value stored in <c>_maxDegreeOfParallelism</c>.</param>
public ClassificationBlockProvider(
    GeminiOptions geminiOptions,
    string chrom,
    ConcurrentDictionary<string, int> progressTracker,
    ConcurrentDictionary<PairClassification, int> categoryLookup,
    PairResultActionBlockFactoryProvider actionBlockFactoryProvider,
    IAggregateRegionProcessor aggregateRegionProcessor,
    bool lightDebug,
    PairResultBatchBlockFactory batchBlockFactory,
    IBinEvidenceFactory binEvidenceFactory,
    List<PairClassification> categoriesForRealignment,
    int maxDegreeOfParallelism)
{
    // Scalar settings
    _chrom = chrom;
    _lightDebug = lightDebug;
    _maxDegreeOfParallelism = maxDegreeOfParallelism;
    _geminiOptions = geminiOptions;

    // Shared state
    _progressTracker = progressTracker;
    _categoryLookup = categoryLookup;
    _categoriesForRealignment = categoriesForRealignment;

    // Collaborators
    _actionBlockFactoryProvider = actionBlockFactoryProvider;
    _aggregateRegionProcessor = aggregateRegionProcessor;
    _batchBlockFactory = batchBlockFactory;
    _binEvidenceFactory = binEvidenceFactory;
}
/// <summary>
/// Smoke test: builds a <c>ClassificationBlockProvider</c> from real collaborators (with mocked
/// writer source, data-source factory and source block) and calls
/// <c>GetAndLinkAllClassificationBlocksWithEcFinalization</c>. There are no assertions; the test
/// passes as long as the call does not throw.
/// </summary>
public void GetAndLinkAllClassificationBlocksWithEcFinalization()
{
    var geminiOptions = new GeminiOptions();
    var chrom = "chr1";
    var tracker = new ConcurrentDictionary<string, int>();
    var categoryLookup = new ConcurrentDictionary<PairClassification, int>();

    var mockWriterSource = new Mock<IWriterSource>();
    var actionBlockProvider = new PairResultActionBlockFactoryProvider(mockWriterSource.Object, false, false,
        "chr1", 1, 1, false, 500, tracker, categoryLookup);

    var chrReference = new ChrReference();
    var realignFactory = new BamRealignmentFactory(geminiOptions, new RealignmentAssessmentOptions(),
        new StitcherOptions(), new RealignmentOptions(), "outdir");
    var gemFactory = new GeminiFactory(geminiOptions, new IndelFilteringOptions());
    var dataSourceFactory = new Mock<IGeminiDataSourceFactory>();

    // Empty "master" lookups shared with the aggregate region processor.
    var masterIndelLookup = new ConcurrentDictionary<string, IndelEvidence>();
    var masterOutcomesLookup = new ConcurrentDictionary<HashableIndel, int[]>();
    var masterFinalIndels = new ConcurrentDictionary<HashableIndel, int>();

    var binEvidenceFactory = new BinEvidenceFactory(geminiOptions, new GeminiSampleOptions());
    var catsForRealign = new List<PairClassification>();

    var aggRegionProcessor = new AggregateRegionProcessor(chrReference,
        new Dictionary<int, string>() { { 1, "chr1" } }, realignFactory, geminiOptions, gemFactory, chrom,
        dataSourceFactory.Object, new RealignmentOptions(), masterIndelLookup, masterOutcomesLookup,
        masterFinalIndels, catsForRealign, tracker);

    var provider = new ClassificationBlockProvider(geminiOptions, chrom, tracker, categoryLookup,
        actionBlockProvider, aggRegionProcessor, false, new PairResultBatchBlockFactory(10),
        binEvidenceFactory, catsForRealign, 1);

    // Source block stub: any ConsumeMessage call hands back a default PairResult.
    var sourceBlock = new Mock<ISourceBlock<PairResult>>();
    bool consumed;
    sourceBlock
        .Setup(x => x.ConsumeMessage(It.IsAny<DataflowMessageHeader>(), It.IsAny<ITargetBlock<PairResult>>(),
            out consumed))
        .Returns(new PairResult());

    var edgeStates = new ConcurrentDictionary<int, EdgeState>();
    var edgeToWaitOn = new ConcurrentDictionary<int, Task>();

    provider.GetAndLinkAllClassificationBlocksWithEcFinalization(sourceBlock.Object, 1000, 2000, edgeStates,
        edgeToWaitOn, 0, false);
}
/// <summary>
/// Creates the workflow: stores the supplied collaborators and option objects, substitutes a
/// default <c>StitcherOptions</c> when none is given, and builds the <c>GeminiFactory</c> and
/// <c>BamRealignmentFactory</c> used later.
/// </summary>
/// <param name="dataSourceFactory">Factory for data sources.</param>
/// <param name="dataOutputFactory">Factory for data outputs.</param>
/// <param name="geminiOptions">Gemini options; also passed to both factories built here.</param>
/// <param name="geminiSampleOptions">Per-sample options to keep.</param>
/// <param name="realignmentOptions">Realignment options; also passed to the realignment factory.</param>
/// <param name="stitcherOptions">Stitcher options; may be null, in which case defaults are used.</param>
/// <param name="outputDirectory">Output directory passed to the realignment factory.</param>
/// <param name="realignmentAssessmentOptions">Assessment options passed to the realignment factory.</param>
/// <param name="indelFilteringOptions">Indel filtering options passed to the Gemini factory.</param>
/// <param name="samtoolsWrapper">Samtools wrapper to keep.</param>
public GeminiWorkflow(
    IGeminiDataSourceFactory dataSourceFactory,
    IGeminiDataOutputFactory dataOutputFactory,
    GeminiOptions geminiOptions,
    GeminiSampleOptions geminiSampleOptions,
    RealignmentOptions realignmentOptions,
    StitcherOptions stitcherOptions,
    string outputDirectory,
    RealignmentAssessmentOptions realignmentAssessmentOptions,
    IndelFilteringOptions indelFilteringOptions,
    ISamtoolsWrapper samtoolsWrapper)
{
    _dataSourceFactory = dataSourceFactory;
    _dataOutputFactory = dataOutputFactory;
    _geminiOptions = geminiOptions;
    _geminiSampleOptions = geminiSampleOptions;
    _realignmentOptions = realignmentOptions;
    _samtoolsWrapper = samtoolsWrapper;

    // A null argument means "use default stitcher options".
    _stitcherOptions = stitcherOptions ?? new StitcherOptions();

    _geminiFactory = new GeminiFactory(geminiOptions, indelFilteringOptions);

    // Pass the null-coalesced field, not the raw parameter: the realignment factory only
    // stores what it is given, so a null here would otherwise bypass the default above.
    _bamRealignmentFactory = new BamRealignmentFactory(geminiOptions, realignmentAssessmentOptions,
        _stitcherOptions, realignmentOptions, outputDirectory);
}
/// <summary>
/// Returns a new <c>GeminiOptions</c> whose listed properties are assigned from
/// <paramref name="options"/>, one by one.
/// </summary>
/// <param name="options">The instance to copy from.</param>
/// <returns>A separate <c>GeminiOptions</c> instance carrying the same property values.</returns>
/// <remarks>
/// Each property is copied by plain assignment. NOTE(review): if any of these properties is a
/// mutable reference type (e.g. <c>FinalIndelsOverride</c>), the copy shares that object with
/// the original — confirm whether that is acceptable for a "deep" copy.
/// </remarks>
public static GeminiOptions DeepCopy(this GeminiOptions options)
{
    var copy = new GeminiOptions
    {
        GenomeContextSize = options.GenomeContextSize,
        IndelsCsvName = options.IndelsCsvName,
        Debug = options.Debug,
        StitchOnly = options.StitchOnly,
        GenomePath = options.GenomePath,
        SamtoolsPath = options.SamtoolsPath,
        TrustSoftclips = options.TrustSoftclips,
        SkipStitching = options.SkipStitching,
        KeepBothSideSoftclips = options.KeepBothSideSoftclips,
        SkipAndRemoveDups = options.SkipAndRemoveDups,
        KeepProbeSoftclip = options.KeepProbeSoftclip,
        KeepUnmergedBams = options.KeepUnmergedBams,
        IsWeirdSamtools = options.IsWeirdSamtools,
        UseHygeaComparer = options.UseHygeaComparer,
        AllowRescoringOrigZero = options.AllowRescoringOrigZero,
        IndexPerChrom = options.IndexPerChrom,
        SoftclipUnknownIndels = options.SoftclipUnknownIndels,
        RemaskMessySoftclips = options.RemaskMessySoftclips,
        SkipEvidenceCollection = options.SkipEvidenceCollection,
        FinalIndelsOverride = options.FinalIndelsOverride,
        ReadCacheSize = options.ReadCacheSize,
        MessySiteThreshold = options.MessySiteThreshold,
        MessySiteWidth = options.MessySiteWidth,
        CollectDepth = options.CollectDepth,
        ImperfectFreqThreshold = options.ImperfectFreqThreshold,
        IndelRegionFreqThreshold = options.IndelRegionFreqThreshold,
        RegionDepthThreshold = options.RegionDepthThreshold,
        NumConcurrentRegions = options.NumConcurrentRegions,
        LogRegionsAndRealignments = options.LogRegionsAndRealignments,
        LightDebug = options.LightDebug,
        AvoidLikelySnvs = options.AvoidLikelySnvs,
        RecalculateUsableSitesAfterSnowball = options.RecalculateUsableSitesAfterSnowball,
        SortPerChrom = options.SortPerChrom,
        ForceHighLikelihoodRealigners = options.ForceHighLikelihoodRealigners,
        RequirePositiveOutcomeForSnowball = options.RequirePositiveOutcomeForSnowball,
        RegionSize = options.RegionSize
    };

    return copy;
}
/// <summary>
/// Creates the processor; every argument is stored unchanged in the field of the same name.
/// No validation, copying or other work happens in the constructor.
/// </summary>
/// <param name="chrReference">Chromosome reference to keep.</param>
/// <param name="refIdMapping">Dictionary keyed by integer reference id.</param>
/// <param name="bamRealignmentFactory">Realignment factory to keep.</param>
/// <param name="geminiOptions">Gemini options to keep.</param>
/// <param name="geminiFactory">Gemini factory to keep.</param>
/// <param name="chrom">Chromosome name string.</param>
/// <param name="dataSourceFactory">Factory for data sources.</param>
/// <param name="realignmentOptions">Realignment options to keep.</param>
/// <param name="masterIndelLookup">Shared concurrent dictionary of indel evidence keyed by string.</param>
/// <param name="masterOutcomesLookup">Shared concurrent dictionary of int arrays keyed by indel.</param>
/// <param name="masterFinalIndels">Shared concurrent dictionary of ints keyed by indel.</param>
/// <param name="categoriesForRealignment">List of pair classifications to keep.</param>
/// <param name="progressTracker">Shared string-to-int concurrent dictionary.</param>
public AggregateRegionProcessor(
    ChrReference chrReference,
    Dictionary<int, string> refIdMapping,
    BamRealignmentFactory bamRealignmentFactory,
    GeminiOptions geminiOptions,
    IGeminiFactory geminiFactory,
    string chrom,
    IGeminiDataSourceFactory dataSourceFactory,
    RealignmentOptions realignmentOptions,
    ConcurrentDictionary<string, IndelEvidence> masterIndelLookup,
    ConcurrentDictionary<HashableIndel, int[]> masterOutcomesLookup,
    ConcurrentDictionary<HashableIndel, int> masterFinalIndels,
    List<PairClassification> categoriesForRealignment,
    ConcurrentDictionary<string, int> progressTracker)
{
    // Reference / chromosome context
    _chrom = chrom;
    _chrReference = chrReference;
    _refIdMapping = refIdMapping;

    // Options
    _geminiOptions = geminiOptions;
    _realignmentOptions = realignmentOptions;
    _categoriesForRealignment = categoriesForRealignment;

    // Factories
    _bamRealignmentFactory = bamRealignmentFactory;
    _geminiFactory = geminiFactory;
    _dataSourceFactory = dataSourceFactory;

    // Shared lookups
    _masterIndelLookup = masterIndelLookup;
    _masterOutcomesLookup = masterOutcomesLookup;
    _masterFinalIndels = masterFinalIndels;
    _progressTracker = progressTracker;
}
/// <summary>
/// Feeds the same four read pairs through <c>VerifyFlow</c> with three different
/// <c>RegionSize</c> settings and checks the expected region count for each.
/// </summary>
public void Flow()
{
    var readPair1 = TestHelpers.GetPair("5M1I5M", "5M1I5M");
    var readPair2 = TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1001);
    var readPair3 = TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 1201);
    var readPair4 = TestHelpers.GetPair("5M1I5M", "5M1I5M", read1Position: 10000);
    var reads = new List<ReadPair>() { readPair1, readPair2, readPair3, readPair4 };

    var geminiOptions = new GeminiOptions() { RegionSize = 1000 };

    // Initial region (0-1000), 1000-2000, 10000-11000, final region
    VerifyFlow(reads, geminiOptions, 4);

    // The same options object is reused; only the region size changes between runs.
    geminiOptions.RegionSize = 100;
    VerifyFlow(reads, geminiOptions, 5);

    geminiOptions.RegionSize = 100000;
    VerifyFlow(reads, geminiOptions, 2);
}
/// <summary>
/// Builds a <c>GeminiWorkflow</c> over mocked data sources/outputs and runs it.
/// </summary>
/// <param name="alignments">List handed to <c>DataflowMocks.MockDataOutputFactory</c>; callers inspect it after the run.</param>
/// <param name="reads">Read pairs served by the mocked data source.</param>
/// <param name="geminiOptions">Gemini options passed to the workflow.</param>
/// <param name="stitcherOptions">Stitcher options passed to the workflow.</param>
/// <param name="lonerpairs">Optional additional read pairs passed to the mocked data source; may be null.</param>
private static void Execute(List<BamAlignment> alignments, List<ReadPair> reads, GeminiOptions geminiOptions,
    StitcherOptions stitcherOptions, List<ReadPair> lonerpairs = null)
{
    // Output side: any requested text writer resolves to a single mock writer.
    var mockDataOutputFactory = DataflowMocks.MockDataOutputFactory(alignments);
    var mockTextWriter = new Mock<ITextWriter>();
    mockDataOutputFactory
        .Setup(x => x.GetTextWriter(It.IsAny<string>()))
        .Returns(mockTextWriter.Object);

    // Input side.
    var mockReader = DataflowMocks.MockReader();
    var mockReadPairSource = DataflowMocks.MockDataSource(reads, lonerpairs);
    var mockDataSourceFactory = DataflowMocks.MockDataSourceFactory(mockReader, mockReadPairSource);

    var mockSamtoolsWrapper = new Mock<ISamtoolsWrapper>();
    var geminiSampleOptions = new GeminiSampleOptions() { RefId = 1, OutputFolder = "OutFolder" };

    var geminiWorkflow = new GeminiWorkflow(mockDataSourceFactory.Object, mockDataOutputFactory.Object,
        geminiOptions, geminiSampleOptions, new RealignmentOptions(), stitcherOptions, "outdir",
        new RealignmentAssessmentOptions(), new IndelFilteringOptions(), mockSamtoolsWrapper.Object);

    geminiWorkflow.Execute();
}
/// <summary>
/// Creates the factory, keeping references to both option objects for later use.
/// </summary>
/// <param name="geminiOptions">Gemini options stored in <c>_geminiOptions</c>.</param>
/// <param name="geminiSampleOptions">Per-sample options stored in <c>_geminiSampleOptions</c>.</param>
public BinEvidenceFactory(GeminiOptions geminiOptions, GeminiSampleOptions geminiSampleOptions)
{
    _geminiSampleOptions = geminiSampleOptions;
    _geminiOptions = geminiOptions;
}
/// <summary>
/// Creates the factory, keeping references to both option objects for later use.
/// </summary>
/// <param name="geminiOptions">Gemini options stored in <c>_geminiOptions</c>.</param>
/// <param name="indelFilteringOptions">Indel filtering options stored in <c>_indelFilteringOptions</c>.</param>
public GeminiFactory(GeminiOptions geminiOptions, IndelFilteringOptions indelFilteringOptions)
{
    _indelFilteringOptions = indelFilteringOptions;
    _geminiOptions = geminiOptions;
}
/// <summary>
/// Exercises <c>AggregateRegionProcessor.GetAggregateRegionResults</c> for region 10000-20000
/// and checks which pair results are reported as ready to flush and which are carried over in
/// the new edge state.
/// </summary>
public void GetAggregateRegionResults()
{
    var geminiOptions = new GeminiOptions();
    ChrReference chrReference = null;
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var bamRealignmentFactory = new BamRealignmentFactory(new GeminiOptions(), new RealignmentAssessmentOptions(),
        new StitcherOptions(), new RealignmentOptions(), "out");
    var geminiFactory = new GeminiFactory(geminiOptions, new IndelFilteringOptions());

    // The mocked indel source reports no relevant indels, whatever it is asked for.
    var dataSourceFactoryMock = new Mock<IGeminiDataSourceFactory>();
    var chromIndelSource = new Mock<IChromosomeIndelSource>();
    chromIndelSource
        .Setup(x => x.GetRelevantIndels(It.IsAny<int>(), It.IsAny<List<PreIndel>>(),
            It.IsAny<List<HashableIndel>>(), It.IsAny<List<PreIndel>>(), It.IsAny<List<PreIndel>>()))
        .Returns(new List<KeyValuePair<HashableIndel, GenomeSnippet>>());
    dataSourceFactoryMock
        .Setup(x => x.GetChromosomeIndelSource(It.IsAny<List<HashableIndel>>(), It.IsAny<IGenomeSnippetSource>()))
        .Returns(chromIndelSource.Object);
    var dataSourceFactory = dataSourceFactoryMock.Object;

    var masterIndelLookup = new ConcurrentDictionary<string, IndelEvidence>();
    var masterOutcomesLookup = new ConcurrentDictionary<HashableIndel, int[]>();
    var masterFinalIndels = new ConcurrentDictionary<HashableIndel, int>();
    var categoriesForRealignment = new List<PairClassification>();
    var progressTracker = new ConcurrentDictionary<string, int>();

    var processor = new AggregateRegionProcessor(chrReference, refIdMapping, bamRealignmentFactory,
        geminiOptions, geminiFactory, "chr1", dataSourceFactory,
        new RealignmentOptions()
        {
            CategoriesForSnowballing = new List<PairClassification>() { PairClassification.Disagree }
        },
        masterIndelLookup, masterOutcomesLookup, masterFinalIndels, categoriesForRealignment, progressTracker);

    var indelLookup = new ConcurrentDictionary<string, IndelEvidence>();
    var binEvidence = new BinEvidence(1, true, 20, false, 500, 1000);
    var edgeBinEvidence = new BinEvidence(1, true, 20, false, 500, 1000);

    // Edge state as left behind by a previous region named "0-1000", with nothing carried over.
    var edgeState = new EdgeState()
    {
        Name = "0-1000",
        EdgeAlignments = new Dictionary<PairClassification, List<PairResult>>(),
        BinEvidence = edgeBinEvidence,
        EdgeIndels = new List<HashableIndel>(),
        EffectiveMinPosition = 0
    };

    var pairResultLookup = new ConcurrentDictionary<PairClassification, List<PairResult>>();
    pairResultLookup.TryAdd(PairClassification.Disagree, new List<PairResult>()
    {
        TestHelpers.GetPairResult(10000),
        TestHelpers.GetPairResult(10001),
        TestHelpers.GetPairResult(10002),
        TestHelpers.GetPairResult(19995)
    });
    pairResultLookup.TryAdd(PairClassification.SingleMismatchStitched, new List<PairResult>()
    {
        TestHelpers.GetPairResult(19995),
        TestHelpers.GetPairResult(19995)
    });

    // Borderline case: the max position in the pair is >= EffectiveMaxPosition - 5000, even if one of the reads in the pair is not
    var effectiveMax = 19999;
    var r2BorderlinePos = effectiveMax - 5000 + 1;   // 15000
    var offset = 1;
    var r1BorderlinePos = r2BorderlinePos - offset;  // 14999
    pairResultLookup.TryAdd(PairClassification.UnstitchForwardMessy, new List<PairResult>()
    {
        TestHelpers.GetPairResult(r1BorderlinePos, offset), // One is just over border
        TestHelpers.GetPairResult(r1BorderlinePos, 0), // Both are within safe range
    });

    var regionData = new RegionDataForAggregation()
    {
        BinEvidence = binEvidence,
        EdgeState = edgeState,
        EffectiveMaxPosition = effectiveMax,
        EffectiveMinPosition = 10000,
        PairResultLookup = pairResultLookup
    };

    var regionResults = processor.GetAggregateRegionResults(indelLookup, 10000, 20000, false, regionData);

    // New edge state should have the correct items carrying over
    Assert.Equal("10000-20000", regionResults.EdgeState.Name);
    Assert.Equal(14999, regionResults.EdgeState.EffectiveMinPosition);

    // The four that are solidly in-bounds should be flushable immediately
    Assert.Equal(4, regionResults.AlignmentsReadyToBeFlushed.Count);

    // The remaining four pair results are expected in the edge state, split by classification.
    var edgeAlignmentsLookup = regionResults.EdgeState.EdgeAlignments;
    Assert.Equal(1, edgeAlignmentsLookup[PairClassification.Disagree].Count);
    Assert.Equal(2, edgeAlignmentsLookup[PairClassification.SingleMismatchStitched].Count);
    Assert.Equal(1, edgeAlignmentsLookup[PairClassification.UnstitchForwardMessy].Count);
}