/// <summary>
/// Recomposes three multi-sample positions on chr1 and verifies that exactly two
/// recomposed positions are produced with the expected tab-joined VCF fields.
/// </summary>
public void VariantGenerator_AsExpected()
{
    // Arrange: a mocked provider exposing one chromosome and a short reference sequence.
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        SimplePosition.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:789 0|2:456", chromosomes),
        SimplePosition.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS 1|1:301 1|2:789 1|2:456", chromosomes),
        SimplePosition.GetSimplePosition("chr1 6 . G C . PASS . GT:PS . 1|0:789 0/1:.", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG . PASS RECOMPOSED GT:PS . . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:PS . 1|2:789 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Recomposes two SNVs together with a long deletion that starts at the second SNV's
/// position, and verifies that a single recomposed position with the expected VCF
/// fields is returned.
/// </summary>
public void VariantGenerator_OverlappingDeletionAtTheEnd_Ignored()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0/0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . PASS . GT 1|1 0/0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . CTGAATCGCGA C . PASS . GT 0/0 0|1", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGC AGA,TGA . PASS RECOMPOSED GT 1|2 0|0",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Builds allele index blocks from one sample's genotypes at four positions and verifies
/// the (start, end, ref, alt) tuples that VariantGenerator.GetPositionsAndRefAltAlleles
/// returns for the first two blocks.
/// </summary>
public void GetPositionsAndRefAltAlleles_AsExpected()
{
    // Arrange: one sample (index 0) keyed by its semicolon-joined genotypes.
    var sampleGenotypes = new[] { "1|2", "1/1", "0|1", "0/1" };
    var genotypesToSamples = new Dictionary<(string, int), List<int>>
    {
        { (string.Join(";", sampleGenotypes), 0), new List<int> { 0 } }
    };
    // Note: Enumerable.Repeat places the same HashSet instance in every slot.
    var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<string>(), sampleGenotypes.Length).ToArray();
    var positionStarts = new[] { 356, 358, 360, 361 };
    var blockRanges = new List<int> { 358, 360, 362, 364 };
    var allelesByPosition = new[]
    {
        new[] { "G", "C", "T" },
        new[] { "A", "T" },
        new[] { "C", "G" },
        new[] { "G", "T" }
    };
    var reference = "GAATCG";

    var blocksToSamples = AlleleIndexBlock
        .GetAlleleIndexBlockToSampleIndex(genotypesToSamples, unsupportedVarIndexes, positionStarts, blockRanges)
        .ToArray();
    var alleleSet = new AlleleSet(null, positionStarts, allelesByPosition);
    var blocks = blocksToSamples.Select(entry => entry.Key).ToArray();
    var decomposedIndex = new HashSet<(int, int)>();

    // Act
    var firstResult = VariantGenerator.GetPositionsAndRefAltAlleles(
        blocks[0], alleleSet, reference, positionStarts[0], decomposedIndex);
    var secondResult = VariantGenerator.GetPositionsAndRefAltAlleles(
        blocks[1], alleleSet, reference, positionStarts[0], decomposedIndex);

    // Assert
    Assert.Equal((356, 360, "GAATC", "CATTC"), firstResult);
    Assert.Equal((356, 360, "GAATC", "TATTG"), secondResult);
}
/// <summary>
/// Recomposes three positions whose samples carry GT/PS/GQ values (some given as "."
/// or absent) and verifies the GQ and PS values reported on the two recomposed positions.
/// </summary>
public void VariantGenerator_MinGQUsed_DotAndNullIgnored()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS:GQ 0|1:123:. 2/2:.:14.2 0|2:456:.", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS:GQ 1|1:301:. 1|2:.:18 1|2:456:15.6", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT . 1|0 0/1", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:GQ:PS 1|3:.:123 . 1|2:15.6:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:GQ . 1|2:14.2 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Recomposes three GT:GQ:PS positions in which some sample columns omit trailing
/// fields, and verifies the sample columns of the single recomposed position.
/// </summary>
public void VariantGenerator_AllTrailingMissingValuesDroped()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:GQ:PS 0|1:.:123 2/2 1|1:17:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:GQ:PS ./. 1|2 1|2:15.6:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT:GQ:PS ./. 1|0 1|1:13:456", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG,TGATC,TGGTC . PASS RECOMPOSED GT:GQ:PS . 1|2 3|4:13:456",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Recomposes five consecutive positions that alternate between unphased homozygous
/// genotypes (1/1) and phased genotypes with phase sets, and verifies the three
/// recomposed positions that result.
/// </summary>
public void VariantGenerator_HomozygousSitesAndPhasedSites_Recomposed()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 1/1 1/1 1/1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 3 . G A,G 45 PASS . GT:PS 1|1:2 1|2:2 1|2:2", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A 45 PASS . GT 1/1 1/1 1/1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 5 . T A,G 45 PASS . GT:PS 1|1:4 1|2:2 1|2:4", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C 30 PASS . GT 1/1 1/1 1/1", chromosomes)
    };
    var blockRanges = new List<int> { 4, 5, 6, 7, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(3, recomposed.Count);
    Assert.Equal("chr1 2 . AGC TAA,TGA 45 PASS RECOMPOSED GT:PS . . 1|2:2",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG TAAAC,TGAGC 30 PASS RECOMPOSED GT:PS 1|1 1|2:2 .",
        string.Join("\t", recomposed[1].VcfFields));
    Assert.Equal("chr1 4 . CTG AAC,AGC 30 PASS RECOMPOSED GT:PS . . 1|2:4",
        string.Join("\t", recomposed[2].VcfFields));
}
/// <summary>
/// Recomposes three positions where the middle one carries a non-PASS filter, and
/// verifies that the single recomposed position is reported with a PASS filter.
/// </summary>
public void VariantGenerator_FilterTag_PassedMnvOverridesFailedOne()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . FailedForSomeReason . GT 0|0 0|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1 0|1", chromosomes)
    };
    var blockRanges = new List<int> { 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTG TGATC,TGCTC . PASS RECOMPOSED GT 0|2 0|1",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Recomposes two adjacent positions where the second sample is 0/0 at both sites, and
/// verifies that this sample appears as 0|0 in the single recomposed position.
/// </summary>
public void VariantGenerator_HomoReferenceGenotype_Output()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 1 . C A . PASS . GT:PS 0|1:1584593 0/0:.", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 2 . A C . PASS . GT:PS 0|1:1584593 0/0:.", chromosomes)
    };
    var blockRanges = new List<int> { 3, 4 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 1 . CA AC . PASS RECOMPOSED GT:PS 0|1:1584593 0|0",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Recomposes three positions whose sample columns have fewer fields than the FORMAT
/// column declares (or are "./."), and verifies the single recomposed position.
/// </summary>
public void VariantGenerator_SampleColumnCorrectlyProcessed_WhenTrailingMissingValuesDroped()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        SimplePosition.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS:GQ 0|1:123 2/2:.:14.2 ./.", chromosomes),
        SimplePosition.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS:GQ ./. 1|2:.:18 1|2:456:15.6", chromosomes),
        SimplePosition.GetSimplePosition("chr1 6 . G C . PASS . GT ./. 1|0 ./.", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:GQ . 1|2:14.2 .",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Runs PositionProcessor.GenerateOutput over three buffered positions and verifies that
/// the output consists of exactly the original positions' VCF fields, in order.
/// </summary>
public void GenerateOutput_NothingRecomposed_ReturnOriginalVcfFieldList()
{
    // Arrange
    var mockSequenceProvider = new Mock<ISequenceProvider>();
    mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var sequenceProvider = mockSequenceProvider.Object;
    var variantGenerator = new VariantGenerator(sequenceProvider);

    var position1 = AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT:PS 0|1:123", sequenceProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 0/1", sequenceProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1", sequenceProvider.RefNameToChromosome);
    var positions = new List<ISimplePosition> { position1, position2, position3 };
    var recomposable = new List<bool> { true, true, true };
    var functionBlockRanges = new List<int> { 4, 6, 8 };
    var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
    var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);

    // Act
    var output = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

    // Assert: pin the count first; otherwise an empty or truncated output would make the
    // element-wise loop below pass without checking anything.
    Assert.Equal(positions.Count, output.Length);
    for (int i = 0; i < output.Length; i++)
    {
        Assert.True(positions[i].VcfFields.SequenceEqual(output[i].VcfFields));
    }
}
/// <summary>
/// Recomposes three positions whose QUAL values are ".", 45 and 30.1, and verifies the
/// QUAL column reported on each of the two recomposed positions.
/// </summary>
public void VariantGenerator_MinQualUsed_DotIgnored()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:. 0|2:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G 45 PASS . GT:PS 1|1:301 1|2:. 1|2:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C 30.1 PASS . GT . 1|0 0/1", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA 45 PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG 30.1 PASS RECOMPOSED GT . 1|2 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Recomposes an SNV at position 2 together with two records at position 4 that share
/// the alt allele G, and verifies the single recomposed position.
/// </summary>
public void VariantGenerator_ForceGenotype_ConsistentAllele_Recompose()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 1|1 0/0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 0|1 0|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C G,A . PASS . GT 0|1 0/0", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGC TGC,TGG . PASS RECOMPOSED GT 1|2 .",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Feeds four positions to Recompose, the last two of which are different records at
/// position 4 with different alt alleles, and verifies that nothing is recomposed
/// (and, implicitly, that no exception is thrown).
/// </summary>
public void VariantGenerator_ConflictAltAlleles_AlleleBlockStartInTheMiddle_NoRecompositionNoException()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 1 . C T . PASS . GT 0/1 0|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0/1 0|0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 1|1 1|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . PASS . GT 1|1 0|1", chromosomes)
    };
    var blockRanges = new List<int> { 3, 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Empty(recomposed);
}
/// <summary>
/// Recomposes four phased positions across four samples and verifies the single
/// recomposed position, including its four alt alleles and per-sample genotypes.
/// </summary>
public void VariantGenerator_RefAllelesAddedToMergeAlleles()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAACT"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0|0 0|1 0|0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . PASS . GT 1|1 1|1 1|1 1|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 1|1 1|1 1|1 1|1", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 8 . A G . PASS . GT 0|1 0|1 0|0 0|0", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTGAA AGATCAA,AGATCAG,TGATCAA,TGATCAG . PASS RECOMPOSED GT 1|4 1|2 1|3 1|1",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Recomposes three positions whose FILTER columns are "PASS", "." and a failure tag,
/// and verifies the FILTER value written on each of the two recomposed positions.
/// </summary>
public void VariantGenerator_FilterTag_DotTreatedAsPass()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:. 0|2:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . . . GT:PS 1|1:301 1|2:. 1|2:456", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . FailedForSomeReason . GT:PS . 1|0:. 0/1:456", chromosomes)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . FilteredVariantsRecomposed RECOMPOSED GT . 1|2 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Builds and merges allele blocks from one sample's genotypes at four positions, then
/// verifies the start/end/ref/alt values and the leading entries of
/// VarPosIndexesInAlleleBlock returned by VariantGenerator.GetPositionsAndRefAltAlleles
/// for the first two merged blocks.
/// </summary>
public void GetPositionsAndRefAltAlleles_AsExpected()
{
    // Arrange: a single sample (index 0) mapped to its genotype block.
    var sampleGenotypes = new GenotypeBlock(
        new[] { "1|2", "1/1", "0|1", "0/1" }.Select(Genotype.GetGenotype).ToArray());
    var genotypesToSamples = new Dictionary<GenotypeBlock, List<int>>
    {
        { sampleGenotypes, new List<int> { 0 } }
    };
    // Note: Enumerable.Repeat places the same HashSet instance in every slot.
    var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), sampleGenotypes.Genotypes.Length).ToArray();
    var positionStarts = new[] { 356, 358, 360, 361 };
    var blockRanges = new List<int> { 358, 360, 362, 364 };
    var allelesByPosition = new[]
    {
        new[] { "G", "C", "T" },
        new[] { "A", "T" },
        new[] { "C", "G" },
        new[] { "G", "T" }
    };
    const string reference = "GAATCG";

    var blockToHaplotype = AlleleBlock.GetAlleleBlockToSampleHaplotype(
        genotypesToSamples, unsupportedVarIndexes, positionStarts, blockRanges, out var blockGraph);
    var mergedBlockToHaplotype = AlleleBlockMerger.Merge(blockToHaplotype, blockGraph).ToArray();
    var alleleSet = new AlleleSet(ChromosomeUtilities.Chr1, positionStarts, allelesByPosition);
    var mergedBlocks = mergedBlockToHaplotype.Select(entry => entry.Key).ToArray();
    var nSequence = new NSequence();

    // Act
    var firstResult = VariantGenerator.GetPositionsAndRefAltAlleles(
        mergedBlocks[0], alleleSet, reference, positionStarts[0], null, nSequence, _vidCreator);
    var secondResult = VariantGenerator.GetPositionsAndRefAltAlleles(
        mergedBlocks[1], alleleSet, reference, positionStarts[0], null, nSequence, _vidCreator);

    // Assert
    var firstExpectedIndexes = new List<int> { 0, 1 };
    var secondExpectedIndexes = new List<int> { 0, 1, 2 };

    Assert.Equal((356, 360, "GAATC", "CATTC"),
        (firstResult.Start, firstResult.End, firstResult.Ref, firstResult.Alt));
    var slot = 0;
    foreach (var expectedIndex in firstExpectedIndexes)
    {
        Assert.Equal(expectedIndex, firstResult.VarPosIndexesInAlleleBlock[slot]);
        slot++;
    }

    Assert.Equal((356, 360, "GAATC", "TATTG"),
        (secondResult.Start, secondResult.End, secondResult.Ref, secondResult.Alt));
    slot = 0;
    foreach (var expectedIndex in secondExpectedIndexes)
    {
        Assert.Equal(expectedIndex, secondResult.VarPosIndexesInAlleleBlock[slot]);
        slot++;
    }
}
/// <summary>
/// Runs PositionProcessor.GenerateOutput over three buffered positions and verifies that
/// the output is the first original position, one recomposed position, and then the
/// remaining two original positions, compared field by field.
/// </summary>
public void GenerateOutput_Return_OriginalAndRecomposed_VcfFieldList()
{
    // Arrange
    var mockSequenceProvider = new Mock<ISequenceProvider>();
    mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var sequenceProvider = mockSequenceProvider.Object;
    var variantGenerator = new VariantGenerator(sequenceProvider);

    var position1 = AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT:PS 0|1:.", sequenceProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 1/1", sequenceProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1", sequenceProvider.RefNameToChromosome);
    var positions = new List<ISimplePosition> { position1, position2, position3 };
    var recomposable = new List<bool> { true, true, true };
    var functionBlockRanges = new List<int> { 4, 6, 8 };
    var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
    var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);

    // Act
    var output = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

    // Assert
    var expectedOutput = new string[4][];
    expectedOutput[0] = position1.VcfFields;
    expectedOutput[1] = new[] { "chr1", "2", ".", "AGCTG", "AGGTG,TGGTC", ".", "PASS", "RECOMPOSED", "GT", "1|2" };
    expectedOutput[2] = position2.VcfFields;
    expectedOutput[3] = position3.VcfFields;

    // Pin the count first; otherwise an empty or truncated output would make the
    // element-wise loop below pass without checking every expected row.
    Assert.Equal(expectedOutput.Length, output.Length);
    for (int i = 0; i < output.Length; i++)
    {
        Assert.True(expectedOutput[i].SequenceEqual(output[i].VcfFields));
    }
}
/// <summary>
/// Feeds two adjacent three-sample positions to Recompose and verifies that no
/// recomposed position is produced.
/// </summary>
public void VariantGenerator_NoMnvAfterTrimming_NotRecompose()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 1 . C A . PASS . GT:PS 1|0:1584593 1|1:. 0|1:.", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 2 . A C . PASS . GT:PS 0|1:1584593 0/0:. 0/0:.", chromosomes)
    };
    var blockRanges = new List<int> { 3, 4 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Empty(recomposed);
}
/// <summary>
/// Recomposes three positions where the middle one carries a non-PASS filter, and
/// verifies that the single recomposed position is reported as PASS with the expected
/// alleles and sample genotypes.
/// </summary>
public void VariantGenerator_FilterTag_OnlyDecomposedVariantsConsidered()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0/1 0|0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . FailedForSomeReason . GT 0|0 0/1 0|0", chromosomes),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 1|1 0/1 0|0", chromosomes)
    };
    var blockRanges = new List<int> { 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTG AGCTC,TGCTC . PASS RECOMPOSED GT 1|2 . 0|0",
        string.Join("\t", recomposed[0].VcfFields));
}
/// <summary>
/// Builds the list of configurations obtained by applying the requested binary sampling
/// strategies and experimental designs to a variability model.
/// </summary>
/// <param name="vm">The variability model to sample. Note that the OPTIONWISE, PAIRWISE and
/// NEGATIVE_OPTIONWISE strategies read GlobalState.varModel rather than this argument.</param>
/// <param name="binaryStrategies">The binary sampling strategies to apply; their results are concatenated.</param>
/// <param name="experimentalDesigns">The experimental designs for numeric options; when empty,
/// configurations consist of binary options only.</param>
/// <returns>
/// The distinct configurations formed by combining every binary configuration with every
/// numeric configuration, restricted to those that fulfil all mixed constraints of
/// <paramref name="vm"/> when the model defines any.
/// </returns>
public static List<Configuration> buildConfigs(VariabilityModel vm, List<SamplingStrategies> binaryStrategies, List<ExperimentalDesign> experimentalDesigns)
{
    List<Configuration> result = new List<Configuration>();
    VariantGenerator vg = new VariantGenerator();
    List<List<BinaryOption>> binaryConfigs = new List<List<BinaryOption>>();
    List<Dictionary<NumericOption, Double>> numericConfigs = new List<Dictionary<NumericOption, double>>();

    foreach (SamplingStrategies strat in binaryStrategies)
    {
        switch (strat)
        {
            //Binary sampling heuristics
            case SamplingStrategies.ALLBINARY:
                binaryConfigs.AddRange(vg.generateAllVariantsFast(vm));
                break;

            case SamplingStrategies.BINARY_RANDOM:
                RandomBinary rb = new RandomBinary(vm);
                foreach (Dictionary<string, string> expDesignParamSet in binaryParams.randomBinaryParameters)
                {
                    binaryConfigs.AddRange(rb.getRandomConfigs(expDesignParamSet));
                }
                break;

            case SamplingStrategies.OPTIONWISE:
                {
                    FeatureWise fw = new FeatureWise();
                    binaryConfigs.AddRange(fw.generateFeatureWiseConfigurations(GlobalState.varModel));
                }
                break;

            //case SamplingStrategies.MINMAX:
            //    {
            //        MinMax mm = new MinMax();
            //        binaryConfigs.AddRange(mm.generateMinMaxConfigurations(GlobalState.varModel));
            //    }
            //    break;

            case SamplingStrategies.PAIRWISE:
                {
                    PairWise pw = new PairWise();
                    binaryConfigs.AddRange(pw.generatePairWiseVariants(GlobalState.varModel));
                }
                break;

            case SamplingStrategies.NEGATIVE_OPTIONWISE:
                {
                    NegFeatureWise neg = new NegFeatureWise();
                    //2nd option: neg.generateNegativeFWAllCombinations(GlobalState.varModel));
                    binaryConfigs.AddRange(neg.generateNegativeFW(GlobalState.varModel));
                }
                break;

            case SamplingStrategies.T_WISE:
                foreach (Dictionary<string, string> ParamSet in binaryParams.tWiseParameters)
                {
                    TWise tw = new TWise();
                    int t = 3;
                    foreach (KeyValuePair<String, String> param in ParamSet)
                    {
                        if (param.Key.Equals(TWise.PARAMETER_T_NAME))
                        {
                            t = Convert.ToInt16(param.Value);
                        }
                    }

                    // Generate once per parameter set, after t has been resolved. Generating
                    // inside the loop above would sample once per parameter key (possibly with
                    // the default t before the configured value is read) and never for an
                    // empty parameter set.
                    binaryConfigs.AddRange(tw.generateT_WiseVariants_new(vm, t));
                }
                break;
        }
    }

    //Experimental designs for numeric options
    if (experimentalDesigns.Count != 0)
    {
        handleDesigns(experimentalDesigns, numericConfigs, vm);
    }

    // Combine each binary configuration with every numeric configuration; without numeric
    // configurations the binary configuration is used on its own.
    foreach (List<BinaryOption> binConfig in binaryConfigs)
    {
        if (numericConfigs.Count == 0)
        {
            Configuration c = new Configuration(binConfig);
            result.Add(c);
        }

        foreach (Dictionary<NumericOption, double> numConf in numericConfigs)
        {
            Configuration c = new Configuration(binConfig, numConf);
            result.Add(c);
        }
    }

    List<Configuration> distinctConfigurations = result.Distinct().ToList();
    if (vm.MixedConstraints.Count == 0)
    {
        return distinctConfigurations;
    }

    // Keep only configurations that satisfy every mixed constraint of the model.
    List<Configuration> filteredConfiguration = new List<Configuration>();
    foreach (Configuration toTest in distinctConfigurations)
    {
        bool isValid = true;
        foreach (MixedConstraint constr in vm.MixedConstraints)
        {
            if (!constr.requirementsFulfilled(toTest))
            {
                isValid = false;
                break; // one violated constraint is enough to reject the configuration
            }
        }

        if (isValid)
        {
            filteredConfiguration.Add(toTest);
        }
    }

    return filteredConfiguration;
}
/// <summary>
/// Returns a set of random binary partial configurations.
/// </summary>
/// <param name="parameters">Parameters for this random sampling. The following parameters are supported:
/// seed = the seed for the random generator (int required)
/// numConfigs = the number of configurations that have to be selected.
///              To be able to select a number of configurations equal to the number selected by the OW heuristic or
///              the TWise heuristics, two special values can be given for this parameter. To select a number equal to
///              the OW heuristics use "asOW" as value and to select a number equal to a TWise heuristics with a t of X
///              use "asTWX".
/// </param>
/// <returns>A list of random binary partial configurations. When the requested number is at least
/// the number of all valid configurations, all of them are returned.</returns>
public List<List<BinaryOption>> getRandomConfigs(Dictionary<String, String> parameters)
{
    configurations.Clear();

    int seed = 0;
    int numConfigs = varModel.BinaryOptions.Count;

    // parse parameters
    if (parameters.TryGetValue("numConfigs", out String numConfigsValue))
    {
        if (!int.TryParse(numConfigsValue, out numConfigs))
        {
            // special constants as parameter (numConfigs = asOW or asTWX)
            if (numConfigsValue.Contains("asOW"))
            {
                FeatureWise fw = new FeatureWise();
                numConfigs = fw.generateFeatureWiseConfigsCSP(varModel).Count;
            }
            else if (numConfigsValue.Contains("asTW"))
            {
                // The remainder after "asTW" is the t value; numConfigs temporarily holds it.
                numConfigsValue = numConfigsValue.Replace("asTW", "").Trim();
                int.TryParse(numConfigsValue, out numConfigs);
                TWise tw = new TWise();
                numConfigs = tw.generateT_WiseVariants_new(varModel, numConfigs).Count;
            }
        }
    }

    if (parameters.TryGetValue("seed", out String seedValue))
    {
        int.TryParse(seedValue, out seed);
    }

    // build set of all valid binary partial configurations
    VariantGenerator vg = new VariantGenerator();
    List<List<BinaryOption>> allConfigs = vg.generateAllVariantsFast(varModel);

    //repair wrong parameters
    if (numConfigs >= allConfigs.Count)
    {
        if (numConfigs > allConfigs.Count)
        {
            GlobalState.logError.logLine("Random Sampling: numConfigs to large for variability model. num set to " + allConfigs.Count);
        }

        configurations = allConfigs;
        return allConfigs;
    }

    // select random configurations until numConfigs distinct ones have been collected
    Random r = new Random(seed);
    while (configurations.Count < numConfigs)
    {
        // Random.Next(maxValue) excludes maxValue, so allConfigs.Count is the correct bound;
        // Count + 1 could yield an index one past the end of the list.
        List<BinaryOption> selectedConfig = allConfigs[r.Next(allConfigs.Count)];
        if (!configurations.Contains(selectedConfig))
        {
            configurations.Add(selectedConfig);
        }
    }

    return configurations;
}
/// <summary>
/// Recomposes three multi-sample positions on chr1 and verifies both the VCF fields of
/// the two recomposed positions and the LinkedVids recorded on the input positions and
/// on the recomposed positions.
/// </summary>
public void VariantGenerator_AsExpected()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(p => p.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    providerMock.SetupGet(p => p.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var chromosomes = provider.RefNameToChromosome;

    var firstSnv = AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:789 0|2:456", chromosomes);
    var secondSnv = AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS 1|1:301 1|2:789 1|2:456", chromosomes);
    var thirdSnv = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT:PS . 1|0:789 0/1:.", chromosomes);
    var inputPositions = new List<ISimplePosition> { firstSnv, secondSnv, thirdSnv };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert: recomposed VCF lines
    Assert.Equal(2, recomposed.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:PS . 1|2:789 .",
        string.Join("\t", recomposed[1].VcfFields));

    // Assert: LinkedVids on the input SNVs. Lists holding several ids are sorted in
    // place before comparison, so their original order is not part of the expectation.
    var firstSnvLinks = firstSnv.LinkedVids;
    Assert.Equal(2, firstSnvLinks.Length);
    Assert.Equal(new List<string> { "1:2:4:TGA" }, firstSnvLinks[0]);
    firstSnvLinks[1].Sort();
    Assert.Equal(new List<string> { "1:2:4:GGG", "1:2:6:GGATC", "1:2:6:GGGTG" }, firstSnvLinks[1]);

    var secondSnvLinks = secondSnv.LinkedVids;
    Assert.Equal(2, secondSnvLinks.Length);
    secondSnvLinks[0].Sort();
    Assert.Equal(new List<string> { "1:2:4:AGA", "1:2:4:TGA", "1:2:6:GGATC" }, secondSnvLinks[0]);
    secondSnvLinks[1].Sort();
    Assert.Equal(new List<string> { "1:2:4:GGG", "1:2:6:GGGTG" }, secondSnvLinks[1]);

    Assert.Single(thirdSnv.LinkedVids);
    Assert.Equal(new List<string> { "1:2:6:GGATC" }, thirdSnv.LinkedVids[0]);

    // Assert: LinkedVids on the recomposed MNVs
    var firstMnvLinks = recomposed[0].LinkedVids;
    Assert.Equal(3, firstMnvLinks.Length);
    Assert.Equal(new List<string> { "1:4:A" }, firstMnvLinks[0]);
    Assert.Equal(new List<string> { "1:2:G", "1:4:G" }, firstMnvLinks[1]);
    Assert.Equal(new List<string> { "1:2:T", "1:4:A" }, firstMnvLinks[2]);

    var secondMnvLinks = recomposed[1].LinkedVids;
    Assert.Equal(2, secondMnvLinks.Length);
    Assert.Equal(new List<string> { "1:2:G", "1:4:A", "1:6:C" }, secondMnvLinks[0]);
    Assert.Equal(new List<string> { "1:2:G", "1:4:G" }, secondMnvLinks[1]);
}