public void VariantGenerator_AsExpected()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = SimplePosition.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS	0|1:123	2/2:789	0|2:456", sequenceProvider.RefNameToChromosome);
            var position2           = SimplePosition.GetSimplePosition("chr1	4	.	C	A,G	.	PASS	.	GT:PS	1|1:301	1|2:789	1|2:456", sequenceProvider.RefNameToChromosome);
            var position3           = SimplePosition.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT:PS	.	1|0:789	0/1:.", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Equal(2, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	AGA,GGG	.	PASS	RECOMPOSED	GT:PS	.	.	1|2:456", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	.	PASS	RECOMPOSED	GT:PS	.	1|2:789	.", string.Join("\t", recomposedPositions[1].VcfFields));
        }
        public void VariantGenerator_OverlappingDeletionAtTheEnd_Ignored()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	0|1	0/0", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	.	PASS	.	GT	1|1	0/0", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	4	.	CTGAATCGCGA	C	.	PASS	.	GT	0/0	0|1", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 6
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGC	AGA,TGA	.	PASS	RECOMPOSED	GT	1|2	0|0", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public void GetPositionsAndRefAltAlleles_AsExpected()
        {
            var genotypes        = new[] { "1|2", "1/1", "0|1", "0/1" };
            var genotypeToSample =
                new Dictionary <(string, int), List <int> > {
                { (string.Join(";", genotypes), 0), new List <int> {
                      0
                  } }
            };
            var indexOfUnsupportedVars = Enumerable.Repeat(new HashSet <string>(), genotypes.Length).ToArray();
            var starts = new[] { 356, 358, 360, 361 };
            var functionBlockRanges = new List <int> {
                358, 360, 362, 364
            };
            var alleles     = new[] { new[] { "G", "C", "T" }, new[] { "A", "T" }, new[] { "C", "G" }, new[] { "G", "T" } };
            var refSequence = "GAATCG";
            var alleleIndexBlocksToSample = AlleleIndexBlock.GetAlleleIndexBlockToSampleIndex(genotypeToSample, indexOfUnsupportedVars, starts, functionBlockRanges).ToArray();
            var alleleSet               = new AlleleSet(null, starts, alleles);
            var alleleIndexBlocks       = alleleIndexBlocksToSample.Select(x => x.Key).ToArray();
            var decomposedPositionIndex = new HashSet <(int, int)>();
            var result1 = VariantGenerator.GetPositionsAndRefAltAlleles(alleleIndexBlocks[0], alleleSet, refSequence, starts[0], decomposedPositionIndex);
            var result2 = VariantGenerator.GetPositionsAndRefAltAlleles(alleleIndexBlocks[1], alleleSet, refSequence, starts[0], decomposedPositionIndex);

            Assert.Equal((356, 360, "GAATC", "CATTC"), result1);
            Assert.Equal((356, 360, "GAATC", "TATTG"), result2);
        }
        public void VariantGenerator_MinGQUsed_DotAndNullIgnored()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS:GQ	0|1:123:.	2/2:.:14.2	0|2:456:.", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A,G	.	PASS	.	GT:PS:GQ	1|1:301:.	1|2:.:18	1|2:456:15.6", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	.	1|0	0/1", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Equal(2, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	AGA,GGG,TGA	.	PASS	RECOMPOSED	GT:GQ:PS	1|3:.:123	.	1|2:15.6:456", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	.	PASS	RECOMPOSED	GT:GQ	.	1|2:14.2	.", string.Join("\t", recomposedPositions[1].VcfFields));
        }
        public void VariantGenerator_AllTrailingMissingValuesDroped()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:GQ:PS	0|1:.:123	2/2	1|1:17:456", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A,G	.	PASS	.	GT:GQ:PS	./.	1|2	1|2:15.6:456", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT:GQ:PS	./.	1|0	1|1:13:456", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG,TGATC,TGGTC	.	PASS	RECOMPOSED	GT:GQ:PS	.	1|2	3|4:13:456", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public void VariantGenerator_HomozygousSitesAndPhasedSites_Recomposed()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1 = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	1/1	1/1	1/1", sequenceProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.GetSimplePosition("chr1	3	.	G	A,G	45	PASS	.	GT:PS	1|1:2	1|2:2	1|2:2", sequenceProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	45	PASS	.	GT	1/1	1/1	1/1", sequenceProvider.RefNameToChromosome);
            var position4 = AnnotationUtilities.GetSimplePosition("chr1	5	.	T	A,G	45	PASS	.	GT:PS	1|1:4	1|2:2	1|2:4", sequenceProvider.RefNameToChromosome);
            var position5 = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	30	PASS	.	GT	1/1	1/1	1/1", sequenceProvider.RefNameToChromosome);

            var functionBlockRanges = new List <int> {
                4, 5, 6, 7, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3, position4, position5
            }, functionBlockRanges).ToList();

            Assert.Equal(3, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	TAA,TGA	45	PASS	RECOMPOSED	GT:PS	.	.	1|2:2", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	TAAAC,TGAGC	30	PASS	RECOMPOSED	GT:PS	1|1	1|2:2	.", string.Join("\t", recomposedPositions[1].VcfFields));
            Assert.Equal("chr1	4	.	CTG	AAC,AGC	30	PASS	RECOMPOSED	GT:PS	.	.	1|2:4", string.Join("\t", recomposedPositions[2].VcfFields));
        }
        public void VariantGenerator_FilterTag_PassedMnvOverridesFailedOne()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	0|1	0|1", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	.	FailedForSomeReason	.	GT	0|0	0|1", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	0|1	0|1", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                6, 8, 10
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGCTG	TGATC,TGCTC	.	PASS	RECOMPOSED	GT	0|2	0|1", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public void VariantGenerator_HomoReferenceGenotype_Output()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	1	.	C	A	.	PASS	.	GT:PS	0|1:1584593	0/0:.", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	C	.	PASS	.	GT:PS	0|1:1584593	0/0:.", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                3, 4
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	1	.	CA	AC	.	PASS	RECOMPOSED	GT:PS	0|1:1584593	0|0", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public void VariantGenerator_SampleColumnCorrectlyProcessed_WhenTrailingMissingValuesDroped()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = SimplePosition.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS:GQ	0|1:123	2/2:.:14.2	./.", sequenceProvider.RefNameToChromosome);
            var position2           = SimplePosition.GetSimplePosition("chr1	4	.	C	A,G	.	PASS	.	GT:PS:GQ	./.	1|2:.:18	1|2:456:15.6", sequenceProvider.RefNameToChromosome);
            var position3           = SimplePosition.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	./.	1|0	./.", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	.	PASS	RECOMPOSED	GT:GQ	.	1|2:14.2	.", string.Join("\t", recomposedPositions[0].VcfFields));
        }
Exemple #10
0
        public void GenerateOutput_NothingRecomposed_ReturnOriginalVcfFieldList()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;
            var variantGenerator = new VariantGenerator(sequenceProvider);

            var position1 = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT:PS	0|1:123", sequenceProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	G	.	PASS	.	GT	0/1", sequenceProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	0|1", sequenceProvider.RefNameToChromosome);

            var positions = new List <ISimplePosition> {
                position1, position2, position3
            };
            var recomposable = new List <bool> {
                true, true, true
            };
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };
            var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
            var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);
            var output            = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

            for (int i = 0; i < output.Length; i++)
            {
                Assert.True(positions[i].VcfFields.SequenceEqual(output[i].VcfFields));
            }
        }
Exemple #11
0
        public void VariantGenerator_MinQualUsed_DotIgnored()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(ChromosomeUtilities.RefNameToChromosome);
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS	0|1:123	2/2:.	0|2:456", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A,G	45	PASS	.	GT:PS	1|1:301	1|2:.	1|2:456", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	30.1	PASS	.	GT	.	1|0	0/1", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider, _vidCreator);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Equal(2, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	AGA,GGG,TGA	45	PASS	RECOMPOSED	GT:PS	1|3:123	.	1|2:456", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	30.1	PASS	RECOMPOSED	GT	.	1|2	.", string.Join("\t", recomposedPositions[1].VcfFields));
        }
Exemple #12
0
        public void VariantGenerator_ForceGenotype_ConsistentAllele_Recompose()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(ChromosomeUtilities.RefNameToChromosome);
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1 = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	1|1	0/0", sequenceProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	G	.	PASS	.	GT	0|1	0|1", sequenceProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	G,A	.	PASS	.	GT	0|1	0/0", sequenceProvider.RefNameToChromosome);

            var functionBlockRanges = new List <int> {
                4, 6, 6
            };

            var recomposer          = new VariantGenerator(sequenceProvider, _vidCreator);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGC	TGC,TGG	.	PASS	RECOMPOSED	GT	1|2	.", string.Join("\t", recomposedPositions[0].VcfFields));
        }
Exemple #13
0
        public void VariantGenerator_ConflictAltAlleles_AlleleBlockStartInTheMiddle_NoRecompositionNoException()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(ChromosomeUtilities.RefNameToChromosome);
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1 = AnnotationUtilities.GetSimplePosition("chr1	1	.	C	T	.	PASS	.	GT	0/1	0|1", sequenceProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	0/1	0|0", sequenceProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	G	.	PASS	.	GT	1|1	1|1", sequenceProvider.RefNameToChromosome);
            var position4 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	.	PASS	.	GT	1|1	0|1", sequenceProvider.RefNameToChromosome);

            var functionBlockRanges = new List <int> {
                3, 4, 6, 6
            };

            var recomposer          = new VariantGenerator(sequenceProvider, _vidCreator);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3, position4
            }, functionBlockRanges).ToList();

            Assert.Empty(recomposedPositions);
        }
        public void VariantGenerator_RefAllelesAddedToMergeAlleles()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAACT"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	0|1	0|0	0|1	0|0", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	.	PASS	.	GT	1|1	1|1	1|1	1|1", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	1|1	1|1	1|1	1|1", sequenceProvider.RefNameToChromosome);
            var position4           = AnnotationUtilities.GetSimplePosition("chr1	8	.	A	G	.	PASS	.	GT	0|1	0|1	0|0	0|0", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8, 10
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3, position4
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGCTGAA	AGATCAA,AGATCAG,TGATCAA,TGATCAG	.	PASS	RECOMPOSED	GT	1|4	1|2	1|3	1|1", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public void VariantGenerator_FilterTag_DotTreatedAsPass()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS	0|1:123	2/2:.	0|2:456", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A,G	.	.	.	GT:PS	1|1:301	1|2:.	1|2:456", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	FailedForSomeReason	.	GT:PS	.	1|0:.	0/1:456", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Equal(2, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	AGA,GGG,TGA	.	PASS	RECOMPOSED	GT:PS	1|3:123	.	1|2:456", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	.	FilteredVariantsRecomposed	RECOMPOSED	GT	.	1|2	.", string.Join("\t", recomposedPositions[1].VcfFields));
        }
Exemple #16
0
        public void GetPositionsAndRefAltAlleles_AsExpected()
        {
            var genotypeBlock    = new GenotypeBlock(new[] { "1|2", "1/1", "0|1", "0/1" }.Select(Genotype.GetGenotype).ToArray());
            var genotypeToSample =
                new Dictionary <GenotypeBlock, List <int> > {
                { genotypeBlock, new List <int> {
                      0
                  } }
            };
            var indexOfUnsupportedVars = Enumerable.Repeat(new HashSet <int>(), genotypeBlock.Genotypes.Length).ToArray();
            var starts = new[] { 356, 358, 360, 361 };
            var functionBlockRanges = new List <int> {
                358, 360, 362, 364
            };
            var          alleles     = new[] { new[] { "G", "C", "T" }, new[] { "A", "T" }, new[] { "C", "G" }, new[] { "G", "T" } };
            const string refSequence = "GAATCG";
            var          alleleBlockToSampleHaplotype       = AlleleBlock.GetAlleleBlockToSampleHaplotype(genotypeToSample, indexOfUnsupportedVars, starts, functionBlockRanges, out var alleleBlockGraph);
            var          mergedAlleleBlockToSampleHaplotype =
                AlleleBlockMerger.Merge(alleleBlockToSampleHaplotype, alleleBlockGraph).ToArray();
            var alleleSet    = new AlleleSet(ChromosomeUtilities.Chr1, starts, alleles);
            var alleleBlocks = mergedAlleleBlockToSampleHaplotype.Select(x => x.Key).ToArray();
            var sequence     = new NSequence();

            var result1 = VariantGenerator.GetPositionsAndRefAltAlleles(alleleBlocks[0], alleleSet, refSequence, starts[0], null, sequence, _vidCreator);
            var result2 = VariantGenerator.GetPositionsAndRefAltAlleles(alleleBlocks[1], alleleSet, refSequence, starts[0], null, sequence, _vidCreator);

            var expectedVarPosIndexes1 = new List <int> {
                0, 1
            };
            var expectedVarPosIndexes2 = new List <int> {
                0, 1, 2
            };

            Assert.Equal((356, 360, "GAATC", "CATTC"), (result1.Start, result1.End, result1.Ref, result1.Alt));
            for (var i = 0; i < expectedVarPosIndexes1.Count; i++)
            {
                Assert.Equal(expectedVarPosIndexes1[i], result1.VarPosIndexesInAlleleBlock[i]);
            }

            Assert.Equal((356, 360, "GAATC", "TATTG"), (result2.Start, result2.End, result2.Ref, result2.Alt));
            for (var i = 0; i < expectedVarPosIndexes2.Count; i++)
            {
                Assert.Equal(expectedVarPosIndexes2[i], result2.VarPosIndexesInAlleleBlock[i]);
            }
        }
Exemple #17
0
        public void GenerateOutput_Return_OriginalAndRecomposed_VcfFieldList()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;
            var variantGenerator = new VariantGenerator(sequenceProvider);

            var position1 = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT:PS	0|1:.", sequenceProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	G	.	PASS	.	GT	1/1", sequenceProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	0|1", sequenceProvider.RefNameToChromosome);

            var positions = new List <ISimplePosition> {
                position1, position2, position3
            };
            var recomposable = new List <bool> {
                true, true, true
            };
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };
            var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
            var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);
            var output            = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

            var expectedOutput = new string[4][];

            expectedOutput[0] = position1.VcfFields;
            expectedOutput[1] = new[]
            { "chr1", "2", ".", "AGCTG", "AGGTG,TGGTC", ".", "PASS", "RECOMPOSED", "GT", "1|2" };
            expectedOutput[2] = position2.VcfFields;
            expectedOutput[3] = position3.VcfFields;

            for (int i = 0; i < output.Length; i++)
            {
                Assert.True(expectedOutput[i].SequenceEqual(output[i].VcfFields));
            }
        }
Exemple #18
0
        public void VariantGenerator_NoMnvAfterTrimming_NotRecompose()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(ChromosomeUtilities.RefNameToChromosome);
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	1	.	C	A	.	PASS	.	GT:PS	1|0:1584593	1|1:.	0|1:.", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	C	.	PASS	.	GT:PS	0|1:1584593	0/0:.	0/0:.", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                3, 4
            };

            var recomposer          = new VariantGenerator(sequenceProvider, _vidCreator);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2
            }, functionBlockRanges).ToList();

            Assert.Empty(recomposedPositions);
        }
Exemple #19
0
        public void VariantGenerator_FilterTag_OnlyDecomposedVariantsConsidered()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(ChromosomeUtilities.RefNameToChromosome);
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T	.	PASS	.	GT	0|1	0/1	0|0", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A	.	FailedForSomeReason	.	GT	0|0	0/1	0|0", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT	1|1	0/1	0|0", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                6, 8, 10
            };

            var recomposer          = new VariantGenerator(sequenceProvider, _vidCreator);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Single(recomposedPositions);
            Assert.Equal("chr1	2	.	AGCTG	AGCTC,TGCTC	.	PASS	RECOMPOSED	GT	1|2	.	0|0", string.Join("\t", recomposedPositions[0].VcfFields));
        }
        public static List <Configuration> buildConfigs(VariabilityModel vm, List <SamplingStrategies> binaryStrategies,
                                                        List <ExperimentalDesign> experimentalDesigns)
        {
            List <Configuration> result = new List <Configuration>();
            VariantGenerator     vg     = new VariantGenerator();

            List <List <BinaryOption> > binaryConfigs = new List <List <BinaryOption> >();
            List <Dictionary <NumericOption, Double> > numericConfigs = new List <Dictionary <NumericOption, double> >();

            foreach (SamplingStrategies strat in binaryStrategies)
            {
                switch (strat)
                {
                //Binary sampling heuristics
                case SamplingStrategies.ALLBINARY:
                    binaryConfigs.AddRange(vg.generateAllVariantsFast(vm));
                    break;

                case SamplingStrategies.BINARY_RANDOM:
                    RandomBinary rb = new RandomBinary(vm);
                    foreach (Dictionary <string, string> expDesignParamSet in binaryParams.randomBinaryParameters)
                    {
                        binaryConfigs.AddRange(rb.getRandomConfigs(expDesignParamSet));
                    }

                    break;

                case SamplingStrategies.OPTIONWISE:
                {
                    FeatureWise fw = new FeatureWise();
                    binaryConfigs.AddRange(fw.generateFeatureWiseConfigurations(GlobalState.varModel));
                }
                break;

                //case SamplingStrategies.MINMAX:
                //    {
                //        MinMax mm = new MinMax();
                //        binaryConfigs.AddRange(mm.generateMinMaxConfigurations(GlobalState.varModel));

                //    }
                //    break;

                case SamplingStrategies.PAIRWISE:
                {
                    PairWise pw = new PairWise();
                    binaryConfigs.AddRange(pw.generatePairWiseVariants(GlobalState.varModel));
                }
                break;

                case SamplingStrategies.NEGATIVE_OPTIONWISE:
                {
                    NegFeatureWise neg = new NegFeatureWise();        //2nd option: neg.generateNegativeFWAllCombinations(GlobalState.varModel));
                    binaryConfigs.AddRange(neg.generateNegativeFW(GlobalState.varModel));
                }
                break;

                case SamplingStrategies.T_WISE:
                    foreach (Dictionary <string, string> ParamSet in binaryParams.tWiseParameters)
                    {
                        TWise tw = new TWise();
                        int   t  = 3;

                        foreach (KeyValuePair <String, String> param in ParamSet)
                        {
                            if (param.Key.Equals(TWise.PARAMETER_T_NAME))
                            {
                                t = Convert.ToInt16(param.Value);
                            }
                            binaryConfigs.AddRange(tw.generateT_WiseVariants_new(vm, t));
                        }
                    }
                    break;
                }
            }

            //Experimental designs for numeric options
            if (experimentalDesigns.Count != 0)
            {
                handleDesigns(experimentalDesigns, numericConfigs, vm);
            }


            foreach (List <BinaryOption> binConfig in binaryConfigs)
            {
                if (numericConfigs.Count == 0)
                {
                    Configuration c = new Configuration(binConfig);
                    result.Add(c);
                }
                foreach (Dictionary <NumericOption, double> numConf in numericConfigs)
                {
                    Configuration c = new Configuration(binConfig, numConf);
                    result.Add(c);
                }
            }
            if (vm.MixedConstraints.Count == 0)
            {
                return(result.Distinct().ToList());
            }
            else
            {
                List <Configuration> unfilteredList        = result.Distinct().ToList();
                List <Configuration> filteredConfiguration = new List <Configuration>();
                foreach (Configuration toTest in unfilteredList)
                {
                    bool isValid = true;
                    foreach (MixedConstraint constr in vm.MixedConstraints)
                    {
                        if (!constr.requirementsFulfilled(toTest))
                        {
                            isValid = false;
                        }
                    }

                    if (isValid)
                    {
                        filteredConfiguration.Add(toTest);
                    }
                }
                return(filteredConfiguration);
            }
        }
Exemple #21
0
        /// <summary>
        /// Returns a set of random binary partial configurations.
        /// </summary>
        /// <param name="parameters">Parameters for this random sampling. The following paramters are supported:
        /// seed = the seed for the random generator (int required)
        /// numConfigs = the number of configurations that have to be selected.
        ///              To be able ot select a number of configurations equal to the number selected by the OW heuristic or
        ///              the TWise heuristics, two special values can be given for this paramter. To select a number equal to
        ///              the OW heuristics use "asOW" as value and to select a number equal to a TWise heuristics with a t of X
        ///              use "asTWX".
        /// </param>
        /// <returns>A list of random binary partial configuartions.</returns>
        public List <List <BinaryOption> > getRandomConfigs(Dictionary <String, String> parameters)
        {
            configurations.Clear();

            int seed       = 0;
            int numConfigs = varModel.BinaryOptions.Count;

            // parse parameters
            if (parameters.ContainsKey("numConfigs"))
            {
                String numConfigsValue = parameters["numConfigs"];
                if (!int.TryParse(numConfigsValue, out numConfigs))
                {
                    // special constants as parameter (numConfigs = asOW or asTWX
                    if (numConfigsValue.Contains("asOW"))
                    {
                        FeatureWise fw = new FeatureWise();
                        numConfigs = fw.generateFeatureWiseConfigsCSP(varModel).Count;
                    }
                    else if (numConfigsValue.Contains("asTW"))
                    {
                        numConfigsValue = numConfigsValue.Replace("asTW", "").Trim();
                        int.TryParse(numConfigsValue, out numConfigs);
                        TWise tw = new TWise();
                        numConfigs = tw.generateT_WiseVariants_new(varModel, numConfigs).Count;
                    }
                }
            }
            if (parameters.ContainsKey("seed"))
            {
                int.TryParse(parameters["seed"], out seed);
            }

            // build set of all valid binary partial configurations
            VariantGenerator            vg         = new VariantGenerator();
            List <List <BinaryOption> > allConfigs = vg.generateAllVariantsFast(varModel);

            //repair wrong parameters
            if (numConfigs >= allConfigs.Count)
            {
                if (numConfigs > allConfigs.Count)
                {
                    GlobalState.logError.logLine("Random Sampling: numConfigs to large for variability model. num set to " + allConfigs.Count);
                }
                configurations = allConfigs;
                return(allConfigs);
            }

            // select random configurations
            Random r = new Random(seed);

            for (int i = 0; i < numConfigs; i++)
            {
                List <BinaryOption> selectedConfig = allConfigs[r.Next(allConfigs.Count + 1)];

                if (configurations.Contains(selectedConfig))
                {
                    i -= 1;
                }
                else
                {
                    configurations.Add(selectedConfig);
                }
            }
            return(configurations);
        }
        public void VariantGenerator_AsExpected()
        {
            var mockSequenceProvider = new Mock <ISequenceProvider>();

            mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
            .Returns(new Dictionary <string, IChromosome> {
                { "chr1", new Chromosome("chr1", "1", 0) }
            });
            mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
            var sequenceProvider = mockSequenceProvider.Object;

            var position1           = AnnotationUtilities.GetSimplePosition("chr1	2	.	A	T,G	.	PASS	.	GT:PS	0|1:123	2/2:789	0|2:456", sequenceProvider.RefNameToChromosome);
            var position2           = AnnotationUtilities.GetSimplePosition("chr1	4	.	C	A,G	.	PASS	.	GT:PS	1|1:301	1|2:789	1|2:456", sequenceProvider.RefNameToChromosome);
            var position3           = AnnotationUtilities.GetSimplePosition("chr1	6	.	G	C	.	PASS	.	GT:PS	.	1|0:789	0/1:.", sequenceProvider.RefNameToChromosome);
            var functionBlockRanges = new List <int> {
                4, 6, 8
            };

            var recomposer          = new VariantGenerator(sequenceProvider);
            var recomposedPositions = recomposer.Recompose(new List <ISimplePosition> {
                position1, position2, position3
            }, functionBlockRanges).ToList();

            Assert.Equal(2, recomposedPositions.Count);
            Assert.Equal("chr1	2	.	AGC	AGA,GGG,TGA	.	PASS	RECOMPOSED	GT:PS	1|3:123	.	1|2:456", string.Join("\t", recomposedPositions[0].VcfFields));
            Assert.Equal("chr1	2	.	AGCTG	GGATC,GGGTG	.	PASS	RECOMPOSED	GT:PS	.	1|2:789	.", string.Join("\t", recomposedPositions[1].VcfFields));

            //Check LinkedVids
            //SNVs
            Assert.Equal(2, position1.LinkedVids.Length);
            Assert.Equal(new List <string> {
                "1:2:4:TGA"
            }, position1.LinkedVids[0]);
            position1.LinkedVids[1].Sort();
            Assert.Equal(new List <string> {
                "1:2:4:GGG", "1:2:6:GGATC", "1:2:6:GGGTG"
            }, position1.LinkedVids[1]);
            Assert.Equal(2, position2.LinkedVids.Length);

            position2.LinkedVids[0].Sort();
            Assert.Equal(new List <string> {
                "1:2:4:AGA", "1:2:4:TGA", "1:2:6:GGATC"
            }, position2.LinkedVids[0]);
            position2.LinkedVids[1].Sort();
            Assert.Equal(new List <string> {
                "1:2:4:GGG", "1:2:6:GGGTG"
            }, position2.LinkedVids[1]);

            Assert.Single(position3.LinkedVids);
            Assert.Equal(new List <string> {
                "1:2:6:GGATC"
            }, position3.LinkedVids[0]);

            //MNVs
            Assert.Equal(3, recomposedPositions[0].LinkedVids.Length);
            Assert.Equal(new List <string> {
                "1:4:A"
            }, recomposedPositions[0].LinkedVids[0]);
            Assert.Equal(new List <string> {
                "1:2:G", "1:4:G"
            }, recomposedPositions[0].LinkedVids[1]);
            Assert.Equal(new List <string> {
                "1:2:T", "1:4:A"
            }, recomposedPositions[0].LinkedVids[2]);
            Assert.Equal(2, recomposedPositions[1].LinkedVids.Length);
            Assert.Equal(new List <string> {
                "1:2:G", "1:4:A", "1:6:C"
            }, recomposedPositions[1].LinkedVids[0]);
            Assert.Equal(new List <string> {
                "1:2:G", "1:4:G"
            }, recomposedPositions[1].LinkedVids[1]);
        }