コード例 #1
0
        /// <summary>
        /// Checks that <c>AlleleBlock.GetMaxPloidy</c> returns 2 for the genotypes ".", "1|2" and "0/2",
        /// i.e. the "." entry does not change the reported value.
        /// </summary>
        public void GetPloidyFromGenotypes_DotIsIgnored()
        {
            // Parsed in the same order as they are placed in the input array.
            var dotGenotype   = Genotype.GetGenotype(".");
            var pipeGenotype  = Genotype.GetGenotype("1|2");
            var slashGenotype = Genotype.GetGenotype("0/2");

            var maxPloidy = AlleleBlock.GetMaxPloidy(new[] { dotGenotype, pipeGenotype, slashGenotype });

            Assert.Equal(2, maxPloidy);
        }
コード例 #2
0
        /// <summary>
        /// Runs <c>AlleleBlock.GetAlleleBlockToSampleHaplotype</c> on three single-sample genotype blocks
        /// (the third one built with a second constructor argument of 1) over the starts 100, 102, 103, 104
        /// and checks that two specific allele blocks are present, each mapped to one haplotype of sample 2.
        /// </summary>
        public void GetAlleleBlockToSampleHaplotype_AlleleBlock_WithInternalRefPositions_SplitIfOutOfRange()
        {
            var sample0Genotypes = new[] { "1|2", "0/0", "0|0", "1/1" }.Select(Genotype.GetGenotype).ToArray();
            var sample0Block     = new GenotypeBlock(sample0Genotypes);
            var sample1Genotypes = new[] { "1/1", "0|0", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample1Block     = new GenotypeBlock(sample1Genotypes);
            var sample2Genotypes = new[] { "1|2", "0|0", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample2Block     = new GenotypeBlock(sample2Genotypes, 1);

            var samplesByGenotypeBlock = new Dictionary<GenotypeBlock, List<int>>
            {
                { sample0Block, new List<int> { 0 } },
                { sample1Block, new List<int> { 1 } },
                { sample2Block, new List<int> { 2 } }
            };

            // Every slot refers to the same empty set instance; one slot per genotype of the first block.
            var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), sample0Block.Genotypes.Length).ToArray();

            var positionStarts = new[] { 100, 102, 103, 104 };
            var blockRanges    = positionStarts.Select(start => start + 2).ToList();

            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock, unsupportedVarIndexes, positionStarts, blockRanges, out _);

            var firstExpectedBlock  = new AlleleBlock(1, new[] { 1, 0, 1 }, 0, 0);
            var secondExpectedBlock = new AlleleBlock(1, new[] { 2, 0, 1 }, 0, 0);

            // The block must be a key of the result and map to exactly the given haplotypes, in order.
            void AssertHaplotypes(AlleleBlock expectedBlock, params SampleHaplotype[] expectedHaplotypes)
            {
                Assert.True(haplotypesByAlleleBlock.ContainsKey(expectedBlock));
                Assert.True(haplotypesByAlleleBlock[expectedBlock].SequenceEqual(expectedHaplotypes));
            }

            AssertHaplotypes(firstExpectedBlock, new SampleHaplotype(2, 0));
            AssertHaplotypes(secondExpectedBlock, new SampleHaplotype(2, 1));
        }
コード例 #3
0
        /// <summary>
        /// Checks that <c>AlleleBlockMerger.ExtendAlleleBlock</c>, called with (2, 2) on a block created as
        /// <c>new AlleleBlock(2, {1, 1}, 2, 2)</c>, yields a block equal to
        /// <c>new AlleleBlock(0, {0, 0, 1, 1, 0, 0}, -1, -1)</c>.
        /// </summary>
        public void ExtendAlleleBlock_AsExpected()
        {
            var alleleBlock1   = new AlleleBlock(2, new [] { 1, 1 }, 2, 2);
            var extendedBlock1 = AlleleBlockMerger.ExtendAlleleBlock(alleleBlock1, 2, 2);

            var expectedBlock1 = new AlleleBlock(0, new [] { 0, 0, 1, 1, 0, 0 }, -1, -1);

            // xUnit's Assert.Equal takes (expected, actual); passing them in that order keeps the
            // "Expected"/"Actual" labels of a failure message attached to the right values.
            Assert.Equal(expectedBlock1, extendedBlock1);
        }
コード例 #4
0
        /// <summary>
        /// Gathers the per-block and per-sample values needed to build a <c>VariantInfo</c> for the positions
        /// covered by <paramref name="alleleBlock"/>, i.e. <c>AlleleIndexes.Length</c> consecutive positions
        /// starting at <c>PositionIndex</c>.
        /// </summary>
        /// <param name="positionSet">Source of the simple positions and of the per-sample GQ, PS and GT arrays.</param>
        /// <param name="alleleBlock">Supplies the first position index and the number of positions.</param>
        /// <returns>
        /// A <c>VariantInfo</c> built from: the QUAL summary string, the FILTER field of each position,
        /// one GQ summary and one phase-set value per sample, the ploidy of each sample for which
        /// <c>Genotype.IsAllHomozygousReference</c> is true over the block (null otherwise),
        /// and one empty filter list per sample.
        /// </returns>
        private static VariantInfo GetVariantInfo(PositionSet positionSet, AlleleBlock alleleBlock)
        {
            var simplePositions = positionSet.SimplePositions;
            int firstIndex      = alleleBlock.PositionIndex;
            int blockLength     = alleleBlock.AlleleIndexes.Length;
            int sampleCount     = positionSet.NumSamples;

            // QUAL fields of the block positions, reduced to a single string by the helper.
            var qualFields = Enumerable.Range(firstIndex, blockLength)
                                       .Select(index => simplePositions[index].VcfFields[VcfCommon.QualIndex]);
            string qual = GetStringWithMinValueOrDot(qualFields);

            // FILTER field of every position in the block, kept one entry per position.
            var filters = Enumerable.Range(firstIndex, blockLength)
                                    .Select(index => simplePositions[index].VcfFields[VcfCommon.FilterIndex])
                                    .ToArray();

            // One GQ summary per sample, computed over that sample's slice of the block.
            string[] gqValues = Enumerable.Range(0, sampleCount)
                                          .Select(sample => GetStringWithMinValueOrDot(
                                                      new ArraySegment<string>(positionSet.GqInfo.Values[sample], firstIndex, blockLength).ToArray()))
                                          .ToArray();

            // One phase-set value per sample, derived from the PS tags and homozygosity flags inside the block.
            string[] psValues = Enumerable.Range(0, sampleCount)
                                          .Select(sample =>
                                          {
                                              var phaseSetTags = new ArraySegment<string>(positionSet.PsInfo.Values[sample], firstIndex, blockLength);
                                              var homozygosityFlags = positionSet.GtInfo.Values[sample].Select(genotype => genotype.IsHomozygous).ToArray();
                                              var homozygousInBlock = new ArraySegment<bool>(homozygosityFlags, firstIndex, blockLength);
                                              return GetPhaseSetForRecomposedVariant(phaseSetTags, homozygousInBlock);
                                          })
                                          .ToArray();

            // Stays null for samples that are not homozygous reference across the whole block.
            var homoReferenceSamplePloidy = new int?[sampleCount];
            for (var sample = 0; sample < sampleCount; sample++)
            {
                var sampleGenotypes = positionSet.GtInfo.Values[sample];
                if (!Genotype.IsAllHomozygousReference(sampleGenotypes, firstIndex, blockLength))
                {
                    continue;
                }

                homoReferenceSamplePloidy[sample] = positionSet.GtInfo.Values[sample][firstIndex].AlleleIndexes.Length;
            }

            // Each sample starts with its own empty filter list.
            var sampleFilters = Enumerable.Range(0, sampleCount)
                                          .Select(_ => new List<bool>())
                                          .ToArray();

            return new VariantInfo(qual, filters, gqValues, psValues, homoReferenceSamplePloidy, sampleFilters);
        }
コード例 #5
0
        /// <summary>
        /// Runs <c>AlleleBlock.GetAlleleBlockToSampleHaplotype</c> on four single-sample genotype blocks that
        /// differ in their leading/trailing "0|0" entries (two of them built with a second constructor
        /// argument of 1) and checks that four allele blocks sharing the alleles {1, 1} but differing in their
        /// last two constructor arguments are present, each mapped to both haplotypes of one sample.
        /// </summary>
        public void GetAlleleBlockToSampleHaplotype_AwareOfTrimmedRefPositions()
        {
            var sample0Genotypes = new[] { "0|0", "1|1", "1|1", "0|0" }.Select(Genotype.GetGenotype).ToArray();
            var sample0Block     = new GenotypeBlock(sample0Genotypes);
            var sample1Genotypes = new[] { "0|0", "1|1", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample1Block     = new GenotypeBlock(sample1Genotypes);
            var sample2Genotypes = new[] { "1|1", "1|1", "0|0" }.Select(Genotype.GetGenotype).ToArray();
            var sample2Block     = new GenotypeBlock(sample2Genotypes, 1);
            var sample3Genotypes = new[] { "1|1", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample3Block     = new GenotypeBlock(sample3Genotypes, 1);

            var samplesByGenotypeBlock = new Dictionary<GenotypeBlock, List<int>>
            {
                { sample0Block, new List<int> { 0 } },
                { sample1Block, new List<int> { 1 } },
                { sample2Block, new List<int> { 2 } },
                { sample3Block, new List<int> { 3 } }
            };

            // Four slots, all referring to the same empty set instance.
            var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), 4).ToArray();
            var positionStarts        = Enumerable.Range(100, 4).ToArray();
            var blockRanges           = positionStarts.Select(start => start + 2).ToList();

            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock, unsupportedVarIndexes, positionStarts, blockRanges, out _);

            var expectedForSample0 = new AlleleBlock(1, new[] { 1, 1 }, 1, 1);
            var expectedForSample1 = new AlleleBlock(1, new[] { 1, 1 }, 1, 0);
            var expectedForSample2 = new AlleleBlock(1, new[] { 1, 1 }, 0, 1);
            var expectedForSample3 = new AlleleBlock(1, new[] { 1, 1 }, 0, 0);

            // The block must be a key of the result and map to exactly the given haplotypes, in order.
            void AssertHaplotypes(AlleleBlock expectedBlock, params SampleHaplotype[] expectedHaplotypes)
            {
                Assert.True(haplotypesByAlleleBlock.ContainsKey(expectedBlock));
                Assert.True(haplotypesByAlleleBlock[expectedBlock].SequenceEqual(expectedHaplotypes));
            }

            AssertHaplotypes(expectedForSample0, new SampleHaplotype(0, 0), new SampleHaplotype(0, 1));
            AssertHaplotypes(expectedForSample1, new SampleHaplotype(1, 0), new SampleHaplotype(1, 1));
            AssertHaplotypes(expectedForSample2, new SampleHaplotype(2, 0), new SampleHaplotype(2, 1));
            AssertHaplotypes(expectedForSample3, new SampleHaplotype(3, 0), new SampleHaplotype(3, 1));
        }
コード例 #6
0
        /// <summary>
        /// Runs <c>AlleleBlock.GetAlleleBlockToSampleHaplotype</c> on four single-sample genotype blocks over
        /// the starts 100, 101, 102, 104 and checks that five specific allele blocks are present with the
        /// expected sample haplotypes; one of them is shared by a haplotype of sample 1 and one of sample 3.
        /// </summary>
        public void GetAlleleBlockToSampleHaplotype_AlleleBlock_OneAlleleIsRef_EachTime()
        {
            var sample0Genotypes = new[] { "1|0", "0|1", "1|0", "0|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample0Block     = new GenotypeBlock(sample0Genotypes);
            var sample1Genotypes = new[] { "1/1", "0|1", "1|0" }.Select(Genotype.GetGenotype).ToArray();
            var sample1Block     = new GenotypeBlock(sample1Genotypes, 1);
            var sample2Genotypes = new[] { "0|0", "1|0", "0|1", "0|0" }.Select(Genotype.GetGenotype).ToArray();
            var sample2Block     = new GenotypeBlock(sample2Genotypes);
            var sample3Genotypes = new[] { "0|1", "1|0", "1|0" }.Select(Genotype.GetGenotype).ToArray();
            var sample3Block     = new GenotypeBlock(sample3Genotypes);

            var samplesByGenotypeBlock = new Dictionary<GenotypeBlock, List<int>>
            {
                { sample0Block, new List<int> { 0 } },
                { sample1Block, new List<int> { 1 } },
                { sample2Block, new List<int> { 2 } },
                { sample3Block, new List<int> { 3 } }
            };

            // Every slot refers to the same empty set instance; one slot per genotype of the first block.
            var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), sample0Block.Genotypes.Length).ToArray();

            var positionStarts = new[] { 100, 101, 102, 104 };
            var blockRanges    = positionStarts.Select(start => start + 2).ToList();

            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock, unsupportedVarIndexes, positionStarts, blockRanges, out _);

            var expectedAltRefAlt = new AlleleBlock(0, new[] { 1, 0, 1 }, 0, 0);
            var expectedRefAltRef = new AlleleBlock(0, new[] { 0, 1, 0 }, 0, 0);
            var expectedAltRef    = new AlleleBlock(1, new[] { 1, 0 }, 0, 0);
            var expectedAltAlt    = new AlleleBlock(1, new[] { 1, 1 }, 0, 0);
            var expectedRefRef    = new AlleleBlock(1, new[] { 0, 0 }, 0, 0);

            // The block must be a key of the result and map to exactly the given haplotypes, in order.
            void AssertHaplotypes(AlleleBlock expectedBlock, params SampleHaplotype[] expectedHaplotypes)
            {
                Assert.True(haplotypesByAlleleBlock.ContainsKey(expectedBlock));
                Assert.True(haplotypesByAlleleBlock[expectedBlock].SequenceEqual(expectedHaplotypes));
            }

            AssertHaplotypes(expectedAltRefAlt, new SampleHaplotype(0, 0));
            AssertHaplotypes(expectedRefAltRef, new SampleHaplotype(0, 1));
            AssertHaplotypes(expectedAltRef, new SampleHaplotype(1, 0));
            AssertHaplotypes(expectedAltAlt, new SampleHaplotype(1, 1), new SampleHaplotype(3, 0));
            AssertHaplotypes(expectedRefRef, new SampleHaplotype(3, 1));
        }
コード例 #7
0
        /// <summary>
        /// Builds allele blocks for one sample with genotypes "1|2", "1/1", "0|1", "0/1" at starts
        /// 356, 358, 360, 361, merges them, and checks <c>VariantGenerator.GetPositionsAndRefAltAlleles</c>
        /// for the first two merged blocks: both are expected to span 356-360 with reference "GAATC",
        /// with alternate sequences "CATTC" and "TATTG" respectively.
        /// </summary>
        public void GetPositionsAndRefAltAlleles_AsExpected()
        {
            var sampleGenotypes = new[] { "1|2", "1/1", "0|1", "0/1" }.Select(Genotype.GetGenotype).ToArray();
            var sampleBlock     = new GenotypeBlock(sampleGenotypes);

            var samplesByGenotypeBlock = new Dictionary<GenotypeBlock, List<int>>
            {
                { sampleBlock, new List<int> { 0 } }
            };

            // Every slot refers to the same empty set instance; one slot per genotype of the block.
            var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), sampleBlock.Genotypes.Length).ToArray();
            var positionStarts        = new[] { 356, 358, 360, 361 };
            var blockRanges           = new List<int> { 358, 360, 362, 364 };

            // First entry of each inner array is the reference allele, the rest are alternates.
            var allelesPerPosition = new[]
            {
                new[] { "G", "C", "T" },
                new[] { "A", "T" },
                new[] { "C", "G" },
                new[] { "G", "T" }
            };
            const string referenceBases = "GAATCG";

            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock, unsupportedVarIndexes, positionStarts, blockRanges, out var blockGraph);
            var mergedEntries = AlleleBlockMerger.Merge(haplotypesByAlleleBlock, blockGraph).ToArray();
            var alleleSet     = new AlleleSet(ChromosomeUtilities.Chr1, positionStarts, allelesPerPosition);
            var mergedBlocks  = mergedEntries.Select(entry => entry.Key).ToArray();
            var sequence      = new NSequence();

            var firstResult = VariantGenerator.GetPositionsAndRefAltAlleles(
                mergedBlocks[0], alleleSet, referenceBases, positionStarts[0], null, sequence, _vidCreator);
            var secondResult = VariantGenerator.GetPositionsAndRefAltAlleles(
                mergedBlocks[1], alleleSet, referenceBases, positionStarts[0], null, sequence, _vidCreator);

            var firstExpectedIndexes  = new[] { 0, 1 };
            var secondExpectedIndexes = new[] { 0, 1, 2 };

            Assert.Equal((356, 360, "GAATC", "CATTC"), (firstResult.Start, firstResult.End, firstResult.Ref, firstResult.Alt));
            // Only the leading entries of the returned index list are compared.
            for (var position = 0; position < firstExpectedIndexes.Length; position++)
            {
                Assert.Equal(firstExpectedIndexes[position], firstResult.VarPosIndexesInAlleleBlock[position]);
            }

            Assert.Equal((356, 360, "GAATC", "TATTG"), (secondResult.Start, secondResult.End, secondResult.Ref, secondResult.Alt));
            for (var position = 0; position < secondExpectedIndexes.Length; position++)
            {
                Assert.Equal(secondExpectedIndexes[position], secondResult.VarPosIndexesInAlleleBlock[position]);
            }
        }
コード例 #8
0
                         decomposedVids) GetPositionsAndRefAltAlleles(AlleleBlock alleleBlock, AlleleSet alleleSet,
                                                                      string totalRefSequence, int regionStart, List <ISimplePosition> simplePositions, ISequence sequence, IVariantIdCreator vidCreator)
        {
            int numPositions       = alleleBlock.AlleleIndexes.Length;
            int firstPositionIndex = alleleBlock.PositionIndex;
            int lastPositionIndex  = alleleBlock.PositionIndex + numPositions - 1;

            int    blockStart     = alleleSet.Starts[firstPositionIndex];
            int    blockEnd       = alleleSet.Starts[lastPositionIndex];
            string lastRefAllele  = alleleSet.VariantArrays[lastPositionIndex][0];
            int    blockRefLength = blockEnd - blockStart + lastRefAllele.Length;
            string refSequence    = totalRefSequence.Substring(blockStart - regionStart, blockRefLength);

            var refSequenceStart               = 0;
            var altSequenceSegments            = new LinkedList <string>();
            var variantPosIndexesInAlleleBlock = new List <int>();
            var vidListsNeedUpdate             = new List <List <string> >();
            var decomposedVids = new List <string>();

            if (FindConflictAllele(alleleBlock, alleleSet))
            {
                return(default);
コード例 #9
0
        /// <summary>
        /// Builds allele blocks for two samples over the starts 100, 101, 102, passes them with the allele
        /// block graph to <c>AlleleBlockMerger.Merge</c>, and checks that the merged result contains three
        /// full-length blocks with the expected sample haplotypes; the {0, 1, 1} block is expected to be
        /// shared by haplotype 1 of sample 0 and haplotype 0 of sample 1.
        /// </summary>
        public void Merge_AsExpected()
        {
            var sample0Genotypes = new[] { "1|0", "1|1", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample0Block     = new GenotypeBlock(sample0Genotypes);
            var sample1Genotypes = new[] { "0|0", "1|0", "1|1" }.Select(Genotype.GetGenotype).ToArray();
            var sample1Block     = new GenotypeBlock(sample1Genotypes);

            var samplesByGenotypeBlock = new Dictionary<GenotypeBlock, List<int>>
            {
                { sample0Block, new List<int> { 0 } },
                { sample1Block, new List<int> { 1 } }
            };

            // Three slots, all referring to the same empty set instance.
            var unsupportedVarIndexes = Enumerable.Repeat(new HashSet<int>(), 3).ToArray();
            var positionStarts        = Enumerable.Range(100, 3).ToArray();
            var blockRanges           = positionStarts.Select(start => start + 2).ToList();

            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock, unsupportedVarIndexes, positionStarts, blockRanges, out var blockGraph);
            var mergedHaplotypesByBlock = AlleleBlockMerger.Merge(haplotypesByAlleleBlock, blockGraph);

            var expectedAllAlt    = new AlleleBlock(0, new[] { 1, 1, 1 }, -1, -1);
            var expectedRefAltAlt = new AlleleBlock(0, new[] { 0, 1, 1 }, -1, -1);
            var expectedRefRefAlt = new AlleleBlock(0, new[] { 0, 0, 1 }, -1, -1);

            // The block must be a key of the merged result and map to exactly the given haplotypes, in order.
            void AssertHaplotypes(AlleleBlock expectedBlock, params SampleHaplotype[] expectedHaplotypes)
            {
                Assert.True(mergedHaplotypesByBlock.ContainsKey(expectedBlock));
                Assert.True(mergedHaplotypesByBlock[expectedBlock].SequenceEqual(expectedHaplotypes));
            }

            AssertHaplotypes(expectedAllAlt, new SampleHaplotype(0, 0));
            AssertHaplotypes(expectedRefAltAlt, new SampleHaplotype(0, 1), new SampleHaplotype(1, 0));
            AssertHaplotypes(expectedRefRefAlt, new SampleHaplotype(1, 1));
        }
コード例 #10
0
        /// <summary>
        /// Builds the recomposed variant described by <paramref name="alleleBlock"/>: the reference sequence
        /// spanning the block and the alternate sequence obtained by substituting every non-reference allele
        /// of the block into that reference sequence.
        /// </summary>
        /// <param name="alleleBlock">Supplies the first position index and the allele index chosen at each position.</param>
        /// <param name="alleleSet">Supplies the start and the allele strings (reference first) of every position.</param>
        /// <param name="totalRefSequence">Reference bases; the block's bases start at offset <c>blockStart - regionStart</c>.</param>
        /// <param name="regionStart">Coordinate corresponding to the first character of <paramref name="totalRefSequence"/>.</param>
        /// <param name="simplePositions">
        /// Optional. When not null, the position entry of every non-reference allele is updated in place:
        /// a missing vid is created (and the allele flagged as decomposed), a missing linked-vid list is
        /// created, and the recomposed variant id is appended to that list.
        /// </param>
        /// <returns>
        /// The block start, the block end (<c>blockStart + blockRefLength - 1</c>), the reference sequence,
        /// the recomposed alternate sequence, the in-block indexes of the non-reference positions and the
        /// vids of the decomposed variants (empty when <paramref name="simplePositions"/> is null).
        /// </returns>
        /// <exception cref="UserErrorException">
        /// Thrown when a non-reference allele starts before the end of the reference allele of the previously
        /// applied non-reference allele.
        /// </exception>
        internal static (int Start, int End, string Ref, string Alt, List <int> VarPosIndexesInAlleleBlock, List <string> decomposedVids) GetPositionsAndRefAltAlleles(AlleleBlock alleleBlock, AlleleSet alleleSet, string totalRefSequence, int regionStart, List <ISimplePosition> simplePositions)
        {
            int numPositions       = alleleBlock.AlleleIndexes.Length;
            int firstPositionIndex = alleleBlock.PositionIndex;
            int lastPositionIndex  = alleleBlock.PositionIndex + numPositions - 1;

            int    blockStart     = alleleSet.Starts[firstPositionIndex];
            int    blockEnd       = alleleSet.Starts[lastPositionIndex];
            string lastRefAllele  = alleleSet.VariantArrays[lastPositionIndex][0];
            int    blockRefLength = blockEnd - blockStart + lastRefAllele.Length;
            var    refSequence    = totalRefSequence.Substring(blockStart - regionStart, blockRefLength);

            // Offset in refSequence of the first reference base not yet copied or replaced.
            int refSequenceStart               = 0;
            var altSequenceSegments            = new LinkedList <string>();
            var variantPosIndexesInAlleleBlock = new List <int>();
            var vidListsNeedUpdate             = new List <List <string> >();
            var decomposedVids = new List <string>();

            // Alt allele most recently spliced into the recomposed sequence. It is tracked explicitly because
            // the neighbouring position (positionIndex - 1) may have been skipped as reference and its allele
            // array may not even contain the current allele index.
            string previousAltAllele = null;

            for (int positionIndex = firstPositionIndex; positionIndex <= lastPositionIndex; positionIndex++)
            {
                int indexInBlock = positionIndex - firstPositionIndex;
                int alleleIndex  = alleleBlock.AlleleIndexes[indexInBlock];
                //only non-reference alleles considered
                if (alleleIndex == 0)
                {
                    continue;
                }

                variantPosIndexesInAlleleBlock.Add(indexInBlock);
                string refAllele                     = alleleSet.VariantArrays[positionIndex][0];
                string altAllele                     = alleleSet.VariantArrays[positionIndex][alleleIndex];
                int    positionOnRefSequence         = alleleSet.Starts[positionIndex] - blockStart;
                int    refRegionBetweenTwoAltAlleles = positionOnRefSequence - refSequenceStart;

                // A negative gap means this allele starts inside the reference span already consumed
                // by the previously applied alt allele.
                if (refRegionBetweenTwoAltAlleles < 0)
                {
                    throw new UserErrorException($"Conflicting alternative alleles identified at {alleleSet.Chromosome.UcscName}:{alleleSet.Starts[positionIndex]}: both \"{previousAltAllele}\" and \"{altAllele}\" are present.");
                }

                // Copy the untouched reference bases before this allele, then the allele itself.
                string refSequenceBefore = refSequence.Substring(refSequenceStart, refRegionBetweenTwoAltAlleles);
                altSequenceSegments.AddLast(refSequenceBefore);
                altSequenceSegments.AddLast(altAllele);
                refSequenceStart  = positionOnRefSequence + refAllele.Length;
                previousAltAllele = altAllele;

                if (simplePositions == null)
                {
                    continue;
                }
                var thisPosition = simplePositions[positionIndex];
                // alleleIndex is 1-based for altAlleles
                int varIndex = alleleIndex - 1;

                //Only SNVs get recomposed for now
                if (thisPosition.Vids[varIndex] == null)
                {
                    thisPosition.Vids[varIndex] = SmallVariantCreator.GetVid(alleleSet.Chromosome.EnsemblName,
                                                                             thisPosition.Start, thisPosition.End, thisPosition.AltAlleles[varIndex], VariantType.SNV);
                    thisPosition.IsDecomposed[varIndex] = true;
                }
                decomposedVids.Add(thisPosition.Vids[varIndex]);

                if (thisPosition.LinkedVids[varIndex] == null)
                {
                    thisPosition.LinkedVids[varIndex] = new List <string>();
                }
                vidListsNeedUpdate.Add(thisPosition.LinkedVids[varIndex]);
            }

            // Append whatever reference bases remain after the last alt allele.
            altSequenceSegments.AddLast(refSequence.Substring(refSequenceStart));
            var recomposedAllele    = string.Concat(altSequenceSegments);
            var blockRefEnd         = blockStart + blockRefLength - 1;
            var recomposedVariantId = SmallVariantCreator.GetVid(alleleSet.Chromosome.EnsemblName, blockStart, blockRefEnd, recomposedAllele, VariantType.MNV);

            // Link every decomposed variant back to the recomposed one.
            vidListsNeedUpdate.ForEach(x => x.Add(recomposedVariantId));
            return(blockStart, blockRefEnd, refSequence, recomposedAllele, variantPosIndexesInAlleleBlock, decomposedVids);
        }