Esempio n. 1
0
        // Builds the AlleleSet of a position set.
        // For every position an allele array is created whose element 0 is the reference allele,
        // followed by the alternate alleles in their original order. The chromosome is taken
        // from the first position, and the start coordinates are collected from all positions.
        private static AlleleSet GenerateAlleleSet(PositionSet positionSet)
        {
            var positionCount     = positionSet._numPositions;
            var allelesByPosition = new string[positionCount][];
            var startCoordinates  = positionSet.SimplePositions.Select(p => p.Start).ToArray();

            for (var i = 0; i < positionCount; i++)
            {
                var current    = positionSet.SimplePositions[i];
                var altAlleles = current.AltAlleles;

                // slot 0 is reserved for the reference allele, the alternates follow from slot 1
                var alleles = new string[altAlleles.Length + 1];
                alleles[0] = current.RefAllele;
                altAlleles.CopyTo(alleles, 1);

                allelesByPosition[i] = alleles;
            }

            var chromosome = positionSet.SimplePositions[0].Chromosome;
            return new AlleleSet(chromosome, startCoordinates, allelesByPosition);
        }
Esempio n. 2
0
        // Collects the per-sample VCF columns of every position into a SampleInfo.
        // Each sample column (located at VcfCommon.GenotypeIndex + sample index) is split on ':'
        // and stored in a [position, sample] table.
        private static SampleInfo GetSampleInfo(PositionSet positionSet)
        {
            var positionCount = positionSet._numPositions;
            var sampleCount   = positionSet.NumSamples;
            var splitFields   = new string[positionCount, sampleCount][];

            for (var posIndex = 0; posIndex < positionCount; posIndex++)
            {
                for (var sample = 0; sample < sampleCount; sample++)
                {
                    var column    = VcfCommon.GenotypeIndex + sample;
                    var rawSample = positionSet.SimplePositions[posIndex].VcfFields[column];
                    splitFields[posIndex, sample] = rawSample.OptimizedSplit(':');
                }
            }

            return new SampleInfo(splitFields);
        }
Esempio n. 3
0
        // A GenotypeBlock may be shared by several samples.
        // Phase set information is used at this step mainly to avoid repeating the same calculation.
        // The blocks produced here may be segmented further once more details are taken into account.
        //
        // Returns one sub-block of the sample's complete genotype block for every range reported
        // by GetGenotypeBlockRange, in the order the ranges are enumerated.
        private static IEnumerable<GenotypeBlock> GetGenotypeBlocks(PositionSet positionSet, int sampleIndex)
        {
            var sampleGenotypes = positionSet.GtInfo.Values[sampleIndex];
            var wholeBlock      = new GenotypeBlock(sampleGenotypes);

            // inputs for the range calculation: phase sets plus per-genotype phased/homozygous flags
            var phaseSets    = positionSet.PsInfo.Values[sampleIndex];
            var isPhased     = sampleGenotypes.Select(g => g.IsPhased).ToArray();
            var isHomozygous = sampleGenotypes.Select(g => g.IsHomozygous).ToArray();
            var ranges       = GetGenotypeBlockRange(phaseSets, isPhased, isHomozygous);

            var subBlocks = new List<GenotypeBlock>();
            foreach (var blockRange in ranges)
            {
                var subBlock = wholeBlock.GetSubBlock(blockRange.StartIndex, blockRange.PositionCount);
                subBlocks.Add(subBlock);
            }

            return subBlocks;
        }
Esempio n. 4
0
        // For every position, collects the 1-based numbers (as used by the GT tag) of the alternate
        // alleles that are considered unsupported. An alternate allele is recorded when
        // IsSupportedVariantType rejects it and the position's ALT field is not equal to
        // VcfCommon.GatkNonRefAllele. Every position gets a set, possibly an empty one.
        private static HashSet<int>[] GetAllelesWithUnsupportedTypes(PositionSet positionSet)
        {
            var positionCount         = positionSet._numPositions;
            var unsupportedByPosition = new HashSet<int>[positionCount];

            for (var p = 0; p < positionCount; p++)
            {
                var unsupported = new HashSet<int>();
                unsupportedByPosition[p] = unsupported;

                var position = positionSet.SimplePositions[p];
                for (var altIndex = 0; altIndex < position.AltAlleles.Length; altIndex++)
                {
                    if (IsSupportedVariantType(position.RefAllele, position.AltAlleles[altIndex]))
                    {
                        continue;
                    }

                    // positions whose ALT field is exactly the GATK non-ref allele are never flagged
                    if (position.VcfFields[VcfCommon.AltIndex] == VcfCommon.GatkNonRefAllele)
                    {
                        continue;
                    }

                    // GT allele numbers are 1-based (0 denotes the reference allele)
                    unsupported.Add(altIndex + 1);
                }
            }

            return unsupportedByPosition;
        }
Esempio n. 5
0
        // Creates a PositionSet from the given positions and function block ranges and fills in
        // its derived data, in this order:
        //   1. allele set, unsupported alleles and split sample columns
        //   2. GT, PS and GQ tag values per sample
        //   3. allele blocks mapped to sample haplotypes, merged via AlleleBlockMerger
        public static PositionSet CreatePositionSet(List<ISimplePosition> simpleSimplePositions, List<int> functionBlockRanges)
        {
            var result = new PositionSet(simpleSimplePositions, functionBlockRanges);

            result.AlleleSet                    = GenerateAlleleSet(result);
            result._allelesWithUnsupportedTypes = GetAllelesWithUnsupportedTypes(result);
            result._sampleInfo                  = GetSampleInfo(result);

            // the order of the requested tags fixes the meaning of the indexes below: 0 = GT, 1 = PS, 2 = GQ
            var tagNames   = new[] { "GT", "PS", "GQ" };
            var tagIndexes = result.GetSampleTagIndexes(tagNames);

            result.GtInfo = TagInfo<Genotype>.GetTagInfo(result._sampleInfo, tagIndexes[0], ExtractSampleValue, Genotype.GetGenotype);
            result.PsInfo = TagInfo<string>.GetTagInfo(result._sampleInfo, tagIndexes[1], ExtractSampleValue, value => value);
            result.GqInfo = TagInfo<string>.GetTagInfo(result._sampleInfo, tagIndexes[2], ExtractSampleValue, value => value);

            var samplesByGenotypeBlock = GetGenotypeToSampleIndex(result);
            var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
                samplesByGenotypeBlock,
                result._allelesWithUnsupportedTypes,
                result.AlleleSet.Starts,
                result.FunctionBlockRanges,
                out var blockGraph);

            result.AlleleBlockToSampleHaplotype = AlleleBlockMerger.Merge(haplotypesByAlleleBlock, blockGraph);
            return result;
        }
Esempio n. 6
0
        // Groups sample indexes by the genotype blocks they carry.
        // Every sample's genotype blocks are obtained from GetGenotypeBlocks; samples that yield an
        // equal block (as decided by the dictionary's key comparison on GenotypeBlock) end up in the
        // same list, in ascending sample order. A sample index is appended once per block occurrence.
        private static Dictionary<GenotypeBlock, List<int>> GetGenotypeToSampleIndex(PositionSet positionSet)
        {
            var genotypeToSample = new Dictionary<GenotypeBlock, List<int>>();

            for (int sampleIndex = 0; sampleIndex < positionSet.NumSamples; sampleIndex++)
            {
                foreach (var genotypeBlock in GetGenotypeBlocks(positionSet, sampleIndex))
                {
                    // single lookup instead of ContainsKey followed by the indexer
                    if (!genotypeToSample.TryGetValue(genotypeBlock, out var sampleIndexes))
                    {
                        sampleIndexes = new List<int>();
                        genotypeToSample[genotypeBlock] = sampleIndexes;
                    }

                    sampleIndexes.Add(sampleIndex);
                }
            }

            return genotypeToSample;
        }