Пример #1
0
        /// <summary>
        /// Walks the positions of one sample and groups consecutive positions that share a phase-set id
        /// into blocks. For every block holding more than one position, the first field of each position's
        /// sample entry (presumably the GT tag) is joined with ';' and paired with the block's start index.
        /// </summary>
        /// <param name="positionSet">Position set whose <c>_sampleInfo</c> and <c>PsInfo</c> are already populated.</param>
        /// <param name="sampleIndex">Index of the sample to inspect.</param>
        /// <returns>One (joined genotypes, start index within the position set) tuple per multi-position block.</returns>
        private static List <(string, int)> GetGenotypesAndStarts(PositionSet positionSet, int sampleIndex)
        {
            var    blocks         = new List<(string, int)>();
            var    currentTags    = new List<string>();
            int    blockStart     = 0;
            string lastPhaseSetId = null;

            // Records the block collected so far; blocks with a single position are dropped.
            void CloseBlock()
            {
                if (currentTags.Count > 1)
                {
                    blocks.Add((string.Join(";", currentTags), blockStart));
                }
            }

            for (var posIndex = 0; posIndex < positionSet._numPositions; posIndex++)
            {
                var    sampleFields = positionSet._sampleInfo[posIndex, sampleIndex];
                string phaseSetId   = positionSet.PsInfo[sampleIndex][posIndex];

                // A null previous id never starts a new block, so a leading run of null ids
                // is merged with the first non-null run that follows it.
                bool startsNewBlock = lastPhaseSetId != null && phaseSetId != lastPhaseSetId;
                if (startsNewBlock)
                {
                    CloseBlock();
                    currentTags = new List<string>();
                    blockStart  = posIndex;
                }

                currentTags.Add(sampleFields[0]);
                lastPhaseSetId = phaseSetId;
            }

            // The last block is still open when the loop ends.
            CloseBlock();
            return blocks;
        }
Пример #2
0
        /// <summary>
        /// Splits every sample column of every position on ':' and stores the resulting fields
        /// in a table indexed by [position index, sample index].
        /// </summary>
        /// <param name="positionSet">Position set providing <c>SimplePositions</c>, <c>_numPositions</c> and <c>NumSamples</c>.</param>
        /// <returns>A 2-D array whose cells hold the colon-separated fields of the matching sample column.</returns>
        private static string[, ][] GetSampleInfo(PositionSet positionSet)
        {
            var fieldsByPositionAndSample = new string[positionSet._numPositions, positionSet.NumSamples][];

            for (var posIndex = 0; posIndex < positionSet._numPositions; posIndex++)
            {
                for (var sampleIndex = 0; sampleIndex < positionSet.NumSamples; sampleIndex++)
                {
                    // Sample columns start at VcfCommon.GenotypeIndex in the VCF fields.
                    var vcfFields    = positionSet.SimplePositions[posIndex].VcfFields;
                    var sampleColumn = vcfFields[sampleIndex + VcfCommon.GenotypeIndex];

                    fieldsByPositionAndSample[posIndex, sampleIndex] = sampleColumn.Split(":");
                }
            }

            return fieldsByPositionAndSample;
        }
Пример #3
0
        /// <summary>
        /// Builds the <c>AlleleSet</c> for a position set: one allele array per position
        /// (reference allele at index 0, followed by the alternate alleles) plus the start of each position.
        /// </summary>
        /// <param name="positionSet">Position set providing <c>SimplePositions</c> and <c>_numPositions</c>.</param>
        /// <returns>An <c>AlleleSet</c> constructed with the chromosome of the first position, the starts and the allele arrays.</returns>
        private static AlleleSet GenerateAlleleSet(PositionSet positionSet)
        {
            var allelesByPosition = new string[positionSet._numPositions][];
            var startPositions    = positionSet.SimplePositions.Select(x => x.Start).ToArray();

            for (var posIndex = 0; posIndex < allelesByPosition.Length; posIndex++)
            {
                var simplePosition = positionSet.SimplePositions[posIndex];
                var altAlleles     = simplePosition.AltAlleles;

                // Slot 0 is reserved for the reference allele; alternates follow in their original order.
                var alleles = new string[altAlleles.Length + 1];
                alleles[0] = simplePosition.RefAllele;
                altAlleles.CopyTo(alleles, 1);

                allelesByPosition[posIndex] = alleles;
            }

            // The chromosome is taken from the first position only.
            var chromosome = positionSet.SimplePositions[0].Chromosome;
            return new AlleleSet(chromosome, startPositions, allelesByPosition);
        }
Пример #4
0
        /// <summary>
        /// Creates a <c>PositionSet</c> from the given positions and function block ranges and fills in
        /// its derived members: allele set, unsupported alleles, per-sample fields, PS/GQ tag values and
        /// the allele-index-block to sample-index mapping.
        /// </summary>
        /// <param name="simpleSimplePositions">Positions passed to the <c>PositionSet</c> constructor.</param>
        /// <param name="functionBlockRanges">Function block ranges passed to the <c>PositionSet</c> constructor.</param>
        /// <returns>The fully populated position set.</returns>
        public static PositionSet CreatePositionSet(List <ISimplePosition> simpleSimplePositions, List <int> functionBlockRanges)
        {
            // NOTE(review): the helpers below read SimplePositions, _numPositions and NumSamples,
            // which are presumably initialised by this constructor — confirm.
            var positionSet = new PositionSet(simpleSimplePositions, functionBlockRanges);

            // The order of the following assignments matters: later steps read members set by earlier ones.
            positionSet.AlleleSet = GenerateAlleleSet(positionSet);
            positionSet._allelesWithUnsupportedTypes = GetAllelesWithUnsupportedTypes(positionSet);
            positionSet._sampleInfo = GetSampleInfo(positionSet);
            // Element 0 is used for the "PS" tag and element 1 for the "GQ" tag below.
            var phaseSetAndGqIndexes = positionSet.GetSampleTagIndexes(new[] { "PS", "GQ" });

            positionSet.PsInfo = GetTagInfo(positionSet._sampleInfo, phaseSetAndGqIndexes[0], ExtractSamplePhaseSet);
            positionSet.GqInfo = GetTagInfo(positionSet._sampleInfo, phaseSetAndGqIndexes[1], ExtractSampleGq);
            // Must run after _sampleInfo and PsInfo are set: GetGenotypesAndStarts reads both.
            var genotypeToSampleIndex = GetGenotypeToSampleIndex(positionSet);

            positionSet.AlleleIndexBlockToSampleIndex = AlleleIndexBlock.GetAlleleIndexBlockToSampleIndex(genotypeToSampleIndex, positionSet._allelesWithUnsupportedTypes, positionSet.AlleleSet.Starts, positionSet.FunctionBlockRanges);
            return(positionSet);
        }
Пример #5
0
        /// <summary>
        /// For each position, collects the 1-based indexes (as strings) of the alternate alleles that are
        /// considered unsupported: <c>IsSupportedVariantType</c> returns false for the (ref, alt) pair and the
        /// position's ALT column is not equal to <c>VcfCommon.GatkNonRefAllele</c>.
        /// </summary>
        /// <param name="positionSet">Position set providing <c>SimplePositions</c> and <c>_numPositions</c>.</param>
        /// <returns>An array with one (possibly empty) set of allele index strings per position.</returns>
        private static HashSet <string>[] GetAllelesWithUnsupportedTypes(PositionSet positionSet)
        {
            var unsupportedByPosition = new HashSet<string>[positionSet._numPositions];

            for (var posIndex = 0; posIndex < positionSet._numPositions; posIndex++)
            {
                var unsupported = new HashSet<string>();
                unsupportedByPosition[posIndex] = unsupported;

                var position = positionSet.SimplePositions[posIndex];
                for (var altIndex = 0; altIndex < position.AltAlleles.Length; altIndex++)
                {
                    if (IsSupportedVariantType(position.RefAllele, position.AltAlleles[altIndex]))
                    {
                        continue;
                    }

                    // A position whose whole ALT column is the GATK non-ref allele is never flagged.
                    if (position.VcfFields[VcfCommon.AltIndex] == VcfCommon.GatkNonRefAllele)
                    {
                        continue;
                    }

                    // GT tag is 1-based
                    unsupported.Add((altIndex + 1).ToString());
                }
            }

            return unsupportedByPosition;
        }
Пример #6
0
        /// <summary>
        /// Groups samples by the genotype blocks they carry: every (joined genotypes, start index) tuple
        /// returned by <c>GetGenotypesAndStarts</c> is mapped to the list of sample indexes that produced it.
        /// </summary>
        /// <param name="positionSet">Position set providing <c>NumSamples</c> and the data read by <c>GetGenotypesAndStarts</c>.</param>
        /// <returns>
        /// A dictionary keyed by (genotypes, start index); each value lists the matching sample indexes
        /// in ascending order of first insertion.
        /// </returns>
        private static Dictionary <(string Genotypes, int Start), List <int> > GetGenotypeToSampleIndex(PositionSet positionSet)
        {
            var genotypeToSample = new Dictionary<(string, int), List<int>>();

            for (int sampleIndex = 0; sampleIndex < positionSet.NumSamples; sampleIndex++)
            {
                foreach (var genotypeAndStartIndex in GetGenotypesAndStarts(positionSet, sampleIndex))
                {
                    // Single lookup: fetch the existing list, or create and register a new one.
                    if (!genotypeToSample.TryGetValue(genotypeAndStartIndex, out var sampleIndexes))
                    {
                        sampleIndexes = new List<int>();
                        genotypeToSample[genotypeAndStartIndex] = sampleIndexes;
                    }

                    sampleIndexes.Add(sampleIndex);
                }
            }

            return genotypeToSample;
        }