/// <summary>
/// Builds the <see cref="AlleleSet"/> of a position set. Each position contributes one
/// allele array holding its reference allele at index 0, followed by its alternate
/// alleles in their original order.
/// </summary>
/// <param name="positionSet">Position set whose simple positions supply the starts and alleles.</param>
/// <returns>
/// An allele set created from the chromosome of the first simple position, the start of
/// every simple position, and the per-position allele arrays.
/// </returns>
private static AlleleSet GenerateAlleleSet(PositionSet positionSet)
{
    var allelesByPosition = new string[positionSet._numPositions][];
    var positionStarts = positionSet.SimplePositions.Select(x => x.Start).ToArray();

    for (var i = 0; i < positionSet._numPositions; i++)
    {
        var current = positionSet.SimplePositions[i];

        // Slot 0 is reserved for the reference allele; the alternates are copied in behind it.
        var alleles = new string[current.AltAlleles.Length + 1];
        allelesByPosition[i] = alleles;
        alleles[0] = current.RefAllele;
        current.AltAlleles.CopyTo(alleles, 1);
    }

    var chromosome = positionSet.SimplePositions[0].Chromosome;
    return new AlleleSet(chromosome, positionStarts, allelesByPosition);
}
/// <summary>
/// Splits the per-sample VCF column of every position on ':' and wraps the result
/// in a <see cref="SampleInfo"/>.
/// </summary>
/// <param name="positionSet">Position set providing the simple positions and the sample count.</param>
/// <returns>
/// A <see cref="SampleInfo"/> built from a [position, sample] grid in which each cell holds
/// the split fields of that sample's column at that position.
/// </returns>
private static SampleInfo GetSampleInfo(PositionSet positionSet)
{
    var fieldsByPositionAndSample = new string[positionSet._numPositions, positionSet.NumSamples][];

    for (var positionIndex = 0; positionIndex < positionSet._numPositions; positionIndex++)
    {
        for (int sample = 0; sample < positionSet.NumSamples; sample++)
        {
            // Sample columns start at VcfCommon.GenotypeIndex within the VCF fields.
            int column = VcfCommon.GenotypeIndex + sample;
            var sampleColumn = positionSet.SimplePositions[positionIndex].VcfFields[column];
            fieldsByPositionAndSample[positionIndex, sample] = sampleColumn.OptimizedSplit(':');
        }
    }

    return new SampleInfo(fieldsByPositionAndSample);
}
/// <summary>
/// Splits the genotypes of one sample into genotype blocks.
/// </summary>
/// <remarks>
/// GenotypeBlocks can be shared by multiple samples.
/// We mainly utilize phase set information at this step to avoid duplicated calculation.
/// These GenotypeBlocks could be further segmented when more details are considered.
/// </remarks>
/// <param name="positionSet">Position set providing the per-sample GT values (<c>GtInfo</c>) and PS values (<c>PsInfo</c>).</param>
/// <param name="sampleIndex">Index of the sample used to look up <c>GtInfo.Values</c> and <c>PsInfo.Values</c>.</param>
/// <returns>
/// One sub-block of the sample's complete genotype block for every range reported by
/// <c>GetGenotypeBlockRange</c>, in the order the ranges are reported.
/// </returns>
private static IEnumerable<GenotypeBlock> GetGenotypeBlocks(PositionSet positionSet, int sampleIndex)
{
    var genotypes = positionSet.GtInfo.Values[sampleIndex];
    var entireBlock = new GenotypeBlock(genotypes);

    // The ranges are derived from the phase set values plus the phased / homozygous flags of each genotype.
    var phaseSets = positionSet.PsInfo.Values[sampleIndex];
    var isPhased = genotypes.Select(x => x.IsPhased).ToArray();
    var isHomozygous = genotypes.Select(x => x.IsHomozygous).ToArray();
    var blockRanges = GetGenotypeBlockRange(phaseSets, isPhased, isHomozygous);

    var genotypeBlocks = new List<GenotypeBlock>();
    foreach (var range in blockRanges)
    {
        genotypeBlocks.Add(entireBlock.GetSubBlock(range.StartIndex, range.PositionCount));
    }

    return genotypeBlocks;
}
/// <summary>
/// Determines, for every position, which alternate alleles have an unsupported variant type.
/// </summary>
/// <param name="positionSet">Position set whose simple positions are inspected.</param>
/// <returns>
/// An array with one set per position. Each set contains the 1-based indexes (matching the
/// GT tag numbering) of the alternate alleles flagged as unsupported; a set is empty when
/// nothing is flagged at that position.
/// </returns>
private static HashSet<int>[] GetAllelesWithUnsupportedTypes(PositionSet positionSet)
{
    var allelesWithUnsupportedTypes = new HashSet<int>[positionSet._numPositions];

    for (int posIndex = 0; posIndex < positionSet._numPositions; posIndex++)
    {
        var unsupported = new HashSet<int>();
        allelesWithUnsupportedTypes[posIndex] = unsupported;

        var thisPosition = positionSet.SimplePositions[posIndex];

        for (int varIndex = 0; varIndex < thisPosition.AltAlleles.Length; varIndex++)
        {
            // An allele is accepted when its variant type is supported, or when the
            // position's VcfFields[VcfCommon.AltIndex] equals the GATK non-ref allele.
            // NOTE(review): the second test reads the position's ALT field as a whole rather
            // than AltAlleles[varIndex] - confirm that this is intended.
            if (IsSupportedVariantType(thisPosition.RefAllele, thisPosition.AltAlleles[varIndex]) ||
                thisPosition.VcfFields[VcfCommon.AltIndex] == VcfCommon.GatkNonRefAllele)
            {
                continue;
            }

            // GT tag is 1-based
            unsupported.Add(varIndex + 1);
        }
    }

    return allelesWithUnsupportedTypes;
}
/// <summary>
/// Creates a <see cref="PositionSet"/> and populates its derived data: the allele set,
/// the unsupported allele indexes, the split sample fields, the GT / PS / GQ tag values,
/// and the merged allele-block-to-sample-haplotype mapping.
/// </summary>
/// <param name="simpleSimplePositions">Simple positions passed to the <see cref="PositionSet"/> constructor.</param>
/// <param name="functionBlockRanges">Function block ranges passed to the <see cref="PositionSet"/> constructor.</param>
/// <returns>The populated position set.</returns>
public static PositionSet CreatePositionSet(List<ISimplePosition> simpleSimplePositions, List<int> functionBlockRanges)
{
    var result = new PositionSet(simpleSimplePositions, functionBlockRanges);

    // Data derived directly from the simple positions.
    result.AlleleSet = GenerateAlleleSet(result);
    result._allelesWithUnsupportedTypes = GetAllelesWithUnsupportedTypes(result);
    result._sampleInfo = GetSampleInfo(result);

    // Tag indexes come back in the order the tags are requested: GT, PS, GQ.
    var tagIndexes = result.GetSampleTagIndexes(new[] { "GT", "PS", "GQ" });

    result.GtInfo = TagInfo<Genotype>.GetTagInfo(
        result._sampleInfo, tagIndexes[0], ExtractSampleValue, Genotype.GetGenotype);
    result.PsInfo = TagInfo<string>.GetTagInfo(
        result._sampleInfo, tagIndexes[1], ExtractSampleValue, x => x);
    result.GqInfo = TagInfo<string>.GetTagInfo(
        result._sampleInfo, tagIndexes[2], ExtractSampleValue, x => x);

    // Group samples by genotype block, build the allele blocks, then merge them using the block graph.
    var samplesByGenotypeBlock = GetGenotypeToSampleIndex(result);
    var haplotypesByAlleleBlock = AlleleBlock.GetAlleleBlockToSampleHaplotype(
        samplesByGenotypeBlock,
        result._allelesWithUnsupportedTypes,
        result.AlleleSet.Starts,
        result.FunctionBlockRanges,
        out var blockGraph);

    result.AlleleBlockToSampleHaplotype = AlleleBlockMerger.Merge(haplotypesByAlleleBlock, blockGraph);

    return result;
}
/// <summary>
/// Groups sample indexes by the genotype blocks they carry.
/// </summary>
/// <param name="positionSet">Position set whose samples (0 to <c>NumSamples</c> - 1) are scanned.</param>
/// <returns>
/// A dictionary keyed by genotype block. Each value lists, in ascending sample order, the
/// index of every sample for which <c>GetGenotypeBlocks</c> produced a block equal to the key
/// (equality as defined by the dictionary's default comparer for <see cref="GenotypeBlock"/>).
/// A sample index appears once per matching block it produced.
/// </returns>
private static Dictionary<GenotypeBlock, List<int>> GetGenotypeToSampleIndex(PositionSet positionSet)
{
    var genotypeToSample = new Dictionary<GenotypeBlock, List<int>>();

    for (int sampleIndex = 0; sampleIndex < positionSet.NumSamples; sampleIndex++)
    {
        foreach (var genotypeBlock in GetGenotypeBlocks(positionSet, sampleIndex))
        {
            // Single lookup: reuse the existing list when the block is known, otherwise register a new one.
            if (!genotypeToSample.TryGetValue(genotypeBlock, out var sampleIndexes))
            {
                sampleIndexes = new List<int>();
                genotypeToSample[genotypeBlock] = sampleIndexes;
            }

            sampleIndexes.Add(sampleIndex);
        }
    }

    return genotypeToSample;
}