/// <summary>
/// Parses one tab-separated DGV record into a <see cref="DgvItem"/>.
/// </summary>
/// <param name="line">A single tab-delimited data line; columns 0-5 and 14-16 are read.</param>
/// <param name="refChromDict">Maps chromosome names (column 1) to their chromosome objects.</param>
/// <returns>
/// The parsed item, or <c>null</c> when the line has fewer columns than this method reads
/// or when its chromosome name is not a key in <paramref name="refChromDict"/>.
/// </returns>
/// <remarks>
/// The start, end and sample-size columns are parsed with <c>int.Parse</c>, so a non-numeric
/// value in any of them still throws. Empty gain/loss columns are treated as 0.
/// </remarks>
public static DgvItem ExtractDgvItem(string line, IDictionary<string, IChromosome> refChromDict)
{
    // The highest column read below is index 16 (observed losses), so at least
    // 17 columns must be present; shorter lines are rejected instead of throwing.
    const int minColumnCount = 17;

    var cols = line.Split('\t');
    if (cols.Length < minColumnCount)
    {
        return null;
    }

    var id = cols[0];
    var chromosomeName = cols[1];

    // single lookup instead of ContainsKey followed by the indexer
    if (!refChromDict.TryGetValue(chromosomeName, out var chromosome))
    {
        return null;
    }

    var start = int.Parse(cols[2]);
    var end = int.Parse(cols[3]);
    var variantType = cols[4];
    var variantSubType = cols[5];
    var sampleSize = int.Parse(cols[14]);

    // gain/loss counts may be left blank in the input; blank means zero
    var observedGains = cols[15] == "" ? 0 : int.Parse(cols[15]);
    var observedLosses = cols[16] == "" ? 0 : int.Parse(cols[16]);

    var seqAltType = SaParseUtilities.GetSequenceAlterationType(variantType, variantSubType);

    return new DgvItem(id, chromosome, start, end, sampleSize, observedGains, observedLosses, seqAltType);
}
/// <summary>
/// Builds an <see cref="OnekGenSvItem"/> from one tab-delimited line.
/// </summary>
/// <param name="line">A single tab-delimited record.</param>
/// <returns>
/// The parsed item, or <c>null</c> when the alternate allele starts with <c>&lt;INS:ME:</c>
/// or when the chromosome name is not a key in <c>_refNameDict</c>.
/// </returns>
/// <remarks>
/// Calls <c>Clear()</c> and <c>ParseInfoField</c>, then reads the instance fields
/// (<c>_svType</c>, allele counts and frequencies) to build the result, so the method
/// mutates instance state on every call that reaches that point.
/// </remarks>
private OnekGenSvItem ExtractOneKGenSvItem(string line)
{
    var splitLine = line.OptimizedSplit('\t');

    // The allele tag is a machine-readable token, so compare ordinally rather than
    // with the culture-sensitive default of StartsWith(string).
    string altAllele = splitLine[AltIndex];
    if (altAllele.StartsWith("<INS:ME:", System.StringComparison.Ordinal))
    {
        return null;
    }

    // single lookup instead of ContainsKey followed by the indexer
    string chromosomeName = splitLine[ChromIndex];
    if (!_refNameDict.TryGetValue(chromosomeName, out var chromosome))
    {
        return null;
    }

    int start = int.Parse(splitLine[StartIndex]) + 1; // start is 0-based in BED format
    int end = int.Parse(splitLine[EndIndex]);

    // NOTE(review): RemoveMissingValues is defined elsewhere; presumably it maps
    // placeholder ids to null/empty - confirm against its implementation.
    string id = RemoveMissingValues(splitLine[IdIndex]);
    string infoFields = splitLine[InfoIndex];

    // Clear() runs before ParseInfoField, presumably so values parsed from a
    // previous line are not carried over into this record.
    Clear();
    ParseInfoField(infoFields);

    var variantType = SaParseUtilities.GetSequenceAlteration(_svType);

    return new OnekGenSvItem(chromosome, start, end, variantType, id,
        _allAlleleNumber, _allAlleleCount, _allAlleleFrequency,
        _afrAlleleFrequency, _amrAlleleFrequency, _easAlleleFrequency,
        _eurAlleleFrequency, _sasAlleleFrequency);
}
/// <summary>
/// Builds an <see cref="OnekGenSvItem"/> from one tab-delimited VCF-style line.
/// </summary>
/// <param name="line">A single tab-delimited record.</param>
/// <returns>
/// The parsed item, or <c>null</c> when the line has fewer than eight columns, none of its
/// alternate alleles is of the form <c>&lt;...&gt;</c>, the chromosome name is not a key in
/// <c>_refNameDict</c>, or no end position is available after parsing the info column.
/// </returns>
/// <remarks>
/// Calls <c>Clear()</c> and <c>ParseInfoField</c> and may assign <c>_svEnd</c>, so the
/// method mutates instance state.
/// </remarks>
private OnekGenSvItem ExtractOneKGenSvItem(string line)
{
    var columns = line.OptimizedSplit('\t');

    // only the columns up to and including the info column are used
    if (columns.Length < 8)
    {
        return null;
    }

    // keep the record only if at least one comma-separated alternate allele
    // is wrapped in angle brackets
    var foundSymbolicAllele = false;
    foreach (var allele in columns[VcfCommon.AltIndex].OptimizedSplit(','))
    {
        if (allele.OptimizedStartsWith('<') && allele.OptimizedEndsWith('>'))
        {
            foundSymbolicAllele = true;
            break;
        }
    }

    if (!foundSymbolicAllele)
    {
        return null;
    }

    if (!_refNameDict.TryGetValue(columns[VcfCommon.ChromIndex], out var chromosome))
    {
        return null;
    }

    var pos = int.Parse(columns[VcfCommon.PosIndex]);
    var variantId = columns[VcfCommon.IdIndex];
    var info = columns[VcfCommon.InfoIndex];

    // Clear() runs before ParseInfoField, presumably so values parsed from a
    // previous line are not carried over into this record.
    Clear();
    ParseInfoField(info);

    if (_svEnd == null)
    {
        // no end available: give up unless a length lets us derive one
        if (_svLen == null)
        {
            return null;
        }

        // NOTE(review): if _svLen can be negative here, the derived end would
        // precede the position - confirm how ParseInfoField stores the length.
        _svEnd = pos + _svLen;
    }

    var alterationType = SaParseUtilities.GetSequenceAlteration(_svType);

    // the reported start is one past the position column
    return new OnekGenSvItem(chromosome, pos + 1, _svEnd.Value, alterationType, variantId,
        _allAlleleNumber, _allAlleleCount, _allAlleleFrequency,
        _afrAlleleFrequency, _amrAlleleFrequency, _easAlleleFrequency,
        _eurAlleleFrequency, _sasAlleleFrequency);
}
/// <summary>
/// Builds the supplementary interval for this item, with the source label
/// "1000 Genomes Project".
/// </summary>
/// <returns>
/// <c>null</c> when <c>IsInterval</c> is false; otherwise a
/// <see cref="SupplementaryIntervalItem"/> carrying the id, the per-population
/// frequencies that are non-null, the sample sizes that are greater than zero,
/// and the non-zero observed gain/loss counts.
/// </returns>
public override SupplementaryIntervalItem GetSupplementaryInterval()
{
    if (!IsInterval)
    {
        return null;
    }

    var alterationType = SaParseUtilities.GetSequenceAlteration(SvType, ObservedGains, ObservedLosses);

    // the interval starts with empty value containers; everything is added below
    var interval = new SupplementaryIntervalItem(
        Chromosome,
        Start,
        SvEnd,
        AlternateAllele,
        alterationType,
        "1000 Genomes Project",
        new Dictionary<string, int>(),
        new Dictionary<string, double>(),
        new Dictionary<string, double>(),
        new Dictionary<string, string>(),
        new List<string>());

    if (Id != null)
    {
        interval.AddStringValue("id", Id);
    }

    // --- allele frequencies (only those that are present) ---
    // NOTE(review): if these properties are strings, Convert.ToDouble parses them
    // with the current culture - confirm the property types.
    if (AfrFreq != null)
    {
        interval.AddFrequencyValue("variantFreqAfr", Convert.ToDouble(AfrFreq));
    }

    if (AllFreq != null)
    {
        interval.AddFrequencyValue("variantFreqAll", Convert.ToDouble(AllFreq));
    }

    if (AmrFreq != null)
    {
        interval.AddFrequencyValue("variantFreqAmr", Convert.ToDouble(AmrFreq));
    }

    if (EasFreq != null)
    {
        interval.AddFrequencyValue("variantFreqEas", Convert.ToDouble(EasFreq));
    }

    if (EurFreq != null)
    {
        interval.AddFrequencyValue("variantFreqEur", Convert.ToDouble(EurFreq));
    }

    if (SasFreq != null)
    {
        interval.AddFrequencyValue("variantFreqSas", Convert.ToDouble(SasFreq));
    }

    // --- sample sizes (only those that are present and positive) ---
    if (AllAlleleNumber.HasValue && AllAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSize", AllAlleleNumber.Value);
    }

    if (AfrAlleleNumber.HasValue && AfrAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSizeAfr", AfrAlleleNumber.Value);
    }

    if (AmrAlleleNumber.HasValue && AmrAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSizeAmr", AmrAlleleNumber.Value);
    }

    if (EasAlleleNumber.HasValue && EasAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSizeEas", EasAlleleNumber.Value);
    }

    if (EurAlleleNumber.HasValue && EurAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSizeEur", EurAlleleNumber.Value);
    }

    if (SasAlleleNumber.HasValue && SasAlleleNumber.Value > 0)
    {
        interval.AddIntValue("sampleSizeSas", SasAlleleNumber.Value);
    }

    // --- observed copy-number counts (zero is omitted) ---
    if (ObservedGains != 0)
    {
        interval.AddIntValue("observedGains", ObservedGains);
    }

    if (ObservedLosses != 0)
    {
        interval.AddIntValue("observedLosses", ObservedLosses);
    }

    return interval;
}