示例#1
0
        /// <summary>
        /// Parses one tab-delimited line into a <see cref="DgvItem"/>.
        /// </summary>
        /// <param name="line">Tab-separated record; columns 0-5 and 14-16 are read.</param>
        /// <param name="refChromDict">Lookup from chromosome name to chromosome object.</param>
        /// <returns>
        /// The parsed item, or null when the line has fewer columns than this method reads
        /// or when its chromosome name is not a key of <paramref name="refChromDict"/>.
        /// </returns>
        /// <exception cref="FormatException">
        /// The start, end or sample-size column, or a non-empty gains/losses column, is not a valid integer.
        /// </exception>
        public static DgvItem ExtractDgvItem(string line, IDictionary <string, IChromosome> refChromDict)
        {
            // The highest column index read below is 16, so 17 columns are required.
            // (The previous guard of 8 let shorter lines through to an IndexOutOfRangeException.)
            const int requiredColumns = 17;

            var cols = line.Split('\t');

            if (cols.Length < requiredColumns)
            {
                return null;
            }

            var id             = cols[0];
            var chromosomeName = cols[1];

            // Single lookup instead of ContainsKey followed by the indexer.
            IChromosome chromosome;
            if (!refChromDict.TryGetValue(chromosomeName, out chromosome))
            {
                return null;
            }

            var start          = int.Parse(cols[2]);
            var end            = int.Parse(cols[3]);
            var variantType    = cols[4];
            var variantSubType = cols[5];
            var sampleSize     = int.Parse(cols[14]);

            // An empty gains/losses column is treated as zero observations.
            var observedGains  = cols[15] == "" ? 0 : int.Parse(cols[15]);
            var observedLosses = cols[16] == "" ? 0 : int.Parse(cols[16]);

            var seqAltType = SaParseUtilities.GetSequenceAlterationType(variantType, variantSubType);

            return new DgvItem(id, chromosome, start, end, sampleSize, observedGains, observedLosses, seqAltType);
        }
示例#2
0
        /// <summary>
        /// Builds an <see cref="OnekGenSvItem"/> from one tab-delimited record.
        /// </summary>
        /// <param name="line">The tab-separated record to parse.</param>
        /// <returns>
        /// The parsed item, or null when the alternate allele starts with "&lt;INS:ME:"
        /// or the chromosome name is not present in the reference name dictionary.
        /// </returns>
        private OnekGenSvItem ExtractOneKGenSvItem(string line)
        {
            var fields = line.OptimizedSplit('\t');

            // Records whose alternate allele starts with "<INS:ME:" are skipped.
            string alt = fields[AltIndex];
            if (alt.StartsWith("<INS:ME:"))
            {
                return null;
            }

            // Records on chromosomes missing from the reference dictionary are skipped.
            string chromName = fields[ChromIndex];
            if (!_refNameDict.ContainsKey(chromName))
            {
                return null;
            }

            var chrom = _refNameDict[chromName];

            // start is 0-based in BED format, hence the +1 shift
            int oneBasedStart = int.Parse(fields[StartIndex]) + 1;
            int intervalEnd   = int.Parse(fields[EndIndex]);
            string itemId     = RemoveMissingValues(fields[IdIndex]);
            string info       = fields[InfoIndex];

            // NOTE(review): Clear() presumably resets the per-record fields that
            // ParseInfoField then fills from the info column - confirm against their definitions.
            Clear();
            ParseInfoField(info);

            var svType = SaParseUtilities.GetSequenceAlteration(_svType);

            return new OnekGenSvItem(
                chrom, oneBasedStart, intervalEnd, svType, itemId,
                _allAlleleNumber, _allAlleleCount,
                _allAlleleFrequency,
                _afrAlleleFrequency, _amrAlleleFrequency, _easAlleleFrequency,
                _eurAlleleFrequency, _sasAlleleFrequency);
        }
示例#3
0
        /// <summary>
        /// Builds an <see cref="OnekGenSvItem"/> from one tab-delimited VCF record.
        /// </summary>
        /// <param name="line">The tab-separated record to parse.</param>
        /// <returns>
        /// The parsed item, or null when the record has fewer than 8 columns, none of its
        /// alternate alleles is symbolic (enclosed in angle brackets), its chromosome is not
        /// in the reference name dictionary, or no end position can be determined.
        /// </returns>
        private OnekGenSvItem ExtractOneKGenSvItem(string line)
        {
            // Columns after the info field are not used by this method.
            var fields = line.OptimizedSplit('\t');
            if (fields.Length < 8)
            {
                return null;
            }

            // At least one alternate allele must be symbolic, i.e. of the form <...>.
            var alts = fields[VcfCommon.AltIndex].OptimizedSplit(',');
            if (!alts.Any(x => x.OptimizedStartsWith('<') && x.OptimizedEndsWith('>')))
            {
                return null;
            }

            var chromName = fields[VcfCommon.ChromIndex];
            if (!_refNameDict.ContainsKey(chromName))
            {
                return null;
            }

            var chrom = _refNameDict[chromName];
            var pos   = int.Parse(fields[VcfCommon.PosIndex]);
            var name  = fields[VcfCommon.IdIndex];
            var info  = fields[VcfCommon.InfoIndex];

            // NOTE(review): Clear() presumably resets the per-record fields that
            // ParseInfoField then fills from the info column - confirm against their definitions.
            Clear();
            ParseInfoField(info);

            if (_svEnd == null)
            {
                // Without an explicit end, fall back to position + length; give up if neither is known.
                if (_svLen == null)
                {
                    return null;
                }

                _svEnd = pos + _svLen;
            }

            var svType = SaParseUtilities.GetSequenceAlteration(_svType);

            // NOTE(review): the start is reported as pos + 1 - presumably to skip the
            // VCF padding base of a symbolic allele; confirm.
            return new OnekGenSvItem(
                chrom, pos + 1, _svEnd.Value, svType, name,
                _allAlleleNumber, _allAlleleCount,
                _allAlleleFrequency,
                _afrAlleleFrequency, _amrAlleleFrequency, _easAlleleFrequency,
                _eurAlleleFrequency, _sasAlleleFrequency);
        }
示例#4
0
        /// <summary>
        /// Converts this item into a <see cref="SupplementaryIntervalItem"/>.
        /// </summary>
        /// <returns>
        /// A new interval item carrying the id, the per-population frequencies, the positive
        /// allele numbers and the non-zero gain/loss counts of this item; null when
        /// <c>IsInterval</c> is false.
        /// </returns>
        public override SupplementaryIntervalItem GetSupplementaryInterval()
        {
            if (!IsInterval)
            {
                return null;
            }

            var alterationType = SaParseUtilities.GetSequenceAlteration(SvType, ObservedGains, ObservedLosses);

            // The interval starts with empty value containers; entries are added below.
            var interval = new SupplementaryIntervalItem(
                Chromosome, Start, SvEnd, AlternateAllele, alterationType,
                "1000 Genomes Project",
                new Dictionary <string, int>(),
                new Dictionary <string, double>(),
                new Dictionary <string, double>(),
                new Dictionary <string, string>(),
                new List <string>());

            // Identifier.
            if (Id != null)
            {
                interval.AddStringValue("id", Id);
            }

            // Frequencies: each one is added only when it is present.
            if (AfrFreq != null)
            {
                interval.AddFrequencyValue("variantFreqAfr", Convert.ToDouble(AfrFreq));
            }
            if (AllFreq != null)
            {
                interval.AddFrequencyValue("variantFreqAll", Convert.ToDouble(AllFreq));
            }
            if (AmrFreq != null)
            {
                interval.AddFrequencyValue("variantFreqAmr", Convert.ToDouble(AmrFreq));
            }
            if (EasFreq != null)
            {
                interval.AddFrequencyValue("variantFreqEas", Convert.ToDouble(EasFreq));
            }
            if (EurFreq != null)
            {
                interval.AddFrequencyValue("variantFreqEur", Convert.ToDouble(EurFreq));
            }
            if (SasFreq != null)
            {
                interval.AddFrequencyValue("variantFreqSas", Convert.ToDouble(SasFreq));
            }

            // Allele numbers: reported as sample sizes, only when present and positive.
            if (AllAlleleNumber != null && AllAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSize", AllAlleleNumber.Value);
            }
            if (AfrAlleleNumber != null && AfrAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSizeAfr", AfrAlleleNumber.Value);
            }
            if (AmrAlleleNumber != null && AmrAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSizeAmr", AmrAlleleNumber.Value);
            }
            if (EasAlleleNumber != null && EasAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSizeEas", EasAlleleNumber.Value);
            }
            if (EurAlleleNumber != null && EurAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSizeEur", EurAlleleNumber.Value);
            }
            if (SasAlleleNumber != null && SasAlleleNumber.Value > 0)
            {
                interval.AddIntValue("sampleSizeSas", SasAlleleNumber.Value);
            }

            // Gain/loss counts: zero counts are omitted.
            if (ObservedGains != 0)
            {
                interval.AddIntValue("observedGains", ObservedGains);
            }
            if (ObservedLosses != 0)
            {
                interval.AddIntValue("observedLosses", ObservedLosses);
            }

            return interval;
        }