Example #1
0
        /// <summary>
        /// Lazily reads the supplied reader line by line and yields every
        /// GnomadItem that ExtractItems produces for the data lines.
        /// </summary>
        /// <param name="reader">
        /// Reader to consume. A null reader yields an empty sequence; otherwise the
        /// reader is wrapped in a using block and disposed when enumeration ends.
        /// </param>
        /// <param name="type">Data type forwarded to ExtractItems for every line.</param>
        /// <returns>A deferred sequence of the items extracted from the reader.</returns>
        private IEnumerable <GnomadItem> GetItems(StreamReader reader, GnomadDataType type)
        {
            if (reader == null)
            {
                yield break;
            }

            using (reader)
            {
                for (var currentLine = reader.ReadLine(); currentLine != null; currentLine = reader.ReadLine())
                {
                    // Blank lines and '#' comment lines carry no data.
                    if (string.IsNullOrWhiteSpace(currentLine) || currentLine.OptimizedStartsWith('#'))
                    {
                        continue;
                    }

                    // ExtractItems signals an unusable line with null.
                    var extracted = ExtractItems(currentLine, type);
                    if (extracted == null)
                    {
                        continue;
                    }

                    foreach (var gnomadItem in extracted)
                    {
                        yield return gnomadItem;
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Creates a GnomadItem from the variant identity, the per-population allele
        /// numbers, allele counts and homozygous counts, the controls values and the
        /// filter / low-complexity flags. Coverage is derived from the depth and the
        /// overall allele number when both are available.
        /// </summary>
        public GnomadItem(IChromosome chromosome,
                          int position,
                          string refAllele,
                          string alternateAllele,
                          int?depth,
                          int?allAlleleNumber, int?afrAlleleNumber, int?amrAlleleNumber, int?easAlleleNumber,
                          int?finAlleleNumber, int?nfeAlleleNumber, int?othAlleleNumber, int?asjAlleleNumber, int?sasAlleleNumber,
                          int?maleAlleleNumber, int?femaleAlleleNumber,
                          int?allAlleleCount, int?afrAlleleCount, int?amrAlleleCount, int?easAlleleCount, int?finAlleleCount, int?nfeAlleleCount, int?othAlleleCount, int?asjAlleleCount, int?sasAlleleCount,
                          int?maleAlleleCount, int?femaleAlleleCount,
                          int?allHomCount, int?afrHomCount, int?amrHomCount, int?easHomCount,
                          int?finHomCount, int?nfeHomCount, int?othHomCount, int?asjHomCount, int?sasHomCount,
                          int?maleHomCount, int?femaleHomCount,
                          int?controlsAllAlleleNumber,
                          int?controlsAllAlleleCount,
                          bool hasFailedFilters,
                          bool isLcr,
                          GnomadDataType dataType)
        {
            // Variant identity.
            Chromosome = chromosome;
            Position   = position;
            RefAllele  = refAllele;
            AltAllele  = alternateAllele;

            // Depth is stored as given; coverage is only computed when both the depth
            // and the overall allele number are present and the allele number is positive.
            Depth = depth;
            if (depth.HasValue && allAlleleNumber.HasValue && allAlleleNumber.Value > 0)
            {
                Coverage = ComputingUtilities.GetCoverage(depth.Value, allAlleleNumber.Value);
            }

            // Allele numbers.
            AllAlleleNumber    = allAlleleNumber;
            AfrAlleleNumber    = afrAlleleNumber;
            AmrAlleleNumber    = amrAlleleNumber;
            EasAlleleNumber    = easAlleleNumber;
            FinAlleleNumber    = finAlleleNumber;
            NfeAlleleNumber    = nfeAlleleNumber;
            OthAlleleNumber    = othAlleleNumber;
            AsjAlleleNumber    = asjAlleleNumber;
            SasAlleleNumber    = sasAlleleNumber;
            MaleAlleleNumber   = maleAlleleNumber;
            FemaleAlleleNumber = femaleAlleleNumber;

            // Allele counts.
            AllAlleleCount    = allAlleleCount;
            AfrAlleleCount    = afrAlleleCount;
            AmrAlleleCount    = amrAlleleCount;
            EasAlleleCount    = easAlleleCount;
            FinAlleleCount    = finAlleleCount;
            NfeAlleleCount    = nfeAlleleCount;
            OthAlleleCount    = othAlleleCount;
            AsjAlleleCount    = asjAlleleCount;
            SasAlleleCount    = sasAlleleCount;
            MaleAlleleCount   = maleAlleleCount;
            FemaleAlleleCount = femaleAlleleCount;

            // Homozygous counts.
            AllHomCount    = allHomCount;
            AfrHomCount    = afrHomCount;
            AmrHomCount    = amrHomCount;
            EasHomCount    = easHomCount;
            FinHomCount    = finHomCount;
            NfeHomCount    = nfeHomCount;
            OthHomCount    = othHomCount;
            AsjHomCount    = asjHomCount;
            SasHomCount    = sasHomCount;
            MaleHomCount   = maleHomCount;
            FemaleHomCount = femaleHomCount;

            // Controls.
            ControlsAllAlleleNumber = controlsAllAlleleNumber;
            ControlsAllAlleleCount  = controlsAllAlleleCount;

            // Flags and provenance.
            HasFailedFilters      = hasFailedFilters;
            IsLowComplexityRegion = isLcr;
            DataType              = dataType;

            // Called last, once every property has been assigned.
            // NOTE(review): presumably post-processes entries whose allele number is zero — confirm against its definition.
            RemoveAlleleNumberZero();
        }
Example #3
0
        /// <summary>
        /// Extracts one GnomadItem per alternate allele from the specified
        /// tab-delimited VCF line.
        /// </summary>
        /// <param name="line">The VCF line to parse.</param>
        /// <param name="type">The data type passed to every item created from this line.</param>
        /// <returns>
        /// The items built from the line, or null when the line is null, has fewer
        /// than 8 columns, or names a chromosome that the sequence provider does not know.
        /// </returns>
        /// <exception cref="System.FormatException">The position column is not a valid integer.</exception>
        /// <exception cref="System.OverflowException">The position column does not fit in an Int32.</exception>
        private List <GnomadItem> ExtractItems(string line, GnomadDataType type)
        {
            if (line == null)
            {
                return null;
            }

            var splitLine = line.OptimizedSplit('\t');

            // Lines with fewer than 8 columns are rejected.
            if (splitLine.Length < 8)
            {
                return null;
            }

            // NOTE(review): presumably resets the per-line parsing fields read below — confirm against Clear().
            // Kept ahead of the chromosome check so the order of side effects is unchanged.
            Clear();

            // Single dictionary lookup instead of ContainsKey followed by the indexer.
            if (!_sequenceProvider.RefNameToChromosome.TryGetValue(splitLine[VcfCommon.ChromIndex], out var chrom))
            {
                return null;
            }

            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);
            var refAllele  = splitLine[VcfCommon.RefIndex];
            var altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');
            var filters    = splitLine[VcfCommon.FilterIndex];
            var infoFields = splitLine[VcfCommon.InfoIndex];

            // Anything other than "PASS" or "." counts as a failed filter.
            var hasFailedFilters = !(filters.Equals("PASS") || filters.Equals("."));

            // Parses the INFO column; the fields passed to the GnomadItem constructor below
            // are expected to be populated by this call.
            ParseInfoField(infoFields);

            // One item per alternate allele, so the final size is known up front.
            var gnomadItemsList = new List <GnomadItem>(altAlleles.Length);

            for (int i = 0; i < altAlleles.Length; i++)
            {
                // Allele numbers are passed as-is for every allele; the count and hom-count
                // values are selected per allele index through GetCount.
                gnomadItemsList.Add(new GnomadItem(
                                        chrom,
                                        position,
                                        refAllele,
                                        altAlleles[i],
                                        _totalDepth,
                                        _anAll, _anAfr, _anAmr, _anEas, _anFin, _anNfe, _anOth, _anAsj, _anSas, _anMale, _anFemale,
                                        GetCount(_acAll, i), GetCount(_acAfr, i), GetCount(_acAmr, i), GetCount(_acEas, i),
                                        GetCount(_acFin, i), GetCount(_acNfe, i), GetCount(_acOth, i), GetCount(_acAsj, i),
                                        GetCount(_acSas, i), GetCount(_acMale, i), GetCount(_acFemale, i),
                                        GetCount(_hcAll, i), GetCount(_hcAfr, i), GetCount(_hcAmr, i), GetCount(_hcEas, i), GetCount(_hcFin, i),
                                        GetCount(_hcNfe, i), GetCount(_hcOth, i), GetCount(_hcAsj, i), GetCount(_hcSas, i),
                                        GetCount(_hcMale, i), GetCount(_hcFemale, i),
                                        //controls
                                        _control_anAll,
                                        GetCount(_control_acAll, i),
                                        hasFailedFilters,
                                        _isLowComplexityRegion,
                                        type)
                                    );
            }

            return gnomadItemsList;
        }