Exemplo n.º 1
0
        public void IdentifyConflictingItems()
        {
            // Collects the gnomAD items reported at one position and checks how many of them
            // are left after SuppDataUtilities.RemoveConflictingAlleles has filtered the list.
            const int targetPosition = 16558315;

            string upstreamPadding = new string('A', VariantUtils.MaxUpstreamLength);
            var referenceSequence = new SimpleSequence(
                upstreamPadding + "TAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC",
                targetPosition - VariantUtils.MaxUpstreamLength);

            var chromosomesByName = new Dictionary<string, IChromosome> { ["22"] = Chrom22 };
            var provider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, referenceSequence, chromosomesByName);
            var reader = new GnomadReader(new StreamReader(GetConflictingItemsStream()), provider);

            var itemsAtTarget = new List<ISupplementaryDataItem>();
            foreach (GnomadItem gnomadItem in reader.GetItems())
            {
                if (gnomadItem.Position != targetPosition)
                {
                    continue;
                }

                itemsAtTarget.Add(gnomadItem);
            }

            var remainingItems = SuppDataUtilities.RemoveConflictingAlleles(itemsAtTarget, false);

            // Two of the items are expected to be removed as conflicting, leaving three.
            Assert.Equal(3, remainingItems.Count);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="RefMinorItem"/> when the summed alternate allele frequency of the
        /// entries whose reference and alternate alleles both pass <c>IsSnv</c> reaches
        /// <c>SaCommon.RefMinorThreshold</c>.
        /// </summary>
        /// <param name="saItems">
        /// Items to inspect; each one is cast to <see cref="AlleleFrequencyItem"/>. The first item
        /// supplies the chromosome and position of the result.
        /// </param>
        /// <returns>
        /// A ref-minor item carrying the allele chosen by
        /// <c>SuppDataUtilities.GetMostFrequentAllele</c>, or <c>null</c> when the threshold is not reached.
        /// </returns>
        private static ISupplementaryDataItem GetRefMinorItem(IList<ISupplementaryDataItem> saItems)
        {
            string snvRefAllele = null;
            var altFrequencySum = 0.0;
            var frequencyByAltAllele = new Dictionary<string, double>();

            foreach (var saItem in saItems)
            {
                var frequencyItem = (AlleleFrequencyItem)saItem;

                if (IsSnv(frequencyItem.RefAllele) && IsSnv(frequencyItem.AltAllele))
                {
                    snvRefAllele = frequencyItem.RefAllele;
                    altFrequencySum += frequencyItem.AltFrequency;
                    frequencyByAltAllele[frequencyItem.AltAllele] = frequencyItem.AltFrequency;
                }
            }

            // Written as a negated ">=" on purpose so a NaN sum is still treated as "not ref-minor".
            if (!(altFrequencySum >= SaCommon.RefMinorThreshold))
            {
                return null;
            }

            var firstItem = saItems[0];
            string majorAllele = SuppDataUtilities.GetMostFrequentAllele(frequencyByAltAllele, snvRefAllele);

            return new RefMinorItem(firstItem.Chromosome, firstItem.Position, majorAllele);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Removes items from the heap while the heap minimum lies before
        /// <paramref name="position"/>, handing them to <c>WritePosition</c> in groups.
        /// A group is every item for which <c>SuppDataUtilities.CompareTo</c> against the
        /// current minimum returns 0.
        /// </summary>
        /// <param name="itemsHeap">Heap to drain; left untouched when it is empty.</param>
        /// <param name="position">Exclusive upper bound; values below 1 make the call a no-op.</param>
        private void WriteUptoPosition(MinHeap<ISupplementaryDataItem> itemsHeap, int position)
        {
            if (position < 1 || itemsHeap.Count() == 0)
            {
                return;
            }

            for (var head = itemsHeap.GetMin(); head.Position < position; head = itemsHeap.GetMin())
            {
                var group = new List<ISupplementaryDataItem>();

                while (itemsHeap.Count() > 0 && SuppDataUtilities.CompareTo(head, itemsHeap.GetMin()) == 0)
                {
                    group.Add(itemsHeap.ExtractMin());
                }

                if (group.Count > 0)
                {
                    _count += group.Count;
                    WritePosition(group);
                }

                // Nothing left to peek at: stop before the loop asks for the next minimum.
                if (itemsHeap.Count() == 0)
                {
                    return;
                }
            }
        }
Exemplo n.º 4
0
        public void GetItems_test()
        {
            // Reads the fixture through GlobalMinorReader and checks the JSON of the positional
            // annotation that SuppDataUtilities builds from the resulting items.
            const string expectedJson = "{\"globalMinorAllele\":\"G\",\"globalMinorAlleleFrequency\":0.3472}";

            var globalMinorReader = new GlobalMinorReader(GetStream(), _chromDict);
            var saItems = globalMinorReader.GetItems().Cast<ISupplementaryDataItem>().ToList();

            var annotation = SuppDataUtilities.GetPositionalAnnotation(saItems);
            string actualJson = annotation.GetJsonString();

            Assert.Equal(expectedJson, actualJson);
        }
Exemplo n.º 5
0
        public void RemoveConflictingAlleles_does_not_remove_duplicates()
        {
            // Runs the duplicate-items fixture through RemoveConflictingAlleles and expects
            // exactly one item to remain.
            var sequenceProvider = ParserTestUtils.GetSequenceProvider(70220313, "TGCC", 'A', _chromDict);
            var reader = new TopMedReader(new StreamReader(GetDupItemsStream()), sequenceProvider);

            var supplementaryItems = new List<ISupplementaryDataItem>(reader.GetItems());

            var filteredItems = SuppDataUtilities.RemoveConflictingAlleles(supplementaryItems, false);

            Assert.Single(filteredItems);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Serializes the items of one position through <c>_memWriter</c> and adds the resulting
        /// bytes to the current block, flushing the block first when it has no room for them.
        /// </summary>
        /// <param name="items">
        /// Items to write; must contain at least one element. The first element supplies the
        /// position and chromosome used for the block bookkeeping.
        /// </param>
        private void WritePosition(List<ISupplementaryDataItem> items)
        {
            // Capture position and chromosome up front: the filtering steps below reassign
            // `items`, and the filtered list may be empty, so indexing it later could throw.
            int position = items[0].Position;
            var chromosome = items[0].Chromosome;

            _memStream.Position = 0;
            if (_isPositional)
            {
                var positionalItem = SuppDataUtilities.GetPositionalAnnotation(items);
                if (positionalItem == null)
                {
                    return;
                }

                _memWriter.Write(positionalItem.GetJsonString());
            }
            else
            {
                // any data source that is reported by allele and is not an array (e.g. allele frequencies) needs this filtering step
                if (_index.MatchByAllele && !_index.IsArray)
                {
                    items = SuppDataUtilities.RemoveConflictingAlleles(items, _throwErrorOnConflicts);
                }

                if (_index.JsonKey == SaCommon.PrimateAiTag)
                {
                    items = SuppDataUtilities.DeDuplicatePrimateAiItems(items);
                }

                // The item count precedes the per-item (ref allele, alt allele, JSON) records.
                _memWriter.WriteOpt(items.Count);

                foreach (ISupplementaryDataItem saItem in items)
                {
                    _memWriter.WriteOptAscii(saItem.RefAllele);
                    _memWriter.WriteOptAscii(saItem.AltAllele);
                    _memWriter.Write(saItem.GetJsonString());
                }
            }

            // The stream was rewound above, so its position is the number of bytes just written.
            int numBytes = (int)_memStream.Position;

            if (!_block.HasSpace(numBytes))
            {
                Flush(chromosome.Index);
            }

            _block.Add(_memBuffer, numBytes, position);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Lazily yields the RCV ClinVar items. An item whose variation id is found among the VCV
        /// records is preceded by a <see cref="VcvSaItem"/> built from that record; an item whose
        /// variation id is not found is replaced by a copy without the variation id.
        /// </summary>
        /// <returns>The supplementary data items, in the order the RCV items are read.</returns>
        public IEnumerable<ISupplementaryDataItem> GetItems()
        {
            var variationRecords = GetVariationRecords();
            Console.WriteLine($"Found {variationRecords.Count} VCV records");

            var missingVcvIds = new HashSet<int>();

            foreach (var rcvItem in GetRcvItems())
            {
                // No variation id: nothing to look up, pass the item through unchanged.
                if (rcvItem.VariationId == null)
                {
                    yield return rcvItem;

                    continue;
                }

                var vcvId = rcvItem.VariationId.Value;
                var recordIndex = SuppDataUtilities.BinarySearch(variationRecords, vcvId);

                if (recordIndex >= 0)
                {
                    var record = variationRecords[recordIndex];

                    yield return new VcvSaItem(
                        rcvItem.Chromosome, rcvItem.Position, rcvItem.RefAllele, rcvItem.AltAllele,
                        record.Accession, record.Version, record.LastUpdatedDate, record.ReviewStatus, record.Significances);

                    yield return rcvItem;
                }
                else
                {
                    // A negative index is treated as "unknown VCV": log it and emit a copy
                    // of the item with the variation id argument set to null.
                    Console.WriteLine($"Unknown vcv id:{vcvId} found in {rcvItem.Id}");
                    missingVcvIds.Add(vcvId);

                    yield return new ClinVarItem(
                        rcvItem.Chromosome, rcvItem.Position, rcvItem.Stop,
                        rcvItem.RefAllele, rcvItem.AltAllele, rcvItem.JsonSchema, rcvItem.AlleleOrigins,
                        rcvItem.VariantType, rcvItem.Id, null, rcvItem.ReviewStatus,
                        rcvItem.MedGenIds, rcvItem.OmimIds, rcvItem.OrphanetIds,
                        rcvItem.Phenotypes, rcvItem.Significances, rcvItem.PubmedIds, rcvItem.LastUpdatedDate);
                }
            }

            Console.WriteLine($"{missingVcvIds.Count} unknown VCVs found in RCVs.");
            Console.WriteLine(string.Join(',', missingVcvIds));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Extracts every item sharing the heap's smallest position, splits them into genome and
        /// non-genome (exome) groups, filters each group with
        /// <c>SuppDataUtilities.RemoveConflictingAlleles</c>, and keys the survivors by
        /// (reference allele, alternate allele).
        /// </summary>
        /// <param name="minHeap">Heap to extract from; the extracted items are removed from it.</param>
        /// <returns>
        /// The genome and exome dictionaries, or <c>(null, null)</c> when the heap is empty.
        /// </returns>
        private static (Dictionary<(string refAllele, string altAllele), GnomadItem> genomeItems, Dictionary<(string refAllele, string altAllele), GnomadItem> exomeItems) GetMinItems(MinHeap<GnomadItem> minHeap)
        {
            if (minHeap.Count() == 0)
            {
                return (null, null);
            }

            var genomeList = new List<ISupplementaryDataItem>();
            var exomeList = new List<ISupplementaryDataItem>();
            var minPosition = minHeap.GetMin().Position;

            while (minHeap.Count() > 0 && minHeap.GetMin().Position == minPosition)
            {
                var gnomadItem = minHeap.ExtractMin();
                var destination = gnomadItem.DataType == GnomadDataType.Genome ? genomeList : exomeList;
                destination.Add(gnomadItem);
            }

            // Filter both groups before indexing either one, matching the original call order.
            genomeList = SuppDataUtilities.RemoveConflictingAlleles(genomeList, false);
            exomeList = SuppDataUtilities.RemoveConflictingAlleles(exomeList, false);

            var genomeByAllele = IndexByAllele(genomeList);
            var exomeByAllele = IndexByAllele(exomeList);

            return (genomeByAllele, exomeByAllele);

            // Keys the items by allele pair; Dictionary.Add throws if the same pair appears twice.
            Dictionary<(string refAllele, string altAllele), GnomadItem> IndexByAllele(List<ISupplementaryDataItem> saItems)
            {
                var byAllele = new Dictionary<(string refAllele, string altAllele), GnomadItem>();

                foreach (var saItem in saItems)
                {
                    byAllele.Add((saItem.RefAllele, saItem.AltAllele), (GnomadItem)saItem);
                }

                return byAllele;
            }
        }
Exemplo n.º 9
0
        public void ResolveDuplicates()
        {
            // Parses the duplicate-item fixture and checks that de-duplication leaves a single
            // PrimateAI item with the expected JSON.
            const string expectedJson = "\"hgnc\":\"Gene1\",\"scorePercentile\":0.93";

            var hgncByEntrezId = new Dictionary<string, string> { ["255403"] = "Gene1" };
            var hgncByEnsemblId = new Dictionary<string, string> { ["ENSG00000234810"] = "Gene2" };

            var parser = new PrimateAiParser(GetDuplicateItemStream(), GetSequenceProvider(), hgncByEntrezId, hgncByEnsemblId);
            var parsedItems = parser.GetItems().Cast<ISupplementaryDataItem>().ToList();

            var uniqueItems = SuppDataUtilities.DeDuplicatePrimateAiItems(parsedItems);

            Assert.Single(uniqueItems);
            Assert.Equal(expectedJson, uniqueItems[0].GetJsonString());
        }
Exemplo n.º 10
0
        /// <summary>
        /// Lazily yields the RCV ClinVar items after loading the VCV records into <c>_vcvItems</c>.
        /// An item whose variation id is found among the VCV records is preceded by a
        /// <see cref="VcvSaItem"/> and has its variation id rewritten to "accession.version";
        /// an item whose variation id is not found has its variation id cleared.
        /// </summary>
        /// <returns>The supplementary data items, in the order the RCV items are read.</returns>
        public IEnumerable<ISupplementaryDataItem> GetItems()
        {
            _vcvItems = GetVariationRecords();
            Console.WriteLine($"Found {_vcvItems.Count} VCV records");

            var missingVcvIds = new HashSet<int>();

            foreach (var rcvItem in GetRcvItems())
            {
                // No variation id: nothing to look up, pass the item through unchanged.
                if (string.IsNullOrEmpty(rcvItem.VariationId))
                {
                    yield return rcvItem;

                    continue;
                }

                var vcvId = int.Parse(rcvItem.VariationId);
                var recordIndex = SuppDataUtilities.BinarySearch(_vcvItems, vcvId);

                if (recordIndex < 0)
                {
                    // A negative index is treated as "unknown VCV": log it and drop the id.
                    Console.WriteLine($"Unknown vcv id:{vcvId} found in {rcvItem.Id}");
                    missingVcvIds.Add(vcvId);
                    rcvItem.VariationId = null;
                }
                else
                {
                    var record = _vcvItems[recordIndex];

                    yield return new VcvSaItem(
                        rcvItem.Chromosome, rcvItem.Position, rcvItem.RefAllele, rcvItem.AltAllele,
                        record.Accession, record.Version, record.LastUpdatedDate, record.ReviewStatus, record.Significances);

                    // Rewritten only after the VCV item has been handed to the consumer.
                    rcvItem.VariationId = $"{record.Accession}.{record.Version}";
                }

                yield return rcvItem;
            }

            Console.WriteLine($"{missingVcvIds.Count} unknown VCVs found in RCVs.");
            Console.WriteLine(string.Join(',', missingVcvIds));
        }