Exemplo n.º 1
0
        // Verifies conflict removal on gnomAD items: the entries read at position 16558315 from
        // the conflicting-items test stream are passed through
        // SuppDataUtilities.RemoveConflictingAlleles and three are expected to remain.
        public void IdentifyConflictingItems()
        {
            const int targetPosition = 16558315;

            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + "TAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC", targetPosition - VariantUtils.MaxUpstreamLength);

            var refNameToChrom = new Dictionary<string, IChromosome> {
                { "22", Chrom22 }
            };

            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, refNameToChrom);

            var items = new List<ISupplementaryDataItem>();

            // Dispose the reader (and with it the stream it wraps) once every item has been consumed;
            // the items are enumerated inside the using block so the stream is still open while reading.
            using (var streamReader = new StreamReader(GetConflictingItemsStream()))
            {
                var gnomadReader = new GnomadReader(streamReader, sequenceProvider);

                foreach (GnomadItem item in gnomadReader.GetItems())
                {
                    if (item.Position == targetPosition)
                    {
                        items.Add(item);
                    }
                }
            }

            items = SuppDataUtilities.RemoveConflictingAlleles(items, false);

            // two of the items were removed as conflicting items
            Assert.Equal(3, items.Count);
        }
Exemplo n.º 2
0
        // Runs the TopMed items parsed from the duplicate-items test stream through
        // SuppDataUtilities.RemoveConflictingAlleles and expects exactly one item in the result.
        public void RemoveConflictingAlleles_does_not_remove_duplicates()
        {
            var seqProvider = ParserTestUtils.GetSequenceProvider(70220313, "TGCC", 'A', _chromDict);

            List<ISupplementaryDataItem> saItems;

            // Dispose the reader (and the stream it wraps) as soon as parsing is done.
            using (var streamReader = new StreamReader(GetDupItemsStream()))
            {
                var topMedReader = new TopMedReader(streamReader, seqProvider);

                // Copy the items into a list inside the using block so the stream is fully
                // read before it is closed.
                saItems = new List<ISupplementaryDataItem>(topMedReader.GetItems());
            }

            saItems = SuppDataUtilities.RemoveConflictingAlleles(saItems, false);
            Assert.Single(saItems);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Serialises the supplementary items of one position into the scratch memory stream and
        /// appends the resulting bytes to the current block, flushing the block first when it
        /// has no space for them.
        /// </summary>
        /// <param name="items">
        /// Items to write; must contain at least one element because the position and
        /// chromosome are taken from the first one.
        /// </param>
        private void WritePosition(List <ISupplementaryDataItem> items)
        {
            // Capture the first item before any filtering. RemoveConflictingAlleles returns a new
            // list that can hold fewer items; if it came back empty, reading items[0] later (for
            // the flush) would throw. NOTE(review): assumes all items passed in share the same
            // chromosome, so the first unfiltered item is representative - confirm with callers.
            ISupplementaryDataItem firstItem = items[0];
            int position = firstItem.Position;

            // rewind the scratch stream so this position is written from the start
            _memStream.Position = 0;
            if (_isPositional)
            {
                var positionalItem = SuppDataUtilities.GetPositionalAnnotation(items);
                if (positionalItem == null)
                {
                    // nothing to record for this position
                    return;
                }
                _memWriter.Write(positionalItem.GetJsonString());
            }
            else
            {
                // any data source that is reported by allele and is not an array (e.g. allele frequencies) needs this filtering step
                if (_index.MatchByAllele && !_index.IsArray)
                {
                    items = SuppDataUtilities.RemoveConflictingAlleles(items, _throwErrorOnConflicts);
                }

                if (_index.JsonKey == SaCommon.PrimateAiTag)
                {
                    items = SuppDataUtilities.DeDuplicatePrimateAiItems(items);
                }

                // layout: item count, then (ref allele, alt allele, JSON) per item
                _memWriter.WriteOpt(items.Count);

                foreach (ISupplementaryDataItem saItem in items)
                {
                    _memWriter.WriteOptAscii(saItem.RefAllele);
                    _memWriter.WriteOptAscii(saItem.AltAllele);
                    _memWriter.Write(saItem.GetJsonString());
                }
            }

            // the stream position after writing is the number of bytes produced for this position
            int numBytes = (int)_memStream.Position;

            if (!_block.HasSpace(numBytes))
            {
                Flush(firstItem.Chromosome.Index);
            }
            _block.Add(_memBuffer, numBytes, position);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Extracts every item that shares the smallest position currently in
        /// <paramref name="minHeap"/>, splits them into genome and exome items, filters each
        /// group with <c>SuppDataUtilities.RemoveConflictingAlleles</c> and returns both groups
        /// keyed by (reference allele, alternate allele).
        /// </summary>
        /// <param name="minHeap">Heap of gnomAD items; the extracted items are removed from it.</param>
        /// <returns>
        /// The genome and exome dictionaries, or <c>(null, null)</c> when the heap is empty.
        /// </returns>
        private static (Dictionary <(string refAllele, string altAllele), GnomadItem> genomeItems, Dictionary <(string refAllele, string altAllele), GnomadItem> exomeItems) GetMinItems(MinHeap <GnomadItem> minHeap)
        {
            if (minHeap.Count() == 0)
            {
                return (null, null);
            }

            // Builds the allele-keyed lookup for one group; Add throws on a repeated allele pair.
            Dictionary<(string refAllele, string altAllele), GnomadItem> IndexByAllele(List<ISupplementaryDataItem> source)
            {
                var byAllele = new Dictionary<(string refAllele, string altAllele), GnomadItem>();

                foreach (var entry in source)
                {
                    byAllele.Add((entry.RefAllele, entry.AltAllele), (GnomadItem)entry);
                }

                return byAllele;
            }

            var genomeCandidates = new List<ISupplementaryDataItem>();
            var exomeCandidates  = new List<ISupplementaryDataItem>();
            var minPosition      = minHeap.GetMin().Position;

            // drain the heap while its minimum is still at the same position
            while (minHeap.Count() > 0 && minHeap.GetMin().Position == minPosition)
            {
                var current = minHeap.ExtractMin();
                var bucket  = current.DataType == GnomadDataType.Genome ? genomeCandidates : exomeCandidates;
                bucket.Add(current);
            }

            genomeCandidates = SuppDataUtilities.RemoveConflictingAlleles(genomeCandidates, false);
            exomeCandidates  = SuppDataUtilities.RemoveConflictingAlleles(exomeCandidates, false);

            // tuple elements are evaluated left to right: genome lookup first, then exome
            return (IndexByAllele(genomeCandidates), IndexByAllele(exomeCandidates));
        }