/// <summary>
/// Streams the rest of a single gnomAD enumerator once the other stream has been exhausted.
/// </summary>
/// <param name="enumerator">
/// An enumerator that is already positioned on an element: its <c>Current</c> is read
/// before <c>MoveNext()</c> is called for the first time.
/// </param>
/// <returns>
/// The normalized form of every remaining item whose <c>AllAlleleNumber</c> is neither
/// null nor zero. Enumeration stops at the first null item or when the enumerator ends.
/// </returns>
private IEnumerable<GnomadItem> GetRemainingItems(IEnumerator<GnomadItem> enumerator)
{
    while (true)
    {
        var current = enumerator.Current;

        // a null entry terminates the stream
        if (current == null)
        {
            yield break;
        }

        // only entries that carry a non-zero allele number are reported
        if (current.AllAlleleNumber != null && current.AllAlleleNumber.Value != 0)
        {
            yield return GnomadUtilities.GetNormalizedItem(current, _sequenceProvider);
        }

        if (!enumerator.MoveNext())
        {
            yield break;
        }
    }
}
/// <summary>
/// Merges the genome and exome items into one stream of gnomAD entries.
/// </summary>
/// <remarks>
/// While both streams have data, the items at the smaller of the two current positions are
/// normalized and buffered in a min-heap. Heap positions are merged and emitted only once they
/// lie more than <c>VariantUtils.MaxUpstreamLength</c> bases behind the position that was just
/// read, so that all items for a position are gathered before conflicts are resolved.
/// When one stream ends, the heap is flushed and the other stream is emitted through
/// <c>GetRemainingItems</c>. Items whose <c>AllAlleleNumber</c> is null or zero are never emitted.
/// </remarks>
/// <returns>The merged (and, for the leftover stream, normalized) gnomAD items.</returns>
public IEnumerable<GnomadItem> GetCombinedItems()
{
    using (var genomeEnumerator = GetItems(_genomeReader, GnomadDataType.Genome).GetEnumerator())
    using (var exomeEnumerator = GetItems(_exomeReader, GnomadDataType.Exome).GetEnumerator())
    {
        var hasGenomicItem = genomeEnumerator.MoveNext();
        var hasExomeItem = exomeEnumerator.MoveNext();
        var minHeap = new MinHeap<GnomadItem>(GnomadItem.CompareTo);

        while (hasExomeItem && hasGenomicItem)
        {
            var position = Math.Min(genomeEnumerator.Current.Position, exomeEnumerator.Current.Position);

            // all items for a position should be gathered so as to resolve conflicts properly.
            // Current is only read while the matching flag is true: its value is undefined once
            // MoveNext() has returned false.
            while (hasGenomicItem && genomeEnumerator.Current.Position == position)
            {
                minHeap.Add(GnomadUtilities.GetNormalizedItem(genomeEnumerator.Current, _sequenceProvider));
                hasGenomicItem = genomeEnumerator.MoveNext();
            }

            while (hasExomeItem && exomeEnumerator.Current.Position == position)
            {
                minHeap.Add(GnomadUtilities.GetNormalizedItem(exomeEnumerator.Current, _sequenceProvider));
                hasExomeItem = exomeEnumerator.MoveNext();
            }

            // The heap minimum is re-read on every iteration. Caching it once would leave the
            // condition permanently true after the first pop and drain positions that are still
            // inside the upstream window (including the one that was just added).
            while (minHeap.Count() > 0
                   && minHeap.GetMin().Position < position - VariantUtils.MaxUpstreamLength)
            {
                foreach (var item in PopMergedItems(minHeap))
                {
                    yield return item;
                }
            }
        }

        // flush out the last positions in heap (same allele-number filter as everywhere else)
        while (minHeap.Count() > 0)
        {
            foreach (var item in PopMergedItems(minHeap))
            {
                yield return item;
            }
        }

        // now, at most one of the iterators is left
        if (hasGenomicItem)
        {
            foreach (var item in GetRemainingItems(genomeEnumerator))
            {
                yield return item;
            }
        }

        if (hasExomeItem)
        {
            foreach (var item in GetRemainingItems(exomeEnumerator))
            {
                yield return item;
            }
        }
    }

    // Takes the items returned by GetMinItems, merges genome with exome entries and skips merged
    // entries that have no allele number.
    // NOTE(review): GetMinItems is assumed to remove the returned items from the heap; the
    // flush loops above rely on that to terminate - confirm against its implementation.
    IEnumerable<GnomadItem> PopMergedItems(MinHeap<GnomadItem> heap)
    {
        var (genomeItems, exomeItems) = GetMinItems(heap);

        foreach (var mergedItem in GnomadUtilities.GetMergedItems(genomeItems, exomeItems).Values)
        {
            if (mergedItem.AllAlleleNumber == null || mergedItem.AllAlleleNumber.Value == 0)
            {
                continue;
            }

            yield return mergedItem;
        }
    }
}