/// <summary>
/// Lazily reads the input stream line by line and yields the parsed items.
/// Consecutive items that compare equal via <c>SpliceAiItem.CompareTo</c> are buffered
/// as one group; each group is passed to <c>SanityCheck</c> and <c>UpdateGeneSymbols</c>
/// before its items are yielded.
/// </summary>
/// <remarks>
/// Blank lines, lines starting with '#', and lines for which <c>ExtractItem</c> returns
/// null are skipped. Because this is an iterator, nothing is read until the result is
/// enumerated, and the unresolved-symbol summary is written to the console only when the
/// caller enumerates the sequence to the end.
/// </remarks>
/// <returns>The parsed items, in the order they appear in the input.</returns>
public IEnumerable<SpliceAiItem> GetItems()
{
    var previousItems = new List<SpliceAiItem>();

    using (var reader = FileUtilities.GetStreamReader(_stream))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Skip empty lines.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // Skip comments.
            if (line.OptimizedStartsWith('#'))
            {
                continue;
            }

            var item = ExtractItem(line);
            if (item == null)
            {
                continue;
            }

            // Starting the first group, or extending the current one.
            if (previousItems.Count == 0 || SpliceAiItem.CompareTo(item, previousItems[0]) == 0)
            {
                previousItems.Add(item);
                continue;
            }

            // The incoming item belongs to a new group: validate and emit the buffered one.
            SanityCheck(previousItems);
            UpdateGeneSymbols(previousItems);
            foreach (var spliceAiItem in previousItems)
            {
                yield return spliceAiItem;
            }

            previousItems.Clear();
            previousItems.Add(item);
        }
    }

    // Emit the final group. It gets the same sanity check as every earlier group;
    // the check is skipped when the input produced no items at all.
    if (previousItems.Count > 0)
    {
        SanityCheck(previousItems);
    }

    UpdateGeneSymbols(previousItems);
    foreach (var spliceAiItem in previousItems)
    {
        yield return spliceAiItem;
    }

    Console.WriteLine($"{_unresolvedSymbols.Count} unresolved gene symbols encountered. Symbols:");
    foreach (var symbol in _unresolvedSymbols)
    {
        Console.Write(symbol + ',');
    }
}
/// <summary>
/// Orders two items by chromosome index first and, when the chromosome indices are
/// equal, by position.
/// </summary>
/// <param name="one">The first item.</param>
/// <param name="other">The item to compare against.</param>
/// <returns>
/// The result of comparing the chromosome indices when they differ; otherwise the
/// result of comparing the positions.
/// </returns>
public static int CompareTo(SpliceAiItem one, SpliceAiItem other)
{
    var firstIndex = one.Chromosome.Index;
    var secondIndex = other.Chromosome.Index;

    return firstIndex != secondIndex
        ? firstIndex.CompareTo(secondIndex)
        : one.Position.CompareTo(other.Position);
}
/// <summary>
/// Replaces the item's gene symbol with the value mapped to it in
/// <c>_spliceToNirvanaSymbols</c>. When no mapping exists, the symbol is left
/// unchanged and recorded in <c>_unresolvedSymbols</c>.
/// </summary>
/// <param name="item">The item whose <c>Hgnc</c> symbol is looked up and possibly replaced.</param>
private void UpdateGeneSymbol(SpliceAiItem item)
{
    var mappingFound = _spliceToNirvanaSymbols.TryGetValue(item.Hgnc, out var mappedSymbol);

    if (!mappingFound)
    {
        // No mapping for this symbol: remember it for the end-of-run report.
        _unresolvedSymbols.Add(item.Hgnc);
        return;
    }

    item.Hgnc = mappedSymbol;
}
/// <summary>
/// Tries to reconcile the item's gene symbol with the gene symbols that
/// <c>_geneTree</c> reports as overlapping the item's position.
/// </summary>
/// <remarks>
/// Nothing is changed when the gene tree or synonym table is missing, when more than
/// one gene symbol is recorded for the current position, or when the item's symbol is
/// already among the overlapping ones. The symbol is set to null when the tree returns
/// null for the position.
/// </remarks>
/// <param name="item">The item whose <c>Hgnc</c> symbol may be replaced.</param>
private void UpdateGeneSymbol(SpliceAiItem item)
{
    if (_geneTree == null || _geneSynonyms == null)
    {
        return;
    }

    var chrom = item.Chromosome;
    var pos = item.Position;

    // With multiple genes at one position we cannot tell which symbol to update.
    if (_currentPositionGeneSymbols.Count > 1)
    {
        return;
    }

    var overlappingGenes = _geneTree.GetAllOverlappingValues(chrom.Index, pos, pos);
    if (overlappingGenes == null)
    {
        item.Hgnc = null;
        return;
    }

    var candidates = new HashSet<string>(overlappingGenes);

    // The current symbol is already one of the overlapping genes: keep it.
    if (candidates.Contains(item.Hgnc))
    {
        return;
    }

    // Exactly one overlapping gene: adopt its symbol.
    if (candidates.Count == 1)
    {
        item.Hgnc = candidates.First();
        return;
    }

    // Otherwise fall back to the synonym table for the current symbol.
    if (!_geneSynonyms.TryGetValue(item.Hgnc, out var synonyms))
    {
        return;
    }

    var sharedSymbols = synonyms.Intersect(candidates).ToArray();
    if (sharedSymbols.Length != 1)
    {
        // Zero or several synonyms overlap: ambiguous, so record it as unresolved.
        _unresolvedSymbols.Add(item.Hgnc);
        return;
    }

    item.Hgnc = sharedSymbols[0];
}