/// <summary>
/// Indexes the supplied indels for one chromosome: groups them into fixed-width
/// positional buckets, records a partner-indel query for every indel, and fetches
/// one genome snippet per bucket.
/// </summary>
/// <param name="indels">Indels to index. Stored as-is in <c>Indels</c>.</param>
/// <param name="snippetSource">Source queried once per bucket for a genome snippet.</param>
/// <param name="bucketSize">Width, in reference positions, of each positional bucket.</param>
public ChromosomeIndelSource(List<HashableIndel> indels, IGenomeSnippetSource snippetSource, int bucketSize = 1000)
{
    _bucketSize = bucketSize;
    Indels = indels;

    var partnersByIndel = new Dictionary<HashableIndel, IEnumerable<HashableIndel>>();
    var bucketsByNumber = new Dictionary<int, List<HashableIndel>>();
    var snippetsByBucket = new Dictionary<int, GenomeSnippet>();

    // With no indels the three lookups are published empty and the
    // lowest/highest positions are left untouched.
    if (indels.Any())
    {
        LowestPosition = indels.Min(x => x.ReferencePosition);
        HighestPosition = indels.Max(x => x.ReferencePosition);

        foreach (var indel in indels)
        {
            // Bucket numbers are offsets from the lowest position, so the first bucket is 0.
            var bucketNum = (indel.ReferencePosition - LowestPosition) / bucketSize;
            if (!bucketsByNumber.TryGetValue(bucketNum, out var bucketContents))
            {
                bucketContents = new List<HashableIndel>();
                bucketsByNumber.Add(bucketNum, bucketContents);
            }

            // TODO come back to this if needed - the idea was to link indels to each other so
            // that multi-indels whose members fall in different buckets are all grabbed together.
            // Maybe add start and end positions to the hashables, covering both the main and the other indel.
            var indelString = indel.StringRepresentation;

            // Deferred query: it is re-evaluated against the full indel list each time it is
            // enumerated, and it also yields the indel itself (its own string representation matches).
            var partners = indels.Where(x => x.StringRepresentation == indelString || x.OtherIndel == indelString);
            partnersByIndel.Add(indel, partners);

            bucketContents.Add(indel);
            _numIndels++;
        }

        // One snippet per bucket, anchored at the first indel that was placed in it.
        foreach (var bucketEntry in bucketsByNumber)
        {
            var bucketContents = bucketEntry.Value;
            if (bucketContents.Count == 0)
            {
                continue;
            }

            snippetsByBucket[bucketEntry.Key] = snippetSource.GetGenomeSnippet(bucketContents[0].ReferencePosition);
        }
    }

    _partnerIndels = new ReadOnlyDictionary<HashableIndel, IEnumerable<HashableIndel>>(partnersByIndel);
    _positionalBucketsOfIndels = new ReadOnlyDictionary<int, List<HashableIndel>>(bucketsByNumber);
    _genomeSnippetsLookup = new ReadOnlyDictionary<int, GenomeSnippet>(snippetsByBucket);
}
/// <summary>
/// Returns a genome snippet for <paramref name="position"/>, reusing the most recently
/// fetched snippet when the position is still comfortably inside it.
/// </summary>
/// <param name="position">Reference position to fetch a snippet for; must be non-negative.</param>
/// <returns>The cached snippet on a cache hit, otherwise a newly fetched one (which becomes the cache).</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="position"/> is negative.</exception>
public GenomeSnippet GetGenomeSnippet(int position)
{
    if (position < 0)
    {
        throw new ArgumentException(
            $"Invalid snippet reference position ({position}): must be non-negative.");
    }

    // Cache hit requires both: the request is within the buffer distance of the last
    // fetched position, and more than a buffer's worth remains before the cached end.
    var nearLastFetch = Math.Abs(position - _lastPosition) < _snippetBuffer;
    var cacheHit = nearLastFetch && _currentEndPos - position > _snippetBuffer;

    if (!cacheHit)
    {
        var fetched = _snippetSource.GetGenomeSnippet(position);
        _snippet = fetched;
        _lastPosition = position;
        _currentEndPos = fetched.StartPosition + fetched.Sequence.Length;
    }

    return _snippet;
}
/// <summary>
/// Computes the NM value for an alignment: the number of mismatches reported by
/// <c>Helper.GetNumMismatches</c> against the genome snippet, plus the number of
/// indel bases in the alignment's CIGAR.
/// </summary>
/// <param name="alignment">Alignment whose bases, position and CIGAR data are evaluated.</param>
/// <returns>Mismatch count plus indel base count.</returns>
/// <exception cref="Exception">Thrown when the mismatch count comes back as null.</exception>
public int GetNm(BamAlignment alignment)
{
    var readBases = alignment.Bases;
    var cigar = alignment.CigarData;

    // The position map is populated from alignment.Position + 1, while the snippet is
    // requested at alignment.Position itself.
    var readToReference = new PositionMap(readBases.Length);
    Read.UpdatePositionMap(alignment.Position + 1, cigar, readToReference);

    var reference = _genomeSnippetSource.GetGenomeSnippet(alignment.Position);
    var mismatchCount = Helper.GetNumMismatches(readBases, readToReference, reference.Sequence, reference.StartPosition);

    if (mismatchCount == null)
    {
        throw new Exception("Num mismatches is null");
    }

    return mismatchCount.Value + alignment.CigarData.NumIndelBases();
}