/// <summary>
/// Reads a VCF stream line by line and gathers candidate positions per chromosome.
/// Header lines (starting with '#') and lines whose reference name is not present in
/// <c>sequenceProvider.RefNameToChromosome</c> are skipped. Reading stops at the first
/// line that the genomic range checker reports as out of range.
/// </summary>
/// <param name="vcfStream">The VCF input. It is wrapped in a <see cref="StreamReader"/> that is disposed before returning, which also closes the stream.</param>
/// <param name="genomicRange">The range used to construct the <c>GenomicRangeChecker</c> that terminates the scan.</param>
/// <param name="sequenceProvider">Supplies the reference-name lookup and the sequence of the chromosome loaded for each processed line.</param>
/// <param name="refMinorProvider">Passed to <c>IsRefMinor</c> for lines whose ALT column is ".".</param>
/// <returns>
/// An immutable snapshot of the per-chromosome position lists together with the count
/// returned by <c>SortPositionsAndGetCount</c>.
/// </returns>
/// <exception cref="InvalidDataException">The POS column of a processed line could not be parsed as an integer.</exception>
public static (ImmutableDictionary<IChromosome, List<int>> PositionsByChromosome, int Count) GetPositions(Stream vcfStream, GenomicRange genomicRange, ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)
{
    var positionsByChromosome = new Dictionary<IChromosome, List<int>>();
    var rangeChecker = new GenomicRangeChecker(genomicRange);
    var refNameToChrom = sequenceProvider.RefNameToChromosome;

    using (var reader = new StreamReader(vcfStream))
    {
        string line;

        // The cached name and chromosome are always updated together. A null name forces a
        // lookup on the first data line; a null chromosome marks the current contig as unknown.
        string currentReferenceName = null;
        IChromosome chromosome = null;

        while ((line = reader.ReadLine()) != null)
        {
            if (line.StartsWith('#'))
            {
                continue;
            }

            string[] cols = line.OptimizedSplit('\t');
            string referenceName = cols[VcfCommon.ChromIndex];

            if (referenceName != currentReferenceName)
            {
                // Remember the name even when the lookup fails. Otherwise a failed lookup
                // would null out the chromosome while the previous name stayed cached, and
                // a later line for that previous contig would be processed with a null chromosome.
                currentReferenceName = referenceName;

                if (!refNameToChrom.TryGetValue(referenceName, out chromosome))
                {
                    chromosome = null;
                }
            }

            // Unknown contig: skip every line of it without repeating the lookup.
            if (chromosome == null)
            {
                continue;
            }

            (int position, bool foundError) = cols[VcfCommon.PosIndex].OptimizedParseInt32();
            if (foundError)
            {
                throw new InvalidDataException($"Unable to convert the VCF position to an integer: {cols[VcfCommon.PosIndex]}");
            }

            // Stop scanning entirely once the range checker reports this line as out of range.
            if (rangeChecker.OutOfRange(chromosome, position))
            {
                break;
            }

            string refAllele = cols[VcfCommon.RefIndex];
            string altAllele = cols[VcfCommon.AltIndex];

            // Lines without an ALT allele are only kept when IsRefMinor says so.
            if (altAllele == "." && !IsRefMinor(refMinorProvider, chromosome, position))
            {
                continue;
            }

            // Load the chromosome before its sequence is handed to TryAddPosition.
            sequenceProvider.LoadChromosome(chromosome);
            TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
        }
    }

    int count = SortPositionsAndGetCount(positionsByChromosome);
    return (positionsByChromosome.ToImmutableDictionary(), count);
}
/// <summary>
/// Scans a VCF stream and builds a per-chromosome list of positions, writing a progress
/// message to the console before the scan and a summary (count and elapsed time) after it.
/// Lines rejected by <c>NeedProcessThisLine</c> are skipped, and the scan ends at the first
/// line that the genomic range checker reports as out of range.
/// </summary>
/// <param name="vcfStream">The VCF input. It is wrapped in a <see cref="StreamReader"/> that is disposed once the scan ends, which also closes the stream.</param>
/// <param name="genomicRange">The range used to construct the <c>GenomicRangeChecker</c> that terminates the scan.</param>
/// <param name="sequenceProvider">Supplies the reference-name lookup and the sequence of the chromosome loaded for each processed line.</param>
/// <returns>The dictionary of chromosome to positions, after <c>SortPositionsAndGetCount</c> has been applied to it.</returns>
public static IDictionary<IChromosome, List<int>> GetPositions(Stream vcfStream, GenomicRange genomicRange, ISequenceProvider sequenceProvider)
{
    // Start timing before anything is printed so the reported duration covers the whole scan.
    var benchmark = new Benchmark();
    Console.Write("Scanning positions required for SA pre-loading....");

    var positionsPerChromosome = new Dictionary<IChromosome, List<int>>();
    var rangeGuard = new GenomicRangeChecker(genomicRange);
    var chromosomeLookup = sequenceProvider.RefNameToChromosome;

    using (var vcfReader = new StreamReader(vcfStream))
    {
        for (string vcfLine = vcfReader.ReadLine(); vcfLine != null; vcfLine = vcfReader.ReadLine())
        {
            bool shouldProcess = NeedProcessThisLine(chromosomeLookup, vcfLine, out var columns, out IChromosome chromosome);
            if (!shouldProcess)
            {
                continue;
            }

            int position = int.Parse(columns[VcfCommon.PosIndex]);

            // Stop scanning entirely once the range checker reports this line as out of range.
            if (rangeGuard.OutOfRange(chromosome, position))
            {
                break;
            }

            string referenceAllele = columns[VcfCommon.RefIndex];
            string alternateAllele = columns[VcfCommon.AltIndex];

            // Load the chromosome before its sequence is handed to UpdateChromToPositions.
            sequenceProvider.LoadChromosome(chromosome);
            UpdateChromToPositions(positionsPerChromosome, chromosome, position, referenceAllele, alternateAllele, sequenceProvider.Sequence);
        }
    }

    int count = SortPositionsAndGetCount(positionsPerChromosome);
    Console.WriteLine($"{count} positions found in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}");

    return positionsPerChromosome;
}
/// <summary>
/// Reports whether the given chromosome/position is out of range according to the
/// genomic range checker held by this instance.
/// </summary>
/// <param name="chromosome">The chromosome of the position being tested.</param>
/// <param name="position">The position being tested.</param>
/// <returns>The result of the range checker's <c>OutOfRange</c> call for these arguments.</returns>
public bool PassedTheEnd(IChromosome chromosome, int position)
{
    bool isOutOfRange = _genomicRangeChecker.OutOfRange(chromosome, position);
    return isOutOfRange;
}