Example #1
0
        /// <summary>
        /// Scans a VCF stream and gathers, per chromosome, the positions that are handed to TryAddPosition.
        /// </summary>
        /// <param name="vcfStream">The VCF content; it is wrapped in a StreamReader that is disposed before returning.</param>
        /// <param name="genomicRange">Range given to the GenomicRangeChecker; reading stops at the first record it reports as out of range.</param>
        /// <param name="sequenceProvider">Supplies the reference-name lookup and the sequence of the chromosome being processed.</param>
        /// <param name="refMinorProvider">Passed to IsRefMinor to decide whether a record whose ALT column is "." is kept.</param>
        /// <returns>The collected positions keyed by chromosome, and the count returned by SortPositionsAndGetCount.</returns>
        /// <exception cref="InvalidDataException">Thrown when the POS column cannot be parsed as an integer.</exception>
        public static (ImmutableDictionary <IChromosome, List <int> > PositionsByChromosome, int Count) GetPositions(Stream vcfStream, GenomicRange genomicRange,
                                                                                                                     ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)
        {
            var positionsByChromosome = new Dictionary <IChromosome, List <int> >();
            var rangeChecker          = new GenomicRangeChecker(genomicRange);
            var refNameToChrom        = sequenceProvider.RefNameToChromosome;

            using (var reader = new StreamReader(vcfStream))
            {
                string      line;
                string      currentReferenceName = null;
                IChromosome chromosome           = null;

                while ((line = reader.ReadLine()) != null)
                {
                    // header and meta-information lines carry no positions
                    if (line.StartsWith('#'))
                    {
                        continue;
                    }

                    string[] cols          = line.OptimizedSplit('\t');
                    string   referenceName = cols[VcfCommon.ChromIndex];

                    if (referenceName != currentReferenceName)
                    {
                        // Keep the cached name and the cached chromosome in lock-step, even when the
                        // lookup fails. TryGetValue resets its out argument on a miss, so leaving the
                        // previous name cached would let a later record with that name reuse a null
                        // chromosome without being looked up again.
                        currentReferenceName = referenceName;
                        if (!refNameToChrom.TryGetValue(referenceName, out chromosome))
                        {
                            chromosome = null;
                        }
                    }

                    // records on reference names unknown to the sequence provider are skipped
                    if (chromosome == null)
                    {
                        continue;
                    }

                    (int position, bool foundError) = cols[VcfCommon.PosIndex].OptimizedParseInt32();
                    if (foundError)
                    {
                        throw new InvalidDataException($"Unable to convert the VCF position to an integer: {cols[VcfCommon.PosIndex]}");
                    }

                    // the first out-of-range record ends the scan; nothing after it is read
                    if (rangeChecker.OutOfRange(chromosome, position))
                    {
                        break;
                    }

                    string refAllele = cols[VcfCommon.RefIndex];
                    string altAllele = cols[VcfCommon.AltIndex];

                    // a record without an alternate allele is only kept when IsRefMinor accepts it
                    if (altAllele == "." && !IsRefMinor(refMinorProvider, chromosome, position))
                    {
                        continue;
                    }

                    sequenceProvider.LoadChromosome(chromosome);
                    TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
                }
            }

            int count = SortPositionsAndGetCount(positionsByChromosome);

            return(positionsByChromosome.ToImmutableDictionary(), count);
        }
Example #2
0
        /// <summary>
        /// Reads a VCF stream line by line and builds the chromosome-to-positions map used for SA pre-loading,
        /// writing a progress message and the elapsed time to the console.
        /// </summary>
        /// <param name="vcfStream">The VCF content; it is wrapped in a StreamReader that is disposed before returning.</param>
        /// <param name="genomicRange">Range given to the GenomicRangeChecker; reading stops at the first record it reports as out of range.</param>
        /// <param name="sequenceProvider">Supplies the reference-name lookup and the sequence of the chromosome being processed.</param>
        /// <returns>The map populated by UpdateChromToPositions after SortPositionsAndGetCount has been applied to it.</returns>
        public static IDictionary <IChromosome, List <int> > GetPositions(Stream vcfStream, GenomicRange genomicRange, ISequenceProvider sequenceProvider)
        {
            var timer = new Benchmark();

            Console.Write("Scanning positions required for SA pre-loading....");
            var positionsByChrom = new Dictionary<IChromosome, List<int>>();
            var rangeGuard       = new GenomicRangeChecker(genomicRange);
            var chromByRefName   = sequenceProvider.RefNameToChromosome;

            using (var vcfReader = new StreamReader(vcfStream))
            {
                for (string vcfLine = vcfReader.ReadLine(); vcfLine != null; vcfLine = vcfReader.ReadLine())
                {
                    // NeedProcessThisLine decides whether the line is relevant and hands back its columns and chromosome
                    bool isRelevant = NeedProcessThisLine(chromByRefName, vcfLine, out var fields, out IChromosome chrom);
                    if (!isRelevant)
                    {
                        continue;
                    }

                    int pos = int.Parse(fields[VcfCommon.PosIndex]);

                    // the first out-of-range record ends the scan
                    bool pastRange = rangeGuard.OutOfRange(chrom, pos);
                    if (pastRange)
                    {
                        break;
                    }

                    string reference = fields[VcfCommon.RefIndex];
                    string alternate = fields[VcfCommon.AltIndex];
                    sequenceProvider.LoadChromosome(chrom);
                    UpdateChromToPositions(positionsByChrom, chrom, pos, reference, alternate, sequenceProvider.Sequence);
                }
            }

            int numPositions = SortPositionsAndGetCount(positionsByChrom);

            Console.WriteLine($"{numPositions} positions found in {Benchmark.ToHumanReadable(timer.GetElapsedTime())}");

            return positionsByChrom;
        }
Example #3
0
 /// <summary>
 /// Creates a filter for the given genomic range and builds the range checker derived from it.
 /// </summary>
 /// <param name="genomicRange">The range stored by the filter and passed to the GenomicRangeChecker constructor.</param>
 public VcfFilter(GenomicRange genomicRange)
 {
     (_genomicRange, _genomicRangeChecker) = (genomicRange, new GenomicRangeChecker(genomicRange));
 }