Пример #1
0
        // Writes a small VCF (three '#' header lines followed by records on contigs "2" and "1")
        // into a memory stream, fast-forwards a VcfFilter built for Chr1:100-200 over it, and
        // checks that the filter's buffered line is the record at position 100 on contig "1".
        public void FastForward_EnsemblNamingStyle_ChangeReaderStateCorrectly()
        {
            const string expectedBufferedLine = "1\t100\t.\tC\tT\t165.00\tPASS\tSNVSB=-12.5;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:119:35:25:0:8,17";

            // the lines are written to the stream in exactly this order
            string[] vcfLines =
            {
                "#Header line 1",
                "#Header line 2",
                "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tMother",
                "2\t150\t.\tG\tA\t5.00\tLowGQXHetSNP\tSNVSB=0.0;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:3:1:1:0:0,1",
                "1\t90\t.\tT\tC\t1.00\tLowGQXHetSNP\tSNVSB=0.0;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:23:9:3:0:2,1",
                "1\t95\t.\tA\tT\t2.00\tLowGQXHetSNP\tSNVSB=0.0;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:23:9:3:0:2,1",
                expectedBufferedLine,
                "1\t102\t.\tC\tA\t3.00\tLowGQXHetSNP\tSNVSB=0.0;SNVHPOL=5\tGT:GQ:GQX:DP:DPF:AD\t0/1:29:2:2:0:1,1"
            };

            var rangeStart = new GenomicPosition(ChromosomeUtilities.Chr1, 100);
            var rangeEnd   = new GenomicPosition(ChromosomeUtilities.Chr1, 200);
            var filter     = new VcfFilter(new GenomicRange(rangeStart, rangeEnd));

            using (var stream = new MemoryStream())
            {
                // leaveOpen = true keeps the memory stream usable after the writer is disposed
                using (var vcfWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                {
                    foreach (string vcfLine in vcfLines)
                    {
                        vcfWriter.WriteLine(vcfLine);
                    }
                }

                // rewind so the reader starts at the first header line
                stream.Position = 0;

                using (var vcfReader = new StreamReader(stream))
                {
                    filter.FastForward(vcfReader);
                    Assert.Equal(expectedBufferedLine, filter.BufferedLine);
                }
            }
        }
Пример #2
0
        // Checks that scanning the test VCF stream for the chr1 range 10019-10290 yields a single
        // chromosome entry holding three positions.
        public void GetPositions_inRange()
        {
            var chr1       = new Chromosome("chr1", "1", 0);
            var rangeStart = new GenomicPosition(chr1, 10019);
            var rangeEnd   = new GenomicPosition(chr1, 10290);
            var range      = new GenomicRange(rangeStart, rangeEnd);

            var sequenceProvider      = ParserTestUtils.GetSequenceProvider(10329, "AC", 'A', _chromDict);
            var positionsByChromosome = PreLoadUtilities.GetPositions(GetVcfStream(), range, sequenceProvider);

            Assert.Single(positionsByChromosome);
            Assert.Equal(3, positionsByChromosome[Chrom1].Count);
        }
Пример #3
0
        // Checks that scanning the test VCF stream for Chr1:10019-10290 (no ref-minor provider)
        // yields a single chromosome entry holding three positions.
        public void GetPositions_inRange()
        {
            var rangeStart = new GenomicPosition(ChromosomeUtilities.Chr1, 10019);
            var rangeEnd   = new GenomicPosition(ChromosomeUtilities.Chr1, 10290);
            var range      = new GenomicRange(rangeStart, rangeEnd);

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(10329, "AC", 'A', ChromosomeUtilities.RefNameToChromosome);

            // the second tuple element (the position count) is not needed here
            var (positionsByChromosome, _) = PreLoadUtilities.GetPositions(GetVcfStream(), range, sequenceProvider, null);

            Assert.Single(positionsByChromosome);
            Assert.Equal(3, positionsByChromosome[ChromosomeUtilities.Chr1].Count);
        }
Пример #4
0
        // For a filter over Chr1:100-200, PassedTheEnd is expected to be false at positions 150
        // and 200 on Chr1, and true at position 201 on Chr1 and at position 150 on Chr2.
        public void PassedTheEnd_AsExpected()
        {
            var rangeStart = new GenomicPosition(ChromosomeUtilities.Chr1, 100);
            var rangeEnd   = new GenomicPosition(ChromosomeUtilities.Chr1, 200);
            var filter     = new VcfFilter(new GenomicRange(rangeStart, rangeEnd));

            // inside the range and exactly at its end
            Assert.False(filter.PassedTheEnd(ChromosomeUtilities.Chr1, 150));
            Assert.False(filter.PassedTheEnd(ChromosomeUtilities.Chr1, 200));

            // one base past the end, and on a different chromosome
            Assert.True(filter.PassedTheEnd(ChromosomeUtilities.Chr1, 201));
            Assert.True(filter.PassedTheEnd(ChromosomeUtilities.Chr2, 150));
        }
Пример #5
0
        /// <summary>
        /// Populates <c>_variantPositions</c> with the positions found in the VCF stream for the given range.
        /// </summary>
        /// <param name="vcfStream">VCF stream to scan; when null, <c>_variantPositions</c> is set to null.</param>
        /// <param name="genomicRange">range handed to <c>PreLoadUtilities.GetPositions</c></param>
        public void GetVariantPositions(Stream vcfStream, GenomicRange genomicRange)
        {
            if (vcfStream == null)
            {
                _variantPositions = null;
                return;
            }

            // move the stream to the block offset of the input's starting virtual position
            var startPosition = Tabix.VirtualPosition.From(InputStartVirtualPosition);
            vcfStream.Position = startPosition.BlockOffset;

            var positionsByChromosome = PreLoadUtilities.GetPositions(vcfStream, genomicRange, SequenceProvider);
            _variantPositions = positionsByChromosome.ToImmutableDictionary();
        }
Пример #6
0
        // For a filter over chr1:100-200, PassedTheEnd is expected to be false at positions 150
        // and 200 on chr1, and true at position 201 on chr1 and at position 150 on chr2.
        // Every query uses its own Chromosome instance rather than the one the range was built with.
        public void PassedTheEnd_AsExpected()
        {
            var rangeChromosome = new Chromosome("chr1", "1", 0);
            var rangeStart      = new GenomicPosition(rangeChromosome, 100);
            var rangeEnd        = new GenomicPosition(rangeChromosome, 200);
            var filter          = new VcfFilter(new GenomicRange(rangeStart, rangeEnd));

            var chr1InsideRange = new Chromosome("chr1", "1", 0);
            var chr1AtRangeEnd  = new Chromosome("chr1", "1", 0);
            var chr1PastTheEnd  = new Chromosome("chr1", "1", 0);
            var chr2            = new Chromosome("chr2", "2", 1);

            Assert.False(filter.PassedTheEnd(chr1InsideRange, 150));
            Assert.False(filter.PassedTheEnd(chr1AtRangeEnd, 200));
            Assert.True(filter.PassedTheEnd(chr1PastTheEnd, 201));
            Assert.True(filter.PassedTheEnd(chr2, 150));
        }
Пример #7
0
        /// <summary>
        /// Populates <c>_variantPositions</c> with the positions found in the VCF stream for the given range
        /// and reports the number of positions through <c>Metrics</c>.
        /// </summary>
        /// <param name="vcfStream">VCF stream to scan; when null, <c>_variantPositions</c> is set to null and nothing is measured.</param>
        /// <param name="genomicRange">range handed to <c>PreLoadUtilities.GetPositions</c></param>
        public void GetVariantPositions(Stream vcfStream, GenomicRange genomicRange)
        {
            if (vcfStream == null)
            {
                _variantPositions = null;
                return;
            }

            // move the stream to the block offset of the input's starting virtual position
            var startPosition = Tabix.VirtualPosition.From(InputStartVirtualPosition);
            vcfStream.Position = startPosition.BlockOffset;

            int positionCount;

            // the scan metric is started right before the scan and the count is reported right after it
            Metrics.SaPositionScan.Start();
            (_variantPositions, positionCount) = PreLoadUtilities.GetPositions(vcfStream, genomicRange, SequenceProvider, RefMinorProvider);
            Metrics.ShowSaPositionScanLoad(positionCount);
        }
Пример #8
0
 /// <summary>
 /// Creates a VCF filter for the supplied genomic range.
 /// </summary>
 /// <param name="genomicRange">range that is stored and also wrapped in a <c>GenomicRangeChecker</c></param>
 public VcfFilter(GenomicRange genomicRange)
 {
     // both fields are derived from the same range; the assignments are independent of each other
     _genomicRangeChecker = new GenomicRangeChecker(genomicRange);
     _genomicRange        = genomicRange;
 }
Пример #9
0
        /// <summary>
        /// Reads the VCF stream line by line and collects positions per chromosome until the range
        /// checker reports a record as out of range. Progress and timing are written to the console.
        /// </summary>
        /// <param name="vcfStream">VCF stream to read; it is wrapped in a <see cref="StreamReader"/> that is disposed before returning</param>
        /// <param name="genomicRange">range used to build the <c>GenomicRangeChecker</c></param>
        /// <param name="sequenceProvider">supplies the reference-name lookup and the sequence of the loaded chromosome</param>
        /// <returns>the chromosome-to-positions dictionary after <c>SortPositionsAndGetCount</c> has been applied</returns>
        public static IDictionary<IChromosome, List<int>> GetPositions(Stream vcfStream, GenomicRange genomicRange, ISequenceProvider sequenceProvider)
        {
            // start timing before anything is printed
            var stopwatch = new Benchmark();

            Console.Write("Scanning positions required for SA pre-loading....");
            var positionsByChromosome = new Dictionary<IChromosome, List<int>>();
            var checker               = new GenomicRangeChecker(genomicRange);
            var refNameToChromosome   = sequenceProvider.RefNameToChromosome;

            using (var vcfReader = new StreamReader(vcfStream))
            {
                for (string vcfLine = vcfReader.ReadLine(); vcfLine != null; vcfLine = vcfReader.ReadLine())
                {
                    bool shouldProcess = NeedProcessThisLine(refNameToChromosome, vcfLine, out var columns, out IChromosome chromosome);
                    if (!shouldProcess)
                    {
                        continue;
                    }

                    int position = int.Parse(columns[VcfCommon.PosIndex]);

                    // the first out-of-range record ends the scan
                    if (checker.OutOfRange(chromosome, position))
                    {
                        break;
                    }

                    string refAllele = columns[VcfCommon.RefIndex];
                    string altAllele = columns[VcfCommon.AltIndex];

                    // the chromosome is loaded before the provider's sequence is read
                    sequenceProvider.LoadChromosome(chromosome);
                    UpdateChromToPositions(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
                }
            }

            int numPositions = SortPositionsAndGetCount(positionsByChromosome);

            Console.WriteLine($"{numPositions} positions found in {Benchmark.ToHumanReadable(stopwatch.GetElapsedTime())}");

            return positionsByChromosome;
        }
Пример #10
0
        /// <summary>
        /// Reads the VCF stream line by line and collects positions per chromosome until the range
        /// checker reports a record as out of range.
        /// </summary>
        /// <param name="vcfStream">VCF stream to read; it is wrapped in a <see cref="StreamReader"/> that is disposed before returning</param>
        /// <param name="genomicRange">range used to build the <c>GenomicRangeChecker</c></param>
        /// <param name="sequenceProvider">supplies the reference-name lookup and the sequence of the loaded chromosome</param>
        /// <param name="refMinorProvider">passed to <c>IsRefMinor</c> for records whose ALT column is "."</param>
        /// <returns>
        /// an immutable copy of the chromosome-to-positions dictionary together with the count returned by
        /// <c>SortPositionsAndGetCount</c>
        /// </returns>
        /// <exception cref="InvalidDataException">thrown when the POS column cannot be converted to an integer</exception>
        public static (ImmutableDictionary<IChromosome, List<int>> PositionsByChromosome, int Count) GetPositions(Stream vcfStream, GenomicRange genomicRange,
                                                                                                                   ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)
        {
            var positionsByChromosome = new Dictionary<IChromosome, List<int>>();
            var rangeChecker          = new GenomicRangeChecker(genomicRange);
            var refNameToChrom        = sequenceProvider.RefNameToChromosome;

            using (var reader = new StreamReader(vcfStream))
            {
                string      line;
                string      currentReferenceName = "";
                IChromosome chromosome           = null;

                while ((line = reader.ReadLine()) != null)
                {
                    // header and meta lines carry no positions
                    if (line.StartsWith('#'))
                    {
                        continue;
                    }

                    string[] cols          = line.OptimizedSplit('\t');
                    string   referenceName = cols[VcfCommon.ChromIndex];

                    if (referenceName != currentReferenceName)
                    {
                        // Always remember the name that was looked up, even when the lookup fails.
                        // Otherwise a failed lookup nulls the cached chromosome while the cached name
                        // still points at the previous contig, and a later record on that contig
                        // would be processed with a null chromosome.
                        currentReferenceName = referenceName;

                        if (!refNameToChrom.TryGetValue(referenceName, out chromosome))
                        {
                            chromosome = null;
                        }
                    }

                    // unknown reference name (including consecutive records on the same unknown contig)
                    if (chromosome == null)
                    {
                        continue;
                    }

                    (int position, bool foundError) = cols[VcfCommon.PosIndex].OptimizedParseInt32();
                    if (foundError)
                    {
                        throw new InvalidDataException($"Unable to convert the VCF position to an integer: {cols[VcfCommon.PosIndex]}");
                    }

                    // the first out-of-range record ends the scan
                    if (rangeChecker.OutOfRange(chromosome, position))
                    {
                        break;
                    }

                    string refAllele = cols[VcfCommon.RefIndex];
                    string altAllele = cols[VcfCommon.AltIndex];

                    // records without an alternate allele are only kept when IsRefMinor says so
                    if (altAllele == "." && !IsRefMinor(refMinorProvider, chromosome, position))
                    {
                        continue;
                    }

                    // the chromosome is loaded before the provider's sequence is read
                    sequenceProvider.LoadChromosome(chromosome);
                    TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
                }
            }

            int count = SortPositionsAndGetCount(positionsByChromosome);

            return (positionsByChromosome.ToImmutableDictionary(), count);
        }
Пример #11
0
 /// <summary>
 /// Creates a range checker that keeps the supplied genomic range.
 /// </summary>
 /// <param name="genomicRange">range stored in <c>_genomicRange</c></param>
 public GenomicRangeChecker(GenomicRange genomicRange) => _genomicRange = genomicRange;