Example #1
0
 /// <summary>
 /// Initializes the factory by storing the supplied lookup, provider, and flag in its private fields.
 /// </summary>
 /// <param name="refNameToChromosome">Lookup from reference name to chromosome.</param>
 /// <param name="refMinorProvider">Ref-minor provider; stored as-is (no null check is performed here).</param>
 /// <param name="enableVerboseTranscript">Verbose-transcript flag; stored as-is.</param>
 public VariantFactory(
     IDictionary<string, IChromosome> refNameToChromosome,
     IRefMinorProvider refMinorProvider,
     bool enableVerboseTranscript)
 {
     (_refNameToChromosome, _refMinorProvider, _enableVerboseTranscript) =
         (refNameToChromosome, refMinorProvider, enableVerboseTranscript);
 }
Example #2
0
 /// <summary>
 /// Reports whether the provider returns a non-empty global major allele for the given position.
 /// </summary>
 /// <param name="refMinorProvider">Provider to query; may be null.</param>
 /// <param name="chrom">Chromosome passed through to the provider.</param>
 /// <param name="position">Position passed through to the provider.</param>
 /// <returns>
 /// <c>false</c> when <paramref name="refMinorProvider"/> is null or the returned allele is null or empty;
 /// otherwise <c>true</c>.
 /// </returns>
 private static bool IsRefMinor(IRefMinorProvider refMinorProvider, IChromosome chrom, int position) =>
     refMinorProvider != null &&
     !string.IsNullOrEmpty(refMinorProvider.GetGlobalMajorAllele(chrom, position));
Example #3
0
        /// <summary>
        /// Constructs a <see cref="VcfReader"/>, parses its header, and then attaches the recomposer.
        /// </summary>
        /// <remarks>
        /// All arguments except <paramref name="recomposer"/> are forwarded unchanged to the reader's constructor.
        /// </remarks>
        /// <returns>The reader, after the header has been parsed and the recomposer set.</returns>
        public static VcfReader Create(
            StreamReader headerReader,
            StreamReader vcfLineReader,
            ISequenceProvider sequenceProvider,
            IRefMinorProvider refMinorProvider,
            IRecomposer recomposer,
            IVcfFilter vcfFilter,
            IVariantIdCreator vidCreator,
            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
            bool enableDq = false)
        {
            var reader = new VcfReader(headerReader, vcfLineReader, sequenceProvider, refMinorProvider,
                                       vcfFilter, vidCreator, mitoHeteroplasmyProvider, enableDq);

            // The header is parsed before the recomposer is installed.
            reader.ParseHeader();
            reader.SetRecomposer(recomposer);

            return reader;
        }
Example #4
0
        /// <summary>
        /// Constructs a <see cref="VcfReader"/>, parses its header, and then attaches the recomposer.
        /// </summary>
        /// <remarks>
        /// All arguments except <paramref name="recomposer"/> are forwarded unchanged to the reader's constructor.
        /// </remarks>
        /// <returns>The reader, after the header has been parsed and the recomposer set.</returns>
        public static VcfReader Create(
            StreamReader headerReader,
            StreamReader vcfLineReader,
            ISequenceProvider sequenceProvider,
            IRefMinorProvider refMinorProvider,
            IRecomposer recomposer,
            IVcfFilter vcfFilter,
            IVariantIdCreator vidCreator)
        {
            var reader = new VcfReader(headerReader, vcfLineReader, sequenceProvider, refMinorProvider,
                                       vcfFilter, vidCreator);

            // The header is parsed before the recomposer is installed.
            reader.ParseHeader();
            reader.SetRecomposer(recomposer);

            return reader;
        }
Example #5
0
        /// <summary>
        /// Wraps <paramref name="stream"/> in a <see cref="StreamReader"/>, builds the variant factory,
        /// parses the header, and selects the recomposer based on the header result.
        /// </summary>
        /// <param name="stream">Stream the VCF content is read from.</param>
        /// <param name="refNameToChromosome">Lookup from reference name to chromosome.</param>
        /// <param name="refMinorProvider">Forwarded to the variant factory.</param>
        /// <param name="enableVerboseTranscript">Forwarded to the variant factory.</param>
        /// <param name="recomposer">Used only when the header parse reports a sample column.</param>
        public VcfReader(
            Stream stream,
            IDictionary<string, IChromosome> refNameToChromosome,
            IRefMinorProvider refMinorProvider,
            bool enableVerboseTranscript,
            IRecomposer recomposer)
        {
            // All three fields are populated before ParseHeader() runs.
            _reader              = new StreamReader(stream);
            _variantFactory      = new VariantFactory(refNameToChromosome, refMinorProvider, enableVerboseTranscript);
            _refNameToChromosome = refNameToChromosome;

            // ParseHeader() returns whether a sample column exists; without one a NullRecomposer is used.
            if (ParseHeader())
            {
                _recomposer = recomposer;
            }
            else
            {
                _recomposer = new NullRecomposer();
            }
        }
Example #6
0
 /// <summary>
 /// Stores the readers, provider, and filter, and derives the variant factory and the
 /// reference-name lookup from <paramref name="sequenceProvider"/>.
 /// </summary>
 /// <param name="headerReader">Reader stored for the header.</param>
 /// <param name="vcfLineReader">Reader stored for the VCF lines.</param>
 /// <param name="sequenceProvider">Source of the variant factory argument and of <c>RefNameToChromosome</c>.</param>
 /// <param name="refMinorProvider">Stored as-is.</param>
 /// <param name="vcfFilter">Stored as-is.</param>
 private VcfReader(
     StreamReader headerReader,
     StreamReader vcfLineReader,
     ISequenceProvider sequenceProvider,
     IRefMinorProvider refMinorProvider,
     IVcfFilter vcfFilter)
 {
     // Plain pass-through stores.
     _headerReader     = headerReader;
     _reader           = vcfLineReader;
     _refMinorProvider = refMinorProvider;
     _vcfFilter        = vcfFilter;

     // State derived from the sequence provider.
     _variantFactory      = new VariantFactory(sequenceProvider);
     _refNameToChromosome = sequenceProvider.RefNameToChromosome;
 }
Example #7
0
        /// <summary>
        /// Opens the VCF input and returns a <see cref="VcfReader"/> over it.
        /// </summary>
        /// <param name="vcfPath">Path to the VCF; the literal "-" selects standard input.</param>
        /// <param name="chromosomeDictionary">Forwarded to the reader.</param>
        /// <param name="refMinorProvider">Forwarded to the reader.</param>
        /// <param name="verboseTranscript">Forwarded to the reader.</param>
        /// <param name="recomposer">Forwarded to the reader.</param>
        /// <returns>A reader over a <c>PeekStream</c> that wraps the chosen input stream.</returns>
        public static IVcfReader GetVcfReader(
            string vcfPath,
            IDictionary<string, IChromosome> chromosomeDictionary,
            IRefMinorProvider refMinorProvider,
            bool verboseTranscript,
            IRecomposer recomposer)
        {
            Stream input;

            if (vcfPath == "-")
            {
                input = Console.OpenStandardInput();
            }
            else
            {
                input = GZipUtilities.GetAppropriateReadStream(vcfPath);
            }

            var peekStream = new PeekStream(input);

            return new VcfReader(peekStream, chromosomeDictionary, refMinorProvider, verboseTranscript, recomposer);
        }
Example #8
0
 /// <summary>
 /// Stores the readers, providers, and filter, and derives the variant factory and the
 /// reference-name lookup from <paramref name="sequenceProvider"/>.
 /// </summary>
 /// <param name="headerReader">Reader stored for the header.</param>
 /// <param name="vcfLineReader">Reader stored for the VCF lines.</param>
 /// <param name="sequenceProvider">Stored, and also the source of <c>Sequence</c> and <c>RefNameToChromosome</c>.</param>
 /// <param name="refMinorProvider">Stored as-is.</param>
 /// <param name="vcfFilter">Stored as-is.</param>
 /// <param name="vidCreator">Forwarded to the variant factory.</param>
 /// <param name="mitoHeteroplasmyProvider">Stored as-is.</param>
 private VcfReader(
     StreamReader headerReader,
     StreamReader vcfLineReader,
     ISequenceProvider sequenceProvider,
     IRefMinorProvider refMinorProvider,
     IVcfFilter vcfFilter,
     IVariantIdCreator vidCreator,
     IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
 {
     // Plain pass-through stores.
     _headerReader             = headerReader;
     _reader                   = vcfLineReader;
     _sequenceProvider         = sequenceProvider;
     _refMinorProvider         = refMinorProvider;
     _vcfFilter                = vcfFilter;
     _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;

     // State derived from the sequence provider.
     _variantFactory      = new VariantFactory(sequenceProvider.Sequence, vidCreator);
     _refNameToChromosome = sequenceProvider.RefNameToChromosome;
 }
Example #9
0
        /// <summary>
        /// Converts a simple position into a full <see cref="IPosition"/>, parsing the VCF columns and
        /// creating variants and samples along the way.
        /// </summary>
        /// <param name="simplePosition">Source position; a null input yields a null result.</param>
        /// <param name="refMinorProvider">Queried only for reference positions; may be null.</param>
        /// <param name="sequenceProvider">Asked to load the position's chromosome before any parsing.</param>
        /// <param name="mitoHeteroplasmyProvider">Forwarded to sample parsing.</param>
        /// <param name="variantFactory">Supplies the format indices and creates the variants.</param>
        /// <param name="enableDq">Forwarded to sample parsing.</param>
        /// <returns>
        /// <c>null</c> for a null input; the result of <c>GetReferencePosition</c> for a reference position
        /// without a global major allele; otherwise a new <c>Position</c>.
        /// </returns>
        public static IPosition ToPosition(
            ISimplePosition simplePosition,
            IRefMinorProvider refMinorProvider,
            ISequenceProvider sequenceProvider,
            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
            VariantFactory variantFactory,
            bool enableDq = false)
        {
            if (simplePosition == null)
            {
                return null;
            }

            sequenceProvider.LoadChromosome(simplePosition.Chromosome);

            string[] columns    = simplePosition.VcfFields;
            string[] altAlleles = columns[VcfCommon.AltIndex].OptimizedSplit(',');

            // A reference position has exactly one ALT entry and that entry is a reference placeholder.
            bool isReference = altAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(altAlleles[0]);

            // The global major allele is only looked up for reference positions.
            string globalMajorAllele = null;

            if (isReference)
            {
                globalMajorAllele = refMinorProvider?.GetGlobalMajorAllele(simplePosition.Chromosome, simplePosition.Start);

                // Reference position that is not a ref-minor site: take the reference-position shortcut.
                if (globalMajorAllele == null)
                {
                    return GetReferencePosition(simplePosition);
                }
            }

            var     infoData = VcfInfoParser.Parse(columns[VcfCommon.InfoIndex]);
            int     end      = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);
            double? quality  = columns[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);

            string[]  filters = columns[VcfCommon.FilterIndex].OptimizedSplit(';');
            ISample[] samples = columns.ToSamples(variantFactory.FormatIndices, simplePosition, mitoHeteroplasmyProvider, enableDq);

            IVariant[] variants = variantFactory.CreateVariants(
                simplePosition.Chromosome, simplePosition.Start, end,
                simplePosition.RefAllele, altAlleles, infoData, simplePosition.IsDecomposed,
                simplePosition.IsRecomposed, simplePosition.LinkedVids, globalMajorAllele);

            return new Position(
                simplePosition.Chromosome, simplePosition.Start, end, simplePosition.RefAllele,
                altAlleles, quality, filters, variants, samples, infoData, columns, simplePosition.IsDecomposed,
                simplePosition.IsRecomposed);
        }
 /// <summary>
 /// Builds the position described by the config's variant and pairs it with that variant's sample names.
 /// </summary>
 /// <param name="config">Configuration whose <c>variant</c> supplies the VCF fields and sample names.</param>
 /// <param name="sequenceProvider">Forwarded to <c>ToPosition</c>.</param>
 /// <param name="refMinorProvider">Forwarded to <c>ToPosition</c>.</param>
 /// <returns>The position and the sample names, in that order.</returns>
 public static (IPosition, string[]) GetPositionAndSampleNames(
     this SingleConfig config,
     ISequenceProvider sequenceProvider,
     IRefMinorProvider refMinorProvider)
 {
     // The position is built first; the sample names are read afterwards.
     var position = ToPosition(config.variant.GetVcfFields(), sequenceProvider, refMinorProvider);

     return (position, config.variant.sampleNames);
 }
Example #11
0
        /// <summary>
        /// Parses one VCF line into a simple position and converts it into a full position.
        /// </summary>
        /// <param name="vcfLine">Raw VCF line.</param>
        /// <param name="refMinorProvider">Forwarded to <c>Position.ToPosition</c>.</param>
        /// <param name="variantFactory">Forwarded to <c>Position.ToPosition</c>.</param>
        /// <param name="refNameToChromosome">Lookup used when building the simple position.</param>
        /// <returns>The result of <c>Position.ToPosition</c>.</returns>
        internal static IPosition ParseVcfLine(
            string vcfLine,
            IRefMinorProvider refMinorProvider,
            VariantFactory variantFactory,
            IDictionary<string, IChromosome> refNameToChromosome) =>
            Position.ToPosition(GetSimplePosition(vcfLine, refNameToChromosome), refMinorProvider, variantFactory);
Example #12
0
        /// <summary>
        /// Scans a VCF stream and collects positions per chromosome.
        /// </summary>
        /// <remarks>
        /// Header lines (starting with '#') and lines whose reference name is not present in
        /// <c>sequenceProvider.RefNameToChromosome</c> are skipped. Lines whose ALT column is "." are skipped
        /// unless <c>IsRefMinor</c> reports the position as ref-minor. Reading stops at the first line the
        /// range checker reports as out of range. The stream is wrapped in a <see cref="StreamReader"/> that
        /// is disposed before this method returns.
        /// </remarks>
        /// <param name="vcfStream">Stream containing the VCF text.</param>
        /// <param name="genomicRange">Range handed to the range checker.</param>
        /// <param name="sequenceProvider">Supplies the reference-name lookup and the loaded sequence.</param>
        /// <param name="refMinorProvider">Used to decide whether a "." ALT line is kept; may be null.</param>
        /// <returns>The positions grouped by chromosome and the count returned by <c>SortPositionsAndGetCount</c>.</returns>
        /// <exception cref="InvalidDataException">The POS column of a line cannot be parsed as an integer.</exception>
        public static (ImmutableDictionary <IChromosome, List <int> > PositionsByChromosome, int Count) GetPositions(Stream vcfStream, GenomicRange genomicRange,
                                                                                                                     ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)
        {
            var positionsByChromosome = new Dictionary <IChromosome, List <int> >();
            var rangeChecker          = new GenomicRangeChecker(genomicRange);
            var refNameToChrom        = sequenceProvider.RefNameToChromosome;

            using (var reader = new StreamReader(vcfStream))
            {
                string      line;
                string      currentReferenceName = "";
                IChromosome chromosome           = null;

                while ((line = reader.ReadLine()) != null)
                {
                    if (line.StartsWith('#'))
                    {
                        continue;
                    }

                    string[] cols          = line.OptimizedSplit('\t');
                    string   referenceName = cols[VcfCommon.ChromIndex];

                    if (referenceName != currentReferenceName)
                    {
                        // Resolve into a temporary: a failed lookup sets its out argument to null, and writing
                        // that into the cached chromosome would leave it out of sync with currentReferenceName,
                        // so a later line for the cached contig would be processed with a null chromosome.
                        if (!refNameToChrom.TryGetValue(referenceName, out IChromosome resolvedChromosome))
                        {
                            continue;
                        }

                        chromosome           = resolvedChromosome;
                        currentReferenceName = referenceName;
                    }

                    (int position, bool foundError) = cols[VcfCommon.PosIndex].OptimizedParseInt32();
                    if (foundError)
                    {
                        throw new InvalidDataException($"Unable to convert the VCF position to an integer: {cols[VcfCommon.PosIndex]}");
                    }

                    // Stop at the first out-of-range line instead of scanning the rest of the stream.
                    if (rangeChecker.OutOfRange(chromosome, position))
                    {
                        break;
                    }

                    string refAllele = cols[VcfCommon.RefIndex];
                    string altAllele = cols[VcfCommon.AltIndex];

                    // A "." ALT line is kept only when the position is ref-minor.
                    if (altAllele == "." && !IsRefMinor(refMinorProvider, chromosome, position))
                    {
                        continue;
                    }

                    sequenceProvider.LoadChromosome(chromosome);
                    TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
                }
            }

            int count = SortPositionsAndGetCount(positionsByChromosome);

            return (positionsByChromosome.ToImmutableDictionary(), count);
        }
        /// <summary>
        /// Parses one VCF line into a simple position and converts it into a full position.
        /// </summary>
        /// <param name="vcfLine">Raw VCF line.</param>
        /// <param name="refMinorProvider">Forwarded to <c>Position.ToPosition</c>.</param>
        /// <param name="sequenceProvider">Supplies <c>RefNameToChromosome</c> and is forwarded to <c>Position.ToPosition</c>.</param>
        /// <param name="variantFactory">Forwarded to <c>Position.ToPosition</c>.</param>
        /// <returns>The result of <c>Position.ToPosition</c>.</returns>
        internal static IPosition ParseVcfLine(
            string vcfLine,
            IRefMinorProvider refMinorProvider,
            ISequenceProvider sequenceProvider,
            VariantFactory variantFactory) =>
            Position.ToPosition(
                GetSimplePosition(vcfLine, sequenceProvider.RefNameToChromosome),
                refMinorProvider,
                sequenceProvider,
                variantFactory);