コード例 #1
0
        /// <summary>
        /// Builds a sample from a single colon-delimited VCF sample column.
        /// </summary>
        /// <param name="sampleColumn">raw text of the sample column; null, empty, or a lone "." yields <c>Sample.EmptySample</c></param>
        /// <param name="formatIndices">locations of the individual FORMAT keys inside the sample column</param>
        /// <param name="simplePosition">supplies the chromosome, start, and alt alleles used for the variant frequency and heteroplasmy calculations</param>
        /// <param name="mitoHeteroplasmyProvider">optional provider of heteroplasmy percentiles; when null, the percentiles are left null</param>
        /// <param name="legacyExtractor">when not null, extraction is delegated to this extractor using the raw sample column</param>
        /// <param name="enableDq">when false, the DQ field is not read and the de novo quality is null</param>
        /// <returns>the extracted sample</returns>
        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition,
                                              IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, LegacySampleFieldExtractor legacyExtractor = null, bool enableDq = false)
        {
            // sanity check: make sure we have a sample column
            if (string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            string[] sampleColumns = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);
            if (sampleColumns.Length == 1 && sampleColumns[0] == ".")
            {
                return Sample.EmptySample;
            }

            sampleColumns.NormalizeNulls();

            // the legacy extractor works from the raw column text rather than the split fields
            if (legacyExtractor != null)
            {
                return legacyExtractor.ExtractSample(sampleColumn);
            }

            int[]  alleleDepths                 = sampleColumns.GetString(formatIndices.AD).GetIntegers();
            float? artifactAdjustedQualityScore = sampleColumns.GetString(formatIndices.AQ).GetFloat();
            int?   copyNumber                   = sampleColumns.GetString(formatIndices.CN).GetInteger();

            string[] diseaseAffectedStatuses     = sampleColumns.GetString(formatIndices.DST).GetStrings();
            bool     failedFilter                = sampleColumns.GetString(formatIndices.FT).GetFailedFilter();
            string   genotype                    = sampleColumns.GetString(formatIndices.GT);
            int?     genotypeQuality             = sampleColumns.GetString(formatIndices.GQ).GetInteger();
            bool     isDeNovo                    = sampleColumns.GetString(formatIndices.DN).IsDeNovo();
            double?  deNovoQuality               = enableDq ? sampleColumns.GetString(formatIndices.DQ).GetDouble() : null;
            float?   likelihoodRatioQualityScore = sampleColumns.GetString(formatIndices.LQ).GetFloat();

            int[]   pairedEndReadCounts      = sampleColumns.GetString(formatIndices.PR).GetIntegers();
            int[]   repeatUnitCounts         = sampleColumns.GetString(formatIndices.REPCN).GetIntegers('/');
            int[]   splitReadCounts          = sampleColumns.GetString(formatIndices.SR).GetIntegers();
            int?    totalDepth               = sampleColumns.GetString(formatIndices.DP).GetInteger();
            double? variantFrequency         = sampleColumns.GetString(formatIndices.VF).GetDouble();
            int?    minorHaplotypeCopyNumber = sampleColumns.GetString(formatIndices.MCN).GetInteger();
            double? somaticQuality           = sampleColumns.GetString(formatIndices.SQ).GetDouble();
            int?    binCount                 = sampleColumns.GetString(formatIndices.BC).GetInteger();

            double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths, simplePosition.AltAlleles.Length);

            // Format with the invariant culture so the decimal separator is always '.',
            // regardless of the locale of the machine running the annotation.
            string[] mitoHeteroplasmyPercentiles = mitoHeteroplasmyProvider
                ?.GetVrfPercentiles(simplePosition.Chromosome, simplePosition.Start, simplePosition.AltAlleles, variantFrequencies)
                ?.Select(x => x?.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture) ?? "null")
                .ToArray();

            var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);

            return new Sample(alleleDepths, artifactAdjustedQualityScore, copyNumber, diseaseAffectedStatuses,
                              failedFilter, genotype, genotypeQuality, isDeNovo, deNovoQuality, likelihoodRatioQualityScore, pairedEndReadCounts,
                              repeatUnitCounts, splitReadCounts, totalDepth, variantFrequencies, minorHaplotypeCopyNumber, somaticQuality, isLoh, mitoHeteroplasmyPercentiles, binCount);
        }
コード例 #2
0
ファイル: VcfReader.cs プロジェクト: LvLH/Nirvana
 /// <summary>
 /// Stores the supplied readers and providers and builds the variant factory
 /// from the sequence provider's sequence and the variant ID creator.
 /// </summary>
 private VcfReader(StreamReader headerReader, StreamReader vcfLineReader, ISequenceProvider sequenceProvider,
                   IRefMinorProvider refMinorProvider, IVcfFilter vcfFilter, IVariantIdCreator vidCreator, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
 {
     // input readers
     _headerReader = headerReader;
     _reader = vcfLineReader;

     // sequence provider and the values obtained from it
     _sequenceProvider = sequenceProvider;
     _variantFactory = new VariantFactory(sequenceProvider.Sequence, vidCreator);
     _refNameToChromosome = sequenceProvider.RefNameToChromosome;

     // remaining collaborators
     _refMinorProvider = refMinorProvider;
     _vcfFilter = vcfFilter;
     _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;
 }
コード例 #3
0
        /// <summary>
        /// Creates a VCF reader over the given streams. Without a separate header stream,
        /// the header is read from the same reader as the VCF lines; with one, the VCF stream
        /// is moved to the block offset of the input start virtual position.
        /// </summary>
        /// <param name="headerStream">optional stream containing the VCF header; may be null</param>
        /// <param name="vcfStream">stream containing the VCF lines</param>
        /// <param name="annotationResources">source of the providers, recomposer, VID creator, and input start position</param>
        /// <param name="vcfFilter">filter passed through to the reader</param>
        /// <param name="mitoHeteroplasmyProvider">heteroplasmy provider passed through to the reader</param>
        private static VcfReader GetVcfReader(Stream headerStream, Stream vcfStream, IAnnotationResources annotationResources,
                                              IVcfFilter vcfFilter, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
        {
            var lineReader = FileUtilities.GetStreamReader(vcfStream);

            // by default the header comes from the same reader as the VCF lines
            StreamReader headerReader = lineReader;

            if (headerStream != null)
            {
                headerReader = FileUtilities.GetStreamReader(headerStream);

                long startOffset = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition).BlockOffset;
                vcfStream.Position = startOffset;
            }

            return VcfReader.Create(headerReader, lineReader, annotationResources.SequenceProvider,
                                    annotationResources.RefMinorProvider, annotationResources.Recomposer, vcfFilter,
                                    annotationResources.VidCreator, mitoHeteroplasmyProvider);
        }
コード例 #4
0
        /// <summary>
        /// Converts the sample columns of a VCF line into samples.
        /// </summary>
        /// <param name="vcfColumns">all columns of the VCF line</param>
        /// <param name="formatIndices">format indices; updated here from the FORMAT column</param>
        /// <param name="simplePosition">position passed through to the sample extraction</param>
        /// <param name="mitoHeteroplasmyProvider">heteroplasmy provider passed through to the sample extraction</param>
        /// <param name="enableDq">passed through to the sample extraction</param>
        /// <returns>
        /// one sample per sample column, or null when the line has fewer than
        /// <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns
        /// </returns>
        internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, ISimplePosition simplePosition, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
        {
            int columnCount = vcfColumns.Length;
            if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
            {
                return null;
            }

            var results = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

            formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

            // a legacy extractor is only created for legacy variant callers
            LegacySampleFieldExtractor legacyExtractor = null;
            if (IsLegacyVariantCaller(formatIndices))
            {
                legacyExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
            }

            int slot = 0;
            for (int column = VcfCommon.GenotypeIndex; column < columnCount; column++)
            {
                results[slot] = ExtractSample(vcfColumns[column], formatIndices, simplePosition, mitoHeteroplasmyProvider, legacyExtractor, enableDq);
                slot++;
            }

            return results;
        }
コード例 #5
0
        /// <summary>
        /// Constructs a VCF reader, parses its header, and assigns the recomposer.
        /// </summary>
        /// <returns>the reader, with its header parsed and recomposer set</returns>
        public static VcfReader Create(StreamReader headerReader, StreamReader vcfLineReader, ISequenceProvider sequenceProvider,
                                       IRefMinorProvider refMinorProvider, IRecomposer recomposer, IVcfFilter vcfFilter, IVariantIdCreator vidCreator, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
        {
            var reader = new VcfReader(headerReader, vcfLineReader, sequenceProvider, refMinorProvider,
                                       vcfFilter, vidCreator, mitoHeteroplasmyProvider, enableDq);

            // the header is parsed before the recomposer is set
            reader.ParseHeader();
            reader.SetRecomposer(recomposer);

            return reader;
        }
コード例 #6
0
        /// <summary>
        /// Converts a simple position into a full position with variants and samples.
        /// </summary>
        /// <param name="simplePosition">the parsed VCF line; a null value yields a null result</param>
        /// <param name="refMinorProvider">optional provider of global major alleles, consulted only for reference sites</param>
        /// <param name="sequenceProvider">used to load the chromosome of the position</param>
        /// <param name="mitoHeteroplasmyProvider">heteroplasmy provider passed through to the sample extraction</param>
        /// <param name="variantFactory">supplies the format indices and creates the variants</param>
        /// <param name="enableDq">passed through to the sample extraction</param>
        /// <returns>
        /// null for a null input, the result of <c>GetReferencePosition</c> for a reference site
        /// without a global major allele, otherwise a new position
        /// </returns>
        public static IPosition ToPosition(ISimplePosition simplePosition, IRefMinorProvider refMinorProvider, ISequenceProvider sequenceProvider, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, VariantFactory variantFactory, bool enableDq = false)
        {
            if (simplePosition == null)
            {
                return null;
            }

            sequenceProvider.LoadChromosome(simplePosition.Chromosome);

            string[] vcfFields = simplePosition.VcfFields;
            string[] altAlleles = vcfFields[VcfCommon.AltIndex].OptimizedSplit(',');

            string globalMajorAllele = null;

            bool isReference = altAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(altAlleles[0]);
            if (isReference)
            {
                globalMajorAllele = refMinorProvider?.GetGlobalMajorAllele(simplePosition.Chromosome, simplePosition.Start);

                // a reference site without a global major allele is not a ref-minor site
                if (globalMajorAllele == null)
                {
                    return GetReferencePosition(simplePosition);
                }
            }

            var infoData = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex]);
            int end = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);

            double? quality = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);
            string[] filters = vcfFields[VcfCommon.FilterIndex].OptimizedSplit(';');

            ISample[] samples = vcfFields.ToSamples(variantFactory.FormatIndices, simplePosition, mitoHeteroplasmyProvider, enableDq);

            IVariant[] variants = variantFactory.CreateVariants(
                simplePosition.Chromosome, simplePosition.Start, end, simplePosition.RefAllele, altAlleles, infoData,
                simplePosition.IsDecomposed, simplePosition.IsRecomposed, simplePosition.LinkedVids, globalMajorAllele);

            return new Position(
                simplePosition.Chromosome, simplePosition.Start, end, simplePosition.RefAllele, altAlleles, quality,
                filters, variants, samples, infoData, vcfFields, simplePosition.IsDecomposed, simplePosition.IsRecomposed);
        }
コード例 #7
0
        /// <summary>
        /// Reads positions from the input VCF, annotates each one that has variants, and writes
        /// the resulting JSON entries followed by the gene annotations.
        /// </summary>
        /// <param name="headerStream">optional stream containing the VCF header</param>
        /// <param name="inputVcfStream">stream containing the VCF lines</param>
        /// <param name="outputJsonStream">destination of the JSON output</param>
        /// <param name="outputJsonIndexStream">destination of the JSON index</param>
        /// <param name="annotationResources">annotator, metrics, and preloading resources</param>
        /// <param name="vcfFilter">filter handed to the VCF reader</param>
        /// <param name="ignoreEmptyChromosome">when true, positions on empty chromosomes are skipped</param>
        /// <returns><c>ExitCodes.Success</c> when the whole input has been processed</returns>
        public static ExitCodes Annotate(Stream headerStream, Stream inputVcfStream, Stream outputJsonStream,
                                         Stream outputJsonIndexStream, AnnotationResources annotationResources, IVcfFilter vcfFilter,
                                         bool ignoreEmptyChromosome)
        {
            var perfMetrics = annotationResources.Metrics;

            PerformanceMetrics.ShowAnnotationHeader();

            // start on a placeholder chromosome so that the first real chromosome triggers a preload
            IChromosome activeChromosome = new EmptyChromosome("dummy");
            int positionCount = 0;
            IMitoHeteroplasmyProvider heteroplasmyProvider = MitoHeteroplasmyReader.GetProvider();

            using (var reader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter, heteroplasmyProvider))
            using (var writer = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources, Date.CurrentTimeStamp, reader.GetSampleNames(), false))
            {
                try
                {
                    CheckGenomeAssembly(annotationResources, reader);
                    SetMitochondrialAnnotationBehavior(annotationResources, reader);

                    for (IPosition position = reader.GetNextPosition(); position != null; position = reader.GetNextPosition())
                    {
                        IChromosome chromosome = position.Chromosome;
                        if (ignoreEmptyChromosome && chromosome.IsEmpty())
                        {
                            continue;
                        }

                        bool chromosomeChanged = chromosome.Index != activeChromosome.Index;
                        if (chromosomeChanged)
                        {
                            // report the chromosome we just finished before switching to the next one
                            if (!activeChromosome.IsEmpty())
                            {
                                perfMetrics.ShowAnnotationEntry(activeChromosome, positionCount);
                            }

                            positionCount = 0;

                            perfMetrics.Preload.Start();
                            annotationResources.PreLoad(chromosome);
                            perfMetrics.Preload.Stop();

                            perfMetrics.Annotation.Start();
                            activeChromosome = chromosome;
                        }

                        // positions without variants are not annotated
                        var annotatedPosition = position.Variants == null
                            ? null
                            : annotationResources.Annotator.Annotate(position);

                        string jsonEntry = annotatedPosition?.GetJsonString();
                        if (jsonEntry != null)
                        {
                            writer.WritePosition(annotatedPosition.Position, jsonEntry);
                        }

                        positionCount++;
                    }

                    writer.WriteGenes(annotationResources.Annotator.GetGeneAnnotations());
                }
                catch (Exception e)
                {
                    // attach the current VCF line to the exception before rethrowing
                    e.Data[ExitCodeUtilities.VcfLine] = reader.VcfLine;
                    throw;
                }
            }

            if (!activeChromosome.IsEmpty())
            {
                perfMetrics.ShowAnnotationEntry(activeChromosome, positionCount);
            }

            perfMetrics.ShowSummaryTable();

            return ExitCodes.Success;
        }
コード例 #8
0
        /// <summary>
        /// Parses a VCF line into a simple position and converts it into a full position.
        /// </summary>
        /// <param name="vcfLine">the raw VCF line</param>
        /// <param name="refMinorProvider">passed through to the position conversion</param>
        /// <param name="sequenceProvider">supplies the reference name to chromosome mapping and is passed through to the position conversion</param>
        /// <param name="mitoHeteroplasmyProvider">passed through to the position conversion</param>
        /// <param name="variantFactory">passed through to the position conversion</param>
        internal static IPosition ParseVcfLine(string vcfLine, IRefMinorProvider refMinorProvider, ISequenceProvider sequenceProvider, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, VariantFactory variantFactory)
        {
            var refNameToChromosome = sequenceProvider.RefNameToChromosome;
            var parsedLine = GetSimplePosition(vcfLine, refNameToChromosome);

            return Position.ToPosition(parsedLine, refMinorProvider, sequenceProvider, mitoHeteroplasmyProvider, variantFactory);
        }
コード例 #9
0
        /// <summary>
        /// Parses and annotates a single VCF line using the given cache prefix and
        /// supplementary annotation paths.
        /// </summary>
        /// <param name="cacheFilePrefix">cache prefix used to obtain the annotator and sequence provider</param>
        /// <param name="saPaths">supplementary annotation paths; may be null</param>
        /// <param name="mitoHeteroplasmyProvider">heteroplasmy provider passed through to the VCF line parsing</param>
        /// <param name="vcfLine">the VCF line to annotate</param>
        /// <returns>the annotated position produced by the annotator</returns>
        internal static IAnnotatedPosition GetAnnotatedPosition(string cacheFilePrefix, List <string> saPaths, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
                                                                string vcfLine)
        {
            var annotationFiles = new AnnotationFiles();

            if (saPaths != null)
            {
                foreach (string saPath in saPaths)
                {
                    annotationFiles.AddFiles(saPath);
                }
            }

            var refMinorProvider = ProviderUtilities.GetRefMinorProvider(annotationFiles);

            var (annotator, sequenceProvider) = GetAnnotatorAndSequenceProvider(cacheFilePrefix, saPaths);

            var variantFactory = new VariantFactory(sequenceProvider.Sequence, new VariantId());
            var parsedPosition = ParseVcfLine(vcfLine, refMinorProvider, sequenceProvider, mitoHeteroplasmyProvider, variantFactory);

            return annotator.Annotate(parsedPosition);
        }