コード例 #1
0
 public void GetChromosome_RefIndex_DoesNotExist()
 {
     // The lookup by reference index 1 is expected to fail with InvalidDataException.
     Assert.Throws<InvalidDataException>(() =>
     {
         ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, 1);
     });
 }
コード例 #2
0
                 CdsEndNotFound, string TranslateableSequence, int StartExonPhase, string BamEditStatus) ReadTranscriptInfo(
            string line)
        {
            var cols = GetColumns("Transcript", line);

            string id               = cols[1];
            byte   version          = byte.Parse(cols[2]);
            ushort referenceIndex   = ushort.Parse(cols[4]);
            int    start            = int.Parse(cols[5]);
            int    end              = int.Parse(cols[6]);
            var    biotype          = (BioType)byte.Parse(cols[8]);
            bool   isCanonical      = cols[9] == "Y";
            int    totalExonLength  = int.Parse(cols[10]);
            string ccdsId           = cols[11];
            string refSeqId         = cols[12];
            var    source           = (Source)byte.Parse(cols[13]);
            bool   cdsStartNotFound = cols[14] == "Y";
            bool   cdsEndNotFound   = cols[15] == "Y";
            int    startExonPhase   = int.Parse(cols[16]);
            string bamEditStatus    = cols[17];

            string translateableSequence = _reader.ReadLine();
            var    chromosome            = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, referenceIndex);

            return(id, version, chromosome, start, end, biotype, isCanonical, totalExonLength, ccdsId, refSeqId, source
                   , cdsStartNotFound, cdsEndNotFound, translateableSequence, startExonPhase, bamEditStatus);
        }
コード例 #3
0
ファイル: UgaGeneReader.cs プロジェクト: wangdi2014/Nirvana
        /// <summary>
        /// Reads the next tab-delimited line from the underlying reader and converts it into a gene.
        /// </summary>
        /// <returns>the parsed gene, or null when the reader has no more lines</returns>
        /// <exception cref="InvalidDataException">thrown when the line does not contain exactly 11 columns</exception>
        private UgaGene GetNextGene()
        {
            string row = _reader.ReadLine();
            if (row == null)
            {
                return null;
            }

            var fields = row.OptimizedSplit('\t');
            if (fields.Length != 11)
            {
                throw new InvalidDataException($"Expected 11 columns, but found {fields.Length} columns.");
            }

            // Fields are consumed left to right; the second column (index 1) is not used here.
            var chromosome = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, fields[0]);

            string geneSymbol  = fields[2];
            int    grch37Start = int.Parse(fields[3]);
            int    grch37End   = int.Parse(fields[4]);
            int    grch38Start = int.Parse(fields[5]);
            int    grch38End   = int.Parse(fields[6]);
            bool   isReverse   = fields[7] == "R";
            int    hgnc        = int.Parse(fields[8]);
            string ensembl     = fields[9];
            string entrez      = fields[10];

            var grch37Interval = new Interval(grch37Start, grch37End);
            var grch38Interval = new Interval(grch38Start, grch38End);

            return new UgaGene(chromosome, grch37Interval, grch38Interval, isReverse, entrez, ensembl, geneSymbol, hgnc);
        }
コード例 #4
0
        /// <summary>
        /// Loads the data bundle for the configured reference, restricts it to the requested
        /// chromosome interval and writes the resulting transcript, SIFT, PolyPhen and reference
        /// base files under a common output stub.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once all outputs have been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var logger     = new ConsoleLogger();
            var bundle     = DataBundle.GetDataBundle(_inputReferencePath, _inputPrefix);
            int numRefSeqs = bundle.SequenceReader.NumRefSeqs;
            var chromosome = ReferenceNameUtilities.GetChromosome(bundle.SequenceReader.RefNameToChromosome, _referenceName);

            // the bundle is loaded for the selected chromosome before anything is extracted from it
            bundle.Load(chromosome);

            string outputStub  = GetOutputStub(chromosome, bundle.Source);
            var    interval    = new ChromosomeInterval(chromosome, _referencePosition, _referenceEndPosition);
            var    transcripts = GetTranscripts(logger, bundle, interval);

            // both prediction sources go through the same staging helper; only the label, the
            // prediction set/reader and the index selector differ
            var    sift           = GetPredictionStaging(logger, "SIFT", transcripts, chromosome, bundle.SiftPredictions, bundle.SiftReader, x => x.SiftIndex, numRefSeqs);
            var    polyphen       = GetPredictionStaging(logger, "PolyPhen", transcripts, chromosome, bundle.PolyPhenPredictions, bundle.PolyPhenReader, x => x.PolyPhenIndex, numRefSeqs);
            string referenceBases = GetReferenceBases(logger, bundle.SequenceReader, interval);

            var regulatoryRegionIntervalArrays = GetRegulatoryRegionIntervalArrays(logger, bundle.TranscriptCache, interval, numRefSeqs);
            // NOTE(review): presumably this re-points the transcripts at the staged prediction subsets - confirm in PredictionUtilities
            var transcriptIntervalArrays       = PredictionUtilities.UpdateTranscripts(transcripts, bundle.SiftPredictions,
                                                                                       sift.Predictions, bundle.PolyPhenPredictions, polyphen.Predictions, numRefSeqs);

            var transcriptStaging = GetTranscriptStaging(bundle.TranscriptCacheData.Header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);

            // NOTE(review): the streams created here are handed straight to WriteCache; verify that WriteCache disposes them
            WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(outputStub)), transcriptStaging, "transcript");
            WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.SiftPath(outputStub)), sift.Staging, "SIFT");
            WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(outputStub)), polyphen.Staging, "PolyPhen");
            WriteReference(logger, CacheConstants.BasesPath(outputStub), bundle.SequenceReader, chromosome,
                           referenceBases, interval.Start);

            return(ExitCodes.Success);
        }
コード例 #5
0
        public void GetChromosome_RefName_NullName()
        {
            // A null reference name is expected to yield an empty chromosome whose names are blank.
            var observed = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, null);

            string expectedName = string.Empty;
            Assert.Equal(expectedName, observed.EnsemblName);
            Assert.Equal(expectedName, observed.UcscName);
            Assert.True(observed.IsEmpty());
        }
コード例 #6
0
        /// <summary>
        /// Splits a raw VCF line on tabs and builds a simple position from it using a
        /// <see cref="NullVcfFilter"/>.
        /// </summary>
        /// <param name="vcfLine">the tab-delimited VCF line</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        internal static SimplePosition GetSimplePosition(string vcfLine,
                                                         IDictionary <string, IChromosome> refNameToChromosome)
        {
            string[] columns = vcfLine.OptimizedSplit('\t');

            string chromosomeName = columns[VcfCommon.ChromIndex];
            var    chromosome     = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeName);

            string positionText = columns[VcfCommon.PosIndex];
            int    position     = int.Parse(positionText);

            return SimplePosition.GetSimplePosition(chromosome, position, columns, new NullVcfFilter());
        }
コード例 #7
0
        public void GetChromosome_RefName_DoesNotExist()
        {
            // For the name "dummy" the lookup is expected to hand back an empty chromosome
            // that echoes the requested name for both naming schemes.
            const string requestedName = "dummy";

            var observed = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, requestedName);

            Assert.Equal(requestedName, observed.EnsemblName);
            Assert.Equal(requestedName, observed.UcscName);
            Assert.True(observed.IsEmpty());
        }
コード例 #8
0
        /// <summary>
        /// Strict variant of the reference-name lookup: an empty chromosome result is treated as an error.
        /// </summary>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <param name="name">the reference name to resolve</param>
        /// <exception cref="InvalidDataException">thrown when the lookup yields an empty chromosome</exception>
        private static IChromosome GetChromosome(IDictionary <string, IChromosome> refNameToChromosome, string name)
        {
            var resolved = ReferenceNameUtilities.GetChromosome(refNameToChromosome, name);

            if (!resolved.IsEmpty())
            {
                return resolved;
            }

            throw new InvalidDataException($"Could not find the chromosome ({name}) in the reference name dictionary.");
        }
コード例 #9
0
        /// <summary>
        /// Resolves the chromosome named by the leading part of a cytogenetic band string.
        /// </summary>
        /// <param name="cytogeneticBand">the band string; everything before the arm position is used as the chromosome name</param>
        /// <returns>
        /// the looked-up chromosome, or an <see cref="EmptyChromosome"/> carrying the whole band
        /// string when GetArmPos reports -1
        /// </returns>
        private IChromosome GetChromosome(string cytogeneticBand)
        {
            int armIndex = GetArmPos(cytogeneticBand);

            if (armIndex != -1)
            {
                string chromosomeName = cytogeneticBand.Substring(0, armIndex);
                return ReferenceNameUtilities.GetChromosome(_refNameToChromosome, chromosomeName);
            }

            return new EmptyChromosome(cytogeneticBand);
        }
コード例 #10
0
        /// <summary>
        /// Converts this range into a <see cref="GenomicRange"/> by resolving the chromosome names
        /// of its endpoints.
        /// </summary>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>a range whose end position is null when this range has no End</returns>
        public GenomicRange ToGenomicRange(IDictionary <string, IChromosome> refNameToChromosome)
        {
            var startChromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, Start.Chromosome);
            var rangeStart      = new GenomicPosition(startChromosome, Start.Position);

            GenomicPosition? rangeEnd = null;

            if (End != null)
            {
                var endChromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, End.Value.Chromosome);
                rangeEnd = new GenomicPosition(endChromosome, End.Value.Position);
            }

            return new GenomicRange(rangeStart, rangeEnd);
        }
コード例 #11
0
ファイル: VariantFactory.cs プロジェクト: zhouhufeng/Nirvana
        /// <summary>
        /// Builds the pair of breakends describing a structural variant on a single chromosome.
        /// </summary>
        /// <param name="ensemblName">reference name used to look up the chromosome</param>
        /// <param name="start">start coordinate of the variant</param>
        /// <param name="svType">the structural variant type</param>
        /// <param name="svEnd">end coordinate of the variant; null means no breakends can be derived</param>
        /// <param name="isInv3">selects the first alternative inversion layout (checked before <paramref name="isInv5"/>)</param>
        /// <param name="isInv5">selects the second alternative inversion layout</param>
        /// <returns>
        /// a two-element array for deletions, (tandem) duplications and inversions; null when
        /// <paramref name="svEnd"/> is null or the variant type is not one of those
        /// </returns>
        internal IBreakEnd[] GetSvBreakEnds(string ensemblName, int start, VariantType svType, int?svEnd, bool isInv3, bool isInv5)
        {
            if (!svEnd.HasValue)
            {
                return null;
            }

            int end        = svEnd.Value;
            var chromosome = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, ensemblName);

            // ReSharper disable once SwitchStatementMissingSomeCases
            switch (svType)
            {
            case VariantType.deletion:
                return new IBreakEnd[]
                {
                    new BreakEnd(chromosome, chromosome, start, end + 1, false, true),
                    new BreakEnd(chromosome, chromosome, end + 1, start, true, false)
                };

            case VariantType.tandem_duplication:
            case VariantType.duplication:
                return new IBreakEnd[]
                {
                    new BreakEnd(chromosome, chromosome, end, start, false, true),
                    new BreakEnd(chromosome, chromosome, start, end, true, false)
                };

            case VariantType.inversion:
                if (isInv3)
                {
                    return new IBreakEnd[]
                    {
                        new BreakEnd(chromosome, chromosome, start, end, false, false),
                        new BreakEnd(chromosome, chromosome, end, start, false, false)
                    };
                }

                if (isInv5)
                {
                    return new IBreakEnd[]
                    {
                        new BreakEnd(chromosome, chromosome, start + 1, end + 1, true, true),
                        new BreakEnd(chromosome, chromosome, end + 1, start + 1, true, true)
                    };
                }

                // neither flag set: one breakend from each of the two layouts above
                return new IBreakEnd[]
                {
                    new BreakEnd(chromosome, chromosome, start, end, false, false),
                    new BreakEnd(chromosome, chromosome, end + 1, start + 1, true, true)
                };

            default:
                return null;
            }
        }
コード例 #12
0
        /// <summary>
        /// Builds a <see cref="SimplePosition"/> from the columns of a single VCF record.
        /// </summary>
        /// <param name="vcfFields">the VCF columns, indexed with the <c>VcfCommon</c> column constants</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <param name="isRecomposed">value stored in <c>IsRecomposed</c> on the result</param>
        /// <returns>
        /// the populated position; <c>End</c> is -1 when the ALT column starts with '&lt;' or is
        /// exactly "*", otherwise it is Start + reference allele length - 1
        /// </returns>
        public static SimplePosition GetSimplePosition(string[] vcfFields, IDictionary <string, IChromosome> refNameToChromosome, bool isRecomposed = false)
        {
            var simplePosition = new SimplePosition
            {
                Start      = Convert.ToInt32(vcfFields[VcfCommon.PosIndex]),
                Chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]),
                RefAllele  = vcfFields[VcfCommon.RefIndex]
            };

            string altField = vcfFields[VcfCommon.AltIndex];

            // VCF is machine-readable data, so the '<' prefix test must be ordinal rather than
            // dependent on the current culture's collation rules.
            bool hasNoSequenceEnd = altField.StartsWith("<", StringComparison.Ordinal) || altField == "*";

            simplePosition.End          = hasNoSequenceEnd ? -1 : simplePosition.Start + simplePosition.RefAllele.Length - 1;
            simplePosition.AltAlleles   = altField.Split(",");
            simplePosition.VcfFields    = vcfFields;
            simplePosition.IsRecomposed = isRecomposed;
            simplePosition.IsDecomposed = new bool[simplePosition.AltAlleles.Length]; // every entry starts out false
            return simplePosition;
        }
コード例 #13
0
        /// <summary>
        /// Lazily produces one <see cref="SimplePosition"/> per entry in RecomposedAlleles,
        /// visiting the variant sites in sorted key order.
        /// </summary>
        /// <param name="refNameToChromosome">reference name to chromosome lookup used for the CHROM column of the generated VCF fields</param>
        /// <returns>the recomposed positions, yielded one at a time</returns>
        public IEnumerable <SimplePosition> GetRecomposedPositions(IDictionary <string, IChromosome> refNameToChromosome)
        {
            foreach (var variantSite in RecomposedAlleles.Keys.OrderBy(x => x))
            {
                var varInfo         = RecomposedAlleles[variantSite];
                var altAlleleList   = new List <string>();
                var genotypeIndex   = 1; // next genotype index to hand out to an alt allele (0 is used for the reference allele)
                var sampleGenotypes = new List <int> [_numSamples];
                for (var i = 0; i < _numSamples; i++)
                {
                    sampleGenotypes[i] = new List <int>();
                }
                // linked VIDs collected in the same order as altAlleleList, so both share indices
                List <List <string> > allLinkedVids = new List <List <string> >();
                foreach (string altAllele in varInfo.AltAlleleToSample.Keys.OrderBy(x => x))
                {
                    var(sampleAlleles, linkedVids) = varInfo.AltAlleleToSample[altAllele];
                    int currentGenotypeIndex;
                    if (altAllele == variantSite.RefAllele)
                    {
                        // an allele identical to the reference gets index 0 and is not listed as an alt allele
                        currentGenotypeIndex = 0;
                    }
                    else
                    {
                        currentGenotypeIndex = genotypeIndex;
                        genotypeIndex++;
                        altAlleleList.Add(altAllele);
                        allLinkedVids.Add(linkedVids);
                    }
                    // record the chosen index for every sample haplotype that carries this allele
                    foreach (var sampleAllele in sampleAlleles)
                    {
                        SetGenotypeWithAlleleIndex(sampleGenotypes[sampleAllele.SampleIndex], sampleAllele.HaplotypeIndex,
                                                   currentGenotypeIndex);
                    }
                }
                string altAlleleColumn = string.Join(",", altAlleleList);
                var    vcfFields       = GetVcfFields(variantSite, varInfo, altAlleleColumn, sampleGenotypes);

                var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);
                var position   = SimplePosition.GetSimplePosition(chromosome, variantSite.Start, vcfFields, new NullVcfFilter(), true);
                // NOTE(review): assumes position.LinkedVids has one slot per alt allele - confirm in SimplePosition.GetSimplePosition
                for (var i = 0; i < allLinkedVids.Count; i++)
                {
                    position.LinkedVids[i] = allLinkedVids[i];
                }

                yield return(position);
            }
        }
コード例 #14
0
        /// <summary>
        /// Reads tab-delimited band records from a stream and groups them by chromosome index.
        /// </summary>
        /// <param name="stream">the input stream; it is wrapped in a reader that is disposed on exit</param>
        /// <param name="numRefSeqs">number of per-chromosome lists to allocate</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>one band list per reference index; records on unrecognized chromosomes are dropped</returns>
        /// <exception cref="InvalidDataException">thrown when a line does not have exactly 5 columns</exception>
        public static List <Band>[] GetCytogeneticBands(Stream stream, int numRefSeqs, IDictionary <string, IChromosome> refNameToChromosome)
        {
            const int expectedNumColumns = 5;

            var bandsByRefIndex = new List <Band> [numRefSeqs];
            for (var refIndex = 0; refIndex < bandsByRefIndex.Length; refIndex++)
            {
                bandsByRefIndex[refIndex] = new List <Band>();
            }

            using (var reader = new StreamReader(stream))
            {
                // reading stops at the end of the stream or at the first empty line
                string line;
                while (!string.IsNullOrEmpty(line = reader.ReadLine()))
                {
                    string[] fields = line.Split('\t');

                    if (fields.Length != expectedNumColumns)
                    {
                        throw new InvalidDataException($"Expected {expectedNumColumns} columns, but found {fields.Length} columns: [{line}]");
                    }

                    // the begin coordinate in the file is shifted up by one; the end coordinate is used as-is
                    int bandBegin = int.Parse(fields[1]) + 1;
                    int bandEnd   = int.Parse(fields[2]);

                    var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, fields[0]);
                    if (!chromosome.IsEmpty())
                    {
                        bandsByRefIndex[chromosome.Index].Add(new Band(bandBegin, bandEnd, fields[3]));
                    }
                }
            }

            return bandsByRefIndex;
        }
コード例 #15
0
ファイル: VariantFactory.cs プロジェクト: zhouhufeng/Nirvana
        // Breakend ALT patterns, built once instead of on every call: constructing a
        // RegexOptions.Compiled regex is expensive and the patterns never change.
        // Forward form: bases first, then bracket, "chr:pos", bracket.
        private static readonly Regex ForwardBreakendAltRegex = new Regex(@"\w+([\[\]])([^:]+):(\d+)([\[\]])", RegexOptions.Compiled);

        // Reverse form: bracket, "chr:pos", bracket, then bases.
        private static readonly Regex ReverseBreakendAltRegex = new Regex(@"([\[\]])([^:]+):(\d+)([\[\]])\w+", RegexOptions.Compiled);

        /// <summary>
        /// Parses a breakend alternate allele into the partner chromosome, the partner position
        /// and the two suffix flags.
        /// </summary>
        /// <param name="refAllele">the reference allele; when the alternate allele starts with it, the forward pattern is used</param>
        /// <param name="altAllele">the breakend alternate allele to parse</param>
        /// <returns>
        /// the partner chromosome and position, IsSuffix1 (false for the forward form, true for
        /// the reverse form) and IsSuffix2 (whether the relevant bracket equals ForwardBreakEnd)
        /// </returns>
        /// <exception cref="InvalidDataException">thrown when the alternate allele matches neither pattern</exception>
        private (IChromosome Chromosome2, int Position2, bool IsSuffix1, bool IsSuffix2) ParseBreakendAltAllele(string refAllele, string altAllele)
        {
            // allele strings are plain data, so compare ordinally instead of by current culture
            bool isForwardForm = altAllele.StartsWith(refAllele, StringComparison.Ordinal);

            var regex = isForwardForm ? ForwardBreakendAltRegex : ReverseBreakendAltRegex;
            var match = regex.Match(altAllele);

            if (!match.Success)
            {
                throw new InvalidDataException(
                          "Unable to successfully parse the complex rearrangements for the following allele: " + altAllele);
            }

            // the forward form reads the trailing bracket (group 4), the reverse form the leading one (group 1)
            int    bracketGroup   = isForwardForm ? 4 : 1;
            bool   isSuffix2      = match.Groups[bracketGroup].Value == ForwardBreakEnd;
            int    position2      = Convert.ToInt32(match.Groups[3].Value);
            string referenceName2 = match.Groups[2].Value;

            return(ReferenceNameUtilities.GetChromosome(_refNameToChromosome, referenceName2), position2, !isForwardForm, isSuffix2);
        }
コード例 #16
0
        /// <summary>
        /// Maps chromosome indices to the subdirectories of <paramref name="dirPath"/> whose
        /// names are recognized reference names.
        /// </summary>
        /// <param name="dirPath">the directory whose immediate subdirectories are inspected</param>
        /// <returns>chromosome index to subdirectory path; subdirectories with unrecognized names are left out</returns>
        public Dictionary <ushort, string> GetRefIndexToVepDir(string dirPath)
        {
            var refIndexToDir = new Dictionary <ushort, string>();

            foreach (string subdirectory in Directory.GetDirectories(dirPath))
            {
                // the last path segment is the candidate reference name
                string candidateName = Path.GetFileName(subdirectory);
                var    chromosome    = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, candidateName);

                if (!chromosome.IsEmpty())
                {
                    refIndexToDir[chromosome.Index] = subdirectory;
                }
            }

            return refIndexToDir;
        }
コード例 #17
0
        /// <summary>
        /// Moves a position leftwards until it no longer overlaps any gene in the interval forest.
        /// </summary>
        /// <param name="genomicPosition">the starting chromosome name and position</param>
        /// <param name="geneIntervalForest">the gene intervals queried for overlaps</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>
        /// a position on the same chromosome that lies just before the left-most overlapping
        /// gene (clamped to 1), or the original position when nothing overlaps it
        /// </returns>
        private static AnnotationPosition FindProperStartPosition(AnnotationPosition genomicPosition, IIntervalForest <IGene> geneIntervalForest, IDictionary <string, IChromosome> refNameToChromosome)
        {
            var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, genomicPosition.Chromosome);

            int currentPosition = genomicPosition.Position;

            while (true)
            {
                IGene[] overlappingGenes = geneIntervalForest.GetAllOverlappingValues(chromosome.Index,
                                                                                      currentPosition, currentPosition);

                // Both null and an empty array mean "no overlap". Treating an empty array as
                // "keep looping" would spin forever because the position would never change.
                if (overlappingGenes == null || overlappingGenes.Length == 0)
                {
                    break;
                }

                // jump to just before the left-most overlapping gene and check again
                currentPosition = overlappingGenes.Min(x => x.Start) - 1;
            }

            // Always return the position right before the overlapping genes to KISS
            return(new AnnotationPosition(genomicPosition.Chromosome, currentPosition < 1 ? 1 : currentPosition));
        }
コード例 #18
0
        /// <summary>
        /// Returns the next queued simple position, reading and processing further VCF lines
        /// until the queue has something in it or the input is exhausted.
        /// </summary>
        /// <returns>the next position, or null when the queue is empty after the last line has been processed</returns>
        /// <exception cref="InvalidDataException">thrown when the POS column cannot be parsed as an integer</exception>
        /// <exception cref="UserErrorException">thrown when InconsistentSampleFields flags the line</exception>
        private ISimplePosition GetNextSimplePosition()
        {
            while (_queuedPositions.Count == 0)
            {
                VcfLine = _vcfFilter.GetNextLine(_reader);

                // stays null when there is no further line; the recomposer is still called with it below
                SimplePosition vcfPosition = null;

                if (VcfLine != null)
                {
                    string[] vcfFields  = VcfLine.OptimizedSplit('\t');
                    var      chromosome = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);
                    CheckVcfOrder(vcfFields[VcfCommon.ChromIndex]);

                    (int start, bool foundError) = vcfFields[VcfCommon.PosIndex].OptimizedParseInt32();
                    if (foundError)
                    {
                        throw new InvalidDataException($"Unable to convert the VCF position to an integer: {vcfFields[VcfCommon.PosIndex]}");
                    }

                    if (InconsistentSampleFields(vcfFields))
                    {
                        int sampleCount = _sampleNames?.Length ?? 0;
                        throw new UserErrorException($"Inconsistent number of sample fields in line:\n{VcfLine}\nExpected number of sample fields: {sampleCount}");
                    }
                    vcfPosition = SimplePosition.GetSimplePosition(chromosome, start, vcfFields, _vcfFilter);
                }

                // NOTE(review): presumably a null position tells the recomposer to flush what it has buffered - confirm in ProcessSimplePosition
                IEnumerable <ISimplePosition> simplePositions = _recomposer.ProcessSimplePosition(vcfPosition);
                foreach (var simplePosition in simplePositions)
                {
                    _queuedPositions.Enqueue(simplePosition);
                }

                // no more input: stop looping even if nothing was queued
                if (VcfLine == null)
                {
                    break;
                }
            }

            return(_queuedPositions.Count == 0 ? null : _queuedPositions.Dequeue());
        }
コード例 #19
0
        /// <summary>
        /// Reads one tab-delimited chromosome header line from the reader.
        /// </summary>
        /// <returns>the chromosome resolved from the second column and the prediction count from the third</returns>
        /// <exception cref="InvalidDataException">thrown when no line is available or the line does not have 3 columns</exception>
        private (IChromosome Chromosome, int NumPredictions) GetChromosomeHeader()
        {
            var fields = _reader.ReadLine()?.OptimizedSplit('\t');

            if (fields == null)
            {
                throw new InvalidDataException("Found an unexpected null line when parsing the chromosome header in the prediction reader.");
            }

            if (fields.Length != 3)
            {
                throw new InvalidDataException($"Expected 3 columns in the chromosome header, but found {fields.Length}");
            }

            // the first column is not used here
            ushort refIndex   = ushort.Parse(fields[1]);
            var    chromosome = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, refIndex);
            int    count      = int.Parse(fields[2]);

            return (chromosome, count);
        }
コード例 #20
0
        /// <summary>
        /// Reads the next tab-delimited line from the reader and converts it into a regulatory region.
        /// </summary>
        /// <returns>the parsed regulatory region, or null when the reader has no more lines</returns>
        private IRegulatoryRegion GetNextRegulatoryRegion()
        {
            string row = _reader.ReadLine();
            if (row == null)
            {
                return null;
            }

            var fields = row.OptimizedSplit('\t');

            // columns used: 1 = reference index, 2 = start, 3 = end, 4 = ID, 6 = numeric region type
            ushort refIndex    = ushort.Parse(fields[1]);
            int    regionStart = int.Parse(fields[2]);
            int    regionEnd   = int.Parse(fields[3]);
            var    regionId    = CompactId.Convert(fields[4]);
            byte   typeCode    = byte.Parse(fields[6]);
            var    regionType  = (RegulatoryRegionType)typeCode;

            var chromosome = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, refIndex);

            return new RegulatoryRegion(chromosome, regionStart, regionEnd, regionId, regionType);
        }
コード例 #21
0
        /// <summary>
        /// Converts a translocation breakend allele into a single adjacency between the variant's
        /// own breakpoint and its partner breakpoint.
        /// </summary>
        /// <param name="variant">the variant supplying the origin chromosome, start and alternate allele</param>
        /// <param name="regex">the pattern applied to the alternate allele; group 2 is the partner reference name and group 3 the partner position</param>
        /// <param name="onReverseStrand">orientation of the origin breakpoint</param>
        /// <param name="partnerBracketIndex">index of the regex group holding the bracket that decides the partner orientation</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>a one-element array containing the adjacency</returns>
        /// <exception cref="InvalidDataException">thrown when the alternate allele does not match <paramref name="regex"/></exception>
        private static BreakEndAdjacency[] ConvertTranslocation(ISimpleVariant variant, Regex regex,
                                                                bool onReverseStrand, int partnerBracketIndex, IDictionary <string, IChromosome> refNameToChromosome)
        {
            var parsed = regex.Match(variant.AltAllele);
            if (!parsed.Success)
            {
                throw new InvalidDataException($"Unable to successfully parse the complex rearrangements for the following allele: {variant.AltAllele}");
            }

            var groups = parsed.Groups;

            string partnerBracket    = groups[partnerBracketIndex].Value;
            bool   partnerIsReverse  = partnerBracket == ReverseBracket;
            int    partnerPos        = Convert.ToInt32(groups[3].Value);
            string partnerName       = groups[2].Value;
            var    partnerChromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, partnerName);

            var originPoint  = new BreakPoint(variant.Chromosome, variant.Start, onReverseStrand);
            var partnerPoint = new BreakPoint(partnerChromosome, partnerPos, partnerIsReverse);

            var adjacency = new BreakEndAdjacency(originPoint, partnerPoint);
            return new[] { adjacency };
        }
コード例 #22
0
ファイル: SimplePosition.cs プロジェクト: wangdi2014/Nirvana
        /// <summary>
        /// Builds a <see cref="SimplePosition"/> from VCF columns unless the filter reports that
        /// the position lies past its end.
        /// </summary>
        /// <param name="vcfFields">the VCF columns, indexed with the <c>VcfCommon</c> column constants</param>
        /// <param name="vcfFilter">filter consulted via <c>PassedTheEnd</c></param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <param name="isRecomposed">value stored in <c>IsRecomposed</c> on the result</param>
        /// <returns>the populated position, or null when the filter says the position is past the end</returns>
        public static SimplePosition GetSimplePosition(string[] vcfFields, IVcfFilter vcfFilter, IDictionary <string, IChromosome> refNameToChromosome, bool isRecomposed = false)
        {
            var      chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);
            int      start      = int.Parse(vcfFields[VcfCommon.PosIndex]);
            string   refAllele  = vcfFields[VcfCommon.RefIndex];
            string[] altAlleles = vcfFields[VcfCommon.AltIndex].OptimizedSplit(',');

            var result = new SimplePosition(chromosome, start, refAllele, altAlleles);

            if (vcfFilter.PassedTheEnd(result.Chromosome, result.Start))
            {
                return null;
            }

            // an ALT column that starts with '<' or is exactly "*" gets the sentinel end -1
            string altColumn   = vcfFields[VcfCommon.AltIndex];
            bool   useSentinel = altColumn.OptimizedStartsWith('<') || altColumn == "*";

            if (useSentinel)
            {
                result.End = -1;
            }
            else
            {
                result.End = result.Start + result.RefAllele.Length - 1;
            }

            result.VcfFields    = vcfFields;
            result.IsRecomposed = isRecomposed;

            // per-alt-allele bookkeeping arrays, all left at their default element values
            result.IsDecomposed = new bool[result.AltAlleles.Length];
            result.Vids         = new string[result.AltAlleles.Length];
            result.LinkedVids   = new List <string> [result.AltAlleles.Length];

            return result;
        }
コード例 #23
0
ファイル: ChromMapper.cs プロジェクト: shannonnana/Nirvana
        private string GetNextChromDestinations(string line)
        {
            //extracting current chrom info from first line provided
            var currentChromName = line.Split('\t', 2)[VcfCommon.ChromIndex];

            Console.Write($"Getting destinations for chromosome:{currentChromName}...");
            var currentChrom = ReferenceNameUtilities.GetChromosome(_desSequenceProvider.RefNameToChromosome, currentChromName);

            _desSequenceProvider.LoadChromosome(currentChrom);
            do
            {
                var splits = line.Split('\t', VcfCommon.InfoIndex);
                var chrom  = splits[VcfCommon.ChromIndex];
                if (chrom != currentChromName)
                {
                    break;
                }

                var refAllele  = splits[VcfCommon.RefIndex];
                var altAlleles = splits[VcfCommon.AltIndex].Split(',');
                var position   = int.Parse(splits[VcfCommon.PosIndex]);
                var rsIds      = Utilities.GetRsids(splits[VcfCommon.IdIndex]);
                if (rsIds == null)
                {
                    continue;
                }

                var processedVariants = altAlleles.Select(x => VariantUtils.TrimAndLeftAlign(position, refAllele, x, _desSequenceProvider.Sequence)).ToArray();

                foreach (var(start, variantRef, variantAlt) in processedVariants)
                {
                    foreach (var rsId in rsIds)
                    {
                        if (!_destinationVariants.TryGetValue((rsId, variantRef.Length, variantAlt), out var variants))
                        {
                            variants = new List <int>();
                            _destinationVariants[(rsId, variantRef.Length, variantAlt)] = variants;
コード例 #24
0
ファイル: Reader.cs プロジェクト: shannonnana/Nirvana
        /// <summary>
        /// Reads an index header and its per-reference sections from a binary reader.
        /// </summary>
        /// <param name="reader">the binary reader positioned at the magic number</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup used for the sequence names stored in the index</param>
        /// <returns>the populated <see cref="Index"/></returns>
        /// <exception cref="InvalidDataException">thrown when the first 32-bit value is not Constants.TabixMagic</exception>
        // ReSharper disable once MemberCanBePrivate.Global
        public static Index Read(BinaryReader reader, IDictionary <string, IChromosome> refNameToChromosome)
        {
            int magic = reader.ReadInt32();

            if (magic != Constants.TabixMagic)
            {
                throw new InvalidDataException("This does not seem to be a tabix file. Did you use a GZipStream?");
            }

            // fixed-size header fields, read strictly in this order; the three column indices
            // are reduced by one as they are read
            int numReferenceSequences = reader.ReadInt32();
            int format                      = reader.ReadInt32();
            int sequenceNameIndex           = reader.ReadInt32() - 1;
            int sequenceBeginIndex          = reader.ReadInt32() - 1;
            int sequenceEndIndex            = reader.ReadInt32() - 1;
            var commentChar                 = (char)reader.ReadInt32();
            int numLinesToSkip              = reader.ReadInt32();
            int concatenatedSequenceNameLen = reader.ReadInt32();

            // all sequence names arrive as one byte block that the helper splits apart
            byte[] concatenatedNames = reader.ReadBytes(concatenatedSequenceNameLen);

            string[] referenceSequenceNames = GetReferenceSequenceNames(concatenatedNames, numReferenceSequences);
            var      referenceSequences     = new ReferenceIndex[numReferenceSequences];
            var      refNameToTabixIndex    = new Dictionary <string, ushort>(numReferenceSequences);

            // NOTE(review): the ushort counter assumes fewer than 65536 reference sequences - confirm
            for (ushort i = 0; i < numReferenceSequences; i++)
            {
                string chromosomeName = referenceSequenceNames[i];
                var    chromosome     = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeName);

                referenceSequences[i] = ReadReferenceSequence(reader, chromosome);
                // register both names of the chromosome so either can be used to find this index entry
                refNameToTabixIndex[chromosome.UcscName]    = i;
                refNameToTabixIndex[chromosome.EnsemblName] = i;
            }

            return(new Index(format, sequenceNameIndex, sequenceBeginIndex, sequenceEndIndex, commentChar,
                             numLinesToSkip, referenceSequences, refNameToTabixIndex));
        }
コード例 #25
0
ファイル: RepeatExpansionReader.cs プロジェクト: LvLH/Nirvana
        /// <summary>
        /// Parses one tab-delimited line of the STR data file into a repeat expansion phenotype
        /// wrapped in an interval.
        /// </summary>
        /// <param name="line">the tab-delimited input line</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>the chromosome index and an interval (start, end) carrying the phenotype</returns>
        /// <exception cref="InvalidDataException">
        /// thrown when the line has too few columns, or when the comma-separated lists that
        /// belong together differ in length
        /// </exception>
        private static (ushort RefIndex, Interval <RepeatExpansionPhenotype> Interval) GetPhenotype(string line, IDictionary <string, IChromosome> refNameToChromosome)
        {
            string[] fields = line.OptimizedSplit('\t');

            if (fields.Length < MinNumberOfColumns)
            {
                throw new InvalidDataException($"Expected at least {MinNumberOfColumns} columns in the STR data file, but found only {fields.Length}.");
            }

            string chromosomeName = fields[ChromIndex];
            int    begin          = int.Parse(fields[StartIndex]);
            int    finish         = int.Parse(fields[EndIndex]);
            string phenotypeName  = fields[PhenotypeIndex];
            string omim           = fields[OmimIndex];

            // four comma-separated list columns
            int[]      repeats    = fields[RepeatNumbersIndex].Split(',').Select(int.Parse).ToArray();
            int[]      counts     = fields[AlleleCountsIndex].Split(',').Select(int.Parse).ToArray();
            string[]   categories = fields[CategoriesIndex].Split(',');
            Interval[] ranges     = fields[CategoryRangesIndex].Split(',').Select(GetInterval).ToArray();

            if (repeats.Length != counts.Length)
            {
                throw new InvalidDataException($"Inconsistent number of repeat numbers ({repeats.Length}) vs. allele counts ({counts.Length})");
            }

            if (categories.Length != ranges.Length)
            {
                throw new InvalidDataException($"Inconsistent number of values of classifications ({categories.Length}) vs. classification ranges ({ranges.Length})");
            }

            var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeName);
            var region     = new ChromosomeInterval(chromosome, begin, finish);

            double[] percentiles = PercentileUtilities.ComputePercentiles(repeats.Length, counts);

            var expansion = new RepeatExpansionPhenotype(region, phenotypeName, omim, repeats, percentiles, categories, ranges);
            var wrapped   = new Interval <RepeatExpansionPhenotype>(begin, finish, expansion);

            return (chromosome.Index, wrapped);
        }
コード例 #26
0
        public void GetChromosome_RefName_Exists()
        {
            // The reference name "1" is expected to resolve to the chromosome with index 0.
            const string knownName = "1";

            var observed = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, knownName);

            Assert.Equal(0, observed.Index);
        }
コード例 #27
0
ファイル: ChromMapper.cs プロジェクト: shannonnana/Nirvana
        /// <summary>
        /// Consumes the run of consecutive source lines that share the chromosome of
        /// <paramref name="line"/>. For every line that carries rsIds, each trimmed and
        /// left-aligned alt allele is looked up in the destination variants: hits are written
        /// through <c>WriteRemappedEntry</c>, and lines without any hit are written to the
        /// leftover writer as "rsId#alt#line" records.
        /// </summary>
        /// <param name="line">The first line of the chromosome run; it defines the current chromosome.</param>
        /// <returns>
        /// The first line whose chromosome differs from the current one, or <c>null</c> once the
        /// source reader has no more lines.
        /// </returns>
        private string ProcessNextChromSource(string line)
        {
            // The chromosome column of the first line decides which reference sequence gets loaded.
            string runChromName = line.Split('\t', 2)[VcfCommon.ChromIndex];
            var    runChrom     = ReferenceNameUtilities.GetChromosome(_srcSequenceProvider.RefNameToChromosome, runChromName);

            _srcSequenceProvider.LoadChromosome(runChrom);

            int unmappedLineCount = 0;

            do
            {
                string[] fields    = line.Split('\t', VcfCommon.InfoIndex);
                string   chromName = fields[VcfCommon.ChromIndex];

                // A different chromosome ends this run; the line is handed back to the caller.
                if (chromName != runChromName)
                {
                    break;
                }

                string   reference = fields[VcfCommon.RefIndex];
                string[] alts      = fields[VcfCommon.AltIndex].Split(',');
                int      pos       = int.Parse(fields[VcfCommon.PosIndex]);
                var      rsIds     = Utilities.GetRsids(fields[VcfCommon.IdIndex]);

                // Lines for which no rsIds are returned are skipped ('continue' advances to the next ReadLine).
                if (rsIds == null)
                {
                    continue;
                }

                var alignedVariants = alts
                                      .Select(alt => VariantUtils.TrimAndLeftAlign(pos, reference, alt, _srcSequenceProvider.Sequence))
                                      .ToArray();

                // Try every (rsId, ref length, alt) combination against the destination variants.
                bool mapped = false;
                foreach (var(_, alignedRef, alignedAlt) in alignedVariants)
                {
                    foreach (var rsId in rsIds)
                    {
                        if (_destinationVariants.TryGetValue((rsId, alignedRef.Length, alignedAlt), out var destPositions))
                        {
                            destPositions.ForEach(destPosition => WriteRemappedEntry(chromName, destPosition, alignedRef, alignedAlt, line));
                            mapped = true;
                        }
                    }
                }

                if (!mapped)
                {
                    // Nothing matched: record every (rsId, alt) pair of this line as a leftover.
                    foreach (var(_, _, alignedAlt) in alignedVariants)
                    {
                        foreach (var rsId in rsIds)
                        {
                            _leftoverWriter.WriteLine(string.Join('#', rsId.ToString(), alignedAlt, line));
                        }
                    }

                    // Counted once per source line, not once per written leftover record.
                    unmappedLineCount++;
                }
            } while ((line = _srcReader.ReadLine()) != null);

            Console.WriteLine($"Leftover count for {runChromName}: {unmappedLineCount}");
            _leftoverCount += unmappedLineCount;

            return line;
        }
コード例 #28
0
ファイル: LeftoverMapper.cs プロジェクト: shannonnana/Nirvana
        // Reads the leftover (id, alt allele) pairs from the leftover reader, then streams the
        // destination reader looking for lines that carry one of those pairs. For each pair found,
        // a list of GenomicLocation is kept in a dictionary keyed by (id, alt allele).
        public int Map()
        {
            // write out the relocated locations of the leftover rsIds whenever possible
            // Step 1: read the leftover ids. Each line is split on '#' into at most three parts;
            // the first part is parsed as the numeric id and the second is taken as the alt allele.
            var leftoverIds = new HashSet <(long, string)>();

            Console.Write("Loading leftover ids...");
            string line;

            while ((line = _leftoverReader.ReadLine()) != null)
            {
                var splits = line.Split('#', 3);
                var id     = long.Parse(splits[0]);
                var alt    = splits[1];
                leftoverIds.Add((id, alt));
            }
            Console.WriteLine($"{leftoverIds.Count} found.");

            // Step 2: stream through the destination file to find locations for the leftover pairs.
            var leftoversWithDest = new Dictionary <(long, string), List <GenomicLocation> >();
            var currentChromName  = "";

            while ((line = _destReader.ReadLine()) != null)
            {
                // Lines starting with '#' are skipped (presumably VCF header lines; confirm against the input format).
                if (line.OptimizedStartsWith('#'))
                {
                    continue;
                }

                var splits    = line.Split('\t', VcfCommon.InfoIndex);
                var chromName = splits[VcfCommon.ChromIndex];
                // Whenever the chromosome column changes, load that chromosome in the destination sequence provider.
                if (chromName != currentChromName)
                {
                    currentChromName = chromName;
                    Console.WriteLine($"Getting destinations for chromosome:{currentChromName}...");
                    var currentChrom = ReferenceNameUtilities.GetChromosome(_desSequenceProvider.RefNameToChromosome,
                                                                            currentChromName);
                    _desSequenceProvider.LoadChromosome(currentChrom);
                }

                var refAllele  = splits[VcfCommon.RefIndex];
                var altAlleles = splits[VcfCommon.AltIndex].Split(',');
                var position   = int.Parse(splits[VcfCommon.PosIndex]);
                var rsIds      = Utilities.GetRsids(splits[VcfCommon.IdIndex]);
                // Lines for which no rsIds are returned cannot match a leftover pair and are skipped.
                if (rsIds == null)
                {
                    continue;
                }

                // Trim and left-align every alt allele against the loaded destination sequence.
                var processedVariants = altAlleles.Select(x => VariantUtils.TrimAndLeftAlign(position, refAllele, x, _desSequenceProvider.Sequence)).ToArray();

                // Only the processed alt allele is used for matching; the other tuple elements are discarded.
                foreach (var(_, _, variantAlt) in processedVariants)
                {
                    foreach (var rsId in rsIds)
                    {
                        if (!leftoverIds.Contains((rsId, variantAlt)))
                        {
                            continue;
                        }
                        // Re-parses the same position column that was already parsed into 'position' above.
                        var pos = int.Parse(splits[VcfCommon.PosIndex]);
                        // Create the location list the first time this (rsId, alt) pair is seen.
                        if (!leftoversWithDest.TryGetValue((rsId, variantAlt), out var locations))
                        {
                            locations = new List <GenomicLocation>();
                            leftoversWithDest[(rsId, variantAlt)] = locations;
コード例 #29
0
        public void GetChromosome_RefIndex_Exists()
        {
            // Looking up the reference index 2 must return the chromosome whose Ensembl name is "3".
            const int    refIndex            = 2;
            const string expectedEnsemblName = "3";

            var observed = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, refIndex);

            Assert.Equal(expectedEnsemblName, observed.EnsemblName);
        }