Ejemplo n.º 1
0
        /// <summary>
        /// Searches the annotated alternate alleles of a variant for a transcript whose ID
        /// (the first element returned by FormatUtilities.SplitVersion) equals the requested ID.
        /// </summary>
        /// <param name="annotatedVariant">variant whose annotated alternate alleles are searched in order</param>
        /// <param name="transcriptId">transcript ID to look for, compared against the split (version-less) ID</param>
        /// <param name="altAllele">alt allele an entry must have; null accepts any alt allele</param>
        /// <param name="refAllele">ref allele an entry must have; null accepts any ref allele</param>
        /// <returns>
        /// the first matching transcript, checking the Ensembl transcripts before the RefSeq
        /// transcripts of each accepted allele, or null when nothing matches
        /// </returns>
        public static IAnnotatedTranscript GetTranscript(IAnnotatedVariant annotatedVariant, string transcriptId, string altAllele, string refAllele = null)
        {
            foreach (var allele in annotatedVariant.AnnotatedAlternateAlleles)
            {
                // a null filter means "don't care"; the ref allele is only inspected when the alt allele passed
                bool alleleAccepted = (altAllele == null || allele.AltAllele == altAllele) &&
                                      (refAllele == null || allele.RefAllele == refAllele);
                if (!alleleAccepted)
                {
                    continue;
                }

                var ensemblHit = allele.EnsemblTranscripts.FirstOrDefault(
                    candidate => FormatUtilities.SplitVersion(candidate.TranscriptID).Item1 == transcriptId);
                if (ensemblHit != null)
                {
                    return ensemblHit;
                }

                var refSeqHit = allele.RefSeqTranscripts.FirstOrDefault(
                    candidate => FormatUtilities.SplitVersion(candidate.TranscriptID).Item1 == transcriptId);
                if (refSeqHit != null)
                {
                    return refSeqHit;
                }
            }

            return null;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the tab-delimited LRG data file and gathers the transcript IDs found in its
        /// sixth column, keeping only the ID part returned by FormatUtilities.SplitVersion.
        /// </summary>
        /// <param name="lrgPath">path to the LRG data file</param>
        /// <returns>the set of transcript IDs collected from the file</returns>
        /// <remarks>
        /// The first line is skipped, reading stops at the first null or empty line, and a
        /// GeneralException is thrown for any line that does not have exactly 10 columns.
        /// </remarks>
        public static HashSet <string> GetTranscriptIds(string lrgPath)
        {
            var collectedIds = new HashSet<string>();

            using (var reader = new StreamReader(FileUtilities.GetReadStream(lrgPath)))
            {
                // the first line is discarded before any data is parsed
                reader.ReadLine();

                string line;
                while (!string.IsNullOrEmpty(line = reader.ReadLine()))
                {
                    var cols = line.Split('\t');
                    if (cols.Length != 10)
                    {
                        throw new GeneralException($"Expected 10 columns, but found {cols.Length}: [{line}]");
                    }

                    collectedIds.Add(FormatUtilities.SplitVersion(cols[5]).Item1);
                }
            }

            return collectedIds;
        }
Ejemplo n.º 3
0
        public void SplitVersion(string originalId, string expectedId, byte expectedVersion)
        {
            // split once, then check the ID and the version independently
            var observed        = FormatUtilities.SplitVersion(originalId);
            var observedId      = observed.Item1;
            var observedVersion = observed.Item2;

            Assert.Equal(expectedId, observedId);
            Assert.Equal(expectedVersion, observedVersion);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the larger of the explicit transcript version and the version embedded in the ID.
        /// This covers the case where VEP sets the transcript version to 1 while the RefSeq ID
        /// carries its own version, e.g. NM_178221.2
        /// </summary>
        /// <param name="transcriptVersion">transcript version as text; parsed with byte.Parse</param>
        /// <param name="id">transcript ID whose version is obtained via FormatUtilities.SplitVersion</param>
        /// <returns>the greater of the two versions</returns>
        private static byte GetMaxVersion(string transcriptVersion, string id)
        {
            var embeddedVersion = FormatUtilities.SplitVersion(id).Item2;
            var declaredVersion = byte.Parse(transcriptVersion);

            // the embedded version only wins when it is strictly larger
            if (embeddedVersion > declaredVersion)
            {
                return embeddedVersion;
            }

            return declaredVersion;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a CompactId from the part of the ID that follows the prefix. The version
        /// returned by FormatUtilities.SplitVersion is ignored.
        /// </summary>
        /// <param name="s">ID string, handed to FormatUtilities.SplitVersion</param>
        /// <param name="prefixLen">number of leading characters to skip before the numeric part</param>
        /// <param name="idType">ID type stored in the resulting CompactId</param>
        /// <returns>a CompactId made from the ID type and ToInfo(number, digit count)</returns>
        private static CompactId GetCompactId(string s, int prefixLen, IdType idType)
        {
            var splitId = FormatUtilities.SplitVersion(s).Item1;

            // everything after the prefix must parse as an integer; its length is passed along as well
            var digits = splitId.Substring(prefixLen);
            var number = int.Parse(digits);

            return new CompactId(idType, ToInfo(number, digits.Length));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a CompactId from the part of the ID that follows the prefix, using the
        /// supplied version rather than the one returned by FormatUtilities.SplitVersion.
        /// </summary>
        /// <param name="s">ID string, handed to FormatUtilities.SplitVersion</param>
        /// <param name="prefixLen">number of leading characters to skip before the numeric part</param>
        /// <param name="idType">ID type stored in the resulting CompactId</param>
        /// <param name="version">version stored in the resulting CompactId</param>
        /// <returns>a CompactId made from the ID type, the version and ToInfo(number, digit count)</returns>
        private static CompactId GetCompactId(string s, int prefixLen, IdType idType, byte version)
        {
            var (splitId, _) = FormatUtilities.SplitVersion(s);

            // everything after the prefix must parse as an integer; its length is passed along as well
            var digits = splitId.Substring(prefixLen);
            var number = int.Parse(digits);

            return new CompactId(idType, version, ToInfo(number, digits.Length));
        }
Ejemplo n.º 7
0
        public void SplitVersion_ReturnNull_WithNullInput()
        {
            // a null input is expected to produce a null ID and a version of 0
            var split         = FormatUtilities.SplitVersion(null);
            var actualId      = split.Id;
            var actualVersion = split.Version;

            Assert.Null(actualId);
            Assert.Equal(0, actualVersion);
        }
Ejemplo n.º 8
0
        public void SplitVersion(string combinedId, string expectedId, byte expectedVersion)
        {
            // split once, then check the ID and the version independently
            var (actualId, actualVersion) = FormatUtilities.SplitVersion(combinedId);

            Assert.Equal(expectedId, actualId);
            Assert.Equal(expectedVersion, actualVersion);
        }
Ejemplo n.º 9
0
            /// <summary>
            /// Extracts the numeric accession from a transcript ID: the text after the first
            /// underscore (or the whole ID when there is none) is passed through
            /// FormatUtilities.SplitVersion and the resulting ID part is parsed as an integer.
            /// </summary>
            /// <param name="transcriptId">transcript ID to inspect</param>
            /// <returns>the parsed accession number, or 0 when the ID part is not an integer</returns>
            private static int GetAccessionNumber(string transcriptId)
            {
                int underscorePos = transcriptId.IndexOf('_');

                // without an underscore the whole ID is used
                string remainder = underscorePos == -1
                    ? transcriptId
                    : transcriptId.Substring(underscorePos + 1);

                string accessionText = FormatUtilities.SplitVersion(remainder).Item1;

                int accession;
                if (int.TryParse(accessionText, out accession))
                {
                    return accession;
                }

                return 0;
            }
Ejemplo n.º 10
0
        /// <summary>
        /// Reads a tab-delimited CCDS file and groups Ensembl transcript IDs by CCDS ID. Both
        /// IDs are reduced to the ID part returned by FormatUtilities.SplitVersion.
        /// </summary>
        /// <param name="ccdsPath">path to the CCDS file</param>
        /// <returns>
        /// a dictionary from CCDS ID to the Ensembl transcript IDs listed for it, in file order
        /// </returns>
        /// <exception cref="InvalidDataException">a data line does not contain exactly 8 columns</exception>
        public static Dictionary <string, List <string> > GetCcdsIdToEnsemblId(string ccdsPath)
        {
            var ccdsIdToEnsemblId = new Dictionary<string, List<string>>();

            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(ccdsPath)))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // lines starting with '#' are skipped before the column check
                    if (line.OptimizedStartsWith('#'))
                    {
                        continue;
                    }

                    var cols = line.OptimizedSplit('\t');
                    if (cols.Length != 8)
                    {
                        throw new InvalidDataException($"Expected 8 columns, but found {cols.Length}: [{line}]");
                    }

                    // only rows whose nucleotide ID has the ENST prefix are kept; identifiers are
                    // compared ordinally so the result never depends on the current culture
                    string nucleotideId = cols[NucleotideIdIndex];
                    if (!nucleotideId.StartsWith("ENST", System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    var ccds    = FormatUtilities.SplitVersion(cols[CcdsIdIndex]);
                    var ensembl = FormatUtilities.SplitVersion(nucleotideId);

                    // create the list on first sight of a CCDS ID, then append in either case
                    if (!ccdsIdToEnsemblId.TryGetValue(ccds.Id, out var ensemblList))
                    {
                        ensemblList = new List<string>();
                        ccdsIdToEnsemblId[ccds.Id] = ensemblList;
                    }

                    ensemblList.Add(ensembl.Id);
                }
            }

            return ccdsIdToEnsemblId;
        }
    }
Ejemplo n.º 11
0
        /// <summary>
        /// Reads the tab-delimited LRG file and gathers the RefSeq transcript ID of each row
        /// together with the Ensembl transcript IDs returned by GetEnsemblTranscriptIds.
        /// </summary>
        /// <param name="lrgPath">path to the LRG file</param>
        /// <param name="ccdsIdToEnsemblId">CCDS ID to Ensembl ID lookup, forwarded to GetEnsemblTranscriptIds</param>
        /// <returns>the set of transcript IDs collected from the file</returns>
        /// <remarks>
        /// Lines starting with "#" are skipped; an InvalidDataException is thrown for any
        /// other line that does not have exactly 7 columns.
        /// </remarks>
        public static HashSet <string> GetTranscriptIds(string lrgPath, Dictionary <string, List <string> > ccdsIdToEnsemblId)
        {
            var transcriptIds = new HashSet<string>();

            using (var reader = new StreamReader(FileUtilities.GetReadStream(lrgPath)))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.StartsWith("#"))
                    {
                        continue;
                    }

                    var cols = line.Split('\t');
                    if (cols.Length != 7)
                    {
                        throw new InvalidDataException($"Expected 7 columns, but found {cols.Length}: [{line}]");
                    }

                    // every column is sanitized before use; all three values are resolved before anything is stored
                    var refSeqTranscript     = FormatUtilities.SplitVersion(Sanitize(cols[RefSeqTranscriptIndex]));
                    var ccds                 = FormatUtilities.SplitVersion(Sanitize(cols[CccdsIndex]));
                    var ensemblTranscriptIds = GetEnsemblTranscriptIds(ccds.Id, ccdsIdToEnsemblId, Sanitize(cols[EnsemblTranscriptIndex]));

                    if (refSeqTranscript.Id != null)
                    {
                        transcriptIds.Add(refSeqTranscript.Id);
                    }

                    // a null result means there are no Ensembl IDs to add for this row
                    if (ensemblTranscriptIds == null)
                    {
                        continue;
                    }

                    foreach (var ensemblId in ensemblTranscriptIds)
                    {
                        transcriptIds.Add(ensemblId);
                    }
                }
            }

            return transcriptIds;
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Splits the ID with FormatUtilities.SplitVersion and pairs the resulting ID with the
 /// larger of the supplied version and the version obtained from the split.
 /// </summary>
 /// <param name="originalId">ID handed to FormatUtilities.SplitVersion</param>
 /// <param name="originalVersion">version reported separately from the ID</param>
 /// <returns>the split ID and the greater of the two versions</returns>
 internal static (string Id, byte Version) GetMaxVersion(string originalId, byte originalVersion)
 {
     (string pureId, byte idVersion) = FormatUtilities.SplitVersion(originalId);

     // keep whichever version is larger
     byte maxVersion = originalVersion >= idVersion ? originalVersion : idVersion;

     return (pureId, maxVersion);
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Applies the matcher to the file name of the given path and, on success, builds the
        /// updater for the mini-cache type from the captured groups.
        /// </summary>
        /// <param name="ndbPath">path of the candidate file; only its file name is matched</param>
        /// <param name="matcher">function producing the regex match for a file name</param>
        /// <param name="type">mini-cache type that decides how the captured groups are interpreted</param>
        /// <param name="renamer">used to convert a captured reference name into a reference index</param>
        /// <returns>
        /// a CacheFile wrapping the created updater, or null when the path has no file name
        /// or the matcher does not succeed
        /// </returns>
        /// <remarks>A GeneralException is thrown for a mini-cache type not handled below.</remarks>
        private static CacheFile TryMatchFilename(string ndbPath, Func <string, Match> matcher, MiniCacheType type,
                                                  ChromosomeRenamer renamer)
        {
            string filename = Path.GetFileName(ndbPath);
            if (filename == null)
            {
                return null;
            }

            var match = matcher(filename);
            if (!match.Success)
            {
                return null;
            }

            var      groups = match.Groups;
            IUpdater updater;

            switch (type)
            {
            case MiniCacheType.Transcript:
            {
                // the transcript ID is stored without its version
                string transcriptId = FormatUtilities.SplitVersion(groups[1].Value).Item1;
                ushort refIndex     = renamer.GetReferenceIndex(groups[2].Value);
                updater = new TranscriptUpdater(transcriptId, refIndex, groups[3].Value);
                break;
            }

            case MiniCacheType.Regulatory:
            {
                string regulatoryId = groups[1].Value;
                ushort refIndex     = renamer.GetReferenceIndex(groups[2].Value);
                updater = new RegulatoryUpdater(regulatoryId, refIndex, groups[3].Value);
                break;
            }

            case MiniCacheType.Position:
            {
                ushort refIndex = renamer.GetReferenceIndex(groups[1].Value);
                int    position = int.Parse(groups[2].Value);
                updater = new PositionUpdater(refIndex, position, groups[3].Value, groups[4].Value, groups[5].Value);
                break;
            }

            case MiniCacheType.PositionRange:
            {
                ushort refIndex    = renamer.GetReferenceIndex(groups[1].Value);
                int    position    = int.Parse(groups[2].Value);
                int    endPosition = int.Parse(groups[3].Value);
                updater = new PositionRangeUpdater(refIndex, position, endPosition, groups[4].Value);
                break;
            }

            default:
                throw new GeneralException($"Unexpected mini-cache type encountered: {type}");
            }

            return new CacheFile(ndbPath, updater.RefIndex, ConvertTranscriptDataSource(updater.TranscriptDataSource),
                                 type, updater);
        }