Ejemplo n.º 1
0
        /// <summary>
        /// Creates UTRs for transcripts and intergenic regions after reading gene model
        /// </summary>
        public void CreateIntergenicRegions()
        {
            foreach (IntervalTree it in GenomeForest.Forest.Values)
            {
                Gene previousPositiveStrandGene = null;
                Gene previousNegativeStrandGene = null;

                // Create intergenic regions on each strand
                foreach (Gene gene in it.Intervals.OfType <Gene>().OrderBy(g => g.OneBasedStart))
                {
                    Intergenic intergenic = null;
                    Gene       previous   = gene.IsStrandPlus() ? previousPositiveStrandGene : previousNegativeStrandGene;
                    if (previous != null)
                    {
                        // if there's a previous gene, create the intergenic region
                        intergenic = new Intergenic(gene.Chromosome, gene.ChromosomeID, gene.Source, gene.Strand, previous.OneBasedEnd + 1, gene.OneBasedStart - 1, null);
                    }

                    // store previous genes on each strand
                    if (gene.IsStrandPlus())
                    {
                        previousPositiveStrandGene = gene;
                    }
                    if (gene.IsStrandMinus())
                    {
                        previousNegativeStrandGene = gene;
                    }

                    // add the intergenic region to the genome forest if it was created
                    if (intergenic != null && intergenic.Length() > 0)
                    {
                        GenomeForest.Add(intergenic);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Processes a feature from a GFF3 gene model file.
        /// </summary>
        /// <param name="feature"></param>
        /// <param name="oneBasedStart"></param>
        /// <param name="oneBasedEnd"></param>
        /// <param name="chrom"></param>
        /// <param name="attributes"></param>
        public void ProcessGff3Feature(MetadataListItem <List <string> > feature, long oneBasedStart, long oneBasedEnd, Chromosome chrom, Dictionary <string, string> attributes)
        {
            bool hasGeneId       = attributes.TryGetValue("gene_id", out string geneId);
            bool hasTranscriptId = attributes.TryGetValue("transcript_id", out string transcriptId);
            bool hasExonId       = attributes.TryGetValue("exon_id", out string exonId);
            bool hasProteinId    = attributes.TryGetValue("protein_id", out string proteinId);
            bool hasSource       = feature.SubItems.TryGetValue("source", out List <string> sourceish); // false if empty ("." in GFF format)
            bool hasStrand       = feature.SubItems.TryGetValue("strand", out List <string> strandish); // false if empty ("." in GFF format)
            bool hasFrame        = feature.SubItems.TryGetValue("frame", out List <string> framey);     // false if empty ("." in GFF format)

            string source = hasSource ? sourceish[0] : "";

            if (!hasStrand)
            {
                return;
            }                           // strand is a required to do anything in this program
            string strand = strandish[0];
            int    frame  = 0;

            if (hasFrame)
            {
                int.TryParse(framey[0], out frame);
            }

            if (hasGeneId && (currentGene == null || hasGeneId && geneId != currentGene.ID))
            {
                currentGene = new Gene(geneId, chrom, source, strand, oneBasedStart, oneBasedEnd, feature);
                Genes.Add(currentGene);
                GenomeForest.Add(currentGene);
            }

            if (hasTranscriptId && (currentTranscript == null || hasTranscriptId && transcriptId != currentTranscript.ID))
            {
                if (currentTranscript != null)
                {
                    Transcript.SetRegions(currentTranscript);
                    currentTranscript.FrameCorrection();
                }
                currentTranscript = new Transcript(transcriptId, currentGene, source, strand, oneBasedStart, oneBasedEnd, null, null, feature);
                currentGene.Transcripts.Add(currentTranscript);
                GenomeForest.Add(currentTranscript);
            }

            if (hasExonId)
            {
                ISequence exon_dna = chrom.Sequence.GetSubSequence(oneBasedStart - 1, oneBasedEnd - oneBasedStart + 1);
                Exon      exon     = new Exon(currentTranscript, currentTranscript.IsStrandPlus() ? exon_dna : exon_dna.GetReverseComplementedSequence(),
                                              source, oneBasedStart, oneBasedEnd, chrom == null ? "" : chrom.ChromosomeID, strand, null, feature);
                if (exon.Length() > 0)
                {
                    currentTranscript.Exons.Add(exon);
                }
            }
            else if (hasProteinId)
            {
                CDS cds = new CDS(currentTranscript, chrom.Sequence.ID, source, strand, oneBasedStart, oneBasedEnd, null, frame);
                if (cds.Length() > 0)
                {
                    currentTranscript.CodingDomainSequences.Add(cds);
                    currentTranscript.ProteinID = proteinId;
                }
            }
            else // nothing to do
            {
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Processes a feature from a GTF gene model file.
        /// </summary>
        /// <param name="feature"></param>
        /// <param name="oneBasedStart"></param>
        /// <param name="oneBasedEnd"></param>
        /// <param name="chrom"></param>
        /// <param name="attributes"></param>
        public void ProcessGtfFeature(MetadataListItem <List <string> > feature, long oneBasedStart, long oneBasedEnd, Chromosome chrom, Dictionary <string, string> attributes)
        {
            bool hasGeneId       = attributes.TryGetValue("gene_id", out string geneId);
            bool hasTranscriptId = attributes.TryGetValue("transcript_id", out string transcriptId);
            bool hasProteinId    = attributes.TryGetValue("protein_id", out string proteinId);
            bool hasExonId       = attributes.TryGetValue("exon_id", out string exonId);
            bool hasSource       = feature.SubItems.TryGetValue("source", out List <string> sourceish);
            bool hasStrand       = feature.SubItems.TryGetValue("strand", out List <string> strandish);
            bool hasFrame        = feature.SubItems.TryGetValue("frame", out List <string> framey);

            string source = hasSource ? sourceish[0] : "";

            if (!hasStrand)
            {
                return;
            }                           // strand is a required to do anything in this program
            string strand = strandish[0];
            int    frame  = 0;

            if (hasFrame)
            {
                int.TryParse(framey[0], out frame);
            }

            // Trim prefixes from the IDs
            string genePrefix       = "gene:";
            string transcriptPrefix = "transcript:";

            if (hasGeneId && geneId.StartsWith(genePrefix))
            {
                string newGeneId = geneId.Substring(genePrefix.Length);
                feature.FreeText.Replace(geneId, newGeneId);
                geneId = newGeneId;
            }
            if (hasTranscriptId && transcriptId.StartsWith(transcriptPrefix))
            {
                string newTranscriptId = transcriptId.Substring(transcriptPrefix.Length);
                feature.FreeText.Replace(transcriptId, newTranscriptId);
                transcriptId = newTranscriptId;
            }
            if (hasProteinId && proteinId.StartsWith(transcriptPrefix))
            {
                proteinId = proteinId.Substring(transcriptPrefix.Length); // transcript id is used for protein id sometimes
            }

            // Catch the transcript features before they go by if available, i.e. if the file doesn't just have exons
            if (feature.Key == "transcript" && (currentTranscript == null || hasTranscriptId && transcriptId != currentTranscript.ID))
            {
                if (currentGene == null || hasGeneId && geneId != currentGene.ID)
                {
                    currentGene = new Gene(geneId, chrom, source, strand, oneBasedStart, oneBasedEnd, feature);
                    Genes.Add(currentGene);
                    GenomeForest.Add(currentGene);
                }

                currentTranscript = new Transcript(transcriptId, currentGene, source, strand, oneBasedStart, oneBasedEnd, null, null, feature);
                currentGene.Transcripts.Add(currentTranscript);
                GenomeForest.Add(currentTranscript);
            }

            if (feature.Key == "exon" || feature.Key == "CDS")
            {
                if (currentGene == null || hasGeneId && geneId != currentGene.ID)
                {
                    currentGene = new Gene(geneId, chrom, source, strand, oneBasedStart, oneBasedEnd, feature);
                    Genes.Add(currentGene);
                    GenomeForest.Add(currentGene);
                }

                if (currentTranscript == null || hasTranscriptId && transcriptId != currentTranscript.ID)
                {
                    if (currentTranscript != null)
                    {
                        Transcript.SetRegions(currentTranscript);
                        currentTranscript.FrameCorrection();
                    }
                    currentTranscript = new Transcript(transcriptId, currentGene, source, strand, oneBasedStart, oneBasedEnd, null, null, feature);
                    currentGene.Transcripts.Add(currentTranscript);
                    GenomeForest.Add(currentTranscript);
                }

                if (feature.Key == "exon")
                {
                    ISequence exon_dna = chrom.Sequence.GetSubSequence(oneBasedStart - 1, oneBasedEnd - oneBasedStart + 1);
                    Exon      exon     = new Exon(currentTranscript, currentTranscript.IsStrandPlus() ? exon_dna : exon_dna.GetReverseComplementedSequence(),
                                                  source, oneBasedStart, oneBasedEnd, chrom.Sequence.ID, strand, null, feature);
                    if (exon.Length() > 0)
                    {
                        currentTranscript.Exons.Add(exon);
                    }
                }
                else if (feature.Key == "CDS")
                {
                    CDS cds = new CDS(currentTranscript, chrom.Sequence.ID, source, strand, oneBasedStart, oneBasedEnd, null, frame);
                    if (hasProteinId)
                    {
                        currentTranscript.ProteinID = proteinId;
                    }
                    if (cds.Length() > 0)
                    {
                        currentTranscript.CodingDomainSequences.Add(cds);
                    }
                }
                else
                { // nothing to do
                }
            }
        }
        public string ToString(bool useSeqOntology, bool useHgvs)
        {
            // Get data to show
            string geneId = "", geneName = "", transcriptId = "", exonId = "", customId = "";
            string bioType  = null;
            int    exonRank = -1;

            if (Marker != null)
            {
                // Gene Id, name and biotype
                Gene       gene = GetGene();
                Transcript tr   = GetTranscript();

                // CDS size info
                if (gene != null)
                {
                    geneId = gene.ID;
                    //geneName = gene.getGeneName();
                    //bioType = (getBiotype() == null ? "" : getBiotype().ToString());
                }

                // Update trId
                if (tr != null)
                {
                    transcriptId = tr.ID;
                }

                // Exon rank information
                Exon exon = GetExon();
                //if (exon != null)
                //{
                //    exonId = exon.ID;
                //    exonRank = exon.getRank();
                //}

                // Regulation
                //if (isRegulation()) bioType = ((Regulation)marker).getRegulationType();
            }

            // Add seqChage's ID
            //if (!variant.getId() == "") customId += variant.getId();

            // Add custom markers
            //if (marker != null && marker is Custom) customId += (customId == "" ? "" : ";") + marker.getId();

            // CDS length
            long cdsSize = GetCdsLength();

            string error   = String.Join(",", Error);
            string warning = String.Join(",", Warning);
            string errWarn = error + (error == "" ? "" : "|") + warning;

            //string aaChange = "";

            //if (useHgvs) aaChange = getHgvs();
            //else aaChange = ((aaRef.Length + aaAlt.Length) > 0 ? aaRef + "/" + aaAlt : "");

            string aaChange = ((ReferenceAA.Length + AlternateAA.Length) > 0 ? ReferenceAA + "/" + AlternateAA : "");

            return(errWarn                                                                                 //
                   + "\t" + geneId                                                                         //
                   + "\t" + geneName                                                                       //
                   + "\t" + bioType                                                                        //
                   + "\t" + transcriptId                                                                   //
                   + "\t" + exonId                                                                         //
                   + "\t" + (exonRank >= 0 ? exonRank.ToString() : "")                                     //
                   + "\t" + Effect(false, false, false, useSeqOntology, false)                             //
                   + "\t" + aaChange                                                                       //
                   + "\t" + ((CodonsRef.Length + CodonsAlt.Length) > 0 ? CodonsRef + "/" + CodonsAlt : "") //
                   + "\t" + (CodonNum >= 0 ? (CodonNum + 1).ToString() : "")                               //
                                                                                                           //+ "\t" + (codonDegeneracy >= 0 ? codonDegeneracy + "" : "") //
                   + "\t" + (cdsSize >= 0 ? cdsSize.ToString() : "")                                       //
                   + "\t" + (CodonsAroundOld.Length > 0 ? CodonsAroundOld + " / " + CodonsAroundNew : "")  //
                   + "\t" + (AroundOldAAs.Length > 0 ? AroundOldAAs + " / " + AroundNewAAs : "")           //
                   + "\t" + customId                                                                       //
                   );
        }