コード例 #1
0
        /// <summary>
        /// Builds the baseline statistics used to judge candidate Orfs: a random strand is
        /// generated, scanned for Orfs, and the mean, population standard deviation and
        /// "range" (mean + one standard deviation) are computed for exon length, whole Orf
        /// length, AACodonUsage and AAwholeUsage.
        /// </summary>
        /// <param name="experimentalStrand">Only its <c>length</c> is read; it is passed as both arguments of <c>generateRandomDNA</c>.</param>
        /// <param name="dtAminoAcid">Codon table searched with <c>Rows.Find</c> on the three-letter code (so it needs a primary key); columns 0, 1 and 2 are passed to the <c>Codon</c> constructor.</param>
        /// <returns>
        /// The populated <c>Statistics</c>. When the random strand contains no Orf at all, a freshly
        /// constructed <c>Statistics</c> is returned untouched instead of dividing by zero.
        /// </returns>
        public static Statistics calculateStatistics(NucleotideStrand experimentalStrand, DataTable dtAminoAcid)
        {
            NucleotideStrand strand = new NucleotideStrand();

            strand.sequence = generateRandomDNA(experimentalStrand.length, experimentalStrand.length);
            strand.setLength();

            List<Orf> OrfList = new List<Orf>();

            //Reading Strand searching for Orf, trying every offset as a possible start Codon
            for (int i = 0; i < (strand.length - 3); i++)
            {
                string  srchCode = strand.sequence.Substring(i, 3);
                DataRow srchRow  = dtAminoAcid.Rows.Find(srchCode);

                //Rows.Find returns null for a triplet that is not in the table; such a triplet cannot start an Orf
                if (srchRow == null)
                {
                    continue;
                }

                Codon srchCodon = new Codon(srchRow[0].ToString(), srchRow[1].ToString(), Convert.ToDecimal(srchRow[2]));

                //Only a start Codon opens an Orf
                if ("Met" != srchCodon.name)
                {
                    continue;
                }

                Codon currCodon = srchCodon;
                int   b         = i;
                Orf   currOrf   = new Orf();
                currOrf.start = b;

                Exon currExon = new Exon();
                currExon.start = b;

                //Translate Codon by Codon until a stop Codon is read or fewer than 12 bases remain
                while ((b < (strand.length - 12)) && (currCodon.name != "Stp"))
                {
                    string  currCode = strand.sequence.Substring(b, 3);
                    DataRow currRow  = dtAminoAcid.Rows.Find(currCode);

                    //An untranslatable triplet ends this candidate; it is dropped because no stop Codon was reached
                    if (currRow == null)
                    {
                        break;
                    }

                    currCodon = new Codon(currRow[0].ToString(), currRow[1].ToString(), Convert.ToDecimal(currRow[2]));

                    currOrf.CodonList.Add(currCodon);
                    currExon.CodonList.Add(currCodon);

                    //Detecting Intron
                    if ("AGGT" == strand.sequence.Substring(b + 1, 4))
                    {
                        //Finish Exon
                        currExon.finish = b;
                        currExon.length = currExon.finish - currExon.start;
                        currOrf.ExonList.Add(currExon);

                        //Starting Intron
                        Intron currIntron = new Intron();
                        currIntron.start = b;

                        //Reading Intron
                        //NOTE(review): this loop advances only while the 8-base window already equals "01234567";
                        //if the intent is to scan forward until a branch point is found the comparison looks inverted - confirm
                        string IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                        while ((b < (strand.length - 8)) && (IntronStr == "01234567"))
                        {
                            IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                            b++;
                        }

                        //The loop above may leave b up to 4 bases past the last offset where a 12-base read fits;
                        //clamp it so the reads below cannot run off the end of the strand
                        //(assumes setLength() made strand.length equal to the sequence length)
                        b = Math.Min(b, strand.length - 12);

                        //Converts CGs at the end of the Intron into pyrimidines for comparison, because the end of an Intron always contains a chain of pyrimidines
                        //NOTE(review): the loop reads at b and then increments, so on a match found inside the loop
                        //b is one past the matching window, unlike a match on the first read - confirm intended
                        IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                        while ((b < (strand.length - 12)) && ("PPPPPPPPXPAG" != IntronStr))
                        {
                            IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                            b++;
                        }

                        b = b + 12;

                        //Finishing Intron
                        currIntron.finish   = b;
                        currIntron.length   = currIntron.finish - currIntron.start;
                        currIntron.sequence = strand.sequence.Substring(currIntron.start, currIntron.length);
                        currOrf.IntronList.Add(currIntron);

                        //Starting new Exon
                        currExon       = new Exon();
                        currExon.start = b;
                    }

                    //When stop Codon is found the Orf is closed and recorded
                    if ("Stp" == currCodon.name)
                    {
                        //Finish Exon
                        currExon.finish = b;
                        currExon.length = currExon.finish - currExon.start;
                        currOrf.ExonList.Add(currExon);

                        //Finish Orf
                        currOrf.finish = b;
                        currOrf.length = currOrf.finish - currOrf.start;

                        currOrf.setAACodonUsage(dtAminoAcid);
                        currOrf.setAAwholeUsage(dtAminoAcid);
                        currOrf.ExonLength = currOrf.ExonList.Sum(exon => exon.length);

                        //Several start Codons can run into the same stop Codon; keep only the first Orf per finish position
                        if (!OrfList.Any(known => known.finish == currOrf.finish))
                        {
                            OrfList.Add(currOrf);
                        }
                    }

                    b = b + 3;
                }
            }

            Statistics myStatistics = new Statistics();

            //Without any Orf every mean below would divide by zero
            if (OrfList.Count == 0)
            {
                return(myStatistics);
            }

            int orfTotal = OrfList.Count;

            //Sums are converted to double before dividing so an integral sum is not truncated by integer division
            double meanExonLength    = Convert.ToDouble(OrfList.Sum(orf => orf.ExonLength)) / orfTotal;
            double StanDevExonLength = Math.Sqrt(OrfList.Sum(orf => Math.Pow(orf.ExonLength - meanExonLength, 2)) / orfTotal);

            myStatistics.meanExonLength    = meanExonLength;
            myStatistics.StanDevExonLength = StanDevExonLength;
            myStatistics.rangeExonLength   = myStatistics.meanExonLength + myStatistics.StanDevExonLength;

            double meanWholeLength    = Convert.ToDouble(OrfList.Sum(orf => orf.length)) / orfTotal;
            double StanDevWholeLength = Math.Sqrt(OrfList.Sum(orf => Math.Pow(orf.length - meanWholeLength, 2)) / orfTotal);

            myStatistics.meanLength    = meanWholeLength;
            myStatistics.StanDevLength = StanDevWholeLength;
            myStatistics.rangeLength   = myStatistics.meanLength + myStatistics.StanDevLength;

            double MeanAACodonUsage    = Convert.ToDouble(OrfList.Sum(orf => orf.AACodonUsage)) / orfTotal;
            double StanDevAACodonUsage = Math.Sqrt(OrfList.Sum(orf => Math.Pow(Convert.ToDouble(orf.AACodonUsage) - MeanAACodonUsage, 2)) / orfTotal);

            myStatistics.MeanAACodonUsage    = MeanAACodonUsage;
            myStatistics.StanDevAACodonUsage = StanDevAACodonUsage;
            myStatistics.RangeAACodonUsage   = myStatistics.MeanAACodonUsage + myStatistics.StanDevAACodonUsage;

            double MeanAAwholeUsage    = Convert.ToDouble(OrfList.Sum(orf => orf.AAwholeUsage)) / orfTotal;
            double StanDevAAwholeUsage = Math.Sqrt(OrfList.Sum(orf => Math.Pow(Convert.ToDouble(orf.AAwholeUsage) - MeanAAwholeUsage, 2)) / orfTotal);

            myStatistics.MeanAAwholeUsage    = MeanAAwholeUsage;
            myStatistics.StanDevAAwholeUsage = StanDevAAwholeUsage;
            myStatistics.RangeAAwholeUsage   = myStatistics.MeanAAwholeUsage + myStatistics.StanDevAAwholeUsage;

            return(myStatistics);
        }
コード例 #2
0
        /// <summary>
        /// Scans a strand for Orfs (start Codon "Met" through stop Codon "Stp"), splitting each
        /// one into Exons and Introns, and returns those that exceed every statistical threshold.
        /// </summary>
        /// <param name="strand">Strand to scan; <c>setLength()</c> is called on it before scanning.</param>
        /// <param name="dtAminoAcid">Codon table searched with <c>Rows.Find</c> on the three-letter code (so it needs a primary key); columns 0, 1 and 2 are passed to the <c>Codon</c> constructor.</param>
        /// <param name="currStat">Thresholds; an Orf is kept only when its ExonLength, AACodonUsage, AAwholeUsage and length are all strictly greater than the matching range value.</param>
        /// <returns>The accepted Orfs, at most one per finish position, in the order they were found.</returns>
        public static List <Orf> findOrf(NucleotideStrand strand, DataTable dtAminoAcid, Statistics currStat)
        {
            strand.setLength();
            List<Orf> OrfList = new List<Orf>();

            //Reading Strand searching for Orf, trying every offset as a possible start Codon
            for (int i = 0; i < (strand.length - 3); i++)
            {
                //DNA is broken into 3 character strings because that is how the body reads DNA. A 3 segment strand is called Codon
                string  srchCode = strand.sequence.Substring(i, 3);
                DataRow srchRow  = dtAminoAcid.Rows.Find(srchCode);

                //Rows.Find returns null for a triplet that is not in the table; such a triplet cannot start an Orf
                if (srchRow == null)
                {
                    continue;
                }

                Codon srchCodon = new Codon(srchRow[0].ToString(), srchRow[1].ToString(), Convert.ToDecimal(srchRow[2]));

                //Only a start Codon opens an Orf
                if ("Met" != srchCodon.name)
                {
                    continue;
                }

                Codon currCodon = srchCodon;
                //b is the current position in the strand
                int b       = i;
                Orf currOrf = new Orf();
                currOrf.start = b;

                Exon currExon = new Exon();
                currExon.start = b;

                //Orfs can be broken down into Exons and Introns; Introns don't follow the same design pattern as Exons,
                //so to properly assess a gene the Exons and Introns have to be separated out
                while ((b < (strand.length - 12)) && (currCodon.name != "Stp"))
                {
                    string  currCode = strand.sequence.Substring(b, 3);
                    DataRow currRow  = dtAminoAcid.Rows.Find(currCode);

                    //An untranslatable triplet ends this candidate; it is dropped because no stop Codon was reached
                    if (currRow == null)
                    {
                        break;
                    }

                    currCodon = new Codon(currRow[0].ToString(), currRow[1].ToString(), Convert.ToDecimal(currRow[2]));

                    currOrf.CodonList.Add(currCodon);
                    currExon.CodonList.Add(currCodon);

                    //Detecting Intron (AGGT indicates the start of an Intron)
                    if ("AGGT" == strand.sequence.Substring(b + 1, 4))
                    {
                        //Finish Exon
                        currExon.finish = b;
                        currExon.length = currExon.finish - currExon.start;
                        currOrf.ExonList.Add(currExon);

                        //Starting Intron
                        Intron currIntron = new Intron();
                        currIntron.start = b;

                        //Reading Intron
                        //A characteristic of an Intron is that it has a branch point in the middle, so before
                        //the Intron has ended the branch must be found.
                        //buildBranchPoint puts the current 8-base window into the format 01234567 if each character meets
                        //the criteria of an Intron
                        //NOTE(review): this loop advances only while the window already equals "01234567";
                        //if the intent is to scan forward until a branch point is found the comparison looks inverted - confirm
                        string IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                        while ((b < (strand.length - 8)) && (IntronStr == "01234567"))
                        {
                            IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                            b++;
                        }

                        //The loop above may leave b up to 4 bases past the last offset where a 12-base read fits;
                        //clamp it so the reads below cannot run off the end of the strand
                        //(assumes setLength() made strand.length equal to the sequence length)
                        b = Math.Min(b, strand.length - 12);

                        //Converts CGs at the end of the Intron into pyrimidines for comparison, because the end of an Intron always contains a chain of pyrimidines
                        //NOTE(review): the loop reads at b and then increments, so on a match found inside the loop
                        //b is one past the matching window, unlike a match on the first read - confirm intended
                        IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                        while ((b < (strand.length - 12)) && ("PPPPPPPPXPAG" != IntronStr))
                        {
                            IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                            b++;
                        }

                        b = b + 12;

                        //Finishing Intron
                        currIntron.finish   = b;
                        currIntron.length   = currIntron.finish - currIntron.start;
                        currIntron.sequence = strand.sequence.Substring(currIntron.start, currIntron.length);
                        currOrf.IntronList.Add(currIntron);

                        //Starting new Exon
                        currExon       = new Exon();
                        currExon.start = b;
                    }

                    //When stop Codon is found the Orf is assessed as to how probable it is that it is a gene
                    if ("Stp" == currCodon.name)
                    {
                        //Finish Exon
                        currExon.finish = b;
                        currExon.length = currExon.finish - currExon.start;
                        currOrf.ExonList.Add(currExon);

                        //Finish Orf
                        currOrf.finish     = b;
                        currOrf.length     = currOrf.finish - currOrf.start;
                        currOrf.ExonLength = currOrf.ExonList.Sum(exon => exon.length);

                        //This calculates the Codon usage relative to what would be expected in a living organism
                        currOrf.setAACodonUsage(dtAminoAcid);
                        currOrf.setAAwholeUsage(dtAminoAcid);

                        //The Orf is only kept when every property is greater than its statistical range
                        bool beatsEveryRange =
                            (currOrf.ExonLength > currStat.rangeExonLength) &&
                            (Convert.ToDouble(currOrf.AACodonUsage) > currStat.RangeAACodonUsage) &&
                            (Convert.ToDouble(currOrf.AAwholeUsage) > currStat.RangeAAwholeUsage) &&
                            (currOrf.length > currStat.rangeLength);

                        //Several start Codons can run into the same stop Codon; keep only the first Orf per finish position
                        if (beatsEveryRange && !OrfList.Any(known => known.finish == currOrf.finish))
                        {
                            OrfList.Add(currOrf);
                        }
                    }

                    b = b + 3;
                }
            }

            return(OrfList);
        }
コード例 #3
0
        /// <summary>
        /// Reads the requested percentage slice of a strand file, analyses it in windows of
        /// lines, and writes the ten Orfs with the highest AACodonUsage to the output file.
        /// </summary>
        /// <param name="inputFileName">Text file holding the strand, read line by line.</param>
        /// <param name="outputFileName">File that receives the found genes; it is created or overwritten.</param>
        /// <param name="PercentageStart">Where the slice starts, as a percentage of the file's line count.</param>
        /// <param name="PercentageFinish">Where the slice ends, as a percentage of the file's line count.</param>
        /// <param name="ProgramFiles">Directory that contains "AminoAcidProbTable.txt".</param>
        private void RunDetector(string inputFileName, string outputFileName, int PercentageStart, int PercentageFinish, string ProgramFiles)
        {
            const int linesPerWindow  = 1000;
            const int windowOverlap   = 100;
            const int maxGenesWritten = 10;

            string aminoAcidTablePath = Path.Combine(ProgramFiles, "AminoAcidProbTable.txt");

            //Getting DNA Strand
            string[] geneStrandArray = File.ReadAllLines(inputFileName);

            //Getting Codon usage values
            DataTable dtAminoAcid = MyMethods.buildAminoAcidDataTable(aminoAcidTablePath);

            //Multiply before dividing so the start line is not rounded down to a multiple of the percentage;
            //long arithmetic keeps the product from overflowing on very large files
            int start = (int)(((long)geneStrandArray.Length * PercentageStart) / 100);

            //Calculating strand length (in lines)
            decimal percentageCoverage = Convert.ToDecimal(PercentageFinish - PercentageStart);
            int     strandLength       = Convert.ToInt32(((Convert.ToDecimal(geneStrandArray.Length) * percentageCoverage) / 100));

            //Reading gene
            List<Orf> OrfList = new List<Orf>();

            //Strands are normally too long to be analysed in one go so are broken down into windows.
            //Rounded up so a trailing partial window (or an input shorter than one window) is still analysed.
            int numberOfWindows = (strandLength + linesPerWindow - 1) / linesPerWindow;

            int lineCount = start;
            int lineLimit = start + linesPerWindow;
            int sliceEnd  = start + strandLength;

            Statistics myStatistics = new Statistics();

            for (int windowCount = 0; windowCount < numberOfWindows; windowCount++)
            {
                //Collect this window's lines; a builder avoids re-copying the growing text on every line
                System.Text.StringBuilder windowText = new System.Text.StringBuilder();
                while (lineCount < lineLimit && lineCount < geneStrandArray.Length && lineCount < sliceEnd)
                {
                    windowText.Append(geneStrandArray[lineCount]);
                    lineCount++;
                }

                NucleotideStrand strand = new NucleotideStrand();
                strand.sequence = windowText.ToString().Replace("\r\n", "").ToUpper();
                strand.length   = strand.sequence.Length;

                if (windowCount == 0)
                {
                    MessageBox.Show("Strand approximately " + (strand.length * numberOfWindows).ToString() + "bp long");
                    //Calculating statistical ranges once, from the first window; they are reused for every window
                    myStatistics = MyMethods.calculateStatistics(strand, dtAminoAcid);
                }

                //Reading strand
                OrfList.AddRange(MyMethods.findOrf(strand, dtAminoAcid, myStatistics));

                //Step back so there is a slight overlap between consecutive windows
                lineCount = lineCount - windowOverlap;
                lineLimit = lineLimit + linesPerWindow;
            }

            //Ordering the list of Orfs by AACodonUsage and writing the top values to file
            OrfList = OrfList.OrderByDescending(orf => orf.AACodonUsage).ToList();

            //Building string of found genes; only the top 10 are wanted
            System.Text.StringBuilder foundGenes = new System.Text.StringBuilder();
            foreach (Orf gene in OrfList.Take(maxGenesWritten))
            {
                foundGenes.Append("<").Append(gene.getWholeSequence()).Append("> \r\n \r\n");
            }

            using (StreamWriter file = new StreamWriter(outputFileName))
            {
                file.WriteLine(foundGenes.ToString());
            }

            pgStatus.Value    = 100;
            lblStatus.Content = "Finished!";
            MessageBox.Show("Program Finished file created: " + outputFileName);
        }