/// <summary>
/// Scans a nucleotide strand for open reading frames (ORFs) and returns the ones whose
/// measurements exceed the supplied baseline thresholds.
/// </summary>
/// <remarks>
/// Every offset of the strand is tried as a possible start. A triplet whose codon name is "Met"
/// opens an ORF, which is then read three bases at a time until a codon named "Stp" is read or
/// fewer than 12 bases remain. While reading, "AGGT" found one base after the current read
/// position marks the start of an intron: the current exon is closed, the intron is skipped and
/// recorded, and a new exon is opened after it.
/// A triplet that is missing from <paramref name="dtAminoAcid"/> is never treated as a start
/// codon, and an ORF that runs into such a triplet, or into an intron that runs off the end of
/// the strand, is abandoned.
/// </remarks>
/// <param name="strand">Strand to scan. <c>setLength()</c> is called on it before scanning.</param>
/// <param name="dtAminoAcid">
/// Codon table. Rows are located by the three-letter code through <c>Rows.Find</c>, and columns
/// 0, 1 and 2 of the row found are passed to the <c>Codon</c> constructor (column 2 as a decimal).
/// </param>
/// <param name="currStat">
/// Thresholds. An ORF is kept only when its exon length, codon usage, whole usage and total
/// length are each strictly greater than the matching range value.
/// </param>
/// <returns>The accepted ORFs, at most one per finish position (the first one found is kept).</returns>
public static List<Orf> findOrf(NucleotideStrand strand, DataTable dtAminoAcid, Statistics currStat)
{
    strand.setLength();
    List<Orf> OrfList = new List<Orf>();

    // Finish positions of the ORFs accepted so far; makes the duplicate check a hash lookup.
    HashSet<int> acceptedFinishes = new HashSet<int>();

    for (int i = 0; i < (strand.length - 3); i++)
    {
        // DNA is read in 3-character groups (codons).
        string srchCode = strand.sequence.Substring(i, 3);
        DataRow srchRow = dtAminoAcid.Rows.Find(srchCode);
        if (srchRow == null)
        {
            // Triplet is not in the codon table, so it cannot be a start codon.
            continue;
        }

        Codon srchCodon = new Codon(srchRow[0].ToString(), srchRow[1].ToString(), Convert.ToDecimal(srchRow[2]));
        if ("Met" != srchCodon.name)
        {
            continue;
        }

        // Start codon found: translate from here until a stop codon is read.
        Codon currCodon = srchCodon;

        // b is the current read position within the strand.
        int b = i;
        Orf currOrf = new Orf();
        currOrf.start = b;
        Exon currExon = new Exon();
        currExon.start = b;

        // An ORF is made of exons separated by introns; the two are recorded separately.
        while ((b < (strand.length - 12)) && (currCodon.name != "Stp"))
        {
            string currCode = strand.sequence.Substring(b, 3);
            DataRow currRow = dtAminoAcid.Rows.Find(currCode);
            if (currRow == null)
            {
                // The reading frame contains a triplet that cannot be translated: abandon this ORF.
                break;
            }

            currCodon = new Codon(currRow[0].ToString(), currRow[1].ToString(), Convert.ToDecimal(currRow[2]));
            currOrf.CodonList.Add(currCodon);
            currExon.CodonList.Add(currCodon);

            // "AGGT" one base past the read position indicates the start of an intron.
            if ("AGGT" == strand.sequence.Substring(b + 1, 4))
            {
                // Close the current exon.
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                Intron currIntron = new Intron();
                currIntron.start = b;

                // Branch-point scan over 8-base windows. Each pass evaluates the window at b and
                // then advances b, so b ends one past the last window examined.
                // NOTE(review): this loop advances only WHILE the window maps to "01234567",
                // whereas the terminator scan below advances UNTIL its pattern matches. If
                // buildBranchPoint returns "01234567" for a window that is a branch point, this
                // condition looks inverted - confirm against MyMethods.buildBranchPoint.
                string IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                while ((b < (strand.length - 8)) && (IntronStr == "01234567"))
                {
                    IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                    b++;
                }

                // The scan above may leave fewer than 12 bases after b; the 12-base window below
                // would then fall outside the sequence, so the ORF is abandoned instead.
                if (b + 12 > strand.sequence.Length)
                {
                    break;
                }

                // Terminator scan over 12-base windows: advance until the window maps to
                // "PPPPPPPPXPAG" or the end of the scannable region is reached.
                IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                while ((b < (strand.length - 12)) && ("PPPPPPPPXPAG" != IntronStr))
                {
                    IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                    b++;
                }
                b = b + 12;

                // Close the intron and record the bases it spans.
                currIntron.finish = b;
                currIntron.length = currIntron.finish - currIntron.start;
                currIntron.sequence = strand.sequence.Substring(currIntron.start, currIntron.length);
                currOrf.IntronList.Add(currIntron);

                // Open the exon that follows the intron.
                currExon = new Exon();
                currExon.start = b;
            }

            // Stop codon read: close the ORF and decide whether it is kept.
            if ("Stp" == currCodon.name)
            {
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                currOrf.finish = b;
                currOrf.length = currOrf.finish - currOrf.start;
                currOrf.ExonLength = currOrf.ExonList.Sum(exon => exon.length);

                // Usage figures are computed by the Orf itself from the codon table.
                currOrf.setAACodonUsage(dtAminoAcid);
                currOrf.setAAwholeUsage(dtAminoAcid);

                bool exceedsBaseline =
                    (currOrf.ExonLength > currStat.rangeExonLength)
                    && (Convert.ToDouble(currOrf.AACodonUsage) > currStat.RangeAACodonUsage)
                    && (Convert.ToDouble(currOrf.AAwholeUsage) > currStat.RangeAAwholeUsage)
                    && (currOrf.length > currStat.rangeLength);

                // Add() returns false when an accepted ORF already finishes at this position.
                if (exceedsBaseline && acceptedFinishes.Add(currOrf.finish))
                {
                    OrfList.Add(currOrf);
                }
            }

            b = b + 3;
        }
    }

    return OrfList;
}
/// <summary>
/// Builds baseline ORF statistics from a randomly generated strand, for comparison against the
/// ORFs found in an experimental strand.
/// </summary>
/// <remarks>
/// The random strand is scanned with <c>findOrf</c> using thresholds that every finite
/// measurement passes, so the baseline ORFs are detected and measured by exactly the same code
/// as the experimental ones. For each of exon length, total length, codon usage and whole usage
/// the result holds the mean, the population standard deviation (divisor is the ORF count) and a
/// "range" equal to mean plus one standard deviation. All arithmetic is done in double precision.
/// When the random strand yields no ORF, every value is 0 instead of the division failing.
/// </remarks>
/// <param name="experimentalStrand">
/// Strand whose <c>length</c> sizes the random strand. Both arguments passed to
/// <c>generateRandomDNA</c> are this length.
/// </param>
/// <param name="dtAminoAcid">Codon table handed through to <c>findOrf</c>.</param>
/// <returns>A new <c>Statistics</c> with the twelve mean / standard deviation / range members set.</returns>
public static Statistics calculateStatistics(NucleotideStrand experimentalStrand, DataTable dtAminoAcid)
{
    // NOTE(review): experimentalStrand.length is read as-is; nothing here calls setLength() on
    // the experimental strand - confirm callers have done so.
    NucleotideStrand strand = new NucleotideStrand();
    strand.sequence = generateRandomDNA(experimentalStrand.length, experimentalStrand.length);

    // findOrf keeps an ORF only when each measurement is strictly greater than its threshold;
    // negative infinity therefore lets every completed ORF through. findOrf also calls
    // strand.setLength() itself.
    Statistics acceptAll = new Statistics();
    acceptAll.rangeExonLength = double.NegativeInfinity;
    acceptAll.rangeLength = double.NegativeInfinity;
    acceptAll.RangeAACodonUsage = double.NegativeInfinity;
    acceptAll.RangeAAwholeUsage = double.NegativeInfinity;
    List<Orf> OrfList = findOrf(strand, dtAminoAcid, acceptAll);

    Statistics myStatistics = new Statistics();
    double mean;
    double stdDev;

    // Summed exon length per ORF.
    meanAndPopulationStdDev(OrfList.Select(orf => Convert.ToDouble(orf.ExonLength)).ToList(), out mean, out stdDev);
    myStatistics.meanExonLength = mean;
    myStatistics.StanDevExonLength = stdDev;
    myStatistics.rangeExonLength = mean + stdDev;

    // Total ORF length (finish - start).
    meanAndPopulationStdDev(OrfList.Select(orf => Convert.ToDouble(orf.length)).ToList(), out mean, out stdDev);
    myStatistics.meanLength = mean;
    myStatistics.StanDevLength = stdDev;
    myStatistics.rangeLength = mean + stdDev;

    // Codon usage as computed by Orf.setAACodonUsage.
    meanAndPopulationStdDev(OrfList.Select(orf => Convert.ToDouble(orf.AACodonUsage)).ToList(), out mean, out stdDev);
    myStatistics.MeanAACodonUsage = mean;
    myStatistics.StanDevAACodonUsage = stdDev;
    myStatistics.RangeAACodonUsage = mean + stdDev;

    // Whole usage as computed by Orf.setAAwholeUsage.
    meanAndPopulationStdDev(OrfList.Select(orf => Convert.ToDouble(orf.AAwholeUsage)).ToList(), out mean, out stdDev);
    myStatistics.MeanAAwholeUsage = mean;
    myStatistics.StanDevAAwholeUsage = stdDev;
    myStatistics.RangeAAwholeUsage = mean + stdDev;

    return myStatistics;
}

// Computes the arithmetic mean and population standard deviation of values; both are 0 for an empty list.
private static void meanAndPopulationStdDev(List<double> values, out double mean, out double stdDev)
{
    if (values.Count == 0)
    {
        mean = 0.0;
        stdDev = 0.0;
        return;
    }

    // A local copy is needed because an out parameter cannot be captured by the lambda below.
    double average = values.Sum() / values.Count;
    double sumOfSquares = values.Sum(value => Math.Pow(value - average, 2));

    mean = average;
    stdDev = Math.Sqrt(sumOfSquares / values.Count);
}