/// <summary>
/// Initializes the window, gives the input-file list view a single-column
/// GridView bound to FILE_NAME, and fills it from the current text of tbInputFiles.
/// </summary>
public MainWindow()
{
    InitializeComponent();

    // The only column shown: header and binding both use the FILE_NAME field.
    GridViewColumn fileNameColumn = new GridViewColumn();
    fileNameColumn.Header = "FILE_NAME";
    fileNameColumn.DisplayMemberBinding = new Binding("FILE_NAME");

    GridView inputFilesView = new GridView
    {
        AllowsColumnReorder = true,
        ColumnHeaderToolTip = "INPUT_FILES"
    };
    inputFilesView.Columns.Add(fileNameColumn);
    lvInputFiles.View = inputFilesView;

    // Build the initial file table from the text box and show its default view.
    dtInputFiles = MyMethods.BuildFileList(tbInputFiles.Text, dtInputFiles);
    lvInputFiles.ItemsSource = dtInputFiles.DefaultView;
}
/// <summary>
/// Recomputes AAwholeUsage from this object's CodonList via
/// MyMethods.calculateAAwholeProbality, stores it, and returns the stored value.
/// </summary>
/// <param name="dtAminoAcid">Amino-acid table passed through to the calculation.</param>
/// <returns>The freshly stored AAwholeUsage.</returns>
public decimal getAAwholeUsage(DataTable dtAminoAcid)
{
    var wholeUsage = MyMethods.calculateAAwholeProbality(dtAminoAcid, CodonList);
    AAwholeUsage = wholeUsage;
    return AAwholeUsage;
}
/// <summary>
/// Recomputes AAwholeUsage from this object's CodonList via
/// MyMethods.calculateAAwholeProbality and stores the result.
/// </summary>
/// <param name="dtAminoAcid">Amino-acid table passed through to the calculation.</param>
public void setAAwholeUsage(DataTable dtAminoAcid)
{
    var wholeUsage = MyMethods.calculateAAwholeProbality(dtAminoAcid, CodonList);
    AAwholeUsage = wholeUsage;
}
/// <summary>
/// Looks for Orfs: scans the strand one base at a time for a start codon ("Met"),
/// then reads codon by codon until a stop codon ("Stp"), splitting the candidate
/// into Exons and Introns along the way. A finished Orf is kept only when its exon
/// length, codon usage, whole usage and total length all exceed the ranges in
/// <paramref name="currStat"/> and no Orf with the same finish position is already listed.
/// </summary>
/// <param name="strand">Strand to scan; setLength() is called on it first.</param>
/// <param name="dtAminoAcid">Codon table searched with Rows.Find on the three-letter code
/// (so it needs a primary key); columns 0-2 feed the Codon constructor.</param>
/// <param name="currStat">Threshold ranges a candidate has to exceed.</param>
/// <returns>The accepted Orfs, in the order they were found.</returns>
public static List<Orf> findOrf(NucleotideStrand strand, DataTable dtAminoAcid, Statistics currStat)
{
    strand.setLength();
    List<Orf> OrfList = new List<Orf>();

    // Reading Strand searching for Orf.
    // DNA is broken into 3 character strings because that is how the body reads DNA.
    // A 3 segment strand is called a Codon.
    for (int i = 0; i < (strand.length - 3); i++)
    {
        string srchCode = strand.sequence.Substring(i, 3);
        DataRow srchRow = dtAminoAcid.Rows.Find(srchCode);
        if (srchRow == null)
        {
            // Triplet is not in the codon table (for example it contains an ambiguous
            // base), so it cannot be a start codon. Previously this dereferenced null.
            continue;
        }

        Codon srchCodon = new Codon(srchRow[0].ToString(), srchRow[1].ToString(), Convert.ToDecimal(srchRow[2]));

        // If start Codon is found the strand is translated until a stop Codon is reached
        if ("Met" != srchCodon.name)
        {
            continue;
        }

        Codon currCodon = srchCodon;
        // b is the current position in the strand
        int b = i;
        Orf currOrf = new Orf();
        currOrf.start = b;
        Exon currExon = new Exon();
        currExon.start = b;

        // Orfs can be broken down into Exons and Introns; Introns don't follow the same
        // design pattern as Exons, so to properly assess a gene they have to be separated out.
        while ((b < (strand.length - 12)) && (currCodon.name != "Stp"))
        {
            string currCode = strand.sequence.Substring(b, 3);
            DataRow currRow = dtAminoAcid.Rows.Find(currCode);
            if (currRow == null)
            {
                // Unknown codon inside the candidate: it cannot be translated, so the
                // candidate is abandoned instead of crashing on a null row.
                break;
            }

            currCodon = new Codon(currRow[0].ToString(), currRow[1].ToString(), Convert.ToDecimal(currRow[2]));
            currOrf.CodonList.Add(currCodon);
            currExon.CodonList.Add(currCodon);

            // Detecting Intron (AGGT indicates the start of an Intron)
            if ("AGGT" == strand.sequence.Substring(b + 1, 4))
            {
                // Finish Exon
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                // Starting Intron
                Intron currIntron = new Intron();
                currIntron.start = b;

                // A characteristic of an Intron is that it has a branch point in the middle.
                // buildBranchPoint puts the 8-base window into the format 01234567 if each
                // character meets the criteria of an Intron.
                // NOTE(review): this loop only advances while the window already equals
                // "01234567", whereas the finish-sequence loop below advances until its
                // pattern matches. Confirm which condition is intended; behavior is unchanged.
                string IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                while ((b < (strand.length - 8)) && (IntronStr == "01234567"))
                {
                    IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                    b++;
                }

                // Converts CGs at the end of the Intron into pyrimidines for comparison because
                // the end contains a chain of pyrimidines.
                // The branch-point loop may have moved b to within 12 bases of the end, where
                // a 12-base window no longer fits; only look for the finish sequence when it does.
                if (b < (strand.length - 12))
                {
                    IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                    while ((b < (strand.length - 12)) && ("PPPPPPPPXPAG" != IntronStr))
                    {
                        IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                        b++;
                    }
                }

                // Skip the 12-base finish window, never stepping past the end of the sequence
                // so the Substring below stays in range.
                b = Math.Min(b + 12, strand.sequence.Length);

                // Finishing Intron
                currIntron.finish = b;
                currIntron.length = currIntron.finish - currIntron.start;
                currIntron.sequence = strand.sequence.Substring(currIntron.start, currIntron.length);
                currOrf.IntronList.Add(currIntron);

                // Starting new Exon
                currExon = new Exon();
                currExon.start = b;
            }

            // When stop Codon is found the Orf is assessed as to how probable it is that it is a gene
            if ("Stp" == currCodon.name)
            {
                // Finish Exon
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                // Finish Orf
                currOrf.finish = b;
                currOrf.length = currOrf.finish - currOrf.start;
                currOrf.ExonLength = currOrf.ExonList.Sum(exon => exon.length);

                // Codon usage relative to what would be expected in a living organism
                currOrf.setAACodonUsage(dtAminoAcid);
                currOrf.setAAwholeUsage(dtAminoAcid);

                // Only Orfs whose properties are greater than the statistical ranges are listed
                bool exceedsRanges =
                    (currOrf.ExonLength > currStat.rangeExonLength)
                    && (Convert.ToDouble(currOrf.AACodonUsage) > currStat.RangeAACodonUsage)
                    && (Convert.ToDouble(currOrf.AAwholeUsage) > currStat.RangeAAwholeUsage)
                    && (currOrf.length > currStat.rangeLength);

                // Several start codons can run into the same stop codon; keep the first one found.
                if (exceedsRanges && !OrfList.Any(known => known.finish == currOrf.finish))
                {
                    OrfList.Add(currOrf);
                }
            }

            b = b + 3;
        }
    }

    return OrfList;
}
/// <summary>
/// Creates stats like average Orf length and Codon usage from a randomly generated
/// strand, to compare real strands against. Every Orf found in the random strand
/// (one per distinct finish position) contributes to a mean and a population standard
/// deviation for exon length, total length, AACodonUsage and AAwholeUsage; each
/// "range" is mean plus one standard deviation.
/// </summary>
/// <param name="experimentalStrand">Strand whose length is passed (twice) to generateRandomDNA.</param>
/// <param name="dtAminoAcid">Codon table searched with Rows.Find on the three-letter code
/// (so it needs a primary key); columns 0-2 feed the Codon constructor.</param>
/// <returns>The populated Statistics, or a default-constructed Statistics when the
/// random strand contains no Orf at all.</returns>
public static Statistics calculateStatistics(NucleotideStrand experimentalStrand, DataTable dtAminoAcid)
{
    NucleotideStrand strand = new NucleotideStrand();
    strand.sequence = generateRandomDNA(experimentalStrand.length, experimentalStrand.length);
    strand.setLength();
    List<Orf> OrfList = new List<Orf>();

    // Reading Strand searching for Orf
    for (int i = 0; i < (strand.length - 3); i++)
    {
        string srchCode = strand.sequence.Substring(i, 3);
        DataRow srchRow = dtAminoAcid.Rows.Find(srchCode);
        if (srchRow == null)
        {
            // Triplet missing from the codon table: cannot be a start codon.
            continue;
        }

        Codon srchCodon = new Codon(srchRow[0].ToString(), srchRow[1].ToString(), Convert.ToDecimal(srchRow[2]));

        // If start Codon is found the strand is translated until a stop Codon is reached
        if ("Met" != srchCodon.name)
        {
            continue;
        }

        Codon currCodon = srchCodon;
        int b = i;
        Orf currOrf = new Orf();
        currOrf.start = b;
        Exon currExon = new Exon();
        currExon.start = b;

        while ((b < (strand.length - 12)) && (currCodon.name != "Stp"))
        {
            string currCode = strand.sequence.Substring(b, 3);
            DataRow currRow = dtAminoAcid.Rows.Find(currCode);
            if (currRow == null)
            {
                // Unknown codon inside the candidate: abandon it rather than dereference null.
                break;
            }

            currCodon = new Codon(currRow[0].ToString(), currRow[1].ToString(), Convert.ToDecimal(currRow[2]));
            currOrf.CodonList.Add(currCodon);
            currExon.CodonList.Add(currCodon);

            // Detecting Intron
            if ("AGGT" == strand.sequence.Substring(b + 1, 4))
            {
                // Finish Exon
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                // Starting Intron
                Intron currIntron = new Intron();
                currIntron.start = b;

                // Reading Intron
                // NOTE(review): this loop only advances while the window already equals
                // "01234567"; the finish-sequence loop below advances until its pattern
                // matches. Confirm which condition is intended; behavior is unchanged.
                string IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                while ((b < (strand.length - 8)) && (IntronStr == "01234567"))
                {
                    IntronStr = MyMethods.buildBranchPoint(strand.sequence.Substring(b, 8));
                    b++;
                }

                // Converts CGs at the end of the Intron into pyrimidines for comparison because
                // the end contains a chain of pyrimidines. Only attempted while a full 12-base
                // window still fits, since the loop above may have moved b close to the end.
                if (b < (strand.length - 12))
                {
                    IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                    while ((b < (strand.length - 12)) && ("PPPPPPPPXPAG" != IntronStr))
                    {
                        IntronStr = MyMethods.buildIntronFinishSeq(strand.sequence.Substring(b, 12));
                        b++;
                    }
                }

                // Skip the finish window without stepping past the end of the sequence.
                b = Math.Min(b + 12, strand.sequence.Length);

                // Finishing Intron
                currIntron.finish = b;
                currIntron.length = currIntron.finish - currIntron.start;
                currIntron.sequence = strand.sequence.Substring(currIntron.start, currIntron.length);
                currOrf.IntronList.Add(currIntron);

                // Starting new Exon
                currExon = new Exon();
                currExon.start = b;
            }

            // When stop Codon is found the Orf is finished and recorded
            if ("Stp" == currCodon.name)
            {
                // Finish Exon
                currExon.finish = b;
                currExon.length = currExon.finish - currExon.start;
                currOrf.ExonList.Add(currExon);

                // Finish Orf
                currOrf.finish = b;
                currOrf.length = currOrf.finish - currOrf.start;
                currOrf.setAACodonUsage(dtAminoAcid);
                currOrf.setAAwholeUsage(dtAminoAcid);
                currOrf.ExonLength = currOrf.ExonList.Sum(exon => exon.length);

                // Several start codons can run into the same stop codon; count it once.
                if (!OrfList.Any(known => known.finish == currOrf.finish))
                {
                    OrfList.Add(currOrf);
                }
            }

            b = b + 3;
        }
    }

    Statistics myStatistics = new Statistics();
    if (OrfList.Count == 0)
    {
        // No Orf in the random strand: there is nothing to average, and dividing by
        // the count would throw. Hand back the untouched Statistics instead.
        return myStatistics;
    }

    // Means are taken in floating point; dividing an integer sum by an integer count
    // would silently drop the fractional part of the mean.
    double meanExonLength = OrfList.Sum(orf => (double)orf.ExonLength) / OrfList.Count;
    myStatistics.meanExonLength = meanExonLength;
    myStatistics.StanDevExonLength = populationDeviation(OrfList.Select(orf => (double)orf.ExonLength), meanExonLength, OrfList.Count);
    myStatistics.rangeExonLength = myStatistics.meanExonLength + myStatistics.StanDevExonLength;

    double meanWholeLength = OrfList.Sum(orf => (double)orf.length) / OrfList.Count;
    myStatistics.meanLength = meanWholeLength;
    myStatistics.StanDevLength = populationDeviation(OrfList.Select(orf => (double)orf.length), meanWholeLength, OrfList.Count);
    myStatistics.rangeLength = myStatistics.meanLength + myStatistics.StanDevLength;

    double MeanAACodonUsage = Convert.ToDouble((OrfList.Sum(orf => orf.AACodonUsage)) / OrfList.Count);
    myStatistics.MeanAACodonUsage = MeanAACodonUsage;
    myStatistics.StanDevAACodonUsage = populationDeviation(OrfList.Select(orf => Convert.ToDouble(orf.AACodonUsage)), MeanAACodonUsage, OrfList.Count);
    myStatistics.RangeAACodonUsage = myStatistics.MeanAACodonUsage + myStatistics.StanDevAACodonUsage;

    double MeanAAwholeUsage = Convert.ToDouble((OrfList.Sum(orf => orf.AAwholeUsage)) / OrfList.Count);
    myStatistics.MeanAAwholeUsage = MeanAAwholeUsage;
    myStatistics.StanDevAAwholeUsage = populationDeviation(OrfList.Select(orf => Convert.ToDouble(orf.AAwholeUsage)), MeanAAwholeUsage, OrfList.Count);
    myStatistics.RangeAAwholeUsage = myStatistics.MeanAAwholeUsage + myStatistics.StanDevAAwholeUsage;

    return myStatistics;
}

/// <summary>
/// Population standard deviation: square root of the mean squared distance from <paramref name="mean"/>.
/// </summary>
private static double populationDeviation(IEnumerable<double> values, double mean, int count)
{
    double squaredDistance = values.Sum(value => Math.Pow(value - mean, 2));
    return Math.Sqrt(squaredDistance / count);
}
/// <summary>
/// Reads the input file, analyses the requested percentage range of its lines in
/// windows of 1000 lines (with a 100-line overlap), collects the Orfs found by
/// MyMethods.findOrf, and writes the ten with the highest AACodonUsage to the output file.
/// The statistical thresholds are calculated once, from the first window.
/// </summary>
/// <param name="inputFileName">File containing the strand, read line by line.</param>
/// <param name="outputFileName">File the found genes are written to.</param>
/// <param name="PercentageStart">Percentage of the file's lines at which analysis starts.</param>
/// <param name="PercentageFinish">Percentage of the file's lines at which analysis ends.</param>
/// <param name="ProgramFiles">Directory containing AminoAcidProbTable.txt.</param>
private void RunDetector(string inputFileName, string outputFileName, int PercentageStart, int PercentageFinish, string ProgramFiles)
{
    string fileDirectory = ProgramFiles + @"\" + "AminoAcidProbTable.txt";

    // Getting DNA Strand
    string[] geneStrandArray = File.ReadAllLines(inputFileName);

    // Getting Codon usage values
    DataTable dtAminoAcid = MyMethods.buildAminoAcidDataTable(fileDirectory);

    // Multiply before dividing: dividing the line count by 100 first truncates it, which
    // made the start line 0 for any file under 100 lines and inconsistent with the
    // precisely computed strandLength below. long avoids overflow on very large files.
    int start = (int)(((long)geneStrandArray.Length * PercentageStart) / 100);

    // Calculating strand length (in lines)
    decimal percentageCoverage = Convert.ToDecimal(PercentageFinish - PercentageStart);
    int strandLength = Convert.ToInt32(((Convert.ToDecimal(geneStrandArray.Length) * percentageCoverage) / 100));

    // Reading gene
    List<Orf> OrfList = new List<Orf>();

    // NOTE(review): integer division means a trailing partial window (and any input
    // covering fewer than 1000 lines) is not analysed; left unchanged here.
    int numberOfWindows = strandLength / 1000;
    int lineCount = start;
    int lineLimit = start + 1000;
    int lastLine = Math.Min(geneStrandArray.Length, start + strandLength);

    // Strands are normally too long to be analysed in one go so are broken down into windows
    Statistics myStatistics = new Statistics();
    for (int windowCount = 0; windowCount < numberOfWindows; windowCount++)
    {
        // Join the window's lines in one pass instead of re-copying the growing string per line.
        int windowEnd = Math.Min(lineLimit, lastLine);
        int linesInWindow = Math.Max(0, windowEnd - lineCount);

        NucleotideStrand strand = new NucleotideStrand();
        strand.sequence = linesInWindow > 0
            ? string.Join("", geneStrandArray, lineCount, linesInWindow)
            : "";
        lineCount += linesInWindow;

        strand.sequence = strand.sequence.Replace("\r\n", "").ToUpperInvariant();
        strand.length = strand.sequence.Length;

        if (windowCount == 0)
        {
            MessageBox.Show("Strand approximately " + (strand.length * numberOfWindows).ToString() + "bp long");

            // Calculating statistical ranges
            myStatistics = MyMethods.calculateStatistics(strand, dtAminoAcid);
        }

        // Reading strand
        OrfList.AddRange(MyMethods.findOrf(strand, dtAminoAcid, myStatistics));

        // -100 so there is a slight overlap between windows
        lineCount = lineCount - 100;
        lineLimit = lineLimit + 1000;
    }

    // Ordering the list of Orfs by AACodonUsage; only the top 10 are written to file
    OrfList = OrfList.OrderByDescending(orf => orf.AACodonUsage).ToList();
    string foundGenes = string.Concat(
        OrfList.Take(10).Select(orf => "<" + orf.getWholeSequence() + "> \r\n \r\n"));

    using (StreamWriter file = new StreamWriter(outputFileName))
    {
        file.WriteLine(foundGenes);
    }

    pgStatus.Value = 100;
    lblStatus.Content = "Finished!";
    MessageBox.Show("Program Finished file created: " + outputFileName);
}
/// <summary>
/// Key-up handler for tbInputFiles: rebuilds the input-file table from the text box's
/// current text and rebinds the list view to the table's default view.
/// </summary>
private void tbInputFiles_KeyUp(object sender, KeyEventArgs e)
{
    string currentText = tbInputFiles.Text;
    dtInputFiles = MyMethods.BuildFileList(currentText, dtInputFiles);

    lvInputFiles.ItemsSource = dtInputFiles.DefaultView;
}