/// <summary>
/// Mutates one codon of an individual and refreshes the stored score.
/// A position is drawn at random, the codon found there is translated to its
/// amino acid through SeqParser.codonToAmino, and randomizeCodon supplies the
/// replacement codon for that amino acid. Afterwards the score slot
/// PopulationScores[IndividualIdx] is recomputed from Population[IndividualIdx].
/// </summary>
/// <param name="Individual">Codon list that is modified in place.</param>
/// <param name="IndividualIdx">Index into Population / PopulationScores used when the score is recalculated.</param>
/// <param name="orf">ORF whose aminoAcidCounts are passed to ORF.MultiScore.</param>
private void mutate(List<string> Individual, int IndividualIdx, ORF orf)
{
    // The upper bound is Count - 1; assuming rnd is a System.Random (exclusive upper
    // bound), the final position of the list is never selected.
    // NOTE(review): presumably this protects the terminal (stop) codon - confirm.
    int position = rnd.Next(0, Individual.Count - 1);
    string aminoAcid = SeqParser.codonToAmino[Individual[position]];

    // Swap in a randomized codon for the same amino acid.
    Individual[position] = randomizeCodon(aminoAcid);

    // NOTE(review): homopolymers are cleaned here, but restriction enzyme sites are not
    // re-checked after the mutation - confirm this is intended.
    if (AHomopolymersRemoval == true)
    {
        HomopolymersRemove(Individual);
    }

    // Without Nc maintenance the score is the plain CPB value.
    if (MaintainOriginalNc != true)
    {
        PopulationScores[IndividualIdx] = ORF.CPBcalculator(Population[IndividualIdx]);
        return;
    }

    // With Nc maintenance the score comes from MultiScore; only modes 1 and 0 are
    // handled, any other mode leaves the stored score untouched.
    // The score is computed from Population[IndividualIdx], not from the Individual
    // argument; the callers visible in this file pass the same list for both.
    if (OptimizationMode == 1)
    {
        PopulationScores[IndividualIdx] = ORF.MultiScore(Population[IndividualIdx], orf.aminoAcidCounts, minimalNc, maximalNc, 1);
    }
    else if (OptimizationMode == 0)
    {
        PopulationScores[IndividualIdx] = ORF.MultiScore(Population[IndividualIdx], orf.aminoAcidCounts, minimalNc, maximalNc, 0);
    }
}
/// <summary>
/// Copy constructor: initializes orfSeq and aminoORFseq with new lists that hold
/// the same elements as the corresponding lists of <paramref name="orfToCopy"/>.
/// </summary>
/// <param name="orfToCopy">ORF whose orfSeq and aminoORFseq are duplicated.</param>
public ORF(ORF orfToCopy)
{
    // NOTE(review): only these two members are copied. Other members read by the
    // optimizer (e.g. aminoAcidCounts) are not set here - confirm they are
    // populated elsewhere before the copy is used.
    this.orfSeq = new List<string>(orfToCopy.orfSeq);
    this.aminoORFseq = new List<string>(orfToCopy.aminoORFseq);
}
/// <summary>
/// Runs the genetic-algorithm optimization for an ORF and returns the best individual.
/// Steps: build the amino acid -> codons lookup, derive the Nc window (when
/// MaintainOriginalNc is set), generate the initial population, optionally clean it
/// (restriction enzyme sites, homopolymers) and rescore it, then run up to
/// ReproductiveCyclesNumber cycles of mutation and crossover. When crossover is
/// enabled, the run ends early once BestScore has not changed for StopCriterion
/// consecutive cycles.
/// </summary>
/// <param name="orf">ORF providing orfSeq, aminoORFseq and aminoAcidCounts.</param>
/// <param name="o">Expected to be the BackgroundWorker running this job; used only for progress reporting. Progress reporting is skipped when it is not a BackgroundWorker.</param>
/// <param name="e">DoWork event arguments; not used by this method.</param>
/// <returns>BestIndividual after the final call to updateBestIndividual.</returns>
public List<string> optimizeORF(ORF orf, object o, DoWorkEventArgs e)
{
    int stopCounter = 0;
    double lastScore = 0;
    bool allowed;
    BackgroundWorker worker = o as BackgroundWorker;

    // new population and new scores initialization
    NewPopulation = new List<List<string>>();
    NewPopulationScores = new List<double>();
    minimalNc = MinimalNc;
    maximalNc = MaximalNc;

    // codons grouped by the amino acid they encode
    codonGroups = SeqParser.codonToAmino
        .GroupBy(x => x.Value)
        .ToDictionary(x => x.Key, x => x.Select(i => i.Key).ToList());

    // Nc window: the configured margins are applied around the Nc of the original ORF
    if (MaintainOriginalNc == true)
    {
        var originalNc = ORF.NcCalculator(orf.orfSeq, orf.aminoAcidCounts);
        minimalNc = originalNc - minimalNc;
        maximalNc = originalNc + maximalNc;
    }

    // homopolymers counting
    if (AHomopolymersRemoval == true)
    {
        lysineIdx = HomopolymersCheck(orf.aminoORFseq);
    }

    // initial population generation
    generateInitialPopulation(orf);

    // restriction enzyme sites removal
    if (RestrEnzymeSitesToRemoval == true)
    {
        for (int i = 0; i < PopulationSize; i++)
        {
            // The flag must be reset for every individual; otherwise only the first
            // individual would ever be processed.
            allowed = false;
            while (allowed == false)
            {
                allowed = enzymeSitesRemove(Population[i], allowed, i);
            }
        }
    }

    // homopolymers removal, followed by a second enzyme-site pass because the
    // homopolymer cleanup modifies the individuals again
    if (AHomopolymersRemoval == true)
    {
        for (int i = 0; i < PopulationSize; i++)
        {
            HomopolymersRemove(Population[i]);
        }

        if (RestrEnzymeSitesToRemoval == true)
        {
            for (int i = 0; i < PopulationSize; i++)
            {
                allowed = false;
                while (allowed == false)
                {
                    allowed = enzymeSitesRemove(Population[i], allowed, i);
                }
            }
        }
    }

    // The cleanup steps changed the individuals, so every score is recomputed from the
    // individual itself, using the same scoring rule as mutate and crossover.
    if (AHomopolymersRemoval == true || RestrEnzymeSitesToRemoval == true)
    {
        PopulationScores.Clear();
        for (int i = 0; i < PopulationSize; i++)
        {
            if (MaintainOriginalNc == true)
            {
                if (OptimizationMode == 1)
                {
                    PopulationScores.Add(ORF.MultiScore(Population[i], orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                }
                else if (OptimizationMode == 0)
                {
                    PopulationScores.Add(ORF.MultiScore(Population[i], orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                }
            }
            else
            {
                PopulationScores.Add(ORF.CPBcalculator(Population[i]));
            }
        }
    }

    // reproductive cycles
    for (int i = 0; i < ReproductiveCyclesNumber; i++)
    {
        // mutation of randomly chosen individuals
        for (int j = 0; j < Math.Round(Population.Count * MutationProbability); j++)
        {
            int individual = rnd.Next(0, Population.Count);
            mutate(Population[individual], individual, orf);
        }

        if (CrossoverProbability != 0)
        {
            // selection
            selectIndividualsForCrossover(TournamentSize);

            // crossover
            crossover(orf, minimalNc, maximalNc);

            // stagnation tracking
            if (lastScore != BestScore)
            {
                lastScore = BestScore;
                stopCounter = 0;
            }
            else
            {
                stopCounter++;
            }

            // jump to the last cycle so the loop ends after this iteration
            if (stopCounter == StopCriterion)
            {
                i = ReproductiveCyclesNumber - 1;
            }
        }

        Thread.Sleep(1);

        if (worker != null)
        {
            // A single-cycle run would otherwise divide by zero.
            int percent = ReproductiveCyclesNumber > 1
                ? 100 * i / (ReproductiveCyclesNumber - 1)
                : 100;
            worker.ReportProgress(percent);
        }
    }

    // updating best individual
    updateBestIndividual();
    return BestIndividual;
}
/// <summary>
/// Builds the next generation using uniform crossover.
/// First, a number of randomly drawn individuals ("end" of them) are cleaned
/// (enzyme sites / homopolymers, when enabled) and moved to the new population
/// together with their stored scores. Then (PopulationSize - end) / 2 pairs of
/// distinct parents are drawn; for each pair a random 0/1 mask decides, position by
/// position, which parent contributes the codon to which child. The children are
/// cleaned, scored and added to the new population, and both parents are removed
/// from Population. Finally the new population and its scores are appended to
/// Population / PopulationScores and updateBestIndividual is called.
/// </summary>
/// <param name="orf">ORF whose aminoAcidCounts are passed to ORF.MultiScore.</param>
/// <param name="minimalNc">Lower Nc bound forwarded to ORF.MultiScore.</param>
/// <param name="maximalNc">Upper Nc bound forwarded to ORF.MultiScore.</param>
private void crossover(ORF orf, double minimalNc, double maximalNc)
{
    // every mask has the length of the first individual
    int maskLength = Population[0].Count;

    NewPopulation.Clear();
    NewPopulationScores.Clear();

    // Number of individuals carried over unchanged; one more is carried over when the
    // rounded crossover share is odd, so that an even number remains for pairing.
    var roundedShare = Math.Round(PopulationSize * CrossoverProbability);
    int end = PopulationSize - (int)roundedShare;
    if (roundedShare % 2 != 0)
    {
        end += 1;
    }

    // individuals carried over without crossover
    for (int i = 0; i < end; i++)
    {
        int pick = rnd.Next(0, Population.Count);

        // NOTE(review): the loop counter, not the index of the picked individual, is
        // passed as the third argument of enzymeSitesRemove - confirm what the helper
        // does with it.
        if (RestrEnzymeSitesToRemoval == true)
        {
            repeatEnzymeSitesRemove(Population[pick], i);
        }

        if (AHomopolymersRemoval == true)
        {
            HomopolymersRemove(Population[pick]);

            // homopolymer cleanup is followed by another enzyme-site pass
            if (RestrEnzymeSitesToRemoval == true)
            {
                repeatEnzymeSitesRemove(Population[pick], i);
            }
        }

        // NOTE(review): the score is copied as stored, without rescoring after the
        // cleanup above - confirm the cleanup cannot change the score.
        NewPopulation.Add(Population[pick]);
        NewPopulationScores.Add(PopulationScores[pick]);
        Population.RemoveAt(pick);
        PopulationScores.RemoveAt(pick);
    }

    // pairs of parents for crossover
    int pairCount = (PopulationSize - end) / 2;
    for (int pair = 0; pair < pairCount; pair++)
    {
        int firstIdx = rnd.Next(0, Population.Count);
        int secondIdx = rnd.Next(0, Population.Count);

        // redraw until the two parents differ
        while (secondIdx == firstIdx)
        {
            secondIdx = rnd.Next(0, Population.Count);
        }

        // the whole mask is drawn before it is applied
        int[] mask = new int[maskLength];
        for (int k = 0; k < maskLength; k++)
        {
            mask[k] = rnd.Next(0, 2);
        }

        List<string> parentA = Population[firstIdx];
        List<string> parentB = Population[secondIdx];
        List<string> childA = new List<string>();
        List<string> childB = new List<string>();

        // mask 0: children copy "their own" parent; mask 1: the codons are swapped
        for (int k = 0; k < maskLength; k++)
        {
            bool swap = mask[k] == 1;
            childA.Add(swap ? parentB[k] : parentA[k]);
            childB.Add(swap ? parentA[k] : parentB[k]);
        }

        if (RestrEnzymeSitesToRemoval == true)
        {
            repeatEnzymeSitesRemove(childA, pair);
            repeatEnzymeSitesRemove(childB, pair);
        }

        if (AHomopolymersRemoval == true)
        {
            HomopolymersRemove(childA);
            HomopolymersRemove(childB);

            if (RestrEnzymeSitesToRemoval == true)
            {
                repeatEnzymeSitesRemove(childA, pair);
                repeatEnzymeSitesRemove(childB, pair);
            }
        }

        // children join the new population
        NewPopulation.Add(childA);
        NewPopulation.Add(childB);

        // scoring: MultiScore (modes 1 and 0 only) with Nc maintenance, CPB otherwise
        if (MaintainOriginalNc == true)
        {
            if (OptimizationMode == 1)
            {
                NewPopulationScores.Add(ORF.MultiScore(childA, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                NewPopulationScores.Add(ORF.MultiScore(childB, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
            }
            else if (OptimizationMode == 0)
            {
                NewPopulationScores.Add(ORF.MultiScore(childA, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                NewPopulationScores.Add(ORF.MultiScore(childB, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
            }
        }
        else
        {
            NewPopulationScores.Add(ORF.CPBcalculator(childA));
            NewPopulationScores.Add(ORF.CPBcalculator(childB));
        }

        // remove the used parents, higher index first so the lower one stays valid
        Population.RemoveAt(Math.Max(firstIdx, secondIdx));
        Population.RemoveAt(Math.Min(firstIdx, secondIdx));
    }

    // Old scores are dropped; the new generation and its scores are appended.
    PopulationScores.Clear();
    for (int j = 0; j < NewPopulation.Count; j++)
    {
        Population.Add(NewPopulation[j]);
        PopulationScores.Add(NewPopulationScores[j]);
    }

    // updating best individual
    updateBestIndividual();
}

/// <summary>
/// Calls enzymeSitesRemove on the individual repeatedly until it returns true.
/// </summary>
/// <param name="individual">Codon list handed to enzymeSitesRemove.</param>
/// <param name="index">Value forwarded as the third argument of enzymeSitesRemove.</param>
private void repeatEnzymeSitesRemove(List<string> individual, int index)
{
    bool allowed = false;
    do
    {
        allowed = enzymeSitesRemove(individual, allowed, index);
    }
    while (allowed == false);
}
/// <summary>
/// Creates the initial population, its scores and the initial best individual.
/// Without Nc maintenance every individual is built by choosing a randomized codon
/// for each amino acid of the ORF and is scored with ORF.CPBcalculator. With Nc
/// maintenance every individual starts as a copy of the original codon sequence,
/// is scored with ORF.MultiScore, and the population then receives
/// 10 * Population.Count mutations of randomly chosen individuals.
/// In both cases the best individual is seeded once from the first individual
/// after the population is complete, and updateBestIndividual is called.
/// </summary>
/// <param name="orf">ORF providing aminoORFseq, orfSeq and aminoAcidCounts.</param>
private void generateInitialPopulation(ORF orf)
{
    // new population of individuals and scores, new best individual
    Population = new List<List<string>>();
    PopulationScores = new List<double>();
    BestIndividual = new List<string>();

    if (MaintainOriginalNc == true)
    {
        for (int i = 0; i < PopulationSize; i++)
        {
            Population.Add(new List<string>(orf.orfSeq));

            // only modes 1 and 0 produce a score
            if (OptimizationMode == 1)
            {
                PopulationScores.Add(ORF.MultiScore(orf.orfSeq, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
            }
            else if (OptimizationMode == 0)
            {
                PopulationScores.Add(ORF.MultiScore(orf.orfSeq, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
            }
        }

        // Diversify the copies. The target of each mutation is drawn at random, so
        // individual members may receive more or fewer than 10 mutations.
        for (int i = 0; i < Population.Count; i++)
        {
            for (int j = 0; j < 10; j++)
            {
                int individual = rnd.Next(0, Population.Count);
                mutate(Population[individual], individual, orf);
            }
        }
    }
    else
    {
        for (int i = 0; i < PopulationSize; i++)
        {
            List<string> tempIndividual = new List<string>();

            // Positions whose amino acid symbol is "/" receive the last codon of the
            // original ORF; all other positions get a randomized codon.
            // NOTE(review): "/" presumably marks the stop position - confirm.
            foreach (string amino in orf.aminoORFseq)
            {
                if (amino != "/")
                {
                    tempIndividual.Add(randomizeCodon(amino));
                }
                else
                {
                    tempIndividual.Add(orf.orfSeq.Last());
                }
            }

            Population.Add(tempIndividual);

            // NOTE(review): writes every generated individual to the console; looks
            // like leftover debug output - kept to preserve existing behavior.
            for (int j = 0; j < tempIndividual.Count; j++)
            {
                Console.Write(tempIndividual[j]);
            }

            PopulationScores.Add(ORF.CPBcalculator(tempIndividual));
        }
    }

    // Nothing to seed from when the population is empty (PopulationSize <= 0).
    if (Population.Count == 0)
    {
        return;
    }

    // Seed the best individual exactly once. Doing this inside the generation loop
    // would append the first individual to BestIndividual on every iteration.
    BestScore = PopulationScores[0];
    BestIndividual.AddRange(Population[0]);
    updateBestIndividual();
}