/// <summary>
        /// Mutates a single individual: one randomly chosen codon is swapped for a
        /// codon returned by randomizeCodon for the same amino acid, then the score
        /// stored at <paramref name="IndividualIdx"/> is recomputed.
        /// </summary>
        /// <param name="Individual">Codon list that is modified in place.</param>
        /// <param name="IndividualIdx">Index into Population / PopulationScores whose score is refreshed.</param>
        /// <param name="orf">Supplies aminoAcidCounts for the MultiScore calculation.</param>
        private void mutate(List <string> Individual, int IndividualIdx, ORF orf)
        {
            // The upper bound is Count - 1, so (assuming rnd is a System.Random, whose
            // upper bound is exclusive) the final codon is never selected.
            // NOTE(review): presumably this protects the terminating codon - confirm.
            int    position     = rnd.Next(0, Individual.Count - 1);
            string encodedAmino = SeqParser.codonToAmino[Individual[position]];

            Individual[position] = randomizeCodon(encodedAmino);

            // optional homopolymer clean-up of the mutated sequence
            if (AHomopolymersRemoval == true)
            {
                HomopolymersRemove(Individual);
            }

            // Without Nc maintenance the score is the plain CPB value.
            if (MaintainOriginalNc != true)
            {
                PopulationScores[IndividualIdx] = ORF.CPBcalculator(Population[IndividualIdx]);
                return;
            }

            // With Nc maintenance the score comes from MultiScore; only modes 1 and 0
            // are handled, any other mode leaves the stored score untouched.
            if (OptimizationMode == 1)
            {
                PopulationScores[IndividualIdx] = ORF.MultiScore(Population[IndividualIdx], orf.aminoAcidCounts, minimalNc, maximalNc, 1);
            }
            else if (OptimizationMode == 0)
            {
                PopulationScores[IndividualIdx] = ORF.MultiScore(Population[IndividualIdx], orf.aminoAcidCounts, minimalNc, maximalNc, 0);
            }
        }
Example #2
0
 /// <summary>
 /// Copy constructor: creates a new ORF whose orfSeq and aminoORFseq are
 /// independent list copies of the ones held by <paramref name="orfToCopy"/>.
 /// </summary>
 /// <param name="orfToCopy">ORF to copy from; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="orfToCopy"/> is null.
 /// </exception>
 public ORF(ORF orfToCopy)
 {
     // Fail with a descriptive exception instead of a bare NullReferenceException.
     if (orfToCopy == null)
     {
         throw new System.ArgumentNullException("orfToCopy");
     }

     // NOTE(review): only the two sequences are copied. Other code reads
     // aminoAcidCounts from ORF instances - confirm whether it should be copied too.
     orfSeq      = new List <string>(orfToCopy.orfSeq);
     aminoORFseq = new List <string>(orfToCopy.aminoORFseq);
 }
        /// <summary>
        /// ORF optimization: builds an initial population, optionally removes
        /// restriction enzyme sites and homopolymers from it, then runs the
        /// reproductive cycles (mutation, selection, crossover) and returns the
        /// best individual found.
        /// </summary>
        /// <param name="orf">ORF to optimize; its orfSeq, aminoORFseq and aminoAcidCounts are read.</param>
        /// <param name="o">Expected to be the BackgroundWorker driving the optimization; used only for progress reporting. Progress is skipped when it is not a BackgroundWorker.</param>
        /// <param name="e">Not used by this method.</param>
        /// <returns>BestIndividual after the final call to updateBestIndividual.</returns>
        public List <string> optimizeORF(ORF orf, object o, DoWorkEventArgs e)
        {
            int    stopCounter = 0;
            double lastScore   = 0;
            bool   allowed;

            // Resolved once; a null worker simply disables progress reporting.
            BackgroundWorker worker = o as BackgroundWorker;

            // new population and new scores initialization
            NewPopulation       = new List <List <string> >();
            NewPopulationScores = new List <double>();

            minimalNc = MinimalNc;
            maximalNc = MaximalNc;

            // preparation
            // codons grouping to dictionary (amino acid -> list of its codons)
            codonGroups = SeqParser.codonToAmino.GroupBy(x => x.Value)
                          .ToDictionary(x => x.Key, x => x.Select(i => i.Key).ToList());

            // calculation of minimal and maximal Nc as a window around the original Nc
            if (MaintainOriginalNc == true)
            {
                var originalNc = ORF.NcCalculator(orf.orfSeq, orf.aminoAcidCounts);
                minimalNc = originalNc - minimalNc;
                maximalNc = originalNc + maximalNc;
            }

            // Homopolymers counting
            if (AHomopolymersRemoval == true)
            {
                lysineIdx = HomopolymersCheck(orf.aminoORFseq);
            }

            // initial population generation
            generateInitialPopulation(orf);

            // Restriction enzymes sites removal
            if (RestrEnzymeSitesToRemoval == true)
            {
                for (int i = 0; i < PopulationSize; i++)
                {
                    // The flag must be reset for every individual; otherwise only
                    // the first individual would ever be processed.
                    allowed = false;

                    while (allowed == false)
                    {
                        allowed = enzymeSitesRemove(Population[i], allowed, i);
                    }
                }
            }

            // Homopolymers removal
            if (AHomopolymersRemoval == true)
            {
                for (int i = 0; i < PopulationSize; i++)
                {
                    HomopolymersRemove(Population[i]);
                }

                // Homopolymer removal changed the sequences, so the enzyme sites
                // are checked again.
                if (RestrEnzymeSitesToRemoval == true)
                {
                    for (int i = 0; i < PopulationSize; i++)
                    {
                        allowed = false;

                        while (allowed == false)
                        {
                            allowed = enzymeSitesRemove(Population[i], allowed, i);
                        }
                    }
                }
            }

            // The clean-up steps above modified the individuals, so every score is
            // recomputed from the individual itself, using the same scoring rule as
            // mutate and crossover (MultiScore when Nc is maintained, CPB otherwise).
            if (AHomopolymersRemoval == true || RestrEnzymeSitesToRemoval == true)
            {
                PopulationScores.Clear();
                for (int i = 0; i < PopulationSize; i++)
                {
                    if (MaintainOriginalNc == true)
                    {
                        if (OptimizationMode == 1)
                        {
                            PopulationScores.Add(ORF.MultiScore(Population[i], orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                        }
                        if (OptimizationMode == 0)
                        {
                            PopulationScores.Add(ORF.MultiScore(Population[i], orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                        }
                    }
                    else
                    {
                        PopulationScores.Add(ORF.CPBcalculator(Population[i]));
                    }
                }
            }

            // reproductive cycles
            for (int i = 0; i < ReproductiveCyclesNumber; i++)
            {
                // mutation: the number of mutations per cycle is proportional to
                // the current population size
                var mutationCount = Math.Round(Population.Count * MutationProbability);
                for (int j = 0; j < mutationCount; j++)
                {
                    // individual randomization
                    int individual = rnd.Next(0, Population.Count);
                    // mutation of given codon
                    mutate(Population[individual], individual, orf);
                }

                if (CrossoverProbability != 0)
                {
                    // selection
                    selectIndividualsForCrossover(TournamentSize);

                    // crossover
                    crossover(orf, minimalNc, maximalNc);

                    // count consecutive cycles without a change of the best score
                    if (lastScore != BestScore)
                    {
                        lastScore   = BestScore;
                        stopCounter = 0;
                    }
                    else
                    {
                        stopCounter++;
                    }

                    // early stop: jump to the last cycle so the loop ends and the
                    // progress below is reported as complete
                    if (stopCounter == StopCriterion)
                    {
                        i = ReproductiveCyclesNumber - 1;
                    }
                }

                Thread.Sleep(1);

                if (worker != null)
                {
                    // A single-cycle run would otherwise divide by zero.
                    int percent = ReproductiveCyclesNumber > 1
                                  ? 100 * i / (ReproductiveCyclesNumber - 1)
                                  : 100;
                    worker.ReportProgress(percent);
                }
            }

            // updating best individual
            updateBestIndividual();

            return(BestIndividual);
        }
        /// <summary>
        /// Method for crossover (uniform crossover).
        /// Moves a number of randomly picked individuals unchanged (apart from the
        /// optional site/homopolymer clean-up) into the new population, fills the
        /// rest with children of randomly paired parents, and finally makes the new
        /// population the current one.
        /// </summary>
        /// <param name="orf">Supplies aminoAcidCounts for the MultiScore calculation.</param>
        /// <param name="minimalNc">Lower Nc bound passed to MultiScore.</param>
        /// <param name="maximalNc">Upper Nc bound passed to MultiScore.</param>
        private void crossover(ORF orf, double minimalNc, double maximalNc)
        {
            // every individual is assumed to have the length of the first one
            int genomeLength = Population[0].Count;

            // clearing new population and scores
            NewPopulation.Clear();
            NewPopulationScores.Clear();

            // Number of individuals carried over without crossover. When the rounded
            // crossover share is odd, one more individual is carried over so that
            // the remainder can be split into parent pairs.
            var roundedCrossoverShare = Math.Round(PopulationSize * CrossoverProbability);
            int carriedOver           = PopulationSize - (int)roundedCrossoverShare;
            if (roundedCrossoverShare % 2 != 0)
            {
                carriedOver += 1;
            }

            bool sitesClean;

            for (int i = 0; i < carriedOver; i++)
            {
                int pick = rnd.Next(0, Population.Count);

                // Restriction enzymes sites removal
                // NOTE(review): the loop counter i, not pick, is passed as the last
                // argument - confirm what enzymeSitesRemove does with it.
                if (RestrEnzymeSitesToRemoval == true)
                {
                    do
                    {
                        sitesClean = enzymeSitesRemove(Population[pick], false, i);
                    }
                    while (sitesClean == false);
                }

                // Homopolymers removal, followed by a second enzyme site pass
                if (AHomopolymersRemoval == true)
                {
                    HomopolymersRemove(Population[pick]);

                    if (RestrEnzymeSitesToRemoval == true)
                    {
                        do
                        {
                            sitesClean = enzymeSitesRemove(Population[pick], false, i);
                        }
                        while (sitesClean == false);
                    }
                }

                // move the individual together with its stored score
                // NOTE(review): the score is not recomputed here after the clean-up
                // above - confirm that this is intended.
                NewPopulation.Add(Population[pick]);
                NewPopulationScores.Add(PopulationScores[pick]);
                Population.RemoveAt(pick);
                PopulationScores.RemoveAt(pick);
            }

            // randomization of parents for cross over
            int pairCount = (PopulationSize - carriedOver) / 2;

            for (int i = 0; i < pairCount; i++)
            {
                int parentA = rnd.Next(0, Population.Count);
                int parentB = rnd.Next(0, Population.Count);

                // rerandomization if parent index was repeated
                while (parentB == parentA)
                {
                    parentB = rnd.Next(0, Population.Count);
                }

                // crossover mask: one random 0/1 per codon position, drawn up front
                int[] mask = new int[genomeLength];
                for (int locus = 0; locus < genomeLength; locus++)
                {
                    mask[locus] = rnd.Next(0, 2);
                }

                // creation of new individuals using the crossover mask:
                // 0 keeps the parents' codons in place, 1 swaps them
                List <string> childA = new List <string>();
                List <string> childB = new List <string>();

                for (int locus = 0; locus < genomeLength; locus++)
                {
                    if (mask[locus] == 0)
                    {
                        childA.Add(Population[parentA][locus]);
                        childB.Add(Population[parentB][locus]);
                    }
                    else if (mask[locus] == 1)
                    {
                        childA.Add(Population[parentB][locus]);
                        childB.Add(Population[parentA][locus]);
                    }
                }

                // Restriction enzymes sites removal
                if (RestrEnzymeSitesToRemoval == true)
                {
                    do
                    {
                        sitesClean = enzymeSitesRemove(childA, false, i);
                    }
                    while (sitesClean == false);

                    do
                    {
                        sitesClean = enzymeSitesRemove(childB, false, i);
                    }
                    while (sitesClean == false);
                }

                // Homopolymers removal, followed by a second enzyme site pass
                if (AHomopolymersRemoval == true)
                {
                    HomopolymersRemove(childA);
                    HomopolymersRemove(childB);

                    if (RestrEnzymeSitesToRemoval == true)
                    {
                        do
                        {
                            sitesClean = enzymeSitesRemove(childA, false, i);
                        }
                        while (sitesClean == false);

                        do
                        {
                            sitesClean = enzymeSitesRemove(childB, false, i);
                        }
                        while (sitesClean == false);
                    }
                }

                // creating new population with new individuals and new scores
                NewPopulation.Add(childA);
                NewPopulation.Add(childB);

                if (MaintainOriginalNc == true)
                {
                    // only modes 1 and 0 produce scores
                    if (OptimizationMode == 1)
                    {
                        NewPopulationScores.Add(ORF.MultiScore(childA, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                        NewPopulationScores.Add(ORF.MultiScore(childB, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                    }
                    else if (OptimizationMode == 0)
                    {
                        NewPopulationScores.Add(ORF.MultiScore(childA, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                        NewPopulationScores.Add(ORF.MultiScore(childB, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                    }
                }
                else
                {
                    NewPopulationScores.Add(ORF.CPBcalculator(childA));
                    NewPopulationScores.Add(ORF.CPBcalculator(childB));
                }

                // removing "used" parents, higher index first so that the lower
                // index is still valid for the second removal
                Population.RemoveAt(Math.Max(parentA, parentB));
                Population.RemoveAt(Math.Min(parentA, parentB));
            }

            // the new population becomes the current one; scores are rebuilt in step
            PopulationScores.Clear();

            for (int k = 0; k < NewPopulation.Count; k++)
            {
                Population.Add(NewPopulation[k]);
                PopulationScores.Add(NewPopulationScores[k]);
            }

            // updating best individual
            updateBestIndividual();
        }
        /// <summary>
        /// Method for generation of initial population.
        /// Without Nc maintenance every individual is built from randomly chosen
        /// codons for the ORF's amino acid sequence and scored with CPBcalculator.
        /// With Nc maintenance every individual starts as a copy of the original
        /// codon sequence, scored with MultiScore, and the population is then
        /// diversified by random mutations.
        /// In both cases the best individual is seeded from the first individual
        /// and then refreshed through updateBestIndividual.
        /// </summary>
        /// <param name="orf">ORF whose aminoORFseq, orfSeq and aminoAcidCounts are read.</param>
        private void generateInitialPopulation(ORF orf)
        {
            // new population of individuals and scores initialization, new best individual initialization
            Population       = new List <List <string> >();
            PopulationScores = new List <double>();
            BestIndividual   = new List <string>();

            if (MaintainOriginalNc != true)
            {
                for (int i = 0; i < PopulationSize; i++)
                {
                    // new individual initialization
                    List <string> tempIndividual = new List <string>();

                    // randomization of codons for given amino acid sequence
                    foreach (string amino in orf.aminoORFseq)
                    {
                        if (amino != "/")
                        {
                            tempIndividual.Add(randomizeCodon(amino));
                        }
                        else
                        {
                            // "/" entries keep the last codon of the original sequence
                            tempIndividual.Add(orf.orfSeq.Last());
                        }
                    }

                    Population.Add(tempIndividual);
                    PopulationScores.Add(ORF.CPBcalculator(tempIndividual));
                }
            }
            else
            {
                for (int i = 0; i < PopulationSize; i++)
                {
                    // every individual starts as an independent copy of the original sequence
                    Population.Add(new List <string>(orf.orfSeq));
                    if (OptimizationMode == 1)
                    {
                        PopulationScores.Add(ORF.MultiScore(orf.orfSeq, orf.aminoAcidCounts, minimalNc, maximalNc, 1));
                    }
                    if (OptimizationMode == 0)
                    {
                        PopulationScores.Add(ORF.MultiScore(orf.orfSeq, orf.aminoAcidCounts, minimalNc, maximalNc, 0));
                    }
                }

                // diversification: 10 mutations of randomly chosen individuals per
                // population member (mutate also refreshes the affected score)
                for (int i = 0; i < Population.Count; i++)
                {
                    for (int j = 0; j < 10; j++)
                    {
                        // individual randomization
                        int individual = rnd.Next(0, Population.Count);

                        mutate(Population[individual], individual, orf);
                    }
                }
            }

            // Seed the best individual exactly once, after the whole population
            // exists, so BestIndividual holds a single copy of the first individual
            // before updateBestIndividual looks for a better one.
            BestScore = PopulationScores[0];
            BestIndividual.AddRange(Population[0]);
            updateBestIndividual();
        }