Beispiel #1
0
        /// <summary>
        /// Fills <c>residueWeights</c> for every profile column in which at least one
        /// sequence has a residue, combining the per-column weighting returned by
        /// <c>CalculateResidueWeightsCommon</c> with the scores in <paramref name="subMatrix"/>.
        /// </summary>
        /// <param name="residues">Residue codes to iterate over, both as observed and as scored residues.</param>
        /// <param name="subMatrix">Substitution matrix indexed as <c>subMatrix[observed][scored]</c>; it is only read here.</param>
        protected void CalculateResidueWeights(char[] residues, ref SubstitutionMatrix.SubstitutionMatrix subMatrix)
        {
            double maxScore;
            Dictionary<char, double[]> weighting;
            CalculateResidueWeightsCommon(residues, out maxScore, out weighting);

            for (int column = 0; column < profileLength; column++)
            {
                var gapCount      = gapPositions[column];
                var sequenceCount = profileMacromolecules.Count;

                // Columns in which every sequence has a gap are left untouched.
                if (gapCount >= sequenceCount)
                {
                    continue;
                }

                // Fraction of the sequences that have a residue in this column.
                double occupancy = (double)(sequenceCount - gapCount) / (double)(sequenceCount);

                // Results are stored one position to the right of the column they were computed from.
                int target = column + 1;

                foreach (char scored in residues)
                {
                    double total = 0;
                    foreach (char observed in residues)
                    {
                        total += weighting[observed][column] * subMatrix[observed][scored];
                    }
                    residueWeights[scored][target] = (total / maxScore) * occupancy;
                }

                // Clustal looks the gap codes up in the substitution matrix as well, although those
                // entries appear to always be 0. If that holds, these sums are redundant work.
                // They are written after the residue loop so that they take precedence if
                // 'residues' itself contains the gap codes.
                double oldGapTotal = 0;
                double newGapTotal = 0;
                foreach (char observed in residues)
                {
                    double weight = weighting[observed][column];
                    oldGapTotal += weight * subMatrix[observed][Routines.oldGap];
                    newGapTotal += weight * subMatrix[observed][Routines.newGap];
                }

                residueWeights[Routines.oldGap][target] = (oldGapTotal / maxScore) * occupancy;
                residueWeights[Routines.newGap][target] = (newGapTotal / maxScore) * occupancy;
            }
        }
Beispiel #2
0
        // Residue alphabet, currently fixed to the nucleotide codes plus gap codes.
        // TODO: select the alphabet programmatically instead of hard-coding it
        // (presumably based on whether the alignment is nucleic acid or protein - confirm).
        char[] residueCodes   = Routines.nucleotideCodesPlusGaps;

        /// <summary>
        /// Performs one profile-profile alignment step: builds a profile for each of the two
        /// groups in <paramref name="step"/>, runs <c>ProgDiff</c> over them, then traces the
        /// path (<c>ProgTracepath</c>) and inserts the gaps (<c>addGGaps</c>).
        /// </summary>
        /// <param name="alignmentObject">Alignment being built; only its <c>IsNucleicAcid</c> flag is read here.</param>
        /// <param name="subMatrixClass">Matrix series from which the substitution matrix and the gap-penalty scale factor are taken.</param>
        /// <param name="step">
        /// Item1 and Item2 are the two groups of already-aligned macromolecules;
        /// Item3 is the mean similarity used to select the substitution matrix.
        /// </param>
        /// <returns>
        /// Currently always 0. The value returned by <c>ProgDiff</c> is not propagated.
        /// </returns>
        public double Align(ref Alignment alignmentObject, SubstitutionMatrix.SubstitutionMatrixSeries subMatrixClass, Tuple <ReadOnlyCollection <AlignedMacromolecule>, ReadOnlyCollection <AlignedMacromolecule>, double> step)
        {
            double meanSimilarity = step.Item3;

            int numSeqs1 = step.Item1.Count;
            int numSeqs2 = step.Item2.Count;

            // If one of the groups has no alignable sequences, can't do anything!
            if (numSeqs1 == 0 || numSeqs2 == 0)
            {
                return(0);
            }

            // Make the group with the most sequences group 1.
            // (Structure penalties would be figured out here.)
            bool switchGroups = numSeqs2 > numSeqs1;
            ReadOnlyCollection <AlignedMacromolecule> group1 = switchGroups ? step.Item2 : step.Item1;
            ReadOnlyCollection <AlignedMacromolecule> group2 = switchGroups ? step.Item1 : step.Item2;

            int group1MaxLength = GetMaxAlignedLength(group1);
            int group2MaxLength = GetMaxAlignedLength(group2);

            maxLength = group1MaxLength + group2MaxLength + 2; // Clustal sets the Alignment parameter here.
            displ     = new int[maxLength + 1];
            alnPath1  = new int[maxLength + 1];
            alnPath2  = new int[maxLength + 1];

            // Real (gap-free) mean length of each profile. Each total is divided by the member
            // count of the group it was summed over, so the result stays correct when the
            // groups were swapped above.
            int group1NoGapsLength = GetMeanUngappedLength(group1);
            int group2NoGapsLength = GetMeanUngappedLength(group2);
            int minNoGapsLength    = Math.Min(group1NoGapsLength, group2NoGapsLength);

            // The remainder reproduces the Clustal code directly.
            // Clustal's scaleVals.scale (1.0) serves no purpose; its intScale (100.0) is written
            // out as the literal 100.0 below.

            const double gapOpenPenalty   = 10.0;  // Will be user selectable parameter
            const double gapExtendPenalty = 0.2;   // Will be user selectable parameter
            const bool   useNegative      = false; // Will be user option

            SubstitutionMatrix.SubstitutionMatrix subMatrix = subMatrixClass.GetMatrix(meanSimilarity, minNoGapsLength, useNegative);

            double gapPenaltyScaleFactor = subMatrixClass.GetScaleFactor(meanSimilarity, useNegative);

            // DNA vs. protein gap parameters.
            double gapOpeningCoefficient, gapExtensionCoefficient;
            if (alignmentObject.IsNucleicAcid)
            {
                gapOpeningCoefficient   = 100.0 * gapOpenPenalty * gapPenaltyScaleFactor;
                gapExtensionCoefficient = 100.0 * gapExtendPenalty * gapPenaltyScaleFactor;
            }
            else
            {
                gapOpeningCoefficient   = CalculateProteinGapOpeningCoefficient(gapOpenPenalty, group1NoGapsLength, group2NoGapsLength, subMatrix.AverageScore, gapPenaltyScaleFactor);
                gapExtensionCoefficient = 100.0 * gapExtendPenalty;
            }

            // We need one profile with substitution matrix information and one without!
            // (This will change once the LE scoring function is supported.)
            // The first group, but not the second, incorporates substitution information.
            group1Profile = Profile.Calculate(group1, group1MaxLength, gapOpeningCoefficient, gapExtensionCoefficient, subMatrix);
            group2Profile = Profile.Calculate(group2, group2MaxLength, gapOpeningCoefficient, gapExtensionCoefficient, null);

            // Use Myers and Miller to align the two profiles.
            // NOTE(review): the value returned by ProgDiff is discarded and the method returns 0;
            // confirm whether callers should receive it instead.
            ProgDiff(0, 0, group1MaxLength, group2MaxLength, group1Profile.GapOpeningPenalties[0], group1Profile.GapExtensionPenalties[0]);

            alignmentLength = ProgTracepath();

            addGGaps();

            // TODO: combine the sequences and submit for iterations
            // (Clustal: DoRemoveFirstIteration() == TREE ???).

            return(0);
        }

        // Returns one more than the largest final aligned position among the members, or 0 if there are none.
        private static int GetMaxAlignedLength(ReadOnlyCollection <AlignedMacromolecule> members)
        {
            int longest = 0;

            foreach (AlignedMacromolecule macromolecule in members)
            {
                longest = Math.Max(longest, macromolecule.AlignedPositions.Last() + 1);
            }

            return(longest);
        }

        // Returns the mean Sequence.Length of the members, truncated to an int. Callers must pass a non-empty collection.
        private static int GetMeanUngappedLength(ReadOnlyCollection <AlignedMacromolecule> members)
        {
            int total = 0;

            foreach (AlignedMacromolecule macromolecule in members)
            {
                total += macromolecule.Sequence.Length;
            }

            return((int)(total / (double)members.Count));
        }
Beispiel #3
0
        /// <summary>
        /// Builds a <see cref="Profile"/> for a group of aligned macromolecules: counts gaps,
        /// calculates gap penalties and then calculates residue weights, with or without a
        /// substitution matrix. Struct penalties and gap penalty masks are ignored for now.
        /// </summary>
        /// <param name="macromolecules">Members of the group; the first one decides which residue alphabet is used.</param>
        /// <param name="length">Profile length passed to the <see cref="Profile"/> constructor.</param>
        /// <param name="gapOpeningCoefficient">Forwarded to <c>CalculateGapPenalties</c>.</param>
        /// <param name="gapExtensionCoefficient">Forwarded to <c>CalculateGapPenalties</c>.</param>
        /// <param name="subMatrix">Substitution matrix, or <c>null</c> to weight residues without one.</param>
        /// <returns>The populated profile.</returns>
        public static Profile Calculate(ReadOnlyCollection <AlignedMacromolecule> macromolecules, int length, double gapOpeningCoefficient, double gapExtensionCoefficient, SubstitutionMatrix.SubstitutionMatrix subMatrix)
        {
            // Pick the alphabet from the first member (a more global flag might be preferable).
            char[] alphabet = macromolecules[0].IsNucleicAcid
                ? Routines.nucleotideCodesPlusGaps
                : Routines.aminoAcidCodesPlusGaps;

            Profile result = new Profile(length, alphabet, macromolecules);
            result.CountGaps();
            result.CalculateGapPenalties(gapOpeningCoefficient, gapExtensionCoefficient);

            // No substitution matrix supplied: use the overload that takes only the residues.
            if (subMatrix == null)
            {
                result.CalculateResidueWeights(alphabet);
                return result;
            }

            result.CalculateResidueWeights(alphabet, ref subMatrix);
            return result;
        }