Esempio n. 1
0
        /// <summary>
        /// Finds splice alignments between two chains with <c>GetAlignmentsPreservingFullSsBlocks</c> and then discards
        /// the alignments that would truncate too much of either chain. An alignment is discarded when it would:
        ///   1) cut more than 1.25 repeat lengths off a repeat protein whose repeats are to be kept
        ///   2) leave two or fewer helix/strand blocks of a chain that has more than two of them
        ///   3) trim the third block (counted from the kept terminus) by more than DefaultMaxThirdHelixTrimming residues
        /// </summary>
        /// <param name="n">Chain on the N-terminal side of the splice; Range1 of each alignment is compared against it.</param>
        /// <param name="nKeepRepeats">When true, apply the repeat-preservation filter to <paramref name="n"/>.</param>
        /// <param name="c">Chain on the C-terminal side of the splice; Range2 of each alignment is compared against it.</param>
        /// <param name="cKeepRepeats">When true, apply the repeat-preservation filter to <paramref name="c"/>.</param>
        /// <param name="allowedTypes">Secondary structure types forwarded to the alignment search.</param>
        /// <param name="minAlignmentLength">Minimum alignment length; also used as the minimum block length for the block-count filters.</param>
        /// <param name="maxRmsd">RMSD threshold forwarded to the alignment search.</param>
        /// <returns>The alignments that survive all filters, in the order the search produced them.</returns>
        public static List <SequenceAlignment> GetFilteredSequenceAlignments(IChain n, bool nKeepRepeats, IChain c, bool cKeepRepeats, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            List <SequenceAlignment> alignments = GetAlignmentsPreservingFullSsBlocks(n, c, allowedTypes, minAlignmentLength, maxRmsd);
            int maxThirdHelixShortening         = DefaultMaxThirdHelixTrimming;

            // Remove alignments that would:
            // -- remove more than one repeat of a repeat protein. Repeat lengths of 15 or less are ignored.
            if (nKeepRepeats)
            {
                int nRepeatLength;
                if (Sequence.TryGetInternalRepeatLength(n, out nRepeatLength) && nRepeatLength > 15)
                {
                    // Everything after Range1.End is cut from the N-side chain
                    alignments.RemoveAll(a => a.Range1.End < n.Count - 1.25 * nRepeatLength);
                }
            }
            if (cKeepRepeats)
            {
                int cRepeatLength;
                if (Sequence.TryGetInternalRepeatLength(c, out cRepeatLength) && cRepeatLength > 15)
                {
                    // Everything before Range2.Start is cut from the C-side chain. This mirrors the N-side test above; the
                    // previous condition (Range1.Start < 1.25 * cRepeatLength) looked at the wrong chain's range.
                    alignments.RemoveAll(a => a.Range2.Start > 1.25 * cRepeatLength);
                }
            }

            // -- leave two or fewer secondary structure elements in either chain. An exception is made if there are only two secondary structure
            // elements total to begin with, which is common for helical bundles
            // -- shorten the third helix of a 3-helix bundle too much, because that would also probably destabilize the oligomer
            List <SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, SS.Helix | SS.Extended, minAlignmentLength);
            List <SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, SS.Helix | SS.Extended, minAlignmentLength);

            if (nBlocks.Count > 2)
            {
                alignments.RemoveAll(a => a.Range1.Start < nBlocks[1].End);
                alignments.RemoveAll(a => a.Range1.End < nBlocks[2].End - maxThirdHelixShortening);
            }
            if (cBlocks.Count > 2)
            {
                alignments.RemoveAll(a => a.Range2.End > cBlocks[cBlocks.Count - 2].Start);
                alignments.RemoveAll(a => a.Range2.Start > cBlocks[cBlocks.Count - 3].Start + maxThirdHelixShortening);
            }

            return(alignments);
        }
Esempio n. 2
0
        /// <summary>
        /// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
        /// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
        ///   1) one secondary structure block being fully included in the other
        ///   2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
        ///   3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
        /// </summary>
        /// <param name="n">Peptide on the N-terminal side of the splice.</param>
        /// <param name="c">Peptide on the C-terminal side of the splice.</param>
        /// <param name="allowedTypes">Secondary structure types whose blocks are considered for alignment.</param>
        /// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
        /// <param name="maxRmsd">Overlaps whose N-CA-C RMSD exceeds this value are skipped.</param>
        /// <returns>One alignment (n range, c range) per accepted overlap; empty if none qualify.</returns>
        public static List <SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
            // Note that the block-preservation filter below is asymmetric by construction: it accepts overlaps touching the END of the
            // n block or the START of the c block, so swapping the arguments changes which partial overlaps are accepted.
            List <SequenceAlignment> alignments = new List <SequenceAlignment>();
            List <SSBlock>           nBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock>           cBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    // nAlignmentOfBlockC is the index in n onto which the first residue of the c block is placed. Slide the c block from
                    // the right-most placement (overlap of exactly minAlignmentLength at the end of the n block) to the left-most one
                    // (overlap of exactly minAlignmentLength at the start of the n block). The bound is inclusive (>=): with a strict '>'
                    // the left-most placement was skipped, which lost e.g. a c block of exactly minAlignmentLength sitting at nBlock.Start.
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        // nStart >= nAlignmentOfBlockC, so the offset into the c block is never negative
                        int cStart = cBlock.Start + (nStart - nAlignmentOfBlockC);
                        int cEnd   = cStart + alignmentLength - 1;

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                        bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                        // Skip overlaps that would cut into both blocks (none of cases 1-3 from the summary applies)
                        if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                        {
                            continue;
                        }

                        if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                        {
                            continue;
                        }

                        alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
                    }
                }
            }

            return(alignments);
        }
Esempio n. 3
0
        /// <summary>
        /// Find every overlap of at least <paramref name="minAlignmentLength"/> residues between a secondary structure block of
        /// <paramref name="chain1"/> and a secondary structure block of <paramref name="chain2"/> whose RMSD does not exceed
        /// <paramref name="maxRmsd"/>, and return each one together with its superposition transform and range centroids.
        /// Unlike GetAlignmentsPreservingFullSsBlocks, no block-preservation filter is applied here: overlaps that cut into both
        /// blocks are kept, so the result does not depend on which chain would end up N- or C-terminal after fusion.
        /// </summary>
        /// <param name="chain1">First chain; Range1, Centroid1 and Align1 of each result refer to it.</param>
        /// <param name="chain2">Second chain; Range2, Centroid2 and Align2 of each result refer to it.</param>
        /// <param name="allowedTypes">Secondary structure types whose blocks are considered for alignment.</param>
        /// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
        /// <param name="maxRmsd">Overlaps whose RMSD exceeds this value are skipped.</param>
        /// <returns>One alignment per accepted overlap; empty if none qualify.</returns>
        public static List <TransformSequenceAlignment> GetTransformAlignments(IChain chain1, IChain chain2, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            List <TransformSequenceAlignment> alignments = new List <TransformSequenceAlignment>();
            List <SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain1, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain2, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            // Previously, calling this function with peptides A,B vs B,A returned different numbers of alignments. The original intent was for the first chain passed in to be at the
            // N-terminus and the second at the C-terminus (post-fusion), so post-fusion lengths were being calculated here and culled differently based on order.
            // That order-dependent culling has been removed from this method.

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    // nAlignmentOfBlockC is the index in chain1 onto which the first residue of the chain2 block is placed. Slide from an
                    // overlap of exactly minAlignmentLength at the end of the chain1 block to the same overlap at its start.
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                        int cEnd   = cStart + alignmentLength - 1;

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        Trace.Assert(alignmentLength >= minAlignmentLength);

                        float  rmsd;
                        Matrix matrix = Rmsd.GetRmsdAndTransform(chain1, nStart, nEnd, chain2, cStart, cEnd, out rmsd);
                        bool   fail   = rmsd > maxRmsd;

#if DEBUG && false
                        // Disabled sanity check: the pass/fail decision should not depend on argument order
                        float  rmsd2   = float.NaN;
                        Matrix matrix2 = Rmsd.GetRmsdAndTransform(chain2, cStart, cEnd, chain1, nStart, nEnd, out rmsd2);
                        bool   fail2   = rmsd2 > maxRmsd;
                        Trace.Assert(fail == fail2);
#endif

                        if (fail)
                        {
                            continue;
                        }

                        // Align1 is the transform returned for (chain1, chain2) and Align2 is its inverse.
                        // NOTE(review): which chain Align1 moves onto which is defined by Rmsd.GetRmsdAndTransform -- confirm there.
                        TransformSequenceAlignment alignment = new TransformSequenceAlignment(nStart, nEnd, cStart, cEnd);
                        alignment.Centroid1 = Geometry.GetCenterNCAC(chain1[nStart, nEnd]);
                        alignment.Centroid2 = Geometry.GetCenterNCAC(chain2[cStart, cEnd]);
                        alignment.Align1    = matrix;
                        alignment.Align2    = Matrix.Invert(matrix);
                        alignments.Add(alignment);
                    }
                }
            }

            return(alignments);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a single fused chain from consecutive peptides joined at the given splice alignments. For each adjacent pair
        /// (peptides[i], peptides[i + 1]) the residues of peptides[i] before the alignment are copied, then one residue per aligned
        /// position is picked from either peptide, and for the last pair the residues of the final peptide after the alignment are
        /// copied. Every copied residue gets its NodeTransform set to the source residue's TotalTransform.
        /// Within an alignment the residue is picked in this order of precedence:
        ///   1) the residue from peptides[i] if it is in <paramref name="immutableAas"/>
        ///   2) the residue from peptides[i + 1] if it is in <paramref name="immutableAas"/>
        ///   3) the residue from peptides[i] if the alternative is a proline and the five positions ending here in peptides[i] are helical
        ///   4) whichever residue has fewer sidechain-sidechain clashes with the retained region of the other peptide (ties go to peptides[i])
        /// </summary>
        /// <param name="peptides">Peptides in N-to-C order. With fewer than two peptides nothing is copied and an empty chain is returned.</param>
        /// <param name="alignments">alignments[i] splices peptides[i] (Range1) to peptides[i + 1] (Range2); the two ranges must have equal length.</param>
        /// <param name="immutableAas">Residues that take precedence when choosing between the two aligned candidates.</param>
        /// <returns>A new chain holding copies of the selected residues.</returns>
        public static IChain GetChain(IChain[] peptides, SequenceAlignment[] alignments, Selection immutableAas)
        {
            IChain fusion = new Chain();

            // Do all pairwise analysis
            for (int i = 0; i < peptides.Length - 1; i++)
            {
                // Determine the ranges outside of the splice
                int start1 = i == 0 ? 0 : alignments[i - 1].Range2.End + 1;
                int end1   = alignments[i].Range1.Start - 1;
                int start2 = alignments[i].Range2.End + 1;
                int end2   = i < alignments.Length - 1 ? alignments[i + 1].Range1.Start - 1 : peptides[i + 1].Count - 1;

                // Add the non-overlapping region of the first peptide
                if (start1 <= end1)
                {
                    // Only the very first residue of the first peptide is the N-terminus of the fusion. Previously every residue of
                    // this region was flagged as N-terminal when i == 0, unlike the per-index test used in the alignment region below.
                    bool isNTerminus = i == 0 && start1 == 0;
                    foreach (Aa aa in peptides[i][start1, end1])
                    {
                        Aa copy = new Aa(aa, isNTerminus, false);
                        isNTerminus        = false;
                        copy.NodeTransform = aa.TotalTransform;
                        fusion.Add(copy);
                    }
                }

                // Add the alignment region, selecting either from the first or second peptide so as to minimize clashes with the sidechains that
                // are for sure being kept on either side
                SequenceAlignment alignment = alignments[i];
                Debug.Assert(alignment.Range1.Length == alignment.Range2.Length);

                // Secondary structure of peptides[i]; computed at most once per peptide, and only if a proline candidate shows up
                SS[] ss1 = null;

                for (int alignmentOffset = 0; alignmentOffset < alignment.Range1.Length; alignmentOffset++)
                {
                    int  index1    = alignment.Range1.Start + alignmentOffset;
                    int  index2    = alignment.Range2.Start + alignmentOffset;
                    IAa  option1   = peptides[i][index1];
                    IAa  option2   = peptides[i + 1][index2];
                    bool nTerminus = i == 0 && index1 == 0;
                    bool cTerminus = (i == peptides.Length - 2) && (index2 == peptides[i + 1].Count - 1);

                    IAa chosen;
                    if (immutableAas.Aas.Contains(option1))
                    {
                        chosen = option1;
                    }
                    else if (immutableAas.Aas.Contains(option2))
                    {
                        chosen = option2;
                    }
                    else
                    {
                        // Avoid placing a proline from the second peptide right after a helical stretch of the first peptide
                        bool prolineAfterHelix = false;
                        if (option2.Letter == 'P' && index1 >= 4)
                        {
                            if (ss1 == null)
                            {
                                ss1 = SecondaryStructure.GetPhiPsiSS(peptides[i], 5);
                            }

                            // NOTE(review): OR-ing the flags equals SS.Helix when no other non-zero flag is present in the window;
                            // a zero-valued SS member, if one exists, would not be detected -- confirm against the SS enum.
                            prolineAfterHelix = (ss1[index1] | ss1[index1 - 1] | ss1[index1 - 2] | ss1[index1 - 3] | ss1[index1 - 4]) == SS.Helix;
                        }

                        if (prolineAfterHelix)
                        {
                            chosen = option1;
                        }
                        else
                        {
                            // Otherwise, select the residue with fewer clashes against the region of the OTHER peptide that is kept
                            int clashCount1 = end2 >= start2 ? peptides[i + 1][start2, end2].Count(other => Clash.AnyContact(other, option1, Clash.ContactType.SidechainSidechainClash)) : 0;
                            int clashCount2 = end1 >= start1 ? peptides[i][start1, end1].Count(other => Clash.AnyContact(other, option2, Clash.ContactType.SidechainSidechainClash)) : 0;

                            chosen = clashCount1 <= clashCount2 ? option1 : option2;
                        }
                    }

                    Aa selected = new Aa(chosen, nTerminus, cTerminus);
                    selected.NodeTransform = chosen.TotalTransform;
                    fusion.Add(selected);
                }

                // Add the non-overlapping region of the last peptide
                if (i == peptides.Length - 2 && start2 <= end2)
                {
                    foreach (Aa aa in peptides[i + 1][start2, end2])
                    {
                        Aa copy = new Aa(aa, false, aa.IsCTerminus);
                        copy.NodeTransform = aa.TotalTransform;
                        fusion.Add(copy);
                    }
                }
            }
            return(fusion);
        }