Example #1
0
        public static List <SequenceAlignment> GetFilteredSequenceAlignments(IChain n, bool nKeepRepeats, IChain c, bool cKeepRepeats, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            List <SequenceAlignment> alignments = GetAlignmentsPreservingFullSsBlocks(n, c, allowedTypes, minAlignmentLength, maxRmsd);
            int maxThirdHelixShortening         = DefaultMaxThirdHelixTrimming;

            //int maxOvershoot = 8;

            // Find all alignments and remove those that would:
            // -- remove more than one repeat of a repeat protein
            if (nKeepRepeats)
            {
                int nRepeatLength;
                if (Sequence.TryGetInternalRepeatLength(n, out nRepeatLength) && nRepeatLength > 15)
                {
                    alignments.RemoveAll(a => a.Range1.End < n.Count - 1.25 * nRepeatLength);
                }
            }
            if (cKeepRepeats)
            {
                int cRepeatLength;
                if (Sequence.TryGetInternalRepeatLength(c, out cRepeatLength) && cRepeatLength > 15)
                {
                    alignments.RemoveAll(a => a.Range1.Start < 1.25 * cRepeatLength);
                }
            }

            // -- leave two or fewer secondary structure elements in either chain. An exception is made if there are only two secondary structure
            // elements total to begin with, which is common for helical bundles
            List <SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, SS.Helix | SS.Extended, minAlignmentLength);
            List <SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, SS.Helix | SS.Extended, minAlignmentLength);

            if (nBlocks.Count > 2)
            {
                alignments.RemoveAll(a => a.Range1.Start < nBlocks[1].End);
            }
            if (cBlocks.Count > 2)
            {
                alignments.RemoveAll(a => a.Range2.End > cBlocks[cBlocks.Count - 2].Start);
            }

            // -- shorten the third helix of a 3-helix bundle too much, because that would also probably destabilize the oligomer
            if (nBlocks.Count > 2)
            {
                int removeCount = alignments.RemoveAll(a => a.Range1.End < nBlocks[2].End - maxThirdHelixShortening);
            }
            if (cBlocks.Count > 2)
            {
                int removeCount = alignments.RemoveAll(a => a.Range2.Start > cBlocks[cBlocks.Count - 3].Start + maxThirdHelixShortening);
            }

            return(alignments);
        }
Example #2
0
        /// <summary>
        /// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
        /// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
        ///   1) one secondary structure block being fully included in the other
        ///   2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
        ///   3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
        /// </summary>
        /// <param name="n"></param>
        /// <param name="c"></param>
        /// <param name="allowedTypes"></param>
        /// <param name="minAlignmentLength"></param>
        /// <returns></returns>
        public static List <SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
            List <SequenceAlignment> alignments = new List <SequenceAlignment>();
            List <SSBlock>           nBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock>           cBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length > nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                        int cEnd   = cStart + alignmentLength - 1;

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                        bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                        if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                        {
                            continue;
                        }

                        if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                        {
                            continue;
                        }

                        alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
                    }
                }
            }

            return(alignments);
        }
Example #3
0
        /// <summary>
        /// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
        /// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
        ///   1) one secondary structure block being fully included in the other
        ///   2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
        ///   3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
        /// </summary>
        /// <param name="chain1"></param>
        /// <param name="chain2"></param>
        /// <param name="allowedTypes"></param>
        /// <param name="minAlignmentLength"></param>
        /// <returns></returns>
        public static List <TransformSequenceAlignment> GetTransformAlignments(IChain chain1, IChain chain2, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            List <TransformSequenceAlignment> alignments = new List <TransformSequenceAlignment>();
            List <SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain1, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain2, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            // Previously, calling this function with peptides A,B vs B,A returns different numbers of alignments. The original intent was for the first chain passed in to be at the
            // N-terminus and the second at the C-terminus (post-fusion), so these post-fusion lengths were being calculated here and culled differently based on order.

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                        int cEnd   = cStart + alignmentLength - 1;

                        //Debug.WriteLine("Comparing {0}-{1} vs {2}-{3}", nStart, nEnd, cStart, cEnd);

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        //bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                        //bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                        //if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                        //    continue;

                        Trace.Assert(nEnd - nStart + 1 >= minAlignmentLength);

                        float  rmsd   = float.NaN;
                        Matrix matrix = Rmsd.GetRmsdAndTransform(chain1, nStart, nEnd, chain2, cStart, cEnd, out rmsd);
                        bool   fail   = rmsd > maxRmsd;

#if DEBUG && false
                        float  rmsd2   = float.NaN;
                        Matrix matrix2 = Rmsd.GetRmsdAndTransform(chain2, cStart, cEnd, chain1, nStart, nEnd, out rmsd2);
                        bool   fail2   = rmsd2 > maxRmsd;
                        Trace.Assert(fail == fail2);
#endif

                        if (rmsd > maxRmsd)
                        {
                            continue;
                        }

                        TransformSequenceAlignment alignment = new TransformSequenceAlignment(nStart, nEnd, cStart, cEnd);
                        alignment.Centroid1 = Geometry.GetCenterNCAC(chain1[nStart, nEnd]);
                        alignment.Centroid2 = Geometry.GetCenterNCAC(chain2[cStart, cEnd]);
                        alignment.Align1    = matrix;
                        alignment.Align2    = Matrix.Invert(matrix);
                        alignments.Add(alignment);
                    }
                }
            }

            return(alignments);
        }