Beispiel #1
0
        /// <summary>
        /// Obtains the transform produced by <c>Rmsd.GetRmsdTransformForResidues</c> for the two residue ranges and
        /// reports the N/CA/C RMSD computed with that transform.
        /// </summary>
        /// <param name="move">Chain passed as the first ("moving") argument to the transform calculation.</param>
        /// <param name="moveStart">First residue index of the range in <paramref name="move"/>.</param>
        /// <param name="moveEnd">Last residue index of the range in <paramref name="move"/> (callers in this code base pass inclusive ends).</param>
        /// <param name="stationary">Chain passed as the second ("stationary") argument to the transform calculation.</param>
        /// <param name="stationaryStart">First residue index of the range in <paramref name="stationary"/>.</param>
        /// <param name="stationaryEnd">Last residue index of the range in <paramref name="stationary"/>.</param>
        /// <param name="rmsd">Receives the value returned by the transform-taking <c>GetRmsdNCAC</c> overload.</param>
        /// <returns>The transform that was used to compute <paramref name="rmsd"/>.</returns>
        public static Matrix GetRmsdAndTransform(IChain move, int moveStart, int moveEnd, IChain stationary, int stationaryStart, int stationaryEnd, out float rmsd)
        {
            // Compute the superposition once and reuse it for the RMSD so both outputs are consistent.
            Matrix alignment = Rmsd.GetRmsdTransformForResidues(move, moveStart, moveEnd, stationary, stationaryStart, stationaryEnd);
            rmsd = GetRmsdNCAC(alignment, move, moveStart, moveEnd, stationary, stationaryStart, stationaryEnd);

            return alignment;
        }
Beispiel #2
0
        /// <summary>
        /// Computes the N/CA/C RMSD between two residue ranges after obtaining a transform for them from
        /// <c>Rmsd.GetRmsdTransformForResidues</c>.
        /// </summary>
        /// <param name="peptide1">First chain.</param>
        /// <param name="rangeStart1">First residue index of the range in <paramref name="peptide1"/>.</param>
        /// <param name="rangeEnd1">Last residue index of the range in <paramref name="peptide1"/> (callers in this code base pass inclusive ends).</param>
        /// <param name="peptide2">Second chain.</param>
        /// <param name="rangeStart2">First residue index of the range in <paramref name="peptide2"/>.</param>
        /// <param name="rangeEnd2">Last residue index of the range in <paramref name="peptide2"/>.</param>
        /// <returns>The value returned by the transform-taking <c>GetRmsdNCAC</c> overload for these ranges.</returns>
        public static float GetRmsdNCAC(IChain peptide1, int rangeStart1, int rangeEnd1, IChain peptide2, int rangeStart2, int rangeEnd2)
        {
            Matrix superposition = Rmsd.GetRmsdTransformForResidues(peptide1, rangeStart1, rangeEnd1, peptide2, rangeStart2, rangeEnd2);

            // Delegate to the overload that evaluates the RMSD under an explicit transform.
            return GetRmsdNCAC(superposition, peptide1, rangeStart1, rangeEnd1, peptide2, rangeStart2, rangeEnd2);
        }
Beispiel #3
0
        /// <summary>
        /// Builds a new chain consisting of the <paramref name="nInclude"/> residues of <paramref name="nChain"/> followed by the
        /// <paramref name="cInclude"/> residues of <paramref name="cChain"/>. When both alignment ranges are supplied, the
        /// C-terminal residues are transformed by the result of <c>Rmsd.GetRmsdTransform</c> for the two alignment regions before
        /// being appended; otherwise the identity transform is applied. Every residue in the result is a new <c>Aa</c>
        /// constructed from the source residue.
        /// </summary>
        /// <param name="nChain">The peptide that will constitute the N-terminal region of the resultant spliced peptide</param>
        /// <param name="cChain">The peptide that will constitute the C-terminal region of the resultant spliced peptide</param>
        /// <param name="nInclude">The included portion of the N-terminal peptide (Start and End are both used as inclusive indices)</param>
        /// <param name="cInclude">The included portion of the C-terminal peptide (Start and End are both used as inclusive indices)</param>
        /// <param name="nAlignNullable">The region of the N-terminal peptide to align on prior to splicing; alignment is skipped unless both align ranges are given</param>
        /// <param name="cAlignNullable">The region of the C-terminal peptide to align on prior to splicing; alignment is skipped unless both align ranges are given</param>
        /// <returns>The newly created spliced chain</returns>
        /// <exception cref="ArgumentException">Both alignment ranges are given but differ in length</exception>
        /// <exception cref="IndexOutOfRangeException">An include range starts below zero or ends at or beyond its chain's residue count</exception>
        public static IChain GetPeptide(IChain nChain, IChain cChain, Range nInclude, Range cInclude, Range?nAlignNullable = null, Range?cAlignNullable = null)
        {
            // Alignment is only performed when the caller supplies a region for both peptides.
            bool alignRequested = nAlignNullable != null && cAlignNullable != null;

            Range nRegion;
            Range cRegion;
            if (alignRequested)
            {
                nRegion = new Range((Range)nAlignNullable);
                cRegion = new Range((Range)cAlignNullable);
            }
            else
            {
                nRegion = new Range();
                cRegion = new Range();
            }

            if (alignRequested && ((Range)nAlignNullable).Length != ((Range)cAlignNullable).Length)
            {
                throw new ArgumentException("Splice ranges must be of equal length");
            }

            // Checked in the same left-to-right, short-circuiting order: N start, N end, C start, C end.
            if (nInclude.Start < 0 ||
                nChain.Count <= nInclude.End ||
                cInclude.Start < 0 ||
                cChain.Count <= cInclude.End)
            {
                throw new IndexOutOfRangeException("Splice ranges exceed the peptide ranges");
            }

            IChain spliced = new Chain();

            Matrix cToN;
            if (alignRequested)
            {
                cToN = Rmsd.GetRmsdTransform(cChain[cRegion.Start, cRegion.End], nChain[nRegion.Start, nRegion.End]);
            }
            else
            {
                cToN = Matrix.Identity;
            }

            // NOTE(review): if Matrix.Rotation returns a value-type copy, this normalizes a temporary and has no
            // effect on cToN - confirm against the Matrix type in use.
            cToN.Rotation.Normalize();

            // N-terminal portion: copied without any transform.
            for (int index = nInclude.Start; index <= nInclude.End; index++)
            {
                spliced.Add(new Aa(nChain[index]));
            }

            // C-terminal portion: each copy is transformed before being appended.
            for (int index = cInclude.Start; index <= cInclude.End; index++)
            {
                IAa copy = new Aa(cChain[index]);
                copy.Transform(cToN);
                spliced.Add(copy);
            }

            return spliced;
        }
Beispiel #4
0
        /// <summary>
        /// Slides the per-residue secondary-structure assignments of <paramref name="peptide2"/> against those of
        /// <paramref name="peptide1"/> and collects alignments in which every residue of both ranges has the requested
        /// secondary structure and the N/CA/C RMSD of the two ranges does not exceed <paramref name="maxRmsd"/>.
        /// For each starting position the alignment is grown one residue at a time from <paramref name="minimumAlignmentLength"/>
        /// until it fails; the last passing extent is recorded only if it grew beyond the minimum length and ends after
        /// the furthest end already recorded for the current offset.
        /// </summary>
        /// <param name="peptide1">First peptide; the first range of each result indexes into it.</param>
        /// <param name="peptide2">Second peptide; the second range of each result indexes into it.</param>
        /// <param name="secondaryStructure">Secondary-structure value every residue in both ranges must equal.</param>
        /// <param name="minimumAlignmentLength">Initial alignment length; also passed to <c>GetPhiPsiSS</c>.</param>
        /// <param name="maxRmsd">Largest RMSD that still counts as a passing alignment.</param>
        /// <returns>The alignments found, in discovery order.</returns>
        public static List <AlignmentInfo> GetSsAlignmentsByRmsd(Chain peptide1, Chain peptide2, SS secondaryStructure, int minimumAlignmentLength, float maxRmsd)
        {
            List<AlignmentInfo> results  = new List<AlignmentInfo>();
            List<SS>            ssTypes1 = Tools.SecondaryStructure.GetPhiPsiSS(peptide1, minimumAlignmentLength).ToList();
            List<SS>            ssTypes2 = Tools.SecondaryStructure.GetPhiPsiSS(peptide2, minimumAlignmentLength).ToList();

            // offset is the index in peptide2 that lines up with index 0 of peptide1 (start2 = start1 + offset).
            for (int offset = minimumAlignmentLength - ssTypes2.Count; offset < ssTypes1.Count - minimumAlignmentLength; offset++)
            {
                int furthestEnd1Added = -1;
                int start1            = Math.Max(0, -offset);

                while (start1 < Math.Min(ssTypes1.Count - minimumAlignmentLength, ssTypes2.Count - minimumAlignmentLength - offset))
                {
                    int start2 = start1 + offset;

                    // Grow the alignment until it runs off either sequence, the SS stops matching, or the RMSD gets too big.
                    for (int length = minimumAlignmentLength; ; length++)
                    {
                        int end1 = start1 + length - 1;
                        int end2 = start2 + length - 1;

                        bool outOfRange = ssTypes1.Count <= end1 || ssTypes2.Count <= end2;
                        bool ssMatches  = !outOfRange &&
                                          ssTypes1.GetRange(start1, length).Select(ss => ss == secondaryStructure).Aggregate((a, b) => a && b) &&
                                          ssTypes2.GetRange(start2, length).Select(ss => ss == secondaryStructure).Aggregate((a, b) => a && b);
                        bool rmsdTooLarge = !outOfRange &&
                                            Rmsd.GetRmsdNCAC(peptide1, start1, end1, peptide2, start2, end2) > maxRmsd;

                        if (!outOfRange && !rmsdTooLarge && ssMatches)
                        {
                            // Still a valid alignment: try one residue longer.
                            continue;
                        }

                        bool grewPastMinimum = length > minimumAlignmentLength;

                        // The current length failed but the previous one passed. Record the previous extent
                        // unless it ends no further than one already recorded for this offset.
                        if (grewPastMinimum && end1 - 1 > furthestEnd1Added)
                        {
                            results.Add(new AlignmentInfo(peptide1, peptide2, new Range(start1, end1 - 1), new Range(start2, end2 - 1)));
                            furthestEnd1Added = end1 - 1;
                        }

                        // When the failure was not caused by RMSD (and something was grown), jump ahead so the next
                        // start is end1 + 1 after the increment below. Otherwise simply advance by one, since a shifted
                        // start can give a different superposition.
                        if (grewPastMinimum && !rmsdTooLarge)
                        {
                            start1 = end1;
                        }

                        break;
                    }

                    start1++;
                }
            }

            return results;
        }
Beispiel #5
0
        /// <summary>
        /// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
        /// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
        ///   1) one secondary structure block being fully included in the other
        ///   2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
        ///   3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
        /// </summary>
        /// <param name="n">The chain whose blocks are treated as the N-terminal side</param>
        /// <param name="c">The chain whose blocks are treated as the C-terminal side</param>
        /// <param name="allowedTypes">Secondary structure type(s) passed to <c>GetPhiPsiSSBlocksOfType</c> when collecting blocks</param>
        /// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues</param>
        /// <param name="maxRmsd">Overlaps whose N/CA/C RMSD exceeds this value are discarded</param>
        /// <returns>The accepted overlaps as (nStart, nEnd, cStart, cEnd) sequence alignments</returns>
        public static List <SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
            List <SequenceAlignment> alignments = new List <SequenceAlignment>();
            List <SSBlock>           nBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock>           cBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    // nAlignmentOfBlockC is the index in the N chain at which the first residue of the C block is placed.
                    // It starts where the overlap with the end of the N block is exactly minAlignmentLength and slides
                    // toward the N-terminus. The bound is inclusive (>=) so that the final position, where the overlap
                    // with the START of the N block is exactly minAlignmentLength, is also visited; with a strict '>' that
                    // position was skipped, which dropped e.g. a minimum-length C block sitting at the start of the N block.
                    // (Assumes SSBlock.Length == End - Start + 1 - TODO confirm.)
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        // If the C block starts before the N block, the overlap begins part-way into the C block
                        int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                        int cEnd   = cStart + alignmentLength - 1;

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                        bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                        // Reject overlaps that preserve neither block: no full inclusion, the C block's start is not
                        // in the overlap, and the N block's end is not in the overlap.
                        if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                        {
                            continue;
                        }

                        if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                        {
                            continue;
                        }

                        alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
                    }
                }
            }

            return(alignments);
        }
Beispiel #6
0
        /// <summary>
        /// Enumerates every overlap of at least <paramref name="minAlignmentLength"/> residues between a secondary structure
        /// block of <paramref name="chain1"/> and one of <paramref name="chain2"/>, and returns those whose N/CA/C RMSD does not
        /// exceed <paramref name="maxRmsd"/>, together with the superposition transform, its inverse and the centroid of each range.
        /// Unlike the block-preserving variant, no filtering on whether a whole block is retained is applied here, so the
        /// candidate set does not depend on which chain would end up N- or C-terminal.
        /// </summary>
        /// <param name="chain1">First chain; Range/Centroid/Align "1" of each result refer to it</param>
        /// <param name="chain2">Second chain; Range/Centroid/Align "2" of each result refer to it</param>
        /// <param name="allowedTypes">Secondary structure type(s) passed to <c>GetPhiPsiSSBlocksOfType</c> when collecting blocks</param>
        /// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues</param>
        /// <param name="maxRmsd">Overlaps whose RMSD exceeds this value are discarded</param>
        /// <returns>The accepted overlaps with their transforms and centroids</returns>
        public static List <TransformSequenceAlignment> GetTransformAlignments(IChain chain1, IChain chain2, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            List<TransformSequenceAlignment> results = new List<TransformSequenceAlignment>();
            List<SSBlock> blocks1 = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain1, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List<SSBlock> blocks2 = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain2, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            // Previously, calling this function with peptides A,B vs B,A returned different numbers of alignments. The original
            // intent was for the first chain to be at the N-terminus and the second at the C-terminus (post-fusion), and
            // candidates were culled differently based on that order. That culling is intentionally not performed here.

            foreach (SSBlock block1 in blocks1)
            {
                foreach (SSBlock block2 in blocks2)
                {
                    // placement is the chain1 index at which the first residue of block2 is positioned. It slides from the
                    // position giving a minimum-length overlap at the end of block1 down to the one giving a
                    // minimum-length overlap at its start.
                    for (int placement = block1.End - minAlignmentLength + 1; placement + block2.Length >= block1.Start + minAlignmentLength; placement--)
                    {
                        // Overlap region expressed in chain1 indices
                        int start1  = Math.Max(block1.Start, placement);
                        int end1    = Math.Min(block1.End, placement + block2.Length - 1);
                        int overlap = end1 - start1 + 1;

                        // The same region expressed in chain2 indices
                        int start2 = Math.Max(block2.Start, block2.Start + (start1 - placement));
                        int end2   = start2 + overlap - 1;

                        Debug.Assert(block1.Start <= start1 && end1 <= block1.End);
                        Debug.Assert(block2.Start <= start2 && end2 <= block2.End);
                        Debug.Assert(end1 - start1 == end2 - start2);

                        Trace.Assert(end1 - start1 + 1 >= minAlignmentLength);

                        float  rmsd    = float.NaN;
                        Matrix align1  = Rmsd.GetRmsdAndTransform(chain1, start1, end1, chain2, start2, end2, out rmsd);
                        bool   tooFar  = rmsd > maxRmsd;

#if DEBUG && false
                        // Disabled symmetry check: the reverse superposition should pass or fail identically.
                        float  rmsdReverse   = float.NaN;
                        Matrix alignReverse  = Rmsd.GetRmsdAndTransform(chain2, start2, end2, chain1, start1, end1, out rmsdReverse);
                        bool   tooFarReverse = rmsdReverse > maxRmsd;
                        Trace.Assert(tooFar == tooFarReverse);
#endif

                        if (tooFar)
                        {
                            continue;
                        }

                        TransformSequenceAlignment accepted = new TransformSequenceAlignment(start1, end1, start2, end2)
                        {
                            Centroid1 = Geometry.GetCenterNCAC(chain1[start1, end1]),
                            Centroid2 = Geometry.GetCenterNCAC(chain2[start2, end2]),
                            Align1    = align1,
                            Align2    = Matrix.Invert(align1)
                        };
                        results.Add(accepted);
                    }
                }
            }

            return results;
        }
Beispiel #7
0
        /// <summary>
        /// Outputs a structure that is a fusion of the chains indicated by the sequence alignments. In the case of a cycle (wherein the last structure
        /// is a copy of the first at a different position), only one set of chains for the two endpoint structures is included.
        /// The input structures and their chains are modified in place: residues are removed from them and chains are joined.
        /// </summary>
        /// <param name="structures">structures to fuse; must contain exactly one more element than <paramref name="alignments"/></param>
        /// <param name="alignments">the sequence positions at which to fuse; alignment i joins structures[i] and structures[i + 1]</param>
        /// <param name="ncDirections">the directionality of each alignment: true -> the first structure is N-terminal, false -> C-terminal</param>
        /// <param name="cycle">whether the structure forms a cycle, in which case the first and last fusions affect both ends</param>
        /// <param name="partialChain">set to the chain created by cyclization whose entirety will be restored only through ASU patterning; null when no such chain results</param>
        /// <returns>a new structure containing every distinct, non-empty chain of the input structures after fusion</returns>
        public static IStructure GetStructure(IStructure[] structures, SequenceAlignment[] alignments, bool[] ncDirections, bool cycle, out IChain partialChain)
        {
            partialChain = null;

            // Note: The fusion product's middle residue from the alignment range comes from the N-terminal chain, i.e the N-term range is [0, Middle] and C-term is [Middle + 1, Len-1]
            Trace.Assert(structures != null && alignments != null && ncDirections != null);
            Trace.Assert(structures.Length > 0);
            Trace.Assert(structures.Length == alignments.Length + 1);
            Trace.Assert(alignments.Length == ncDirections.Length);

            // Data for the cyclization case - the chain that ends up containing the first structure's fused chain is tracked so that
            // the last chain can be fused back onto it
            IChain cycleDoubleFusedChain = null;                                                                     // Track what chain the first fusion chain ends up in
            bool   isCycleDoubleFused    = cycle && alignments.First().ChainIndex1 == alignments.Last().ChainIndex2; // Whether the first chain is fused both to the next chain and to the last/prior (wrap-around) chain
            Matrix cycleAlignment        = isCycleDoubleFused ? Rmsd.GetRmsdTransform(structures.First()[0][0], structures.Last()[0][0]) : Matrix.Identity;

            // Pass 1: mark which aas to remove without actually removing them, so that their indices remain unchanged while
            // those to be removed are being computed
            Selection remove = new Selection();

            for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
            {
                bool ncDirection              = ncDirections[alignmentIndex];
                SequenceAlignment alignment   = alignments[alignmentIndex];
                SequenceAlignment ncAlignment = ncDirection ? alignment : SequenceAlignment.Reversed(alignment); // N as chain1
                IStructure        structureN  = ncDirection ? structures[alignmentIndex] : structures[alignmentIndex + 1];
                IStructure        structureC  = ncDirection ? structures[alignmentIndex + 1] : structures[alignmentIndex];
                IChain            chainN      = structureN[ncAlignment.ChainIndex1];
                IChain            chainC      = structureC[ncAlignment.ChainIndex2];

                // The N-side chain keeps [0, Middle]; the C-side chain keeps [Middle + 1, end]
                remove.Aas.UnionWith(chainN[ncAlignment.Range1.Middle + 1, chainN.Count - 1]);
                remove.Aas.UnionWith(chainC[0, ncAlignment.Range2.Middle]);

                // If a cycle exists, then for each chain in the first and last structure (which are identical, modulo a transform), only one copy should be
                // preserved. If that copy is fused on both sides, then it must be trimmed according to both the first and final sequence alignments. In such a
                // case, remove the entire last structure.
                // Cycle and fusion is to the same chain on both sides:
                // -> remove all of the second copy
                // -> transform the second copy neighbor chain onto the first
                // Cycle and fusion is to separate chains on either side:
                // -> remove all of the second copy except for the fused chain
                // -> remove from the first copy the chain that was fused on the second copy
                if (cycle && alignmentIndex == alignments.Length - 1)
                {
                    if (isCycleDoubleFused)
                    {
                        // Mark all chains from second copy for deletion
                        foreach (IChain chain in structures[structures.Length - 1])
                        {
                            remove.Aas.UnionWith(chain);
                        }

                        // Mark the first structure residues for deletion on one side of the fusion point
                        IChain chain0 = structures[0][alignment.ChainIndex2];
                        if (ncDirection)
                        {
                            remove.Aas.UnionWith(chain0[0, alignment.Range2.Middle]);
                        }
                        else
                        {
                            remove.Aas.UnionWith(chain0[alignment.Range2.Middle + 1, chain0.Count - 1]);
                        }
                    }
                    else
                    {
                        // Mark all chains from the second copy for deletion, except the one being fused
                        IChain keep = structures[alignmentIndex + 1][alignment.ChainIndex2];
                        foreach (IChain chain in structures[alignmentIndex + 1])
                        {
                            if (chain != keep)
                            {
                                remove.Aas.UnionWith(chain);
                            }
                        }

                        // For the one being fused, remove that index from the first structure
                        remove.Aas.UnionWith(structures[0][alignment.ChainIndex2]);
                    }
                }
            }

            // Pass 2: actually remove the marked residues
            for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
            {
                SequenceAlignment alignment  = alignments[alignmentIndex];
                IStructure        structure1 = structures[alignmentIndex];
                IStructure        structure2 = structures[alignmentIndex + 1];

                RemoveMarkedAasPreservingTransform(structure1[alignment.ChainIndex1], remove);
                RemoveMarkedAasPreservingTransform(structure2[alignment.ChainIndex2], remove);

                // In a cycle, the endpoint structures can have marked residues on chains other than the fused ones
                if (cycle && alignmentIndex == 0)
                {
                    foreach (IChain chain in structure1)
                    {
                        RemoveMarkedAasPreservingTransform(chain, remove);
                    }
                }

                if (cycle && alignmentIndex == alignments.Length - 1)
                {
                    foreach (IChain chain in structure2)
                    {
                        RemoveMarkedAasPreservingTransform(chain, remove);
                    }
                }
            }

            // Pass 3: join the chains and allocate the result to the second structure so as to preserve the indexes for the next fusion step
            for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
            {
                bool ncDirection             = ncDirections[alignmentIndex];
                SequenceAlignment alignment  = alignments[alignmentIndex];
                IStructure        structure1 = structures[alignmentIndex];
                IStructure        structure2 = structures[alignmentIndex + 1];
                IChain            chain1     = structure1[alignment.ChainIndex1];
                IChain            chain2     = structure2[alignment.ChainIndex2];

                if (ncDirection)
                {
                    // chain1 is N-terminal: append chain2's residues to it and install it in structure2's slot
#if DEBUG_MIRRORS
                    foreach (IAa aa2 in chain2.ToArray())
                    {
                        chain1.AddInPlace(aa2);
                    }
#else
                    chain1.AddArraySourceInPlace(chain2);
#endif
                    structure2[alignment.ChainIndex2, true] = chain1;
                }
                else
                {
                    // chain2 is N-terminal: append chain1's residues to it; it already lives in structure2
#if DEBUG_MIRRORS
                    foreach (IAa aa1 in chain1.ToArray())
                    {
                        chain2.AddInPlace(aa1);
                    }
#else
                    chain2.AddArraySourceInPlace(chain1);
#endif
                }

                // Track which chain contains the first chain in the cycle so that the final chain in the cycle can fuse to it
                if (isCycleDoubleFused && (alignmentIndex == 0 || chain1 == cycleDoubleFusedChain || chain2 == cycleDoubleFusedChain))
                {
                    cycleDoubleFusedChain = ncDirection ? chain1 : chain2;
                }

                if (isCycleDoubleFused && alignmentIndex == alignments.Length - 1)
                {
                    // If it's a cycle on the same chain, move the chain back to where it fuses to the first structure
                    IChain combined = ncDirection ? chain1 : chain2;
                    combined.Transform(cycleAlignment);

                    if (combined == cycleDoubleFusedChain)
                    {
                        // If the structure[0] fusion chain has been fused all the way through, there is no need to move the
                        // current chain to meet the first, since they're already joined
                        partialChain = cycleDoubleFusedChain;
                    }
                    else if (ncDirection)
                    {
                        combined.AddArraySourceInPlace(cycleDoubleFusedChain);
                    }
                    else
                    {
                        cycleDoubleFusedChain.AddArraySourceInPlace(combined);
                    }
                }
            }

            // Add all unique chains to a new structure
            Structure total = new Structure();
            structures.SelectMany(s => s).Distinct().Where(c => c.Count > 0).ToList().ForEach(c => total.AddInPlace(c));
            return(total);
        }

        /// <summary>
        /// Removes from <paramref name="chain"/> every residue contained in <c>remove.Aas</c>, then re-applies to each removed
        /// residue the total transform it had while still in the chain.
        /// </summary>
        /// <param name="chain">The chain to strip; iterated from the last index to the first so removals do not shift pending indices</param>
        /// <param name="remove">Selection whose <c>Aas</c> set holds the residues to remove</param>
        private static void RemoveMarkedAasPreservingTransform(IChain chain, Selection remove)
        {
            for (int i = chain.Count - 1; i >= 0; i--)
            {
                IAa aa = chain[i];
                if (remove.Aas.Contains(aa))
                {
                    // Capture the total transform before detaching, then compensate for whatever TotalTransform
                    // becomes afterwards (presumably it no longer includes the former parent's contribution).
                    Matrix desired = aa.TotalTransform;
                    chain.RemoveAt(i);
                    aa.Transform(desired * Matrix.Invert(aa.TotalTransform));
#if DEBUG_TRANSFORMS
                    Debug.Assert((desired - aa.TotalTransform).Translation.Length() < 0.1);
#endif
                }
            }
        }