Exemplo n.º 1
0
        /// <summary>
        /// Finds ungapped alignments between two peptides in which every aligned residue of both peptides has the
        /// requested secondary structure and the RMSD reported by Rmsd.GetRmsdNCAC does not exceed maxRmsd.
        /// Every relative shift of peptide2 against peptide1 is tried; at each shift, windows are grown from
        /// minimumAlignmentLength until they fail, and the longest passing window is recorded.
        /// </summary>
        /// <param name="peptide1">First peptide; reported ranges for it are (rangeStart1, rangeEnd1), inclusive.</param>
        /// <param name="peptide2">Second peptide; reported ranges for it are (rangeStart2, rangeEnd2), inclusive.</param>
        /// <param name="secondaryStructure">The secondary structure value every aligned residue must equal.</param>
        /// <param name="minimumAlignmentLength">Shortest window length (in residues) that is considered.</param>
        /// <param name="maxRmsd">Largest RMSD (as returned by Rmsd.GetRmsdNCAC) a window may have and still pass.</param>
        /// <returns>The alignments found, in the order discovered. Empty when either peptide is shorter than minimumAlignmentLength.</returns>
        public static List <AlignmentInfo> GetSsAlignmentsByRmsd(Chain peptide1, Chain peptide2, SS secondaryStructure, int minimumAlignmentLength, float maxRmsd)
        {
            List <AlignmentInfo> alignments = new List <AlignmentInfo>();
            List <SS>            ss1        = Tools.SecondaryStructure.GetPhiPsiSS(peptide1, minimumAlignmentLength).ToList();
            List <SS>            ss2        = Tools.SecondaryStructure.GetPhiPsiSS(peptide2, minimumAlignmentLength).ToList();

            // alignmentOffset2 is the index shift from peptide1 to peptide2: rangeStart2 = rangeStart1 + alignmentOffset2.
            // A minimum-length window fits in both sequences only when
            //   0 <= rangeStart1 <= ss1.Count - minimumAlignmentLength   and
            //   0 <= rangeStart2 <= ss2.Count - minimumAlignmentLength,
            // so the usable offsets are [minimumAlignmentLength - ss1.Count, ss2.Count - minimumAlignmentLength], inclusive.
            int minOffset2 = minimumAlignmentLength - ss1.Count;
            int maxOffset2 = ss2.Count - minimumAlignmentLength;

            // Slide peptide2 against peptide1 and find matching segments at every relative offset
            for (int alignmentOffset2 = minOffset2; alignmentOffset2 <= maxOffset2; alignmentOffset2++)
            {
                // Largest peptide1 end index already reported at this offset; used to suppress sub-alignments
                int maxRangeEnd1Added = -1;

                // First/last window start (inclusive) that keeps a minimum-length window inside both sequences
                int firstRangeStart1 = Math.Max(0, -alignmentOffset2);
                int lastRangeStart1  = Math.Min(ss1.Count - minimumAlignmentLength, ss2.Count - minimumAlignmentLength - alignmentOffset2);

                for (int rangeStart1 = firstRangeStart1; rangeStart1 <= lastRangeStart1; rangeStart1++)
                {
                    // Grow the alignment length until SS no longer matches for the entire alignment, RMSD gets too big, or the sequence terminates
                    int alignmentLength = minimumAlignmentLength;
                    while (true)
                    {
                        int  rangeStart2            = rangeStart1 + alignmentOffset2;
                        int  rangeEnd2              = rangeStart2 + alignmentLength - 1;
                        int  rangeEnd1              = rangeStart1 + alignmentLength - 1;
                        bool allowableRangeExceeded = ss1.Count <= rangeEnd1 || ss2.Count <= rangeEnd2;
                        bool onlyContainsDesiredSs  = !allowableRangeExceeded &&
                                                      ss1.GetRange(rangeStart1, alignmentLength).All(ss => ss == secondaryStructure) &&
                                                      ss2.GetRange(rangeStart2, alignmentLength).All(ss => ss == secondaryStructure);

                        // RMSD is evaluated even when the SS test fails, because the restart logic below
                        // distinguishes RMSD failures from SS/range failures.
                        bool rmsdExceeded = !allowableRangeExceeded &&
                                            Rmsd.GetRmsdNCAC(peptide1, rangeStart1, rangeEnd1, peptide2, rangeStart2, rangeEnd2) > maxRmsd;

                        if (allowableRangeExceeded || rmsdExceeded || !onlyContainsDesiredSs)
                        {
                            if (alignmentLength > minimumAlignmentLength && rangeEnd1 - 1 > maxRangeEnd1Added)
                            {
                                // This alignment failed but the previous (one residue shorter) one did not.
                                // Add the previous one, but only if it is not a subset of an alignment already added at this offset.
                                AlignmentInfo alignment = new AlignmentInfo(peptide1, peptide2, new Range(rangeStart1, rangeEnd1 - 1), new Range(rangeStart2, rangeEnd2 - 1));
                                alignments.Add(alignment);
                                maxRangeEnd1Added = rangeEnd1 - 1;
                            }

                            // If the failure was due to RMSD, restart at (rangeStart1 + 1): a shifted window gives a slightly different superposition and may pass.
                            // If a window that had already grown past the minimum failed for any other reason (SS mismatch at the newly added
                            // residue, or end of sequence), every window containing rangeEnd1 fails too, so restart just past it
                            // (the for-loop increment turns rangeEnd1 into rangeEnd1 + 1).
                            // If the minimum-length window itself failed, simply restart at (rangeStart1 + 1).
                            if (alignmentLength > minimumAlignmentLength && !rmsdExceeded)
                            {
                                rangeStart1 = rangeEnd1;
                            }

                            break;
                        }

                        alignmentLength++;
                    }
                }
            }
            return(alignments);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
        /// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
        ///   1) one secondary structure block being fully included in the other
        ///   2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
        ///   3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
        /// </summary>
        /// <param name="n">The N-terminal peptide of the splice.</param>
        /// <param name="c">The C-terminal peptide of the splice.</param>
        /// <param name="allowedTypes">Secondary structure type(s) passed to SecondaryStructure.GetPhiPsiSSBlocksOfType to select candidate blocks.</param>
        /// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
        /// <param name="maxRmsd">Overlaps whose Rmsd.GetRmsdNCAC value is greater than this are discarded.</param>
        /// <returns>One SequenceAlignment (nStart, nEnd, cStart, cEnd; all indices inclusive) per accepted overlap, possibly empty.</returns>
        public static List <SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
        {
            // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
            List <SequenceAlignment> alignments = new List <SequenceAlignment>();
            List <SSBlock>           nBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
            List <SSBlock>           cBlocks    = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

            foreach (SSBlock nBlock in nBlocks)
            {
                foreach (SSBlock cBlock in cBlocks)
                {
                    // nAlignmentOfBlockC is the index, in n's numbering, onto which the first residue of cBlock is placed.
                    // Start with cBlock's start overlapping the last minAlignmentLength residues of nBlock and slide it toward
                    // n's N-terminus until cBlock's end overlaps only the first minAlignmentLength residues of nBlock.
                    // The bound is inclusive (>=): the final placement still has an overlap of exactly minAlignmentLength.
                    // With a strict '>' that placement was skipped, so two blocks of exactly minAlignmentLength never aligned.
                    // NOTE(review): assumes SSBlock.Length == End - Start + 1, as the assertions below also require - confirm.
                    for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
                    {
                        // Figure out the start and ends of the overlap region
                        int nStart          = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                        int nEnd            = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                        int alignmentLength = nEnd - nStart + 1;

                        // nStart >= nAlignmentOfBlockC always holds, so the shift into cBlock is never negative
                        int cStart = cBlock.Start + (nStart - nAlignmentOfBlockC);
                        int cEnd   = cStart + alignmentLength - 1;

                        Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                        Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                        Debug.Assert(nEnd - nStart == cEnd - cStart);

                        bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                        bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                        // Reject overlaps that would truncate both blocks: neither block is fully covered, the overlap
                        // does not begin at the start of cBlock, and it does not reach the end of nBlock.
                        if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                        {
                            continue;
                        }

                        // Reject overlaps that do not superimpose within the RMSD tolerance
                        if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                        {
                            continue;
                        }

                        alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
                    }
                }
            }

            return(alignments);
        }