Beispiel #1
0
        /// <summary>
        /// Implements the Maximum Contiguous Subsequence Sum algorithm.
        /// Source: http://www.cs.ucf.edu/~reinhard/classes/cop3503/lectures/AlgAnalysis04.pdf
        /// Maximum Subarray Problem: http://en.wikipedia.org/wiki/Maximum_subarray_problem
        ///
        /// Finds the contiguous region with the maximum sum of differences between each
        /// converted base quality score and <c>QualityThreshold</c>. When <c>TrimFromStart</c>
        /// is false the running sum is never reset, so the selected region always begins at
        /// position 0 and only the tail of the sequence is trimmed.
        /// </summary>
        /// <param name="seqObj">The sequence object to be trimmed. It is cast to
        /// <c>QualitativeSequence</c> in order to read its quality scores.</param>
        /// <returns>A new sequence object holding the trimmed region and carrying the same ID as
        /// <paramref name="seqObj"/>. Returns null if no region has a positive sum (e.g. all
        /// quality scores are at or below the cutoff) or if the region is shorter than
        /// <c>MinLength</c>.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="seqObj"/> is null.</exception>
        /// <exception cref="System.InvalidCastException"><paramref name="seqObj"/> is not a
        /// <c>QualitativeSequence</c>.</exception>
        public override ISequence Trim(ISequence seqObj)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException below.
            if (seqObj == null)
            {
                throw new System.ArgumentNullException("seqObj");
            }

            // Cast once; both the scores and the format type come from the qualitative view.
            var qualSeq = (QualitativeSequence)seqObj;

            byte[] scores = qualSeq.QualityScores.ToArray();
            scores = QualityScoreAnalyzer.ConvertToPhred(scores, qualSeq.FormatType);

            // Maximum sum segment scan.
            int start    = 0;   // first index of the candidate region currently being extended
            int sum      = 0;   // running sum of (score - threshold) over the candidate region
            int maxSum   = 0;   // best sum seen so far; only strictly positive sums can beat it
            int maxStart = 0;
            int maxEnd   = -1;  // -1 marks "no region found yet" (maxStart > maxEnd)

            for (int i = 0; i < scores.Length; i++)
            {
                sum += scores[i] - QualityThreshold;

                // A negative running sum can never help a later region, so restart just
                // after this position. Skipped when trimming from the start is disabled,
                // which keeps the region anchored at index 0.
                if (sum < 0 && TrimFromStart)
                {
                    start = i + 1;
                    sum   = 0;
                }

                if (sum > maxSum)
                {
                    maxSum   = sum;
                    maxStart = start;
                    maxEnd   = i;
                }
            }

            // No region with a positive sum was found.
            if (maxStart > maxEnd)
            {
                return null;
            }

            int newLength = maxEnd - maxStart + 1;

            // The surviving region is too short to be worth keeping.
            if (newLength < MinLength)
            {
                return null;
            }

            var newSeqObj = seqObj.GetSubSequence(maxStart, newLength);

            // Keep the original identifier on the trimmed sequence.
            newSeqObj.ID = seqObj.ID;
            return newSeqObj;
        }