Example #1
0
        /// <summary>
        /// Selects the earlier alignment that the current alignment should be
        /// extended back towards.
        /// </summary>
        /// <param name="alignments">Candidate alignments, examined in enumeration order.</param>
        /// <param name="currentAlignment">Alignment whose start is to be extended backward.</param>
        /// <returns>
        /// The first same-direction candidate whose gap to the current alignment is
        /// acceptable; otherwise the candidate with the smallest gap penalty that beats
        /// the smaller of the two current start offsets; otherwise the last element of
        /// <paramref name="alignments"/>.
        /// </returns>
        private DeltaAlignment GetPreviousAlignment(
            IEnumerable <DeltaAlignment> alignments,
            DeltaAlignment currentAlignment)
        {
            long currentFirstStart  = currentAlignment.FirstSequenceStart;
            long currentSecondStart = currentAlignment.SecondSequenceStart;

            // A fallback candidate has to beat the smaller of the two start offsets.
            long bestPenalty = currentSecondStart;
            if (currentFirstStart < currentSecondStart)
            {
                bestPenalty = currentFirstStart;
            }

            // Result used when no candidate qualifies.
            DeltaAlignment selected = alignments.Last();

            foreach (DeltaAlignment candidate in alignments)
            {
                // Only alignments in the same query direction are eligible.
                if (currentAlignment.QueryDirection != candidate.QueryDirection)
                {
                    continue;
                }

                long candidateFirstEnd  = candidate.FirstSequenceEnd;
                long candidateSecondEnd = candidate.SecondSequenceEnd;

                // The candidate must end at or before the current start in both sequences.
                if (candidateFirstEnd > currentFirstStart ||
                    candidateSecondEnd > currentSecondStart)
                {
                    continue;
                }

                long firstGap         = currentFirstStart - candidateFirstEnd;
                long secondGap        = currentSecondStart - candidateSecondEnd;
                bool firstGapIsLarger = firstGap > secondGap;
                long gapHigh          = firstGapIsLarger ? firstGap : secondGap;
                long gapLow           = firstGapIsLarger ? secondGap : firstGap;

                // Close enough, or the gap scores non-negatively: take it and stop searching.
                if (gapHigh < BreakLength ||
                    ((gapLow * _nucmerAligner.ValidScore)
                     + ((gapHigh - gapLow)
                        * _nucmerAligner.GapExtensionScore)) >= 0)
                {
                    return selected = candidate;
                }

                // Otherwise remember the candidate with the smallest penalty so far.
                long penalty = (gapHigh << 1) - gapLow;
                if (penalty < bestPenalty)
                {
                    selected    = candidate;
                    bestPenalty = penalty;
                }
            }

            return selected;
        }
Example #2
0
        /// <summary>
        /// Selects the earlier alignment that the current alignment should be
        /// extended back towards.
        /// </summary>
        /// <param name="alignments">Candidate alignments, examined in list order.</param>
        /// <param name="currentAlignment">Alignment whose start is to be extended backward.</param>
        /// <returns>
        /// The first same-direction candidate whose gap to the current alignment is
        /// acceptable; otherwise the candidate with the smallest gap penalty that beats
        /// the smaller of the two current start offsets; null when no candidate qualifies.
        /// </returns>
        private DeltaAlignment GetPreviousAlignment(
            IList <DeltaAlignment> alignments,
            DeltaAlignment currentAlignment)
        {
            int currentFirstStart  = currentAlignment.FirstSequenceStart;
            int currentSecondStart = currentAlignment.SecondSequenceStart;

            // A fallback candidate has to beat the smaller of the two start offsets.
            int bestPenalty = currentSecondStart;
            if (currentFirstStart < currentSecondStart)
            {
                bestPenalty = currentFirstStart;
            }

            DeltaAlignment selected = null;

            foreach (DeltaAlignment candidate in alignments)
            {
                // Only alignments in the same query direction are eligible.
                if (currentAlignment.QueryDirection != candidate.QueryDirection)
                {
                    continue;
                }

                int candidateFirstEnd  = candidate.FirstSequenceEnd;
                int candidateSecondEnd = candidate.SecondSequenceEnd;

                // The candidate must end at or before the current start in both sequences.
                if (candidateFirstEnd > currentFirstStart ||
                    candidateSecondEnd > currentSecondStart)
                {
                    continue;
                }

                int  firstGap         = currentFirstStart - candidateFirstEnd;
                int  secondGap        = currentSecondStart - candidateSecondEnd;
                bool firstGapIsLarger = firstGap > secondGap;
                int  gapHigh          = firstGapIsLarger ? firstGap : secondGap;
                int  gapLow           = firstGapIsLarger ? secondGap : firstGap;

                // Close enough, or the gap scores non-negatively: take it and stop searching.
                if (gapHigh < BreakLength ||
                    ((gapLow * nucmerAligner.ValidScore)
                     + ((gapHigh - gapLow)
                        * nucmerAligner.SubstitutionScore)) >= 0)
                {
                    selected = candidate;
                    break;
                }

                // Otherwise remember the candidate with the smallest penalty so far.
                int penalty = (gapHigh << 1) - gapLow;
                if (penalty < bestPenalty)
                {
                    selected    = candidate;
                    bestPenalty = penalty;
                }
            }

            return selected;
        }
        /// <summary>
        /// Builds a delta alignment that spans exactly one match of a cluster.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence passed to the DeltaAlignment constructor</param>
        /// <param name="querySequence">Query sequence passed to the DeltaAlignment constructor</param>
        /// <param name="cluster">Cluster supplying the query direction</param>
        /// <param name="match">Match supplying the start offsets and the length</param>
        /// <returns>
        /// A new DeltaAlignment starting at the match offsets and ending at
        /// offset + length - 1 in each sequence.
        /// </returns>
        internal static DeltaAlignment NewAlignment(
            ISequence referenceSequence,
            ISequence querySequence,
            Cluster cluster,
            MatchExtension match)
        {
            DeltaAlignment created = new DeltaAlignment(referenceSequence, querySequence);

            // Start positions are the match offsets themselves.
            created.FirstSequenceStart  = match.ReferenceSequenceOffset;
            created.SecondSequenceStart = match.QuerySequenceOffset;

            // End positions are inclusive, hence the "- 1".
            created.FirstSequenceEnd  = match.ReferenceSequenceOffset + match.Length - 1;
            created.SecondSequenceEnd = match.QuerySequenceOffset + match.Length - 1;

            created.QueryDirection = cluster.QueryDirection;

            return created;
        }
Example #4
0
        /// <summary>
        /// Builds a delta alignment that spans exactly one match of a cluster.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence passed to the DeltaAlignment constructor</param>
        /// <param name="querySequence">Query sequence passed to the DeltaAlignment constructor</param>
        /// <param name="cluster">Cluster supplying the query direction</param>
        /// <param name="match">Match supplying the start positions and the length</param>
        /// <returns>
        /// A new DeltaAlignment starting at the match start positions and ending at
        /// start + length - 1 in each sequence.
        /// </returns>
        internal static DeltaAlignment NewAlignment(
            ISequence referenceSequence,
            ISequence querySequence,
            Cluster cluster,
            MaxUniqueMatchExtension match)
        {
            // End positions are inclusive, hence the "- 1".
            return new DeltaAlignment(referenceSequence, querySequence)
            {
                FirstSequenceStart  = match.FirstSequenceStart,
                SecondSequenceStart = match.SecondSequenceStart,
                FirstSequenceEnd    = match.FirstSequenceStart + match.Length - 1,
                SecondSequenceEnd   = match.SecondSequenceStart + match.Length - 1,
                QueryDirection      = cluster.QueryDirection
            };
        }
Example #5
0
        /// <summary>
        /// Tells whether a cluster lies completely inside an existing alignment
        /// that has the same query direction.
        /// </summary>
        /// <param name="alignments">Alignments found so far</param>
        /// <param name="currentCluster">Cluster to test</param>
        /// <param name="currentDeltaAlignment">
        /// Alignment at which the backward search through <paramref name="alignments"/> starts
        /// </param>
        /// <returns>True when a containing alignment is found; otherwise false</returns>
        private static bool IsClusterShadowed(
            List <DeltaAlignment> alignments,
            Cluster currentCluster,
            DeltaAlignment currentDeltaAlignment)
        {
            // The cluster spans from the start of its first match to the end of its last match.
            var leadingMatch  = currentCluster.Matches.First();
            var trailingMatch = currentCluster.Matches.Last();

            long clusterFirstStart  = leadingMatch.ReferenceSequenceOffset;
            long clusterFirstEnd    = trailingMatch.ReferenceSequenceOffset + trailingMatch.Length - 1;
            long clusterSecondStart = leadingMatch.QuerySequenceOffset;
            long clusterSecondEnd   = trailingMatch.QuerySequenceOffset + trailingMatch.Length - 1;

            if (alignments.Count <= 0)
            {
                return false;
            }

            // Walk backward from the current alignment; IndexOf yields -1 when it is
            // not in the list, in which case nothing is examined.
            for (int index = alignments.IndexOf(currentDeltaAlignment); index >= 0; index--)
            {
                DeltaAlignment candidate = alignments[index];
                if (candidate.QueryDirection != currentCluster.QueryDirection)
                {
                    continue;
                }

                if (candidate.FirstSequenceEnd >= clusterFirstEnd &&
                    candidate.SecondSequenceEnd >= clusterSecondEnd &&
                    candidate.FirstSequenceStart <= clusterFirstStart &&
                    candidate.SecondSequenceStart <= clusterSecondStart)
                {
                    return true;
                }
            }

            return false;
        }
Example #6
0
        /// <summary>
        /// Extends the start of the current alignment backward, towards the end of the
        /// target alignment, and folds it into the target when the target is reached.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="alignments">Alignments found so far; the last element is removed when the
        /// current alignment is folded into the target.</param>
        /// <param name="currentAlignment">Alignment to extend backward</param>
        /// <param name="targetAlignment">Alignment to extend towards. When this is the last element of
        /// <paramref name="alignments"/> it is treated as "no target found".</param>
        /// <returns>True when the current alignment was folded into the target alignment</returns>
        private bool ExtendToPreviousSequence(
                ISequence referenceSequence,
                ISequence querySequence,
                IList<DeltaAlignment> alignments,
                DeltaAlignment currentAlignment,
                DeltaAlignment targetAlignment)
        {
            int alignFlags = ModifiedSmithWaterman.BackwardAlignFlag;
            bool lengthExceeded = false;

            // Without a usable target, aim for the start of both sequences (0th symbol).
            long referenceTarget = 0;
            long queryTarget = 0;

            if (alignments.Last() != targetAlignment)
            {
                referenceTarget = targetAlignment.FirstSequenceEnd;
                queryTarget = targetAlignment.SecondSequenceEnd;
            }
            else
            {
                alignFlags |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Cap the distance in the first sequence at the maximum alignment length and
            // switch to score-optimised extension.
            if ((currentAlignment.FirstSequenceStart - referenceTarget + 1) > ModifiedSmithWaterman.MaximumAlignmentLength)
            {
                referenceTarget = currentAlignment.FirstSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
                lengthExceeded = true;
                alignFlags |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Same cap for the second sequence.
            if ((currentAlignment.SecondSequenceStart - queryTarget + 1) > ModifiedSmithWaterman.MaximumAlignmentLength)
            {
                queryTarget = currentAlignment.SecondSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
                lengthExceeded = true;
                alignFlags |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Try to extend backward to the target position.
            bool reachedTarget = _nucmerAligner.ExtendSequence(
                referenceSequence,
                currentAlignment.FirstSequenceStart,
                ref referenceTarget,
                querySequence,
                currentAlignment.SecondSequenceStart,
                ref queryTarget,
                currentAlignment.Deltas,
                alignFlags);

            // A capped distance or a missing target never counts as reaching the target.
            if (lengthExceeded || alignments.Last() == targetAlignment)
            {
                reachedTarget = false;
            }

            if (!reachedTarget)
            {
                // Re-align forward from the position that was reached to the old start,
                // then move the start of the current alignment to that position.
                long referenceStart = currentAlignment.FirstSequenceStart;
                long queryStart = currentAlignment.SecondSequenceStart;
                _nucmerAligner.ExtendSequence(
                    referenceSequence,
                    referenceTarget,
                    ref referenceStart,
                    querySequence,
                    queryTarget,
                    ref queryStart,
                    currentAlignment.Deltas,
                    ModifiedSmithWaterman.ForcedForwardAlignFlag);

                currentAlignment.FirstSequenceStart = referenceTarget;
                currentAlignment.SecondSequenceStart = queryTarget;

                // Adjust the delta reference position for every delta of the alignment.
                foreach (int delta in currentAlignment.Deltas)
                {
                    if (delta > 0)
                    {
                        currentAlignment.DeltaReferencePosition += delta;
                    }
                    else
                    {
                        currentAlignment.DeltaReferencePosition += Math.Abs(delta) - 1;
                    }
                }

                return false;
            }

            // Target reached: extend the target forward up to the current start, let it
            // take over the current end, and drop the last alignment from the list.
            ExtendToNextSequence(
                referenceSequence,
                querySequence,
                targetAlignment,
                currentAlignment.FirstSequenceStart,
                currentAlignment.SecondSequenceStart,
                ModifiedSmithWaterman.ForcedForwardAlignFlag);

            targetAlignment.FirstSequenceEnd = currentAlignment.FirstSequenceEnd;
            targetAlignment.SecondSequenceEnd = currentAlignment.SecondSequenceEnd;
            alignments.RemoveAt(alignments.Count - 1);

            return true;
        }
Example #7
0
        /// <summary>
        /// Determines whether a cluster is shadowed, i.e. fully contained in an
        /// existing alignment with the same query direction.
        /// </summary>
        /// <param name="alignments">Alignments found so far</param>
        /// <param name="currentCluster">Cluster to test</param>
        /// <param name="currentDeltaAlignment">
        /// Alignment from which the list is searched backward
        /// </param>
        /// <returns>True when a containing alignment exists; otherwise false</returns>
        private static bool IsClusterShadowed(
                List<DeltaAlignment> alignments,
                Cluster currentCluster,
                DeltaAlignment currentDeltaAlignment)
        {
            // Bounds of the cluster: start of the first match to the (inclusive) end of the last.
            var headMatch = currentCluster.Matches.First();
            var tailMatch = currentCluster.Matches.Last();

            long referenceStart = headMatch.ReferenceSequenceOffset;
            long referenceEnd = tailMatch.ReferenceSequenceOffset + tailMatch.Length - 1;
            long queryStart = headMatch.QuerySequenceOffset;
            long queryEnd = tailMatch.QuerySequenceOffset + tailMatch.Length - 1;

            if (alignments.Count <= 0)
            {
                return false;
            }

            // Start at the current alignment (-1 when it is not in the list, so the
            // loop is skipped) and move towards the front of the list.
            int position = alignments.IndexOf(currentDeltaAlignment);
            while (position >= 0)
            {
                DeltaAlignment candidate = alignments[position];

                if (candidate.QueryDirection == currentCluster.QueryDirection
                        && candidate.FirstSequenceEnd >= referenceEnd
                        && candidate.SecondSequenceEnd >= queryEnd
                        && candidate.FirstSequenceStart <= referenceStart
                        && candidate.SecondSequenceStart <= queryStart)
                {
                    return true;
                }

                position--;
            }

            return false;
        }
Example #8
0
        /// <summary>
        /// Extends the end of an alignment forward towards a target position in
        /// both sequences and merges any newly produced deltas into the alignment.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="currentAlignment">Alignment whose end is extended; its end positions, deltas
        /// and delta reference position are updated in place.</param>
        /// <param name="targetReference">Target position in the reference sequence</param>
        /// <param name="targetQuery">Target position in the query sequence</param>
        /// <param name="methodName">Alignment flags (a combination of ModifiedSmithWaterman flag
        /// values) handed to the aligner. Despite the name this is a bit mask, not a name.</param>
        /// <returns>
        /// The value reported by the aligner's ExtendSequence call, forced to false when
        /// either distance had to be capped at the maximum alignment length.
        /// </returns>
        private bool ExtendToNextSequence(
            ISequence referenceSequence,
            ISequence querySequence,
            DeltaAlignment currentAlignment,
            long targetReference,
            long targetQuery,
            int methodName)
        {
            bool isOverflow = false;
            bool isDouble   = false;

            // Remember where the existing deltas end so that new ones can be merged below.
            int diagonal = currentAlignment.Deltas.Count;

            // Inclusive distances from the current end to the target.
            long referenceDistance = targetReference - currentAlignment.FirstSequenceEnd + 1;
            long queryDistance     = targetQuery - currentAlignment.SecondSequenceEnd + 1;

            // If the length in first sequence exceeds maximum length then extend
            // till score is optimized irrespective of length.
            if (referenceDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
            {
                // Cap the target so that the inclusive distance
                // (targetReference - FirstSequenceEnd + 1) equals the maximum length.
                // The previous "+ 1" produced a distance of maximum + 2, which still
                // exceeded the limit that was just checked.
                targetReference = currentAlignment.FirstSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
                isOverflow      = true;
                methodName     |= ModifiedSmithWaterman.OptimalFlag;
            }

            // If the length in second sequence exceeds maximum length then extend
            // till score is optimized irrespective of length.
            if (queryDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
            {
                // Same inclusive cap as for the reference sequence.
                targetQuery = currentAlignment.SecondSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
                if (isOverflow)
                {
                    isDouble = true;
                }
                else
                {
                    isOverflow = true;
                }

                methodName |= ModifiedSmithWaterman.OptimalFlag;
            }

            // When both sequences were capped, clear the SeqendFlag bit.
            if (isDouble)
            {
                methodName &= ~ModifiedSmithWaterman.SeqendFlag;
            }

            // Extend the sequence to next sequence (aligned/extended sequence)
            bool isClusterExtended = _nucmerAligner.ExtendSequence(
                referenceSequence,
                currentAlignment.FirstSequenceEnd,
                ref targetReference,
                querySequence,
                currentAlignment.SecondSequenceEnd,
                ref targetQuery,
                currentAlignment.Deltas,
                methodName);

            // A capped extension never counts as having reached the requested target.
            if (isClusterExtended && isOverflow)
            {
                isClusterExtended = false;
            }

            // New deltas were appended: the first new delta is shifted by the part of
            // the alignment not yet covered by the delta reference position.
            if (diagonal < currentAlignment.Deltas.Count)
            {
                long mergeOffset =
                    (currentAlignment.FirstSequenceEnd - currentAlignment.FirstSequenceStart + 1)
                    - currentAlignment.DeltaReferencePosition - 1;
                currentAlignment.Deltas[diagonal] += (currentAlignment.Deltas[diagonal] > 0)
                    ? mergeOffset
                    : -mergeOffset;

                // Adjust the delta reference position
                for (int index = diagonal; index < currentAlignment.Deltas.Count; index++)
                {
                    int deltaPosition = (int)currentAlignment.Deltas[index];
                    currentAlignment.DeltaReferencePosition +=
                        (deltaPosition > 0)
                        ? deltaPosition
                        : Math.Abs(deltaPosition) - 1;
                }
            }

            // ExtendSequence receives the targets by ref; store the resulting end positions.
            currentAlignment.FirstSequenceEnd  = targetReference;
            currentAlignment.SecondSequenceEnd = targetQuery;

            return isClusterExtended;
        }
Example #9
0
        /// <summary>
        /// Extends the start of the current alignment backward, towards the end of the
        /// target alignment, and folds it into the target when the target is reached.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="alignments">Alignments found so far; the last element is removed when the
        /// current alignment is folded into the target.</param>
        /// <param name="currentAlignment">Alignment to extend backward</param>
        /// <param name="targetAlignment">Alignment to extend towards. When this is the last element of
        /// <paramref name="alignments"/> it is treated as "no target found".</param>
        /// <returns>True when the current alignment was folded into the target alignment</returns>
        private bool ExtendToPreviousSequence(
            ISequence referenceSequence,
            ISequence querySequence,
            IList <DeltaAlignment> alignments,
            DeltaAlignment currentAlignment,
            DeltaAlignment targetAlignment)
        {
            int  flags = ModifiedSmithWaterman.BackwardAlignFlag;
            long referenceGoal;
            long queryGoal;

            if (alignments.Last() == targetAlignment)
            {
                // No target alignment was found: extend till the start of
                // the sequences (0th symbol).
                referenceGoal = 0;
                queryGoal     = 0;
                flags        |= ModifiedSmithWaterman.OptimalFlag;
            }
            else
            {
                referenceGoal = targetAlignment.FirstSequenceEnd;
                queryGoal     = targetAlignment.SecondSequenceEnd;
            }

            // Distances beyond the maximum alignment length are capped, and the
            // extension then runs until the score is optimal irrespective of length.
            bool referenceTooLong =
                (currentAlignment.FirstSequenceStart - referenceGoal + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;
            bool queryTooLong =
                (currentAlignment.SecondSequenceStart - queryGoal + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;
            bool isCapped = referenceTooLong || queryTooLong;

            if (referenceTooLong)
            {
                referenceGoal = currentAlignment.FirstSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
            }

            if (queryTooLong)
            {
                queryGoal = currentAlignment.SecondSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
            }

            if (isCapped)
            {
                flags |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Extend the sequence to previous sequence (aligned/extended sequence)
            bool wasMerged = _nucmerAligner.ExtendSequence(
                referenceSequence,
                currentAlignment.FirstSequenceStart,
                ref referenceGoal,
                querySequence,
                currentAlignment.SecondSequenceStart,
                ref queryGoal,
                currentAlignment.Deltas,
                flags);

            // A capped distance or a missing target never counts as reaching the target.
            if (isCapped || alignments.Last() == targetAlignment)
            {
                wasMerged = false;
            }

            if (wasMerged)
            {
                // Extend the target forward up to the current start, let it take over
                // the current end, and drop the last alignment from the list.
                ExtendToNextSequence(
                    referenceSequence,
                    querySequence,
                    targetAlignment,
                    currentAlignment.FirstSequenceStart,
                    currentAlignment.SecondSequenceStart,
                    ModifiedSmithWaterman.ForcedForwardAlignFlag);

                targetAlignment.FirstSequenceEnd  = currentAlignment.FirstSequenceEnd;
                targetAlignment.SecondSequenceEnd = currentAlignment.SecondSequenceEnd;
                alignments.RemoveAt(alignments.Count - 1);
            }
            else
            {
                // Re-align forward from the position that was reached to the old start,
                // then move the start of the current alignment to that position.
                long previousReferenceStart = currentAlignment.FirstSequenceStart;
                long previousQueryStart     = currentAlignment.SecondSequenceStart;
                _nucmerAligner.ExtendSequence(
                    referenceSequence,
                    referenceGoal,
                    ref previousReferenceStart,
                    querySequence,
                    queryGoal,
                    ref previousQueryStart,
                    currentAlignment.Deltas,
                    ModifiedSmithWaterman.ForcedForwardAlignFlag);

                currentAlignment.FirstSequenceStart  = referenceGoal;
                currentAlignment.SecondSequenceStart = queryGoal;

                // Adjust the delta reference position for every delta of the alignment.
                foreach (int delta in currentAlignment.Deltas)
                {
                    if (delta > 0)
                    {
                        currentAlignment.DeltaReferencePosition += delta;
                    }
                    else
                    {
                        currentAlignment.DeltaReferencePosition += Math.Abs(delta) - 1;
                    }
                }
            }

            return wasMerged;
        }
Example #10
0
        /// <summary>
        /// Extends the clusters of a synteny into delta alignments.
        /// For every match of every cluster that is not skipped, an alignment is created
        /// (or the previous one is continued), extended backward towards an earlier
        /// alignment and then forward towards the next match or the next cluster.
        /// </summary>
        /// <param name="synteny">Synteny whose clusters need to be extended.</param>
        /// <returns>List of delta alignments built while processing the clusters</returns>
        private List <DeltaAlignment> ExtendClusters(Synteny synteny)
        {
            // True when the most recent forward extension reported success; the next
            // match/cluster then continues the same alignment instead of starting a new one.
            bool isClusterExtended = false;
            List <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
            DeltaAlignment        deltaAlignment  = null;
            Cluster currentCluster;

            IList <Cluster> clusters = synteny.Clusters;

            // Sort the cluster by first sequence start
            clusters = SortCluster(clusters, FirstSequenceStart);

            // Initial target is the last cluster of the (unsorted) synteny cluster list.
            Cluster targetCluster = synteny.Clusters.Last();

            // NOTE(review): previousCluster is only ever advanced; its Current value is
            // assigned to currentCluster, which is overwritten at the top of the next
            // iteration. Neither enumerator is disposed. Confirm whether this is intended.
            IEnumerator <Cluster> previousCluster = clusters.GetEnumerator();

            previousCluster.MoveNext();
            IEnumerator <Cluster> cluster = clusters.GetEnumerator();

            while (cluster.MoveNext())
            {
                currentCluster = cluster.Current;

                // Unless the previous extension is being continued, skip clusters that are
                // already fused or that are contained in an existing alignment.
                if (!isClusterExtended &&
                    (currentCluster.IsFused ||
                     IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
                {
                    currentCluster.IsFused = true;
                    previousCluster.MoveNext();
                    // NOTE(review): this value is replaced by cluster.Current on the next iteration.
                    currentCluster = previousCluster.Current;
                    continue;
                }

                // Extend the match
                foreach (MatchExtension match in currentCluster.Matches)
                {
                    if (isClusterExtended)
                    {
                        // Continuing an alignment: only a match that starts exactly at the
                        // current end of the alignment is absorbed; any other match is skipped.
                        if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset ||
                            deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                        {
                            continue;
                        }

                        deltaAlignment.FirstSequenceEnd  += match.Length - 1;
                        deltaAlignment.SecondSequenceEnd += match.Length - 1;
                    }
                    else
                    {
                        // Start a new alignment that covers just this match.
                        //TODO: Do we need sequence here? Changed to sequence id.
                        deltaAlignment = DeltaAlignment.NewAlignment(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            currentCluster,
                            match);
                        deltaAlignments.Add(deltaAlignment);

                        // Find the MUM which is a good candidate for extension in reverse direction
                        DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);
                        if (targetAlignment != deltaAlignment &&
                            // TODO (from the original author): this condition still needs to be
                            // verified. The backward extension is attempted only when the candidate
                            // differs from the alignment that was just added.
                            ExtendToPreviousSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignments,
                                deltaAlignment,
                                targetAlignment))
                        {
                            // Backward extension succeeded: keep working on the target alignment.
                            deltaAlignment = targetAlignment;
                        }
                    }

                    int methodName = ModifiedSmithWaterman.ForwardAlignFlag;

                    long targetReference;
                    long targetQuery;
                    // NOTE(review): IndexOf is evaluated for every match and returns the first
                    // equal element; presumably matches are distinct within a cluster. Verify.
                    if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
                    {
                        // extend till the match in the current cluster
                        MatchExtension nextMatch =
                            currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                        targetReference = nextMatch.ReferenceSequenceOffset;
                        targetQuery     = nextMatch.QuerySequenceOffset;

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                    else
                    {
                        // extend till next cluster
                        // Start from the last position of each sequence; both targets are
                        // passed by ref to GetNextCluster, which may replace them.
                        targetReference = synteny.ReferenceSequence.Count - 1;
                        targetQuery     = synteny.QuerySequence.Count() - 1;

                        targetCluster = GetNextCluster(
                            clusters,
                            currentCluster,
                            ref targetReference,
                            ref targetQuery);

                        // A target cluster that is not part of the synteny adds the OptimalFlag.
                        if (!synteny.Clusters.Contains(targetCluster))
                        {
                            methodName |= ModifiedSmithWaterman.OptimalFlag;
                        }

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                }

                // Without a target cluster inside the synteny the alignment is not continued.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    isClusterExtended = false;
                }

                currentCluster.IsFused = true;

                // NOTE(review): both assignments to currentCluster below are overwritten by
                // cluster.Current at the top of the next iteration, so they do not change
                // which cluster is processed next. Confirm the intended traversal order.
                if (!isClusterExtended)
                {
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                }
                else
                {
                    currentCluster = targetCluster;
                }
            }

            return(deltaAlignments);
        }
Example #11
0
        /// <summary>
        /// Gets the DeltaAlignment at specified position of the file.
        /// </summary>
        /// <remarks>
        /// The record read from the stream is expected to consist of an "@id" line, a
        /// "&gt;reference id" line, a query id line, a line holding seven space separated
        /// properties and then the delta values, up to a line starting with "*" or the end
        /// of the stream. All numeric fields are parsed with the invariant culture.
        /// </remarks>
        /// <param name="position">Position at which delta alignment is required.</param>
        /// <returns>Delta alignment.</returns>
        /// <exception cref="FormatException">
        /// The "@id" line is missing, its id differs from <paramref name="position"/>,
        /// or the record ends before the properties line.
        /// </exception>
        /// <exception cref="Exception">The "&gt;reference id" line is missing.</exception>
        public DeltaAlignment GetDeltaAlignmentAt(long position)
        {
            using (var reader = this.deltaStream.OpenRead())
            {
                // NOTE(review): ReadNextLine is defined elsewhere; presumably it skips blank lines - confirm.
                string line = ReadNextLine(reader);
                if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
                }

                long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
                if (position != deltaPosition)
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position));
                }

                line = ReadNextLine(reader);
                if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                {
                    // Exception type kept as-is so existing callers observe the same type.
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                string referenceId = line.Substring(1);

                // Next line - query sequence id.
                string queryId = ReadNextLine(reader);

                // Fetch the query sequence from the query file.
                // NOTE(review): when the query id is empty both sequences stay null and are
                // handed to the DeltaAlignment constructor as-is - confirm that is intended.
                ISequence querySequence = null;
                Sequence refEmpty = null;

                if (!string.IsNullOrEmpty(queryId))
                {
                    // Get the id and remove any alphas - this can happen because the delta might
                    // have "Reverse" appended to it when it's a reversed sequence.
                    string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
                    int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
                    if (idx > 0)
                    {
                        id = id.Substring(0, idx);
                    }

                    long sequencePosition = long.Parse(id, CultureInfo.InvariantCulture);
                    querySequence = this.QueryParser.GetSequenceAt(sequencePosition);
                    refEmpty = new Sequence(querySequence.Alphabet, "A", false) { ID = referenceId };
                }

                DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence) { Id = deltaPosition };

                // Properties line. A record that stops before it is corrupt; report that
                // explicitly instead of failing with a NullReferenceException on Split.
                line = ReadNextLine(reader);
                if (line == null)
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
                }

                string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (deltaAlignmentProperties.Length == 7)
                {
                    long temp;
                    deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

                    // Look for a reversed sequence: end before start means the query is reversed,
                    // so store the coordinates in ascending order and record the direction.
                    if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
                    {
                        temp = deltaAlignment.SecondSequenceEnd;
                        deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                        deltaAlignment.SecondSequenceStart = temp;
                        deltaAlignment.QueryDirection = Cluster.ReverseDirection;
                    }

                    int error;
                    deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                }

                // Remaining lines are deltas until a line starting with "*" or the end of the stream.
                // The first pass sees the properties line itself; it only contributes a delta
                // if the whole line parses as a single integer.
                while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                {
                    long temp;
                    if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out temp))
                    {
                        deltaAlignment.Deltas.Add(temp);
                    }

                    // Read next line.
                    line = reader.ReadLine();

                    // Continue reading if blank line found.
                    while (line != null && string.IsNullOrEmpty(line))
                    {
                        line = reader.ReadLine();
                    }
                }

                return deltaAlignment;
            }
        }
Example #12
0
        /// <summary>
        /// Checks DeltaAlignment.ToString() twice: once for an alignment built from two
        /// in-memory DNA sequences, and once for a reverse-direction alignment whose
        /// reference sequence is parsed from a FASTA file. The expected strings and the
        /// file path are read from the test configuration xml.
        /// </summary>
        public void ValidateDeltaAlignmentToString()
        {
            // First check: alignment over two literal DNA sequences.
            ISequence referenceSequence = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
            ISequence querySequence = new Sequence(Alphabets.DNA, "GGGGG");
            var firstAlignment = new DeltaAlignment(referenceSequence, querySequence);
            firstAlignment.FirstSequenceEnd = 21;
            firstAlignment.SecondSequenceEnd = 20;

            string firstActual = firstAlignment.ToString();
            string firstExpected = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpectedNode);
            Assert.AreEqual(firstExpected, firstActual);

            // Load the reference sequence for the second check from the configured FASTA file.
            string fastaPath = this.utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.FilePathNode);
            List<ISequence> parsedSequences;
            using (var fastaStream = File.OpenRead(fastaPath))
            {
                var fastaParser = new FastAParser();
                fastaParser.Alphabet = Alphabets.Protein;
                parsedSequences = fastaParser.Parse(fastaStream).ToList();
            }

            // Second check: reverse-direction alignment against the first parsed sequence.
            var secondAlignment = new DeltaAlignment(parsedSequences[0], querySequence);
            secondAlignment.FirstSequenceEnd = 21;
            secondAlignment.SecondSequenceStart = 20;
            secondAlignment.QueryDirection = Cluster.ReverseDirection;

            string secondActual = secondAlignment.ToString();
            string secondExpected = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpected2Node);
            Assert.AreEqual(secondExpected, secondActual);
        }
Example #13
0
        /// <summary>
        /// Builds a delta alignment that covers exactly one match.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="cluster">Cluster supplying the query direction</param>
        /// <param name="match">Match supplying the start offsets and the length</param>
        /// <returns>
        /// New DeltaAlignment starting at the match offsets and ending at
        /// offset + length - 1 in both sequences.
        /// </returns>
        internal static DeltaAlignment NewAlignment(
                ISequence referenceSequence,
                ISequence querySequence,
                Cluster cluster,
                MatchExtension match)
        {
            var alignment = new DeltaAlignment(referenceSequence, querySequence);

            var referenceStart = match.ReferenceSequenceOffset;
            var queryStart = match.QuerySequenceOffset;
            var matchLength = match.Length;

            alignment.FirstSequenceStart = referenceStart;
            alignment.SecondSequenceStart = queryStart;

            // End positions are inclusive, hence the "- 1".
            alignment.FirstSequenceEnd = referenceStart + matchLength - 1;
            alignment.SecondSequenceEnd = queryStart + matchLength - 1;

            alignment.QueryDirection = cluster.QueryDirection;
            return alignment;
        }
Example #14
0
        /// <summary>
        /// Rebuilds the query region of a delta alignment with its deltas applied.
        /// </summary>
        /// <remarks>
        /// Walks the query from SecondSequenceStart to SecondSequenceEnd. At each delta
        /// position a positive delta writes a gap symbol to the output and a negative
        /// delta skips one query symbol; a delta of zero ends delta processing.
        /// </remarks>
        /// <param name="deltaAlignment">DeltaAlignment instance.</param>
        /// <returns>
        /// A new sequence over the ambiguous DNA alphabet carrying the query sequence's ID.
        /// </returns>
        private static ISequence GetSequenceFromDelta(DeltaAlignment deltaAlignment)
        {
            int deltaTotal = deltaAlignment.Deltas.Count;
            int nextDeltaSlot = 0;
            long currentDelta = 0;

            if (deltaTotal > 0)
            {
                currentDelta = deltaAlignment.Deltas[0];
                nextDeltaSlot = 1;
            }

            long queryStart = deltaAlignment.SecondSequenceStart;
            long queryEnd = deltaAlignment.SecondSequenceEnd;

            // Query position at which the current delta applies.
            long pendingIndelAt = (queryStart - 1) + (currentDelta >= 0 ? currentDelta : -currentDelta);

            // Every positive delta adds one output symbol, every negative delta removes one.
            int gapsToInsert = 0;
            int symbolsToSkip = 0;
            foreach (long delta in deltaAlignment.Deltas)
            {
                if (delta > 0)
                {
                    gapsToInsert++;
                }
                else if (delta < 0)
                {
                    symbolsToSkip++;
                }
            }

            long outputLength = queryEnd - queryStart + 1 + gapsToInsert - symbolsToSkip;
            byte[] output = new byte[outputLength];

            long writeAt = 0;
            long readAt = queryStart;
            while (readAt <= queryEnd)
            {
                bool indelDue = currentDelta != 0 && readAt == pendingIndelAt;
                if (!indelDue)
                {
                    // Plain copy of the query symbol.
                    output[writeAt] = deltaAlignment.QuerySequence[readAt];
                    writeAt++;
                    readAt++;
                    continue;
                }

                if (currentDelta > 0)
                {
                    // A symbol is deleted from the query, thus insert a gap symbol in query.
                    // No query symbol is consumed, so pull the next indel position back by one.
                    output[writeAt] = AmbiguousDnaAlphabet.Instance.Gap;
                    writeAt++;
                    pendingIndelAt--;
                }
                else
                {
                    // A symbol is inserted to query, thus skip one symbol of the query sequence.
                    readAt++;
                }

                // Move on to the next delta; zero means there are none left.
                currentDelta = nextDeltaSlot < deltaTotal
                    ? deltaAlignment.Deltas[nextDeltaSlot++]
                    : 0;
                pendingIndelAt += currentDelta >= 0 ? currentDelta : -currentDelta;
            }

            return new Sequence(AmbiguousDnaAlphabet.Instance, output) { ID = deltaAlignment.QuerySequence.ID };
        }
Example #15
0
 /// <summary>
 /// Verifies the text produced by DeltaAlignment.ToString() for an alignment with
 /// only the end positions set on two unnamed DNA sequences.
 /// </summary>
 public void TestDeltaAlignmentToString()
 {
     ISequence refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
     ISequence qrySeq = new Sequence(Alphabets.DNA, "GGGGG");
     DeltaAlignment delta = new DeltaAlignment(refSeq, qrySeq);
     delta.FirstSequenceEnd = 21;
     delta.SecondSequenceEnd = 20;

     string actualString = delta.ToString();
     string expectedString = "Ref ID= Query Id= Ref start=0 Ref End=21 Query start=0 Query End=20, Direction=FORWARD";

     // Expected value goes first so that a failure message reports the two values the right way round.
     Assert.AreEqual(expectedString, actualString);
 }
Example #16
0
        /// <summary>
        /// Extends the current alignment forward towards a target position in both sequences.
        /// </summary>
        /// <remarks>
        /// When the distance to a target exceeds ModifiedSmithWaterman.MaximumAlignmentLength,
        /// that target is pulled in to the maximum length and the optimal flag is added to the
        /// method flags; the extension is then reported as not completed. The alignment's end
        /// positions, deltas and delta reference position are updated in place.
        /// </remarks>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="currentAlignment">Alignment to extend; modified by this call</param>
        /// <param name="targetReference">Target position in reference sequence</param>
        /// <param name="targetQuery">Target position in query sequence</param>
        /// <param name="methodName">ModifiedSmithWaterman flag bits selecting the alignment mode</param>
        /// <returns>
        /// True when the aligner reports a successful extension and neither target had to be
        /// shortened; otherwise false.
        /// </returns>
        private bool ExtendToNextSequence(
                ISequence referenceSequence,
                ISequence querySequence,
                DeltaAlignment currentAlignment,
                long targetReference,
                long targetQuery,
                int methodName)
        {
            // Deltas appended by the aligner start at this index.
            int firstNewDelta = currentAlignment.Deltas.Count;

            bool referenceTooFar =
                (targetReference - currentAlignment.FirstSequenceEnd + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;
            bool queryTooFar =
                (targetQuery - currentAlignment.SecondSequenceEnd + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;

            // If the length in first sequence exceeds maximum length then extend
            // till score is optimized irrespective of length.
            if (referenceTooFar)
            {
                targetReference = currentAlignment.FirstSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength + 1;
                methodName |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Same treatment for the second sequence.
            if (queryTooFar)
            {
                targetQuery = currentAlignment.SecondSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength + 1;
                methodName |= ModifiedSmithWaterman.OptimalFlag;
            }

            // Both targets were shortened: clear the sequence-end flag.
            if (referenceTooFar && queryTooFar)
            {
                methodName &= ~ModifiedSmithWaterman.SeqendFlag;
            }

            bool targetWasShortened = referenceTooFar || queryTooFar;

            // Extend the sequence to next sequence (aligned/extended sequence)
            bool alignerExtended = _nucmerAligner.ExtendSequence(
                referenceSequence,
                currentAlignment.FirstSequenceEnd,
                ref targetReference,
                querySequence,
                currentAlignment.SecondSequenceEnd,
                ref targetQuery,
                currentAlignment.Deltas,
                methodName);

            // A shortened target never counts as a completed extension.
            bool isClusterExtended = alignerExtended && !targetWasShortened;

            if (firstNewDelta < currentAlignment.Deltas.Count)
            {
                // Uses the alignment's end position from before this extension,
                // which is why the end positions are only updated further down.
                long alignedReferenceLength =
                    currentAlignment.FirstSequenceEnd - currentAlignment.FirstSequenceStart + 1;
                long shift = alignedReferenceLength - currentAlignment.DeltaReferencePosition - 1;

                // Move the first new delta away from zero by the shift, keeping its sign.
                long leadingDelta = currentAlignment.Deltas[firstNewDelta];
                currentAlignment.Deltas[firstNewDelta] = leadingDelta + (leadingDelta > 0 ? shift : -shift);

                // Adjust the delta reference position
                for (int deltaIndex = firstNewDelta; deltaIndex < currentAlignment.Deltas.Count; deltaIndex++)
                {
                    int delta = (int)currentAlignment.Deltas[deltaIndex];
                    if (delta > 0)
                    {
                        currentAlignment.DeltaReferencePosition += delta;
                    }
                    else
                    {
                        currentAlignment.DeltaReferencePosition += Math.Abs(delta) - 1;
                    }
                }
            }

            currentAlignment.FirstSequenceEnd = targetReference;
            currentAlignment.SecondSequenceEnd = targetQuery;

            return isClusterExtended;
        }
Example #17
0
        /// <summary>
        /// Picks, from the given alignments, the one that the current alignment should be
        /// extended back towards.
        /// </summary>
        /// <remarks>
        /// Only alignments with the same query direction that end at or before the start of
        /// the current alignment in both sequences are considered. The first candidate whose
        /// larger gap is below BreakLength, or whose gap score is non-negative, is returned
        /// immediately; otherwise the candidate with the smallest (2 * larger gap - smaller gap)
        /// below the running distance wins. When no candidate qualifies, the last alignment
        /// of the list is returned.
        /// </remarks>
        /// <param name="alignments">List of alignment</param>
        /// <param name="currentAlignment">Current alignment</param>
        /// <returns>The selected alignment.</returns>
        private DeltaAlignment GetPreviousAlignment(
                IEnumerable<DeltaAlignment> alignments,
                DeltaAlignment currentAlignment)
        {
            long currentReferenceStart = currentAlignment.FirstSequenceStart;
            long currentQueryStart = currentAlignment.SecondSequenceStart;

            // Running best distance starts at the smaller of the two start positions.
            long bestDistance = (currentQueryStart > currentReferenceStart)
                    ? currentReferenceStart
                    : currentQueryStart;

            // Fallback result when nothing in the list qualifies.
            DeltaAlignment chosen = alignments.Last();

            foreach (DeltaAlignment candidate in alignments)
            {
                bool sameDirection = currentAlignment.QueryDirection == candidate.QueryDirection;
                if (!sameDirection)
                {
                    continue;
                }

                long candidateReferenceEnd = candidate.FirstSequenceEnd;
                long candidateQueryEnd = candidate.SecondSequenceEnd;

                // Candidate must end at or before the current start in both sequences.
                if (candidateReferenceEnd > currentReferenceStart
                    || candidateQueryEnd > currentQueryStart)
                {
                    continue;
                }

                long referenceGap = currentReferenceStart - candidateReferenceEnd;
                long queryGap = currentQueryStart - candidateQueryEnd;

                bool referenceGapIsWider = referenceGap > queryGap;
                long widerGap = referenceGapIsWider ? referenceGap : queryGap;
                long narrowerGap = referenceGapIsWider ? queryGap : referenceGap;

                if (widerGap < BreakLength
                        || ((narrowerGap * _nucmerAligner.ValidScore)
                            + ((widerGap - narrowerGap)
                            * _nucmerAligner.GapExtensionScore)) >= 0)
                {
                    // Good enough: stop searching.
                    return candidate;
                }

                long weightedDistance = (widerGap << 1) - narrowerGap;
                if (weightedDistance < bestDistance)
                {
                    chosen = candidate;
                    bestDistance = weightedDistance;
                }
            }

            return chosen;
        }
Example #18
0
        /// <summary>
        /// Starts parsing from the specified StreamReader.
        /// </summary>
        /// <remarks>
        /// Each record is expected to consist of an "@id" line, a "&gt;reference id" line,
        /// a query id line, a line holding seven space separated properties and then the
        /// delta values, up to a line starting with "*" or the end of the stream. All
        /// numeric fields are parsed with the invariant culture.
        /// This is an iterator: nothing runs, including registering the reader and the
        /// validation below, until the returned sequence is enumerated.
        /// </remarks>
        /// <param name="streamReader">Stream reader to parse.</param>
        /// <returns>IEnumerable of DeltaAlignments.</returns>
        /// <exception cref="Exception">
        /// The stream is empty, or a record does not start with the "@id" and
        /// "&gt;reference id" lines.
        /// </exception>
        /// <exception cref="FormatException">
        /// A record has no query id line or ends before its properties line.
        /// </exception>
        private IEnumerable<DeltaAlignment> ParseFrom(StreamReader streamReader)
        {
            this.parsingReaders.Add(streamReader);

            string lastReadQuerySequenceId = string.Empty;
            ISequence sequence = null;

            if (streamReader.EndOfStream)
            {
                throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
            }

            // NOTE(review): ReadNextLine is defined elsewhere; presumably it skips blank lines - confirm.
            string line = ReadNextLine(streamReader);
            do
            {
                if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
                {
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
                line = ReadNextLine(streamReader);
                if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                {
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                // Reference id follows the ">" marker.
                string referenceId = line.Substring(1);

                // Next line - query sequence id.
                string queryId = ReadNextLine(streamReader);

                // Without a query id no alignment can be built; fail with a clear error
                // instead of a NullReferenceException further down.
                if (string.IsNullOrEmpty(queryId))
                {
                    throw new FormatException(Properties.Resource.INVALID_INPUT_FILE);
                }

                // Fetch the query sequence from the query file, reusing the previous one
                // when consecutive records name the same query.
                if (queryId != lastReadQuerySequenceId)
                {
                    // Get the id and remove any alphas - this can happen because the delta might
                    // have "Reverse" appended to it when it's a reversed sequence.
                    string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
                    int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
                    if (idx > 0)
                    {
                        id = id.Substring(0, idx);
                    }

                    long seqPosition = long.Parse(id, CultureInfo.InvariantCulture);
                    sequence = this.QueryParser.GetSequenceAt(seqPosition);
                    lastReadQuerySequenceId = queryId;
                }

                Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false) { ID = referenceId };
                DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);
                deltaAlignment.Id = deltaPosition;

                // Properties line. A record that stops before it is invalid; report that
                // explicitly instead of failing with a NullReferenceException on Split.
                line = ReadNextLine(streamReader);
                if (line == null)
                {
                    throw new FormatException(Properties.Resource.INVALID_INPUT_FILE);
                }

                string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (deltaAlignmentProperties.Length == 7)
                {
                    long temp;
                    deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

                    // Look for a reversed sequence: end before start means the query is reversed,
                    // so store the coordinates in ascending order and record the direction.
                    if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
                    {
                        temp = deltaAlignment.SecondSequenceEnd;
                        deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                        deltaAlignment.SecondSequenceStart = temp;
                        deltaAlignment.QueryDirection = Cluster.ReverseDirection;
                    }

                    int error;
                    deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                }

                // Remaining lines are deltas until a line starting with "*" or the end of the stream.
                // The first pass sees the properties line itself; it only contributes a delta
                // if the whole line parses as a single integer.
                while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                {
                    long temp;
                    if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out temp))
                    {
                        deltaAlignment.Deltas.Add(temp);
                    }

                    // Read next line.
                    line = streamReader.ReadLine();

                    // Continue reading if blank line found.
                    while (line != null && string.IsNullOrEmpty(line))
                    {
                        line = streamReader.ReadLine();
                    }
                }

                yield return deltaAlignment;

                // Read the next line; null here ends the loop.
                line = streamReader.ReadLine();
            }
            while (line != null);
        }