示例#1
0
文件: NUCmer.cs 项目: cpatmoore/bio
        /// <summary>
        /// Extend the cluster in synteny
        /// </summary>
        /// <param name="synteny">Synteny in which cluster needs to be extened.</param>
        /// <returns>List of delta alignments</returns>
        private List<DeltaAlignment> ExtendClusters(Synteny synteny)
        {

            bool isClusterExtended = false;
            List<DeltaAlignment> deltaAlignments = new List<DeltaAlignment>();
            DeltaAlignment deltaAlignment = null;
            Cluster currentCluster;
            Cluster targetCluster = synteny.Clusters.Last();

            IList<Cluster> clusters = synteny.Clusters;

            // Sort the cluster by first sequence start
            clusters = SortCluster(clusters, FirstSequenceStart);

            IEnumerator<Cluster> previousCluster = clusters.GetEnumerator();
            previousCluster.MoveNext();
            IEnumerator<Cluster> cluster = clusters.GetEnumerator();

            while (cluster.MoveNext())
            {
                currentCluster = cluster.Current;

                if (!isClusterExtended
                    && (currentCluster.IsFused
                        || IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
                {
                    currentCluster.IsFused = true;
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                    continue;
                }

                // Extend the match
                foreach (MatchExtension match in currentCluster.Matches)
                {
                    if (isClusterExtended)
                    {
                        if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset
                            || deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                        {
                            continue;
                        }

                        deltaAlignment.FirstSequenceEnd += match.Length - 1;
                        deltaAlignment.SecondSequenceEnd += match.Length - 1;
                    }
                    else
                    {
                        //TODO: Do we need sequence here? Changed to sequence id.
                        deltaAlignment = DeltaAlignment.NewAlignment(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                currentCluster,
                                match);
                        deltaAlignments.Add(deltaAlignment);

                        // Find the MUM which is a good candidate for extension in reverse direction
                        DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);

                        if (ExtendToPreviousSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignments,
                                deltaAlignment,
                                targetAlignment))
                        {
                            deltaAlignment = targetAlignment;
                        }
                    }

                    int methodName = ModifiedSmithWaterman.ForwardAlignFlag;

                    long targetReference;
                    long targetQuery;
                    if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
                    {
                        // extend till the match in the current cluster
                        MatchExtension nextMatch =
                            currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                        targetReference = nextMatch.ReferenceSequenceOffset;
                        targetQuery = nextMatch.QuerySequenceOffset;

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                    else
                    {
                        // extend till next cluster
                        targetReference = synteny.ReferenceSequence.Count - 1;
                        targetQuery = synteny.QuerySequence.Count() - 1;

                        targetCluster = GetNextCluster(
                                clusters,
                                currentCluster,
                                ref targetReference,
                                ref targetQuery);

                        if (!synteny.Clusters.Contains(targetCluster))
                        {
                            methodName |= ModifiedSmithWaterman.OptimalFlag;
                        }

                        isClusterExtended = ExtendToNextSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignment,
                                targetReference,
                                targetQuery,
                                methodName);
                    }
                }

                if (!synteny.Clusters.Contains(targetCluster))
                {
                    isClusterExtended = false;
                }

                currentCluster.IsFused = true;

                if (!isClusterExtended)
                {
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                }
                else
                {
                    currentCluster = targetCluster;
                }
            }

            return deltaAlignments;
        }
示例#2
0
文件: NUCmer.cs 项目: cpatmoore/bio
        /// <summary>
        /// Process the cluster
        /// 1. Re-map the reference sequence index to original index
        /// 2. Create synteny
        /// 3. Process synteny
        /// </summary>
        /// <param name="clusters">List of clusters of a read.</param>
        /// <returns>List of delta alignments</returns>
        public IEnumerable<DeltaAlignment> ProcessCluster(IList<Cluster> clusters)
        {
            if (clusters == null)
            {
                throw new ArgumentNullException("clusters");
            }

            if (clusters.Count == 0)
            {
                return Enumerable.Empty<DeltaAlignment>();
            }

            bool isReverse = clusters.Any(c => c.IsReverseQueryDirection);
            ISequence currentReference = null;
            List<Synteny> syntenies = new List<Synteny>();
            Synteny currentSynteny = null;
            ISequence querySequence = null;
            ISequence referenceSequence = null;

            _nucmerAligner.SimilarityMatrix = SimilarityMatrix;
            _nucmerAligner.BreakLength = BreakLength;

            long referenceSequenceLength = _internalReferenceSequence.Count;

            foreach (Cluster clusterIterator in clusters)
            {
                List<MatchExtension> clusterMatches;
                if (null != currentSynteny)
                {
                    // Remove the empty clusters (if any)
                    if ((0 < currentSynteny.Clusters.Count)
                            && (0 == currentSynteny.Clusters.Last().Matches.Count))
                    {
                        currentSynteny.Clusters.Remove(
                            currentSynteny.Clusters.Last());
                    }

                    clusterMatches = new List<MatchExtension>();
                    currentSynteny.Clusters.Add(new Cluster(clusterMatches, isReverse));
                }

                foreach (MatchExtension matchIterator in clusterIterator.Matches)
                {
                    ISequence currentQuery = matchIterator.Query;

                    if (matchIterator.ReferenceSequenceOffset < referenceSequenceLength)
                    {
                        currentReference = _internalReferenceSequence;
                    }
                    else
                    {
                        matchIterator.ReferenceSequenceOffset -= referenceSequenceLength + 1;
                    }

                    if ((null == referenceSequence)
                        || (null == querySequence)
                        || (string.Compare(referenceSequence.ID, currentReference.ID, StringComparison.OrdinalIgnoreCase) != 0)
                        || string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        bool found = false;

                        if ((null != querySequence)
                            && (string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) == 0))
                        {
                            // Check if Synteny already exists
                            // If found, mark the synteny and break
                            foreach (Synteny syntenyIterator in syntenies)
                            {
                                if ((String.Compare(
                                        syntenyIterator.ReferenceSequence.ID,
                                        currentReference.ID,
                                        StringComparison.OrdinalIgnoreCase) == 0)
                                    && (String.Compare(
                                        syntenyIterator.QuerySequence.ID,
                                        currentQuery.ID,
                                        StringComparison.OrdinalIgnoreCase) == 0))
                                {
                                    currentSynteny = syntenyIterator;
                                    found = true;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            ProcessSynteny(syntenies);
                        }

                        referenceSequence = currentReference;
                        querySequence = currentQuery;

                        // Remove the empty clusters (if any)
                        if ((null != currentSynteny)
                                && (0 < currentSynteny.Clusters.Count)
                                && (0 == currentSynteny.Clusters.Last().Matches.Count))
                        {
                            currentSynteny.Clusters.Remove(
                                currentSynteny.Clusters.Last());
                        }

                        if (!found)
                        {
                            // Create a Synteny
                            currentSynteny = new Synteny(
                                    currentReference,
                                    currentQuery);

                            // Add a cluster to Synteny
                            syntenies.Add(currentSynteny);
                        }

                        clusterMatches = new List<MatchExtension>();
                        currentSynteny.Clusters.Add(new Cluster(clusterMatches, isReverse));
                    }

                    if (1 < matchIterator.Length)
                    {
                        currentSynteny.Clusters.Last().Matches.Add(matchIterator);
                    }
                }
            }

            return ProcessSynteny(syntenies);
        }
示例#3
0
        /// <summary>
        /// Process the cluster
        /// 1. Re-map the reference sequence index to original index
        /// 2. Create synteny
        /// 3. Process synteny
        /// </summary>
        /// <param name="clusters">List of clusters of a read.</param>
        /// <returns>List of delta alignments</returns>
        public IEnumerable <DeltaAlignment> ProcessCluster(IList <Cluster> clusters)
        {
            if (clusters == null)
            {
                throw new ArgumentNullException("clusters");
            }

            if (clusters.Count == 0)
            {
                return(Enumerable.Empty <DeltaAlignment>());
            }

            bool           isReverse         = clusters.Any(c => c.IsReverseQueryDirection);
            ISequence      currentReference  = null;
            List <Synteny> syntenies         = new List <Synteny>();
            Synteny        currentSynteny    = null;
            ISequence      querySequence     = null;
            ISequence      referenceSequence = null;

            _nucmerAligner.SimilarityMatrix = SimilarityMatrix;
            _nucmerAligner.BreakLength      = BreakLength;

            long referenceSequenceLength = _internalReferenceSequence.Count;

            foreach (Cluster clusterIterator in clusters)
            {
                List <MatchExtension> clusterMatches;
                if (null != currentSynteny)
                {
                    // Remove the empty clusters (if any)
                    if ((0 < currentSynteny.Clusters.Count) &&
                        (0 == currentSynteny.Clusters.Last().Matches.Count))
                    {
                        currentSynteny.Clusters.Remove(
                            currentSynteny.Clusters.Last());
                    }

                    clusterMatches = new List <MatchExtension>();
                    currentSynteny.Clusters.Add(new Cluster(clusterMatches, isReverse));
                }

                foreach (MatchExtension matchIterator in clusterIterator.Matches)
                {
                    ISequence currentQuery = matchIterator.Query;

                    if (matchIterator.ReferenceSequenceOffset < referenceSequenceLength)
                    {
                        currentReference = _internalReferenceSequence;
                    }
                    else
                    {
                        matchIterator.ReferenceSequenceOffset -= referenceSequenceLength + 1;
                    }

                    if ((null == referenceSequence) ||
                        (null == querySequence) ||
                        (string.Compare(referenceSequence.ID, currentReference.ID, StringComparison.OrdinalIgnoreCase) != 0) ||
                        string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        bool found = false;

                        if ((null != querySequence) &&
                            (string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) == 0))
                        {
                            // Check if Synteny already exists
                            // If found, mark the synteny and break
                            foreach (Synteny syntenyIterator in syntenies)
                            {
                                if ((String.Compare(
                                         syntenyIterator.ReferenceSequence.ID,
                                         currentReference.ID,
                                         StringComparison.OrdinalIgnoreCase) == 0) &&
                                    (String.Compare(
                                         syntenyIterator.QuerySequence.ID,
                                         currentQuery.ID,
                                         StringComparison.OrdinalIgnoreCase) == 0))
                                {
                                    currentSynteny = syntenyIterator;
                                    found          = true;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            ProcessSynteny(syntenies);
                        }

                        referenceSequence = currentReference;
                        querySequence     = currentQuery;

                        // Remove the empty clusters (if any)
                        if ((null != currentSynteny) &&
                            (0 < currentSynteny.Clusters.Count) &&
                            (0 == currentSynteny.Clusters.Last().Matches.Count))
                        {
                            currentSynteny.Clusters.Remove(
                                currentSynteny.Clusters.Last());
                        }

                        if (!found)
                        {
                            // Create a Synteny
                            currentSynteny = new Synteny(
                                currentReference,
                                currentQuery);

                            // Add a cluster to Synteny
                            syntenies.Add(currentSynteny);
                        }

                        clusterMatches = new List <MatchExtension>();
                        currentSynteny.Clusters.Add(new Cluster(clusterMatches, isReverse));
                    }

                    if (1 < matchIterator.Length)
                    {
                        currentSynteny.Clusters.Last().Matches.Add(matchIterator);
                    }
                }
            }

            return(ProcessSynteny(syntenies));
        }
示例#4
0
        /// <summary>
        /// Extend the cluster in synteny
        /// </summary>
        /// <param name="synteny">Synteny in which cluster needs to be extened.</param>
        /// <returns>List of delta alignments</returns>
        private List <DeltaAlignment> ExtendClusters(Synteny synteny)
        {
            bool isClusterExtended = false;
            List <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
            DeltaAlignment        deltaAlignment  = null;
            Cluster currentCluster;

            IList <Cluster> clusters = synteny.Clusters;

            // Sort the cluster by first sequence start
            clusters = SortCluster(clusters, FirstSequenceStart);

            Cluster targetCluster = synteny.Clusters.Last();

            IEnumerator <Cluster> previousCluster = clusters.GetEnumerator();

            previousCluster.MoveNext();
            IEnumerator <Cluster> cluster = clusters.GetEnumerator();

            while (cluster.MoveNext())
            {
                currentCluster = cluster.Current;

                if (!isClusterExtended &&
                    (currentCluster.IsFused ||
                     IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
                {
                    currentCluster.IsFused = true;
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                    continue;
                }

                // Extend the match
                foreach (MatchExtension match in currentCluster.Matches)
                {
                    if (isClusterExtended)
                    {
                        if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset ||
                            deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                        {
                            continue;
                        }

                        deltaAlignment.FirstSequenceEnd  += match.Length - 1;
                        deltaAlignment.SecondSequenceEnd += match.Length - 1;
                    }
                    else
                    {
                        //TODO: Do we need sequence here? Changed to sequence id.
                        deltaAlignment = DeltaAlignment.NewAlignment(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            currentCluster,
                            match);
                        deltaAlignments.Add(deltaAlignment);

                        // Find the MUM which is a good candidate for extension in reverse direction
                        DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);
                        if (targetAlignment != deltaAlignment &&
                            //TODO: NEED TO VERIFY THIS!!!
                            //if (
                            ExtendToPreviousSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignments,
                                deltaAlignment,
                                targetAlignment))
                        {
                            deltaAlignment = targetAlignment;
                        }
                    }

                    int methodName = ModifiedSmithWaterman.ForwardAlignFlag;

                    long targetReference;
                    long targetQuery;
                    if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
                    {
                        // extend till the match in the current cluster
                        MatchExtension nextMatch =
                            currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                        targetReference = nextMatch.ReferenceSequenceOffset;
                        targetQuery     = nextMatch.QuerySequenceOffset;

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                    else
                    {
                        // extend till next cluster
                        targetReference = synteny.ReferenceSequence.Count - 1;
                        targetQuery     = synteny.QuerySequence.Count() - 1;

                        targetCluster = GetNextCluster(
                            clusters,
                            currentCluster,
                            ref targetReference,
                            ref targetQuery);

                        if (!synteny.Clusters.Contains(targetCluster))
                        {
                            methodName |= ModifiedSmithWaterman.OptimalFlag;
                        }

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                }

                if (!synteny.Clusters.Contains(targetCluster))
                {
                    isClusterExtended = false;
                }

                currentCluster.IsFused = true;

                if (!isClusterExtended)
                {
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                }
                else
                {
                    currentCluster = targetCluster;
                }
            }

            return(deltaAlignments);
        }
示例#5
0
        /// <summary>
        /// Process the cluster
        /// 1. Re-map the reference sequence index to original index
        /// 2. Create synteny
        /// 3. Process synteny
        /// </summary>
        /// <param name="referenceSequenceList">List of reference sequences</param>
        /// <param name="clusters">List of clusters</param>
        /// <returns>List of delta alignments</returns>
        protected override IList <DeltaAlignment> ProcessCluster(
            IList <ISequence> referenceSequenceList,
            IList <Cluster> clusters)
        {
            ISequence       currentReference = null;
            ISequence       currentQuery     = null;
            IList <Synteny> syntenies        = new List <Synteny>();
            IList <MaxUniqueMatchExtension> clusterMatches = null;
            Synteny   currentSynteny    = null;
            ISequence referenceSequence = null;
            ISequence querySequence     = null;
            bool      found             = false;

            nucmerAligner.SimilarityMatrix = SimilarityMatrix;
            nucmerAligner.BreakLength      = BreakLength;

            foreach (Cluster clusterIterator in clusters)
            {
                if (null != currentSynteny)
                {
                    // Remove the empty clusters (if any)
                    if ((null != currentSynteny) &&
                        (0 < currentSynteny.Clusters.Count) &&
                        (0 == currentSynteny.Clusters.Last().Matches.Count))
                    {
                        currentSynteny.Clusters.Remove(
                            currentSynteny.Clusters.Last());
                    }

                    clusterMatches = new List <MaxUniqueMatchExtension>();
                    currentSynteny.Clusters.Add(new Cluster(clusterMatches));
                }

                foreach (MaxUniqueMatchExtension matchIterator in clusterIterator.Matches)
                {
                    currentQuery = matchIterator.Query;

                    // Re-map the reference coordinate back to its original sequence
                    foreach (ISequence sequence in referenceSequenceList)
                    {
                        if (matchIterator.FirstSequenceStart < sequence.Count)
                        {
                            currentReference = sequence;
                            break;
                        }
                        else
                        {
                            matchIterator.FirstSequenceStart -= sequence.Count + 1;
                        }
                    }

                    if ((null == referenceSequence) ||
                        (null == querySequence) ||
                        (string.Compare(referenceSequence.ID, currentReference.ID, StringComparison.OrdinalIgnoreCase) != 0) ||
                        string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        found = false;

                        if ((null != querySequence) &&
                            (string.Compare(querySequence.ID, currentQuery.ID, StringComparison.OrdinalIgnoreCase) == 0))
                        {
                            // Check if Synteny already exists
                            // If found, mark the synteny and break
                            foreach (Synteny syntenyIterator in syntenies)
                            {
                                if ((String.Compare(
                                         syntenyIterator.ReferenceSequence.ID,
                                         currentReference.ID,
                                         StringComparison.OrdinalIgnoreCase) == 0) &&
                                    (String.Compare(
                                         syntenyIterator.QuerySequence.ID,
                                         currentQuery.ID,
                                         StringComparison.OrdinalIgnoreCase) == 0))
                                {
                                    currentSynteny = syntenyIterator;
                                    found          = true;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            ProcessSynteny(syntenies);
                        }

                        referenceSequence = currentReference;
                        querySequence     = currentQuery;

                        // Remove the empty clusters (if any)
                        if ((null != currentSynteny) &&
                            (0 < currentSynteny.Clusters.Count) &&
                            (0 == currentSynteny.Clusters.Last().Matches.Count))
                        {
                            currentSynteny.Clusters.Remove(
                                currentSynteny.Clusters.Last());
                        }

                        if (!found)
                        {
                            // Create a Synteny
                            currentSynteny = new Synteny(
                                currentReference,
                                currentQuery);

                            // Add a cluster to Synteny
                            syntenies.Add(currentSynteny);
                        }

                        clusterMatches = new List <MaxUniqueMatchExtension>();
                        currentSynteny.Clusters.Add(new Cluster(clusterMatches));
                    }

                    if (1 < matchIterator.Length)
                    {
                        currentSynteny.Clusters.Last().Matches.Add(matchIterator);
                    }
                }
            }

            return(ProcessSynteny(syntenies));
        }