Example #1
0
        /// <summary>
        /// Derives one weight per guide-tree leaf from that leaf's path length to the root.
        /// </summary>
        /// <remarks>
        /// For each leaf, the lengths of the edges met while climbing to the root are added
        /// together. Every sum is then scaled by (number of leaves / total of all sums), and
        /// the stored weight is the reciprocal of that scaled value.
        /// </remarks>
        /// <param name="tree">a binary guide tree</param>
        public SequenceWeighting(BinaryGuideTree tree)
        {
            _weights = new float[tree.NumberOfLeaves];

            // Pass 1: walk from each leaf up to the root, accumulating edge lengths.
            // NOTE(review): relies on tree.Nodes[0 .. NumberOfLeaves - 1] being the leaves - confirm.
            for (int leaf = 0; leaf < _weights.Length; ++leaf)
            {
                _weights[leaf] = 0;

                for (BinaryGuideTreeNode current = tree.Nodes[leaf]; !current.IsRoot; current = current.Parent)
                {
                    BinaryGuideTreeEdge upward = current.ParentEdge;
                    _weights[leaf] += upward.Length;
                }
            }

            // Pass 2: total of all accumulated path lengths.
            float total = 0;
            foreach (float pathLength in _weights)
            {
                total += pathLength;
            }

            // Pass 3: scale the path lengths so that they average 1, then invert each one.
            for (int leaf = 0; leaf < _weights.Length; ++leaf)
            {
                _weights[leaf] = _weights[leaf] * _weights.Length / total;

                // The scaled value is read back through the Weights member, as before.
                _weights[leaf] = 1 / Weights[leaf];
            }
        }
Example #2
0
        /// <summary>
        /// Computes a weight for every leaf of the guide tree.
        /// </summary>
        /// <remarks>
        /// The raw value of a leaf is the summed length of the edges between it and the root.
        /// Raw values are multiplied by the leaf count and divided by their overall total;
        /// the final weight is one divided by that result.
        /// </remarks>
        /// <param name="tree">a binary guide tree</param>
        public SequenceWeighting(BinaryGuideTree tree)
        {
            _weights = new float[tree.NumberOfLeaves];
            int leafCount = _weights.Length;

            // Step 1: distance of each leaf from the root, measured in edge lengths.
            // NOTE(review): assumes the first NumberOfLeaves entries of tree.Nodes are the leaves - confirm.
            for (int index = 0; index < leafCount; ++index)
            {
                _weights[index] = 0;

                BinaryGuideTreeNode visited = tree.Nodes[index];
                while (!visited.IsRoot)
                {
                    BinaryGuideTreeEdge toParent = visited.ParentEdge;
                    _weights[index] += toParent.Length;
                    visited = visited.Parent;
                }
            }

            // Step 2: sum of all root distances.
            float distanceSum = 0;
            int cursor = 0;
            while (cursor < leafCount)
            {
                distanceSum += _weights[cursor];
                ++cursor;
            }

            // Step 3: rescale (mean of the distances becomes 1) and take the reciprocal.
            for (int index = 0; index < leafCount; ++index)
            {
                _weights[index] = _weights[index] * leafCount / distanceSum;
                _weights[index] = 1 / Weights[index];
            }
        }
Example #3
0
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <remarks>
        /// Stage 1 builds a guide tree from k-mer distances and aligns progressively.
        /// Stage 2 (when enabled) rebuilds the tree from the Kimura distance matrix of the
        /// current alignment and aligns progressively again. Stage 3 refines the alignment
        /// by cutting guide-tree edges and re-aligning the two resulting profiles.
        /// The best-scoring alignment and its score are stored in the aligner's fields.
        /// </remarks>
        /// <param name="inputSequences">Sequences to align; enumerated once into a list.</param>
        /// <returns>
        /// A newly created list of sequence alignments. This method does not add any
        /// entries to it; the alignment itself is kept in the aligner's fields.
        /// </returns>
        /// <exception cref="ArgumentException">The configured profile aligner name is not recognised.</exception>
        public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
        {
            List<ISequence> sequences = inputSequences.ToList();

            // Initializations
            if (sequences.Count > 0)
            {
                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_alphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _alphabet;
                }
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;

            switch (_profileAlignerName)
            {
                case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                    if (_degreeOfParallelism == 1)
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerParallel(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    break;

                case (ProfileAlignerNames.SmithWatermanProfileAligner):
                    if (_degreeOfParallelism == 1)
                    {
                        profileAligner = new SmithWatermanProfileAlignerSerial(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new SmithWatermanProfileAlignerParallel(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    break;

                default:
                    throw new ArgumentException("Invalid profile aligner name");
            }

            _alignedSequences = new List<ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            Performance.Snapshot("Stage 1");
            // Generate DistanceMatrix
            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel(
                    kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreA)
            {
                _alignmentScoreA = currentScore;
                _alignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (_alignmentScoreA > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreA;
                _alignedSequences = _alignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Fast path: stages 2 and 3 are skipped and simply mirror the stage 1 result.
                _alignedSequencesB = _alignedSequencesA;
                _alignedSequencesC = _alignedSequencesA;
                _alignmentScoreB = _alignmentScoreA;
                _alignmentScoreC = _alignmentScoreA;
            }
            else
            {
                BinaryGuideTree binaryGuideTreeB = null;
                IHierarchicalClustering hierarcicalClusteringB = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (PAMSAMMultipleSequenceAligner.UseStageB)
                {
                    // STAGE 2
                    Performance.Snapshot("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment.
                    // This stage runs exactly once (the former loop always exited after one pass).
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > _alignmentScoreB)
                    {
                        _alignmentScoreB = currentScore;
                        _alignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (_alignmentScoreB > _alignmentScore)
                    {
                        _alignmentScore = _alignmentScoreB;
                        _alignedSequences = _alignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }

                // STAGE 3
                Performance.Snapshot("Stage 3");

                // Refinement is capped at a single pass (an earlier cap of
                // sequences.Count * 2 - 2 is disabled) and is skipped for two sequences.
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;

                // Start stage 3 from a copy of the best alignment found so far.
                _alignedSequencesC = new List<ISequence>(sequences.Count);
                for (int i = 0; i < sequences.Count; ++i)
                {
                    _alignedSequencesC.Add(
                        new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet),
                                     _alignedSequences[i].ToArray())
                        {
                            ID = _alignedSequences[i].ID,
                            Metadata = _alignedSequences[i].Metadata
                        });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    Performance.Snapshot("Refinement iter " + refinementTime.ToString());
                    bool needRefinement = false;

                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        // Cut the tree at this edge to obtain two groups of leaf indices.
                        List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        // Assigned by ProfileExtraction through the out parameter.
                        List<int>[] allIndelPositions;
                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                            _alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                        List<int>[] eStrings = new List<int>[2];

                        // The profile with fewer sequences is passed as the first argument;
                        // eStrings[k] always belongs to leafNodeIndices[k].
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                            {
                                // Keep the sequence being replaced so that its ID and metadata
                                // can be carried over to the realigned sequence.
                                ISequence previous = _alignedSequencesC[i];

                                // Copy every symbol except those at the listed indel positions.
                                List<byte> seqBytes = new List<byte>();
                                int indexAllIndel = 0;
                                for (int j = 0; j < previous.Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previous[j]);
                                    }
                                }

                                ISequence realigned = profileAligner.GenerateSequenceFromEString(
                                    eStrings[set],
                                    new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));

                                // Previously these were copied from the already-replaced element,
                                // which was a self-assignment and dropped the original values.
                                realigned.ID = previous.ID;
                                Sequence realignedSequence = realigned as Sequence;
                                if (realignedSequence != null)
                                {
                                    realignedSequence.Metadata = previous.Metadata;
                                }

                                _alignedSequencesC[i] = realigned;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(_alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreC)
                        {
                            _alignmentScoreC = currentScore;
                            needRefinement = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }

                    if (!needRefinement)
                    {
                        // No edge cut improved the score: stop refining.
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }

                if (_alignmentScoreC > _alignmentScore)
                {
                    _alignmentScore = _alignmentScoreC;
                    _alignedSequences = _alignedSequencesC;
                }
                Performance.Snapshot("Stop Stage 3");
            }

            //just for the purpose of integrating PW and MSA with the same output
            IList<Bio.Algorithms.Alignment.ISequenceAlignment> results = new List<Bio.Algorithms.Alignment.ISequenceAlignment>();

            return results;
        }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <remarks>
        /// Stage 1 builds a guide tree from k-mer distances and aligns progressively.
        /// Stage 2 (when enabled) rebuilds the tree from the Kimura distance matrix of the
        /// current alignment and aligns progressively again. Stage 3 refines the alignment
        /// by cutting guide-tree edges and re-aligning the two resulting profiles.
        /// Nothing is returned: the per-stage and best alignments and their scores are
        /// written to the AlignedSequences* / AlignmentScore* members.
        /// </remarks>
        /// <param name="sequences">Input sequences</param>
        /// <exception cref="ArgumentException">The configured profile aligner name is not recognised.</exception>
        private void DoAlignment(IList<ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;
            switch (ProfileAlignerName)
            {
                case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;

                case (ProfileAlignerNames.SmithWatermanProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new SmithWatermanProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new SmithWatermanProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;

                default:
                    throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List<ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel(
                    kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Fast path: stages 2 and 3 are skipped and simply mirror the stage 1 result.
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB = this.AlignmentScoreA;
                this.AlignmentScoreC = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree binaryGuideTreeB = null;
                IHierarchicalClustering hierarcicalClusteringB = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment.
                    // This stage runs exactly once (the former loop always exited after one pass).
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > this.AlignmentScoreB)
                    {
                        this.AlignmentScoreB = currentScore;
                        this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }

                // STAGE 3
                ReportLog("Stage 3");

                // Refinement is capped at a single pass and skipped for two sequences.
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;

                // Start stage 3 from a copy of the best alignment found so far.
                this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    // Only the ID is copied; the metadata dictionary is deliberately not
                    // shared with the source sequence (no shallow copy).
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                    {
                        ID = t.ID,
                    });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;

                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        // Cut the tree at this edge to obtain two groups of leaf indices.
                        List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        // Assigned by ProfileExtraction through the out parameter.
                        List<int>[] allIndelPositions;
                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                            this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                        List<int>[] eStrings = new List<int>[2];

                        // The profile with fewer sequences is passed as the first argument;
                        // eStrings[k] always belongs to leafNodeIndices[k].
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                // Keep the sequence being replaced so that its ID can be
                                // carried over to the realigned sequence.
                                ISequence previous = this.AlignedSequencesC[i];

                                // Copy every symbol except those at the listed indel positions.
                                List<byte> seqBytes = new List<byte>();
                                int indexAllIndel = 0;
                                for (int j = 0; j < previous.Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previous[j]);
                                    }
                                }

                                ISequence realigned = profileAligner.GenerateSequenceFromEString(
                                    eStrings[set],
                                    new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                                // Previously the ID was copied from the already-replaced element,
                                // which was a self-assignment and dropped the original ID.
                                // Metadata is deliberately not copied (no shallow copy of the dictionary).
                                realigned.ID = previous.ID;

                                this.AlignedSequencesC[i] = realigned;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }

                    if (!needRefinement)
                    {
                        // No edge cut improved the score: stop refining.
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }

                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <returns>Alignment results</returns>
        private void DoAlignment(IList <ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;

            switch (ProfileAlignerName)
            {
            case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new NeedlemanWunschProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            case (ProfileAlignerNames.SmithWatermanProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new SmithWatermanProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new SmithWatermanProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            default:
                throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List <ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA   = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore   = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB   = this.AlignmentScoreA;
                this.AlignmentScoreC   = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree               binaryGuideTreeB              = null;
                IHierarchicalClustering       hierarcicalClusteringB        = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");
                    // Generate DistanceMatrix from Multiple Sequence Alignment

                    while (true)
                    {
                        kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                        // Hierarchical clustering
                        hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                     (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                        // Generate Guide Tree
                        binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                        BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                        binaryGuideTree = binaryGuideTreeB;

                        // Progressive Alignment
                        IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                        progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreB)
                        {
                            this.AlignmentScoreB   = currentScore;
                            this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                        }
                        break;
                    }
                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore   = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                ReportLog("Stage 3");
                // refinement
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;
                this.AlignedSequencesC = new List <ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                    {
                        ID = t.ID,
                        // Do not shallow copy dictionary
                        //Metadata = t.Metadata
                    });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        List <int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        List <int>[] allIndelPositions = new List <int> [2];

                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        List <int>[]        eStrings = new List <int> [2];

                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                //Sequence seq = new Sequence(_alphabet, "");
                                List <byte> seqBytes = new List <byte>();

                                int indexAllIndel = 0;
                                for (int j = 0; j < this.AlignedSequencesC[i].Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(this.AlignedSequencesC[i][j]);
                                    }
                                }

                                this.AlignedSequencesC[i]    = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));
                                this.AlignedSequencesC[i].ID = this.AlignedSequencesC[i].ID;
                                // Do not shallow copy dictionary
                                //(_alignedSequencesC[i] as Sequence).Metadata = _alignedSequencesC[i].Metadata;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement       = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                         (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }
                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore   = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }
Example #6
0
        /// <summary>
        /// Main progressive alignment algorithm: aligns a set of sequences guided by
        /// a binary tree.
        /// </summary>
        /// <remarks>
        /// Steps: (1) build a profile for every leaf node from its input sequence,
        /// (2) walk the internal nodes in list order and align the profiles of the two
        /// children of every node flagged NeedReAlignment, recording an eString on each
        /// child, (3) expand every input sequence by applying the eStrings found on the
        /// path from its leaf up to the root. The expanded sequences are stored in
        /// _alignedSequences at the same index as the corresponding input sequence.
        /// </remarks>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        /// <exception cref="ArgumentNullException">sequences or tree is null</exception>
        /// <exception cref="ArgumentException">sequences is empty</exception>
        public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
        {
            // Validate everything up front so no work is done on unusable input.
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }

            // Read the switch once so every stage of this call agrees on it.
            bool useWeights = PAMSAMMultipleSequenceAligner.UseWeights;
            SequenceWeighting sequenceWeighting = useWeights ? new SequenceWeighting(tree) : null;

            IAlphabet alphabet = sequences[0].Alphabet;

            // NOTE: the exception below is raised inside Parallel.For, so callers observe it
            // wrapped in an AggregateException rather than as a bare ArgumentException.
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            // Generate profile for leaf nodes
            if (useWeights)
            {
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight           = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                BinaryGuideTreeNode current = tree.Nodes[i];
                if (!current.NeedReAlignment)
                {
                    continue;
                }

                // pull out its children
                _nodeA = current.LeftChildren;
                _nodeB = current.RightChildren;

                if (useWeights)
                {
                    _profileAligner.Weights    = new float[2];
                    _profileAligner.Weights[0] = _nodeA.Weight;
                    _profileAligner.Weights[1] = _nodeB.Weight;

                    // an internal node weighs as much as its two children together
                    current.Weight = _nodeA.Weight + _nodeB.Weight;
                }

                // align two profiles; the profile holding fewer sequences is always passed
                // first, so AlignedA/AlignedB must be mapped back to the matching child
                if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                {
                    // assign aligned profiles to the current internal node
                    current.ProfileAlignment = (ProfileAlignment)_profileAligner.Align(
                        _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);

                    // generate eString for the children nodes
                    _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                }
                else
                {
                    // assign aligned profiles to the current internal node
                    current.ProfileAlignment = (ProfileAlignment)_profileAligner.Align(
                        _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);

                    // generate eString for the children nodes (arguments were swapped)
                    _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                }

                // children node profiles can be deleted
                _nodeA.ProfileAlignment.Clear();
                _nodeB.ProfileAlignment.Clear();
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List<ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                // Chain the caught exception itself so the real cause and its stack trace
                // stay reachable through InnerException.
                throw new Exception("Out of memory", ex);
            }

            // Pre-fill with placeholders so the parallel loop can write each slot by index.
            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                ISequence seq            = sequences[i];
                BinaryGuideTreeNode node = tree.Nodes[i];

                // climb from the leaf to the root, applying each node's eString on the way
                while (!node.IsRoot)
                {
                    seq  = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
Example #7
0
        /// <summary>
        ///     Runs a progressive alignment over the given sequences and hands back
        ///     the sequences the aligner produced.
        /// </summary>
        /// <param name="sequences">sequences to align</param>
        /// <param name="binaryGuidTree">guide tree that drives the alignment</param>
        /// <returns>the aligned sequences exposed by the aligner after the run</returns>
        private List<ISequence> GetProgressiveAlignerAlignedSequences(IList<ISequence> sequences,
                                                                      BinaryGuideTree binaryGuidTree)
        {
            // A fresh aligner is built per call on top of the shared profile aligner.
            IProgressiveAligner aligner = new ProgressiveAligner(profileAligner);

            aligner.Align(sequences, binaryGuidTree);
            List<ISequence> alignedResult = aligner.AlignedSequences;

            return alignedResult;
        }
Example #8
0
        /// <summary>
        ///     Cuts the guide tree at one edge, extracts a profile for each side of the cut
        ///     from stage2ExpectedSequences and aligns the two profiles with profileAligner.
        /// </summary>
        /// <param name="edgeIndex">index of the edge to cut</param>
        /// <param name="binaryTree">guide tree to cut</param>
        private void GetProfiles(int edgeIndex, BinaryGuideTree binaryTree)
        {
            // Leaf indices on either side of the cut edge.
            List<int>[] leavesBySide = binaryTree.SeparateSequencesByCuttingTree(edgeIndex);
            List<int> firstSide = leavesBySide[0];
            List<int> secondSide = leavesBySide[1];

            // The out argument is required by ProfileExtraction but is not used here.
            List<int>[] unusedPositions;
            IProfileAlignment[] extractedProfiles = ProfileAlignment.ProfileExtraction(
                stage2ExpectedSequences, firstSide, secondSide, out unusedPositions);

            // Return value is discarded; presumably callers read the outcome from
            // profileAligner afterwards (confirm against the call sites).
            IProfileAlignment firstProfile = extractedProfiles[0];
            IProfileAlignment secondProfile = extractedProfiles[1];
            profileAligner.Align(firstProfile, secondProfile);
        }
Example #9
0
        /// <summary>
        ///     Checks a binary guide tree against the expected values stored under an xml node:
        ///     root ID, number of leaves, the ordered list of node IDs and the IDs of the
        ///     left/right children.
        /// </summary>
        /// <param name="binaryTree">tree to validate</param>
        /// <param name="nodeName">xml node holding the expected values</param>
        private void ValidateBinaryTree(BinaryGuideTree binaryTree, string nodeName)
        {
            // Expected values, read in a fixed order from the test configuration.
            string expectedRootId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RootId);
            string expectedLeavesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LeavesCount);
            string expectedLeftIds = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesLeftChild);
            string expectedRightIds = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesRightChild);
            string expectedNodeList = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);
            string[] expectedNodeIds = expectedNodeList.Split(',');

            // A null provider selects the ToString(IFormatProvider) overload.
            IFormatProvider noProvider = null;

            string actualRootId = binaryTree.Root.ID.ToString(noProvider);
            Assert.IsTrue(expectedRootId.Contains(actualRootId));

            string actualLeavesCount = binaryTree.NumberOfLeaves.ToString(noProvider);
            Assert.IsTrue(expectedLeavesCount.Contains(actualLeavesCount));

            // Node IDs must match the expected list position by position; child IDs are
            // only checked for containment in the expected strings.
            int position = 0;
            foreach (BinaryGuideTreeNode current in binaryTree.Nodes)
            {
                Assert.AreEqual(expectedNodeIds[position], current.ID.ToString(noProvider));

                if (current.LeftChildren != null)
                {
                    string leftId = current.LeftChildren.ID.ToString(noProvider);
                    Assert.IsTrue(expectedLeftIds.Contains(leftId));
                }

                if (current.RightChildren != null)
                {
                    string rightId = current.RightChildren.ID.ToString(noProvider);
                    Assert.IsTrue(expectedRightIds.Contains(rightId));
                }

                position += 1;
            }
        }
Example #10
0
        /// <summary>
        ///     Builds a binary guide tree from a hierarchical clustering result.
        /// </summary>
        /// <param name="hierarchicalClustering">clustering the tree is constructed from</param>
        /// <returns>the newly constructed guide tree</returns>
        private static BinaryGuideTree GetBinaryTree(IHierarchicalClustering hierarchicalClustering)
        {
            // Thin wrapper: the tree constructor does all the work.
            return new BinaryGuideTree(hierarchicalClustering);
        }
Example #11
0
        /// <summary>
        /// Main progressive alignment algorithm: aligns a set of sequences guided by
        /// a binary tree.
        /// </summary>
        /// <remarks>
        /// Steps: (1) build a profile for every leaf node from its input sequence,
        /// (2) walk the internal nodes in list order and align the profiles of the two
        /// children of every node flagged NeedReAlignment, recording an eString on each
        /// child, (3) expand every input sequence by applying the eStrings found on the
        /// path from its leaf up to the root. The expanded sequences are stored in
        /// _alignedSequences at the same index as the corresponding input sequence.
        /// </remarks>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        /// <exception cref="ArgumentNullException">sequences or tree is null</exception>
        /// <exception cref="ArgumentException">sequences is empty</exception>
        public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
        {
            // Validate everything up front so no work is done on unusable input.
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }

            // Read the switch once so every stage of this call agrees on it.
            bool useWeights = PAMSAMMultipleSequenceAligner.UseWeights;
            SequenceWeighting sequenceWeighting = useWeights ? new SequenceWeighting(tree) : null;

            IAlphabet alphabet = sequences[0].Alphabet;

            // NOTE: the exception below is raised inside Parallel.For, so callers observe it
            // wrapped in an AggregateException rather than as a bare ArgumentException.
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            // Generate profile for leaf nodes
            if (useWeights)
            {
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                BinaryGuideTreeNode current = tree.Nodes[i];
                if (!current.NeedReAlignment)
                {
                    continue;
                }

                // pull out its children
                _nodeA = current.LeftChildren;
                _nodeB = current.RightChildren;

                if (useWeights)
                {
                    _profileAligner.Weights = new float[2];
                    _profileAligner.Weights[0] = _nodeA.Weight;
                    _profileAligner.Weights[1] = _nodeB.Weight;

                    // an internal node weighs as much as its two children together
                    current.Weight = _nodeA.Weight + _nodeB.Weight;
                }

                // align two profiles; the profile holding fewer sequences is always passed
                // first, so AlignedA/AlignedB must be mapped back to the matching child
                if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                {
                    // assign aligned profiles to the current internal node
                    current.ProfileAlignment = (ProfileAlignment)_profileAligner.Align(
                                                _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);

                    // generate eString for the children nodes
                    _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                }
                else
                {
                    // assign aligned profiles to the current internal node
                    current.ProfileAlignment = (ProfileAlignment)_profileAligner.Align(
                                                _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);

                    // generate eString for the children nodes (arguments were swapped)
                    _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                }

                // children node profiles can be deleted
                _nodeA.ProfileAlignment.Clear();
                _nodeB.ProfileAlignment.Clear();
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List<ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                // Chain the caught exception itself so the real cause and its stack trace
                // stay reachable through InnerException.
                throw new Exception("Out of memory", ex);
            }

            // Pre-fill with placeholders so the parallel loop can write each slot by index.
            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node = tree.Nodes[i];

                // climb from the leaf to the root, applying each node's eString on the way
                while (!node.IsRoot)
                {
                    seq = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
Example #12
0
        /// <summary>
        /// Cut a tree at an edge to generate 2 subtrees
        /// </summary>
        /// <remarks>
        /// The cut is made in place: the edge's child node loses its parent link and the
        /// edge's parent node loses the matching child link, so the node graph of this
        /// tree is modified by the call. Both returned trees share this tree's edge list.
        /// </remarks>
        /// <param name="edgeIndex">zero-based edge index</param>
        /// <returns>return[0] is the subtree with the same root as the original tree;
        ///          return[1] is the subtree rooted below the cutting edge</returns>
        /// <exception cref="ArgumentException">edgeIndex is outside the edge list</exception>
        /// <exception cref="Exception">the selected edge has no child node</exception>
        public BinaryGuideTree[] CutTree(int edgeIndex)
        {
            if (edgeIndex < 0 || edgeIndex >= _edges.Count)
            {
                throw new ArgumentException(string.Format("The edge ID provided when cutting the binary tree was not available. Given edge ID: {0}, available edges: {1}", edgeIndex, _edges.Count));
            }

            var cutEdge   = _edges[edgeIndex];
            var childNode = cutEdge.ChildNode;
            if (childNode == null)
            {
                throw new Exception("The edge specified was not properly extended to a child node.Edge ID: " + edgeIndex);
            }

            // Detach the child from its parent ...
            childNode.Parent = null;

            // ... and the parent from the child. The left slot may already be empty
            // (for example after an earlier cut), so it is checked before its ID is read.
            var parentNode = cutEdge.ParentNode;
            if (parentNode.LeftChildren != null && parentNode.LeftChildren.ID == childNode.ID)
            {
                parentNode.LeftChildren = null;
            }
            else
            {
                parentNode.RightChildren = null;
            }

            // generate two new trees
            BinaryGuideTree treeA = new BinaryGuideTree(_root);
            BinaryGuideTree treeB = new BinaryGuideTree(childNode);

            treeA.Edges = _edges;
            treeB.Edges = _edges;

            // pull the subtree nodes out for the two new roots
            treeA.Nodes = (List<BinaryGuideTreeNode>)ExtractSubTreeNodes(treeA.Root);
            treeB.Nodes = (List<BinaryGuideTreeNode>)ExtractSubTreeNodes(treeB.Root);

            treeA.NumberOfNodes = treeA.Nodes.Count;
            treeB.NumberOfNodes = treeB.Nodes.Count;

            // recount the leaves of each subtree
            int leavesInA = 0;
            foreach (BinaryGuideTreeNode candidate in treeA.Nodes)
            {
                if (candidate.IsLeaf)
                {
                    ++leavesInA;
                }
            }
            treeA.NumberOfLeaves = leavesInA;

            int leavesInB = 0;
            foreach (BinaryGuideTreeNode candidate in treeB.Nodes)
            {
                if (candidate.IsLeaf)
                {
                    ++leavesInB;
                }
            }
            treeB.NumberOfLeaves = leavesInB;

            return new BinaryGuideTree[2] { treeA, treeB };
        }
Example #13
0
        /// <summary>
        /// Compare two guide (sub)trees and mark the nodes that need to be re-aligned.
        ///
        /// The internal nodes of tree A are visited in list order, starting at index
        /// NumberOfLeaves; the list is expected to hold children before their parents.
        /// When visiting an internal node, if any child node needs to be re-aligned, the
        /// node needs to be re-aligned too. If the two children are both unmarked, and the
        /// two children nodes are also having the same parent in tree B, this internal node
        /// does not need to be re-aligned, and is assigned the ID of that parent node in
        /// tree B. In every other case the node is marked for re-alignment.
        /// </summary>
        /// <param name="treeA">binary guide (sub)tree; its internal nodes are updated in place</param>
        /// <param name="treeB">binary guide (sub)tree used as the reference</param>
        /// <exception cref="ArgumentNullException">treeA or treeB is null</exception>
        /// <exception cref="ArgumentException">the trees differ in node or leaf count</exception>
        public static void CompareTwoTrees(BinaryGuideTree treeA, BinaryGuideTree treeB)
        {
            if (treeA == null)
            {
                throw new ArgumentNullException("treeA");
            }

            if (treeB == null)
            {
                throw new ArgumentNullException("treeB");
            }

            if (treeA.NumberOfNodes != treeB.NumberOfNodes || treeA.NumberOfLeaves != treeB.NumberOfLeaves)
            {
                throw new ArgumentException("The two trees are not comparable");
            }

            // Map every node ID of tree B to its position in treeB.Nodes.
            Dictionary<int, int> nodeID2ListIndex = new Dictionary<int, int>(treeB.NumberOfNodes);
            for (int i = 0; i < treeB.NumberOfNodes; ++i)
            {
                nodeID2ListIndex[treeB.Nodes[i].ID] = i;
            }

            for (int i = treeA.NumberOfLeaves; i < treeA.NumberOfNodes; ++i)
            {
                BinaryGuideTreeNode node       = treeA.Nodes[i];
                BinaryGuideTreeNode leftChild  = node.LeftChildren;
                BinaryGuideTreeNode rightChild = node.RightChildren;

                // A changed child forces the parent to be re-aligned as well.
                if (leftChild.NeedReAlignment || rightChild.NeedReAlignment)
                {
                    node.NeedReAlignment = true;
                    continue;
                }

                // Both children must exist in tree B for the subtree to be reusable.
                int leftIndexInB;
                if (!nodeID2ListIndex.TryGetValue(leftChild.ID, out leftIndexInB) ||
                    !nodeID2ListIndex.ContainsKey(rightChild.ID))
                {
                    node.NeedReAlignment = true;
                    continue;
                }

                // The left child's parent in tree B must also own the right child. A missing
                // parent (the left child is a root in B) or a missing child slot on that
                // parent counts as a mismatch; a parent without a left child is always
                // treated as a mismatch.
                BinaryGuideTreeNode nodeB = treeB.Nodes[leftIndexInB].Parent;
                bool sameParentInB =
                    nodeB != null &&
                    nodeB.LeftChildren != null &&
                    (nodeB.LeftChildren.ID == rightChild.ID ||
                     (nodeB.RightChildren != null && nodeB.RightChildren.ID == rightChild.ID));

                if (sameParentInB)
                {
                    node.NeedReAlignment = false;
                    node.ID = nodeB.ID;
                }
                else
                {
                    node.NeedReAlignment = true;
                }
            }
        }
Example #14
0
        /// <summary>
        /// Cut a tree at an edge to generate 2 subtrees
        /// </summary>
        /// <remarks>
        /// The cut is made in place: the edge's child node loses its parent link and the
        /// edge's parent node loses the matching child link, so the node graph of this
        /// tree is modified by the call. Both returned trees share this tree's edge list.
        /// </remarks>
        /// <param name="edgeIndex">zero-based edge index</param>
        /// <returns>return[0] is the subtree with the same root as the original tree;
        ///          return[1] is the subtree rooted below the cutting edge</returns>
        /// <exception cref="ArgumentException">edgeIndex is outside the edge list</exception>
        /// <exception cref="Exception">the selected edge has no child node</exception>
        public BinaryGuideTree[] CutTree(int edgeIndex)
        {
            if (edgeIndex < 0 || edgeIndex >= _edges.Count)
            {
                throw new ArgumentException(string.Format("The edge ID provided when cutting the binary tree was not available. Given edge ID: {0}, available edges: {1}", edgeIndex, _edges.Count));
            }

            var cutEdge = _edges[edgeIndex];
            var childNode = cutEdge.ChildNode;
            if (childNode == null)
            {
                throw new Exception("The edge specified was not properly extended to a child node.Edge ID: " + edgeIndex);
            }

            // Detach the child from its parent ...
            childNode.Parent = null;

            // ... and the parent from the child. The left slot may already be empty
            // (for example after an earlier cut), so it is checked before its ID is read.
            var parentNode = cutEdge.ParentNode;
            if (parentNode.LeftChildren != null && parentNode.LeftChildren.ID == childNode.ID)
            {
                parentNode.LeftChildren = null;
            }
            else
            {
                parentNode.RightChildren = null;
            }

            // generate two new trees
            BinaryGuideTree treeA = new BinaryGuideTree(_root);
            BinaryGuideTree treeB = new BinaryGuideTree(childNode);

            treeA.Edges = _edges;
            treeB.Edges = _edges;

            // pull the subtree nodes out for the two new roots
            treeA.Nodes = (List<BinaryGuideTreeNode>)ExtractSubTreeNodes(treeA.Root);
            treeB.Nodes = (List<BinaryGuideTreeNode>)ExtractSubTreeNodes(treeB.Root);

            treeA.NumberOfNodes = treeA.Nodes.Count;
            treeB.NumberOfNodes = treeB.Nodes.Count;

            // recount the leaves of each subtree
            int leavesInA = 0;
            foreach (BinaryGuideTreeNode candidate in treeA.Nodes)
            {
                if (candidate.IsLeaf)
                {
                    ++leavesInA;
                }
            }
            treeA.NumberOfLeaves = leavesInA;

            int leavesInB = 0;
            foreach (BinaryGuideTreeNode candidate in treeB.Nodes)
            {
                if (candidate.IsLeaf)
                {
                    ++leavesInB;
                }
            }
            treeB.NumberOfLeaves = leavesInB;

            return new BinaryGuideTree[2] { treeA, treeB };
        }
Example #15
0
        /// <summary>
        /// Compare two guide (sub)trees and mark the nodes of tree A that need to be re-aligned.
        /// 
        /// The internal nodes of tree A are visited in list order, from index
        /// <c>NumberOfLeaves</c> up to <c>NumberOfNodes - 1</c>. This relies on
        /// <c>treeA.Nodes</c> storing the leaves first and every child before its parent
        /// (post-order); NOTE(review): that ordering is assumed here, not verified.
        /// 
        /// When visiting an internal node:
        /// - if either child is marked for re-alignment, the node is marked too;
        /// - if either child's ID does not exist in tree B, the node is marked;
        /// - otherwise, if the two children share the same parent in tree B, the node
        ///   is left unmarked and takes over the ID of that parent in tree B;
        /// - in every other case (different parents, or no parent in tree B), the node
        ///   is marked for re-alignment.
        /// </summary>
        /// <param name="treeA">binary guide (sub)tree; its internal nodes are updated in place</param>
        /// <param name="treeB">binary guide (sub)tree used as the reference</param>
        /// <exception cref="ArgumentNullException">treeA or treeB is null</exception>
        /// <exception cref="ArgumentException">
        /// the two trees differ in number of nodes or number of leaves
        /// </exception>
        public static void CompareTwoTrees(BinaryGuideTree treeA, BinaryGuideTree treeB)
        {
            if (treeA == null)
            {
                throw new ArgumentNullException("treeA");
            }

            if (treeB == null)
            {
                throw new ArgumentNullException("treeB");
            }

            if (treeA.NumberOfNodes != treeB.NumberOfNodes || treeA.NumberOfLeaves != treeB.NumberOfLeaves)
            {
                throw new ArgumentException("The two trees are not comparable");
            }

            // Map every node ID of tree B to its position in treeB.Nodes.
            Dictionary<int, int> nodeID2ListIndex = new Dictionary<int, int>(treeB.NumberOfNodes);
            for (int i = 0; i < treeB.NumberOfNodes; ++i)
            {
                nodeID2ListIndex[treeB.Nodes[i].ID] = i;
            }

            for (int i = treeA.NumberOfLeaves; i < treeA.NumberOfNodes; ++i)
            {
                BinaryGuideTreeNode node = treeA.Nodes[i];

                // A changed child always forces the parent to be re-aligned.
                if (node.LeftChildren.NeedReAlignment == true || node.RightChildren.NeedReAlignment == true)
                {
                    node.NeedReAlignment = true;
                    continue;
                }

                // Both children must exist in tree B; only the left child's index is needed later.
                int leftIndexInB;
                if (!nodeID2ListIndex.TryGetValue(node.LeftChildren.ID, out leftIndexInB)
                    || !nodeID2ListIndex.ContainsKey(node.RightChildren.ID))
                {
                    node.NeedReAlignment = true;
                    continue;
                }

                BinaryGuideTreeNode parentInB = treeB.Nodes[leftIndexInB].Parent;
                int rightChildId = node.RightChildren.ID;

                // The left child's parent in tree B must also own the right child.
                // A missing parent (left child is the root of tree B) or a missing child
                // on that parent means "not the same parent"; this is checked explicitly
                // instead of catching NullReferenceException. The evaluation order mirrors
                // the original: left child first, right child only if the left one differs.
                bool sameParentInB = parentInB != null
                    && parentInB.LeftChildren != null
                    && (parentInB.LeftChildren.ID == rightChildId
                        || (parentInB.RightChildren != null && parentInB.RightChildren.ID == rightChildId));

                if (sameParentInB)
                {
                    // Same sub-alignment as in tree B: reuse its ID so that ancestors
                    // visited later can be matched against tree B as well.
                    node.NeedReAlignment = false;
                    node.ID = parentInB.ID;
                }
                else
                {
                    node.NeedReAlignment = true;
                }
            }
        }