Exemple #1
0
        public void SmithWatermanProteinSeqAffineGap()
        {
            // Configure a Smith-Waterman aligner with BLOSUM62 and affine gap costs.
            var aligner = new SmithWatermanAligner();
            aligner.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            aligner.GapOpenCost = -8;
            aligner.GapExtensionCost = -1;
            IPairwiseSequenceAligner sw = aligner;

            // Align the two protein sequences and log what came back.
            ISequence sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
            ISequence sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");
            IList<IPairwiseSequenceAlignment> result = sw.Align(sequence1, sequence2);
            AlignmentHelpers.LogResult(sw, result);

            // Build the single alignment the aligner is expected to produce.
            PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
            expectedPair.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
            expectedPair.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
            expectedPair.Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE");
            expectedPair.Score = 20;
            expectedPair.FirstOffset = 0;
            expectedPair.SecondOffset = 3;

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedOutput =
                new List<IPairwiseSequenceAlignment> { expectedAlignment };

            bool matches = AlignmentHelpers.CompareAlignment(result, expectedOutput);
            Assert.IsTrue(matches);
        }
Exemple #2
0
        /// <summary>
        /// Converts each delta alignment into a pairwise aligned sequence and sets its
        /// offsets from the difference between the two sequence start positions.
        /// </summary>
        /// <param name="alignments">Delta alignments to convert.</param>
        /// <returns>One aligned sequence per delta alignment, in enumeration order.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="alignments"/> is null.</exception>
        private static IList <PairwiseAlignedSequence> ConvertDeltaToAlignment(
            IEnumerable <DeltaAlignment> alignments)
        {
            if (alignments == null)
            {
                throw new ArgumentNullException("alignments");
            }

            var converted = new List<PairwiseAlignedSequence>();

            foreach (DeltaAlignment delta in alignments)
            {
                PairwiseAlignedSequence pair = delta.ConvertDeltaToSequences();

                // Positive when the first (reference) start lies beyond the second (query) start.
                long startGap = delta.FirstSequenceStart - delta.SecondSequenceStart;
                bool firstStartsLater = startGap > 0;

                // Exactly one offset is non-zero: it carries the absolute start difference.
                pair.FirstOffset = firstStartsLater ? 0 : -startGap;
                pair.SecondOffset = firstStartsLater ? startGap : 0;

                converted.Add(pair);
            }

            return converted;
        }
Exemple #3
0
        /// <summary>
        /// Appends an aligned sequence to this alignment's list of aligned sequences.
        /// </summary>
        /// <param name="item">Aligned sequence to append.</param>
        /// <exception cref="NotSupportedException">Thrown when the alignment is read only.</exception>
        public void Add(PairwiseAlignedSequence item)
        {
            if (!IsReadOnly)
            {
                alignedSequences.Add(item);
                return;
            }

            // Read-only collections reject every mutation.
            throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
        }
Exemple #4
0
        /// <summary>
        /// Removes an aligned sequence from this alignment's list of aligned sequences.
        /// </summary>
        /// <param name="item">Aligned sequence to remove.</param>
        /// <returns>The result of the underlying list removal: true if the item was removed, otherwise false.</returns>
        /// <exception cref="NotSupportedException">Thrown when the alignment is read only.</exception>
        public bool Remove(PairwiseAlignedSequence item)
        {
            if (!IsReadOnly)
            {
                bool removed = alignedSequences.Remove(item);
                return removed;
            }

            // Read-only collections reject every mutation.
            throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
        }
Exemple #5
0
        /// <summary>
        /// Appends an aligned sequence object to the end of the aligned-sequence list.
        /// </summary>
        /// <param name="pairwiseAlignedSequence">Aligned sequence to append.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown (after the message is reported through Trace) when the collection is read only.
        /// </exception>
        public void AddSequence(PairwiseAlignedSequence pairwiseAlignedSequence)
        {
            if (!IsReadOnly)
            {
                _alignedSequences.Add(pairwiseAlignedSequence);
                return;
            }

            // Report the failure before raising it.
            Trace.Report(Resource.READ_ONLY_COLLECTION_MESSAGE);
            throw new NotSupportedException(Resource.READ_ONLY_COLLECTION_MESSAGE);
        }
Exemple #6
0
        /// <summary>
        /// Wraps the raw aligned byte arrays into Sequence objects and collects them,
        /// together with their score, offsets and metadata, into a single
        /// PairwiseSequenceAlignment.
        /// </summary>
        /// <param name="aInput">First input sequence.</param>
        /// <param name="bInput">Second input sequence.</param>
        /// <param name="alignedSequences">
        /// Aligned sequences stored pairwise: index 2k belongs to the first input and
        /// index 2k + 1 to the second input.
        /// </param>
        /// <param name="offsets">Offsets, indexed in parallel with <paramref name="alignedSequences"/>.</param>
        /// <param name="optScore">Optimum alignment score, assigned to every aligned pair.</param>
        /// <param name="startOffsets">Start indices, indexed in parallel with <paramref name="alignedSequences"/>.</param>
        /// <param name="endOffsets">End indices, indexed in parallel with <paramref name="alignedSequences"/>.</param>
        /// <param name="insertions">Insertions made to the aligned sequences, indexed in parallel with <paramref name="alignedSequences"/>.</param>
        /// <returns>
        /// A list holding one alignment with all aligned pairs, or an empty list when
        /// there are no aligned sequences.
        /// </returns>
        private IList <IPairwiseSequenceAlignment> CollateResults(ISequence aInput, ISequence bInput, List <byte[]> alignedSequences, List <int> offsets, int optScore, List <int> startOffsets, List <int> endOffsets, List <int> insertions)
        {
            // Nothing aligned: hand back an empty result list.
            if (alignedSequences.Count <= 0)
            {
                return new List<IPairwiseSequenceAlignment>();
            }

            var collated = new PairwiseSequenceAlignment(aInput, bInput);

            // Entries come in (first, second) pairs, so step two at a time.
            for (int first = 0; first < alignedSequences.Count; first += 2)
            {
                int second = first + 1;
                byte[] firstBytes = alignedSequences[first];
                byte[] secondBytes = alignedSequences[second];

                var pair = new PairwiseAlignedSequence();
                pair.Score = optScore;

                pair.FirstSequence = new Sequence(aInput.Alphabet, _similarityMatrix.ToString(firstBytes))
                {
                    ID = aInput.ID,
                    DisplayID = aInput.DisplayID
                };

                pair.SecondSequence = new Sequence(bInput.Alphabet, _similarityMatrix.ToString(secondBytes))
                {
                    ID = bInput.ID,
                    DisplayID = bInput.DisplayID
                };

                // The consensus is derived from the two sequences assigned above.
                AddSimpleConsensusToResult(pair);

                pair.FirstOffset = offsets[first];
                pair.SecondOffset = offsets[second];

                var starts = new List<int> { startOffsets[first], startOffsets[second] };
                pair.Metadata["StartOffsets"] = starts;

                var ends = new List<int> { endOffsets[first], endOffsets[second] };
                pair.Metadata["EndOffsets"] = ends;

                var inserted = new List<int> { insertions[first], insertions[second] };
                pair.Metadata["Insertions"] = inserted;

                collated.PairwiseAlignedSequences.Add(pair);
            }

            var results = new List<IPairwiseSequenceAlignment>();
            results.Add(collated);
            return results;
        }
Exemple #7
0
        /// <summary>
        /// Computes a position-by-position consensus of the two aligned sequences and
        /// stores it on the alignment. Each consensus symbol is whatever
        /// ConsensusResolver returns for the pair of symbols at that position.
        /// </summary>
        /// <param name="alignment">
        /// Alignment whose FirstSequence and SecondSequence are already set and whose
        /// Consensus is to be filled in.
        /// </param>
        private void AddSimpleConsensusToResult(PairwiseAlignedSequence alignment)
        {
            ISequence first = alignment.FirstSequence;
            ISequence second = alignment.SecondSequence;

            // The consensus spans the length of the first aligned sequence.
            byte[] merged = new byte[first.Count];

            int position = 0;
            while (position < first.Count)
            {
                byte[] column = new byte[] { first[position], second[position] };
                merged[position] = ConsensusResolver.GetConsensus(column);
                position++;
            }

            // Choose the alphabet from the consensus symbols themselves; the first
            // sequence's alphabet is handed to the detector as its last argument.
            IAlphabet detected = Alphabets.AutoDetectAlphabet(merged, 0, merged.GetLongLength(), first.Alphabet);

            alignment.Consensus = new Sequence(detected, merged, false);
        }
        public void PairwiseOverlapProteinSeqSimpleGap()
        {
            // Inputs: two protein sequences, BLOSUM50 and a flat gap penalty.
            Sequence sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
            Sequence sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");
            SimilarityMatrix sm = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            const int gapPenalty = -8;

            PairwiseOverlapAligner overlap = new PairwiseOverlapAligner
            {
                SimilarityMatrix = sm,
                GapOpenCost = gapPenalty
            };
            IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

            // Log the aligner configuration, then the first aligned pair of every alignment.
            IFormatProvider noProvider = null;
            ApplicationLog.WriteLine(string.Format(noProvider,
                "{0}, Simple; Matrix {1}; GapOpenCost {2}", overlap.Name, overlap.SimilarityMatrix.Name, overlap.GapOpenCost));
            foreach (IPairwiseSequenceAlignment sequenceResult in result)
            {
                PairwiseAlignedSequence firstPair = sequenceResult.PairwiseAlignedSequences[0];

                ApplicationLog.WriteLine(string.Format(noProvider, "score {0}", firstPair.Score));
                ApplicationLog.WriteLine(string.Format(noProvider, "input 0     {0}", sequenceResult.FirstSequence.ToString()));
                ApplicationLog.WriteLine(string.Format(noProvider, "input 1     {0}", sequenceResult.SecondSequence.ToString()));
                ApplicationLog.WriteLine(string.Format(noProvider, "result 0    {0}", firstPair.FirstSequence.ToString()));
                ApplicationLog.WriteLine(string.Format(noProvider, "result 1    {0}", firstPair.SecondSequence.ToString()));
                ApplicationLog.WriteLine(string.Format(noProvider, "consesus    {0}", firstPair.Consensus.ToString()));
            }

            // Expected: a single alignment containing a single aligned pair.
            PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
                SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
                Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
                Score = 25,
                FirstOffset = 0,
                SecondOffset = 3
            };

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            align.PairwiseAlignedSequences.Add(alignedSeq);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { align };

            bool matches = CompareAlignment(result, expectedOutput);
            Assert.IsTrue(matches);
        }
Exemple #9
0
        /// <summary>
        /// Builds a consensus sequence for the alignment by asking ConsensusResolver
        /// for a consensus item at every position of the two aligned sequences, then
        /// stores the result on the alignment.
        /// </summary>
        /// <param name="alignment">
        /// Alignment whose FirstSequence and SecondSequence are already set and whose
        /// Consensus is to be filled in.
        /// </param>
        private void AddSimpleConsensusToResult(PairwiseAlignedSequence alignment)
        {
            ISequence first = alignment.FirstSequence;
            ISequence second = alignment.SecondSequence;

            // The consensus starts empty and shares the first sequence's alphabet.
            Sequence merged = new Sequence(first.Alphabet);

            int position = 0;
            while (position < first.Count)
            {
                // The pair of items found at this position in the two sequences.
                List<ISequenceItem> column = new List<ISequenceItem>();
                column.Add(first[position]);
                column.Add(second[position]);

                merged.Add(ConsensusResolver.GetConsensus(column));
                position++;
            }

            alignment.Consensus = merged;
        }
Exemple #10
0
        public void TestMUMmerAlignerSingleMum()
        {
            // Reference and query DNA sequences.
            ISequence referenceSeq = new Sequence(Alphabets.DNA, "TTAATTTTAG");
            ISequence searchSeq = new Sequence(Alphabets.DNA, "AGTTTAGAG");

            List<ISequence> searchSeqs = new List<ISequence>();
            searchSeqs.Add(searchSeq);

            // MUMmer with a MUM length of 3, using Needleman-Wunsch as its pairwise algorithm.
            MUMmerAligner mummer = new MUMmerAligner();
            mummer.LengthOfMUM = 3;
            mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
            mummer.GapExtensionCost = -2;

            IList<IPairwiseSequenceAlignment> result = mummer.Align(referenceSeq, searchSeqs);

            // The aligner must return a result object.
            Assert.AreNotEqual(null, result);

            // Expected: a single alignment containing a single aligned pair.
            PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
            expectedPair.FirstSequence = new Sequence(Alphabets.DNA, "TTAATTTTAG--");
            expectedPair.SecondSequence = new Sequence(Alphabets.DNA, "---AGTTTAGAG");
            expectedPair.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "TTAAKTTTAGAG");
            expectedPair.Score = -6;
            expectedPair.FirstOffset = 0;
            expectedPair.SecondOffset = 3;

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedOutput =
                new List<IPairwiseSequenceAlignment> { expectedAlignment };

            bool matches = CompareAlignment(result, expectedOutput);
            Assert.IsTrue(matches);
        }
        /// <summary>
        ///     Validates the PairwiseOverlapAligner for the parameters passed: builds the
        ///     input sequences and similarity matrix described by the xml node, runs the
        ///     requested Align/AlignSimple overload and compares the result with the
        ///     expected sequences and score stored in the same node.
        /// </summary>
        /// <param name="nodeName">Node Name in the xml.</param>
        /// <param name="alignParam">
        ///     Selects which aligner overload is exercised. Values whose name contains
        ///     "Code" take the input sequences from the xml text; all others parse them
        ///     from FastA files.
        /// </param>
        /// <param name="similarityMatrixParam">
        ///     Selects how the similarity matrix is built: a diagonal matrix from the
        ///     match/mismatch scores, or (for every other value) a matrix read from the
        ///     BLOSUM file.
        /// </param>
        /// <param name="alignType">
        ///     AlignmentType.Align uses the gap-extension overloads and expectations;
        ///     any other value uses AlignSimple.
        /// </param>
        private void ValidatePairwiseOverlapAlignment(string nodeName, AlignParameters alignParam,
                                                      SimilarityMatrixParameters similarityMatrixParam,
                                                      AlignmentType alignType)
        {
            ISequence aInput;
            ISequence bInput;

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            if (alignParam.ToString().Contains("Code"))
            {
                // "...Code" variants carry the sequences inline in the xml.
                string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                aInput = new Sequence(alphabet, sequence1);
                bInput = new Sequence(alphabet, sequence2);
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

                var parser1 = new FastAParser { Alphabet = alphabet };
                aInput = parser1.Parse(filePath1).ElementAt(0);
                bInput = parser1.Parse(filePath2).ElementAt(0);
            }

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
            SimilarityMatrix sm;

            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
                    break;
                case SimilarityMatrixParameters.TextReader:
                default:
                    // Both paths load the matrix from the BLOSUM file. The reader is
                    // disposed as soon as the matrix is built so the file handle is
                    // not leaked.
                    using (TextReader reader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(reader);
                    }
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

            var pairwiseOverlapObj = new PairwiseOverlapAligner();
            if (AlignParameters.AllParam != alignParam)
            {
                // For AllParam the matrix and gap cost are passed to the call itself
                // instead of being set on the aligner.
                pairwiseOverlapObj.SimilarityMatrix = sm;
                pairwiseOverlapObj.GapOpenCost = gapOpenCost;
            }

            IList<IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
                case AlignParameters.AlignList:
                case AlignParameters.AlignListCode:
                    var sequences = new List<ISequence> {aInput, bInput};
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                            result = pairwiseOverlapObj.Align(sequences);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(sequences);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                case AlignParameters.AllParamCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                            result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                case AlignParameters.AlignTwoCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                            result = pairwiseOverlapObj.Align(aInput, bInput);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }

            // Read the expected aligned sequences and score from the xml file.
            string expectedSequence1;
            string expectedSequence2;
            string expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }

            // The expected values are ';'-separated lists, one entry per aligned pair.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            var seperators = new [] {';'};
            string[] expectedSequences1 = expectedSequence1.Split(seperators);
            string[] expectedSequences2 = expectedSequence2.Split(seperators);

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            for (int i = 0; i < expectedSequences1.Length; i++)
            {
                PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
                {
                    FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
                    SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
                    Score = Convert.ToInt32(expectedScore, null),
                    // NOTE(review): Int32.MinValue looks like a placeholder for offsets that the
                    // comparison below (called with 'true') does not check - confirm against CompareAlignment.
                    FirstOffset = Int32.MinValue,
                    SecondOffset = Int32.MinValue,
                };
                align.PairwiseAlignedSequences.Add(alignedSeq);
            }
            expectedOutput.Add(align);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));
        }
Exemple #12
0
        /// <summary>
        /// Takes a specific starting location in the scoring matrix and generates an
        /// alignment from it by walking the traceback matrix backwards until
        /// TracebackIsComplete reports the end of the alignment.
        /// </summary>
        /// <param name="startingCell">Cell (row, column and score) at which the traceback starts.</param>
        /// <returns>
        /// The pairwise alignment ending at <paramref name="startingCell"/>, including its
        /// consensus, offsets and metadata (start/end offsets, insertions, identical and
        /// similarity counts).
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the traceback matrix holds a direction other than diagonal, left or up
        /// at a cell where the traceback is not yet complete.
        /// </exception>
        protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // The gap-length tables are flat arrays indexed as [row * gapStride + column].
            int gapStride = Cols + 1;
            //Using list to avoid allocation issues
            int estimatedLength = (int)(1.1 * Math.Max(ReferenceSequence.Length, QuerySequence.Length));
            var firstAlignment  = new List <byte>(estimatedLength);
            var secondAlignment = new List <byte>(estimatedLength);

            // Get the starting cell position and record the optimal score found there.
            // i walks rows (query positions), j walks columns (reference positions).
            int i          = startingCell.Row;
            int j          = startingCell.Col;
            var finalScore = startingCell.Score;

            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments.
            while (!TracebackIsComplete(i, j))
            {
                sbyte tracebackDirection = Traceback[i][j];
                // Walk backwards through the trace back
                int gapLength;
                switch (tracebackDirection)
                {
                case SourceDirection.Diagonal:
                    byte n1 = ReferenceSequence[j - 1];
                    byte n2 = QuerySequence[i - 1];
                    firstAlignment.Add(n1);
                    secondAlignment.Add(n2);
                    i--;
                    j--;
                    // Track some useful statistics
                    if (n1 == n2 && n1 != _gap)
                    {
                        identicalCount++;
                        similarityCount++;
                    }
                    else if (SimilarityMatrix[n2, n1] > 0)
                    {
                        similarityCount++;
                    }
                    break;

                case SourceDirection.Left:
                    // Gap in the query: consume reference symbols only.
                    // NOTE(review): an earlier comment here said "add 1 because this only counts
                    // number of extensions", but the stored length is used as-is - confirm that
                    // h_Gap_Length already holds the full gap length.
                    if (usingAffineGapModel)
                    {
                        gapLength = h_Gap_Length[i * gapStride + j];
                        for (int k = 0; k < gapLength; k++)
                        {
                            firstAlignment.Add(ReferenceSequence[--j]);
                            secondAlignment.Add(_gap);
                            rowGaps++;
                        }
                    }
                    else
                    {
                        firstAlignment.Add(ReferenceSequence[--j]);
                        secondAlignment.Add(_gap);
                        rowGaps++;
                    }
                    break;

                case SourceDirection.Up:
                    // Gap in the reference: consume query symbols only.
                    // NOTE(review): same "add 1" remark as for the Left case - confirm that
                    // v_Gap_Length already holds the full gap length.
                    if (usingAffineGapModel)
                    {
                        gapLength = v_Gap_Length[i * gapStride + j];
                        for (int k = 0; k < gapLength; k++)
                        {
                            firstAlignment.Add(_gap);
                            colGaps++;
                            secondAlignment.Add(QuerySequence[--i]);
                        }
                    }
                    else
                    {
                        secondAlignment.Add(QuerySequence[--i]);
                        firstAlignment.Add(_gap);
                        colGaps++;
                    }
                    break;

                default:
                    // None of the branches above moved (i, j), so the next iteration would read
                    // the very same cell again (assuming TracebackIsComplete depends only on the
                    // cell). Fail loudly instead of spinning forever.
                    throw new InvalidOperationException(
                        "Unexpected traceback direction " + tracebackDirection +
                        " at row " + i + ", column " + j + ".");
                }
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table. To create
            // the proper alignments we need to reverse both lists.
            firstAlignment.Reverse();
            secondAlignment.Reverse();
            // Create the Consensus sequence
            byte[] consensus = new byte[Math.Min(firstAlignment.Count, secondAlignment.Count)];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score         = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment.ToArray())
                {
                    ID = _sequence1.ID
                },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment.ToArray())
                {
                    ID = _sequence2.ID
                },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point (i, j) is the cell where the traceback stopped.
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset  = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset  = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }


            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"]        = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"]  = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"]    = pairwiseAlignedSequence.Consensus;
            pairwiseAlignedSequence.Metadata["StartOffsets"] = new List <long> {
                j, i
            };
            pairwiseAlignedSequence.Metadata["EndOffsets"] = new List <long> {
                startingCell.Col - 1, startingCell.Row - 1
            };
            // colGaps counts gap symbols placed in the first (reference) alignment,
            // rowGaps those placed in the second (query) alignment.
            pairwiseAlignedSequence.Metadata["Insertions"] = new List <long> {
                colGaps, rowGaps
            };                                                                                    // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"]  = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return(pairwiseAlignedSequence);
        }
Exemple #13
0
 /// <summary>
 /// Checks whether the given aligned sequence is present in this alignment's
 /// list of aligned sequences.
 /// </summary>
 /// <param name="item">Aligned sequence to look for.</param>
 /// <returns>True if the list contains the item, otherwise false.</returns>
 public bool Contains(PairwiseAlignedSequence item)
 {
     bool found = alignedSequences.Contains(item);
     return found;
 }
Exemple #14
0
        /// <summary>
        ///     Exercises a single general-purpose member of the pairwise sequence alignment
        ///     class and asserts on the outcome.
        /// </summary>
        /// <param name="nodeName">Xml node holding the input sequences and the expected counts.</param>
        /// <param name="methodName">Alignment member to validate.</param>
        /// <param name="isSeqAlignDefCtr">
        ///     True to create the alignment with its default constructor; false to use the
        ///     constructor that takes both input sequences.
        /// </param>
        private void ValidateSequenceAlignmentGeneralMethods(string nodeName, SeqAlignmentMethods methodName,
                                                             bool isSeqAlignDefCtr)
        {
            // Raw input sequences and the alphabet they are expressed in.
            string firstRaw = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string secondRaw = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
            string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
            IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

            // Expected values stay as strings because they are compared with formatted counts.
            string expectedCountAfterAdd = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeqCountNode);
            string expectedCountAfterAddSequence =
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlignedSeqCountAfterAddAlignedSeqNode);
            string copyTargetSize = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ArraySizeNode);

            // Destination array and start position used by the CopyTo validation.
            PairwiseAlignedSequence[] copyTarget = new PairwiseAlignedSequence[int.Parse(copyTargetSize, null)];
            const int CopyStart = 0;

            ISequence aInput = new Sequence(alphabet, firstRaw);
            ISequence bInput = new Sequence(alphabet, secondRaw);

            IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment>();
            PairwiseAlignedSequence alignedPair = new PairwiseAlignedSequence
            {
                FirstSequence = aInput,
                SecondSequence = bInput
            };

            // Build the alignment under test with the requested constructor.
            IPairwiseSequenceAlignment alignment;
            if (isSeqAlignDefCtr)
            {
                alignment = new PairwiseSequenceAlignment();
            }
            else
            {
                alignment = new PairwiseSequenceAlignment(aInput, bInput);
            }

            alignment.Add(alignedPair);
            alignments.Add(alignment);

            IList<PairwiseAlignedSequence> storedPairs = alignments[0].PairwiseAlignedSequences;

            if (methodName == SeqAlignmentMethods.Add)
            {
                // Add the same pair a second time and compare the resulting count.
                alignment.Add(alignedPair);
                Assert.AreEqual(expectedCountAfterAdd,
                                alignment.PairwiseAlignedSequences.Count.ToString((IFormatProvider) null));
            }
            else if (methodName == SeqAlignmentMethods.Clear)
            {
                alignment.Clear();
                Assert.AreEqual(0, alignment.PairwiseAlignedSequences.Count);
            }
            else if (methodName == SeqAlignmentMethods.Contains)
            {
                Assert.IsTrue(alignment.Contains(storedPairs[0]));
            }
            else if (methodName == SeqAlignmentMethods.CopyTo)
            {
                alignment.CopyTo(copyTarget, CopyStart);

                // The copied entry must expose the same sequences as the alignment itself.
                Assert.AreEqual(copyTarget[CopyStart].FirstSequence, alignment.FirstSequence);
                Assert.AreEqual(copyTarget[CopyStart].SecondSequence, alignment.SecondSequence);
            }
            else if (methodName == SeqAlignmentMethods.Remove)
            {
                alignment.Remove(storedPairs[0]);

                // The list obtained before the removal is expected to be empty afterwards.
                Assert.AreEqual(0, storedPairs.Count);
            }
            else if (methodName == SeqAlignmentMethods.AddSequence)
            {
                alignment.AddSequence(storedPairs[0]);

                // Compare the count after adding the aligned sequence with the configured value.
                Assert.AreEqual(expectedCountAfterAddSequence, alignment.Count.ToString((IFormatProvider) null));
            }
            else if (methodName == SeqAlignmentMethods.GetEnumerator)
            {
                IEnumerator<PairwiseAlignedSequence> enumerator = alignment.GetEnumerator();

                // Only the availability of an enumerator is checked here.
                Assert.IsNotNull(enumerator);
            }

            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
        }
        /// <summary>
        ///     Runs the simple pairwise overlap alignment of "CCC" against "CCCAACCC", logs every
        ///     returned alignment and compares the result with two expected alignments.
        /// </summary>
        public void PairwiseOverlapMultipleAlignments()
        {
            Sequence sequence1 = new Sequence(Alphabets.DNA, "CCCAACCC");
            Sequence sequence2 = new Sequence(Alphabets.DNA, "CCC");
            SimilarityMatrix sm = new DiagonalSimilarityMatrix(5, -20);
            int gapPenalty = -10;

            PairwiseOverlapAligner overlap = new PairwiseOverlapAligner
            {
                SimilarityMatrix = sm,
                GapOpenCost = gapPenalty
            };
            IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

            string header = string.Format((IFormatProvider)null,
                "{0}, Simple; Matrix {1}; GapOpenCost {2}", overlap.Name, overlap.SimilarityMatrix.Name, overlap.GapOpenCost);
            ApplicationLog.WriteLine(header);

            foreach (IPairwiseSequenceAlignment sequenceResult in result)
            {
                // Only the first aligned pair of each alignment is reported.
                PairwiseAlignedSequence firstPair = sequenceResult.PairwiseAlignedSequences[0];

                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "score {0}", firstPair.Score));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "input 0     {0}", sequenceResult.FirstSequence.ToString()));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "input 1     {0}", sequenceResult.SecondSequence.ToString()));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "result 0    {0}", firstPair.FirstSequence.ToString()));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "result 1    {0}", firstPair.SecondSequence.ToString()));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "consesus    {0}", firstPair.Consensus.ToString()));
            }

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

            // The two expected alignments differ only in the offset of the second sequence.
            foreach (int secondOffset in new[] { 0, 5 })
            {
                PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
                {
                    FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
                    SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
                    Consensus = new Sequence(Alphabets.DNA, "CCC"),
                    Score = 15,
                    FirstOffset = 0,
                    SecondOffset = secondOffset
                };
                align.PairwiseAlignedSequences.Add(expectedPair);
            }

            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
        /// <summary>
        ///     Validates PairwiseOverlapAlignment algorithm for the parameters passed.
        ///     Inputs, scoring parameters and expected results are all read from the xml node.
        /// </summary>
        /// <param name="nodeName">Xml node name</param>
        /// <param name="isTextFile">Is text file an input.</param>
        /// <param name="caseType">Case Type</param>
        /// <param name="additionalParameter">parameter based on which certain validations are done.</param>
        /// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
        /// <param name="similarityMatrixParam">Similarity Matrix</param>
        private void ValidatePairwiseOverlapAlignment(string nodeName, bool isTextFile,
                                                      SequenceCaseType caseType, AlignParameters additionalParameter,
                                                      AlignmentType alignType,
                                                      SimilarityMatrixParameters similarityMatrixParam)
        {
            Sequence aInput = null;
            Sequence bInput = null;

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.FilePathNode2);

                // Only the first sequence of each FastA file is used.
                var parser1 = new FastAParser();
                ISequence originalSequence1 = parser1.Parse(filePath1).ElementAt(0);
                ISequence originalSequence2 = parser1.Parse(filePath2).ElementAt(0);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(new string(originalSequence1.Select(a => (char) a).ToArray()),
                                        new string(originalSequence2.Select(a => (char) a).ToArray()), alphabet,
                                        caseType, out aInput, out bInput);
            }
            else
            {
                string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(
                    originalSequence1,
                    originalSequence2,
                    alphabet,
                    caseType,
                    out aInput,
                    out bInput);
            }

            var aInputString = new string(aInput.Select(a => (char) a).ToArray());
            var bInputString = new string(bInput.Select(a => (char) a).ToArray());

            ApplicationLog.WriteLine(string.Format(null,
                                                   "PairwiseOverlapAligner P2 : First sequence used is '{0}'.",
                                                   aInputString));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "PairwiseOverlapAligner P2 : Second sequence used is '{0}'.",
                                                   bInputString));

            // Create similarity matrix object for a given file.
            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

            SimilarityMatrix sm = null;

            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.TextReader:
                    using (TextReader reader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(reader);
                    }
                    break;
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null),
                                                      int.Parse(misMatchValue, null));
                    break;
                default:
                    // Dispose the reader once the matrix is built so the file handle is not leaked.
                    using (var matrixReader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(matrixReader);
                    }
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.GapOpenCostNode), null);

            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                             Constants.GapExtensionCostNode), null);

            // Create PairwiseOverlapAligner instance and set its values.
            // For AllParam the scoring parameters are passed to the align call instead.
            var pairwiseOverlapObj = new PairwiseOverlapAligner();
            if (additionalParameter != AlignParameters.AllParam)
            {
                pairwiseOverlapObj.SimilarityMatrix = sm;
                pairwiseOverlapObj.GapOpenCost = gapOpenCost;
                pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
            }
            IList<IPairwiseSequenceAlignment> result = null;

            // Align the input sequences.
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    var sequences = new List<ISequence>();
                    sequences.Add(aInput);
                    sequences.Add(bInput);
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = pairwiseOverlapObj.Align(sequences);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(sequences);
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = pairwiseOverlapObj.Align(aInput, bInput);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = pairwiseOverlapObj.Align(sm, gapOpenCost,
                                                              gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }

            // The inputs are no longer needed once the alignment has been computed.
            aInput = null;
            bInput = null;
            sm = null;

            // Get the expected sequence and score from xml config.
            string expectedSequence1 = string.Empty;
            string expectedSequence2 = string.Empty;
            string expectedScore = string.Empty;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedGapExtensionScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence2Node);
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedSequenceNode2);
                    break;
            }

            // Several expected alignments may be configured, separated by ';'.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            string[] expectedSequences1, expectedSequences2;
            var seperators = new char[1] {';'};
            expectedSequences1 = expectedSequence1.Split(seperators);
            expectedSequences2 = expectedSequence2.Split(seperators);

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence alignedSeq;
            for (int i = 0; i < expectedSequences1.Length; i++)
            {
                alignedSeq = new PairwiseAlignedSequence
                {
                    FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
                    SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
                    Score = Convert.ToInt32(expectedScore, null),
                    FirstOffset = Int32.MinValue,
                    SecondOffset = Int32.MinValue,
                };
                align.PairwiseAlignedSequences.Add(alignedSeq);
            }

            expectedOutput.Add(align);
            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput,true));

            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));
        }
        /// <summary>
        /// This takes a specific starting location in the scoring matrix and generates
        /// an alignment from it using the traceback scores.
        /// </summary>
        /// <param name="startingCell">Starting point</param>
        /// <returns>
        /// Pairwise alignment holding the aligned reference and query sequences, their consensus,
        /// the score of the starting cell, the offsets and the alignment statistics metadata.
        /// </returns>
        private PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // Every traceback step (left, up or diagonal) decrements the row, the column or both,
            // so a path starting at (Row, Col) emits at most Row + Col symbols. Sizing the buffers
            // to that bound avoids both the overflow-prone and quadratic Length * Length product
            // and the under-allocation that product causes when one of the sequences has length 1.
            long maxAlignmentLength = (long)startingCell.Row + startingCell.Col;
            var  firstAlignment  = new byte[maxAlignmentLength];
            var  secondAlignment = new byte[maxAlignmentLength];

            // Get the starting cell position and record the optimal score found there.
            int i          = startingCell.Row;
            int j          = startingCell.Col;
            var finalScore = startingCell.Score;

            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments.
            int faLength = 0, saLength = 0;

            while (!TracebackIsComplete(i, j))
            {
                sbyte tracebackDirection = Traceback[i][j];

                // Reference sequence uses the current cell if we moved diagonal or left.
                if (tracebackDirection == SourceDirection.Left || tracebackDirection == SourceDirection.Diagonal)
                {
                    firstAlignment[faLength++] = ReferenceSequence[j - 1];
                }
                else
                {
                    firstAlignment[faLength++] = _gap;
                    colGaps++;
                }

                // Query sequence uses the current cell if we moved diagonal or up.
                if (tracebackDirection == SourceDirection.Up || tracebackDirection == SourceDirection.Diagonal)
                {
                    secondAlignment[saLength++] = QuerySequence[i - 1];
                }
                else
                {
                    secondAlignment[saLength++] = _gap;
                    rowGaps++;
                }

                // Track some useful statistics on the pair of symbols that was just emitted.
                byte n1 = firstAlignment[faLength - 1];
                byte n2 = secondAlignment[saLength - 1];
                if (n1 == n2 && n1 != _gap)
                {
                    identicalCount++;
                    similarityCount++;
                }
                else if (SimilarityMatrix[n2, n1] > 0)
                {
                    similarityCount++;
                }

                // Walk backwards through the trace back
                switch (tracebackDirection)
                {
                case SourceDirection.Diagonal:
                    i--;
                    j--;
                    break;

                case SourceDirection.Left:
                    j--;
                    break;

                case SourceDirection.Up:
                    i--;
                    break;

                default:
                    break;
                }
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table. To create
            // the proper alignments we need to resize and reverse
            // both underlying arrays.
            Array.Resize(ref firstAlignment, faLength);
            Array.Reverse(firstAlignment);
            Array.Resize(ref secondAlignment, saLength);
            Array.Reverse(secondAlignment);

            // Create the Consensus sequence
            byte[] consensus = new byte[Math.Min(faLength, saLength)];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score         = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment)
                {
                    ID = _sequence1.ID
                },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment)
                {
                    ID = _sequence2.ID
                },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point (i, j) is the cell where the traceback stopped.
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset  = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset  = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }


            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"]        = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"]  = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"]    = pairwiseAlignedSequence.Consensus;
            pairwiseAlignedSequence.Metadata["StartOffsets"] = new List <long> {
                j, i
            };
            pairwiseAlignedSequence.Metadata["EndOffsets"] = new List <long> {
                startingCell.Col - 1, startingCell.Row - 1
            };
            pairwiseAlignedSequence.Metadata["Insertions"] = new List <long> {
                colGaps, rowGaps
            };                                                                                    // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"]  = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return(pairwiseAlignedSequence);
        }
Exemple #18
0
        /// <summary>
        /// Launches the alignment algorithm: fills the block grid, computes the last (corner)
        /// block together with its trace, then walks the trace back from every optimal score cell.
        /// </summary>
        /// <returns>
        /// An empty list when no optimal score cell was recorded; otherwise a list with a single
        /// alignment that holds one aligned pair per traceback that reached a stop cell.
        /// </returns>
        public virtual List <IPairwiseSequenceAlignment> Align()
        {
            InitializeCache();

            // Grid: visit the blocks one anti-diagonal at a time. The last anti-diagonal, which
            // holds only the corner block, is excluded here and handled separately below.
            for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
            {
                for (int blockRow = 0; blockRow < gridRows; blockRow++)
                {
                    int blockCol = diagonal - blockRow;

                    if ((blockCol >= 0) && (blockCol < gridCols))
                    {
                        // Blocks in the last grid row/column only cover what is left of the matrix.
                        int lastRow = (blockRow == gridRows - 1) ? (int)(colHeight - Math.BigMul(blockRow, gridStride) - 1) : gridStride;
                        int lastCol = (blockCol == gridCols - 1) ? (int)(rowWidth - Math.BigMul(blockCol, gridStride) - 1) : gridStride;

                        ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
                    }
                }
            }

            // Trace buffer for a single block; it is reused for every block that gets traced.
            sbyte[][] trace = new sbyte[gridStride + 1][];
            for (int i = 0; i <= gridStride; i++)
            {
                trace[i] = new sbyte[gridStride + 1];
            }

            // Last Block - grid calculation and Traceback combined
            int completeTraceRow = gridRows - 1;
            int completeTraceCol = gridCols - 1;

            int completeLastRow = (int)(colHeight - Math.BigMul(completeTraceRow, gridStride) - 1);
            int completeLastCol = (int)(rowWidth - Math.BigMul(completeTraceCol, gridStride) - 1);

            ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

            //Traceback
            if (optScoreCells.Count == 0)
            {
                return(new List <IPairwiseSequenceAlignment>());
            }
            else
            {
                PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

                for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
                {
                    PairwiseAlignedSequence result = new PairwiseAlignedSequence();
                    result.Score = optScore;

                    long alignmentRow = optScoreCells[alignmentCount].Item1;
                    long alignmentCol = optScoreCells[alignmentCount].Item2;

                    // Locate the block that holds the cell and the cell's position inside it.
                    int blockRow = (int)(alignmentRow / gridStride);
                    int blockCol = (int)(alignmentCol / gridStride);

                    int lastRow = (int)(alignmentRow - Math.BigMul(blockRow, gridStride));
                    int lastCol = (int)(alignmentCol - Math.BigMul(blockCol, gridStride));

                    result.Metadata["EndOffsets"] = new List <long> {
                        alignmentRow - 1, alignmentCol - 1
                    };

                    // The aligned symbols are collected in reverse order while walking back.
                    long   alignmentLength = 0;
                    byte[] sequence1       = new byte[colHeight + rowWidth];
                    byte[] sequence2       = new byte[colHeight + rowWidth];

                    int colGaps = 0;
                    int rowGaps = 0;

                    while ((blockRow >= 0) && (blockCol >= 0))
                    {
                        // Recompute the trace unless the buffer already covers this position of this block.
                        if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) || (lastRow > completeLastRow) || (lastCol > completeLastCol))
                        {
                            ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);

                            completeTraceRow = blockRow;
                            completeTraceCol = blockCol;

                            completeLastRow = lastRow;
                            completeLastCol = lastCol;
                        }

                        // Use a 64-bit multiply, like the other position computations in this method,
                        // so the product cannot overflow before it is widened to long.
                        long startPositionI = Math.BigMul(blockRow, gridStride) - 1;
                        long startPositionJ = Math.BigMul(blockCol, gridStride) - 1;

                        while ((trace[lastRow][lastCol] != SourceDirection.Stop) && (trace[lastRow][lastCol] != SourceDirection.Block))
                        {
                            switch (trace[lastRow][lastCol])
                            {
                            case SourceDirection.Diagonal:
                                // diagonal, no gap, use both sequence residues
                                sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                                sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                                alignmentLength++;
                                lastRow--;
                                lastCol--;
                                break;

                            case SourceDirection.Up:
                                // up, gap in J
                                sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                                sequence2[alignmentLength] = this.gapCode;
                                alignmentLength++;
                                lastRow--;
                                colGaps++;
                                break;

                            case SourceDirection.Left:
                                // left, gap in I
                                sequence1[alignmentLength] = this.gapCode;
                                sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                                alignmentLength++;
                                lastCol--;
                                rowGaps++;
                                break;
                            }
                        }

                        if (trace[lastRow][lastCol] == SourceDirection.Stop)
                        {
                            // Be nice, turn aligned solutions around so that they match the input sequences
                            byte[] alignedA = new byte[alignmentLength];
                            byte[] alignedB = new byte[alignmentLength];
                            for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                            {
                                alignedA[i] = sequence1[j];
                                alignedB[i] = sequence2[j];
                            }

                            // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                            IAlphabet alphabet = Alphabets.AutoDetectAlphabet(alignedA, 0, alignedA.LongLength, sequenceI.Alphabet);
                            Sequence  seq      = new Sequence(alphabet, alignedA, false);
                            seq.ID = sequenceI.ID;
                            // seq.DisplayID = aInput.DisplayID;
                            result.FirstSequence = seq;

                            alphabet = Alphabets.AutoDetectAlphabet(alignedB, 0, alignedB.LongLength, sequenceJ.Alphabet);
                            seq      = new Sequence(alphabet, alignedB, false);
                            seq.ID   = sequenceJ.ID;
                            // seq.DisplayID = bInput.DisplayID;
                            result.SecondSequence = seq;

                            // Offset is start of alignment in input sequence with respect to other sequence.
                            if (lastCol >= lastRow)
                            {
                                result.FirstOffset  = lastCol - lastRow;
                                result.SecondOffset = 0;
                            }
                            else
                            {
                                result.FirstOffset  = 0;
                                result.SecondOffset = lastRow - lastCol;
                            }
                            result.Metadata["StartOffsets"] = new List <long> {
                                lastRow, lastCol
                            };
                            result.Metadata["Insertions"] = new List <long> {
                                rowGaps, colGaps
                            };
                            alignment.PairwiseAlignedSequences.Add(result);

                            break;
                        }
                        else
                        {
                            // The trace reached a block marker: continue in the neighbouring block
                            // above, to the left, or diagonally, entering it at its far edge.
                            if (lastRow == 0 && lastCol == 0)
                            {
                                blockRow--;
                                blockCol--;
                                lastRow = gridStride;
                                lastCol = gridStride;
                            }
                            else
                            {
                                if (lastRow == 0)
                                {
                                    blockRow--;
                                    lastRow = gridStride;
                                }
                                else
                                {
                                    blockCol--;
                                    lastCol = gridStride;
                                }
                            }
                        }
                    }
                }

                return(new List <IPairwiseSequenceAlignment>()
                {
                    alignment
                });
            }
        }
Exemple #19
0
        /// <summary>
        /// Validates the MUMmer align method for the test case stored under the given
        /// configuration node: builds the reference and query sequences, runs the aligner
        /// and compares the score and the aligned sequences with the expected values read
        /// from the same node.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isFilePath">
        /// True to parse the sequences from the FastA files named in the node;
        /// false to build them from the sequence strings stored in the node.
        /// </param>
        /// <param name="isSeqList">Is MUMmer alignment with List of sequences</param>
        void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
        {
            ISequence referenceSeq;
            ISequence querySeq;
            IList<ISequence> querySeqs;
            string referenceSequence;
            string querySequence;
            IList<IPairwiseSequenceAlignment> align;

            if (isFilePath)
            {
                // Gets the reference sequence file path from the configuration file
                string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                FastAParser parser = new FastAParser();
                try
                {
                    IEnumerable<ISequence> referenceSeqs = parser.Parse(filePath);
                    referenceSeq = referenceSeqs.FirstOrDefault();
                    Assert.IsNotNull(referenceSeq);
                    // Convert while the parser is still open, as the original code did.
                    referenceSequence = referenceSeq.ConvertToString();
                }
                finally
                {
                    // Close the parser even when one of the assertions above fails.
                    parser.Close();
                }

                // Gets the query (search) sequence file path from the configuration file
                string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                FastAParser queryParserObj = new FastAParser();
                try
                {
                    querySeqs = queryParserObj.Parse(queryFilePath).ToList();
                    querySeq = querySeqs.FirstOrDefault();
                    Assert.IsNotNull(querySeq);
                    querySequence = querySeq.ConvertToString();
                }
                finally
                {
                    // Close the parser even when one of the assertions above fails.
                    queryParserObj.Close();
                }
            }
            else
            {
                // Gets the reference sequence and its alphabet from the configuration file
                referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                string referenceSeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
                referenceSeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet), referenceSequence);

                // Gets the query (search) sequence and its alphabet from the configuration file
                querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
                string querySeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
                querySeq = new Sequence(Utility.GetAlphabet(querySeqAlphabet), querySequence);
                querySeqs = new List<ISequence> {querySeq};
            }

            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

            var mumAlignObj = new Bio.Algorithms.MUMmer.MUMmerAligner
            {
                LengthOfMUM = long.Parse(mumLength, null),
                GapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null)
            };

            if (isSeqList)
            {
                // List overload: the reference sequence is appended to the query list.
                querySeqs.Add(referenceSeq);
                align = mumAlignObj.Align(querySeqs);
            }
            else
            {
                align = mumAlignObj.AlignSimple(referenceSeq, querySeqs);
            }

            // Validate the score of the first aligned pair against the configured value.
            string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
            Assert.AreEqual(expectedScore, align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));

            // Build the expected alignment from the configured sequences and compare.
            string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment seqAlign = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
                SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
                Score = Convert.ToInt32(expectedScore, null),
                FirstOffset = Int32.MinValue,
                SecondOffset = Int32.MinValue
            };
            seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(seqAlign);
            Assert.IsTrue(CompareAlignment(align, expectedOutput));
            ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
        }
        /// <summary>
        /// Launches the alignment algorithm: computes the intermediate grid blocks, computes
        /// the corner block together with its trace, and then traces back one aligned pair
        /// for every entry in optScoreCells.
        /// </summary>
        /// <returns>
        /// An empty list when optScoreCells is empty; otherwise a list holding a single
        /// PairwiseSequenceAlignment to which the traced-back aligned pairs were added.
        /// </returns>
        public virtual List<IPairwiseSequenceAlignment> Align()
        {
            InitializeCache();

            // Grid: visit the blocks diagonal by diagonal. The final diagonal, which holds only
            // block (gridRows - 1, gridCols - 1), is excluded here and handled by
            // ComputeCornerBlock below.
            for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
            {
                for (int blockRow = 0; blockRow < gridRows; blockRow++)
                {
                    int blockCol = diagonal - blockRow;

                    if ((blockCol >= 0) && (blockCol < gridCols))
                    {
                        // Blocks in the last grid row/column may be shorter than gridStride.
                        int lastRow = (blockRow == gridRows - 1) ? (int)(colHeight - Helper.BigMul(blockRow, gridStride) - 1) : gridStride;
                        int lastCol = (blockCol == gridCols - 1) ? (int)(rowWidth - Helper.BigMul(blockCol, gridStride) - 1) : gridStride;

                        ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
                    }
                }
            }

            // Trace matrix for a single block: (gridStride + 1) x (gridStride + 1) cells.
            sbyte[][] trace = new sbyte[gridStride + 1][];
            for (int i = 0; i <= gridStride; i++)
            {
                trace[i] = new sbyte[gridStride + 1];
            }

            // Last Block - grid calculation and Traceback combined
            int completeTraceRow = gridRows - 1;
            int completeTraceCol = gridCols - 1;

            int completeLastRow = (int)(colHeight - Helper.BigMul(completeTraceRow, gridStride) - 1);
            int completeLastCol = (int)(rowWidth - Helper.BigMul(completeTraceCol, gridStride) - 1);

            ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

            //Traceback
            if (optScoreCells.Count == 0)
            {
                return new List<IPairwiseSequenceAlignment>();
            }
            else
            {
                PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

                for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
                {
                    PairwiseAlignedSequence result = new PairwiseAlignedSequence();
                    result.Score = optScore;

                    long alignmentRow = optScoreCells[alignmentCount].Item1;
                    long alignmentCol = optScoreCells[alignmentCount].Item2;

                    // Block containing the cell, and the cell's position inside that block.
                    int blockRow = (int)(alignmentRow / gridStride);
                    int blockCol = (int)(alignmentCol / gridStride);

                    int lastRow = (int)(alignmentRow - Helper.BigMul(blockRow, gridStride));
                    int lastCol = (int)(alignmentCol - Helper.BigMul(blockCol, gridStride));

                    result.Metadata["EndOffsets"] = new List<long> { alignmentRow - 1, alignmentCol - 1 };

                    // The traceback writes the alignment back to front into these buffers;
                    // colHeight + rowWidth bounds the number of alignment columns.
                    long alignmentLength = 0;
                    byte[] sequence1 = new byte[colHeight + rowWidth];
                    byte[] sequence2 = new byte[colHeight + rowWidth];

                    int colGaps = 0;
                    int rowGaps = 0;

                    while ((blockRow >= 0) && (blockCol >= 0))
                    {
                        // Recompute the trace unless the trace matrix already holds this block
                        // and covers at least the requested (lastRow, lastCol) extent.
                        if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) || (lastRow > completeLastRow) || (lastCol > completeLastCol))
                        {
                            ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);

                            completeTraceRow = blockRow;
                            completeTraceCol = blockCol;

                            completeLastRow = lastRow;
                            completeLastCol = lastCol;
                        }

                        // Use a 64-bit multiply, as the other block-offset computations in this
                        // method do: blockRow * gridStride would be evaluated in 32-bit int
                        // arithmetic and could wrap before being widened to long.
                        long startPositionI = Helper.BigMul(blockRow, gridStride) - 1;
                        long startPositionJ = Helper.BigMul(blockCol, gridStride) - 1;

                        while ((trace[lastRow][lastCol] != SourceDirection.Stop) && (trace[lastRow][lastCol] != SourceDirection.Block))
                        {
                            switch (trace[lastRow][lastCol])
                            {
                                case SourceDirection.Diagonal:
                                    // diagonal, no gap, use both sequence residues
                                    sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                                    sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                                    alignmentLength++;
                                    lastRow--;
                                    lastCol--;
                                    break;

                                case SourceDirection.Up:
                                    // up, gap in J
                                    sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                                    sequence2[alignmentLength] = this.gapCode;
                                    alignmentLength++;
                                    lastRow--;
                                    colGaps++;
                                    break;

                                case SourceDirection.Left:
                                    // left, gap in I
                                    sequence1[alignmentLength] = this.gapCode;
                                    sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                                    alignmentLength++;
                                    lastCol--;
                                    rowGaps++;
                                    break;
                            }
                        }

                        if (trace[lastRow][lastCol] == SourceDirection.Stop)
                        {

                            // Be nice, turn aligned solutions around so that they match the input sequences
                            byte[] alignedA = new byte[alignmentLength];
                            byte[] alignedB = new byte[alignmentLength];
                            for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                            {
                                alignedA[i] = sequence1[j];
                                alignedB[i] = sequence2[j];
                            }

                            // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                            IAlphabet alphabet = Alphabets.AutoDetectAlphabet(alignedA, 0, alignedA.GetLongLength(), sequenceI.Alphabet);
                            Sequence seq = new Sequence(alphabet, alignedA, false);
                            seq.ID = sequenceI.ID;
                            // seq.DisplayID = aInput.DisplayID;
                            result.FirstSequence = seq;

                            alphabet = Alphabets.AutoDetectAlphabet(alignedB, 0, alignedB.GetLongLength(), sequenceJ.Alphabet);
                            seq = new Sequence(alphabet, alignedB, false);
                            seq.ID = sequenceJ.ID;
                            // seq.DisplayID = bInput.DisplayID;
                            result.SecondSequence = seq;

                            // Offset is start of alignment in input sequence with respect to other sequence.
                            // NOTE(review): lastRow/lastCol are indices inside the current block, not
                            // positions in the full sequences - confirm this is the intended value
                            // when the traceback stops outside block (0, 0).
                            if (lastCol >= lastRow)
                            {
                                result.FirstOffset = lastCol - lastRow;
                                result.SecondOffset = 0;
                            }
                            else
                            {
                                result.FirstOffset = 0;
                                result.SecondOffset = lastRow - lastCol;
                            }
                            result.Metadata["StartOffsets"] = new List<long> { lastRow, lastCol };
                            result.Metadata["Insertions"] = new List<long> { rowGaps, colGaps };
                            alignment.PairwiseAlignedSequences.Add(result);

                            break;
                        }
                        else
                        {
                            // The inner loop ended on a Block marker: continue the traceback in
                            // the neighbouring block (diagonal, upper or left), entering it at
                            // index gridStride.
                            if (lastRow == 0 && lastCol == 0)
                            {
                                blockRow--;
                                blockCol--;
                                lastRow = gridStride;
                                lastCol = gridStride;
                            }
                            else
                            {
                                if (lastRow == 0)
                                {
                                    blockRow--;
                                    lastRow = gridStride;
                                }
                                else
                                {
                                    blockCol--;
                                    lastCol = gridStride;
                                }
                            }
                        }
                    }
                }

                return new List<IPairwiseSequenceAlignment>() { alignment };
            }
        }
Exemple #21
0
        /// <summary>
        /// Aligns every region around the MUMs in _finalMumList (before the first MUM,
        /// between each pair of consecutive MUMs, and after the last MUM) and collects
        /// the outcome in a single aligned sequence pair.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="sequence">Query sequence</param>
        /// <returns>Aligned sequences</returns>
        private PairwiseAlignedSequence ProcessGaps(
            ISequence referenceSequence,
            ISequence sequence)
        {
            PairwiseAlignedSequence alignedSequence = new PairwiseAlignedSequence();

            // Writable result sequences; all three are created with the reference alphabet.
            Sequence sequenceResult1 = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = referenceSequence.ID,
                DisplayID = referenceSequence.DisplayID
            };

            Sequence sequenceResult2 = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = sequence.ID,
                DisplayID = sequence.DisplayID
            };

            Sequence consensusResult = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = sequence.ID,
                DisplayID = sequence.DisplayID
            };

            // Running totals of the two insertion counts reported by each AlignGap call.
            List<int> totalInsertions = new List<int>(2) { 0, 0 };
            List<int> gapInsertions;

            // Region before the first MUM: there is no preceding MUM, hence the null.
            MaxUniqueMatch previousMum = _finalMumList[0];
            alignedSequence.Score += AlignGap(
                referenceSequence,
                sequence,
                sequenceResult1,
                sequenceResult2,
                consensusResult,
                null,
                previousMum,
                out gapInsertions);

            totalInsertions[0] += gapInsertions[0];
            totalInsertions[1] += gapInsertions[1];

            // Regions between each pair of consecutive MUMs.
            for (int mumIndex = 1; mumIndex < _finalMumList.Count; mumIndex++)
            {
                MaxUniqueMatch currentMum = _finalMumList[mumIndex];

                alignedSequence.Score += AlignGap(
                    referenceSequence,
                    sequence,
                    sequenceResult1,
                    sequenceResult2,
                    consensusResult,
                    previousMum,
                    currentMum,
                    out gapInsertions);

                totalInsertions[0] += gapInsertions[0];
                totalInsertions[1] += gapInsertions[1];

                previousMum = currentMum;
            }

            // Region after the last MUM: there is no following MUM, hence the null.
            alignedSequence.Score += AlignGap(
                referenceSequence,
                sequence,
                sequenceResult1,
                sequenceResult2,
                consensusResult,
                previousMum,
                null,
                out gapInsertions);

            totalInsertions[0] += gapInsertions[0];
            totalInsertions[1] += gapInsertions[1];

            alignedSequence.FirstSequence = sequenceResult1;
            alignedSequence.SecondSequence = sequenceResult2;
            alignedSequence.Consensus = consensusResult;

            // Offsets are only filled in for Needleman-Wunsch; the original note says they
            // are not required for Smith-Waterman because it yields fragmented alignments.
            // Offset is the starting position of alignment of sequence1 with respect to sequence2.
            if (PairWiseAlgorithm is NeedlemanWunschAligner)
            {
                alignedSequence.FirstOffset = sequenceResult1.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
                alignedSequence.SecondOffset = sequenceResult2.IndexOfNonGap() - sequence.IndexOfNonGap();
            }

            // The reported range always spans both input sequences from first to last symbol.
            List<int> startOffsets = new List<int>(2) { 0, 0 };
            List<int> endOffsets = new List<int>(2)
            {
                referenceSequence.Count - 1,
                sequence.Count - 1
            };

            alignedSequence.Metadata["StartOffsets"] = startOffsets;
            alignedSequence.Metadata["EndOffsets"] = endOffsets;
            alignedSequence.Metadata["Insertions"] = totalInsertions;

            return alignedSequence;
        }
        /// <summary>
        /// Removes the given aligned sequence from this PairwiseSequenceAlignment.
        /// A NotSupportedException is thrown when the alignment is read only.
        /// </summary>
        /// <param name="item">Aligned sequence object.</param>
        /// <returns>True if item was removed, false if item was not found.</returns>
        public bool Remove(PairwiseAlignedSequence item)
        {
            if (!IsReadOnly)
            {
                bool wasRemoved = alignedSequences.Remove(item);
                return wasRemoved;
            }

            throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
        }
        /// <summary>
        /// Copies the aligned sequences from the PairwiseSequenceAlignment into an existing aligned sequence array.
        /// </summary>
        /// <param name="array">Array into which to copy the sequences.</param>
        /// <param name="arrayIndex">Starting index in array at which to begin the copy.</param>
        /// <exception cref="ArgumentNullException">Thrown when array is null.</exception>
        public void CopyTo(PairwiseAlignedSequence[] array, int arrayIndex)
        {
            if (array == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameArray);
            }

            // Delegate to the underlying collection instead of copying element by element:
            // the previous manual loop never checked arrayIndex or the remaining capacity,
            // so it could write some elements into the array and then fail with an
            // IndexOutOfRangeException. The collection's own CopyTo is expected to validate
            // both up front and raise the standard argument exceptions.
            // NOTE(review): assumes alignedSequences exposes ICollection<PairwiseAlignedSequence>.CopyTo
            // (it already provides Remove and Contains) - confirm against the field declaration.
            alignedSequences.CopyTo(array, arrayIndex);
        }
        /// <summary>
        /// Runs the Smith-Waterman aligner on the two sequences configured under the given
        /// node and asserts that the result matches the expected score and aligned
        /// sequences read from the same node.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml.</param>
        /// <param name="isTextFile">
        /// True to parse the input sequences from the FastA files named in the node;
        /// false to read the sequence strings from the node itself.
        /// </param>
        /// <param name="caseType">Letter case used for the inputs; also selects the expected sequence nodes.</param>
        /// <param name="additionalParameter">Selects which Align/AlignSimple overload is called.</param>
        /// <param name="alignType">Align or AlignSimple; also selects the expected score node.</param>
        /// <param name="similarityMatrixParam">Selects how the similarity matrix is constructed.</param>
        private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile,
                                                    SequenceCaseType caseType, AlignParameters additionalParameter,
                                                    AlignmentType alignType,
                                                    SimilarityMatrixParameters similarityMatrixParam)
        {
            Sequence aInput, bInput;
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

                // Parse the files and get the sequence.
                ISequence originalSequence1 = null;
                ISequence originalSequence2 = null;

                var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
                originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
                originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(originalSequence1.ConvertToString(), originalSequence2.ConvertToString(),
                                        alphabet, caseType, out aInput, out bInput);
            }
            else
            {
                string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(
                    originalSequence1,
                    originalSequence2,
                    alphabet,
                    caseType,
                    out aInput,
                    out bInput);
            }

            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : First sequence used is '{0}'.",
                                                   aInput.ConvertToString()));
            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : Second sequence used is '{0}'.",
                                                   bInput.ConvertToString()));

            // Create similarity matrix object for a given file.
            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
            SimilarityMatrix sm;
            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.TextReader:
                    using (TextReader reader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(reader);
                    }
                    break;
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null),
                                                      int.Parse(misMatchValue, null));
                    break;
                default:
                    // Dispose the reader once the matrix has been built, as the TextReader
                    // case above does; previously this reader was never closed.
                    using (StreamReader reader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(reader);
                    }
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode),
                                             null);

            // Create SmithWatermanAligner instance and set its values.
            // For AllParam the matrix and gap costs are passed to the align call instead.
            var smithWatermanObj = new SmithWatermanAligner();
            if (additionalParameter != AlignParameters.AllParam)
            {
                smithWatermanObj.SimilarityMatrix = sm;
                smithWatermanObj.GapOpenCost = gapOpenCost;
                smithWatermanObj.GapExtensionCost = gapExtensionCost;
            }
            IList<IPairwiseSequenceAlignment> result = null;

            // Align the input sequences.
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(sm, gapOpenCost,
                                                            gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                default:
                    // Any other value leaves result null.
                    break;
            }

            // Get the expected sequence and score from xml config.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedGapExtensionScoreNode);

                    switch (caseType)
                    {
                        case SequenceCaseType.LowerCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence1InLower);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence2InLower);
                            break;
                        default:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence1Node);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence2Node);
                            break;
                    }
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedScoreNode);

                    switch (caseType)
                    {
                        case SequenceCaseType.LowerCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence1inLowerNode);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence2inLowerNode);
                            break;
                        case SequenceCaseType.LowerUpperCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence1inLowerNode);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode2);
                            break;
                        default:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode1);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode2);
                            break;
                    }

                    break;
            }

            // Match the alignment result with expected result.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
                                 {
                                     FirstSequence = new Sequence(alphabet, expectedSequence1),
                                     SecondSequence = new Sequence(alphabet, expectedSequence2),
                                     Score = Convert.ToInt32(expectedScore, null),
                                     FirstOffset = Int32.MinValue,
                                     SecondOffset = Int32.MinValue,
                                 };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
 /// <summary>
 /// Checks whether the given aligned sequence is present in this alignment's
 /// list of aligned sequences.
 /// </summary>
 /// <param name="item">Aligned sequence to look for.</param>
 /// <returns>True when the list contains the item; otherwise false.</returns>
 public bool Contains(PairwiseAlignedSequence item)
 {
     bool isPresent = alignedSequences.Contains(item);
     return isPresent;
 }
Exemple #26
0
        /// <summary>
        /// Aligns a query against a reference with MUMmerAligner, using
        /// Needleman-Wunsch as the pairwise algorithm, a 3 / -2 diagonal similarity
        /// matrix and explicit gap costs, then compares the outcome with the
        /// expected alignment.
        /// </summary>
        public void TestMUMmer3MultipleMumWithCustomMatrix()
        {
            string reference = "ATGCGCATCCCCTT";
            string search = "GCGCCCCCTA";

            Sequence referenceSeq = new Sequence(Alphabets.DNA, reference);
            Sequence searchSeq = new Sequence(Alphabets.DNA, search);

            List<ISequence> searchSeqs = new List<ISequence>();
            searchSeqs.Add(searchSeq);

            // The scores are supplied through a diagonal matrix (3 for a match,
            // -2 for a mismatch). A hand-filled int[,] holding the same values used
            // to be built here but was never handed to the aligner, so it was removed.
            DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

            int gapOpenCost = -6;

            MUMmerAligner mummer = new MUMmerAligner();
            mummer.LengthOfMUM = 4;
            mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
            mummer.SimilarityMatrix = matrix;
            mummer.GapOpenCost = gapOpenCost;
            mummer.GapExtensionCost = -2;

            IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

            // Check if output is not null
            Assert.IsNotNull(result);

            // Build the single alignment the aligner is expected to return.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
            alignedSeq.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
            alignedSeq.Score = 1;
            alignedSeq.FirstOffset = 0;
            alignedSeq.SecondOffset = 2;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Exemple #27
0
        /// <summary>
        /// Checks that ToString() on a pairwise alignment holding one aligned
        /// sequence yields the consensus, first and second sequences on separate
        /// lines followed by an empty line.
        /// </summary>
        public void ValidatePairwiseSequenceAlignmentToString()
        {
            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
            alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
            alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
            alignedSeq.Score = 28;
            alignedSeq.FirstOffset = 0;
            alignedSeq.SecondOffset = 3;
            align.PairwiseAlignedSequences.Add(alignedSeq);

            string actualString = align.ToString();

            // The literal is written with "\r\n" and normalised to the platform line ending.
            string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n\r\n".Replace("\r\n", System.Environment.NewLine);

            // Expected value goes first so a failure message labels the two strings correctly.
            Assert.AreEqual(expectedString, actualString);
        }
Exemple #28
0
        /// <summary>
        /// Aligns a query against a reference with MUMmerAligner (Needleman-Wunsch
        /// as the pairwise algorithm, MUM length 4) and compares the outcome with
        /// the expected alignment.
        /// </summary>
        public void TestMUMmerAlignerMultipleMum()
        {
            // Inputs: one reference and a single-element list of queries.
            Sequence referenceSeq = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
            Sequence searchSeq = new Sequence(Alphabets.DNA, "GCGCCCCCTA");
            List<ISequence> searchSeqs = new List<ISequence> { searchSeq };

            MUMmerAligner aligner = new MUMmerAligner();
            aligner.LengthOfMUM = 4;
            aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();

            IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(referenceSeq, searchSeqs);

            // The aligner must hand back a result object.
            Assert.AreNotEqual(null, result);

            // Expected: one alignment holding one aligned pair.
            PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT"),
                SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA"),
                Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW"),
                Score = -11,
                FirstOffset = 0,
                SecondOffset = 2
            };

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            expectedOutput.Add(expectedAlignment);

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Exemple #29
0
        /// <summary>
        /// Aligns the regions around the MUMs - before the first one, between each
        /// consecutive pair and after the last one - and assembles the pieces into a
        /// single pairwise aligned sequence.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="sequence">Query sequence.</param>
        /// <param name="mums">List of MUMs.</param>
        /// <returns>Aligned sequences.</returns>
        private PairwiseAlignedSequence ProcessGaps(
                ISequence referenceSequence,
                ISequence sequence,
                IList<Match> mums)
        {
            var referenceBytes = new List<byte>();
            var queryBytes = new List<byte>();
            var consensusBytes = new List<byte>();
            var alignment = new PairwiseAlignedSequence();

            // Running insertion totals; slots 0 and 1 are accumulated independently.
            var totalInsertions = new List<long>(2) { 0, 0 };
            List<long> stepInsertions;

            // Region in front of the first MUM. A zero-length Match stands in for
            // the predecessor that does not exist.
            Match previous = mums.First();
            alignment.Score += this.AlignGap(
                    referenceSequence,
                    sequence,
                    referenceBytes,
                    queryBytes,
                    consensusBytes,
                    new Match() { Length = 0 },
                    previous,
                    out stepInsertions);
            totalInsertions[0] += stepInsertions[0];
            totalInsertions[1] += stepInsertions[1];

            // Regions between each pair of consecutive MUMs.
            int position = 1;
            while (position < mums.Count)
            {
                Match current = mums[position];

                alignment.Score += this.AlignGap(
                        referenceSequence,
                        sequence,
                        referenceBytes,
                        queryBytes,
                        consensusBytes,
                        previous,
                        current,
                        out stepInsertions);
                totalInsertions[0] += stepInsertions[0];
                totalInsertions[1] += stepInsertions[1];

                previous = current;
                position++;
            }

            // Region behind the last MUM, again closed off by a zero-length Match.
            alignment.Score += this.AlignGap(
                    referenceSequence,
                    sequence,
                    referenceBytes,
                    queryBytes,
                    consensusBytes,
                    previous,
                    new Match() { Length = 0 },
                    out stepInsertions);
            totalInsertions[0] += stepInsertions[0];
            totalInsertions[1] += stepInsertions[1];

            // Aligned reference. Only the ID is carried over; the metadata
            // dictionary is deliberately not shallow-copied.
            byte[] referenceArray = referenceBytes.ToArray();
            IAlphabet referenceAlphabet = Alphabets.AutoDetectAlphabet(
                referenceArray, 0, referenceArray.GetLongLength(), referenceSequence.Alphabet);
            alignment.FirstSequence = new Sequence(referenceAlphabet, referenceArray)
            {
                ID = referenceSequence.ID
            };

            // Aligned query, built the same way.
            byte[] queryArray = queryBytes.ToArray();
            IAlphabet queryAlphabet = Alphabets.AutoDetectAlphabet(
                queryArray, 0, queryArray.GetLongLength(), sequence.Alphabet);
            alignment.SecondSequence = new Sequence(queryAlphabet, queryArray)
            {
                ID = sequence.ID
            };

            // Consensus; its alphabet is detected starting from the reference alphabet.
            byte[] consensusArray = consensusBytes.ToArray();
            IAlphabet consensusAlphabet = Alphabets.AutoDetectAlphabet(
                consensusArray, 0, consensusArray.GetLongLength(), referenceSequence.Alphabet);
            alignment.Consensus = new Sequence(consensusAlphabet, consensusArray);

            // Offsets are filled in only when the pairwise algorithm is Needleman-Wunsch.
            // Each offset is the index of the first non-gap symbol in the aligned
            // sequence minus the same index in the corresponding input sequence.
            if (this.PairWiseAlgorithm is NeedlemanWunschAligner)
            {
                alignment.FirstOffset =
                    alignment.FirstSequence.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
                alignment.SecondOffset =
                    alignment.SecondSequence.IndexOfNonGap() - sequence.IndexOfNonGap();
            }

            // Start/end offsets span both input sequences in full.
            var startOffsets = new List<long>(2) { 0, 0 };
            var endOffsets = new List<long>(2)
            {
                referenceSequence.Count - 1,
                sequence.Count - 1
            };

            alignment.Metadata["StartOffsets"] = startOffsets;
            alignment.Metadata["EndOffsets"] = endOffsets;
            alignment.Metadata["Insertions"] = totalInsertions;

            return alignment;
        }
Exemple #30
0
        /// <summary>
        /// Runs MUMmerAligner.Align on the reference and query FASTA files configured
        /// under the given XML node and compares the alignment with the expected
        /// sequences and score read from the same node.
        /// </summary>
        /// <param name="nodeName">Name of the XML node holding the test configuration.</param>
        private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
        {
            // Location of the reference FASTA file.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            Assert.IsNotNull(filePath);
            ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the File Path '{0}'.", filePath));

            // Only the first sequence of the reference file is used.
            var referenceParser = new FastAParser();
            ISequence referenceSeq = referenceParser.Parse(filePath).ElementAt(0);

            // Location of the query (search) FASTA file.
            string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
            Assert.IsNotNull(queryFilePath);
            ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryFilePath));

            var queryParser = new FastAParser();
            IEnumerable<ISequence> querySeqs = queryParser.Parse(queryFilePath);

            // Configure the aligner from the XML values.
            string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

            var aligner = new MUMmerAligner();
            aligner.LengthOfMUM = long.Parse(mumLength, null);
            aligner.StoreMUMs = true;
            aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
            aligner.GapOpenCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

            IList<IPairwiseSequenceAlignment> actualOutput = aligner.Align(referenceSeq, querySeqs);

            // With StoreMUMs enabled the MUMs property must be populated.
            Assert.IsNotNull(aligner.MUMs);

            // Build the expected alignment from the configured score and sequences.
            string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
            string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

            var expectedPair = new PairwiseAlignedSequence();
            expectedPair.FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
            expectedPair.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
            expectedPair.Score = Convert.ToInt32(expectedScore, null);
            expectedPair.FirstOffset = Int32.MinValue;
            expectedPair.SecondOffset = Int32.MinValue;

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            expectedOutput.Add(expectedAlignment);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(actualOutput, expectedOutput));

            ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
        }
Exemple #31
0
        /// <summary>
        /// Runs VariantCaller.LeftAlignIndelsAndCallVariants on a gapped pairwise
        /// alignment and checks both the re-aligned sequences and the three indel
        /// variants it reports.
        /// </summary>
        public static void TestLeftAlignmentStep() {
            var refseq =   "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT";
            var queryseq = "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT";

            var r = new Sequence (DnaAlphabet.Instance, refseq);
            var q = new Sequence (DnaAlphabet.Instance, queryseq);
            var aln = new PairwiseSequenceAlignment (r, q);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = r;
            pas.SecondSequence = q;
            aln.Add (pas);
            var tpl = VariantCaller.LeftAlignIndelsAndCallVariants (aln, true);

            // Check the left alignment. Fail with a clear assertion, not a
            // NullReferenceException, if the returned alignment has another type.
            aln = tpl.Item1 as PairwiseSequenceAlignment;
            Assert.IsNotNull (aln);
            var lar = aln.PairwiseAlignedSequences [0].FirstSequence.ConvertToString();
            var laq = aln.PairwiseAlignedSequences [0].SecondSequence.ConvertToString();
            var exprefseq =   "ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT";
            var expqueryseq = "ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT";
            Assert.AreEqual (exprefseq, lar);
            Assert.AreEqual (expqueryseq, laq);

            // And it's hard, so we might as well check the variants.
            // The arrays below are parallel: entry i describes expected variant i.
            var variants = tpl.Item2;
            Assert.AreEqual (3, variants.Count);
            string[] bases = new string[] { "A", "GCGC", "TA" };
            char[] hpbases = new char[] { 'A', 'G', 'T' };
            bool[] inHp = new bool[] { true, false, false };
            int[] lengths = new int[] { 1, 4, 2 };
            int[] starts = new int[] { 4, 8, 24 };
            IndelType[] types = new IndelType[] { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };
            for (int i = 0; i < starts.Length; i++) {
                Assert.AreEqual (VariantType.INDEL, variants [i].Type);
                var vi = variants [i] as IndelVariant;
                Assert.IsNotNull (vi);
                Assert.AreEqual (hpbases[i], vi.HomopolymerBase);
                Assert.AreEqual (starts [i], vi.StartPosition);
                Assert.AreEqual (lengths [i], vi.Length);
                Assert.AreEqual (bases [i], vi.InsertedOrDeletedBases);
                Assert.AreEqual (inHp [i], vi.InHomopolymer);
                Assert.AreEqual (types [i], vi.InsertionOrDeletion);
            }
        }
        /// <summary>
        /// Aligns two reference sequences against two query sequences with
        /// NucmerPairwiseAligner and compares the alignments with the two expected ones.
        /// </summary>
        public void ValidatePairwiseAlignedSequenceMultipleRefQuery()
        {
            var referenceSeqs = new List<ISequence>();
            referenceSeqs.Add(new Sequence(Alphabets.DNA, "ATGCGCATCCCC") {ID = "R1"});
            referenceSeqs.Add(new Sequence(Alphabets.DNA, "TAGCT") {ID = "R2"});

            var searchSeqs = new List<ISequence>();
            searchSeqs.Add(new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") {ID = "Q1"});
            searchSeqs.Add(new Sequence(Alphabets.DNA, "AGCT") {ID = "Q2"});

            var nucmer = new NucmerPairwiseAligner();
            nucmer.FixedSeparation = 0;
            nucmer.MinimumScore = 2;
            nucmer.SeparationFactor = -1;
            nucmer.LengthOfMUM = 3;
            nucmer.ForwardOnly = true;

            // View each returned alignment through the pairwise interface.
            IList<IPairwiseSequenceAlignment> result = nucmer
                .Align(referenceSeqs, searchSeqs)
                .Select(a => a as IPairwiseSequenceAlignment)
                .ToList();

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            // First expected alignment.
            var firstPair = new PairwiseAlignedSequence();
            firstPair.FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC");
            firstPair.SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC");
            firstPair.Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC");
            firstPair.Score = -5;
            firstPair.FirstOffset = 0;
            firstPair.SecondOffset = 0;

            IPairwiseSequenceAlignment firstAlignment = new PairwiseSequenceAlignment();
            firstAlignment.PairwiseAlignedSequences.Add(firstPair);
            expectedOutput.Add(firstAlignment);

            // Second expected alignment.
            var secondPair = new PairwiseAlignedSequence();
            secondPair.FirstSequence = new Sequence(Alphabets.DNA, "AGCT");
            secondPair.SecondSequence = new Sequence(Alphabets.DNA, "AGCT");
            secondPair.Consensus = new Sequence(Alphabets.DNA, "AGCT");
            secondPair.Score = 12;
            secondPair.FirstOffset = 0;
            secondPair.SecondOffset = 1;

            IPairwiseSequenceAlignment secondAlignment = new PairwiseSequenceAlignment();
            secondAlignment.PairwiseAlignedSequences.Add(secondPair);
            expectedOutput.Add(secondAlignment);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
            ApplicationLog.WriteLine("PairwiseAlignedSequence P1: Successfully validated Sequence with Multiple Reference.");
        }
Exemple #33
0
        /// <summary>
        /// Wraps two sequences from the XML configuration in a pairwise alignment and
        /// validates either the alignment properties or the sequences read back.
        /// </summary>
        /// <param name="nodeName">XML node holding the sequences, alphabet and expected values.</param>
        /// <param name="validateProperty">
        /// True to check IsReadOnly, Count and Score against the configured values;
        /// false to check that the stored sequences equal the input sequences.
        /// </param>
        private void ValidateGeneralSequenceAlignment(string nodeName, bool validateProperty)
        {
            // Read the two input sequences and their alphabet from the xml file.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence used is '{0}'.",
                                                   origSequence1));
            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence used is '{0}'.",
                                                   origSequence2));

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Add the sequence pair to a pairwise alignment via Add() and collect it in a list.
            IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();

            var alignSeq = new PairwiseAlignedSequence {FirstSequence = aInput, SecondSequence = bInput};
            IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Read the output back and validate the same.
            IList<PairwiseAlignedSequence> newAlignedSequences =
                sequenceAlignmentObj[0].PairwiseAlignedSequences;
            string readSequence1 =
                new String(newAlignedSequences[0].FirstSequence.Select(a => (char) a).ToArray());
            string readSequence2 =
                new String(newAlignedSequences[0].SecondSequence.Select(a => (char) a).ToArray());

            // Log what was actually read back, not the original input.
            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence read is '{0}'.",
                                                   readSequence1));
            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence read is '{0}'.",
                                                   readSequence2));

            if (validateProperty)
            {
                string score = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
                string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceCountNode);

                // Expected value first, so failure messages label the values correctly.
                Assert.IsFalse(sequenceAlignmentObj.IsReadOnly);
                Assert.AreEqual(seqCount, sequenceAlignmentObj.Count.ToString((IFormatProvider) null));
                Assert.AreEqual(
                    score,
                    sequenceAlignmentObj[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider) null));

                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
            }
            else
            {
                Assert.AreEqual(origSequence1, readSequence1);
                Assert.AreEqual(origSequence2, readSequence2);
            }
        }
Exemple #34
0
        /// <summary>
        /// Builds a pairwise alignment from the two DNA sequences configured under
        /// the AlignDnaAlgorithm node and checks its Count, FirstSequence,
        /// SecondSequence, IsReadOnly, Documentation and PairwiseAlignedSequences
        /// properties.
        /// </summary>
        public void ValidateSequenceAlignmentProperties()
        {
            // Pull the input sequences, alphabet and expected count from the xml file.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName, Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName, Constants.SequenceNode2);
            string alphabetName = this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName, Constants.AlphabetNameNode);
            IAlphabet alphabet = Utility.GetAlphabet(alphabetName);
            string seqCount = this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName, Constants.SequenceCountNode);

            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // One aligned pair inside one alignment, collected in a list.
            var alignedPair = new PairwiseAlignedSequence
            {
                FirstSequence = aInput,
                SecondSequence = bInput
            };
            IPairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(aInput, bInput);
            alignment.Add(alignedPair);

            IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment>();
            alignments.Add(alignment);

            // Check every property of the alignment object.
            string actualCount = alignment.Count.ToString((IFormatProvider) null);
            Assert.AreEqual(seqCount, actualCount);

            string actualFirst = new string(alignment.FirstSequence.Select(a => (char) a).ToArray());
            Assert.AreEqual(origSequence1, actualFirst);

            string actualSecond = new string(alignment.SecondSequence.Select(a => (char) a).ToArray());
            Assert.AreEqual(origSequence2, actualSecond);

            Assert.IsFalse(alignment.IsReadOnly);
            Assert.IsNull(alignment.Documentation);

            string actualPairCount = alignment.PairwiseAlignedSequences.Count.ToString((IFormatProvider) null);
            Assert.AreEqual(seqCount, actualPairCount);

            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
        }
Exemple #35
0
        /// <summary>
        /// Runs NeedlemanWunschAligner through the overload selected by the
        /// parameters and compares the outcome with the expected sequences and score
        /// stored in the XML configuration.
        /// </summary>
        /// <param name="nodeName">XML node holding inputs, costs and expected results.</param>
        /// <param name="alignParam">
        /// Selects the Align/AlignSimple overload. Values whose name contains "Code"
        /// take the input sequences from the XML node itself instead of FASTA files.
        /// </param>
        /// <param name="similarityMatrixParam">Selects how the similarity matrix is built.</param>
        /// <param name="alignType">
        /// AlignmentType.Align uses Align() with the gap extension cost; any other
        /// value uses AlignSimple().
        /// </param>
        private void ValidateNeedlemanWunschAlignment(string nodeName, AlignParameters alignParam,
                                                      SimilarityMatrixParameters similarityMatrixParam,
                                                      AlignmentType alignType)
        {
            ISequence aInput, bInput;

            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            // Parse the files and get the sequence.
            if (alignParam.ToString().Contains("Code"))
            {
                string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                aInput = new Sequence(alphabet, sequence1);
                bInput = new Sequence(alphabet, sequence2);
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);
                var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
                ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).FirstOrDefault();
                Assert.IsNotNull(originalSequence1);
                aInput = new Sequence(alphabet, originalSequence1.ConvertToString());

                var parseObjectForFile2 = new FastAParser { Alphabet = alphabet };
                ISequence originalSequence2 = parseObjectForFile2.Parse(filePath2).FirstOrDefault();
                Assert.IsNotNull(originalSequence2);
                bInput = new Sequence(alphabet, originalSequence2.ConvertToString());
            }

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
            SimilarityMatrix sm;

            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.TextReader:
                    using (TextReader reader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(reader);
                    }
                    break;
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null),
                                                      int.Parse(misMatchValue, null));
                    break;
                default:
                    // Dispose the reader once the matrix is built, as the TextReader
                    // case does, so the file handle is not left open.
                    using (var defaultReader = new StreamReader(blosumFilePath))
                    {
                        sm = new SimilarityMatrix(defaultReader);
                    }
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode),
                                             null);

            // For AllParam the matrix and gap open cost are passed to the
            // Align/AlignSimple call itself, so they are not set on the aligner.
            var needlemanWunschObj = new NeedlemanWunschAligner();
            if (AlignParameters.AllParam != alignParam)
            {
                needlemanWunschObj.SimilarityMatrix = sm;
                needlemanWunschObj.GapOpenCost = gapOpenCost;
            }

            IList<IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
                case AlignParameters.AlignList:
                case AlignParameters.AlignListCode:
                    var sequences = new List<ISequence> {aInput, bInput};
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                            result = needlemanWunschObj.Align(sequences);
                            break;
                        default:
                            result = needlemanWunschObj.AlignSimple(sequences);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                case AlignParameters.AllParamCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                            result = needlemanWunschObj.Align(sm,
                                                              gapOpenCost, gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = needlemanWunschObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                case AlignParameters.AlignTwoCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                            result = needlemanWunschObj.Align(aInput, bInput);
                            break;
                        default:
                            result = needlemanWunschObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }

            // Read the expected score and aligned sequences from the xml file.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence2Node);
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }

            // Build the expected alignment and compare it with the aligner output.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment(aInput, bInput);
            var alignedSeq = new PairwiseAlignedSequence
                                 {
                                     FirstSequence = new Sequence(alphabet, expectedSequence1),
                                     SecondSequence = new Sequence(alphabet, expectedSequence2),
                                     Score = Convert.ToInt32(expectedScore, null)
                                 };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
            ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
        /// <summary>
        /// Walks the traceback matrix backwards from <paramref name="startingCell"/> and
        /// builds the pairwise alignment that ends at that cell: both gapped sequences,
        /// their consensus, the score, the offsets and a set of statistics stored in the
        /// alignment metadata.
        /// </summary>
        /// <param name="startingCell">
        /// Cell where the traceback starts. Its Row/Col give the start position in the
        /// matrix and its Score becomes the score of the returned alignment.
        /// </param>
        /// <returns>
        /// Pairwise alignment whose metadata contains "Score", "FirstOffset", "SecondOffset",
        /// "Consensus", "StartOffsets", "EndOffsets", "Insertions", "IdenticalCount" and
        /// "SimilarityCount".
        /// </returns>
        protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // The gap-length tables are indexed as a flat array: row * (Cols + 1) + column.
            int gapStride = Cols + 1;
            // Lists are used because the final alignment length is unknown up front;
            // the initial capacity is a guess of 110% of the longer input.
            int estimatedLength = (int)( 1.1*Math.Max(ReferenceSequence.Length,QuerySequence.Length));
            var firstAlignment = new List<byte>(estimatedLength);
            var secondAlignment = new List<byte>(estimatedLength);

            // Get the starting cell position and record the optimal score found there.
            // i indexes rows (QuerySequence), j indexes columns (ReferenceSequence).
            int i = startingCell.Row;
            int j = startingCell.Col;
            var finalScore = startingCell.Score;

            // rowGaps: gap symbols written into the second alignment (Left moves).
            // colGaps: gap symbols written into the first alignment (Up moves).
            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments (in reverse order).
            while (!TracebackIsComplete(i, j))
            {
                sbyte tracebackDirection = Traceback[i][j];
                // Step one move backwards according to the recorded direction.
                int gapLength;
                switch (tracebackDirection)
                {
                    case SourceDirection.Diagonal:
                        // Match/mismatch: consume one symbol from each sequence.
                        byte n1 = ReferenceSequence[j - 1];
                        byte n2 = QuerySequence[i - 1];
                        firstAlignment.Add(n1);
                        secondAlignment.Add(n2);
                        i--;
                        j--;
                        // Track some useful statistics: identical symbols count as both
                        // identical and similar; otherwise a positive similarity score
                        // counts as similar only.
                        if (n1 == n2 && n1 != _gap)
                        {
                            identicalCount++;
                            similarityCount++;
                        }
                        else if (SimilarityMatrix[n2, n1] > 0)
                            similarityCount++;
                        break;
                    case SourceDirection.Left:
                        // Gap in the second alignment: consume reference symbols only.
                        // NOTE(review): an earlier comment here said to "add 1" to the stored
                        // gap length, but the value is used as-is - confirm what
                        // h_Gap_Length actually stores.
                        if (usingAffineGapModel)
                        {
                            // Affine model: emit the whole gap run recorded for this cell at once.
                            gapLength = h_Gap_Length[i * gapStride + j];
                            for (int k = 0; k < gapLength; k++)
                            {
                                firstAlignment.Add(ReferenceSequence[--j]);
                                secondAlignment.Add(_gap);
                                rowGaps++;
                            }
                        }
                        else
                        {
                            // Simple model: one gap column per step.
                            firstAlignment.Add(ReferenceSequence[--j]);
                            secondAlignment.Add(_gap);
                            rowGaps++;
                        }
                        break;
                    case SourceDirection.Up:
                        // Gap in the first alignment: consume query symbols only.
                        // NOTE(review): as above, the stored v_Gap_Length value is used
                        // without adding 1 - confirm against where the table is filled.
                        if (usingAffineGapModel)
                        {
                            // Affine model: emit the whole gap run recorded for this cell at once.
                            gapLength = v_Gap_Length[i * gapStride + j];
                            for (int k = 0; k < gapLength; k++)
                            {
                                firstAlignment.Add(_gap);
                                colGaps++;
                                secondAlignment.Add(QuerySequence[--i]);
                            }
                        }
                        else
                        {
                            // Simple model: one gap column per step.
                            secondAlignment.Add(QuerySequence[--i]);
                            firstAlignment.Add(_gap);
                            colGaps++;
                        }
                        break;
                    default:
                        // NOTE(review): neither i nor j changes on this path, so an
                        // unexpected direction value would keep the loop spinning unless
                        // TracebackIsComplete(i, j) becomes true - confirm this cannot occur.
                        break;
                }
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table. To create
            // the proper alignments we need to reverse both lists.
            firstAlignment.Reverse();
            secondAlignment.Reverse();
            // Create the Consensus sequence, one resolved symbol per alignment column.
            byte[] consensus = new byte[Math.Min(firstAlignment.Count, secondAlignment.Count)];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment. The aligned sequences take their alphabet and ID
            // from _sequence1 / _sequence2.
            // NOTE(review): presumably ReferenceSequence/QuerySequence hold the symbols of
            // _sequence1/_sequence2 respectively - confirm.
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment.ToArray()) { ID = _sequence1.ID },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment.ToArray()) { ID = _sequence2.ID },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point i and j are the row/column where the traceback stopped; only the
            // sequence with the smaller stop index gets a non-zero offset (the difference).
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }


            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"] = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"] = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"] = pairwiseAlignedSequence.Consensus;
            // Start/end positions are listed as { column (reference), row (query) }.
            pairwiseAlignedSequence.Metadata["StartOffsets"] = new List<long> { j, i };
            pairwiseAlignedSequence.Metadata["EndOffsets"] = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
            pairwiseAlignedSequence.Metadata["Insertions"] = new List<long> { colGaps, rowGaps }; // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"] = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return pairwiseAlignedSequence;

        }
Exemple #37
0
        /// <summary>
        /// Expands this delta alignment into explicit, gapped reference and query sequences.
        /// </summary>
        /// <returns>
        /// Aligned pair with the gapped reference range as first sequence and the gapped
        /// query range as second sequence. Its metadata carries "StartOffsets" and
        /// "EndOffsets" (reference at index 0, query at index 1) and "Insertions" (number
        /// of gaps written into the reference at index 0 and into the query at index 1).
        /// </returns>
        public PairwiseAlignedSequence ConvertDeltaToSequences()
        {
            var startOffsets = new List<long>(2) { FirstSequenceStart, SecondSequenceStart };
            var endOffsets = new List<long>(2) { FirstSequenceEnd, SecondSequenceEnd };
            var insertions = new List<long>(2) { 0, 0 };

            // Copy the aligned range [start, end] (both inclusive) of each input sequence.
            var gappedReference = new List<byte>();
            long position = this.FirstSequenceStart;
            while (position <= this.FirstSequenceEnd)
            {
                gappedReference.Add(this.ReferenceSequence[position]);
                position++;
            }

            var gappedQuery = new List<byte>();
            position = this.SecondSequenceStart;
            while (position <= this.SecondSequenceEnd)
            {
                gappedQuery.Add(this.QuerySequence[position]);
                position++;
            }

            // Each delta advances the running position by its absolute value; its sign picks
            // the sequence that receives the gap symbol:
            //   negative delta -> gap goes into the reference
            //   otherwise      -> gap goes into the query
            int gapPosition = 0;
            foreach (int delta in Deltas)
            {
                gapPosition += Math.Abs(delta);

                bool gapInReference = delta < 0;
                List<byte> target = gapInReference ? gappedReference : gappedQuery;
                target.Insert(gapPosition - 1, DnaAlphabet.Instance.Gap);
                insertions[gapInReference ? 0 : 1]++;
            }

            // The alphabet of each gapped sequence is auto-detected; the query detection is
            // given the original query alphabet, the reference detection is given none.
            byte[] referenceBytes = gappedReference.ToArray();
            IAlphabet referenceAlphabet = Alphabets.AutoDetectAlphabet(referenceBytes, 0, referenceBytes.LongLength, null);
            Sequence alignedReference = new Sequence(referenceAlphabet, referenceBytes, false);

            byte[] queryBytes = gappedQuery.ToArray();
            IAlphabet queryAlphabet = Alphabets.AutoDetectAlphabet(queryBytes, 0, queryBytes.LongLength, QuerySequence.Alphabet);
            Sequence alignedQuery = new Sequence(queryAlphabet, queryBytes, false);

            var alignedSequence = new PairwiseAlignedSequence
            {
                FirstSequence = alignedReference,
                SecondSequence = alignedQuery
            };
            alignedSequence.Metadata["StartOffsets"] = startOffsets;
            alignedSequence.Metadata["EndOffsets"] = endOffsets;
            alignedSequence.Metadata["Insertions"] = insertions;

            return alignedSequence;
        }
Exemple #38
0
        public static void TestExceptionThrownForUnclippedAlignment() {
            // Each entry is { reference, query }. The first query has a gap in its last
            // column; the second has a gap inside the leading run of A's.
            // NOTE(review): presumably the second case fails because left-aligning moves
            // the gap to the first column - confirm against AlignmentUtils.
            var unclippedPairs = new[] {
                new[] { "ACAATATA",   "ACAATAT-" },
                new[] { "AAACAATATA", "AA-CAATATA" }
            };

            foreach (var pair in unclippedPairs) {
                var reference = new Sequence (DnaAlphabet.Instance, pair [0]);
                var query = new Sequence (DnaAlphabet.Instance, pair [1]);

                var alignment = new PairwiseSequenceAlignment (reference, query);
                alignment.Add (new PairwiseAlignedSequence {
                    FirstSequence = reference,
                    SecondSequence = query
                });

                // Both alignments must be rejected with a FormatException.
                Assert.Throws<FormatException> (() => VariantCaller.LeftAlignIndelsAndCallVariants (alignment, true));
            }
        }
Exemple #39
0
        /// <summary>
        /// Left-aligns the indels of a pairwise alignment so that every gap starts as
        /// early as possible, and optionally calls variants on the result.  For example:
        /// 
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        /// 
        /// The first aligned sequence is treated as the reference and the second as the
        /// query.  The shifting is done on copies of the sequence symbols and a new
        /// alignment is returned; the metadata dictionaries of the input sequences are
        /// shared with the new sequences rather than copied.
        /// </summary>
        /// <param name="aln">Alignment holding exactly one aligned pair. The second sequence must be of type QualitativeSequence or Sequence.</param>
        /// <param name="callVariants">If true, variants are called; otherwise the second item of the tuple is null.</param>
        /// <returns>
        /// The left-aligned alignment and the list of called variants (null when
        /// <paramref name="callVariants"/> is false).
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// <paramref name="aln"/> or one of its aligned sequences is null. (The exception type
        /// is kept as-is so existing callers that catch it keep working.)
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The alignment does not contain exactly one aligned pair, or the query sequence is
        /// neither a Sequence nor a QualitativeSequence.
        /// </exception>
        public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true) {

            if (aln == null) {
                throw new NullReferenceException ("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1) {
                throw new ArgumentException ("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First ();
            var seq1 = frstAln.FirstSequence;
            var seq2 = frstAln.SecondSequence;
            if (seq1 == null) {
                throw new NullReferenceException ("seq1");
            } else if (seq2 == null) {
                throw new NullReferenceException ("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity) {
                throw new ArgumentException ("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[] refseq = seq1.ToArray ();
            ISequence newQuery;
            List<Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence) {
                var qs = seq2 as QualitativeSequence;
                // Pair every base with its quality score so both move together when gaps shift.
                var query = Enumerable.Zip (qs, qs.GetQualityScores (), (bp, qv) => new BPandQV (bp, (byte)qv, false)).ToArray ();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence (qs.Alphabet, 
                    qs.FormatType,
                    query.Select (z => z.BP).ToArray (),
                    query.Select (p => p.QV).ToArray (),
                    false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery = newQueryQS;
                
            } else if (seq2 is Sequence) {  // For a sequence with no QV values.
                var qs = seq2 as Sequence;
                // No quality values available: every base gets a QV of 0.
                var query = qs.Select (v => new BPandQV (v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z=>z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            } else {
                throw new ArgumentException ("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            // variants stays null when callVariants is false, so only tag the calls with the
            // reference name when a list was actually produced.
            if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null) {
                foreach (var v in variants) {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            var newRef = new Sequence (seq1.Alphabet, refseq, false);
            newRef.ID = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            // The new alignment keeps the original (unaligned) input sequences and holds the
            // left-aligned pair as its single aligned entry.
            var newaln = new PairwiseSequenceAlignment (aln.FirstSequence, aln.SecondSequence);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add (pas);
            return new Tuple<IPairwiseSequenceAlignment, List<Variant>> (newaln, variants);
        }
Exemple #40
0
        /// <summary>
        /// Builds the gapped reference and query sequences described by this delta alignment.
        /// </summary>
        /// <returns>
        /// Aligned pair with the gapped reference as first sequence and the gapped query as
        /// second sequence. "StartOffsets", "EndOffsets" and "Insertions" are stored in its
        /// metadata, each with the reference value at index 0 and the query value at index 1.
        /// </returns>
        internal PairwiseAlignedSequence ConvertDeltaToSequences()
        {
            List<int> startOffsets = new List<int>(2) { FirstSequenceStart, SecondSequenceStart };
            List<int> endOffsets = new List<int>(2) { FirstSequenceEnd, SecondSequenceEnd };
            List<int> insertions = new List<int>(2) { 0, 0 };

            // Copy the aligned range (start..end, inclusive) of the reference into an
            // editable sequence.
            int referenceLength = FirstSequenceEnd - FirstSequenceStart + 1;
            Sequence gappedReference = new Sequence(ReferenceSequence.Alphabet) { IsReadOnly = false };
            gappedReference.InsertRange(0, ReferenceSequence.Range(FirstSequenceStart, referenceLength).ToString());

            // Same for the query.
            int queryLength = SecondSequenceEnd - SecondSequenceStart + 1;
            Sequence gappedQuery = new Sequence(QuerySequence.Alphabet) { IsReadOnly = false };
            gappedQuery.InsertRange(0, QuerySequence.Range(SecondSequenceStart, queryLength).ToString());

            // Every delta moves the running position forward by its absolute value and
            // places one gap symbol there: into the query for a non-negative delta,
            // into the reference for a negative one.
            int gapPosition = 0;
            foreach (int delta in Deltas)
            {
                gapPosition += Math.Abs(delta);
                if (delta >= 0)
                {
                    gappedQuery.Insert(gapPosition - 1, DnaAlphabet.Instance.Gap.Symbol);
                    insertions[1]++;
                }
                else
                {
                    gappedReference.Insert(gapPosition - 1, DnaAlphabet.Instance.Gap.Symbol);
                    insertions[0]++;
                }
            }

            PairwiseAlignedSequence alignedSequence = new PairwiseAlignedSequence
            {
                FirstSequence = gappedReference,
                SecondSequence = gappedQuery
            };
            alignedSequence.Metadata["StartOffsets"] = startOffsets;
            alignedSequence.Metadata["EndOffsets"] = endOffsets;
            alignedSequence.Metadata["Insertions"] = insertions;

            return alignedSequence;
        }
Exemple #41
0
        public void TestNUCmer3MultipleReferencesAndQueries()
        {
            // Inputs: two reference and two query DNA sequences.
            var referenceSeqs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
                new Sequence(Alphabets.DNA, "TAGCT") { ID = "R11" }
            };
            var searchSeqs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" },
                new Sequence(Alphabets.DNA, "AGCT") { ID = "Q11" }
            };

            // Aligner configuration under test.
            var nucmer = new NucmerPairwiseAligner
            {
                FixedSeparation = 0,
                MinimumScore = 2,
                SeparationFactor = -1,
                LengthOfMUM = 3,
                ForwardOnly = true
            };

            IList<IPairwiseSequenceAlignment> result = nucmer
                .Align(referenceSeqs, searchSeqs)
                .Select(a => a as IPairwiseSequenceAlignment)
                .ToList();

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            // First expected alignment: a two-symbol gap in the second sequence.
            IPairwiseSequenceAlignment gappedAlignment = new PairwiseSequenceAlignment();
            gappedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
                SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
                Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
                Score = -5,
                FirstOffset = 0,
                SecondOffset = 0
            });

            // Second expected alignment: an exact four-symbol match.
            IPairwiseSequenceAlignment exactAlignment = new PairwiseSequenceAlignment();
            exactAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
                SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
                Consensus = new Sequence(Alphabets.DNA, "AGCT"),
                Score = 12,
                FirstOffset = 0,
                SecondOffset = 1
            });

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
            {
                gappedAlignment,
                exactAlignment
            };

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
        }
        /// <summary>
        /// Appends a pairwise aligned sequence to this alignment's list of aligned sequences.
        /// </summary>
        /// <param name="item">PairwiseAlignedSequence to append.</param>
        /// <exception cref="NotSupportedException">Thrown when this alignment is read only.</exception>
        public void Add(PairwiseAlignedSequence item)
        {
            if (!IsReadOnly)
            {
                alignedSequences.Add(item);
                return;
            }

            // Read-only alignments reject every modification.
            throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
        }
Exemple #43
0
        /// <summary>
        /// Turns this delta alignment into a pair of explicitly gapped sequences.
        /// </summary>
        /// <returns>
        /// Aligned pair whose first sequence is the gapped reference range and whose second
        /// sequence is the gapped query range. Each keeps the ID and a copy of the metadata
        /// of the sequence it was cut from. The pair's own metadata holds "StartOffsets",
        /// "EndOffsets" and "Insertions", with the reference value at index 0 and the query
        /// value at index 1.
        /// </returns>
        public PairwiseAlignedSequence ConvertDeltaToSequences()
        {
            var startOffsets = new List<long>(2) { FirstSequenceStart, SecondSequenceStart };
            var endOffsets = new List<long>(2) { FirstSequenceEnd, SecondSequenceEnd };
            var insertions = new List<long>(2) { 0, 0 };

            // Copy the aligned range [start, end] (both inclusive) of each input sequence.
            var gappedReference = new List<byte>();
            for (long offset = this.FirstSequenceStart; offset <= this.FirstSequenceEnd; ++offset)
            {
                gappedReference.Add(this.ReferenceSequence[offset]);
            }

            var gappedQuery = new List<byte>();
            for (long offset = this.SecondSequenceStart; offset <= this.SecondSequenceEnd; ++offset)
            {
                gappedQuery.Add(this.QuerySequence[offset]);
            }

            // Side 0 is the reference, side 1 the query. Each delta advances the insert
            // position by its absolute value; a negative delta puts the gap symbol into the
            // reference, any other delta puts it into the query.
            var gappedSides = new[] { gappedReference, gappedQuery };
            int insertAt = 0;
            foreach (int delta in Deltas)
            {
                insertAt += Math.Abs(delta);
                int side = delta < 0 ? 0 : 1;
                gappedSides[side].Insert(insertAt - 1, DnaAlphabet.Instance.Gap);
                insertions[side]++;
            }

            var alignedSequence = new PairwiseAlignedSequence();

            // Reference: alphabet is auto-detected with no preferred alphabet supplied.
            byte[] referenceBytes = gappedReference.ToArray();
            IAlphabet detectedAlphabet = Alphabets.AutoDetectAlphabet(referenceBytes, 0, referenceBytes.GetLongLength(), null);
            var alignedReference = new Sequence(detectedAlphabet, referenceBytes, false);
            alignedReference.ID = ReferenceSequence.ID;
            alignedReference.Metadata = new Dictionary<string, object>(ReferenceSequence.Metadata);
            alignedSequence.FirstSequence = alignedReference;

            // Query: alphabet detection is given the original query alphabet.
            byte[] queryBytes = gappedQuery.ToArray();
            detectedAlphabet = Alphabets.AutoDetectAlphabet(queryBytes, 0, queryBytes.GetLongLength(), QuerySequence.Alphabet);
            var alignedQuery = new Sequence(detectedAlphabet, queryBytes, false);
            alignedQuery.ID = QuerySequence.ID;
            alignedQuery.Metadata = new Dictionary<string, object>(QuerySequence.Metadata);
            alignedSequence.SecondSequence = alignedQuery;

            alignedSequence.Metadata["StartOffsets"] = startOffsets;
            alignedSequence.Metadata["EndOffsets"] = endOffsets;
            alignedSequence.Metadata["Insertions"] = insertions;

            return alignedSequence;
        }
Exemple #44
0
        /// <summary>
        /// Runs MUMmerAligner.AlignSimple for the test case stored under the given XML node
        /// and compares the result with the expected sequences and score(s) read from the
        /// same node.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isFilePath">True to load the sequences from FASTA files named in the node, false to read them inline from the node.</param>
        /// <param name="isAlignList">True to call the AlignSimple overload taking one list (reference followed by the first query).</param>
        /// <param name="addParam">Additional parameter; PerformSimilarityMatrixChange switches the aligner to Blosum50.</param>
        void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isAlignList, AdditionalParameters addParam)
        {
            ISequence referenceSeq;
            ISequence firstQuery;
            IList<ISequence> querySeqs;

            if (isFilePath)
            {
                // Reference: first sequence of the FASTA file named in the configuration.
                string referencePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
                Assert.IsNotNull(referencePath);
                Assert.IsTrue(File.Exists(referencePath));

                FastAParser parser = new FastAParser();
                referenceSeq = parser.Parse(referencePath).FirstOrDefault();
                Assert.IsNotNull(referenceSeq);

                // Queries: every sequence of the query FASTA file.
                string queryPath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(queryPath);
                Assert.IsTrue(File.Exists(queryPath));

                querySeqs = parser.Parse(queryPath).ToList();
                firstQuery = querySeqs.First();
            }
            else
            {
                // Reference and query are given inline together with their alphabet names.
                string referenceText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
                referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

                string queryText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
                string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
                firstQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

                // In list mode the query only travels inside the align list below.
                querySeqs = new List<ISequence>();
                if (!isAlignList)
                {
                    querySeqs.Add(firstQuery);
                }
            }

            List<ISequence> alignList = null;
            if (isAlignList)
            {
                alignList = new List<ISequence> { referenceSeq, firstQuery };
            }

            // Setup the algorithm
            string mumLengthText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
            MUMmerAligner aligner = new MUMmerAligner
            {
                LengthOfMUM = long.Parse(mumLengthText, null),
                StoreMUMs = true
            };

            if (addParam == AdditionalParameters.PerformSimilarityMatrixChange)
            {
                aligner.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            }
            aligner.GapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

            // Run the alignment through the overload selected by isAlignList.
            IList<IPairwiseSequenceAlignment> align;
            if (isAlignList)
            {
                IEnumerable<ISequence> alignEnumSeqs = alignList;
                align = aligner.AlignSimple(alignEnumSeqs);
            }
            else
            {
                align = aligner.AlignSimple(referenceSeq, querySeqs);
            }

            // Validate MUMs Properties
            Assert.IsNotNull(aligner.MUMs);

            string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
            string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            if (querySeqs.Count > 1)
            {
                // Two expected alignments: scores are comma separated and the expected
                // sequences come as consecutive (first, second) pairs.
                string[] expectedScores = expectedScore.Split(',');
                for (int pair = 0; pair < 2; pair++)
                {
                    IPairwiseSequenceAlignment pairAlign = new PairwiseSequenceAlignment();
                    PairwiseAlignedSequence pairAligned = new PairwiseAlignedSequence
                    {
                        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[2 * pair]),
                        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[2 * pair + 1]),
                        Score = int.Parse(expectedScores[pair], null),
                        FirstOffset = Int32.MinValue,
                        SecondOffset = Int32.MinValue,
                    };
                    pairAlign.PairwiseAlignedSequences.Add(pairAligned);
                    expectedOutput.Add(pairAlign);
                }
            }
            else
            {
                // Single expected alignment with a single score.
                IPairwiseSequenceAlignment singleAlign = new PairwiseSequenceAlignment();
                PairwiseAlignedSequence singleAligned = new PairwiseAlignedSequence
                {
                    FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
                    SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
                    Score = Convert.ToInt32(expectedScore,null),
                    FirstOffset = Int32.MinValue,
                    SecondOffset = Int32.MinValue,
                };
                singleAlign.PairwiseAlignedSequences.Add(singleAligned);
                expectedOutput.Add(singleAlign);
            }

            Assert.IsTrue(CompareAlignment(align, expectedOutput));
        }